Hi!
[WCC/WPP 1.4]
- WPP: an empty _DATA segment is declared byte-aligned; a non-empty one is declared word-aligned.
- alignment differences; explicit zero initialization of static variables:
; int a, b = 0, c = 1;
WCC (word aligned, no optimization for explicit zero initialization):
CONST SEGMENT WORD PUBLIC USE16 'DATA'
CONST2 SEGMENT WORD PUBLIC USE16 'DATA'
_DATA SEGMENT WORD PUBLIC USE16 'DATA'
_b:
> DB 0, 0
WPP (byte aligned, extraneous references):
> EXTRN _small_code_:BYTE
CONST SEGMENT BYTE PUBLIC USE16 'DATA'
CONST2 SEGMENT BYTE PUBLIC USE16 'DATA'
_BSS SEGMENT WORD PUBLIC USE16 'BSS'
ORG 2
W?b$ni LABEL BYTE
- reusing modulo after division (and vice versa):
; unsigned a, b;
; void f (unsigned c, unsigned d) { a = c / d, b = c % d; }
> mov bx,ax
mov cx,dx
xor dx,dx
div cx
mov word ptr _a,ax
> mov ax,bx
> xor dx,dx
> div cx
mov word ptr _b,dx
- multiplication:
; int f (int x) { return x * 123; }
push cx
push dx
mov dx,ax
shl ax,1
shl ax,1
add ax,dx
mov cl,3
shl ax,cl
add ax,dx
mov dx,ax
shl ax,1
shl ax,1
sub ax,dx
pop dx
pop cx
Should be:
mov dx,123
imul dx
; int g (int x, int y) { return y * x; }
push bx
mov bx,ax
mov ax,dx
imul bx
pop bx
Should be (as for "x * y"):
imul dx
- indexing optimization:
; int a [5] [5]; int f (int i, int j) { return a [i] [j]; }
mov bx,dx
mov dx,ax
shl ax,1
shl ax,1
add ax,dx
shl ax,1
shl bx,1
add bx,ax
mov ax,word ptr [bx+_a]
Should be:
mov bx,ax
shl bx,1
shl bx,1
add bx,ax
add bx,dx
shl bx,1
mov ax,word ptr [bx+_a]
- array initialization:
; unsigned zero [100], nonzero [100];
; void f (void) { unsigned i; for (i = 0; i < 100; ++i) zero [i] = 0;
xor ax,ax
L$1:
mov bx,ax
shl bx,1
mov word ptr [bx+_zero],0
inc ax
cmp ax,64H
jb L$1
; for (i = 0; i < 100; ++i) nonzero [i] = 100; }
xor ax,ax
L$2:
mov bx,ax
shl bx,1
mov word ptr [bx+_nonzero],64H
inc ax
cmp ax,64H
jb L$2
BCC 3.1 generates:
; void f (void) { unsigned i; for (i = 0; i < 100; ++i) zero [i] = 0;
mov cx,100
mov di,offset DGROUP:_zero
push ds
pop es
xor ax,ax
rep stosw
; for (i = 0; i < 100; ++i) nonzero [i] = 100; }
mov cx,100
mov di,offset DGROUP:_nonzero
push ds
pop es
mov ax,100
rep stosw
- tail jump optimization in loops:
; int g (void), h (void); void f (void) { while (g ()) h (); }
f_:
call near ptr g_
test ax,ax
> je L$1
call near ptr h_
> jmp f_
L$1:
BCC 3.1 generates:
jmp short @1@86
@1@58:
call near ptr _h
@1@86:
call near ptr _g
or ax,ax
jne short @1@58
- loop optimization (with and without -os option):
; char p [5]; void f () { int i; for (i = 0; i < 5; ++i) if (p [i]) p [i] = 1; }
xor bx,bx
> jmp L$2
L$1:
inc bx
cmp bx,5
jge L$3
L$2:
cmp byte ptr [bx+_p],0
je L$1
mov byte ptr [bx+_p],1
> jmp L$1
L$3:
BCC 3.1 generates:
xor si,si
@1@58:
cmp byte ptr DGROUP:_p[si],0
je short @1@86
mov byte ptr DGROUP:_p[si],1
@1@86:
inc si
cmp si,5
jl short @1@58
Should be:
> mov bx,offset _p
L$2:
cmp byte ptr [bx],0
je L$1
mov byte ptr [bx],1
L$1:
inc bx
> cmp bx,offset _p+5
jb L$2
L$3:
- common conditions elimination:
; void g (void);
; void f (unsigned x, unsigned y) { if (x == 0) x = y; if (x) g(); }
test ax,ax
> jne L$1
mov ax,dx
L$1:
test ax,ax
jne L$2
ret
L$2:
jmp near ptr g_
Should be:
test ax,ax
jne L$2
test dx,dx
jne L$2
ret
L$2:
jmp near ptr g_
- code merging; adverse effect of "copy propagation" optimization:
; char ph [5];
; void f (void) { unsigned root, last, j;
; for (root = 0, last = root, j = root;;) {
xor ax,ax
> xor bx,bx
> xor si,si
; ph [j] = last, last = j; if (last != root) { j = root; continue; }
L$1:
mov byte ptr [si+_ph],bl
mov bx,si
cmp si,ax
je L$3
> L$2:
> mov si,ax
> jmp L$1
; do last = ph [last]; while (last != root);
L$3:
mov bl,byte ptr [bx+_ph]
xor bh,bh
cmp bx,ax
jne L$3
; j = last, last = ph [j], ph [j] = 5;
mov si,ax
mov bl,byte ptr [bx+_ph]
xor bh,ah
L$4:
mov byte ptr [si+_ph],5
; for (;;) {
; ++j; if (j < 5) break;
inc si
cmp si,5
jb L$1
; if (last != root) { j = last, last = ph [j], ph [j] = 5; continue; }
cmp bx,ax
jne L$5
; ++root; if (root < 5) { last = root, j = root; break; }
inc ax
cmp ax,5
jae L$6
> mov bx,ax
> jmp L$2
; return;
> L$5:
> mov si,bx
> mov bl,byte ptr [bx+_ph]
> xor bh,bh
> jmp L$4
; } } }
L$6:
BCC 3.1 generates:
; for (root = 0, last = root, j = root;;) {
xor cx,cx
@1@58:
mov dx,cx
@1@86:
mov si,cx
@1@114:
; ph [j] = last, last = j; if (last != root) { j = root; continue; }
mov byte ptr DGROUP:_ph[si],dl
mov dx,si
cmp dx,cx
jne short @1@86
@1@142:
; do last = ph [last]; while (last != root);
mov bx,dx
mov al,byte ptr DGROUP:_ph[bx]
cbw
mov dx,ax
cmp dx,cx
jne short @1@142
@1@170:
; j = last, last = ph [j], ph [j] = 5;
mov si,dx
mov al,byte ptr DGROUP:_ph[si]
cbw
mov dx,ax
mov byte ptr DGROUP:_ph[si],5
; for (;;) {
; ++j; if (j < 5) break;
inc si
cmp si,5
jb short @1@114
; if (last != root) { j = last, last = ph [j], ph [j] = 5; continue; }
cmp dx,cx
jne short @1@170
; ++root; if (root < 5) { last = root, j = root; break; }
inc cx
cmp cx,5
jb short @1@58
; return;
; } } }
- string literals merging:
WCC:
; void g (const char s []); void f (void) { g ("abc"); g ("abc"); g ("bc"); }
mov ax,offset DGROUP:L$1
call near ptr g_
mov ax,offset DGROUP:L$1
call near ptr g_
> mov ax,offset DGROUP:L$2
jmp near ptr g_
L$1:
DB 61H, 62H, 63H, 0
> L$2:
> DB 62H, 63H, 0
WPP:
; void g (const char s []); void f (void) { g ("abc"); g ("abc"); g ("bc"); }
mov ax,offset DGROUP:L$1
call near ptr `W?g$n(pnxa)v`
> mov ax,offset DGROUP:L$2
call near ptr `W?g$n(pnxa)v`
> mov ax,offset DGROUP:L$3
jmp near ptr `W?g$n(pnxa)v`
L$1:
DB 61H, 62H, 63H, 0
> L$2:
> DB 61H, 62H, 63H, 0
> L$3:
> DB 62H, 63H, 0
BCC 3.1 generates:
; void g (const char s []); void f (void) { g ("abc"); g ("abc"); g ("bc"); }
mov ax,offset DGROUP:s@
call near ptr _g
mov ax,offset DGROUP:s@
call near ptr _g
mov ax,offset DGROUP:s@+1
call near ptr _g
s@ label byte
db 'abc'
db 0
- range checking (e.g. ch >= '0' && ch <= '9') optimization:
; void x (void);
; void f (unsigned ch) { if (ch >= 10 || ch == 0) x (); }
cmp ax,0aH
jae L$2
test ax,ax
je L$2
L$1:
ret
; void g (unsigned ch) { if (ch > 9 || ch == 0) x (); }
cmp ax,9
ja L$2
test ax,ax
jne L$1
jmp near ptr x_
; void h (unsigned ch) { if ((ch-1) > 8) x (); }
dec ax
cmp ax,8
jbe L$1
L$2:
jmp near ptr x_
GCC -O2 generates identical code for all of the above constructs:
dec eax
cmp eax,0x00000008
ja X$1
-------------------------------------------------------
This SF.Net email is sponsored by xPML, a groundbreaking scripting language
that extends applications into web and mobile media. Attend the live webcast
and join the prime developer group breaking into this new coding territory!
http://sel.as-us.falkag.net/sel?cmd=lnk&kid=110944&bid=241720&dat=121642
_______________________________________________
Freedos-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/freedos-devel