Hi Paul,
When one needs a fast test whether an addition of two 'signed char' or 'short'
overflows, the macros in intprops.h yield a valid answer, but the generated code
is not so well optimized. The functions my_signed1_overflow, my_signed2_overflow
in the attached file produce better machine code than the corresponding
functions signed1_overflow, signed2_overflow that use intprops.h primitives.
Similarly, on 64-bit platforms, my_signed4_overflow produces slightly better
machine code (no conditional branch) than signed4_overflow. On 32-bit platforms
it depends: on SPARC my_signed4_overflow is good as well, but not on i386
(because 64-bit computations on 32-bit CPUs need many registers, and i386
has few registers).
Would it be possible to include some of these tricks in intprops.h?
Bruno
#include "intprops.h"
/* Baseline for the comparison: report whether A + B falls outside the range
   of 'signed char', using the intprops.h primitive _GL_ADD_OVERFLOW with
   explicit bounds.  (An earlier variant of this function passed the same
   four arguments to INT_ADD_RANGE_OVERFLOW instead.)  */
int
signed1_overflow (signed char a, signed char b)
{
  /* Bounds of the 8-bit signed range, spelled as in the original call.  */
  const signed char lowest = (signed char) 0x80;
  const signed char highest = (signed char) 0x7F;

  return _GL_ADD_OVERFLOW (a, b, lowest, highest);
}
/* Branch-free test whether A + B overflows 'signed char'.
   Returns 1 on overflow, 0 otherwise.

   The sum is first truncated to 8 bits.  If nothing overflowed, subtracting
   A from the truncated sum gives back exactly B, so the XOR with B is 0.
   If it did overflow, the difference is B + 256 or B - 256, whose sign is
   opposite to B's, so the XOR is negative.

   The narrowing cast to 'signed char' is implementation-defined for values
   above 127; the code relies on it wrapping modulo 2^8, as GCC does.  */
int
my_signed1_overflow (signed char a, signed char b)
{
  const signed char wrapped =
    (signed char) ((unsigned char) a + (unsigned char) b);
  const int delta = (int) wrapped - (int) a;

  return (delta ^ (int) b) < 0;
}
/* Baseline for the comparison: report whether A + B falls outside the range
   of 'short', using the intprops.h primitive _GL_ADD_OVERFLOW with explicit
   bounds.  (An earlier variant of this function passed the same four
   arguments to INT_ADD_RANGE_OVERFLOW instead.)  */
int
signed2_overflow (short a, short b)
{
  /* Bounds of the 16-bit signed range, spelled as in the original call.  */
  const short lowest = (short) 0x8000;
  const short highest = (short) 0x7FFF;

  return _GL_ADD_OVERFLOW (a, b, lowest, highest);
}
/* Branch-free test whether A + B overflows 'short'.
   Returns 1 on overflow, 0 otherwise.

   The sum is first truncated to 16 bits.  Without overflow, the truncated
   sum minus A equals B and the XOR with B is 0.  With overflow, that
   difference is B + 65536 or B - 65536, whose sign is opposite to B's, so
   the XOR is negative.

   The narrowing cast to 'short' is implementation-defined for values above
   32767; the code relies on it wrapping modulo 2^16, as GCC does.  */
int
my_signed2_overflow (short a, short b)
{
  const short wrapped = (short) ((unsigned short) a + (unsigned short) b);
  const int delta = (int) wrapped - (int) a;

  return (delta ^ (int) b) < 0;
}
/* Baseline for the comparison: report whether A + B overflows 'int', using
   the type-generic intprops.h macro INT_ADD_OVERFLOW.  (Earlier variants of
   this function called INT_ADD_RANGE_OVERFLOW or _GL_ADD_OVERFLOW with the
   explicit bounds (int) 0x80000000 and (int) 0x7FFFFFFF.)  */
int
signed4_overflow (int a, int b)
{
  const int overflowed = INT_ADD_OVERFLOW (a, b);

  return overflowed;
}
/* Branch-free test whether A + B overflows 'int'.
   Returns 1 on overflow, 0 otherwise.

   The sum is computed with unsigned wrap-around and converted back to
   'int'; the remaining arithmetic is done in 'long long' so that it cannot
   itself overflow.  Without overflow, the wrapped sum minus A equals B and
   the XOR with B is 0.  With overflow, that difference is off by 2^32 and
   has the opposite sign to B, so the XOR is negative.

   The unsigned-to-int conversion is implementation-defined for values above
   INT_MAX; the code relies on it wrapping modulo 2^32, as GCC does, and on
   'long long' being wider than 'int'.  */
int
my_signed4_overflow (int a, int b)
{
  const int wrapped = (int) ((unsigned int) a + (unsigned int) b);
  const long long delta = (long long) wrapped - (long long) a;

  return (delta ^ (long long) b) < 0;
}
#ifdef TEST
#include <stdio.h>
/* Self-test: check that each intprops.h-based function agrees with its
   hand-optimized counterpart on a 16 x 16 grid of operands.  The operands
   are the values -8 .. 7 scaled so that they occupy the four most
   significant bits of the type under test, which exercises both overflowing
   and non-overflowing sums.  Prints one line per disagreement and always
   returns 0.

   The operands are built by multiplication rather than with '<<', because
   left-shifting a negative value is undefined behavior in C; every product
   below is representable in a 32-bit 'int'.  */
int main (void)
{
  int u, v;

  /* Verify that signed1_overflow and my_signed1_overflow agree. */
  for (u = -0x8; u <= 0x7; u++)
    for (v = -0x8; v <= 0x7; v++)
      {
        int a = u * 0x10;
        int b = v * 0x10;
        int x = signed1_overflow (a, b);
        int y = my_signed1_overflow (a, b);
        if (x != y)
          printf ("signed1 mistake: a=%d b=%d x=%d y=%d\n", a, b, x, y);
      }

  /* Verify that signed2_overflow and my_signed2_overflow agree. */
  for (u = -0x8; u <= 0x7; u++)
    for (v = -0x8; v <= 0x7; v++)
      {
        int a = u * 0x1000;
        int b = v * 0x1000;
        int x = signed2_overflow (a, b);
        int y = my_signed2_overflow (a, b);
        if (x != y)
          printf ("signed2 mistake: a=%d b=%d x=%d y=%d\n", a, b, x, y);
      }

  /* Verify that signed4_overflow and my_signed4_overflow agree. */
  for (u = -0x8; u <= 0x7; u++)
    for (v = -0x8; v <= 0x7; v++)
      {
        int a = u * 0x10000000;
        int b = v * 0x10000000;
        int x = signed4_overflow (a, b);
        int y = my_signed4_overflow (a, b);
        if (x != y)
          printf ("signed4 mistake: a=%d b=%d x=%d y=%d\n", a, b, x, y);
      }

  return 0;
}
#endif
.file "foo.c"
# Compiler-generated x86-64 code for signed1_overflow (the intprops.h-based
# variant).  Judging by the registers used, a arrives in %dil and b in %sil,
# and the 0/1 result is returned in %eax.  Note the conditional branch on the
# sign of b.
.section .text.unlikely,"ax",@progbits
.LCOLDB0:
.text
.LHOTB0:
.p2align 4,,15
.globl signed1_overflow
.type signed1_overflow, @function
signed1_overflow:
.LFB0:
.cfi_startproc
# Branch to .L5 when b is negative.
testb %sil, %sil
js .L5
# b >= 0: result = (127 - b < a), computed on the sign-extended operands.
movsbl %sil, %esi
movl $127, %eax
movsbl %dil, %edi
subl %esi, %eax
cmpl %edi, %eax
setl %al
# Widen the 0/1 flag to the full return register.
movzbl %al, %eax
ret
.p2align 4,,10
.p2align 3
.L5:
# b < 0: result = (a < -128 - b), computed on the sign-extended operands.
movsbl %sil, %esi
movl $-128, %eax
movsbl %dil, %edi
subl %esi, %eax
cmpl %eax, %edi
setl %al
movzbl %al, %eax
ret
.cfi_endproc
.LFE0:
.size signed1_overflow, .-signed1_overflow
.section .text.unlikely
.LCOLDE0:
.text
.LHOTE0:
# Compiler-generated x86-64 code for my_signed1_overflow (the hand-optimized
# variant).  Judging by the registers used, a arrives in %dil and b in %sil,
# and the 0/1 result is returned in %eax.  Straight-line code, no branch.
.section .text.unlikely
.LCOLDB1:
.text
.LHOTB1:
.p2align 4,,15
.globl my_signed1_overflow
.type my_signed1_overflow, @function
my_signed1_overflow:
.LFB1:
.cfi_startproc
# eax = a + b; only its low byte is used below.
leal (%rsi,%rdi), %eax
# Sign-extend a and b from 8 to 32 bits.
movsbl %dil, %edi
movsbl %sil, %esi
# Truncate the sum to 8 bits and sign-extend it.
movsbl %al, %eax
# eax = (truncated sum - a) ^ b
subl %edi, %eax
xorl %esi, %eax
# Move the sign bit into bit 0: this is the "< 0" test, yielding 0 or 1.
shrl $31, %eax
ret
.cfi_endproc
.LFE1:
.size my_signed1_overflow, .-my_signed1_overflow
.section .text.unlikely
.LCOLDE1:
.text
.LHOTE1:
# Compiler-generated x86-64 code for signed2_overflow (the intprops.h-based
# variant).  Judging by the registers used, a arrives in %di and b in %si,
# and the 0/1 result is returned in %eax.  Note the conditional branch on the
# sign of b.
.section .text.unlikely
.LCOLDB2:
.text
.LHOTB2:
.p2align 4,,15
.globl signed2_overflow
.type signed2_overflow, @function
signed2_overflow:
.LFB2:
.cfi_startproc
# Branch to .L10 when b is negative.
testw %si, %si
js .L10
# b >= 0: result = (32767 - b < a), computed on the sign-extended operands.
movswl %si, %esi
movl $32767, %eax
movswl %di, %edi
subl %esi, %eax
cmpl %edi, %eax
setl %al
# Widen the 0/1 flag to the full return register.
movzbl %al, %eax
ret
.p2align 4,,10
.p2align 3
.L10:
# b < 0: result = (a < -32768 - b), computed on the sign-extended operands.
movswl %si, %esi
movl $-32768, %eax
movswl %di, %edi
subl %esi, %eax
cmpl %eax, %edi
setl %al
movzbl %al, %eax
ret
.cfi_endproc
.LFE2:
.size signed2_overflow, .-signed2_overflow
.section .text.unlikely
.LCOLDE2:
.text
.LHOTE2:
# Compiler-generated x86-64 code for my_signed2_overflow (the hand-optimized
# variant).  Judging by the registers used, a arrives in %di and b in %si,
# and the 0/1 result is returned in %eax.  Straight-line code, no branch.
.section .text.unlikely
.LCOLDB3:
.text
.LHOTB3:
.p2align 4,,15
.globl my_signed2_overflow
.type my_signed2_overflow, @function
my_signed2_overflow:
.LFB3:
.cfi_startproc
# eax = a + b; only its low 16 bits are used below.
leal (%rsi,%rdi), %eax
# Sign-extend a and b from 16 to 32 bits.
movswl %di, %edi
movswl %si, %esi
# Truncate the sum to 16 bits and sign-extend it (%ax -> %eax).
cwtl
# eax = (truncated sum - a) ^ b
subl %edi, %eax
xorl %esi, %eax
# Move the sign bit into bit 0: this is the "< 0" test, yielding 0 or 1.
shrl $31, %eax
ret
.cfi_endproc
.LFE3:
.size my_signed2_overflow, .-my_signed2_overflow
.section .text.unlikely
.LCOLDE3:
.text
.LHOTE3:
# Compiler-generated x86-64 code for signed4_overflow (the intprops.h-based
# variant).  Judging by the registers used, a arrives in %edi and b in %esi,
# and the 0/1 result is returned in %eax.  Note the conditional branch on the
# sign of b.
.section .text.unlikely
.LCOLDB4:
.text
.LHOTB4:
.p2align 4,,15
.globl signed4_overflow
.type signed4_overflow, @function
signed4_overflow:
.LFB4:
.cfi_startproc
# Branch to .L15 when b is negative.
testl %esi, %esi
js .L15
# b >= 0: result = (a > 2147483647 - b).
movl $2147483647, %eax
subl %esi, %eax
cmpl %eax, %edi
setg %al
# Widen the 0/1 flag to the full return register.
movzbl %al, %eax
ret
.p2align 4,,10
.p2align 3
.L15:
# b < 0: result = (-2147483648 - b > a).
movl $-2147483648, %eax
subl %esi, %eax
cmpl %edi, %eax
setg %al
movzbl %al, %eax
ret
.cfi_endproc
.LFE4:
.size signed4_overflow, .-signed4_overflow
.section .text.unlikely
.LCOLDE4:
.text
.LHOTE4:
# Compiler-generated x86-64 code for my_signed4_overflow (the hand-optimized
# variant).  Judging by the registers used, a arrives in %edi and b in %esi;
# the 0/1 result ends up in %rax (its low half %eax being the int return
# value).  Straight-line code, no branch, using 64-bit arithmetic.
.section .text.unlikely
.LCOLDB5:
.text
.LHOTB5:
.p2align 4,,15
.globl my_signed4_overflow
.type my_signed4_overflow, @function
my_signed4_overflow:
.LFB5:
.cfi_startproc
# eax = low 32 bits of a + b (the wrapped sum).
leal (%rdi,%rsi), %eax
# Sign-extend a and b from 32 to 64 bits.
movslq %edi, %rdi
movslq %esi, %rsi
# Sign-extend the wrapped sum from %eax to %rax.
cltq
# rax = (wrapped sum - a) ^ b, in 64-bit arithmetic
subq %rdi, %rax
xorq %rsi, %rax
# Move the sign bit into bit 0: this is the "< 0" test, yielding 0 or 1.
shrq $63, %rax
ret
.cfi_endproc
.LFE5:
.size my_signed4_overflow, .-my_signed4_overflow
.section .text.unlikely
.LCOLDE5:
.text
.LHOTE5:
.ident "GCC: (Ubuntu 5.4.0-6ubuntu1~16.04.12) 5.4.0 20160609"
.section .note.GNU-stack,"",@progbits