On Sun, 27 Oct 2013, Richard Sandiford wrote:

> This patch adds some more optimisations to the wi:: comparison functions.
> It uses the:
>
>   #define CONSTANT(X) (__builtin_constant_p (X) && (X))
>
> idiom that was mentioned before, except that I thought CONSTANT would be
> too easily confused with CONSTANT_P, so I went for CAN_TELL instead.
> Better names welcome.

STATIC_CONSTANT_P similar to static_assert?
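For what it's worth, the __builtin_constant_p (X) && (X) idiom (under
whatever name) behaves as in the minimal standalone sketch below.
STATIC_CONSTANT_P is just the spelling suggested above and eq_in_prec is a
made-up helper, not part of the patch; the point is that the guard is true
only when the compiler can already prove the condition, so the fast path is
either folded in or discarded entirely and never becomes a runtime test.

  #include <stdbool.h>

  /* Sketch only: the same idiom as the patch's CAN_TELL.  */
  #define STATIC_CONSTANT_P(X) (__builtin_constant_p (X) && (X))

  /* Compare X and Y as PREC-bit values (PREC <= 64, with 64 standing in
     for HOST_BITS_PER_WIDE_INT), assuming the bits of X above PREC are
     copies of its sign bit or zeros.  */
  static inline bool
  eq_in_prec (unsigned long x, unsigned long y, unsigned int prec)
  {
    /* After inlining a call such as eq_in_prec (x, 0, prec), the compiler
       can prove y == 0 and keep only the cheap test below; for a
       non-constant Y the whole branch folds away.  */
    if (STATIC_CONSTANT_P (y == 0))
      return x == 0;
    /* Otherwise flush out the excess bits before comparing.  */
    unsigned long diff = x ^ y;
    int excess = 64 - (int) prec;
    if (excess > 0)
      diff <<= excess;
    return diff == 0;
  }

The f1 listing below is what the corresponding eq_p fast path boils down to
once the constant 0 has been seen.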
> The changes are:
>
> - Add a fast path to eq_p for when one of the inputs isn't sign-extended.
>   This includes code to handle compile-time 0 specially.
>
> - Add the opposite optimisation to Mike's lts_p change, if we can tell at
>   compile time that it applies.

I think the cases Mike added should only be enabled when we can figure
them out at compile-time, too.

Ok with that change.

Thanks,
Richard.

> - Add fast paths to ltu_p for constants.
>
> E.g.:
>
> bool
> f1 (const_tree x)
> {
>   return wi::eq_p (x, 0);
> }
>
> now gives:
>
>         xorl    %eax, %eax
>         cmpw    $1, 4(%rdi)
>         je      .L5
>         rep ret
>         .p2align 4,,10
>         .p2align 3
> .L5:
>         cmpq    $0, 16(%rdi)
>         sete    %al
>         ret
>
> bool
> f2 (const_tree x, HOST_WIDE_INT y)
> {
>   return wi::eq_p (x, y);
> }
>
> gives:
>
>         movq    8(%rdi), %rax
>         movzwl  52(%rax), %edx
>         xorl    %eax, %eax
>         andw    $1023, %dx
>         cmpw    $1, 4(%rdi)
>         je      .L10
>         rep ret
>         .p2align 4,,10
>         .p2align 3
> .L10:
>         xorq    16(%rdi), %rsi
>         movzwl  %dx, %edx
>         movl    $64, %ecx
>         subl    %edx, %ecx
>         movq    %rsi, %rax
>         salq    %cl, %rax
>         testl   %ecx, %ecx
>         cmovg   %rax, %rsi
>         testq   %rsi, %rsi
>         sete    %al
>         ret
>
> bool
> f3 (HOST_WIDE_INT x, const_tree y)
> {
>   return wi::lts_p (x, y);
> }
>
> is similarly ugly because of the way that it ignores TYPE_SIGN and so has
> to explicitly sign-extend "small-prec" cases:
>
>         movq    8(%rsi), %rax
>         movzwl  4(%rsi), %ecx
>         movzwl  52(%rax), %edx
>         andl    $1023, %edx
>         cmpl    $1, %ecx
>         je      .L16
>         leal    -1(%rcx), %eax
>         sall    $6, %ecx
>         subl    %edx, %ecx
>         movq    16(%rsi,%rax,8), %rax
>         movq    %rax, %rdx
>         salq    %cl, %rdx
>         testl   %ecx, %ecx
>         cmovg   %rdx, %rax
>         sarq    $63, %rax
>         addl    $1, %eax
>         ret
>         .p2align 4,,10
>         .p2align 3
> .L16:
>         cmpl    $63, %edx
>         movq    16(%rsi), %rax
>         ja      .L13
>         movb    $64, %cl
>         subl    %edx, %ecx
>         salq    %cl, %rax
>         sarq    %cl, %rax
> .L13:
>         cmpq    %rdi, %rax
>         setg    %al
>         ret
>
> but:
>
> bool
> f4 (HOST_WIDE_INT x, const_tree y)
> {
>   return wi::lts_p (x, wi::to_widest (y));
> }
>
> is a bit more respectable:
>
>         movzwl  6(%rsi), %eax
>         cmpl    $1, %eax
>         je      .L20
>         subl    $1, %eax
>         movq    16(%rsi,%rax,8), %rax
>         sarq    $63, %rax
>         addl    $1, %eax
>         ret
>         .p2align 4,,10
>         .p2align 3
> .L20:
>         cmpq    %rdi, 16(%rsi)
>         setg    %al
>         ret
>
> For similar reasons:
>
> bool
> f5 (const_tree x)
> {
>   return wi::ltu_p (x, 100);
> }
>
> gives:
>
>         movq    8(%rdi), %rax
>         movzwl  52(%rax), %ecx
>         xorl    %eax, %eax
>         andw    $1023, %cx
>         cmpw    $1, 4(%rdi)
>         je      .L26
>         rep ret
>         .p2align 4,,10
>         .p2align 3
> .L26:
>         cmpw    $63, %cx
>         ja      .L23
>         movl    $1, %eax
>         salq    %cl, %rax
>         subq    $1, %rax
>         andq    16(%rdi), %rax
> .L24:
>         cmpq    $99, %rax
>         setbe   %al
>         ret
>         .p2align 4,,10
>         .p2align 3
> .L23:
>         movq    16(%rdi), %rax
>         jmp     .L24
>
> but:
>
> bool
> f6 (const_tree x)
> {
>   return wi::ltu_p (wi::to_widest (x), 100);
> }
>
> gives:
>
>         xorl    %eax, %eax
>         cmpw    $1, 6(%rdi)
>         je      .L30
>         rep ret
>         .p2align 4,,10
>         .p2align 3
> .L30:
>         cmpq    $99, 16(%rdi)
>         setbe   %al
>         ret
>
> Tested on powerpc64-linux-gnu and x86_64-linux-gnu.  OK for wide-int?
>
> Thanks,
> Richard
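As an aside, the sign-based reasoning behind the lts_p fast paths discussed
above (and extended in the patch below) can be restated outside the
wide-int classes.  This is only a sketch over a simplified multi-word
representation; struct wide, negative_p and lts_sketch are made-up names,
and the real code works on WIDE_INT_REF_FOR objects and uses fits_shwi_p
and neg_p rather than bare length and sign tests.

  #include <stdbool.h>

  /* Sketch: a signed value stored as LEN 64-bit words, least significant
     first, in canonical form, so LEN > 1 means the value does not fit in
     a single signed word.  */
  struct wide
  {
    const long *val;
    unsigned int len;
  };

  static inline bool
  negative_p (const struct wide *w)
  {
    /* The top word carries the sign.  */
    return w->val[w->len - 1] < 0;
  }

  /* Signed X < Y.  */
  static inline bool
  lts_sketch (const struct wide *x, const struct wide *y)
  {
    if (y->len == 1)
      {
        /* Both single-word: an ordinary signed compare.  */
        if (x->len == 1)
          return x->val[0] < y->val[0];
        /* X needs more words, so it lies outside the single-word range;
           its sign alone decides the comparison.  */
        return negative_p (x);
      }
    if (x->len == 1)
      /* The mirror image: a multi-word Y is below every single-word value
         if negative and above every single-word value otherwise.  */
      return !negative_p (y);
    /* General case omitted; the patch defers to lts_p_large here.  */
    return false;
  }

The second half of this corresponds to the "opposite case" that the patch
only enables when CAN_TELL can prove xi.len == 1 at compile time.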
> Index: gcc/system.h
> ===================================================================
> --- gcc/system.h        2013-10-27 14:25:19.144723977 +0000
> +++ gcc/system.h        2013-10-27 14:25:20.716738045 +0000
> @@ -711,6 +711,12 @@ #define gcc_unreachable() __builtin_unre
>  #define gcc_unreachable() (fancy_abort (__FILE__, __LINE__, __FUNCTION__))
>  #endif
>
> +#if GCC_VERSION >= 3001
> +#define CAN_TELL(X) (__builtin_constant_p (X) && (X))
> +#else
> +#define CAN_TELL(X) (false && (X))
> +#endif
> +
>  /* Until we can use STATIC_ASSERT.  */
>  #define STATIC_ASSERT(X) \
>    typedef int assertion1[(X) ? 1 : -1] ATTRIBUTE_UNUSED
> Index: gcc/wide-int.h
> ===================================================================
> --- gcc/wide-int.h      2013-10-27 14:25:19.144723977 +0000
> +++ gcc/wide-int.h      2013-10-27 14:37:34.834443832 +0000
> @@ -1495,6 +1495,7 @@ wi::eq_p (const T1 &x, const T2 &y)
>    WIDE_INT_REF_FOR (T2) yi (y, precision);
>    if (xi.is_sign_extended && yi.is_sign_extended)
>      {
> +      /* This case reduces to array equality.  */
>        if (xi.len != yi.len)
>          return false;
>        unsigned int i = 0;
> @@ -1504,10 +1505,21 @@ wi::eq_p (const T1 &x, const T2 &y)
>        while (++i != xi.len);
>        return true;
>      }
> -  if (precision <= HOST_BITS_PER_WIDE_INT)
> +  if (yi.len == 1)
>      {
> -      unsigned HOST_WIDE_INT diff = xi.ulow () ^ yi.ulow ();
> -      return (diff << (-precision % HOST_BITS_PER_WIDE_INT)) == 0;
> +      /* XI is only equal to YI if it too has a single HWI.  */
> +      if (xi.len != 1)
> +        return false;
> +      /* Excess bits in xi.val[0] will be signs or zeros, so comparisons
> +         with 0 are simple.  */
> +      if (CAN_TELL (yi.val[0] == 0))
> +        return xi.val[0] == 0;
> +      /* Otherwise flush out any excess bits first.  */
> +      unsigned HOST_WIDE_INT diff = xi.val[0] ^ yi.val[0];
> +      int excess = HOST_BITS_PER_WIDE_INT - precision;
> +      if (excess > 0)
> +        diff <<= excess;
> +      return diff == 0;
>      }
>    return eq_p_large (xi.val, xi.len, yi.val, yi.len, precision);
>  }
> @@ -1528,20 +1540,28 @@ wi::lts_p (const T1 &x, const T2 &y)
>    unsigned int precision = get_binary_precision (x, y);
>    WIDE_INT_REF_FOR (T1) xi (x, precision);
>    WIDE_INT_REF_FOR (T2) yi (y, precision);
> -  // We optimize x < y, where y is 64 or fewer bits.
> +  /* We optimize x < y, where y is 64 or fewer bits.  */
>    if (wi::fits_shwi_p (yi))
>      {
> -      // If x fits directly into a shwi, we can compare directly.
> +      /* Make lts_p (x, 0) as efficient as wi::neg_p (x).  */
> +      if (CAN_TELL (yi.val[0] == 0))
> +        return neg_p (xi);
> +      /* If x fits directly into a shwi, we can compare directly.  */
>        if (wi::fits_shwi_p (xi))
>          return xi.to_shwi () < yi.to_shwi ();
> -      // If x doesn't fit and is negative, then it must be more
> -      // negative than any value in y, and hence smaller than y.
> -      if (neg_p (xi, SIGNED))
> +      /* If x doesn't fit and is negative, then it must be more
> +         negative than any value in y, and hence smaller than y.  */
> +      if (neg_p (xi))
>          return true;
> -      // If x is positive, then it must be larger than any value in y,
> -      // and hence greater than y.
> +      /* If x is positive, then it must be larger than any value in y,
> +         and hence greater than y.  */
>        return false;
>      }
> +  /* Optimize the opposite case, if it can be detected at compile time.  */
> +  if (CAN_TELL (xi.len == 1))
> +    /* If YI is negative it is lower than the least HWI.
> +       If YI is positive it is greater than the greatest HWI.  */
> +    return !neg_p (yi);
>    return lts_p_large (xi.val, xi.len, precision, yi.val, yi.len);
>  }
>
> @@ -1553,6 +1573,12 @@ wi::ltu_p (const T1 &x, const T2 &y)
>    unsigned int precision = get_binary_precision (x, y);
>    WIDE_INT_REF_FOR (T1) xi (x, precision);
>    WIDE_INT_REF_FOR (T2) yi (y, precision);
> +  /* Optimize comparisons with constants and with sub-HWI unsigned
> +     integers.  */
> +  if (CAN_TELL (yi.len == 1 && yi.val[0] >= 0))
> +    return xi.len == 1 && xi.to_uhwi () < (unsigned HOST_WIDE_INT) yi.val[0];
> +  if (CAN_TELL (xi.len == 1 && xi.val[0] >= 0))
> +    return yi.len != 1 || yi.to_uhwi () > (unsigned HOST_WIDE_INT) xi.val[0];
>    if (precision <= HOST_BITS_PER_WIDE_INT)
>      {
>        unsigned HOST_WIDE_INT xl = xi.to_uhwi ();
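Similarly, the new ltu_p fast path for compile-time constants reduces to a
length check plus one unsigned compare.  Again a sketch only, with made-up
names (struct uwide, ltu_const_sketch) and a canonical unsigned
representation in which extra words are used only when the value needs them.

  #include <stdbool.h>

  /* Sketch: an unsigned value stored as LEN 64-bit words, least
     significant first, with no redundant leading words.  */
  struct uwide
  {
    const unsigned long *val;
    unsigned int len;
  };

  /* X < BOUND, where BOUND is a single-word compile-time constant such as
     the 100 in wi::ltu_p (x, 100).  */
  static inline bool
  ltu_const_sketch (const struct uwide *x, unsigned long bound)
  {
    /* A value that needs more than one word is at least 2^64 and so
       cannot be below a single-word bound; otherwise the low word is the
       whole value and an ordinary unsigned compare suffices.  */
    return x->len == 1 && x->val[0] < bound;
  }

That is essentially what the f6 listing above shows: one length test
followed by a single cmpq against $99.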
--
Richard Biener <rguent...@suse.de>
SUSE / SUSE Labs
SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746
GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer