On Tuesday, 11 October 2016 at 15:01:47 UTC, Andrei Alexandrescu
wrote:
On 10/11/2016 10:49 AM, Matthias Bentrup wrote:
/// Drops the first UTF-8 code point from `s`, computing its byte length
/// with x86 inline assembly that uses the carry flag instead of branches.
///
/// A first byte below 0x80 removes exactly one byte. Otherwise the
/// length is 1 plus one for each of the thresholds 0xc0, 0xe0 and 0xf0 that
/// the first byte reaches; a first byte of 0xf8 or above yields 1 again.
/// The computed length is clamped to `s.length` before slicing.
void popFrontAsmIntel(ref char[] s) @trusted pure nothrow {
// Reads the lead byte; indexing requires `s` to be non-empty.
immutable c = s[0];
if (c < 0x80) {
// Single-byte case: skip exactly one byte.
s = s[1 .. $];
} else {
// Deliberately left uninitialized; written by the asm block below.
uint l = void;
asm pure nothrow @nogc {
// EAX accumulates the length, starting at 1.
mov EAX, 1;
// BL = 0xf7 - c as an 8-bit value; for c >= 0xf8 this wraps around
// to a large unsigned value, so none of the compares below carry.
mov BL, 0xf8-1;
sub BL, c;
// Each cmp sets the carry flag when BL is (unsigned) below the
// constant, i.e. when c >= 0xc0 / 0xe0 / 0xf0 respectively, and the
// following adc adds that carry to EAX.
cmp BL, 0xf8-0xc0;
adc EAX, 0;
cmp BL, 0xf8-0xe0;
adc EAX, 0;
cmp BL, 0xf8-0xf0;
adc EAX, 0;
mov l, EAX;
// NOTE(review): this sequence writes BL (part of EBX) — confirm the
// compiler saves/restores it around the asm block.
}
// Clamp to the remaining length so a truncated sequence empties `s`
// instead of slicing out of bounds.
s = s[l <= $ ? l : $ .. $];
}
}
Did you take a look at the codegen on http://ldc.acomirei.ru?
It's huge. -- Andrei
Here are three branch-less variants that use the sign instead of
the carry bit.
The last one is the fastest on my machine, although it mixes the
rare error case and the common 1-byte case into one branch.
/// Removes the first UTF-8 code point from `s`.
///
/// The lead byte is reinterpreted as a signed byte so that each class of
/// lead byte can be recognised from the sign of a small sum:
///   - non-negative (0x00..0x7f): one byte is removed;
///   - -8..-1 (0xf8..0xff): one byte is removed;
///   - anything else: 1 to 4 bytes are removed, clamped to `s.length`.
void popFront1(ref char[] s) @trusted pure nothrow {
    import std.algorithm : min;

    immutable lead = cast(byte)s[0];

    // Non-negative lead byte: drop a single byte.
    if (lead >= 0) {
        s = s[1 .. $];
        return;
    }

    // Lead bytes 0xf8..0xff: also drop a single byte.
    if (lead >= -8) {
        s = s[1 .. $];
        return;
    }

    // Start from the maximum length (4) and subtract one for every
    // threshold the lead byte falls below. `(x) >> 31` on an int is -1 when
    // x is negative and 0 otherwise.
    int len = 4;
    len += (lead + 64) >> 31; // -1 for 0x80..0xbf
    len += (lead + 32) >> 31; // -1 for 0x80..0xdf
    len += (lead + 16) >> 31; // -1 for 0x80..0xef

    immutable skip = cast(uint)len;
    // Clamp so that a truncated sequence just empties the slice.
    s = s[min(skip, $) .. $];
}
/// Removes the first UTF-8 code point from `s`, with a single branch that
/// separates non-negative lead bytes from everything else.
///
/// The lead byte is reinterpreted as a signed byte. A non-negative value
/// (0x00..0x7f) removes one byte. For any other value the length is computed
/// without further branching: the 2/3/4-byte part of the length is masked
/// to zero for lead bytes 0xf8..0xff, so those remove a single byte. The
/// result is clamped to `s.length`.
void popFront1a(ref char[] s) @trusted pure nothrow {
    import std.algorithm : min;

    immutable c = cast(byte)s[0];
    if (c >= 0) {
        s = s[1 .. $];
    } else {
        // `(x) >> 31` on an int is -1 (all bits set) when x is negative and
        // 0 otherwise.
        //
        // extra: bytes beyond the first -> 0 for 0x80..0xbf, 1 for
        //        0xc0..0xdf, 2 for 0xe0..0xef, 3 for 0xf0 and above.
        // mask:  all ones when c < -8, zero for 0xf8..0xff, which forces
        //        the total length back to 1 for those bytes.
        immutable int extra = 3 + ((c + 64) >> 31) + ((c + 32) >> 31) + ((c + 16) >> 31);
        immutable int mask = (c + 8) >> 31;
        uint i = 1 + (extra & mask);
        // Clamp so that a truncated sequence just empties the slice.
        s = s[min(i, $) .. $];
    }
}
/// Removes the first UTF-8 code point from `s`.
///
/// The lead byte is reinterpreted as a signed byte. Values of -8 and above
/// (0x00..0x7f together with 0xf8..0xff) share one branch that removes a
/// single byte; every other value has its length (1 to 4) computed from
/// sign bits and clamped to `s.length`.
void popFront1b(ref char[] s) @trusted pure nothrow {
    import std.algorithm : min;

    immutable lead = cast(byte)s[0];
    if (lead < -8) {
        // `(x) >> 31` on an int is -1 when x is negative and 0 otherwise,
        // so each term takes one off the maximum length of 4.
        immutable int below64 = (lead + 64) >> 31; // -1 for 0x80..0xbf
        immutable int below32 = (lead + 32) >> 31; // -1 for 0x80..0xdf
        immutable int below16 = (lead + 16) >> 31; // -1 for 0x80..0xef
        uint skip = 4 + below64 + below32 + below16;
        // Clamp so that a truncated sequence just empties the slice.
        s = s[min(skip, $) .. $];
    } else {
        // Lead byte >= -8: drop a single byte.
        s = s[1 .. $];
    }
}