On Tuesday, 11 October 2016 at 15:01:47 UTC, Andrei Alexandrescu wrote:
On 10/11/2016 10:49 AM, Matthias Bentrup wrote:

// Advances `s` past its first UTF-8 sequence, using x86 inline assembly to
// derive the sequence length from the lead byte without branching.
//
// Only the lead byte s[0] is inspected; continuation bytes are not checked.
// s[0] is read unconditionally, so `s` must not be empty.
// Resulting skip lengths by lead byte:
//   0x00..0x7F -> 1, 0x80..0xBF -> 1, 0xC0..0xDF -> 2,
//   0xE0..0xEF -> 3, 0xF0..0xF7 -> 4, 0xF8..0xFF -> 1
void popFrontAsmIntel(ref char[] s) @trusted pure nothrow {
  immutable c = s[0];
  if (c < 0x80) {
    // Single-byte (ASCII) case: skip exactly one byte.
    s = s[1 .. $];
  } else {
    // Left uninitialized on purpose; the asm block below stores into it.
    uint l = void;
    asm pure nothrow @nogc {
      // Start with a length of 1.
      mov EAX, 1;
      // BL = 0xF7 - c (8-bit, wraps around when c >= 0xF8).
      mov BL, 0xf8-1;
      sub BL, c;
      // Each cmp sets the carry flag when BL is below the constant (unsigned),
      // and the following adc adds that carry to the length.
      // BL < 0x38  <=>  0xC0 <= c <= 0xF7
      cmp BL, 0xf8-0xc0;
      adc EAX, 0;
      // BL < 0x18  <=>  0xE0 <= c <= 0xF7
      cmp BL, 0xf8-0xe0;
      adc EAX, 0;
      // BL < 0x08  <=>  0xF0 <= c <= 0xF7
      cmp BL, 0xf8-0xf0;
      adc EAX, 0;
      // For c >= 0xF8 the subtraction wrapped to >= 0xF8, so no carry is ever
      // set and the length stays 1.
      mov l, EAX;
    }
    // Clamp the skip to the slice length so a truncated sequence empties `s`
    // instead of slicing out of bounds.
    s = s[l <= $ ? l : $ .. $];
  }
}

Did you take a look at the codegen on http://ldc.acomirei.ru? It's huge. -- Andrei

Here are three branch-less variants that use the sign bit instead of the carry flag.

The last one is the fastest on my machine, although it mixes the rare error case and the common 1-byte case into one branch.

/// Drops the first UTF-8 sequence from `s`, looking only at the lead byte.
///
/// The lead byte is reinterpreted as a signed byte so that every range test
/// becomes a sign test:
///   lead >= 0          : ASCII, skip 1
///   -8 <= lead < 0     : 0xF8..0xFF (invalid lead), skip 1
///   lead < -8          : 0x80..0xF7, skip 1..4 computed without branches
/// The skip is clamped to the slice length, so a truncated sequence leaves `s`
/// empty. `s` must not be empty on entry (s[0] is read unconditionally).
void popFront1(ref char[] s) @trusted pure nothrow {
  import std.algorithm;

  immutable lead = cast(byte)s[0];

  // Common case: plain ASCII.
  if (lead >= 0) {
    s = s[1 .. $];
    return;
  }

  // Rare case: lead byte 0xF8..0xFF, treated as a one-byte sequence.
  if (lead >= -8) {
    s = s[1 .. $];
    return;
  }

  // Each mask is -1 (all bits set) when the lead byte is below the given
  // threshold and 0 otherwise: the arithmetic shift smears the sign bit.
  immutable int belowC0 = (lead + 64) >> 31;
  immutable int belowE0 = (lead + 32) >> 31;
  immutable int belowF0 = (lead + 16) >> 31;

  // Start from the longest form (4) and take one off per threshold missed.
  uint width = 4 + belowC0 + belowE0 + belowF0;
  s = s[min(width, $) .. $];
}

/// Drops the first UTF-8 sequence from `s`, looking only at the lead byte.
///
/// Variant with a single branch (ASCII vs. everything else); the invalid
/// lead bytes 0xF8..0xFF are folded into the branch-less computation through
/// an extra mask instead of a separate `else`.
///
/// Skip lengths by lead byte:
///   0x00..0x7F -> 1, 0x80..0xBF -> 1, 0xC0..0xDF -> 2,
///   0xE0..0xEF -> 3, 0xF0..0xF7 -> 4, 0xF8..0xFF -> 1
/// The skip is clamped to the slice length, so a truncated sequence leaves `s`
/// empty. `s` must not be empty on entry (s[0] is read unconditionally).
void popFront1a(ref char[] s) @trusted pure nothrow {
  immutable c = cast(byte)s[0];
  if (c >= 0) {
    // ASCII: one byte.
    s = s[1 .. $];
  } else {
    // (c + k) >> 31 is -1 when c < -k and 0 otherwise (arithmetic shift of the
    // promoted int). The three inner terms reduce 3 extra bytes down to 0..3;
    // the final mask (c + 8) >> 31 zeroes those extra bytes for c >= -8,
    // i.e. for the invalid lead bytes 0xF8..0xFF, giving a skip of 1.
    uint i = 1 + ((3 + ((c + 64) >> 31) + ((c + 32) >> 31) + ((c + 16) >> 31))
                  & ((c + 8) >> 31));
    import std.algorithm;
    s = s[min(i, $) .. $];
  }
}

/// Drops the first UTF-8 sequence from `s`, looking only at the lead byte.
///
/// Variant that handles ASCII (0x00..0x7F) and the invalid lead bytes
/// 0xF8..0xFF in one shared branch: as a signed byte both satisfy
/// `lead >= -8`. Every other lead byte (0x80..0xF7) gets a branch-less length
/// of 1..4. The skip is clamped to the slice length, so a truncated sequence
/// leaves `s` empty. `s` must not be empty on entry.
void popFront1b(ref char[] s) @trusted pure nothrow {
  import std.algorithm;

  immutable lead = cast(byte)s[0];

  // One-byte cases: ASCII and the 0xF8..0xFF lead bytes.
  if (lead >= -8) {
    s = s[1 .. $];
    return;
  }

  // Sign masks: -1 when the lead byte lies below the threshold, else 0.
  immutable int underC0 = (lead + 64) >> 31;
  immutable int underE0 = (lead + 32) >> 31;
  immutable int underF0 = (lead + 16) >> 31;

  // 4 for 0xF0..0xF7, one less for each threshold the lead byte falls under.
  uint count = 4 + underC0 + underE0 + underF0;
  s = s[min(count, $) .. $];
}

Reply via email to