Test for array literal arguments?

bearophile Tue, 05 Jun 2012 16:53:26 -0700

Warning: this post contains some partially uncooked ideas.


I hope Walter will read this post :-)

First of all the little problem. I'd like:

[1, 3, 7].canFind(x)

To be compiled with an in-lined:

x == 1 || x == 3 || x == 7

(A very optimizing D back-end is able to inline the arraycreation, see that the array length is known at compile-time, tounroll the search loop according to that length, doing what I amasking here. Maybe LDC2 with link-time optimization is able to doit. DMD is not able to do it. Having a very opitimizing back-endis good, but languages like C and Go (and Java as acounter-example) show that not _requiring_ a very optimizingback-end is good for a language).


------------------------------

This is a starting point for the discussion, it's a modified andsimplified implementation of canFind() that also contains anoptimization for short fixed-sized arrays:




import std.stdio: writeln;
import std.traits: ForeachType, isStaticArray;
import std.string: xformat, join;

bool canFind(Range, T)(Range items, T item)
if (is(ForeachType!Range == T)) {
  static if (isStaticArray!Range && Range.length < 5) {
    static if (Range.length == 0) {
      return false;
    } else {

static string genEq(string seqName, string itemName, intlen)

      /*pure nothrow*/ {
        string[] result;
        foreach (i; 0 .. len)
          result ~= xformat("%s[%d] == %s", seqName, i, itemName);
        return result.join(" || ");
      }
      return mixin(genEq("items", "item", Range.length));
    }
  } else {
    foreach (x; items)
      if (x == item)
        return true;
    return false;
  }
}

int main() {
  int x = 3; // run-time value
  int[3] a = [1, 3, 7];
  return a.canFind(x);
  //assert([1, 3, 7].canFind(x));
}

The asm of the relevant functions (dmd 2.060alpha, 32 bit, -O-release -inline):



_D4test18__T7canFindTG3iTiZ7canFindFG3iiZb13genExpressionFAyaAyaiZAya
L0:     sub ESP,0Ch
        push    EBX
        xor EBX,EBX
        push    ESI
        mov ESI,EAX
        test    ESI,ESI
        mov dword ptr 8[ESP],0
        mov dword ptr 0Ch[ESP],0
        jle L59
L1D:    push    dword ptr FLAT:_DATA[014h]
        push    dword ptr FLAT:_DATA[010h]
        push    dword ptr 02Ch[ESP]
        push    dword ptr 02Ch[ESP]
        push    EBX
        push    dword ptr 030h[ESP]
        push    dword ptr 030h[ESP]

call near ptr_D3std6string24__T7xformatTaTAyaTiTAyaZ7xformatFxAaAyaiAyaZAya

        push    EDX
        mov EDX,offset FLAT:_D13TypeInfo_AAya6__initZ
        push    EAX
        lea ECX,010h[ESP]
        push    ECX
        push    EDX
        call    near ptr __d_arrayappendcT
        inc EBX
        add ESP,010h
        cmp EBX,ESI
        jl  L1D
L59:    push    dword ptr 0Ch[ESP]
        push    dword ptr 0Ch[ESP]
        push    dword ptr FLAT:_DATA[024h]
        push    dword ptr FLAT:_DATA[020h]

call near ptr_D3std5array22__T8joinImplTAAyaTAyaZ8joinImplFAAyaAyaZAya

        pop ESI
        pop EBX
        add ESP,0Ch
        ret 010h


_D4test18__T7canFindTG3iTiZ7canFindFG3iiZb
        mov EDX,EAX
        cmp 4[ESP],EDX
        je  L18
        cmp 8[ESP],EDX
        je  L18
        cmp 0Ch[ESP],EDX
        je  L18
        xor EAX,EAX
        jmp short   L1D
L18:    mov EAX,1
L1D:    ret 0Ch


__Dmain comdat
L0:     sub ESP,0Ch
        mov EAX,offset FLAT:_D12TypeInfo_xAi6__initZ
        push    EBX
        push    ESI
        push    0Ch
        push    3
        push    EAX
        call    near ptr __d_arrayliteralTX
        add ESP,8
        mov EBX,EAX
        mov dword ptr [EAX],1
        mov ECX,3
        mov EDX,EBX
        push    EBX
        lea ESI,010h[ESP]
        mov 4[EBX],ECX
        mov dword ptr 8[EBX],7
        push    ESI
        call    near ptr _memcpy
        add ESP,0Ch
        mov EAX,3
        push    dword ptr 010h[ESP]
        push    dword ptr 010h[ESP]
        push    dword ptr 010h[ESP]

call near ptr_D4test18__T7canFindTG3iTiZ7canFindFG3iiZb

        and EAX,0FFh
        pop ESI
        pop EBX
        add ESP,0Ch
        ret


That asm shows two problems, that I can't fully explain:

- The very existance of a canFind instance with an inline xformatcall (!);- The missed inlining of the canFind instance that performs justthe 3 equals (_D4test18__T7canFindTG3iTiZ7canFindFG3iiZb ).

But for this discussion I don't care about those two problems.I'd like this too to be computed using the mixin(genEq):


int main() {
    int x = 3; // run-time value
    assert([1, 3, 7].canFind(x));
}

That doesn't happen because [1, 3, 7] is a int[] literal, soinside canFind() isStaticArray!Range is false.

On the other hand currently this works, because despite [1, 3, 7]being a int[] literal, is also implicitly castable to int[3]:



import std.string: xformat, join;

bool canFind(int[3] items, int item) {
  static string genEq(string seqName, string itemName, int len) {
    string[] result;
    foreach (i; 0 .. len)
      result ~= xformat("%s[%d] == %s", seqName, i, itemName);
    return result.join(" || ");
  }
  return mixin(genEq("items", "item", items.length));
}

int main() {
  int x = 3; // run-time value
  return [1, 3, 7].canFind(x);
}


But this doesn't compile:

import std.string: xformat, join;

bool canFind(size_t N)(int[N] items, int item) {
  static string genEq(string seqName, string itemName, int len) {
    string[] result;
    foreach (i; 0 .. len)
      result ~= xformat("%s[%d] == %s", seqName, i, itemName);
    return result.join(" || ");
  }
  return mixin(genEq("items", "item", items.length));
}

int main() {
  int x = 3; // run-time value
  return [1, 3, 7].canFind(x);
}

----------------------------

So is it possible to modify D to be able to create something likethis that returns true conservatively (so it returns false whenit's an unknown type of array) if a template function argument isan array literal (or enum of array)?


__traits(is_array_literal, Range)

That probably doesn't suffice, but it's an idea.

I'd really like to know if an argument is a array/string literal,and in such case be able to see it as a fixed-sized one if Ichoose so.

An alternative is just to allow to write two template functionslike this (present at the same time):

bool canFind(size_t N, T)(@literal T[N] items, T item) if (N < 5){

  static if (N == 0) {
    return false;
  } else {

static string genEq(string seqName, string itemName, int len){

      string[] result;
      foreach (i; 0 .. len)
        result ~= xformat("%s[%d] == %s", seqName, i, itemName);
      return result.join(" || ");
    }
    return mixin(genEq("items", "item", items.length));
  }
}

bool canFind(Range)(Range items, T item)
if (is(ForeachType!Range == T)) {
  foreach (x; items)
    if (x == item)
      return true;
  return false;
}

Where "@literal T[N] items" means that canFind function templateoverload is used even when you call it with a (dynamic) arrayliteral (if N isn't too much high, according to its templateconstraint).

Suggestions for better ideas are welcome.

=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-

This is the second part of this post, about a related topic.

A related more general desire is to know if a template functionargument is known at compile-time, and in this case being able touse it at compile time, if I want so (in this case the D compilerinstantiates a new template instance of the function, of course).

Something like this for me is very good because it would allow Dprogrammers to do a many things that today are not possible, likerunning some test at compile-time on the inputs, optimizing thefunction on the base of some arguments (a manually implementedpoor's man partial compilation. I think this is often enough),and more.

The classic function used to explain partial compilation is theinteger exponent power function (I use printf() to get a muchsimpler and shorter asm):



import core.stdc.stdio: printf;

T iPow(T)(T base, size_t exp) {
  T result = 1;
  for (; exp > 0; exp >>= 1) {
    if (exp & 1)
      result *= base;
    base *= base;
  }

  return result;
}

void main() {
  printf("%d", iPow(5, 3));
}


Its asm:

_D5test011__T4iPowTiZ4iPowFikZi comdat
        push    EBX
        mov ECX,EAX
        mov EDX,8[ESP]
        push    ESI
        mov EBX,1
        test    ECX,ECX
        je  L2D
L11:    test    ECX,1
        je  L20
        mov EAX,EDX
        imul    EAX,EBX
        mov EBX,EAX
L20:    mov ESI,EDX
        imul    ESI,EDX
        shr ECX,1
        mov EDX,ESI
        test    ECX,ECX
        jne L11
L2D:    pop ESI
        mov EAX,EBX
        pop EBX
        ret 4


__Dmain comdat
L0:     push    EAX
        mov EAX,3
        push    5
        call    near ptr _D5test011__T4iPowTiZ4iPowFikZi
        mov ECX,offset FLAT:_DATA
        push    EAX
        push    ECX
        call    near ptr _printf
        add ESP,8
        xor EAX,EAX
        pop ECX
        ret

Often the exp is a literal or it's known at compile-time. In thiscase in D you are free to use a different function template:



import core.stdc.stdio: printf;

T iPow(size_t exp, T)(T base) {
  T result = 1;
  for (int e = exp; e > 0; e >>= 1) {
    if (e & 1)
      result *= base;
    base *= base;
  }

  return result;
}

void main() {
  printf("%d", iPow!(3)(5));
}


Its asm:

_D4test14__T4iPowVi3TiZ4iPowFiZi
        push    EBX
        mov EDX,EAX
        mov EBX,1
        push    ESI
        mov ECX,3
LE:     test    ECX,1
        je  L1D
        mov EAX,EDX
        imul    EAX,EBX
        mov EBX,EAX
L1D:    mov ESI,EDX
        imul    ESI,EDX
        sar ECX,1
        mov EDX,ESI
        test    ECX,ECX
        jg  LE
        pop ESI
        mov EAX,EBX
        pop EBX
        ret


__Dmain comdat
L0:     push    EAX
        mov EAX,5
        call    near ptr _D4test14__T4iPowVi3TiZ4iPowFiZi
        mov ECX,offset FLAT:_DATA
        push    EAX
        push    ECX
        call    near ptr _printf
        add ESP,8
        xor EAX,EAX
        pop ECX
        ret

With DMD to see an improvement you need a static foreach, nowit's fully inlined:


import core.stdc.stdio: printf;
import std.typetuple: TypeTuple;

private template _iPowHelper(size_t exp) {
  static if (exp == 0)
    alias TypeTuple!() _iPowHelper;
  else
    alias TypeTuple!(exp, _iPowHelper!(exp >> 1)) _iPowHelper;
}

T iPow(size_t exp, T)(T base) {
  // Unfortunately I can't nest template _iPowHelper here.
  T result = 1;
  // Optionally add a static if here to not use _iPowHelper
  // when exp is too much large
  /*static*/ foreach (e; _iPowHelper!exp) {
    if (e & 1)
      result *= base;
    base *= base;
  }
  return result;
}

void main() {
  printf("%lf", iPow!(3)(5));
}


Its asm:

_D5test214__T4iPowVi3TiZ4iPowFiZi
        mov ECX,EAX
        mov EDX,EAX
        imul    EAX,ECX
        mov ECX,EAX
        imul    EAX,EDX
        mov EDX,EAX
        ret


__Dmain comdat
L0:     push    EAX
        mov EAX,offset FLAT:_DATA
        push    07Dh
        push    EAX
        call    near ptr _printf
        add ESP,8
        xor EAX,EAX
        pop ECX
        ret

This variant of the last program shows that the code is reallyworking as desired:


import core.stdc.stdio: printf;
import std.typetuple: TypeTuple;

private template _iPowHelper(size_t exp) {
  static if (exp == 0)
    alias TypeTuple!() _iPowHelper;
  else
    alias TypeTuple!(exp, _iPowHelper!(exp >> 1)) _iPowHelper;
}

T iPow(size_t exp, T)(T base) {
  // Unfortunately I can't nest template _iPowHelper here.
  T result = 1;
  // Optionally add a static if here to not use _iPowHelper
  // when exp is too much large
  /*static*/ foreach (e; _iPowHelper!exp) {
    if (e & 1)
      result *= base;
    base *= base;
  }
  return result;
}

void main(string[] args) {
  double x = args.length + 4.0;
  printf("%lf", iPow!(3)(x));
}

Its asm, the main shows no calls to iPow, no loops and just twofmul:


_D6test2d14__T4iPowVi3TdZ4iPowFdZd  comdat
        sub ESP,0Ch
        fld qword ptr 010h[ESP]
        fld qword ptr 010h[ESP]
        fmul    qword ptr 010h[ESP]
        fxch    ST1
        fstp    qword ptr [ESP]
        fstp    qword ptr 010h[ESP]
        fld qword ptr 010h[ESP]
        fmul    qword ptr [ESP]
        fstp    qword ptr [ESP]
        fld qword ptr [ESP]
        add ESP,0Ch
        ret 8


__Dmain comdat
L0:     sub ESP,024h
        mov EAX,028h[ESP]
        xor ECX,ECX
        mov [ESP],EAX
        mov EDX,offset FLAT:_DATA[010h]
        mov 4[ESP],ECX
        fild    long64 ptr [ESP]
        fadd    qword ptr FLAT:_DATA[00h]
        fst qword ptr 8[ESP]
        fld qword ptr 8[ESP]
        fld qword ptr 8[ESP]
        fxch    ST2
        fstp    qword ptr 010h[ESP]
        fstp    qword ptr 018h[ESP]
        fmul    qword ptr 010h[ESP]
        fstp    qword ptr 010h[ESP]
        fld qword ptr 010h[ESP]
        fmul    qword ptr 018h[ESP]
        fstp    qword ptr 018h[ESP]
        fld qword ptr 018h[ESP]
        sub ESP,8
        fstp    qword ptr [ESP]
        push    EDX
        call    near ptr _printf
        add ESP,0Ch
        add ESP,024h
        xor EAX,EAX
        ret

So we are back to an idea Walter has tried and refused few yearsago, of compile-time known arguments with "static". The smalldifference here is (I think) that both iPow templates are allowedto exist in the code at the same time, and the iPow overload with"static" is preferred by the argument is known at compile-time:



import core.stdc.stdio: printf;
import std.typetuple: TypeTuple;

private template _iPowHelper(size_t exp) {
  static if (exp == 0)
    alias TypeTuple!() _iPowHelper;
  else
    alias TypeTuple!(exp, _iPowHelper!(exp >> 1)) _iPowHelper;
}

T iPow(T)(T base, static size_t exp) {
  // Unfortunately I can't nest template _iPowHelper here.
  T result = 1;
  /*static*/ foreach (e; _iPowHelper!exp) {
    if (e & 1)
      result *= base;
    base *= base;
  }
  return result;
}

T iPow(T)(T base, size_t exp) {
  T result = 1;
  for (; exp > 0; exp >>= 1) {
    if (exp & 1)
      result *= base;
    base *= base;
  }

  return result;
}

void main() {
  printf("%d", iPow(5, 3)); // calls first iPow overload!
}

Note that this is not what I was asking for in the first half ofthis post, because I didn't want to know the contents of "items"at compile-time, only its length (and currently arrays can't betemplate arguments, despite strings can, even dstrings, thataren't very different from a uint[], it's a small mystery):


bool canFind(T)(static T[] items, T item) {...}

Bye,
bearophile

Test for array literal arguments?

Reply via email to