Warning: this post contains some partially uncooked ideas.

I hope Walter will read this post :-)

First of all the little problem. I'd like:

[1, 3, 7].canFind(x)

To be compiled with an in-lined:

x == 1 || x == 3 || x == 7


(A very optimizing D back-end is able to inline the array creation, see that the array length is known at compile-time, to unroll the search loop according to that length, doing what I am asking here. Maybe LDC2 with link-time optimization is able to do it. DMD is not able to do it. Having a very opitimizing back-end is good, but languages like C and Go (and Java as a counter-example) show that not _requiring_ a very optimizing back-end is good for a language).

------------------------------

This is a starting point for the discussion, it's a modified and simplified implementation of canFind() that also contains an optimization for short fixed-sized arrays:



import std.stdio: writeln;
import std.traits: ForeachType, isStaticArray;
import std.string: xformat, join;

bool canFind(Range, T)(Range items, T item)
if (is(ForeachType!Range == T)) {
  static if (isStaticArray!Range && Range.length < 5) {
    static if (Range.length == 0) {
      return false;
    } else {
static string genEq(string seqName, string itemName, int len)
      /*pure nothrow*/ {
        string[] result;
        foreach (i; 0 .. len)
          result ~= xformat("%s[%d] == %s", seqName, i, itemName);
        return result.join(" || ");
      }
      return mixin(genEq("items", "item", Range.length));
    }
  } else {
    foreach (x; items)
      if (x == item)
        return true;
    return false;
  }
}

int main() {
  int x = 3; // run-time value
  int[3] a = [1, 3, 7];
  return a.canFind(x);
  //assert([1, 3, 7].canFind(x));
}


The asm of the relevant functions (dmd 2.060alpha, 32 bit, -O -release -inline):


_D4test18__T7canFindTG3iTiZ7canFindFG3iiZb13genExpressionFAyaAyaiZAya
L0:     sub ESP,0Ch
        push    EBX
        xor EBX,EBX
        push    ESI
        mov ESI,EAX
        test    ESI,ESI
        mov dword ptr 8[ESP],0
        mov dword ptr 0Ch[ESP],0
        jle L59
L1D:    push    dword ptr FLAT:_DATA[014h]
        push    dword ptr FLAT:_DATA[010h]
        push    dword ptr 02Ch[ESP]
        push    dword ptr 02Ch[ESP]
        push    EBX
        push    dword ptr 030h[ESP]
        push    dword ptr 030h[ESP]
call near ptr _D3std6string24__T7xformatTaTAyaTiTAyaZ7xformatFxAaAyaiAyaZAya
        push    EDX
        mov EDX,offset FLAT:_D13TypeInfo_AAya6__initZ
        push    EAX
        lea ECX,010h[ESP]
        push    ECX
        push    EDX
        call    near ptr __d_arrayappendcT
        inc EBX
        add ESP,010h
        cmp EBX,ESI
        jl  L1D
L59:    push    dword ptr 0Ch[ESP]
        push    dword ptr 0Ch[ESP]
        push    dword ptr FLAT:_DATA[024h]
        push    dword ptr FLAT:_DATA[020h]
call near ptr _D3std5array22__T8joinImplTAAyaTAyaZ8joinImplFAAyaAyaZAya
        pop ESI
        pop EBX
        add ESP,0Ch
        ret 010h


_D4test18__T7canFindTG3iTiZ7canFindFG3iiZb
        mov EDX,EAX
        cmp 4[ESP],EDX
        je  L18
        cmp 8[ESP],EDX
        je  L18
        cmp 0Ch[ESP],EDX
        je  L18
        xor EAX,EAX
        jmp short   L1D
L18:    mov EAX,1
L1D:    ret 0Ch


__Dmain comdat
L0:     sub ESP,0Ch
        mov EAX,offset FLAT:_D12TypeInfo_xAi6__initZ
        push    EBX
        push    ESI
        push    0Ch
        push    3
        push    EAX
        call    near ptr __d_arrayliteralTX
        add ESP,8
        mov EBX,EAX
        mov dword ptr [EAX],1
        mov ECX,3
        mov EDX,EBX
        push    EBX
        lea ESI,010h[ESP]
        mov 4[EBX],ECX
        mov dword ptr 8[EBX],7
        push    ESI
        call    near ptr _memcpy
        add ESP,0Ch
        mov EAX,3
        push    dword ptr 010h[ESP]
        push    dword ptr 010h[ESP]
        push    dword ptr 010h[ESP]
call near ptr _D4test18__T7canFindTG3iTiZ7canFindFG3iiZb
        and EAX,0FFh
        pop ESI
        pop EBX
        add ESP,0Ch
        ret


That asm shows two problems, that I can't fully explain:
- The very existance of a canFind instance with an inline xformat call (!); - The missed inlining of the canFind instance that performs just the 3 equals (_D4test18__T7canFindTG3iTiZ7canFindFG3iiZb ).

But for this discussion I don't care about those two problems. I'd like this too to be computed using the mixin(genEq):

int main() {
    int x = 3; // run-time value
    assert([1, 3, 7].canFind(x));
}


That doesn't happen because [1, 3, 7] is a int[] literal, so inside canFind() isStaticArray!Range is false.

On the other hand currently this works, because despite [1, 3, 7] being a int[] literal, is also implicitly castable to int[3]:


import std.string: xformat, join;

bool canFind(int[3] items, int item) {
  static string genEq(string seqName, string itemName, int len) {
    string[] result;
    foreach (i; 0 .. len)
      result ~= xformat("%s[%d] == %s", seqName, i, itemName);
    return result.join(" || ");
  }
  return mixin(genEq("items", "item", items.length));
}

int main() {
  int x = 3; // run-time value
  return [1, 3, 7].canFind(x);
}


But this doesn't compile:

import std.string: xformat, join;

bool canFind(size_t N)(int[N] items, int item) {
  static string genEq(string seqName, string itemName, int len) {
    string[] result;
    foreach (i; 0 .. len)
      result ~= xformat("%s[%d] == %s", seqName, i, itemName);
    return result.join(" || ");
  }
  return mixin(genEq("items", "item", items.length));
}

int main() {
  int x = 3; // run-time value
  return [1, 3, 7].canFind(x);
}

----------------------------

So is it possible to modify D to be able to create something like this that returns true conservatively (so it returns false when it's an unknown type of array) if a template function argument is an array literal (or enum of array)?

__traits(is_array_literal, Range)

That probably doesn't suffice, but it's an idea.

I'd really like to know if an argument is a array/string literal, and in such case be able to see it as a fixed-sized one if I choose so.

An alternative is just to allow to write two template functions like this (present at the same time):


bool canFind(size_t N, T)(@literal T[N] items, T item) if (N < 5) {
  static if (N == 0) {
    return false;
  } else {
static string genEq(string seqName, string itemName, int len) {
      string[] result;
      foreach (i; 0 .. len)
        result ~= xformat("%s[%d] == %s", seqName, i, itemName);
      return result.join(" || ");
    }
    return mixin(genEq("items", "item", items.length));
  }
}

bool canFind(Range)(Range items, T item)
if (is(ForeachType!Range == T)) {
  foreach (x; items)
    if (x == item)
      return true;
  return false;
}


Where "@literal T[N] items" means that canFind function template overload is used even when you call it with a (dynamic) array literal (if N isn't too much high, according to its template constraint).
Suggestions for better ideas are welcome.

=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-

This is the second part of this post, about a related topic.

A related more general desire is to know if a template function argument is known at compile-time, and in this case being able to use it at compile time, if I want so (in this case the D compiler instantiates a new template instance of the function, of course).

Something like this for me is very good because it would allow D programmers to do a many things that today are not possible, like running some test at compile-time on the inputs, optimizing the function on the base of some arguments (a manually implemented poor's man partial compilation. I think this is often enough), and more.


The classic function used to explain partial compilation is the integer exponent power function (I use printf() to get a much simpler and shorter asm):


import core.stdc.stdio: printf;

T iPow(T)(T base, size_t exp) {
  T result = 1;
  for (; exp > 0; exp >>= 1) {
    if (exp & 1)
      result *= base;
    base *= base;
  }

  return result;
}

void main() {
  printf("%d", iPow(5, 3));
}


Its asm:

_D5test011__T4iPowTiZ4iPowFikZi comdat
        push    EBX
        mov ECX,EAX
        mov EDX,8[ESP]
        push    ESI
        mov EBX,1
        test    ECX,ECX
        je  L2D
L11:    test    ECX,1
        je  L20
        mov EAX,EDX
        imul    EAX,EBX
        mov EBX,EAX
L20:    mov ESI,EDX
        imul    ESI,EDX
        shr ECX,1
        mov EDX,ESI
        test    ECX,ECX
        jne L11
L2D:    pop ESI
        mov EAX,EBX
        pop EBX
        ret 4


__Dmain comdat
L0:     push    EAX
        mov EAX,3
        push    5
        call    near ptr _D5test011__T4iPowTiZ4iPowFikZi
        mov ECX,offset FLAT:_DATA
        push    EAX
        push    ECX
        call    near ptr _printf
        add ESP,8
        xor EAX,EAX
        pop ECX
        ret


Often the exp is a literal or it's known at compile-time. In this case in D you are free to use a different function template:


import core.stdc.stdio: printf;

T iPow(size_t exp, T)(T base) {
  T result = 1;
  for (int e = exp; e > 0; e >>= 1) {
    if (e & 1)
      result *= base;
    base *= base;
  }

  return result;
}

void main() {
  printf("%d", iPow!(3)(5));
}


Its asm:

_D4test14__T4iPowVi3TiZ4iPowFiZi
        push    EBX
        mov EDX,EAX
        mov EBX,1
        push    ESI
        mov ECX,3
LE:     test    ECX,1
        je  L1D
        mov EAX,EDX
        imul    EAX,EBX
        mov EBX,EAX
L1D:    mov ESI,EDX
        imul    ESI,EDX
        sar ECX,1
        mov EDX,ESI
        test    ECX,ECX
        jg  LE
        pop ESI
        mov EAX,EBX
        pop EBX
        ret


__Dmain comdat
L0:     push    EAX
        mov EAX,5
        call    near ptr _D4test14__T4iPowVi3TiZ4iPowFiZi
        mov ECX,offset FLAT:_DATA
        push    EAX
        push    ECX
        call    near ptr _printf
        add ESP,8
        xor EAX,EAX
        pop ECX
        ret


With DMD to see an improvement you need a static foreach, now it's fully inlined:

import core.stdc.stdio: printf;
import std.typetuple: TypeTuple;

private template _iPowHelper(size_t exp) {
  static if (exp == 0)
    alias TypeTuple!() _iPowHelper;
  else
    alias TypeTuple!(exp, _iPowHelper!(exp >> 1)) _iPowHelper;
}

T iPow(size_t exp, T)(T base) {
  // Unfortunately I can't nest template _iPowHelper here.
  T result = 1;
  // Optionally add a static if here to not use _iPowHelper
  // when exp is too much large
  /*static*/ foreach (e; _iPowHelper!exp) {
    if (e & 1)
      result *= base;
    base *= base;
  }
  return result;
}

void main() {
  printf("%lf", iPow!(3)(5));
}


Its asm:

_D5test214__T4iPowVi3TiZ4iPowFiZi
        mov ECX,EAX
        mov EDX,EAX
        imul    EAX,ECX
        mov ECX,EAX
        imul    EAX,EDX
        mov EDX,EAX
        ret


__Dmain comdat
L0:     push    EAX
        mov EAX,offset FLAT:_DATA
        push    07Dh
        push    EAX
        call    near ptr _printf
        add ESP,8
        xor EAX,EAX
        pop ECX
        ret


This variant of the last program shows that the code is really working as desired:

import core.stdc.stdio: printf;
import std.typetuple: TypeTuple;

private template _iPowHelper(size_t exp) {
  static if (exp == 0)
    alias TypeTuple!() _iPowHelper;
  else
    alias TypeTuple!(exp, _iPowHelper!(exp >> 1)) _iPowHelper;
}

T iPow(size_t exp, T)(T base) {
  // Unfortunately I can't nest template _iPowHelper here.
  T result = 1;
  // Optionally add a static if here to not use _iPowHelper
  // when exp is too much large
  /*static*/ foreach (e; _iPowHelper!exp) {
    if (e & 1)
      result *= base;
    base *= base;
  }
  return result;
}

void main(string[] args) {
  double x = args.length + 4.0;
  printf("%lf", iPow!(3)(x));
}


Its asm, the main shows no calls to iPow, no loops and just two fmul:

_D6test2d14__T4iPowVi3TdZ4iPowFdZd  comdat
        sub ESP,0Ch
        fld qword ptr 010h[ESP]
        fld qword ptr 010h[ESP]
        fmul    qword ptr 010h[ESP]
        fxch    ST1
        fstp    qword ptr [ESP]
        fstp    qword ptr 010h[ESP]
        fld qword ptr 010h[ESP]
        fmul    qword ptr [ESP]
        fstp    qword ptr [ESP]
        fld qword ptr [ESP]
        add ESP,0Ch
        ret 8


__Dmain comdat
L0:     sub ESP,024h
        mov EAX,028h[ESP]
        xor ECX,ECX
        mov [ESP],EAX
        mov EDX,offset FLAT:_DATA[010h]
        mov 4[ESP],ECX
        fild    long64 ptr [ESP]
        fadd    qword ptr FLAT:_DATA[00h]
        fst qword ptr 8[ESP]
        fld qword ptr 8[ESP]
        fld qword ptr 8[ESP]
        fxch    ST2
        fstp    qword ptr 010h[ESP]
        fstp    qword ptr 018h[ESP]
        fmul    qword ptr 010h[ESP]
        fstp    qword ptr 010h[ESP]
        fld qword ptr 010h[ESP]
        fmul    qword ptr 018h[ESP]
        fstp    qword ptr 018h[ESP]
        fld qword ptr 018h[ESP]
        sub ESP,8
        fstp    qword ptr [ESP]
        push    EDX
        call    near ptr _printf
        add ESP,0Ch
        add ESP,024h
        xor EAX,EAX
        ret


So we are back to an idea Walter has tried and refused few years ago, of compile-time known arguments with "static". The small difference here is (I think) that both iPow templates are allowed to exist in the code at the same time, and the iPow overload with "static" is preferred by the argument is known at compile-time:


import core.stdc.stdio: printf;
import std.typetuple: TypeTuple;

private template _iPowHelper(size_t exp) {
  static if (exp == 0)
    alias TypeTuple!() _iPowHelper;
  else
    alias TypeTuple!(exp, _iPowHelper!(exp >> 1)) _iPowHelper;
}

T iPow(T)(T base, static size_t exp) {
  // Unfortunately I can't nest template _iPowHelper here.
  T result = 1;
  /*static*/ foreach (e; _iPowHelper!exp) {
    if (e & 1)
      result *= base;
    base *= base;
  }
  return result;
}

T iPow(T)(T base, size_t exp) {
  T result = 1;
  for (; exp > 0; exp >>= 1) {
    if (exp & 1)
      result *= base;
    base *= base;
  }

  return result;
}

void main() {
  printf("%d", iPow(5, 3)); // calls first iPow overload!
}


Note that this is not what I was asking for in the first half of this post, because I didn't want to know the contents of "items" at compile-time, only its length (and currently arrays can't be template arguments, despite strings can, even dstrings, that aren't very different from a uint[], it's a small mystery):

bool canFind(T)(static T[] items, T item) {...}

Bye,
bearophile

Reply via email to