Warning: this post contains some partially uncooked ideas.
I hope Walter will read this post :-)
First of all the little problem. I'd like:
[1, 3, 7].canFind(x)
To be compiled with an in-lined:
x == 1 || x == 3 || x == 7
(A very optimizing D back-end is able to inline the array
creation, see that the array length is known at compile-time, to
unroll the search loop according to that length, doing what I am
asking here. Maybe LDC2 with link-time optimization is able to do
it. DMD is not able to do it. Having a very opitimizing back-end
is good, but languages like C and Go (and Java as a
counter-example) show that not _requiring_ a very optimizing
back-end is good for a language).
------------------------------
This is a starting point for the discussion, it's a modified and
simplified implementation of canFind() that also contains an
optimization for short fixed-sized arrays:
import std.stdio: writeln;
import std.traits: ForeachType, isStaticArray;
import std.string: xformat, join;
bool canFind(Range, T)(Range items, T item)
if (is(ForeachType!Range == T)) {
static if (isStaticArray!Range && Range.length < 5) {
static if (Range.length == 0) {
return false;
} else {
static string genEq(string seqName, string itemName, int
len)
/*pure nothrow*/ {
string[] result;
foreach (i; 0 .. len)
result ~= xformat("%s[%d] == %s", seqName, i, itemName);
return result.join(" || ");
}
return mixin(genEq("items", "item", Range.length));
}
} else {
foreach (x; items)
if (x == item)
return true;
return false;
}
}
int main() {
int x = 3; // run-time value
int[3] a = [1, 3, 7];
return a.canFind(x);
//assert([1, 3, 7].canFind(x));
}
The asm of the relevant functions (dmd 2.060alpha, 32 bit, -O
-release -inline):
_D4test18__T7canFindTG3iTiZ7canFindFG3iiZb13genExpressionFAyaAyaiZAya
L0: sub ESP,0Ch
push EBX
xor EBX,EBX
push ESI
mov ESI,EAX
test ESI,ESI
mov dword ptr 8[ESP],0
mov dword ptr 0Ch[ESP],0
jle L59
L1D: push dword ptr FLAT:_DATA[014h]
push dword ptr FLAT:_DATA[010h]
push dword ptr 02Ch[ESP]
push dword ptr 02Ch[ESP]
push EBX
push dword ptr 030h[ESP]
push dword ptr 030h[ESP]
call near ptr
_D3std6string24__T7xformatTaTAyaTiTAyaZ7xformatFxAaAyaiAyaZAya
push EDX
mov EDX,offset FLAT:_D13TypeInfo_AAya6__initZ
push EAX
lea ECX,010h[ESP]
push ECX
push EDX
call near ptr __d_arrayappendcT
inc EBX
add ESP,010h
cmp EBX,ESI
jl L1D
L59: push dword ptr 0Ch[ESP]
push dword ptr 0Ch[ESP]
push dword ptr FLAT:_DATA[024h]
push dword ptr FLAT:_DATA[020h]
call near ptr
_D3std5array22__T8joinImplTAAyaTAyaZ8joinImplFAAyaAyaZAya
pop ESI
pop EBX
add ESP,0Ch
ret 010h
_D4test18__T7canFindTG3iTiZ7canFindFG3iiZb
mov EDX,EAX
cmp 4[ESP],EDX
je L18
cmp 8[ESP],EDX
je L18
cmp 0Ch[ESP],EDX
je L18
xor EAX,EAX
jmp short L1D
L18: mov EAX,1
L1D: ret 0Ch
__Dmain comdat
L0: sub ESP,0Ch
mov EAX,offset FLAT:_D12TypeInfo_xAi6__initZ
push EBX
push ESI
push 0Ch
push 3
push EAX
call near ptr __d_arrayliteralTX
add ESP,8
mov EBX,EAX
mov dword ptr [EAX],1
mov ECX,3
mov EDX,EBX
push EBX
lea ESI,010h[ESP]
mov 4[EBX],ECX
mov dword ptr 8[EBX],7
push ESI
call near ptr _memcpy
add ESP,0Ch
mov EAX,3
push dword ptr 010h[ESP]
push dword ptr 010h[ESP]
push dword ptr 010h[ESP]
call near ptr
_D4test18__T7canFindTG3iTiZ7canFindFG3iiZb
and EAX,0FFh
pop ESI
pop EBX
add ESP,0Ch
ret
That asm shows two problems, that I can't fully explain:
- The very existance of a canFind instance with an inline xformat
call (!);
- The missed inlining of the canFind instance that performs just
the 3 equals (_D4test18__T7canFindTG3iTiZ7canFindFG3iiZb ).
But for this discussion I don't care about those two problems.
I'd like this too to be computed using the mixin(genEq):
int main() {
int x = 3; // run-time value
assert([1, 3, 7].canFind(x));
}
That doesn't happen because [1, 3, 7] is a int[] literal, so
inside canFind() isStaticArray!Range is false.
On the other hand currently this works, because despite [1, 3, 7]
being a int[] literal, is also implicitly castable to int[3]:
import std.string: xformat, join;
bool canFind(int[3] items, int item) {
static string genEq(string seqName, string itemName, int len) {
string[] result;
foreach (i; 0 .. len)
result ~= xformat("%s[%d] == %s", seqName, i, itemName);
return result.join(" || ");
}
return mixin(genEq("items", "item", items.length));
}
int main() {
int x = 3; // run-time value
return [1, 3, 7].canFind(x);
}
But this doesn't compile:
import std.string: xformat, join;
bool canFind(size_t N)(int[N] items, int item) {
static string genEq(string seqName, string itemName, int len) {
string[] result;
foreach (i; 0 .. len)
result ~= xformat("%s[%d] == %s", seqName, i, itemName);
return result.join(" || ");
}
return mixin(genEq("items", "item", items.length));
}
int main() {
int x = 3; // run-time value
return [1, 3, 7].canFind(x);
}
----------------------------
So is it possible to modify D to be able to create something like
this that returns true conservatively (so it returns false when
it's an unknown type of array) if a template function argument is
an array literal (or enum of array)?
__traits(is_array_literal, Range)
That probably doesn't suffice, but it's an idea.
I'd really like to know if an argument is a array/string literal,
and in such case be able to see it as a fixed-sized one if I
choose so.
An alternative is just to allow to write two template functions
like this (present at the same time):
bool canFind(size_t N, T)(@literal T[N] items, T item) if (N < 5)
{
static if (N == 0) {
return false;
} else {
static string genEq(string seqName, string itemName, int len)
{
string[] result;
foreach (i; 0 .. len)
result ~= xformat("%s[%d] == %s", seqName, i, itemName);
return result.join(" || ");
}
return mixin(genEq("items", "item", items.length));
}
}
bool canFind(Range)(Range items, T item)
if (is(ForeachType!Range == T)) {
foreach (x; items)
if (x == item)
return true;
return false;
}
Where "@literal T[N] items" means that canFind function template
overload is used even when you call it with a (dynamic) array
literal (if N isn't too much high, according to its template
constraint).
Suggestions for better ideas are welcome.
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
This is the second part of this post, about a related topic.
A related more general desire is to know if a template function
argument is known at compile-time, and in this case being able to
use it at compile time, if I want so (in this case the D compiler
instantiates a new template instance of the function, of course).
Something like this for me is very good because it would allow D
programmers to do a many things that today are not possible, like
running some test at compile-time on the inputs, optimizing the
function on the base of some arguments (a manually implemented
poor's man partial compilation. I think this is often enough),
and more.
The classic function used to explain partial compilation is the
integer exponent power function (I use printf() to get a much
simpler and shorter asm):
import core.stdc.stdio: printf;
T iPow(T)(T base, size_t exp) {
T result = 1;
for (; exp > 0; exp >>= 1) {
if (exp & 1)
result *= base;
base *= base;
}
return result;
}
void main() {
printf("%d", iPow(5, 3));
}
Its asm:
_D5test011__T4iPowTiZ4iPowFikZi comdat
push EBX
mov ECX,EAX
mov EDX,8[ESP]
push ESI
mov EBX,1
test ECX,ECX
je L2D
L11: test ECX,1
je L20
mov EAX,EDX
imul EAX,EBX
mov EBX,EAX
L20: mov ESI,EDX
imul ESI,EDX
shr ECX,1
mov EDX,ESI
test ECX,ECX
jne L11
L2D: pop ESI
mov EAX,EBX
pop EBX
ret 4
__Dmain comdat
L0: push EAX
mov EAX,3
push 5
call near ptr _D5test011__T4iPowTiZ4iPowFikZi
mov ECX,offset FLAT:_DATA
push EAX
push ECX
call near ptr _printf
add ESP,8
xor EAX,EAX
pop ECX
ret
Often the exp is a literal or it's known at compile-time. In this
case in D you are free to use a different function template:
import core.stdc.stdio: printf;
T iPow(size_t exp, T)(T base) {
T result = 1;
for (int e = exp; e > 0; e >>= 1) {
if (e & 1)
result *= base;
base *= base;
}
return result;
}
void main() {
printf("%d", iPow!(3)(5));
}
Its asm:
_D4test14__T4iPowVi3TiZ4iPowFiZi
push EBX
mov EDX,EAX
mov EBX,1
push ESI
mov ECX,3
LE: test ECX,1
je L1D
mov EAX,EDX
imul EAX,EBX
mov EBX,EAX
L1D: mov ESI,EDX
imul ESI,EDX
sar ECX,1
mov EDX,ESI
test ECX,ECX
jg LE
pop ESI
mov EAX,EBX
pop EBX
ret
__Dmain comdat
L0: push EAX
mov EAX,5
call near ptr _D4test14__T4iPowVi3TiZ4iPowFiZi
mov ECX,offset FLAT:_DATA
push EAX
push ECX
call near ptr _printf
add ESP,8
xor EAX,EAX
pop ECX
ret
With DMD to see an improvement you need a static foreach, now
it's fully inlined:
import core.stdc.stdio: printf;
import std.typetuple: TypeTuple;
private template _iPowHelper(size_t exp) {
static if (exp == 0)
alias TypeTuple!() _iPowHelper;
else
alias TypeTuple!(exp, _iPowHelper!(exp >> 1)) _iPowHelper;
}
T iPow(size_t exp, T)(T base) {
// Unfortunately I can't nest template _iPowHelper here.
T result = 1;
// Optionally add a static if here to not use _iPowHelper
// when exp is too much large
/*static*/ foreach (e; _iPowHelper!exp) {
if (e & 1)
result *= base;
base *= base;
}
return result;
}
void main() {
printf("%lf", iPow!(3)(5));
}
Its asm:
_D5test214__T4iPowVi3TiZ4iPowFiZi
mov ECX,EAX
mov EDX,EAX
imul EAX,ECX
mov ECX,EAX
imul EAX,EDX
mov EDX,EAX
ret
__Dmain comdat
L0: push EAX
mov EAX,offset FLAT:_DATA
push 07Dh
push EAX
call near ptr _printf
add ESP,8
xor EAX,EAX
pop ECX
ret
This variant of the last program shows that the code is really
working as desired:
import core.stdc.stdio: printf;
import std.typetuple: TypeTuple;
private template _iPowHelper(size_t exp) {
static if (exp == 0)
alias TypeTuple!() _iPowHelper;
else
alias TypeTuple!(exp, _iPowHelper!(exp >> 1)) _iPowHelper;
}
T iPow(size_t exp, T)(T base) {
// Unfortunately I can't nest template _iPowHelper here.
T result = 1;
// Optionally add a static if here to not use _iPowHelper
// when exp is too much large
/*static*/ foreach (e; _iPowHelper!exp) {
if (e & 1)
result *= base;
base *= base;
}
return result;
}
void main(string[] args) {
double x = args.length + 4.0;
printf("%lf", iPow!(3)(x));
}
Its asm, the main shows no calls to iPow, no loops and just two
fmul:
_D6test2d14__T4iPowVi3TdZ4iPowFdZd comdat
sub ESP,0Ch
fld qword ptr 010h[ESP]
fld qword ptr 010h[ESP]
fmul qword ptr 010h[ESP]
fxch ST1
fstp qword ptr [ESP]
fstp qword ptr 010h[ESP]
fld qword ptr 010h[ESP]
fmul qword ptr [ESP]
fstp qword ptr [ESP]
fld qword ptr [ESP]
add ESP,0Ch
ret 8
__Dmain comdat
L0: sub ESP,024h
mov EAX,028h[ESP]
xor ECX,ECX
mov [ESP],EAX
mov EDX,offset FLAT:_DATA[010h]
mov 4[ESP],ECX
fild long64 ptr [ESP]
fadd qword ptr FLAT:_DATA[00h]
fst qword ptr 8[ESP]
fld qword ptr 8[ESP]
fld qword ptr 8[ESP]
fxch ST2
fstp qword ptr 010h[ESP]
fstp qword ptr 018h[ESP]
fmul qword ptr 010h[ESP]
fstp qword ptr 010h[ESP]
fld qword ptr 010h[ESP]
fmul qword ptr 018h[ESP]
fstp qword ptr 018h[ESP]
fld qword ptr 018h[ESP]
sub ESP,8
fstp qword ptr [ESP]
push EDX
call near ptr _printf
add ESP,0Ch
add ESP,024h
xor EAX,EAX
ret
So we are back to an idea Walter has tried and refused few years
ago, of compile-time known arguments with "static". The small
difference here is (I think) that both iPow templates are allowed
to exist in the code at the same time, and the iPow overload with
"static" is preferred by the argument is known at compile-time:
import core.stdc.stdio: printf;
import std.typetuple: TypeTuple;
private template _iPowHelper(size_t exp) {
static if (exp == 0)
alias TypeTuple!() _iPowHelper;
else
alias TypeTuple!(exp, _iPowHelper!(exp >> 1)) _iPowHelper;
}
T iPow(T)(T base, static size_t exp) {
// Unfortunately I can't nest template _iPowHelper here.
T result = 1;
/*static*/ foreach (e; _iPowHelper!exp) {
if (e & 1)
result *= base;
base *= base;
}
return result;
}
T iPow(T)(T base, size_t exp) {
T result = 1;
for (; exp > 0; exp >>= 1) {
if (exp & 1)
result *= base;
base *= base;
}
return result;
}
void main() {
printf("%d", iPow(5, 3)); // calls first iPow overload!
}
Note that this is not what I was asking for in the first half of
this post, because I didn't want to know the contents of "items"
at compile-time, only its length (and currently arrays can't be
template arguments, despite strings can, even dstrings, that
aren't very different from a uint[], it's a small mystery):
bool canFind(T)(static T[] items, T item) {...}
Bye,
bearophile