Re: an example of parallel calculation of metrics

2015-10-01 Thread Jay Norwood via Digitalmars-d-learn
This is another attempt with the metric parallel processing. This 
uses the results only to return an int value, which could be used 
later as an error return value.  The metric value locations are 
now allocated as a part of the input measurement values tuple.


The Tuple vs struct definitions seem to have a big difference in 
default output formatting.



import std.algorithm, std.parallelism, std.range;
import std.typecons;
import std.meta;
import std.stdio;

// define some input measurement sample tuples and output metric
// tuples

// One metric result: a raw count and the same count divided by the cycle count.
alias TR = Tuple!(long, "raw", double, "per_cycle");
//struct TR {long raw; double per_cycle;}

// All metric results stored for one sample.
alias TO = Tuple!(TR, "l1_miss", TR, "l1_access");
//struct TO {TR l1_miss; TR l1_access; };

// One measured sample: the input counter values plus, in `res`, the
// locations where the metric functions store their results.
alias TI = Tuple!(long, "L1I_MISS", long, "L1D_MISS",
                  long, "L1D_READ", long, "L1D_WRITE",
                  long, "cycles", TO, "res");


// various metric definitions
// using Tuples with defined names for each member, and use the
// names here in the metrics.

/// Returns the sum of the L1I_MISS and L1D_MISS members of sample `m`.
long met_l1_miss(ref TI m)
{
    immutable long misses = m.L1I_MISS + m.L1D_MISS;
    return misses;
}
/// Returns the sum of the L1D_READ and L1D_WRITE members of sample `m`.
long met_l1_access(ref TI m)
{
    immutable long accesses = m.L1D_READ + m.L1D_WRITE;
    return accesses;
}


/// Computes every metric for sample `m` and stores the results in `m.res`.
///
/// The raw values come from met_l1_miss and met_l1_access; each per_cycle
/// value is the raw value divided by `m.cycles`, or NaN when `m.cycles` is 0.
///
/// Returns: always 0.
int met_all(ref TI m)
{
    m.res.l1_miss.raw = met_l1_miss(m);
    m.res.l1_access.raw = met_l1_access(m);

    if (m.cycles == 0)
    {
        // No cycles recorded: a per-cycle rate is undefined.
        m.res.l1_miss.per_cycle = double.nan;
        m.res.l1_access.per_cycle = double.nan;
    }
    else
    {
        m.res.l1_miss.per_cycle = m.res.l1_miss.raw / cast(double) m.cycles;
        m.res.l1_access.per_cycle = m.res.l1_access.raw / cast(double) m.cycles;
    }

    return 0;
}

// a convenience to use all the metrics above as a list
// (passed as the template argument list of taskPool.amap in main; here it
// holds only met_all, which computes every metric for a sample)
alias Metrics = AliasSeq!(met_all);

/// Builds 100 synthetic samples, computes their metrics in parallel with
/// taskPool.amap, then prints sample 1 and the metric results of every sample.
void main(string[] argv)
{
    auto samples = iota(100);
    auto meas = new TI[samples.length];
    // amap stores the int returned by met_all for each sample here.
    auto results = new int[samples.length];

    // Initialize some values for the measured samples
    foreach (i, ref m; meas)
    {
        m.L1D_MISS = 100 + i;
        m.L1I_MISS = 100 - i;
        m.L1D_READ = 200 + i;
        m.L1D_WRITE = 200 - i;
        m.cycles = 10 + i;
    }

    // Maps a sample index to its measurement, returned by reference so that
    // met_all writes into meas[i].res.
    ref TI getTerm(int i)
    {
        return meas[i];
    }

    // compute the metric results for the above measured sample values in parallel
    taskPool.amap!(Metrics)(std.algorithm.map!getTerm(samples), results);

    writeln("measurements:", meas[1]);
    foreach (ref m; meas)
    {
        writeln(m.res);
    }
}




Re: an example of parallel calculation of metrics

2015-10-01 Thread Jay Norwood via Digitalmars-d-learn

I re-submitted this as:
https://issues.dlang.org/show_bug.cgi?id=15135



Re: an example of parallel calculation of metrics

2015-10-01 Thread Jay Norwood via Digitalmars-d-learn
So, this is a condensed version of the original problem. It looks 
like the problem is that the return value for taskPool.amap can't 
be a tuple of tuples or a tuple of struct.  Either way, it fails 
with the Wrong buffer type error message if I uncomment the 
taskPool line


import std.algorithm, std.parallelism, std.range;
import std.typecons;
import std.meta;
import std.stdio;

// define some input measurement sample tuples and output metric
// tuples

// One metric result: a raw count and the count divided by the cycle count.
struct TR { long raw; double per_cyc; }
//alias TR = Tuple!(long, "raw", double, "per_cyc");

// One measured sample of input counter values.
alias TI = Tuple!(long, "L1I_MISS", long, "L1D_MISS",
                  long, "L1D_READ", long, "L1D_WRITE",
                  long, "cycles");

// Element type of the results buffer: one named TR per metric.
alias TO = Tuple!(TR, "L1_MISS", TR, "L1D_ACCESS");

// various metric definitions
// using Tuples with defined names for each member, and use the
// names here in the metrics.
/// L1 miss metric: raw = L1I_MISS + L1D_MISS, per_cyc = raw / cycles.
TR met_l1_miss(ref TI m)
{
    TR result;
    result.raw = m.L1I_MISS + m.L1D_MISS;
    result.per_cyc = cast(double) result.raw / m.cycles;
    return result;
}
/// L1 data access metric: raw = L1D_READ + L1D_WRITE, per_cyc = raw / cycles.
TR met_l1_access(ref TI m)
{
    TR result;
    result.raw = m.L1D_READ + m.L1D_WRITE;
    result.per_cyc = cast(double) result.raw / m.cycles;
    return result;
}


// a convenience to use all the metrics above as a list
// (the template argument list for the taskPool.amap call in main, which is
// commented out in this version)
alias Metrics = AliasSeq!(met_l1_miss, met_l1_access);

/// Builds 100 synthetic samples, evaluates both metrics serially for sample 1
/// and prints them next to the (never filled) results[1] entry.
void main(string[] argv)
{
    auto samples = iota(100);
    auto meas = new TI[samples.length];
    auto results = new TO[samples.length];

    // Initialize some values for the measured samples
    foreach (i, ref m; meas)
    {
        m.L1D_MISS = 100 + i;
        m.L1I_MISS = 100 - i;
        m.L1D_READ = 200 + i;
        m.L1D_WRITE = 200 - i;
        m.cycles = 10 + i;
    }

    // Maps a sample index to its measurement, returned by reference because
    // the metric functions take `ref TI`.
    ref TI getTerm(int i)
    {
        return meas[i];
    }

    // compute the metric results for the above measured sample values in parallel
    // (left disabled: per the accompanying message, enabling this line fails
    // to compile with the "Wrong buffer type" error)
    //taskPool.amap!(Metrics)(std.algorithm.map!getTerm(samples),results);

    TR rv1 = met_l1_miss(meas[1]);
    TR rv2 = met_l1_access(meas[1]);

    writeln("measurements:", meas[1]);
    writeln("rv1:", rv1);
    writeln("rv2:", rv2);
    // With amap disabled, results[1] still holds its default-initialized value.
    writeln("results:", results[1]);
}



Re: an example of parallel calculation of metrics

2015-10-01 Thread Jay Norwood via Digitalmars-d-learn

On Thursday, 1 October 2015 at 18:08:31 UTC, Ali Çehreli wrote:
However, if you prove to yourself that the result tuple and 
your struct have the same memory layout, you can cast the tuple 
slice to struct slice after calling amap:


After re-reading your explanation, I see that the problem is only 
that the results buffer needs to be a Tuple.  It works in this example 
with named tuple members as the result and an array of structs as the 
input.  I'll re-check whether the multi-member result also works with 
named members, and I'll update the issue report.


import std.algorithm, std.parallelism, std.range;
import std.meta;
import std.stdio;
import std.typecons;

// define some input measurement sample tuples and output metric
// tuples

// One measured sample: two input counter values.
struct TI { long L1I_MISS; long L1D_MISS; }

// Element type of the results buffer: a Tuple with one named member.
alias TO = Tuple!(long, "raw");

// various metric definitions
// using Tuples with defined names for each member, and use the
// names here in the metrics.
/// L1 miss metric: returns a TO whose `raw` member is L1I_MISS + L1D_MISS.
TO met_l1_miss(ref TI m)
{
    TO result;
    result.raw = m.L1I_MISS + m.L1D_MISS;
    return result;
}


// a convenience to use all the metrics above as a list
// (passed as the template argument list of taskPool.amap in main)
alias Metrics = AliasSeq!(met_l1_miss);

/// Builds 100 synthetic samples, computes the metric for all of them in
/// parallel with taskPool.amap, and prints a serial evaluation of sample 1
/// next to the parallel result for the same sample.
void main(string[] argv)
{
    auto samples = iota(100);
    auto meas = new TI[samples.length];
    auto results = new TO[samples.length];

    // Initialize some values for the measured samples
    foreach (i, ref m; meas)
    {
        m.L1D_MISS = 100 + i;
        m.L1I_MISS = 100 - i;
    }

    // Maps a sample index to its measurement, returned by reference because
    // the metric function takes `ref TI`.
    ref TI getTerm(int i)
    {
        return meas[i];
    }

    // compute the metric results for the above measured sample values in parallel
    taskPool.amap!(Metrics)(std.algorithm.map!getTerm(samples), results);

    TO rv1 = met_l1_miss(meas[1]);

    writeln("measurements:", meas[1]);
    writeln("rv1:", rv1);
    writeln("results:", results[1]);
}



Re: an example of parallel calculation of metrics

2015-10-01 Thread Jay Norwood via Digitalmars-d-learn

On Thursday, 1 October 2015 at 18:08:31 UTC, Ali Çehreli wrote:
Makes sense. Please open a bug at least for investigation why 
tuples with named members don't work with amap.


ok, thanks.  I opened the issue.

https://issues.dlang.org/show_bug.cgi?id=15134



Re: an example of parallel calculation of metrics

2015-10-01 Thread Ali Çehreli via Digitalmars-d-learn

On 10/01/2015 08:56 AM, Jay Norwood wrote:

> Thanks.  My particular use case, working with metric expressions, is
> easier to understand if I use the names.

Makes sense. Please open a bug at least for investigation why tuples 
with named members don't work with amap.


> I converted the use of Tuple
> to struct to see if I could get an easier error msg. Turns out the use
> of struct also results in much cleaner writeln text.
>
> Still has the compile error, though.

We have to live with the fact that amap and friends produce a Tuple 
result if there are multiple functions. A struct won't work.


However, if you prove to yourself that the result tuple and your struct 
have the same memory layout, you can cast the tuple slice to struct 
slice after calling amap:


// Buffer element type handed to amap: a Tuple without member names, one TR
// per metric function.
alias TO_for_amap_result = Tuple!(TR, TR, TR, TR);
// Struct with named members. The cast further down is only valid if this
// struct and the Tuple above have the same memory layout -- verify that first.
struct TO { TR L1_MISS; TR L1_HIT; TR DATA_ACC; TR ALL_ACC;}

// ...

auto results_for_amap = new TO_for_amap_result[samples.length];

// ...


taskPool.amap!(Metrics)(std.algorithm.map!getTerm(samples),results_for_amap);

// Reinterpret the Tuple slice as a slice of TO so members can be used by name.
auto results = cast(TO[])results_for_amap;

// Use 'results' from this point on...

Ali



Re: an example of parallel calculation of metrics

2015-10-01 Thread Jay Norwood via Digitalmars-d-learn

On Thursday, 1 October 2015 at 07:03:40 UTC, Ali Çehreli wrote:

Looks like a bug. Workaround: Get rid of member names


Thanks.  My particular use case, working with metric expressions, 
is easier to understand if I use the names.  I converted the use 
of Tuple to struct to see if I could get an easier error msg. 
Turns out the use of struct also results in much cleaner writeln 
text.


Still has the compile error, though.

import std.algorithm, std.parallelism, std.range;
import std.stdio;
import std.datetime;
import std.typecons;
import std.meta;

// define some input measurement sample tuples and output metric
// tuples

// One metric result: the raw count plus the per-cycle and per-second rates
// derived from it.
struct TR { double per_sec; double per_cycle; long raw; }

// One measured sample of input counter values.
struct TI
{
    long proc_cyc;
    long DATA_RD;
    long DATA_WR;
    long INST_FETCH;
    long L1I_MISS;
    long L1I_HIT;
    long L1D_HIT;
    long L1D_MISS;
}

// All metric results for one sample, one TR per metric function.
struct TO { TR L1_MISS; TR L1_HIT; TR DATA_ACC; TR ALL_ACC; }

// Factor the metric functions multiply a per-cycle rate by to get a
// per-second rate.
const double CYC_PER_SEC = 1_600_000_000;

// various metric definitions
// using Tuples with defined names for each member, and use the
// names here in the metrics.
/// L1 miss metric: raw = L1I_MISS + L1D_MISS, per_cycle = raw / proc_cyc,
/// per_sec = per_cycle * CYC_PER_SEC.
TR met_l1_miss(ref TI m)
{
    TR metric;
    metric.raw = m.L1I_MISS + m.L1D_MISS;
    metric.per_cycle = cast(double) metric.raw / m.proc_cyc;
    metric.per_sec = metric.per_cycle * CYC_PER_SEC;
    return metric;
}
/// L1 hit metric: raw = L1I_HIT + L1D_HIT, per_cycle = raw / proc_cyc,
/// per_sec = per_cycle * CYC_PER_SEC.
TR met_l1_hit(ref TI m)
{
    TR metric;
    metric.raw = m.L1I_HIT + m.L1D_HIT;
    metric.per_cycle = cast(double) metric.raw / m.proc_cyc;
    metric.per_sec = metric.per_cycle * CYC_PER_SEC;
    return metric;
}
/// Data access metric: raw = DATA_RD + DATA_WR, per_cycle = raw / proc_cyc,
/// per_sec = per_cycle * CYC_PER_SEC.
TR met_data_acc(ref TI m)
{
    TR metric;
    metric.raw = m.DATA_RD + m.DATA_WR;
    metric.per_cycle = cast(double) metric.raw / m.proc_cyc;
    metric.per_sec = metric.per_cycle * CYC_PER_SEC;
    return metric;
}
/// All-access metric: raw = DATA_RD + DATA_WR + INST_FETCH,
/// per_cycle = raw / proc_cyc, per_sec = per_cycle * CYC_PER_SEC.
TR met_all_acc(ref TI m)
{
    TR metric;
    metric.raw = m.DATA_RD + m.DATA_WR + m.INST_FETCH;
    metric.per_cycle = cast(double) metric.raw / m.proc_cyc;
    metric.per_sec = metric.per_cycle * CYC_PER_SEC;
    return metric;
}


// a convenience to use all the metrics above as a list
// (passed as the template argument list of taskPool.amap in main)
alias Metrics = 
AliasSeq!(met_l1_miss,met_l1_hit,met_data_acc,met_all_acc);


/// Builds 100 synthetic samples, runs the four metrics over them with
/// taskPool.amap, evaluates the same metrics serially for sample 0, and
/// prints the values together with the elapsed time in milliseconds.
void main(string[] argv)
{
    auto samples = iota(1_00);
    auto meas = new TI[samples.length];
    auto results = new TO[samples.length];

    // Initialize some values for the measured samples
    foreach (i, ref m; meas)
    {
        m.proc_cyc = 1_000_000 + i * 2;
        m.DATA_RD = 1000 + i;
        m.DATA_WR = 2000 + i;
        m.INST_FETCH = m.proc_cyc / 2;

        m.L1I_HIT = m.INST_FETCH - 100;
        m.L1I_MISS = 100;
        m.L1D_HIT = m.DATA_RD + m.DATA_WR - 200;
        m.L1D_MISS = 200;
    }

    // NOTE(review): std.datetime.StopWatch is the API used when this was
    // written; newer Phobos releases appear to provide
    // std.datetime.stopwatch instead -- confirm against your compiler.
    std.datetime.StopWatch sw;
    sw.start();

    // Maps a sample index to its measurement, returned by reference because
    // the metric functions take `ref TI`.
    ref TI getTerm(int i)
    {
        return meas[i];
    }

    // compute the metric results for the above measured sample values in parallel
    // (per the accompanying messages, this call is the one that still fails
    // to compile: with several functions amap produces a Tuple result, so a
    // buffer of struct TO is rejected)
    taskPool.amap!(Metrics)(std.algorithm.map!getTerm(samples), results);

    TR rv1 = met_l1_miss(meas[0]);
    TR rv2 = met_l1_hit(meas[0]);
    TR rv3 = met_data_acc(meas[0]);
    TR rv4 = met_all_acc(meas[0]);

    // how long did this take
    long exec_ms = sw.peek().msecs;
    writeln("measurements:", meas[0]);
    writeln("rv1:", rv1);
    writeln("rv2:", rv2);
    writeln("rv3:", rv3);
    writeln("rv4:", rv4);
    // NOTE(review): rv1..rv4 are computed from meas[0] but results[1] is
    // printed here -- confirm which index was intended.
    writeln("results:", results[1]);
    writeln("time:", exec_ms);
}




Re: an example of parallel calculation of metrics

2015-10-01 Thread Ali Çehreli via Digitalmars-d-learn

On 09/30/2015 09:15 PM, Jay Norwood wrote:

> alias TO = Tuple!(TR,"L1_MISS", TR, "L1_HIT", TR,"DATA_ACC", 
TR,"ALL_ACC");


Looks like a bug. Workaround: Get rid of member names there:

alias TO = Tuple!(TR, TR, TR, TR);

> 
//taskPool.amap!(Metrics)(std.algorithm.map!getTerm(samples),results);


For some reason, having member names prevents 'results' passing one of 
amap's requirements. The following check in std.parallelism thinks that 
'results' does not have random assignable elements if it is a Tuple with 
member names.


else static if(randAssignable!(Args[$ - 1]) && Args.length > 1)
{
static assert(0, "Wrong buffer type.");
}

Ali



Re: an example of parallel calculation of metrics

2015-09-30 Thread Jay Norwood via Digitalmars-d-learn
This compiles and appears to execute correctly, but if I 
uncomment the taskPool line I get a compile error message about 
wrong buffer type.  Am I breaking some rule for 
std.parallelism.amap?


import std.algorithm, std.parallelism, std.range;
import std.stdio;
import std.datetime;
import std.typecons;
import std.meta;

// define some input measurement sample tuples and output metric
// tuples

// One metric result: the raw count plus the per-cycle and per-second rates
// derived from it.
alias TR = Tuple!(double, "per_sec", double, "per_cycle", long, "raw");

// One measured sample of input counter values.
alias TI = Tuple!(long, "proc_cyc", long, "DATA_RD", long, "DATA_WR",
                  long, "INST_FETCH", long, "L1I_MISS", long, "L1I_HIT",
                  long, "L1D_HIT", long, "L1D_MISS");

// Element type of the results buffer: one named TR per metric function.
alias TO = Tuple!(TR, "L1_MISS", TR, "L1_HIT", TR, "DATA_ACC", TR, "ALL_ACC");

// Factor the metric functions multiply a per-cycle rate by to get a
// per-second rate.
const double CYC_PER_SEC = 1_600_000_000;

// various metric definitions
// using Tuples with defined names for each member, and use the
// names here in the metrics.
/// L1 miss metric: raw = L1I_MISS + L1D_MISS, per_cycle = raw / proc_cyc,
/// per_sec = per_cycle * CYC_PER_SEC.
TR met_l1_miss(ref TI m)
{
    TR metric;
    metric.raw = m.L1I_MISS + m.L1D_MISS;
    metric.per_cycle = cast(double) metric.raw / m.proc_cyc;
    metric.per_sec = metric.per_cycle * CYC_PER_SEC;
    return metric;
}
/// L1 hit metric: raw = L1I_HIT + L1D_HIT, per_cycle = raw / proc_cyc,
/// per_sec = per_cycle * CYC_PER_SEC.
TR met_l1_hit(ref TI m)
{
    TR metric;
    metric.raw = m.L1I_HIT + m.L1D_HIT;
    metric.per_cycle = cast(double) metric.raw / m.proc_cyc;
    metric.per_sec = metric.per_cycle * CYC_PER_SEC;
    return metric;
}
/// Data access metric: raw = DATA_RD + DATA_WR, per_cycle = raw / proc_cyc,
/// per_sec = per_cycle * CYC_PER_SEC.
TR met_data_acc(ref TI m)
{
    TR metric;
    metric.raw = m.DATA_RD + m.DATA_WR;
    metric.per_cycle = cast(double) metric.raw / m.proc_cyc;
    metric.per_sec = metric.per_cycle * CYC_PER_SEC;
    return metric;
}
/// All-access metric: raw = DATA_RD + DATA_WR + INST_FETCH,
/// per_cycle = raw / proc_cyc, per_sec = per_cycle * CYC_PER_SEC.
TR met_all_acc(ref TI m)
{
    TR metric;
    metric.raw = m.DATA_RD + m.DATA_WR + m.INST_FETCH;
    metric.per_cycle = cast(double) metric.raw / m.proc_cyc;
    metric.per_sec = metric.per_cycle * CYC_PER_SEC;
    return metric;
}


// a convenience to use all the metrics above as a list
// (the template argument list for the taskPool.amap call in main, which is
// commented out in this version)
alias Metrics = 
AliasSeq!(met_l1_miss,met_l1_hit,met_data_acc,met_all_acc);


/// Builds 100 synthetic samples, evaluates the four metrics serially for
/// sample 0, and prints them together with the elapsed time in milliseconds.
/// The parallel amap call is left commented out.
void main(string[] argv)
{
    auto samples = iota(1_00);
    auto meas = new TI[samples.length];
    auto results = new TO[samples.length];

    // Initialize some values for the measured samples
    foreach (i, ref m; meas)
    {
        m.proc_cyc = 1_000_000 + i * 2;
        m.DATA_RD = 1000 + i;
        m.DATA_WR = 2000 + i;
        m.INST_FETCH = m.proc_cyc / 2;

        m.L1I_HIT = m.INST_FETCH - 100;
        m.L1I_MISS = 100;
        m.L1D_HIT = m.DATA_RD + m.DATA_WR - 200;
        m.L1D_MISS = 200;
    }

    // NOTE(review): std.datetime.StopWatch is the API used when this was
    // written; newer Phobos releases appear to provide
    // std.datetime.stopwatch instead -- confirm against your compiler.
    std.datetime.StopWatch sw;
    sw.start();

    // Maps a sample index to its measurement, returned by reference because
    // the metric functions take `ref TI`.
    ref TI getTerm(int i)
    {
        return meas[i];
    }

    // compute the metric results for the above measured sample values in parallel
    // (left disabled: per the accompanying message, uncommenting this line
    // gives a compile error about a wrong buffer type)
    //taskPool.amap!(Metrics)(std.algorithm.map!getTerm(samples),results);

    TR rv1 = met_l1_miss(meas[0]);
    TR rv2 = met_l1_hit(meas[0]);
    TR rv3 = met_data_acc(meas[0]);
    TR rv4 = met_all_acc(meas[0]);

    // how long did this take
    long exec_ms = sw.peek().msecs;
    writeln("measurements:", meas[0]);
    writeln("rv1:", rv1);
    writeln("rv2:", rv2);
    writeln("rv3:", rv3);
    writeln("rv4:", rv4);
    // With amap disabled, results[1] still holds its default-initialized value.
    // NOTE(review): rv1..rv4 use meas[0] while results[1] is printed --
    // confirm which index was intended.
    writeln("results:", results[1]);
    writeln("time:", exec_ms);
}



Re: an example of parallel calculation of metrics

2015-09-30 Thread Jay Norwood via Digitalmars-d-learn
On Wednesday, 30 September 2015 at 22:24:25 UTC, Jay Norwood 
wrote:

// various metric definitions
// the Tuples could also define names for each member and use
// the names here in the metrics.

/// Metric 1: element 0 + element 1 + element 2 of the sample (taken by value).
long met1(TI m)
{
    long total = m[0];
    total += m[1];
    total += m[2];
    return total;
}
/// Metric 2: element 1 + element 2 + element 3 of the sample (taken by value).
long met2(TI m)
{
    long total = m[1];
    total += m[2];
    total += m[3];
    return total;
}
/// Metric 3: element 0 - element 1 + element 2 of the sample (taken by value).
long met3(TI m)
{
    long total = m[0];
    total -= m[1];
    total += m[2];
    return total;
}
/// Metric 4: element 0 + element 1 - element 2 of the sample (taken by value).
long met4(TI m)
{
    long total = m[0];
    total += m[1];
    total -= m[2];
    return total;
}



should use reference parameters here:
/// Metric 1: element 0 + element 1 + element 2 of the sample (taken by reference).
long met1(ref TI m)
{
    long total = m[0];
    total += m[1];
    total += m[2];
    return total;
}
/// Metric 2: element 1 + element 2 + element 3 of the sample (taken by reference).
long met2(ref TI m)
{
    long total = m[1];
    total += m[2];
    total += m[3];
    return total;
}
/// Metric 3: element 0 - element 1 + element 2 of the sample (taken by reference).
long met3(ref TI m)
{
    long total = m[0];
    total -= m[1];
    total += m[2];
    return total;
}
/// Metric 4: element 0 + element 1 - element 2 of the sample (taken by reference).
long met4(ref TI m)
{
    long total = m[0];
    total += m[1];
    total -= m[2];
    return total;
}





an example of parallel calculation of metrics

2015-09-30 Thread Jay Norwood via Digitalmars-d-learn
This is something I'm playing with for work. We do this a lot, 
capture counter events for some number of on-chip performance 
counters, compute some metrics, display the outputs. This seems 
ideal for the application.


import std.algorithm, std.parallelism, std.range;
import std.stdio;
import std.datetime;
import std.typecons;
import std.meta;

// define some input measurement sample tuples and output metric
// tuples

// One measured sample: five input values, accessed by index.
alias TI = Tuple!(long, long, long, long, long);
// Element type of the results buffer: four values, one per metric function.
alias TO = Tuple!(long, long, long, long);

// various metric definitions
// the Tuples could also define names for each member and use the
// names here in the metrics.

/// Metric 1: element 0 + element 1 + element 2 of the sample.
long met1(TI m)
{
    long total = m[0];
    total += m[1];
    total += m[2];
    return total;
}
/// Metric 2: element 1 + element 2 + element 3 of the sample.
long met2(TI m)
{
    long total = m[1];
    total += m[2];
    total += m[3];
    return total;
}
/// Metric 3: element 0 - element 1 + element 2 of the sample.
long met3(TI m)
{
    long total = m[0];
    total -= m[1];
    total += m[2];
    return total;
}
/// Metric 4: element 0 + element 1 - element 2 of the sample.
long met4(TI m)
{
    long total = m[0];
    total += m[1];
    total -= m[2];
    return total;
}

// a convenience to use all the metrics above as a list
// (passed as the template argument list of taskPool.amap in main)
alias Metrics = AliasSeq!(met1,met2,met3,met4);

/// Builds 1000 synthetic samples, computes the four metrics for all of them
/// in parallel with taskPool.amap, and prints the results and the elapsed
/// time in milliseconds.
void main(string[] argv)
{
    auto samples = iota(1_000);
    auto meas = new TI[samples.length];
    auto results = new TO[samples.length];

    // Initialize some values for the measured samples
    foreach (i, ref m; meas)
    {
        m[0] = i;
        m[1] = i + 1;
        m[2] = i + 2;
        m[3] = i + 3;
        m[4] = i + 4;
    }

    // NOTE(review): std.datetime.StopWatch is the API used when this was
    // written; newer Phobos releases appear to provide
    // std.datetime.stopwatch instead -- confirm against your compiler.
    std.datetime.StopWatch sw;
    sw.start();

    // Maps a sample index to the corresponding measurement (returned by
    // reference).
    ref TI getTerm(int i)
    {
        return meas[i];
    }

    // compute the metric results for the above measured sample values in parallel
    taskPool.amap!(Metrics)(std.algorithm.map!getTerm(samples), results);

    // how long did this take
    long exec_ms = sw.peek().msecs;
    writeln("results:", results);
    writeln("time:", exec_ms);
}