Re: an example of parallel calculation of metrics
This is another attempt with the metric parallel processing. This uses the results only to return an int value, which could be used later as an error return value. The metric value locations are now allocated as a part of the input measurement values tuple. The Tuple vs struct definitions seem to have a big difference in default output formatting. import std.algorithm, std.parallelism, std.range; import std.typecons; import std.meta; import std.stdio; // define some input measurement sample tuples and output metric tuples alias TR = Tuple!(long,"raw",double, "per_cycle"); //struct TR {long raw; double per_cycle;} alias TO = Tuple!(TR, "l1_miss", TR, "l1_access" ); //struct TO {TR l1_miss; TR l1_access; }; alias TI = Tuple!(long, "L1I_MISS",long, "L1D_MISS", long, "L1D_READ", long, "L1D_WRITE", long, "cycles", TO, "res"); // various metric definitions // using Tuples with defined names for each member, and use the names here in the metrics. long met_l1_miss ( ref TI m){ return m.L1I_MISS + m.L1D_MISS; } long met_l1_access ( ref TI m){ return m.L1D_READ + m.L1D_WRITE; } int met_all (ref TI m) { with (m.res){ l1_miss.raw = met_l1_miss(m); l1_access.raw = met_l1_access(m); l1_miss.per_cycle = (m.cycles == 0)? double.nan : l1_miss.raw / cast(double)m.cycles; l1_access.per_cycle = (m.cycles == 0)? 
double.nan : l1_access.raw / cast(double)m.cycles; } return 0; } // a convenience to use all the metrics above as a list alias Metrics = AliasSeq!(met_all); void main(string[] argv) { auto samples = iota(100); auto meas = new TI[samples.length]; auto results = new int[samples.length]; // Initialize some values for the measured samples foreach(i, ref m; meas){ m.L1D_MISS= 100+i; m.L1I_MISS=100-i; m.L1D_READ= 200+i; m.L1D_WRITE=200-i; m.cycles= 10+i; } ref TI getTerm(int i) { return meas[i]; } // compute the metric results for the above measured sample values in parallel taskPool.amap!(Metrics)(std.algorithm.map!getTerm(samples),results); writeln("measurements:", meas[1]); foreach(ref m; meas){ writeln(m.res); } }
Re: an example of parallel calculation of metrics
I re-submitted this as: https://issues.dlang.org/show_bug.cgi?id=15135
Re: an example of parallel calculation of metrics
So, this is a condensed version of the original problem. It looks like the problem is that the return value for taskPool.amap can't be a tuple of tuples or a tuple of structs. Either way, it fails with the "Wrong buffer type" error message if I uncomment the taskPool line: import std.algorithm, std.parallelism, std.range; import std.typecons; import std.meta; import std.stdio; // define some input measurement sample tuples and output metric tuples struct TR { long raw; double per_cyc;} //alias TR = Tuple!(long, "raw", double, "per_cyc"); alias TI = Tuple!(long, "L1I_MISS",long, "L1D_MISS", long, "L1D_READ", long, "L1D_WRITE", long, "cycles" ); alias TO = Tuple!(TR, "L1_MISS", TR, "L1D_ACCESS"); // various metric definitions // using Tuples with defined names for each member, and use the names here in the metrics. TR met_l1_miss ( ref TI m){ TR rv; rv.raw = m.L1I_MISS+m.L1D_MISS; rv.per_cyc = cast(double)rv.raw/m.cycles; return rv; } TR met_l1_access ( ref TI m){ TR rv; rv.raw = m.L1D_READ+m.L1D_WRITE; rv.per_cyc = cast(double)rv.raw/m.cycles; return rv; } // a convenience to use all the metrics above as a list alias Metrics = AliasSeq!(met_l1_miss, met_l1_access); void main(string[] argv) { auto samples = iota(100); auto meas = new TI[samples.length]; auto results = new TO[samples.length]; // Initialize some values for the measured samples foreach(i, ref m; meas){ m.L1D_MISS= 100+i; m.L1I_MISS=100-i; m.L1D_READ= 200+i; m.L1D_WRITE=200-i; m.cycles= 10+i; } ref TI getTerm(int i) { return meas[i]; } // compute the metric results for the above measured sample values in parallel //taskPool.amap!(Metrics)(std.algorithm.map!getTerm(samples),results); TR rv1 = met_l1_miss( meas[1]); TR rv2 = met_l1_access( meas[1]); writeln("measurements:", meas[1]); writeln("rv1:", rv1); writeln("rv2:", rv2); writeln("results:", results[1]); }
Re: an example of parallel calculation of metrics
On Thursday, 1 October 2015 at 18:08:31 UTC, Ali Çehreli wrote: "However, if you prove to yourself that the result tuple and your struct have the same memory layout, you can cast the tuple slice to struct slice after calling amap:" After re-reading your explanation, I see that the only problem is that the results need to be a Tuple. In this example it works with a Tuple with named members as the result type and an array of structs as the input. I'll re-check whether a multi-member result also works with named members, and I'll update the issue report. import std.meta; import std.stdio; // define some input measurement sample tuples and output metric tuples struct TI {long L1I_MISS; long L1D_MISS; } alias TO = Tuple!(long, "raw"); // various metric definitions // using Tuples with defined names for each member, and use the names here in the metrics. TO met_l1_miss ( ref TI m){ TO rv; rv.raw = m.L1I_MISS+m.L1D_MISS; return rv; } // a convenience to use all the metrics above as a list alias Metrics = AliasSeq!(met_l1_miss); void main(string[] argv) { auto samples = iota(100); auto meas = new TI[samples.length]; auto results = new TO[samples.length]; // Initialize some values for the measured samples foreach(i, ref m; meas){ m.L1D_MISS= 100+i; m.L1I_MISS=100-i; } ref TI getTerm(int i) { return meas[i]; } // compute the metric results for the above measured sample values in parallel taskPool.amap!(Metrics)(std.algorithm.map!getTerm(samples),results); TO rv1 = met_l1_miss( meas[1]); writeln("measurements:", meas[1]); writeln("rv1:", rv1); writeln("results:", results[1]); }
Re: an example of parallel calculation of metrics
On Thursday, 1 October 2015 at 18:08:31 UTC, Ali Çehreli wrote: "Makes sense. Please open a bug at least for investigation why tuples with named members don't work with amap." OK, thanks. I opened the issue: https://issues.dlang.org/show_bug.cgi?id=15134
Re: an example of parallel calculation of metrics
On 10/01/2015 08:56 AM, Jay Norwood wrote: > Thanks. My particular use case, working with metric expressions, is > easier to understand if I use the names. Makes sense. Please open a bug at least for investigation why tuples with named members don't work with amap. > I converted the use of Tuple > to struct to see if I could get an easier error msg. Turns out the use > of struct also results in much cleaner writeln text. > > Still has the compile error, though. We have to live with the fact that amap and friends produce a Tuple result if there are multiple functions. A struct won't work. However, if you prove to yourself that the result tuple and your struct have the same memory layout, you can cast the tuple slice to struct slice after calling amap: alias TO_for_amap_result = Tuple!(TR, TR, TR, TR); struct TO { TR L1_MISS; TR L1_HIT; TR DATA_ACC; TR ALL_ACC;} // ... auto results_for_amap = new TO_for_amap_result[samples.length]; // ... taskPool.amap!(Metrics)(std.algorithm.map!getTerm(samples),results_for_amap); auto results = cast(TO[])results_for_amap; // Use 'results' from this point on... Ali
Re: an example of parallel calculation of metrics
On Thursday, 1 October 2015 at 07:03:40 UTC, Ali Çehreli wrote: "Looks like a bug. Workaround: Get rid of member names" Thanks. My particular use case, working with metric expressions, is easier to understand if I use the names. I converted the use of Tuple to struct to see if I could get an easier error msg. Turns out the use of struct also results in much cleaner writeln text. Still has the compile error, though. import std.algorithm, std.parallelism, std.range; import std.stdio; import std.datetime; import std.typecons; import std.meta; // define some input measurement sample tuples and output metric tuples struct TR {double per_sec; double per_cycle; long raw;} struct TI {long proc_cyc; long DATA_RD; long DATA_WR; long INST_FETCH; long L1I_MISS; long L1I_HIT; long L1D_HIT; long L1D_MISS;} struct TO { TR L1_MISS; TR L1_HIT; TR DATA_ACC; TR ALL_ACC;} const double CYC_PER_SEC = 1_600_000_000; // various metric definitions // using Tuples with defined names for each member, and use the names here in the metrics. 
TR met_l1_miss ( ref TI m){ TR rv; with(rv) with(m) { raw = L1I_MISS+L1D_MISS; per_cycle = cast(double)raw/proc_cyc; per_sec = per_cycle*CYC_PER_SEC;} return rv; } TR met_l1_hit ( ref TI m){ TR rv; with(rv) with(m) { raw = L1I_HIT+L1D_HIT; per_cycle = cast(double)raw/proc_cyc; per_sec = per_cycle*CYC_PER_SEC;} return rv; } TR met_data_acc ( ref TI m){ TR rv; with(rv) with(m) { raw = DATA_RD+DATA_WR; per_cycle = cast(double)raw/proc_cyc; per_sec = per_cycle*CYC_PER_SEC;} return rv; } TR met_all_acc( ref TI m){ TR rv; with(rv) with(m) { raw = DATA_RD+DATA_WR+INST_FETCH; per_cycle = cast(double)raw/proc_cyc; per_sec = per_cycle*CYC_PER_SEC;} return rv; } // a convenience to use all the metrics above as a list alias Metrics = AliasSeq!(met_l1_miss,met_l1_hit,met_data_acc,met_all_acc); void main(string[] argv) { auto samples = iota(1_00); auto meas = new TI[samples.length]; auto results = new TO[samples.length]; // Initialize some values for the measured samples foreach(i, ref m; meas){ with(m){ proc_cyc = 1_000_000+i*2; DATA_RD = 1000+i; DATA_WR= 2000+i; INST_FETCH=proc_cyc/2; L1I_HIT= INST_FETCH-100; L1I_MISS=100; L1D_HIT= DATA_RD+DATA_WR - 200; L1D_MISS=200;} } std.datetime.StopWatch sw; sw.start(); ref TI getTerm(int i) { return meas[i]; } // compute the metric results for the above measured sample values in parallel taskPool.amap!(Metrics)(std.algorithm.map!getTerm(samples),results); TR rv1 = met_l1_miss( meas[0]); TR rv2 = met_l1_hit( meas[0]); TR rv3 = met_data_acc( meas[0]); TR rv4 = met_all_acc( meas[0]); // how long did this take long exec_ms = sw.peek().msecs; writeln("measurements:", meas[0]); writeln("rv1:", rv1); writeln("rv2:", rv2); writeln("rv3:", rv3); writeln("rv4:", rv4); writeln("results:", results[1]); writeln("time:", exec_ms); }
Re: an example of parallel calculation of metrics
On 09/30/2015 09:15 PM, Jay Norwood wrote: > alias TO = Tuple!(TR,"L1_MISS", TR, "L1_HIT", TR,"DATA_ACC", TR,"ALL_ACC"); Looks like a bug. Workaround: Get rid of member names there: alias TO = Tuple!(TR, TR, TR, TR); > //taskPool.amap!(Metrics)(std.algorithm.map!getTerm(samples),results); For some reason, having member names prevents 'results' passing one of amap's requirements. The following check in std.parallelism thinks that 'results' does not have random assignable elements if it is a Tuple with member names. else static if(randAssignable!(Args[$ - 1]) && Args.length > 1) { static assert(0, "Wrong buffer type."); } Ali
Re: an example of parallel calculation of metrics
This compiles and appears to execute correctly, but if I uncomment the taskPool line I get a compile error message about wrong buffer type. Am I breaking some rule for std.parallelism.amap? import std.algorithm, std.parallelism, std.range; import std.stdio; import std.datetime; import std.typecons; import std.meta; // define some input measurement sample tuples and output metric tuples alias TR = Tuple!(double,"per_sec", double, "per_cycle", long,"raw"); alias TI = Tuple!(long, "proc_cyc", long, "DATA_RD", long, "DATA_WR", long, "INST_FETCH", long, "L1I_MISS", long, "L1I_HIT", long,"L1D_HIT", long, "L1D_MISS"); alias TO = Tuple!(TR,"L1_MISS", TR, "L1_HIT", TR,"DATA_ACC", TR,"ALL_ACC"); const double CYC_PER_SEC = 1_600_000_000; // various metric definitions // using Tuples with defined names for each member, and use the names here in the metrics. TR met_l1_miss ( ref TI m){ TR rv; with(rv) with(m) { raw = L1I_MISS+L1D_MISS; per_cycle = cast(double)raw/proc_cyc; per_sec = per_cycle*CYC_PER_SEC;} return rv; } TR met_l1_hit ( ref TI m){ TR rv; with(rv) with(m) { raw = L1I_HIT+L1D_HIT; per_cycle = cast(double)raw/proc_cyc; per_sec = per_cycle*CYC_PER_SEC;} return rv; } TR met_data_acc ( ref TI m){ TR rv; with(rv) with(m) { raw = DATA_RD+DATA_WR; per_cycle = cast(double)raw/proc_cyc; per_sec = per_cycle*CYC_PER_SEC;} return rv; } TR met_all_acc( ref TI m){ TR rv; with(rv) with(m) { raw = DATA_RD+DATA_WR+INST_FETCH; per_cycle = cast(double)raw/proc_cyc; per_sec = per_cycle*CYC_PER_SEC;} return rv; } // a convenience to use all the metrics above as a list alias Metrics = AliasSeq!(met_l1_miss,met_l1_hit,met_data_acc,met_all_acc); void main(string[] argv) { auto samples = iota(1_00); auto meas = new TI[samples.length]; auto results = new TO[samples.length]; // Initialize some values for the measured samples foreach(i, ref m; meas){ with(m){ proc_cyc = 1_000_000+i*2; DATA_RD = 1000+i; DATA_WR= 2000+i; INST_FETCH=proc_cyc/2; L1I_HIT= INST_FETCH-100; L1I_MISS=100; L1D_HIT= 
DATA_RD+DATA_WR - 200; L1D_MISS=200;} } std.datetime.StopWatch sw; sw.start(); ref TI getTerm(int i) { return meas[i]; } // compute the metric results for the above measured sample values in parallel //taskPool.amap!(Metrics)(std.algorithm.map!getTerm(samples),results); TR rv1 = met_l1_miss( meas[0]); TR rv2 = met_l1_hit( meas[0]); TR rv3 = met_data_acc( meas[0]); TR rv4 = met_all_acc( meas[0]); // how long did this take long exec_ms = sw.peek().msecs; writeln("measurements:", meas[0]); writeln("rv1:", rv1); writeln("rv2:", rv2); writeln("rv3:", rv3); writeln("rv4:", rv4); writeln("results:", results[1]); writeln("time:", exec_ms); }
Re: an example of parallel calculation of metrics
On Wednesday, 30 September 2015 at 22:24:25 UTC, Jay Norwood wrote: // various metric definitions // the Tuples could also define names for each member and use the names here in the metrics. long met1( TI m){ return m[0] + m[1] + m[2]; } long met2( TI m){ return m[1] + m[2] + m[3]; } long met3( TI m){ return m[0] - m[1] + m[2]; } long met4( TI m){ return m[0] + m[1] - m[2]; } These metric functions should take their argument by reference, so that each call reads the sample in place rather than receiving a copy of the tuple: long met1( ref TI m){ return m[0] + m[1] + m[2]; } long met2( ref TI m){ return m[1] + m[2] + m[3]; } long met3( ref TI m){ return m[0] - m[1] + m[2]; } long met4( ref TI m){ return m[0] + m[1] - m[2]; }
an example of parallel calculation of metrics
This is something I'm playing with for work. We do this a lot, capture counter events for some number of on-chip performance counters, compute some metrics, display the outputs. This seems ideal for the application. import std.algorithm, std.parallelism, std.range; import std.stdio; import std.datetime; import std.typecons; import std.meta; // define some input measurement sample tuples and output metric tuples alias TI = Tuple!(long, long, long, long, long); alias TO = Tuple!(long, long, long, long); // various metric definitions // the Tuples could also define names for each member and use the names here in the metrics. long met1( TI m){ return m[0] + m[1] + m[2]; } long met2( TI m){ return m[1] + m[2] + m[3]; } long met3( TI m){ return m[0] - m[1] + m[2]; } long met4( TI m){ return m[0] + m[1] - m[2]; } // a convenience to use all the metrics above as a list alias Metrics = AliasSeq!(met1,met2,met3,met4); void main(string[] argv) { auto samples = iota(1_000); auto meas = new TI[samples.length]; auto results = new TO[samples.length]; // Initialize some values for the measured samples foreach(i, ref m; meas){ m[0] = i; m[1] = i+1; m[2] = i+2; m[3] = i+3; m[4] = i+4; } std.datetime.StopWatch sw; sw.start(); ref TI getTerm(int i) { return meas[i]; } // compute the metric results for the above measured sample values in parallel taskPool.amap!(Metrics)(std.algorithm.map!getTerm(samples),results); // how long did this take long exec_ms = sw.peek().msecs; writeln("results:", results); writeln("time:", exec_ms); }