This is an automated email from the ASF dual-hosted git repository.
pavelvesely pushed a commit to branch ReqExperiment
in repository
https://gitbox.apache.org/repos/asf/datasketches-characterization.git
The following commit(s) were added to refs/heads/ReqExperiment by this push:
new 8919a2e two metrics for measuring accuracy times size
8919a2e is described below
commit 8919a2e06865fe8bd7a6e754f439d521420355a1
Author: Pavel Vesely <[email protected]>
AuthorDate: Mon Jan 11 12:06:30 2021 +0100
two metrics for measuring accuracy times size
---
.../resources/quantiles/ReqSketchAccuracyJob.conf | 11 +++++---
.../quantiles/ReqSketchAccuracyProfile.java | 32 ++++++++++++++++++++--
2 files changed, 37 insertions(+), 6 deletions(-)
diff --git a/src/main/resources/quantiles/ReqSketchAccuracyJob.conf b/src/main/resources/quantiles/ReqSketchAccuracyJob.conf
index 72a2b54..17a12dd 100644
--- a/src/main/resources/quantiles/ReqSketchAccuracyJob.conf
+++ b/src/main/resources/quantiles/ReqSketchAccuracyJob.conf
@@ -22,13 +22,13 @@ Pattern=Sorted # Sorted, Reversed, Zoomin, Zoomout, Random, Sqrt, FlipFlop
Offset=1 #0 for min value of 0; 1 for min value of 1
## Stream lengths
-LgMin=24 # The starting stream length
-LgMax=24 # How high the stream length goes
+LgMin=26 # The starting stream length
+LgMax=26 # How high the stream length goes
LgDelta=2 # If > 0, this is the lg Increment
PPO=1 # The horizontal x-resolution of trials points
# Trials config (indep of sketch)
-LgTrials=15 # lgTrials at every stream length
+LgTrials=12 # lgTrials at every stream length
ErrQSkLgK=12 # the rank error distribution sketch LgK
ErrHllSkLgK=12 # the rank error HLL sketch Lgk
Shuffle=true # If true, shuffle before each trial
@@ -41,7 +41,7 @@ StdDev=1 # std deviation used when plotting LB, UB
RankRange=1.0 # range of rank to plot. E.g., given 0.3: if LRA => 0 to 0.3; if HRA => 0.7 to 1.0
# Specific sketch config
-K=24 # sketch size and accuracy parameter
+K=12 # sketch size and accuracy parameter
HRA=true # if true use high-rank accuracy, otherwise low-rank accuracy
Compatible=false
@@ -59,6 +59,9 @@ TimeZoneOffset=0 #-25200000 # offset in millisec: PST (UTC-8) = -28_800_000 PDT
FileNameDateFormat=yyyyMMdd'_'HHmmssz
ReadableDateFormat=yyyy/MM/dd HH:mm:ss
+# FOR SPECIAL METRICS CAPTURING ACCURACY PER BYTE
+MetricsRankRange = 0.3
+
# TEMPORARY
INIT_NUMBER_OF_SECTIONS = 3
NOM_CAPACITY_MULTIPLIER = 2
diff --git a/src/test/java/org/apache/datasketches/characterization/quantiles/ReqSketchAccuracyProfile.java b/src/test/java/org/apache/datasketches/characterization/quantiles/ReqSketchAccuracyProfile.java
index dfc9f15..4742c7e 100644
--- a/src/test/java/org/apache/datasketches/characterization/quantiles/ReqSketchAccuracyProfile.java
+++ b/src/test/java/org/apache/datasketches/characterization/quantiles/ReqSketchAccuracyProfile.java
@@ -69,6 +69,7 @@ public class ReqSketchAccuracyProfile implements JobProfile {
private double exponent;
private int sd;
private double rankRange;
+ private double metricsRankRange;
//Target sketch configuration & error analysis
private int K;
@@ -159,6 +160,8 @@ public class ReqSketchAccuracyProfile implements JobProfile {
hra = Boolean.parseBoolean(prop.mustGet("HRA"));
ltEq = Boolean.parseBoolean(prop.mustGet("LtEq"));
+
+ metricsRankRange = Double.parseDouble(prop.mustGet("MetricsRankRange"));
INIT_NUMBER_OF_SECTIONS =
Integer.parseInt(prop.mustGet("INIT_NUMBER_OF_SECTIONS"));
NOM_CAPACITY_MULTIPLIER =
Float.parseFloat(prop.mustGet("NOM_CAPACITY_MULTIPLIER"));
@@ -274,7 +277,13 @@ public class ReqSketchAccuracyProfile implements JobProfile {
//sumAllocCounts = sk.
}
-
+
+ // for special metrics for capturing accuracy per byte
+ double sumRelStdDev = 0;
+ int numRelStdDev = 0;
+ double sumAddStdDev = 0;
+ int numAddStdDev = 0;
+
//at this point each of the errQSkArr sketches has a distribution of error from numTrials
for (int pp = 0 ; pp < numPlotPoints; pp++) {
final double v = sortedPPValues[pp];
@@ -291,10 +300,29 @@ public class ReqSketchAccuracyProfile implements JobProfile {
job.printfData(fFmt, relPP, v, tr,
errQ[0], errQ[1], errQ[2], errQ[3], errQ[4], errQ[5], errQ[6],
rlb, rub, uErrCnt);
+
+ if (relPP > 0 && relPP < 1
+ && ((hra && relPP < metricsRankRange) || (!hra && relPP >= 1 -
metricsRankRange))) {
+ sumAddStdDev += errQ[4];
+ numAddStdDev++;
+ }
+ if (relPP > 0 && relPP < 1
+ && ((!hra && relPP < metricsRankRange) || (hra && relPP >= 1
- metricsRankRange))) {
+ sumRelStdDev += errQ[4] / (hra ? 1 - relPP : relPP);
+ numRelStdDev++;
+ }
errQSkArr[pp].reset(); //reset the errQSkArr for next streamLength
errHllSkArr[pp].reset(); //reset the errHllSkArr for next streamLength
}
- job.println(LS + "Serialization Bytes: " + sk.getSerializationBytes());
+ int serBytes = sk.getSerializationBytes();
+
+ // special metrics for capturing accuracy per byte
+ double avgRelStdDevTimesSize = serBytes * sumRelStdDev / numRelStdDev;
+ double avgAddStdDevTimesSize = serBytes * sumAddStdDev / numAddStdDev;
+ job.println(LS + "Avg. relative std. dev. times size: " +
avgRelStdDevTimesSize);
+ job.println( "Avg. additive std. dev. times size: " +
avgAddStdDevTimesSize);
+
+ job.println(LS + "Serialization Bytes: " + serBytes);
job.println(sk.viewCompactorDetail("%5.0f", false));
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]