(datasketches-characterization) 01/01: tdigest characterization

alsay Tue, 22 Oct 2024 18:55:41 -0700

This is an automated email from the ASF dual-hosted git repository.

alsay pushed a commit to branch tdigest-cpp
in repository 
https://gitbox.apache.org/repos/asf/datasketches-characterization.git


commit caf2cf3a2d85c3430d7bd2fca79428fd1ff8cf97
Author: AlexanderSaydakov <[email protected]>
AuthorDate: Tue Oct 22 18:53:50 2024 -0700

    tdigest characterization
---
 cpp/matlab/tdigest_error_vs_rank.m                 |  45 +++
 cpp/matlab/tdigest_memory.m                        |  53 ++++
 cpp/matlab/tdigest_rank_error.m                    | 127 +++++++++
 cpp/matlab/tdigest_size.m                          |  53 ++++
 cpp/matlab/tdigest_update_time.m                   |  38 +++
 cpp/matlab/tdigest_vs_req_error_exp.m              |  42 +++
 cpp/src/main.cpp                                   |  11 +
 cpp/src/tdigest_accuracy_profile_impl.hpp          |   7 +-
 cpp/src/tdigest_memory_profile.hpp                 |  44 +++
 cpp/src/tdigest_memory_profile_impl.hpp            |  71 +++++
 cpp/src/tdigest_merge_accuracy_profile.hpp         |  38 +++
 ...hpp => tdigest_merge_accuracy_profile_impl.hpp} |  44 ++-
 cpp/src/tdigest_merge_timing_profile.hpp           |  44 +++
 ...l.hpp => tdigest_merge_timing_profile_impl.hpp} |   7 +-
 cpp/src/tdigest_sketch_accuracy_profile_impl.hpp   |  22 +-
 cpp/src/tdigest_timing_profile_impl.hpp            |  13 +-
 .../tdigest/TDigestAccuracyProfile.java            | 165 +++++++++++
 .../tdigest/TDigestErrorVsRankProfile.java         | 311 +++++++++++++++++++++
 .../tdigest/TDigestMergeAccuracyProfile.java       | 173 ++++++++++++
 .../tdigest/TDigestSpeedProfile.java               | 150 ++++++++++
 src/main/resources/tdigest/TDigestAccuracyJob.conf |  38 +++
 .../resources/tdigest/TDigestErrorVsRankJob.conf   |  64 +++++
 .../resources/tdigest/TDigestMergeAccuracyJob.conf |  40 +++
 src/main/resources/tdigest/TDigestSpeedJob.conf    |  27 ++
 24 files changed, 1578 insertions(+), 49 deletions(-)

diff --git a/cpp/matlab/tdigest_error_vs_rank.m 
b/cpp/matlab/tdigest_error_vs_rank.m
new file mode 100644
index 0000000..e064a86
--- /dev/null
+++ b/cpp/matlab/tdigest_error_vs_rank.m
@@ -0,0 +1,45 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+clf;
+
+td_cpp=load('../results/tdigest_error_vs_rank-k100-sl25.tsv');
+
+hold on;
+
+plot(td_cpp(:,1), td_cpp(:,8), 'linewidth', 2);
+plot(td_cpp(:,1), td_cpp(:,7), 'linewidth', 2);
+plot(td_cpp(:,1), td_cpp(:,6), 'linewidth', 2);
+plot(td_cpp(:,1), td_cpp(:,5), 'linewidth', 2);
+plot(td_cpp(:,1), td_cpp(:,4), 'linewidth', 2);
+plot(td_cpp(:,1), td_cpp(:,3), 'linewidth', 2);
+plot(td_cpp(:,1), td_cpp(:,2), 'linewidth', 2);
+
+set(gca, 'fontsize', 16);
+title 'TDigest rank error K=100, N=2^2^5, uniform distribution, 10000 trials'
+xlabel 'normalized rank'
+ylabel 'rank error'
+grid minor on
+legend(
+'+3SD',
+'+2SD',
+'+1SD',
+'median',
+'-1SD',
+'-2SD',
+'-3SD',
+'location', 'northwest');
diff --git a/cpp/matlab/tdigest_memory.m b/cpp/matlab/tdigest_memory.m
new file mode 100644
index 0000000..b1d10fa
--- /dev/null
+++ b/cpp/matlab/tdigest_memory.m
@@ -0,0 +1,53 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+clf;
+#td100_dbl=load('../results/tdigest_double_memory-100.tsv');
+#td100_dbl_nres=load('../results/tdigest_double_memory-100-no-reserve.tsv');
+
+td100_flt=load('../results/tdigest_float_memory-100.tsv');
+td200_flt=load('../results/tdigest_float_memory-200.tsv');
+
+req10_flt=load('../results/req_sketch_memory_float_k10.tsv');
+req20_flt=load('../results/req_sketch_memory_float_k20.tsv');
+req30_flt=load('../results/req_sketch_memory_float_k30.tsv');
+req40_flt=load('../results/req_sketch_memory_float_k40.tsv');
+
+hold on;
+
+semilogx(td100_flt(:,1), td100_flt(:,11), 'linewidth', 2);
+semilogx(td200_flt(:,1), td200_flt(:,11), 'linewidth', 2);
+
+semilogx(req10_flt(:,1), req10_flt(:,11), 'linewidth', 2);
+semilogx(req20_flt(:,1), req20_flt(:,11), 'linewidth', 2);
+semilogx(req30_flt(:,1), req30_flt(:,11), 'linewidth', 2);
+semilogx(req40_flt(:,1), req40_flt(:,11), 'linewidth', 2);
+
+set(gca, 'fontsize', 16);
+title 'Memory usage TDigest vs REQ sketch (updates, no transients, C++)'
+xlabel 'number of values'
+ylabel 'size in memory, bytes'
+grid minor on
+legend(
+'tdigest<float> k=100',
+'tdigest<float> k=200',
+'req\_sketch<float> k=10',
+'req\_sketch<float> k=20',
+'req\_sketch<float> k=30',
+'req\_sketch<float> k=40',
+'location', 'northwest'
+);
diff --git a/cpp/matlab/tdigest_rank_error.m b/cpp/matlab/tdigest_rank_error.m
new file mode 100644
index 0000000..31921ff
--- /dev/null
+++ b/cpp/matlab/tdigest_rank_error.m
@@ -0,0 +1,127 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+clf;
+
+#td_cpp=load('../results/tdigest_rank_error-100-my_version-true_rank.tsv');
+#td_cpp=load('../results/tdigest_rank_error-100-lb_ub_rank.tsv');
+td100_cpp=load('../results/tdigest_rank_error-100.tsv');
+#td_java=load('../../results/tdigest_rank_error-100.tsv');
+#td_java=load('../../results/tdigest_rank_error-100-new.tsv');
+
+td200_cpp=load('../results/tdigest_rank_error-200-float.tsv');
+td200_n_cpp=load('../results/tdigest_rank_error-200-new_sizing.tsv');
+
+#td_merge2_cpp=load('../results/tdigest_merge_rank_error-100-2way.tsv');
+#td_merge4_cpp=load('../results/tdigest_merge_rank_error-100-4way.tsv');
+#td_merge32_cpp=load('../results/tdigest_merge_rank_error-100-32way.tsv');
+#td_merge64_cpp=load('../results/tdigest_merge_rank_error-100-64way.tsv');
+#td_merge128_cpp=load('../results/tdigest_merge_rank_error-100-128way.tsv');
+
+td200_merge32_cpp=load('../results/tdigest_merge_rank_error-200-32way.tsv');
+
+#td_merge2_java=load('../../results/tdigest_merge_rank_error-100-2way.tsv');
+#td_merge4_java=load('../../results/tdigest_merge_rank_error-100-4way.tsv');
+#td_merge32_java=load('../../results/tdigest_merge_rank_error-100-32way.tsv');
+#td_merge64_java=load('../../results/tdigest_merge_rank_error-100-64way.tsv');
+#td_merge128_java=load('../../results/tdigest_merge_rank_error-100-128way.tsv');
+
+#req12=load('../results/req_sketch_error-k12-double.tsv');
+req10=load('../results/req_sketch_error-k10-double.tsv');
+#req8=load('../results/req_sketch_error-k8-double.tsv');
+
+req20=load('../results/req_sketch_error-k20-double.tsv');
+req30=load('../results/req_sketch_error-k30-double.tsv');
+req40=load('../results/req_sketch_error-k40-double.tsv');
+
+hold on;
+
+semilogx(td100_cpp(:,1), td100_cpp(:,6), 'linewidth', 2);
+semilogx(td200_cpp(:,1), td200_cpp(:,6), 'linewidth', 2);
+semilogx(td200_n_cpp(:,1), td200_n_cpp(:,6), 'linewidth', 2);
+
+#semilogx(td_java(:,1), td_java(:,4), 'linewidth', 2);
+#semilogx(td_java(:,1), td_java(:,5), 'linewidth', 2);
+#semilogx(td_java(:,1), td_java(:,6), 'linewidth', 2);
+
+#semilogx(td_merge32_cpp(:,1), td_merge32_cpp(:,5), 'linewidth', 2);
+#semilogx(td_merge64_cpp(:,1), td_merge64_cpp(:,5), 'linewidth', 2);
+#semilogx(td_merge128_cpp(:,1), td_merge128_cpp(:,5), 'linewidth', 2);
+
+#semilogx(td_merge2_cpp(:,1), td_merge2_cpp(:,6), 'linewidth', 2);
+#semilogx(td_merge4_cpp(:,1), td_merge4_cpp(:,6), 'linewidth', 2);
+#semilogx(td_merge32_cpp(:,1), td_merge32_cpp(:,6), 'linewidth', 2);
+#semilogx(td_merge64_cpp(:,1), td_merge64_cpp(:,6), 'linewidth', 2);
+#semilogx(td_merge128_cpp(:,1), td_merge128_cpp(:,6), 'linewidth', 2);
+
+semilogx(td200_merge32_cpp(:,1), td200_merge32_cpp(:,6), 'linewidth', 2);
+
+#semilogx(td_merge32_java(:,1), td_merge32_java(:,5), 'linewidth', 2);
+#semilogx(td_merge64_java(:,1), td_merge64_java(:,5), 'linewidth', 2);
+#semilogx(td_merge128_java(:,1), td_merge128_java(:,5), 'linewidth', 2);
+
+#semilogx(td_merge2_java(:,1), td_merge2_java(:,6), 'linewidth', 2);
+#semilogx(td_merge4_java(:,1), td_merge4_java(:,6), 'linewidth', 2);
+#semilogx(td_merge32_java(:,1), td_merge32_java(:,6), 'linewidth', 2);
+#semilogx(td_merge64_java(:,1), td_merge64_java(:,6), 'linewidth', 2);
+#semilogx(td_merge128_java(:,1), td_merge128_java(:,6), 'linewidth', 2);
+
+semilogx(req10(:,1), req10(:,6), 'linewidth', 2);
+semilogx(req20(:,1), req20(:,6), 'linewidth', 2);
+semilogx(req30(:,1), req30(:,6), 'linewidth', 2);
+semilogx(req40(:,1), req40(:,6), 'linewidth', 2);
+
+set(gca, 'fontsize', 16);
+title 'Rank Error of TDigest and REQ sketch, uniform distribution, 1000 trials 
99 pct'
+xlabel 'stream size'
+ylabel 'normalized rank error |true - esimate|, %'
+grid minor on
+legend(
+#'TDigest k=100 rank 0.5 C++',
+'TDigest k=100 rank 0.99',
+#'t-digest k=100 rank 0.99 C++',
+'TDigest k=200 rank 0.99',
+'TDigest k=200 rank 0.99 new sizing',
+
+#'t-digest k=100 2-way merge rank 0.99 C++',
+#'t-digest k=100 4-way merge rank 0.99 C++',
+#'t-digest k=100 32-way merge rank 0.99 C++',
+#'t-digest k=100 64-way merge rank 0.95 C++',
+#'t-digest k=100 128-way merge rank 0.95 C++',
+
+'TDigest k=200 32-way merge rank 0.99 C++',
+
+#'t-digest k=100 32-way merge rank 0.99 Java',
+#'t-digest k=100 2-way merge rank 0.99 Java',
+#'t-digest k=100 4-way merge rank 0.99 Java',
+#'t-digest k=100 64-way merge rank 0.95 Java',
+#'t-digest k=100 128-way merge rank 0.95 Java',
+
+#'req k=12 rank 0.01 C++',
+#'req k=12 rank 0.05 C++',
+#'req k=12 rank 0.5 C++',
+#'REQ k=12 rank 0.95',
+#'REQ k=12 rank 0.99',
+#'req k=10 rank 0.05 C++',
+#'req k=10 rank 0.5 C++',
+'REQ k=10 rank 0.99',
+#'REQ k=10 rank 0.99',
+'REQ k=20 rank 0.99',
+'REQ k=30 rank 0.99',
+'REQ k=40 rank 0.99',
+'location', 'northwest'
+);
diff --git a/cpp/matlab/tdigest_size.m b/cpp/matlab/tdigest_size.m
new file mode 100644
index 0000000..e9ed465
--- /dev/null
+++ b/cpp/matlab/tdigest_size.m
@@ -0,0 +1,53 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+clf;
+
+#tdigest_tdunning=load('../../results/tdigest_timing-100.tsv');
+td100_dbl=load('../results/tdigest_timing-100-double-gcc13.tsv');
+td200_dbl=load('../results/tdigest_timing-200-double-gcc13.tsv');
+req10_dbl=load('../results/req_sketch_timing_double_hra_k10_gcc13.tsv');
+req20_dbl=load('../results/req_sketch_timing_double_hra_k20_gcc13.tsv');
+req30_dbl=load('../results/req_sketch_timing_double_hra_k30_gcc13.tsv');
+req40_dbl=load('../results/req_sketch_timing_double_hra_k40_gcc13.tsv');
+
+hold on;
+
+#semilogx(tdigest_tdunning(:,1), tdigest_tdunning(:,11), 'linewidth', 2);
+
+semilogx(td100_dbl(:,1), td100_dbl(:,5), 'linewidth', 2);
+semilogx(td200_dbl(:,1), td200_dbl(:,5), 'linewidth', 2);
+
+semilogx(req10_dbl(:,1), req10_dbl(:,5), 'linewidth', 2);
+semilogx(req20_dbl(:,1), req20_dbl(:,5), 'linewidth', 2);
+semilogx(req30_dbl(:,1), req30_dbl(:,5), 'linewidth', 2);
+semilogx(req40_dbl(:,1), req40_dbl(:,5), 'linewidth', 2);
+
+set(gca, 'fontsize', 16);
+title 'Serialized size of TDigest and REQ sketch'
+xlabel 'stream size'
+ylabel 'serialized size, bytes'
+grid minor on
+legend(
+'tdigest<double> k=100',
+'tdigest<double> k=200',
+'req\_sketch<double> k=10',
+'req\_sketch<double> k=20',
+'req\_sketch<double> k=30',
+'req\_sketch<double> k=40',
+'location', 'northwest'
+);
diff --git a/cpp/matlab/tdigest_update_time.m b/cpp/matlab/tdigest_update_time.m
new file mode 100644
index 0000000..f62063a
--- /dev/null
+++ b/cpp/matlab/tdigest_update_time.m
@@ -0,0 +1,38 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+clf;
+
+td100_java=load('../../results/tdigest_timing-100.tsv');
+td100_dbl_gcc13=load('../results/tdigest_timing-100-double-gcc13.tsv');
+td100_dbl_gcc13_res=load('../results/tdigest_timing-100-double-gcc13-reserve.tsv');
+
+hold on;
+semilogx(td100_dbl_gcc13(:,1), td100_dbl_gcc13(:,4), 'linewidth', 2);
+semilogx(td100_dbl_gcc13_res(:,1), td100_dbl_gcc13_res(:,4), 'linewidth', 2);
+semilogx(td100_java(:,1), td100_java(:,4), 'linewidth', 2);
+
+set(gca, 'fontsize', 16);
+title 'Update time of TDigest'
+xlabel 'stream size'
+legend(
+'cmopression=100 double gcc13',
+'cmopression=100 double gcc13 with reserve',
+'compression=100 Java tdunning',
+'location', 'northwest');
+ylabel 'update time, nanoseconds'
+grid minor on
diff --git a/cpp/matlab/tdigest_vs_req_error_exp.m 
b/cpp/matlab/tdigest_vs_req_error_exp.m
new file mode 100644
index 0000000..1a6c3df
--- /dev/null
+++ b/cpp/matlab/tdigest_vs_req_error_exp.m
@@ -0,0 +1,42 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+clf;
+
+td200=load('../results/tdigest_rank_error-200-double-exp1.5.tsv');
+
+req40_hra=load('../results/req_sketch_error-40-double-exp1.5-hra.tsv');
+req40_lra=load('../results/req_sketch_error-40-double-exp1.5-lra.tsv');
+
+hold on;
+
+semilogx(td200(:,1), td200(:,6), 'linewidth', 2);
+semilogx(req40_hra(:,1), req40_hra(:,6), 'linewidth', 2);
+
+#semilogx(td200(:,1), td200(:,2), 'linewidth', 2);
+#semilogx(req40_lra(:,1), req40_lra(:,2), 'linewidth', 2);
+
+set(gca, 'fontsize', 16);
+title 'Rank Error of TDigest and REQ sketch, exponential distribution 
lambda=1.5, 1000 trials 99 pct'
+xlabel 'stream size'
+ylabel 'normalized rank error |true - esimate|, %'
+grid minor on
+legend(
+'TDigest k=200 rank 0.99',
+'REQ k=40 HRA rank 0.99',
+'location', 'northwest'
+);
diff --git a/cpp/src/main.cpp b/cpp/src/main.cpp
index 4bb78ed..3a06be7 100644
--- a/cpp/src/main.cpp
+++ b/cpp/src/main.cpp
@@ -55,6 +55,8 @@
 
 #include "tdigest_timing_profile.hpp"
 #include "tdigest_sketch_accuracy_profile.hpp"
+#include "tdigest_merge_accuracy_profile.hpp"
+#include "tdigest_memory_profile.hpp"
 
 #include "cpc_sketch_memory_profile.hpp"
 #include "hll_sketch_memory_profile.hpp"
@@ -65,6 +67,7 @@
 
 #include "req_sketch_timing_profile.hpp"
 #include "req_merge_timing_profile.hpp"
+#include "req_error_vs_rank_profile.hpp"
 
 using namespace datasketches;
 typedef std::unique_ptr<job_profile> job_profile_ptr;
@@ -86,6 +89,7 @@ int main(int argc, char **argv) {
   job_profile::add("fi-merge-timing", job_profile_ptr(new 
frequent_items_merge_timing_profile()));
   job_profile::add("req-sketch-timing-float", job_profile_ptr(new 
req_sketch_timing_profile<float>()));
   job_profile::add("req-merge-timing-float", job_profile_ptr(new 
req_merge_timing_profile<float>()));
+  job_profile::add("req-sketch-timing-double", job_profile_ptr(new 
req_sketch_timing_profile<double>()));
 
   job_profile::add("cpc-sketch-accuracy", job_profile_ptr(new 
cpc_sketch_accuracy_profile()));
   job_profile::add("cpc-union-accuracy", job_profile_ptr(new 
cpc_union_accuracy_profile()));
@@ -96,9 +100,16 @@ int main(int argc, char **argv) {
   job_profile::add("kll-sketch-accuracy", job_profile_ptr(new 
kll_sketch_accuracy_profile()));
   job_profile::add("kll-merge-accuracy", job_profile_ptr(new 
kll_merge_accuracy_profile()));
   job_profile::add("fi-sketch-accuracy", job_profile_ptr(new 
frequent_items_sketch_accuracy_profile()));
+  job_profile::add("req-error-vs-rank-double", job_profile_ptr(new 
req_error_vs_rank_profile<double>()));
 
   job_profile::add("tdigest-timing-double", job_profile_ptr(new 
tdigest_timing_profile<double>()));
   job_profile::add("tdigest-sketch-accuracy-double", job_profile_ptr(new 
tdigest_sketch_accuracy_profile<double>()));
+  job_profile::add("tdigest-merge-accuracy-double", job_profile_ptr(new 
tdigest_merge_accuracy_profile<double>()));
+  job_profile::add("tdigest-timing-float", job_profile_ptr(new 
tdigest_timing_profile<float>()));
+  job_profile::add("tdigest-sketch-accuracy-float", job_profile_ptr(new 
tdigest_sketch_accuracy_profile<float>()));
+  job_profile::add("tdigest-merge-accuracy-float", job_profile_ptr(new 
tdigest_merge_accuracy_profile<float>()));
+  job_profile::add("tdigest-memory-float", job_profile_ptr(new 
tdigest_memory_profile<float>()));
+  job_profile::add("tdigest-memory-double", job_profile_ptr(new 
tdigest_memory_profile<double>()));
 
   job_profile::add("cpc-sketch-memory", job_profile_ptr(new 
cpc_sketch_memory_profile()));
   job_profile::add("hll-sketch-memory", job_profile_ptr(new 
hll_sketch_memory_profile()));
diff --git a/cpp/src/tdigest_accuracy_profile_impl.hpp 
b/cpp/src/tdigest_accuracy_profile_impl.hpp
index bfa5f7a..91dd78c 100644
--- a/cpp/src/tdigest_accuracy_profile_impl.hpp
+++ b/cpp/src/tdigest_accuracy_profile_impl.hpp
@@ -32,11 +32,11 @@ template<typename T>
 void tdigest_accuracy_profile<T>::run() {
   const unsigned lg_min = 0;
   const unsigned lg_max = 23;
-  const unsigned ppo = 16;
+  const unsigned ppo = 8;
   const unsigned num_trials = 1000;
   const unsigned error_pct = 99;
 
-  const uint16_t compression = 100;
+  const uint16_t compression = 200;
   const std::vector<double> ranks = {0.01, 0.05, 0.5, 0.95, 0.99};
 
   std::vector<std::vector<double>> rank_errors(ranks.size(), 
std::vector<double>());
@@ -49,7 +49,8 @@ void tdigest_accuracy_profile<T>::run() {
 
   std::random_device rd;
   std::mt19937 gen(rd());
-  std::uniform_real_distribution<T> dist(0, 1.0);
+//  std::uniform_real_distribution<T> dist(0, 1.0);
+  std::exponential_distribution<T> dist(1.5);
 
   const unsigned num_steps = count_points(lg_min, lg_max, ppo);
   unsigned stream_length = 1 << lg_min;
diff --git a/cpp/src/tdigest_memory_profile.hpp 
b/cpp/src/tdigest_memory_profile.hpp
new file mode 100644
index 0000000..e28a8ee
--- /dev/null
+++ b/cpp/src/tdigest_memory_profile.hpp
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef TDIGEST_MEMORY_PROFILE_HPP_
+#define TDIGEST_MEMORY_PROFILE_HPP_
+
+#include <random>
+
+#include "memory_usage_profile.hpp"
+
+namespace datasketches {
+
+template<typename T>
+class tdigest_memory_profile: public memory_usage_profile {
+public:
+  tdigest_memory_profile();
+  void run_trial(size_t lg_min_x, size_t num_points, size_t x_ppo);
+private:
+  std::random_device rd;
+  std::mt19937 gen;
+  std::uniform_real_distribution<T> dist;
+};
+
+}
+
+#include "tdigest_memory_profile_impl.hpp"
+
+#endif
diff --git a/cpp/src/tdigest_memory_profile_impl.hpp 
b/cpp/src/tdigest_memory_profile_impl.hpp
new file mode 100644
index 0000000..6911b73
--- /dev/null
+++ b/cpp/src/tdigest_memory_profile_impl.hpp
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef TDIGEST_MEMORY_PROFILE_IMPL_HPP_
+#define TDIGEST_MEMORY_PROFILE_IMPL_HPP_
+
+#include <tdigest.hpp>
+#include <req_sketch.hpp>
+
+#include "counting_allocator.hpp"
+
+namespace datasketches {
+
+// this relies on a global variable to count total amount of allocated memory
+extern long long int total_allocated_memory;
+
+template<typename T>
+tdigest_memory_profile<T>::tdigest_memory_profile():
+rd(),
+gen(rd()),
+dist(0, 1.0)
+{}
+
+template<typename T>
+void tdigest_memory_profile<T>::run_trial(size_t lg_min_x, size_t num_points, 
size_t x_ppo) {
+  const size_t k = 40;
+
+//  using tdigest_t = tdigest<T, counting_allocator<T>>;
+//  tdigest_t* td = new (counting_allocator<tdigest_t>().allocate(1)) 
tdigest_t(k);
+
+  using req_sketch_t = req_sketch<T, std::less<T>, counting_allocator<T>>;
+  req_sketch_t* s = new (counting_allocator<req_sketch_t>().allocate(1)) 
req_sketch_t(k);
+
+  size_t count = 0;
+  size_t p = 1ULL << lg_min_x;
+  for (size_t i = 0; i < num_points; ++i) {
+    const size_t delta = p - count;
+    for (size_t j = 0; j < delta; ++j) {
+      s->update(dist(gen));
+    }
+    count += delta;
+    stats[i].update(total_allocated_memory);
+    p = pwr_2_law_next(x_ppo, p);
+  }
+
+//  td->~tdigest_t();
+//  counting_allocator<tdigest_t>().deallocate(td, 1);
+  s->~req_sketch_t();
+  counting_allocator<req_sketch_t>().deallocate(s, 1);
+  if (total_allocated_memory != 0) throw 
std::runtime_error("total_allocated_memory != 0");
+}
+
+}
+
+#endif
diff --git a/cpp/src/tdigest_merge_accuracy_profile.hpp 
b/cpp/src/tdigest_merge_accuracy_profile.hpp
new file mode 100644
index 0000000..cbe8c04
--- /dev/null
+++ b/cpp/src/tdigest_merge_accuracy_profile.hpp
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef TDIGEST_MERGE_ACCURACY_PROFILE_HPP_
+#define TDIGEST_MERGE_ACCURACY_PROFILE_HPP_
+
+#include "tdigest_accuracy_profile.hpp"
+
+namespace datasketches {
+
+template<typename T>
+class tdigest_merge_accuracy_profile: public tdigest_accuracy_profile<T> {
+public:
+  void run_trial(std::vector<T>& values, size_t stream_length, uint16_t k,
+      const std::vector<double>& ranks, std::vector<std::vector<double>>& 
rank_errors);
+};
+
+}
+
+#include "tdigest_merge_accuracy_profile_impl.hpp"
+
+#endif
diff --git a/cpp/src/tdigest_sketch_accuracy_profile_impl.hpp 
b/cpp/src/tdigest_merge_accuracy_profile_impl.hpp
similarity index 51%
copy from cpp/src/tdigest_sketch_accuracy_profile_impl.hpp
copy to cpp/src/tdigest_merge_accuracy_profile_impl.hpp
index 4c1a6e4..a7f2406 100644
--- a/cpp/src/tdigest_sketch_accuracy_profile_impl.hpp
+++ b/cpp/src/tdigest_merge_accuracy_profile_impl.hpp
@@ -17,44 +17,40 @@
  * under the License.
  */
 
-#ifndef TDIGEST_SKETCH_ACCURACY_PROFILE_IMPL_HPP_
-#define TDIGEST_SKETCH_ACCURACY_PROFILE_IMPL_HPP_
+#ifndef TDIGEST_MERGE_ACCURACY_PROFILE_IMPL_HPP_
+#define TDIGEST_MERGE_ACCURACY_PROFILE_IMPL_HPP_
 
 #include <tdigest.hpp>
+#include <req_sketch.hpp>
+
+#include "true_rank.hpp"
 
 namespace datasketches {
 
-// assumes sorted values, 0 <= rank <= 1
-// only the first n = size values in the vector should be used
 template<typename T>
-T get_quantile(const std::vector<T>& values, size_t size, double rank) {
-  return values[(size - 1) * rank];
-}
+void tdigest_merge_accuracy_profile<T>::run_trial(std::vector<T>& values, 
size_t stream_length, uint16_t k,
+    const std::vector<double>& ranks, std::vector<std::vector<double>>& 
rank_errors) {
 
-// assumes sorted values, given value is one of the values in the vector
-// only the first n = size values in the vector should be used
-template<typename T>
-double get_rank(const std::vector<T>& values, size_t size, T value) {
-  auto lower = std::lower_bound(values.begin(), values.begin() + size, value);
-  const auto d1 = std::distance(values.begin(), lower);
-  auto upper = std::upper_bound(lower, values.begin() + size, value);
-  const auto d2 = std::distance(values.begin(), upper);
-  return (d1 + d2) / 2.0 / size;
-}
+  const size_t num_sketches = 32;
+  std::vector<tdigest<T>> sketches;
+  for (size_t i = 0; i < num_sketches; ++i) sketches.push_back(tdigest<T>(k));
 
-template<typename T>
-void tdigest_sketch_accuracy_profile<T>::run_trial(std::vector<T>& values, 
size_t stream_length, uint16_t k,
-    const std::vector<double>& ranks, std::vector<std::vector<double>>& 
rank_errors) {
+  size_t s = 0;
+  for (size_t i = 0; i < stream_length; ++i) {
+    sketches[s].update(values[i]);
+    ++s;
+    if (s == num_sketches) s = 0;
+  }
 
-  tdigest<T> sketch(k);
-  for (size_t i = 0; i < stream_length; ++i) sketch.update(values[i]);
+  tdigest<T> merge_sketch(k);
+  for (size_t i = 0; i < num_sketches; ++i) merge_sketch.merge(sketches[i]);
 
   std::sort(values.begin(), values.begin() + stream_length);
   unsigned j = 0;
   for (const double rank: ranks) {
     const T quantile = get_quantile(values, stream_length, rank);
-    const double true_rank = get_rank(values, stream_length, quantile);
-    rank_errors[j++].push_back(std::abs(sketch.get_rank(quantile) - 
true_rank));
+    const double true_rank = get_rank(values, stream_length, quantile, 
MIDPOINT);
+    rank_errors[j++].push_back(std::abs(merge_sketch.get_rank(quantile) - 
true_rank));
   }
 }
 
diff --git a/cpp/src/tdigest_merge_timing_profile.hpp 
b/cpp/src/tdigest_merge_timing_profile.hpp
new file mode 100644
index 0000000..d0c3ed9
--- /dev/null
+++ b/cpp/src/tdigest_merge_timing_profile.hpp
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef TDIGEST_MERGE_TIMING_PROFILE_HPP_
+#define TDIGEST_MERGE_TIMING_PROFILE_HPP_
+
+#include <random>
+
+#include "job_profile.hpp"
+
+namespace datasketches {
+
+template<typename T>
+class tdigest_timing_profile: public job_profile {
+public:
+  tdigest_timing_profile();
+  void run();
+  T sample();
+private:
+  std::default_random_engine generator;
+  std::uniform_real_distribution<T> distribution;
+};
+
+}
+
+#include "tdigest_merge_timing_profile_impl.hpp"
+
+#endif
diff --git a/cpp/src/tdigest_timing_profile_impl.hpp 
b/cpp/src/tdigest_merge_timing_profile_impl.hpp
similarity index 94%
copy from cpp/src/tdigest_timing_profile_impl.hpp
copy to cpp/src/tdigest_merge_timing_profile_impl.hpp
index 8be06b9..9853deb 100644
--- a/cpp/src/tdigest_timing_profile_impl.hpp
+++ b/cpp/src/tdigest_merge_timing_profile_impl.hpp
@@ -59,6 +59,7 @@ void tdigest_timing_profile<T>::run() {
     std::chrono::nanoseconds build_time_ns(0);
     std::chrono::nanoseconds update_time_ns(0);
     std::chrono::nanoseconds get_rank_time_ns(0);
+    size_t size_bytes = 0;
 
     const size_t num_trials = get_num_trials(stream_length, lg_min_stream_len, 
lg_max_stream_len, lg_min_trials, lg_max_trials);
     for (size_t t = 0; t < num_trials; ++t) {
@@ -80,12 +81,16 @@ void tdigest_timing_profile<T>::run() {
 //      }
 //      auto finish_get_rank(std::chrono::high_resolution_clock::now());
 //      get_rank_time_ns += 
std::chrono::duration_cast<std::chrono::nanoseconds>(finish_get_rank - 
start_get_rank);
+
+      size_bytes += sketch.get_serialized_size_bytes();
     }
     std::cout << stream_length << "\t"
         << num_trials << "\t"
         << (double) build_time_ns.count() / num_trials << "\t"
         << (double) update_time_ns.count() / num_trials / stream_length << "\t"
-        << (double) get_rank_time_ns.count() / num_trials / num_queries << 
"\n";
+//        << (double) get_rank_time_ns.count() / num_trials / num_queries << 
"\t"
+        << (double) size_bytes / num_trials << "\n";
+
     stream_length = pwr_2_law_next(ppo, stream_length);
   }
 }
diff --git a/cpp/src/tdigest_sketch_accuracy_profile_impl.hpp 
b/cpp/src/tdigest_sketch_accuracy_profile_impl.hpp
index 4c1a6e4..60e84ba 100644
--- a/cpp/src/tdigest_sketch_accuracy_profile_impl.hpp
+++ b/cpp/src/tdigest_sketch_accuracy_profile_impl.hpp
@@ -22,25 +22,9 @@
 
 #include <tdigest.hpp>
 
-namespace datasketches {
-
-// assumes sorted values, 0 <= rank <= 1
-// only the first n = size values in the vector should be used
-template<typename T>
-T get_quantile(const std::vector<T>& values, size_t size, double rank) {
-  return values[(size - 1) * rank];
-}
+#include "true_rank.hpp"
 
-// assumes sorted values, given value is one of the values in the vector
-// only the first n = size values in the vector should be used
-template<typename T>
-double get_rank(const std::vector<T>& values, size_t size, T value) {
-  auto lower = std::lower_bound(values.begin(), values.begin() + size, value);
-  const auto d1 = std::distance(values.begin(), lower);
-  auto upper = std::upper_bound(lower, values.begin() + size, value);
-  const auto d2 = std::distance(values.begin(), upper);
-  return (d1 + d2) / 2.0 / size;
-}
+namespace datasketches {
 
 template<typename T>
 void tdigest_sketch_accuracy_profile<T>::run_trial(std::vector<T>& values, 
size_t stream_length, uint16_t k,
@@ -53,7 +37,7 @@ void 
tdigest_sketch_accuracy_profile<T>::run_trial(std::vector<T>& values, size_
   unsigned j = 0;
   for (const double rank: ranks) {
     const T quantile = get_quantile(values, stream_length, rank);
-    const double true_rank = get_rank(values, stream_length, quantile);
+    const double true_rank = get_rank(values, stream_length, quantile, 
MIDPOINT);
     rank_errors[j++].push_back(std::abs(sketch.get_rank(quantile) - 
true_rank));
   }
 }
diff --git a/cpp/src/tdigest_timing_profile_impl.hpp 
b/cpp/src/tdigest_timing_profile_impl.hpp
index 8be06b9..644528f 100644
--- a/cpp/src/tdigest_timing_profile_impl.hpp
+++ b/cpp/src/tdigest_timing_profile_impl.hpp
@@ -26,6 +26,7 @@
 #include <sstream>
 
 #include <tdigest.hpp>
+#include <req_sketch.hpp>
 
 namespace datasketches {
 
@@ -46,6 +47,8 @@ void tdigest_timing_profile<T>::run() {
 
   const size_t num_queries(20);
 
+  const uint16_t k = 200;
+
   std::cout << "Stream\tTrials\tBuild\tUpdate\tRank" << std::endl;
 
   std::vector<T> values(1ULL << lg_max_stream_len, 0);
@@ -59,13 +62,15 @@ void tdigest_timing_profile<T>::run() {
     std::chrono::nanoseconds build_time_ns(0);
     std::chrono::nanoseconds update_time_ns(0);
     std::chrono::nanoseconds get_rank_time_ns(0);
+    size_t size_bytes = 0;
 
     const size_t num_trials = get_num_trials(stream_length, lg_min_stream_len, 
lg_max_stream_len, lg_min_trials, lg_max_trials);
     for (size_t t = 0; t < num_trials; ++t) {
       std::generate(values.begin(), values.begin() + stream_length, [this] { 
return this->sample(); });
 
       auto start_build(std::chrono::high_resolution_clock::now());
-      tdigest<T> sketch;
+      tdigest<T> sketch(k);
+//      req_sketch<T> sketch(40);
       auto finish_build(std::chrono::high_resolution_clock::now());
       build_time_ns += 
std::chrono::duration_cast<std::chrono::nanoseconds>(finish_build - 
start_build);
 
@@ -80,12 +85,16 @@ void tdigest_timing_profile<T>::run() {
 //      }
 //      auto finish_get_rank(std::chrono::high_resolution_clock::now());
 //      get_rank_time_ns += 
std::chrono::duration_cast<std::chrono::nanoseconds>(finish_get_rank - 
start_get_rank);
+
+      size_bytes += sketch.get_serialized_size_bytes();
     }
     std::cout << stream_length << "\t"
         << num_trials << "\t"
         << (double) build_time_ns.count() / num_trials << "\t"
         << (double) update_time_ns.count() / num_trials / stream_length << "\t"
-        << (double) get_rank_time_ns.count() / num_trials / num_queries << 
"\n";
+//        << (double) get_rank_time_ns.count() / num_trials / num_queries << 
"\t"
+        << (double) size_bytes / num_trials << "\n";
+
     stream_length = pwr_2_law_next(ppo, stream_length);
   }
 }
diff --git 
a/src/main/java/org/apache/datasketches/characterization/tdigest/TDigestAccuracyProfile.java
 
b/src/main/java/org/apache/datasketches/characterization/tdigest/TDigestAccuracyProfile.java
new file mode 100644
index 0000000..0598ca4
--- /dev/null
+++ 
b/src/main/java/org/apache/datasketches/characterization/tdigest/TDigestAccuracyProfile.java
@@ -0,0 +1,165 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.characterization.tdigest;
+
+import static org.apache.datasketches.common.Util.pwr2SeriesNext;
+
+import java.util.Arrays;
+import java.util.Random;
+
+import org.apache.datasketches.Job;
+import org.apache.datasketches.JobProfile;
+import org.apache.datasketches.MonotonicPoints;
+import org.apache.datasketches.Properties;
+
+import com.tdunning.math.stats.TDigest;
+
+public class TDigestAccuracyProfile implements JobProfile {
+  private Job job;
+  private Properties prop;
+
+  private Random rand;
+  private double[] values;
+
+  @Override
+  public void start(final Job job) {
+    this.job = job;
+    prop = job.getProperties();
+    final int lgMin = Integer.parseInt(prop.mustGet("LgMin"));
+    final int lgMax = Integer.parseInt(prop.mustGet("LgMax"));
+    final int lgDelta = Integer.parseInt(prop.mustGet("LgDelta"));
+    final int ppo = Integer.parseInt(prop.mustGet("PPO"));
+
+    final int compression = Integer.parseInt(prop.mustGet("Compression"));
+    final double[] ranks = {0.01, 0.05, 0.5, 0.95, 0.99};
+    final double errorPercentile = 
Double.parseDouble(prop.mustGet("ErrorPercentile"));
+
+    rand = new Random();
+    values = new double[1 << lgMax];
+    
+    final int numSteps;
+    final boolean useppo;
+    if (lgDelta < 1) {
+      numSteps = MonotonicPoints.countPoints(lgMin, lgMax, ppo);
+      useppo = true;
+    } else {
+      numSteps = (lgMax - lgMin) / lgDelta + 1;
+      useppo = false;
+    }
+
+    int streamLength = 1 << lgMin;
+    int lgCurSL = lgMin;
+    for (int step = 0; step < numSteps; step++) {
+
+      doStreamLength(streamLength, compression, ranks, errorPercentile);
+
+      if (useppo) {
+        streamLength = (int)pwr2SeriesNext(ppo, streamLength);
+      } else {
+        lgCurSL += lgDelta;
+        streamLength = 1 << lgCurSL;
+      }
+    }
+  }
+  
+  @Override
+  public void shutdown() {}
+
+  @Override
+  public void cleanup() {}
+
+  void doStreamLength(final int streamLength, final int compression, final 
double[] ranks, final double errorPercentile) {
+    final int numTrials = 1 << Integer.parseInt(prop.mustGet("LgTrials"));
+
+    double[][] rankErrors = new double[ranks.length][];
+    for (int i = 0; i < ranks.length; i++) rankErrors[i] = new 
double[numTrials];
+
+    for (int t = 0; t < numTrials; t++) {
+      runTrial(t, streamLength, compression, ranks, rankErrors);
+    }
+
+    job.print(streamLength);
+    for (int i = 0; i < ranks.length; i++) {
+      Arrays.sort(rankErrors[i]);
+      final int errPctIndex = (int)(numTrials * errorPercentile / 100);
+      final double rankErrorAtPct = rankErrors[i][errPctIndex];
+      job.print("\t");
+      job.print(rankErrorAtPct * 100);
+    }
+    job.print("\n");
+  }
+
+  void runTrial(final int trial, final int streamLength, final int 
compression, final double[] ranks, final double[][] rankErrors) {
+    TDigest td = TDigest.createDigest(compression);
+    for (int i = 0; i < streamLength; i++) {
+      values[i] = rand.nextDouble(); 
+      td.add(values[i]);
+    }
+    Arrays.sort(values, 0, streamLength);
+    for (int i = 0; i < ranks.length; i++) {
+      final double quantile = values[(int)((streamLength - 1) * ranks[i])];
+      final double trueRank = computeTrueRank(values, streamLength, quantile);
+      rankErrors[i][trial] = Math.abs(trueRank - td.cdf(quantile));
+    }
+  }
+
+  static double computeTrueRank(final double[] values, final int streamLength, 
final double value) {
+    final int lower = lowerBound(values, 0, streamLength, value);
+    final int upper = upperBound(values, lower, streamLength, value);
+    return (lower + upper) / 2.0 / streamLength;
+  }
+
+  static int lowerBound(final double[] values, int first, final int last, 
final double value) {
+    int current;
+    int step;
+    int count = last - first; 
+    while (count > 0) {
+      current = first;
+      step = count / 2;
+      current += step;
+      if (values[current] < value) {
+        first = ++current;
+        count -= step + 1;
+      } else {
+        count = step;
+      }
+    }
+    return first;
+  }
+
+  static int upperBound(final double[] values, int first, final int last, 
final double value) {
+    int current;
+    int step;
+    int count = last - first; 
+    while (count > 0) {
+      current = first; 
+      step = count / 2; 
+      current += step;
+      if (!(value < values[current])) {
+        first = ++current;
+        count -= step + 1;
+      } else {
+        count = step;
+      }
+    }
+    return first;
+  }
+
+}
diff --git 
a/src/main/java/org/apache/datasketches/characterization/tdigest/TDigestErrorVsRankProfile.java
 
b/src/main/java/org/apache/datasketches/characterization/tdigest/TDigestErrorVsRankProfile.java
new file mode 100644
index 0000000..966b6a8
--- /dev/null
+++ 
b/src/main/java/org/apache/datasketches/characterization/tdigest/TDigestErrorVsRankProfile.java
@@ -0,0 +1,311 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.characterization.tdigest;
+
+import static java.lang.Math.round;
+import static org.apache.datasketches.GaussianRanks.GAUSSIANS_3SD;
+import static org.apache.datasketches.SpacedPoints.expSpaced;
+import static org.apache.datasketches.common.Util.pwr2SeriesNext;
+import static 
org.apache.datasketches.quantilescommon.QuantilesUtil.evenlySpacedDoubles;
+
+import org.apache.datasketches.Job;
+import org.apache.datasketches.JobProfile;
+import org.apache.datasketches.MonotonicPoints;
+import org.apache.datasketches.Properties;
+import org.apache.datasketches.characterization.Shuffle;
+import org.apache.datasketches.characterization.req.StreamMaker;
+import org.apache.datasketches.characterization.req.StreamMaker.Pattern;
+import org.apache.datasketches.hll.HllSketch;
+import org.apache.datasketches.quantiles.DoublesSketch;
+import org.apache.datasketches.quantiles.DoublesSketchBuilder;
+import org.apache.datasketches.quantiles.UpdateDoublesSketch;
+
+import com.tdunning.math.stats.TDigest;
+
+/**
+ * @author Lee Rhodes
+ */
+public class TDigestErrorVsRankProfile implements JobProfile {
+  private Job job;
+  private Properties prop;
+
+  //FROM PROPERTIES
+  //Stream pattern config
+  StreamMaker streamMaker = new StreamMaker();
+  private Pattern pattern;
+  private int offset;
+
+  //For computing the different stream lengths
+  private int lgMin;
+  private int lgMax;
+  private int lgDelta;
+  private int ppo; //not currently used
+
+  private int numTrials; //num of Trials per plotPoint
+  private int errQSkLgK; //size of the error quantiles sketches
+  private int errHllSkLgK; //size of the error HLL sketch
+  private boolean shuffle; //if true, shuffle for each trial
+
+  //plotting & x-axis configuration
+  private int numPlotPoints;
+  private boolean evenlySpaced;
+  private double exponent;
+  private double rankRange;
+  private double metricsRankRange;
+
+  //Target sketch configuration & error analysis
+  private int K;
+  private TDigest sk;
+
+  //The array of Gaussian quantiles for +/- StdDev error analysis
+  private double[] gRanks;
+  private UpdateDoublesSketch[] errQSkArr;
+  private HllSketch[] errHllSkArr;
+
+  //Specific to a streamLength
+  private TrueDoubleRanks trueRanks;
+  //The entire stream
+  private float[] stream; //a shuffled array of values from 1...N
+  private float[] sortedStream;
+  private int[] sortedAbsRanks;
+  //private int[] streamAbsRanks ?? do we need?
+  //The PP points
+  private float[] sortedPPValues;
+  private int[] sortedPPIndices;
+  private int[] sortedPPAbsRanks;
+  int sumAllocCounts = 0;
+
+  private final String[] columnLabels =
+    {"nPP", "Value", "Rank",
+     "-3SD","-2SD", "-1SD", "Med", "+1SD", "+2SD", "+3SD",
+     "1LB", "1UB", "UErrCnt"};
+  private final String sFmt =
+      "%3s\t%5s\t%4s\t"
+    + "%4s\t%4s\t%4s\t%5s\t%4s\t%4s\t%4s\t"
+    + "%3s\t%3s\t%7s\n";
+  private final String fFmt =
+    "%f\t%f\t%f\t" // rPP, Value, Rank
+  + "%f\t%f\t%f\t%f\t%f\t%f\t%f\t" //-3sd to +3sd
+  + "\t%d\n"; // UErrCnt
+
+  //JobProfile interface
+  @Override
+  public void start(final Job job) {
+    this.job = job;
+    prop = job.getProperties();
+    extractProperties();
+    configureCommon();
+    doStreamLengths();
+  }
+
+  @Override
+  public void shutdown() {}
+
+  @Override
+  public void cleanup() {}
+  //end JobProfile
+
+  private void extractProperties() {
+    //Stream Pattern
+    pattern = Pattern.valueOf(prop.mustGet("Pattern"));
+    offset = Integer.parseInt(prop.mustGet("Offset"));
+    //Stream lengths
+    lgMin = Integer.parseInt(prop.mustGet("LgMin"));
+    lgMax = Integer.parseInt(prop.mustGet("LgMax"));
+    lgDelta = Integer.parseInt(prop.mustGet("LgDelta"));
+    ppo = Integer.parseInt(prop.mustGet("PPO"));
+    // Trials config (independent of sketch)
+    numTrials = 1 << Integer.parseInt(prop.mustGet("LgTrials"));
+    errQSkLgK = Integer.parseInt(prop.mustGet("ErrQSkLgK"));
+    errHllSkLgK = Integer.parseInt(prop.mustGet("ErrHllSkLgK"));
+    shuffle = Boolean.valueOf(prop.mustGet("Shuffle"));
+    //plotting
+    numPlotPoints = Integer.parseInt(prop.mustGet("NumPlotPoints"));
+    evenlySpaced = Boolean.valueOf(prop.mustGet("EvenlySpaced"));
+    exponent = Double.parseDouble(prop.mustGet("Exponent"));
+    rankRange = Double.parseDouble(prop.mustGet("RankRange"));
+    //Target sketch config
+    K = Integer.parseInt(prop.mustGet("K"));
+
+    metricsRankRange = Double.parseDouble(prop.mustGet("MetricsRankRange"));
+  }
+
+  void configureCommon() {
+    configureSketch();
+    errQSkArr = new UpdateDoublesSketch[numPlotPoints];
+    errHllSkArr = new HllSketch[numPlotPoints];
+    //configure the error quantiles array & HLL sketch arr
+    final DoublesSketchBuilder builder = DoublesSketch.builder().setK(1 << 
errQSkLgK);
+    for (int i = 0; i < numPlotPoints; i++) {
+      errQSkArr[i] = builder.build();
+      errHllSkArr[i] = new HllSketch(errHllSkLgK);
+    }
+    gRanks = new double[GAUSSIANS_3SD.length - 2]; //omit 0.0 and 1.0
+    for (int i = 1; i < GAUSSIANS_3SD.length - 1; i++) {
+      gRanks[i - 1] = GAUSSIANS_3SD[i];
+    }
+  }
+
+  void configureSketch() {
+  }
+
+  private void doStreamLengths() {
+    //compute the number of stream lengths for the whole job
+    final int numSteps;
+    final boolean useppo;
+    if (lgDelta < 1) {
+      numSteps = MonotonicPoints.countPoints(lgMin, lgMax, ppo);
+      useppo = true;
+    } else {
+      numSteps = (lgMax - lgMin) / lgDelta + 1;
+      useppo = false;
+    }
+
+    int streamLength = 1 << lgMin; //initial streamLength
+    int lgCurSL = lgMin;
+
+    // Step through the different stream lengths
+    for (int step = 0; step < numSteps; step++) {
+
+      doStreamLength(streamLength);
+
+      //go to next stream length
+      if (useppo) {
+        streamLength = (int)pwr2SeriesNext(ppo, streamLength);
+      } else {
+        lgCurSL += lgDelta;
+        streamLength = 1 << lgCurSL;
+      }
+    }
+  }
+
+  void doStreamLength(final int streamLength) {
+    job.println(LS + "Stream Length: " + streamLength );
+    job.println(LS + "param k: " + K );
+    job.printfData(sFmt, (Object[])columnLabels);
+    //build the stream
+    stream = streamMaker.makeStream(streamLength, pattern, offset);
+    //compute true ranks
+    trueRanks = new TrueDoubleRanks(stream);
+    sortedStream = trueRanks.getSortedStream();
+    sortedAbsRanks = trueRanks.getSortedAbsRanks();
+
+    //compute the true values used at the plot points
+    int startIdx = 0;
+    int endIdx = streamLength - 1;
+    final boolean hra = true;
+    if (rankRange < 1.0) { //A substream of points focuses on a sub-range at 
one end.
+      final int subStreamLen = (int)Math.round(rankRange * streamLength);
+      startIdx = hra ? streamLength - subStreamLen : 0;
+      endIdx = hra ? streamLength - 1 : subStreamLen - 1;
+    }
+
+    //generates PP indices in [startIdx, endIdx] inclusive, inclusive
+    // PV 2020-01-07: using double so that there's enough precision even for 
large stream lengths
+    final double[] temp = evenlySpaced
+        ? evenlySpacedDoubles(startIdx, endIdx, numPlotPoints)
+        : expSpaced(startIdx, endIdx, numPlotPoints, exponent, hra);
+
+    sortedPPIndices = new int[numPlotPoints];
+    sortedPPAbsRanks = new int[numPlotPoints];
+    sortedPPValues = new float[numPlotPoints];
+
+    for (int pp = 0; pp < numPlotPoints; pp++) {
+      final int idx = (int)Math.round(temp[pp]);
+      sortedPPIndices[pp] = idx;
+      sortedPPAbsRanks[pp] = sortedAbsRanks[idx];
+      sortedPPValues[pp] = sortedStream[idx];
+    }
+
+    //Do numTrials for all plotpoints
+    for (int t = 0; t < numTrials; t++) {
+      doTrial();
+
+      //sumAllocCounts = sk.
+    }
+
+    // for special metrics for capturing accuracy per byte
+    double sumRelStdDev = 0;
+    int numRelStdDev = 0;
+    double sumAddStdDev = 0;
+    int numAddStdDev = 0;
+
+    //at this point each of the errQSkArr sketches has a distribution of error 
from numTrials
+    for (int pp = 0 ; pp < numPlotPoints; pp++) {
+      final double v = sortedPPValues[pp];
+      final double tr = v / streamLength; //the true rank
+
+      //for each of the numErrDistRanks distributions extract the sd Gaussian 
quantiles
+      final double[] errQ = errQSkArr[pp].getQuantiles(gRanks);
+      final int uErrCnt = (int)round(errHllSkArr[pp].getEstimate());
+
+      //Plot the row.
+      final double relPP = (double)(pp + 1) / numPlotPoints;
+      job.printfData(fFmt, relPP, v, tr,
+          errQ[0], errQ[1], errQ[2], errQ[3], errQ[4], errQ[5], errQ[6],
+          uErrCnt);
+
+      if (relPP > 0 && relPP < 1
+          && (hra && relPP < metricsRankRange || !hra && relPP >= 1 - 
metricsRankRange)) {
+        sumAddStdDev += errQ[4];
+        numAddStdDev++;
+      }
+      if (relPP > 0 && relPP < 1
+          && (!hra && relPP < metricsRankRange || hra && relPP >= 1 - 
metricsRankRange)) {
+        sumRelStdDev += errQ[4] / (hra ? 1 - relPP : relPP);
+        numRelStdDev++;
+      }
+      errQSkArr[pp].reset(); //reset the errQSkArr for next streamLength
+      errHllSkArr[pp].reset(); //reset the errHllSkArr for next streamLength
+    }
+    final int serBytes = sk.smallByteSize();
+
+    // special metrics for capturing accuracy per byte
+    final double avgRelStdDevTimesSize = serBytes * sumRelStdDev / 
numRelStdDev;
+    final  double avgAddStdDevTimesSize = serBytes * sumAddStdDev / 
numAddStdDev;
+    job.println(LS + "Avg. relative std. dev. times size: " + 
avgRelStdDevTimesSize);
+    job.println(     "Avg. additive std. dev. times size: " + 
avgAddStdDevTimesSize);
+
+    job.println(LS + "Serialization Bytes: " + serBytes);
+//    job.println(sk.viewCompactorDetail("%5.0f", false));
+  }
+
+  /**
+   * A trial consists of updating a virgin sketch with a stream of values.
+   * Capture the estimated ranks for all plotPoints and then update the 
errQSkArr with those
+   * error values.
+   */
+  void doTrial() {
+    sk = TDigest.createDigest(K);
+    if (shuffle) { Shuffle.shuffle(stream); }
+    final int sl = stream.length;
+    for (int i = 0; i < sl; i++) { sk.add(stream[i]); }
+    //get estimated ranks from sketch for all plot points
+    final double[] estRanks = new double[sortedPPValues.length];
+    for (int i = 0; i < sortedPPValues.length; i++) estRanks[i] = 
sk.cdf(sortedPPValues[i]);
+    //compute errors and update HLL for each plotPoint
+    for (int pp = 0; pp < numPlotPoints; pp++) {
+      final double errorAtPlotPoint = estRanks[pp] - 
(double)sortedPPAbsRanks[pp] / sl;
+      errQSkArr[pp].update(errorAtPlotPoint); //update each of the errQArr 
sketches
+      errHllSkArr[pp].update(errorAtPlotPoint); //unique count of error values
+    }
+  }
+
+}
diff --git 
a/src/main/java/org/apache/datasketches/characterization/tdigest/TDigestMergeAccuracyProfile.java
 
b/src/main/java/org/apache/datasketches/characterization/tdigest/TDigestMergeAccuracyProfile.java
new file mode 100644
index 0000000..15fdd3d
--- /dev/null
+++ 
b/src/main/java/org/apache/datasketches/characterization/tdigest/TDigestMergeAccuracyProfile.java
@@ -0,0 +1,173 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.characterization.tdigest;
+
+import static org.apache.datasketches.common.Util.pwr2SeriesNext;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Random;
+
+import org.apache.datasketches.Job;
+import org.apache.datasketches.JobProfile;
+import org.apache.datasketches.MonotonicPoints;
+import org.apache.datasketches.Properties;
+
+import com.tdunning.math.stats.TDigest;
+
+public class TDigestMergeAccuracyProfile implements JobProfile {
+  private Job job;
+  private Properties prop;
+
+  private Random rand;
+  private double[] values;
+
+  @Override
+  public void start(final Job job) {
+    this.job = job;
+    prop = job.getProperties();
+    final int lgMin = Integer.parseInt(prop.mustGet("LgMin"));
+    final int lgMax = Integer.parseInt(prop.mustGet("LgMax"));
+    final int lgDelta = Integer.parseInt(prop.mustGet("LgDelta"));
+    final int ppo = Integer.parseInt(prop.mustGet("PPO"));
+
+    final int compression = Integer.parseInt(prop.mustGet("Compression"));
+    final double[] ranks = {0.01, 0.05, 0.5, 0.95, 0.99};
+    final double errorPercentile = 
Double.parseDouble(prop.mustGet("ErrorPercentile"));
+
+    rand = new Random();
+    values = new double[1 << lgMax];
+    
+    final int numSteps;
+    final boolean useppo;
+    if (lgDelta < 1) {
+      numSteps = MonotonicPoints.countPoints(lgMin, lgMax, ppo);
+      useppo = true;
+    } else {
+      numSteps = (lgMax - lgMin) / lgDelta + 1;
+      useppo = false;
+    }
+
+    int streamLength = 1 << lgMin;
+    int lgCurSL = lgMin;
+    for (int step = 0; step < numSteps; step++) {
+
+      doStreamLength(streamLength, compression, ranks, errorPercentile);
+
+      if (useppo) {
+        streamLength = (int)pwr2SeriesNext(ppo, streamLength);
+      } else {
+        lgCurSL += lgDelta;
+        streamLength = 1 << lgCurSL;
+      }
+    }
+  }
+  
+  @Override
+  public void shutdown() {}
+
+  @Override
+  public void cleanup() {}
+
+  void doStreamLength(final int streamLength, final int compression, final 
double[] ranks, final double errorPercentile) {
+    final int numTrials = 1 << Integer.parseInt(prop.mustGet("LgTrials"));
+
+    double[][] rankErrors = new double[ranks.length][];
+    for (int i = 0; i < ranks.length; i++) rankErrors[i] = new 
double[numTrials];
+
+    for (int t = 0; t < numTrials; t++) {
+      runTrial(t, streamLength, compression, ranks, rankErrors);
+    }
+
+    job.print(streamLength);
+    for (int i = 0; i < ranks.length; i++) {
+      Arrays.sort(rankErrors[i]);
+      final int errPctIndex = (int)(numTrials * errorPercentile / 100);
+      final double rankErrorAtPct = rankErrors[i][errPctIndex];
+      job.print("\t");
+      job.print(rankErrorAtPct * 100);
+    }
+    job.print("\n");
+  }
+
+  void runTrial(final int trial, final int streamLength, final int 
compression, final double[] ranks, final double[][] rankErrors) {
+    final int numSketches = Integer.parseInt(prop.mustGet("NumSketches"));
+    final ArrayList<TDigest> tds = new ArrayList<>();
+    for (int s = 0; s < numSketches; s++) 
tds.add(TDigest.createDigest(compression));
+    int s = 0;
+    for (int i = 0; i < streamLength; i++) {
+      values[i] = rand.nextDouble(); 
+      tds.get(s).add(values[i]);
+      s++;
+      if (s == numSketches) s = 0;
+    }
+    TDigest tdMerge = TDigest.createDigest(compression);
+    tdMerge.add(tds);
+    Arrays.sort(values, 0, streamLength);
+    for (int i = 0; i < ranks.length; i++) {
+      final double quantile = values[(int)((streamLength - 1) * ranks[i])];
+      final double trueRank = computeTrueRank(values, streamLength, quantile);
+      rankErrors[i][trial] = Math.abs(trueRank - tdMerge.cdf(quantile));
+    }
+  }
+
+  static double computeTrueRank(final double[] values, final int streamLength, 
final double value) {
+    final int lower = lowerBound(values, 0, streamLength, value);
+    final int upper = upperBound(values, lower, streamLength, value);
+    return (lower + upper) / 2.0 / streamLength;
+  }
+
+  static int lowerBound(final double[] values, int first, final int last, 
final double value) {
+    int current;
+    int step;
+    int count = last - first; 
+    while (count > 0) {
+      current = first;
+      step = count / 2;
+      current += step;
+      if (values[current] < value) {
+        first = ++current;
+        count -= step + 1;
+      } else {
+        count = step;
+      }
+    }
+    return first;
+  }
+
+  static int upperBound(final double[] values, int first, final int last, 
final double value) {
+    int current;
+    int step;
+    int count = last - first; 
+    while (count > 0) {
+      current = first; 
+      step = count / 2; 
+      current += step;
+      if (!(value < values[current])) {
+        first = ++current;
+        count -= step + 1;
+      } else {
+        count = step;
+      }
+    }
+    return first;
+  }
+
+}
diff --git 
a/src/main/java/org/apache/datasketches/characterization/tdigest/TDigestSpeedProfile.java
 
b/src/main/java/org/apache/datasketches/characterization/tdigest/TDigestSpeedProfile.java
new file mode 100644
index 0000000..b4e2efc
--- /dev/null
+++ 
b/src/main/java/org/apache/datasketches/characterization/tdigest/TDigestSpeedProfile.java
@@ -0,0 +1,150 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.characterization.tdigest;
+
+import java.util.Random;
+
+import org.apache.datasketches.Properties;
+import 
org.apache.datasketches.characterization.quantiles.BaseQuantilesSpeedProfile;
+
+import com.tdunning.math.stats.TDigest;
+
+public class TDigestSpeedProfile extends BaseQuantilesSpeedProfile {
+
+  private static final Random rnd = new Random();
+  private int k;
+  private double[] inputValues;
+  private int numQueryValues;
+  private double[] rankQueryValues;
+  private double[] quantileQueryValues;
+
+  long buildTimeNs;
+  long updateTimeNs;
+  long getRankTimeNs;
+  long getQuantileTimeNs;
+  long serializeTimeNs;
+  long deserializeTimeNs;
+  long fullSerializedSizeBytes;
+  long smallSerializedSizeBytes;
+  int numCentroids;
+
+  @Override
+  public void configure(final int k, final int numQueryValues, final 
Properties properties) {
+    this.k = k;
+    this.numQueryValues = numQueryValues;
+  }
+
+  @Override
+  public void prepareTrial(final int streamLength) {
+    // prepare input data
+    inputValues = new double[streamLength];
+    for (int i = 0; i < streamLength; i++) {
+      inputValues[i] = rnd.nextDouble();
+    }
+    // prepare query data
+    quantileQueryValues = new double[numQueryValues];
+    for (int i = 0; i < numQueryValues; i++) {
+      quantileQueryValues[i] = rnd.nextDouble();
+    }
+    rankQueryValues = new double[numQueryValues];
+    for (int i = 0; i < numQueryValues; i++) {
+      rankQueryValues[i] = rnd.nextDouble();
+    }
+    resetStats();
+  }
+
+  @Override
+  public void doTrial() {
+    final long startBuild = System.nanoTime();
+    final TDigest sketch = TDigest.createDigest(k);
+    final long stopBuild = System.nanoTime();
+    buildTimeNs += stopBuild - startBuild;
+
+    final long startUpdate = System.nanoTime();
+    for (int i = 0; i < inputValues.length; i++) {
+      sketch.add(inputValues[i]);
+    }
+    final long stopUpdate = System.nanoTime();
+    updateTimeNs += stopUpdate - startUpdate;
+
+    final long startGetRank = System.nanoTime();
+    for (final double value: rankQueryValues) {
+      sketch.cdf(value);
+    }
+    final long stopGetRank = System.nanoTime();
+    getRankTimeNs += stopGetRank - startGetRank;
+
+    final long startGetQuantile = System.nanoTime();
+    for (final double value: quantileQueryValues) {
+      sketch.quantile(value);
+    }
+    final long stopGetQuantile = System.nanoTime();
+    getQuantileTimeNs += stopGetQuantile - startGetQuantile;
+
+//    final long startSerialize = System.nanoTime();
+//    final byte[] bytes = sketch.asBytes(null);
+//    final long stopSerialize = System.nanoTime();
+//    serializeTimeNs += stopSerialize - startSerialize;
+
+//    final long startDeserialize = System.nanoTime();
+//    final deserializedSketch = MergingDigest.fromBytes(bytes);
+//    final long stopDeserialize = System.nanoTime();
+//    deserializeTimeNs += stopDeserialize - startDeserialize;
+
+    numCentroids += sketch.centroidCount();
+    fullSerializedSizeBytes += sketch.byteSize();
+    smallSerializedSizeBytes += sketch.smallByteSize();
+  }
+
+  @Override
+  public String getHeader() {
+    return 
"Stream\tTrials\tBuild\tUpdate\tRank\tQuant\tSer\tDeser\tCentroids\tFullSize\tSmallSize";
+  }
+
+  @Override
+  public String getStats(final int streamLength, final int numTrials, final 
int numQueryValues) {
+    return 
String.format("%d\t%d\t%.1f\t%.1f\t%.1f\t%.1f\t%.1f\t%.1f\t%d\t%d\t%d",
+      streamLength,
+      numTrials,
+      (double) buildTimeNs / numTrials,
+      (double) updateTimeNs / numTrials / streamLength,
+      (double) getRankTimeNs / numTrials / numQueryValues,
+      (double) getQuantileTimeNs / numTrials / numQueryValues,
+      (double) serializeTimeNs / numTrials,
+      (double) deserializeTimeNs / numTrials,
+      numCentroids / numTrials,
+      fullSerializedSizeBytes / numTrials,
+      smallSerializedSizeBytes / numTrials
+    );
+  }
+
+  private void resetStats() {
+    buildTimeNs = 0;
+    updateTimeNs = 0;
+    getRankTimeNs = 0;
+    getQuantileTimeNs = 0;
+    serializeTimeNs = 0;
+    deserializeTimeNs = 0;
+    fullSerializedSizeBytes = 0;
+    smallSerializedSizeBytes = 0;
+    numCentroids = 0;
+  }
+
+}
diff --git a/src/main/resources/tdigest/TDigestAccuracyJob.conf 
b/src/main/resources/tdigest/TDigestAccuracyJob.conf
new file mode 100644
index 0000000..7769a8f
--- /dev/null
+++ b/src/main/resources/tdigest/TDigestAccuracyJob.conf
@@ -0,0 +1,38 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+JobProfile=org.apache.datasketches.characterization.tdigest.TDigestAccuracyProfile
+
+## Stream lengths
+LgMin=0     # The starting stream length
+LgMax=23    # How high the stream length goes
+LgDelta=0   # If > 0, this is the lg Increment
+PPO=16      # The horizontal x-resolution of trials points
+
+# Trials config (indep of sketch)
+LgTrials=10 # lgTrials at every stream length
+
+# Specific sketch config
+Compression=100 # sketch size and accuracy parameter
+
+ErrorPercentile=99 # output this percentile from rank error distribution
+
+# Date-Time Profile
+TimeZone=UTC
+TimeZoneOffset=0 #-25200000 # offset in millisec: PST (UTC-8) = -28_800_000  
PDT (UTC-7) = -25_200_000
+FileNameDateFormat=yyyyMMdd'_'HHmmssz
+ReadableDateFormat=yyyy/MM/dd HH:mm:ss 
diff --git a/src/main/resources/tdigest/TDigestErrorVsRankJob.conf 
b/src/main/resources/tdigest/TDigestErrorVsRankJob.conf
new file mode 100644
index 0000000..1c4ea20
--- /dev/null
+++ b/src/main/resources/tdigest/TDigestErrorVsRankJob.conf
@@ -0,0 +1,64 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+JobProfile=org.apache.datasketches.characterization.tdigest.TDigestErrorVsRankProfile
+
+# Stream Pattern
+Pattern=RANDOM # Sorted, Reversed, Zoomin, Zoomout, Random, Sqrt, FlipFlop, 
ZoominSqrt
+Offset=1 #0 for min value of 0; 1 for min value of 1
+
+## Stream lengths
+LgMin=24    # The starting stream length
+LgMax=24    # How high the stream length goes
+LgDelta=2   # If > 0, this is the lg Increment
+PPO=1       # The horizontal x-resolution of trials points
+
+# Trials config (indep of sketch)
+LgTrials=14 # lgTrials at every stream length
+ErrQSkLgK=12   # the rank error distribution sketch LgK
+ErrHllSkLgK=12 # the rank error HLL sketch Lgk
+Shuffle=false # If true, shuffle before each trial
+
+# Plotting
+NumPlotPoints=100 # number of plot points along the x-axis
+EvenlySpaced=false # if true the x-axis points will be evenly spaced ranks in 
[0,1], otherwise exponential in [0,1]
+Exponent=2.0 # the steepness of the exponential x-axis density gradient curve, 
must be >= 1.0
+StdDev=1 # std deviation used when plotting LB, UB
+RankRange=1.0 # range of rank to plot. E.g., given 0.3: if LRA => 0 to 0.3; if 
HRA => 0.7 to 1.0 
+
+# Specific sketch config
+K=100 # sketch size and accuracy parameter
+
+HRA=true # if true use high-rank accuracy, otherwise low-rank accuracy
+#Compatible=false
+# For LRA, LE,GT have the converged point at rank 1.0
+# For HRA, LT,GE have the converged point at rank 0.0
+# Criterion=LE # LT, LE, GT, GE. Must be all caps.
+LtEq=true
+
+# ReqDebugLevel=2 # or 0, 1, 2. disable by commenting it out. Use only when 
LgTrials=0
+# ReqDebugFmt=%5.0f
+
+# Date-Time Profile
+TimeZone=UTC
+TimeZoneOffset=0 #-25200000 # offset in millisec: PST (UTC-8) = -28_800_000  
PDT (UTC-7) = -25_200_000
+FileNameDateFormat=yyyyMMdd'_'HHmmssz
+ReadableDateFormat=yyyy/MM/dd HH:mm:ss 
+
+# FOR SPECIAL METRICS CAPTURING ACCURACY PER BYTE
+MetricsRankRange = 0.3
+
diff --git a/src/main/resources/tdigest/TDigestMergeAccuracyJob.conf 
b/src/main/resources/tdigest/TDigestMergeAccuracyJob.conf
new file mode 100644
index 0000000..233d8f5
--- /dev/null
+++ b/src/main/resources/tdigest/TDigestMergeAccuracyJob.conf
@@ -0,0 +1,40 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+JobProfile=org.apache.datasketches.characterization.tdigest.TDigestMergeAccuracyProfile
+
+## Stream lengths
+LgMin=0     # The starting stream length
+LgMax=23    # How high the stream length goes
+LgDelta=0   # If > 0, this is the lg Increment
+PPO=16      # The horizontal x-resolution of trials points
+
+# Trials config (indep of sketch)
+LgTrials=10 # lgTrials at every stream length
+
+# Specific sketch config
+Compression=100 # sketch size and accuracy parameter
+
+NumSketches=8 # stream is sprayed across so many sketches and sketches are 
merged
+
+ErrorPercentile=99 # output this percentile from rank error distribution
+
+# Date-Time Profile
+TimeZone=UTC
+TimeZoneOffset=0 #-25200000 # offset in millisec: PST (UTC-8) = -28_800_000  
PDT (UTC-7) = -25_200_000
+FileNameDateFormat=yyyyMMdd'_'HHmmssz
+ReadableDateFormat=yyyy/MM/dd HH:mm:ss 
diff --git a/src/main/resources/tdigest/TDigestSpeedJob.conf 
b/src/main/resources/tdigest/TDigestSpeedJob.conf
new file mode 100644
index 0000000..4d0b5db
--- /dev/null
+++ b/src/main/resources/tdigest/TDigestSpeedJob.conf
@@ -0,0 +1,27 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+JobProfile=org.apache.datasketches.characterization.tdigest.TDigestSpeedProfile
+K=100 # sketch size and accuracy parameter
+numQueryValues=20 # number of values for getQuantile() and getRank()
+
+lgMin=0  # The starting stream length
+lgMax=23 # How high the stream length goes
+PPO=16   # The horizontal x-resolution of trials points
+
+lgMaxTrials=16 # Max trials at start (low counts)
+lgMinTrials=6 # Min trials at tail (high counts)


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(datasketches-characterization) 01/01: tdigest characterization

Reply via email to