This is an automated email from the ASF dual-hosted git repository.
alsay pushed a commit to branch tdigest-cpp
in repository
https://gitbox.apache.org/repos/asf/datasketches-characterization.git
The following commit(s) were added to refs/heads/tdigest-cpp by this push:
new 11285b7 REQ error vs rank
11285b7 is described below
commit 11285b76fff9577b2b770d9eeb2500d381704020
Author: AlexanderSaydakov <[email protected]>
AuthorDate: Tue Oct 22 18:56:28 2024 -0700
REQ error vs rank
---
cpp/src/req_error_vs_rank_profile.hpp | 44 ++++++++++++++
cpp/src/req_error_vs_rank_profile_impl.hpp | 97 ++++++++++++++++++++++++++++++
2 files changed, 141 insertions(+)
diff --git a/cpp/src/req_error_vs_rank_profile.hpp
b/cpp/src/req_error_vs_rank_profile.hpp
new file mode 100644
index 0000000..f0dcbe8
--- /dev/null
+++ b/cpp/src/req_error_vs_rank_profile.hpp
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef REQ_ERROR_VS_RANK_PROFILE_HPP_
+#define REQ_ERROR_VS_RANK_PROFILE_HPP_
+
+#include <random>
+
+#include "job_profile.hpp"
+
+namespace datasketches {
+
+// Characterization job that measures the rank error of a quantile sketch
+// as a function of the rank being queried.
+// T is the value type fed to the sketch; sample() produces values of this type.
+// NOTE(review): despite the REQ name, run() as committed here exercises
+// tdigest<T>; the req_sketch line is commented out in the implementation.
+template<typename T>
+class req_error_vs_rank_profile: public job_profile {
+public:
+ // Seeds the random engine from the system clock and sets up the
+ // uniform distribution over [0, 1).
+ req_error_vs_rank_profile();
+ // Runs all trials and prints a tab-separated error-vs-rank table to stdout.
+ // NOTE(review): presumably implements job_profile's entry point - confirm
+ // against job_profile.hpp.
+ void run();
+ // Draws one value from the uniform distribution, converted to T.
+ T sample();
+private:
+ // Source of randomness for sample().
+ std::default_random_engine generator;
+ // Always produces double; conversion to T happens in sample().
+ std::uniform_real_distribution<double> distribution;
+};
+
+}
+
+#include "req_error_vs_rank_profile_impl.hpp"
+
+#endif
diff --git a/cpp/src/req_error_vs_rank_profile_impl.hpp
b/cpp/src/req_error_vs_rank_profile_impl.hpp
new file mode 100644
index 0000000..0380a5c
--- /dev/null
+++ b/cpp/src/req_error_vs_rank_profile_impl.hpp
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef REQ_ERROR_VS_RANK_PROFILE_IMPL_HPP_
+#define REQ_ERROR_VS_RANK_PROFILE_IMPL_HPP_
+
+#include <algorithm>
+#include <chrono>
+#include <cstddef>
+#include <cstdint>
+#include <iostream>
+#include <vector>
+
+#include <req_sketch.hpp>
+#include <kll_sketch.hpp>
+#include <tdigest.hpp>
+
+#include "true_rank.hpp"
+#include "stddev.hpp"
+
+namespace datasketches {
+
+// Builds the profile: the engine is seeded from the current system-clock
+// tick count, so every process run produces a different stream, and the
+// distribution is fixed to the half-open interval [0, 1).
+template<typename T>
+req_error_vs_rank_profile<T>::req_error_vs_rank_profile():
+  generator(static_cast<std::default_random_engine::result_type>(
+    std::chrono::system_clock::now().time_since_epoch().count())),
+  distribution(0.0, 1.0)
+{}
+
+/*
+ * Runs the error-vs-rank characterization and prints the result to stdout.
+ *
+ * For each trial a fresh stream of 2^lg_stream_len values is drawn with
+ * sample() and fed to a new sketch in arrival order. The stream is then
+ * sorted and, at plot_points evenly spaced ranks in [0, 1], the difference
+ * between the sketch's rank estimate and the rank computed from the sorted
+ * data is recorded. The differences for each plot point are accumulated
+ * across trials in a KLL sketch, whose quantiles are printed as one
+ * tab-separated row per plot point.
+ */
+template<typename T>
+void req_error_vs_rank_profile<T>::run() {
+  const size_t lg_stream_len = 25;
+  const size_t plot_points = 100;
+
+  const size_t num_trials = 10000;
+
+  // Parameters of the REQ sketch. Only the commented-out alternative below
+  // uses them, so mark them as intentionally unused to keep the build quiet.
+  const bool hra = true;
+  const uint16_t k = 12;
+  static_cast<void>(hra);
+  static_cast<void>(k);
+
+  // Size parameter passed to the t-digest under test.
+  const uint16_t tdigest_k = 100;
+
+  const uint16_t error_sketch_k = 1000;
+
+  // Shift in size_t, not int: "1 << n" is evaluated as int and is undefined
+  // once n reaches the width of int.
+  const size_t stream_len = static_cast<size_t>(1) << lg_stream_len;
+  std::vector<T> values(stream_len);
+
+  // Evenly spaced ranks covering both ends: 0, 1/(n-1), ..., 1.
+  std::vector<double> plot_ranks(plot_points);
+  for (size_t i = 0; i < plot_points; ++i) {
+    plot_ranks[i] = static_cast<double>(i) / (plot_points - 1);
+  }
+
+  // One accumulator of observed rank errors per plot point.
+  std::vector<kll_sketch<double>> error_distributions(plot_points);
+  std::generate(error_distributions.begin(), error_distributions.end(), []{return kll_sketch<double>(error_sketch_k);});
+
+  std::cout << "Trials: " << num_trials << "\n";
+  for (size_t t = 1; t <= num_trials; ++t) {
+    if (t % 10 == 0) std::cout << "trial " << t << "\n";
+    std::generate(values.begin(), values.end(), [this]{return sample();});
+//    req_sketch<T> sketch(k, hra);
+    tdigest<T> sketch(tdigest_k);
+    // Feed the sketch before sorting so that it sees the values in random order.
+    for (auto value: values) sketch.update(value);
+    std::sort(values.begin(), values.end());
+    for (size_t i = 0; i < plot_points; ++i) {
+      const T quantile = get_quantile(values, values.size(), plot_ranks[i]);
+//      const double true_rank = get_rank(values, values.size(), quantile, INCLUSIVE);
+      const double true_rank = get_rank(values, values.size(), quantile, MIDPOINT);
+      error_distributions[i].update(sketch.get_rank(quantile) - true_rank);
+    }
+  }
+
+  // M3SD..P3SD come from stddev.hpp; presumably the normal-distribution
+  // fractions at -3..+3 standard deviations - confirm against that header.
+  std::cout << "Rank\t-3SD\t-2SD\t-1SD\tMed\t+1SD\t+2SD\t+3SD\n";
+  for (size_t i = 0; i < plot_points; ++i) {
+    std::cout << plot_ranks[i] << "\t";
+    std::cout << error_distributions[i].get_quantile(M3SD) << "\t";
+    std::cout << error_distributions[i].get_quantile(M2SD) << "\t";
+    std::cout << error_distributions[i].get_quantile(M1SD) << "\t";
+    std::cout << error_distributions[i].get_quantile(0.5) << "\t";
+    std::cout << error_distributions[i].get_quantile(P1SD) << "\t";
+    std::cout << error_distributions[i].get_quantile(P2SD) << "\t";
+    std::cout << error_distributions[i].get_quantile(P3SD) << "\n";
+  }
+}
+
+// Draws the next value from the member distribution using the member engine
+// and returns it converted to the sketch value type T.
+template<typename T>
+T req_error_vs_rank_profile<T>::sample() {
+  const double drawn = distribution(generator);
+  return static_cast<T>(drawn);
+}
+
+}
+
+#endif
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]