This is an automated email from the ASF dual-hosted git repository.

alsay pushed a commit to branch tdigest-cpp
in repository 
https://gitbox.apache.org/repos/asf/datasketches-characterization.git


The following commit(s) were added to refs/heads/tdigest-cpp by this push:
     new 11285b7  REQ error vs rank
11285b7 is described below

commit 11285b76fff9577b2b770d9eeb2500d381704020
Author: AlexanderSaydakov <[email protected]>
AuthorDate: Tue Oct 22 18:56:28 2024 -0700

    REQ error vs rank
---
 cpp/src/req_error_vs_rank_profile.hpp      | 44 ++++++++++++++
 cpp/src/req_error_vs_rank_profile_impl.hpp | 97 ++++++++++++++++++++++++++++++
 2 files changed, 141 insertions(+)

diff --git a/cpp/src/req_error_vs_rank_profile.hpp b/cpp/src/req_error_vs_rank_profile.hpp
new file mode 100644
index 0000000..f0dcbe8
--- /dev/null
+++ b/cpp/src/req_error_vs_rank_profile.hpp
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef REQ_ERROR_VS_RANK_PROFILE_HPP_
+#define REQ_ERROR_VS_RANK_PROFILE_HPP_
+
+#include <random>
+
+#include "job_profile.hpp"
+
+namespace datasketches {
+
+// Job profile that measures the rank error of a quantile sketch as a function
+// of normalized rank, using randomly generated input (see run() in
+// req_error_vs_rank_profile_impl.hpp).
+// T is the type of the items fed to the sketch and returned by sample().
+template<typename T>
+class req_error_vs_rank_profile: public job_profile {
+public:
+  // Seeds the random engine from the system clock and configures the
+  // distribution with the bounds (0.0, 1.0).
+  req_error_vs_rank_profile();
+  // Runs all trials and prints a tab-separated table of error quantiles to stdout.
+  // NOTE(review): presumably implements a virtual run() declared in job_profile -- confirm.
+  void run();
+  // Returns the next random draw from the distribution, converted to T.
+  T sample();
+private:
+  // Declaration order matters: the constructor initializes generator first, then distribution.
+  std::default_random_engine generator; // source of randomness used by sample()
+  std::uniform_real_distribution<double> distribution; // bounds are set by the constructor
+};
+
+}
+
+#include "req_error_vs_rank_profile_impl.hpp"
+
+#endif
diff --git a/cpp/src/req_error_vs_rank_profile_impl.hpp b/cpp/src/req_error_vs_rank_profile_impl.hpp
new file mode 100644
index 0000000..0380a5c
--- /dev/null
+++ b/cpp/src/req_error_vs_rank_profile_impl.hpp
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef REQ_ERROR_VS_RANK_PROFILE_IMPL_HPP_
+#define REQ_ERROR_VS_RANK_PROFILE_IMPL_HPP_
+
+#include <algorithm>
+#include <chrono>
+#include <cstddef>
+#include <cstdint>
+#include <iostream>
+#include <vector>
+
+#include <kll_sketch.hpp>
+#include <req_sketch.hpp>
+#include <tdigest.hpp>
+
+#include "stddev.hpp"
+#include "true_rank.hpp"
+
+namespace datasketches {
+
+// Constructs the profile with a time-seeded random engine, so that every
+// process run produces a different input stream, and a uniform real
+// distribution configured with the bounds (0.0, 1.0).
+template<typename T>
+req_error_vs_rank_profile<T>::req_error_vs_rank_profile():
+// The clock tick count is a signed integer that may not match the engine's
+// seed type, so the conversion is spelled out instead of left implicit.
+generator(static_cast<std::default_random_engine::result_type>(
+    std::chrono::system_clock::now().time_since_epoch().count())),
+distribution(0.0, 1.0)
+{}
+
+template<typename T>
+void req_error_vs_rank_profile<T>::run() {
+  const size_t lg_stream_len = 25;
+  const size_t plot_points = 100;
+
+  const size_t num_trials = 10000;
+
+  const bool hra = true;
+  const uint16_t k = 12;
+
+  const uint16_t error_sketch_k = 1000;
+
+  size_t stream_len = 1 << lg_stream_len;
+  std::vector<T> values(stream_len);
+
+  std::vector<double> plot_ranks(plot_points);
+  for (size_t i = 0; i < plot_points; ++i) plot_ranks[i] = 
static_cast<double>(i) / (plot_points - 1);
+
+  std::vector<kll_sketch<double>> error_distributions(plot_points);
+  std::generate(error_distributions.begin(), error_distributions.end(), 
[]{return kll_sketch<double>(error_sketch_k);});
+
+  std::cout << "Trials: " << num_trials << "\n";
+  for (size_t t = 1; t <= num_trials; ++t) {
+    if (t % 10 == 0) std::cout << "trial " << t << "\n";
+    std::generate(values.begin(), values.end(), [this]{return sample();});
+//    req_sketch<T> sketch(k, hra);
+    tdigest<T> sketch(100);
+    for (auto value: values) sketch.update(value);
+    std::sort(values.begin(), values.end());
+    for (size_t i = 0; i < plot_points; ++i) {
+      const T quantile = get_quantile(values, values.size(), plot_ranks[i]);
+//      const double true_rank = get_rank(values, values.size(), quantile, 
INCLUSIVE);
+      const double true_rank = get_rank(values, values.size(), quantile, 
MIDPOINT);
+      error_distributions[i].update(sketch.get_rank(quantile) - true_rank);
+    }
+  }
+
+  std::cout << "Rank\t-3SD\t-2SD\t-1SD\tMed\t+1SD\t+2SD\t+3SD\n";
+  for (size_t i = 0; i < plot_points; ++i) {
+    std::cout << plot_ranks[i] << "\t";
+    std::cout << error_distributions[i].get_quantile(M3SD) << "\t";
+    std::cout << error_distributions[i].get_quantile(M2SD) << "\t";
+    std::cout << error_distributions[i].get_quantile(M1SD) << "\t";
+    std::cout << error_distributions[i].get_quantile(0.5) << "\t";
+    std::cout << error_distributions[i].get_quantile(P1SD) << "\t";
+    std::cout << error_distributions[i].get_quantile(P2SD) << "\t";
+    std::cout << error_distributions[i].get_quantile(P3SD) << "\n";
+  }
+}
+
+// Produces one random stream item: a draw from the member distribution
+// using the member engine, converted to the item type T.
+template<typename T>
+T req_error_vs_rank_profile<T>::sample() {
+  const double draw = distribution(generator);
+  return static_cast<T>(draw);
+}
+
+}
+
+#endif


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to