This is an automated email from the ASF dual-hosted git repository.

hui pushed a commit to branch research/encoding-reorder
in repository https://gitbox.apache.org/repos/asf/iotdb.git

commit caacc929a5ad5623a8bd2826c0c18a4fd6248d91
Author: xjz17 <[email protected]>
AuthorDate: Sun Nov 12 17:53:02 2023 +0800

    update
---
 .../tsfile/encoding/KernelDensityEstimation.java   | 153 +++++++++++++++++++++
 .../iotdb/tsfile/encoding/REGERCompressTest.java   |  89 +++++++-----
 2 files changed, 207 insertions(+), 35 deletions(-)

diff --git 
a/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/KernelDensityEstimation.java
 
b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/KernelDensityEstimation.java
new file mode 100644
index 00000000000..bdf3628baf9
--- /dev/null
+++ 
b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/KernelDensityEstimation.java
@@ -0,0 +1,153 @@
+package org.apache.iotdb.tsfile.encoding;
+
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
+
+public class KernelDensityEstimation {
+        public static void main(String[] args) {
+            // 离散分布数据
+            Map<Integer, Integer> discreteDistribution = new HashMap<>();
+            discreteDistribution.put(2, 10);
+            discreteDistribution.put(3, 100);
+            discreteDistribution.put(1, 3);
+            discreteDistribution.put(4, 12);
+
+            // 计算核密度估计
+            double[] kernelDensity = 
calculateKernelDensity(discreteDistribution);
+
+            // 打印核密度估计
+            System.out.println("Kernel Density Estimation:");
+            for (int i = 0; i < kernelDensity.length; i++) {
+                System.out.println("x=" + (i + 1) + ": " + kernelDensity[i]);
+            }
+
+            // 寻找核密度估计的极小值点
+            int[] minIndex = findMinIndex(kernelDensity);
+            System.out.println("Minimum point: x=" + 
(Arrays.toString(minIndex)));
+        }
+
+        // 计算核密度估计
+        static double[] calculateKernelDensity(Map<Integer, Integer> 
discreteDistribution) {
+            int maxKey = 
discreteDistribution.keySet().stream().max(Integer::compare).orElse(0);
+            double[] kernelDensity = new double[maxKey];
+
+            for (int x = 1; x <= maxKey; x++) {
+                for (Map.Entry<Integer, Integer> entry : 
discreteDistribution.entrySet()) {
+                    int dataPoint = entry.getKey();
+                    int weight = entry.getValue();
+                    kernelDensity[x - 1] += gaussianKernel(x, dataPoint) * 
weight;
+                }
+            }
+
+            return kernelDensity;
+        }
+
+        // 高斯核函数
+        private static double gaussianKernel(int x, int dataPoint) {
+            double bandwidth = 1.0; // 可调整的带宽参数
+            return Math.exp(-0.5 * Math.pow((x - dataPoint) / bandwidth, 2)) / 
(Math.sqrt(2 * Math.PI) * bandwidth);
+        }
+
+        // 寻找数组中的最小值索引
+        static int[] findMinIndex(double[] array) {
+            int[] minIndex = new int[array.length];
+            int final_min_count = 0;
+//            double preValue = array[0];
+
+            for (int i = 1; i < array.length-1; i++) {
+                if (array[i] < array[i-1] && array[i] < array[i+1]) {
+                    minIndex[final_min_count] = i;
+                    final_min_count ++;
+                }
+            }
+            int[] final_minIndex = new int[final_min_count];
+            if(final_min_count>0){
+                final_minIndex[0] = minIndex[0];
+                System.arraycopy(minIndex, 0, final_minIndex, 0, 
final_min_count);
+            }
+            return final_minIndex;
+        }
+//    public static void main(String[] args) {
+//        // 数据分布
+//        Map<Integer, Integer> data = new HashMap<>();
+//        data.put(1, 3);
+//        data.put(2, 10);
+//        data.put(3, 100);
+//        data.put(4, 12);
+//       if( data.containsKey(10)){
+//           System.out.println("contain");
+//       }
+//        if( data.containsKey(1)){
+//            System.out.println("contain 1");
+//        }
+//        // 选择带宽
+//        double bandwidth = 1.0;
+//
+//        // 计算核密度曲线的极小值点
+//        findMinima(data, bandwidth);
+//    }
+//
+//    static void findMinima(Map<Integer, Integer> data, double bandwidth) {
+//        // 计算核密度估计
+//        Map<Integer, Double> kernelDensityEstimate = 
calculateKernelDensity(data, bandwidth);
+//
+//        // 计算导数
+//        Map<Integer, Double> derivative = 
calculateDerivative(kernelDensityEstimate);
+//
+//        System.out.println(derivative);
+//
+//        // 打印导数为零的点
+//        System.out.println("Minima Points:");
+//        for (Map.Entry<Integer, Double> entry : derivative.entrySet()) {
+//            if (entry.getValue() == 0.0) {
+//                System.out.println("Point " + entry.getKey());
+//            }
+//        }
+//    }
+//
+//    private static Map<Integer, Double> calculateKernelDensity(Map<Integer, 
Integer> data, double bandwidth) {
+//        // 计算核密度估计
+//        Map<Integer, Double> kernelDensityEstimate = new HashMap<>();
+//
+//        for (Map.Entry<Integer, Integer> entry : data.entrySet()) {
+//            int point = entry.getKey();
+//            double sum = 0.0;
+//
+//            for (Map.Entry<Integer, Integer> dataEntry : data.entrySet()) {
+//                double x = dataEntry.getKey();
+//                double kernel = gaussianKernel(x, point, bandwidth);
+//                sum += kernel;
+//            }
+//
+//            kernelDensityEstimate.put(point, sum / (data.size() * 
bandwidth));
+//        }
+//
+//        return kernelDensityEstimate;
+//    }
+//
+//    private static Map<Integer, Double> calculateDerivative(Map<Integer, 
Double> function) {
+//        // 计算导数
+//        Map<Integer, Double> derivative = new HashMap<>();
+//
+//        for (Map.Entry<Integer, Double> entry : function.entrySet()) {
+//            int point = entry.getKey();
+//
+//            if (point > 1 && point < 4) {
+//                double derivativeValue = (function.get(point + 1) - 
function.get(point - 1)) / 2.0;
+//                derivative.put(point, derivativeValue);
+//            } else {
+//                // 边缘点处理
+//                derivative.put(point, 0.0);
+//            }
+//        }
+//
+//        return derivative;
+//    }
+//
+//    private static double gaussianKernel(double x, double xi, double 
bandwidth) {
+//        // 高斯核函数
+//        return Math.exp(-0.5 * Math.pow((x - xi) / bandwidth, 2)) / 
Math.sqrt(2 * Math.PI);
+//    }
+}
+
diff --git 
a/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/REGERCompressTest.java
 
b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/REGERCompressTest.java
index 7963f26648c..0c3f2ebe6f5 100644
--- 
a/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/REGERCompressTest.java
+++ 
b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/REGERCompressTest.java
@@ -9,11 +9,11 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.nio.charset.StandardCharsets;
 import java.nio.file.Files;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Objects;
+import java.util.*;
 
 import static java.lang.Math.abs;
+import static 
org.apache.iotdb.tsfile.encoding.KernelDensityEstimation.calculateKernelDensity;
+import static 
org.apache.iotdb.tsfile.encoding.KernelDensityEstimation.findMinIndex;
 
 public class REGERCompressTest {
     public static int getBitWith(int num) {
@@ -2431,16 +2431,45 @@ public class REGERCompressTest {
         int[][] ts_block;
         int[][] ts_block_value;
         int[][] ts_block_partition;
+
         if (supply_length == 0) {
+            int max_value = Integer.MIN_VALUE;
+            int min_value = Integer.MAX_VALUE;
+
+//            int[] unique = new int[block_size];
             ts_block = new int[block_size][2];
             ts_block_value = new int[block_size][2];
             ts_block_partition = new int[block_size][2];
             for (int j = 0; j < block_size; j++) {
                 ts_block[j][0] = (data[j + i * block_size][0] - min_time);
                 ts_block[j][1] = data[j + i * block_size][1];
+                if(ts_block[j][1]>max_value){
+                    max_value = ts_block[j][1];
+                }
+                if(ts_block[j][1]<min_value){
+                    min_value = ts_block[j][1];
+                }
                 ts_block_value[j][0] =ts_block[j][0];
                 ts_block_value[j][1] =ts_block[j][1];
             }
+            Map<Integer, Integer> data_map = new HashMap<>();
+//            for(int mv=min_value;mv<=max_value;mv++){
+//                data_map.put(mv,0);
+//            }
+            for (int j = 0; j < block_size; j++) {
+                if(data_map.containsKey(ts_block[j][1])){
+                    int tmp = data_map.get(ts_block[j][1]);
+                    tmp++;
+                    data_map.put(ts_block[j][1],tmp);
+                }else{
+                    data_map.put(ts_block[j][1],1);
+                }
+            }
+//            System.out.println(data_map);
+            double[] kernelDensity = calculateKernelDensity(data_map);
+
+            third_value= findMinIndex(kernelDensity);
+            System.out.println("Minimum point: x=" + 
(Arrays.toString(third_value)));
         } else {
             ts_block = new int[supply_length][2];
             ts_block_value = new int[supply_length][2];
@@ -2478,30 +2507,33 @@ public class REGERCompressTest {
         ts_block_delta_time = ReorderingTimeSeries(ts_block, time_length,  
theta_time, segment_size, k);
 
         int pos_ts_block_partition = 0;
-        for (int[] datum : ts_block) {
-            if (datum[1] > third_value[third_value.length - 1]) {
-                ts_block_partition[pos_ts_block_partition][0] = datum[0];
-                ts_block_partition[pos_ts_block_partition][1] = datum[1];
-                pos_ts_block_partition++;
-            }
-        }
-        for (int third_i = third_value.length - 1; third_i > 0; third_i--) {
+        if(third_value.length>0){
             for (int[] datum : ts_block) {
-                if (datum[1] <= third_value[third_i] && datum[1] > 
third_value[third_i - 1]) {
+                if (datum[1] > third_value[third_value.length - 1]) {
                     ts_block_partition[pos_ts_block_partition][0] = datum[0];
                     ts_block_partition[pos_ts_block_partition][1] = datum[1];
                     pos_ts_block_partition++;
                 }
             }
-        }
-        for (int[] datum : ts_block) {
-            if (datum[1] <= third_value[0]) {
-                ts_block_partition[pos_ts_block_partition][0] = datum[0];
-                ts_block_partition[pos_ts_block_partition][1] = datum[1];
-                pos_ts_block_partition++;
+            for (int third_i = third_value.length - 1; third_i > 0; third_i--) 
{
+                for (int[] datum : ts_block) {
+                    if (datum[1] <= third_value[third_i] && datum[1] > 
third_value[third_i - 1]) {
+                        ts_block_partition[pos_ts_block_partition][0] = 
datum[0];
+                        ts_block_partition[pos_ts_block_partition][1] = 
datum[1];
+                        pos_ts_block_partition++;
+                    }
+                }
+            }
+            for (int[] datum : ts_block) {
+                if (datum[1] <= third_value[0]) {
+                    ts_block_partition[pos_ts_block_partition][0] = datum[0];
+                    ts_block_partition[pos_ts_block_partition][1] = datum[1];
+                    pos_ts_block_partition++;
+                }
             }
         }
 
+
         ts_block_delta_partition = ReorderingTimeSeries(ts_block_partition, 
raw_length,  theta, segment_size, k);
 
         Arrays.sort(ts_block_value, (a, b) -> {
@@ -2514,21 +2546,6 @@ public class REGERCompressTest {
 
 
         int choose = min3(time_length[0], raw_length[0], reorder_length[0]);
-//        ArrayList<Integer> min_index = new ArrayList<>();
-
-//        if (choose == 0) {
-//            raw_length = time_length;
-//
-//            theta = theta_time;
-//        } else if (choose == 1) {
-//            ts_block = ts_block_partition;
-//        } else {
-//            raw_length = reorder_length;
-//            theta = theta_reorder;
-//
-//        }
-
-
 
         int segment_n = (block_size - 1) / segment_size;
         int[][] bit_width_segments = new int[segment_n][2];
@@ -2671,6 +2688,7 @@ public class REGERCompressTest {
 //                }
 //            } else {
 //                System.out.println("Time");
+//        for (int i = 0; i < 1; i++) {
         for (int i = 0; i < block_num; i++) {
             encode_pos = REGERBlockEncoder(data, i, block_size, 0, 
third_value, segment_size, k, encode_pos, encoded_result,best_order);
         }
@@ -2917,8 +2935,8 @@ public class REGERCompressTest {
         output_path_list.add(output_parent_dir + 
"/EPM-Education_ratio.csv");//11
 //        dataset_block_size.add(256);
 
-    for (int file_i = 0; file_i < input_path_list.size(); file_i++) {
-//        for (int file_i = 0; file_i < 1; file_i++) {
+//    for (int file_i = 0; file_i < input_path_list.size(); file_i++) {
+        for (int file_i = 0; file_i < 1; file_i++) {
             String inputPath = input_path_list.get(file_i);
             String Output = output_path_list.get(file_i);
 
@@ -3005,6 +3023,7 @@ public class REGERCompressTest {
                         String.valueOf(best_order[2])
                 };
                 writer.writeRecord(record);
+                System.out.println(Arrays.toString(best_order));
                 System.out.println(ratio);
 
 //                break;

Reply via email to