This is an automated email from the ASF dual-hosted git repository. hui pushed a commit to branch research/encoding-reorder in repository https://gitbox.apache.org/repos/asf/iotdb.git
commit caacc929a5ad5623a8bd2826c0c18a4fd6248d91 Author: xjz17 <[email protected]> AuthorDate: Sun Nov 12 17:53:02 2023 +0800 update --- .../tsfile/encoding/KernelDensityEstimation.java | 153 +++++++++++++++++++++ .../iotdb/tsfile/encoding/REGERCompressTest.java | 89 +++++++----- 2 files changed, 207 insertions(+), 35 deletions(-) diff --git a/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/KernelDensityEstimation.java b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/KernelDensityEstimation.java new file mode 100644 index 00000000000..bdf3628baf9 --- /dev/null +++ b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/KernelDensityEstimation.java @@ -0,0 +1,153 @@ +package org.apache.iotdb.tsfile.encoding; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; + +public class KernelDensityEstimation { + public static void main(String[] args) { + // 离散分布数据 + Map<Integer, Integer> discreteDistribution = new HashMap<>(); + discreteDistribution.put(2, 10); + discreteDistribution.put(3, 100); + discreteDistribution.put(1, 3); + discreteDistribution.put(4, 12); + + // 计算核密度估计 + double[] kernelDensity = calculateKernelDensity(discreteDistribution); + + // 打印核密度估计 + System.out.println("Kernel Density Estimation:"); + for (int i = 0; i < kernelDensity.length; i++) { + System.out.println("x=" + (i + 1) + ": " + kernelDensity[i]); + } + + // 寻找核密度估计的极小值点 + int[] minIndex = findMinIndex(kernelDensity); + System.out.println("Minimum point: x=" + (Arrays.toString(minIndex))); + } + + // 计算核密度估计 + static double[] calculateKernelDensity(Map<Integer, Integer> discreteDistribution) { + int maxKey = discreteDistribution.keySet().stream().max(Integer::compare).orElse(0); + double[] kernelDensity = new double[maxKey]; + + for (int x = 1; x <= maxKey; x++) { + for (Map.Entry<Integer, Integer> entry : discreteDistribution.entrySet()) { + int dataPoint = entry.getKey(); + int weight = entry.getValue(); + kernelDensity[x - 1] += gaussianKernel(x, dataPoint) * weight; + } + } + + return kernelDensity; + } + + // 高斯核函数 + private static double gaussianKernel(int x, int dataPoint) { + double bandwidth = 1.0; // 可调整的带宽参数 + return Math.exp(-0.5 * Math.pow((x - dataPoint) / bandwidth, 2)) / (Math.sqrt(2 * Math.PI) * bandwidth); + } + + // 寻找数组中的最小值索引 + static int[] findMinIndex(double[] array) { + int[] minIndex = new int[array.length]; + int final_min_count = 0; +// double preValue = array[0]; + + for (int i = 1; i < array.length-1; i++) { + if (array[i] < array[i-1] && array[i] < array[i+1]) { + minIndex[final_min_count] = i; + final_min_count ++; + } + } + int[] final_minIndex = new int[final_min_count]; + if(final_min_count>0){ + final_minIndex[0] = minIndex[0]; + System.arraycopy(minIndex, 0, final_minIndex, 0, final_min_count); + } + return final_minIndex; + } +// public static void main(String[] args) { +// // 数据分布 +// Map<Integer, Integer> data = new HashMap<>(); +// data.put(1, 3); +// data.put(2, 10); +// data.put(3, 100); +// data.put(4, 12); +// if( data.containsKey(10)){ +// System.out.println("contain"); +// } +// if( data.containsKey(1)){ +// System.out.println("contain 1"); +// } +// // 选择带宽 +// double bandwidth = 1.0; +// +// // 计算核密度曲线的极小值点 +// findMinima(data, bandwidth); +// } +// +// static void findMinima(Map<Integer, Integer> data, double bandwidth) { +// // 计算核密度估计 +// Map<Integer, Double> kernelDensityEstimate = calculateKernelDensity(data, bandwidth); +// +// // 计算导数 +// Map<Integer, Double> derivative = calculateDerivative(kernelDensityEstimate); +// +// System.out.println(derivative); +// +// // 打印导数为零的点 +// System.out.println("Minima Points:"); +// for (Map.Entry<Integer, Double> entry : derivative.entrySet()) { +// if (entry.getValue() == 0.0) { +// System.out.println("Point " + entry.getKey()); +// } +// } +// } +// +// private static Map<Integer, Double> calculateKernelDensity(Map<Integer, Integer> data, double bandwidth) { +// // 计算核密度估计 +// Map<Integer, Double> kernelDensityEstimate = new HashMap<>(); +// +// for (Map.Entry<Integer, Integer> entry : data.entrySet()) { +// int point = entry.getKey(); +// double sum = 0.0; +// +// for (Map.Entry<Integer, Integer> dataEntry : data.entrySet()) { +// double x = dataEntry.getKey(); +// double kernel = gaussianKernel(x, point, bandwidth); +// sum += kernel; +// } +// +// kernelDensityEstimate.put(point, sum / (data.size() * bandwidth)); +// } +// +// return kernelDensityEstimate; +// } +// +// private static Map<Integer, Double> calculateDerivative(Map<Integer, Double> function) { +// // 计算导数 +// Map<Integer, Double> derivative = new HashMap<>(); +// +// for (Map.Entry<Integer, Double> entry : function.entrySet()) { +// int point = entry.getKey(); +// +// if (point > 1 && point < 4) { +// double derivativeValue = (function.get(point + 1) - function.get(point - 1)) / 2.0; +// derivative.put(point, derivativeValue); +// } else { +// // 边缘点处理 +// derivative.put(point, 0.0); +// } +// } +// +// return derivative; +// } +// +// private static double gaussianKernel(double x, double xi, double bandwidth) { +// // 高斯核函数 +// return Math.exp(-0.5 * Math.pow((x - xi) / bandwidth, 2)) / Math.sqrt(2 * Math.PI); +// } +} + diff --git a/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/REGERCompressTest.java b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/REGERCompressTest.java index 7963f26648c..0c3f2ebe6f5 100644 --- a/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/REGERCompressTest.java +++ b/iotdb-core/tsfile/src/test/java/org/apache/iotdb/tsfile/encoding/REGERCompressTest.java @@ -9,11 +9,11 @@ import java.io.IOException; import java.io.InputStream; import java.nio.charset.StandardCharsets; import java.nio.file.Files; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Objects; +import java.util.*; import static java.lang.Math.abs; +import static org.apache.iotdb.tsfile.encoding.KernelDensityEstimation.calculateKernelDensity; +import static org.apache.iotdb.tsfile.encoding.KernelDensityEstimation.findMinIndex; public class REGERCompressTest { public static int getBitWith(int num) { @@ -2431,16 +2431,45 @@ public class REGERCompressTest { int[][] ts_block; int[][] ts_block_value; int[][] ts_block_partition; + if (supply_length == 0) { + int max_value = Integer.MIN_VALUE; + int min_value = Integer.MAX_VALUE; + +// int[] unique = new int[block_size]; ts_block = new int[block_size][2]; ts_block_value = new int[block_size][2]; ts_block_partition = new int[block_size][2]; for (int j = 0; j < block_size; j++) { ts_block[j][0] = (data[j + i * block_size][0] - min_time); ts_block[j][1] = data[j + i * block_size][1]; + if(ts_block[j][1]>max_value){ + max_value = ts_block[j][1]; + } + if(ts_block[j][1]<min_value){ + min_value = ts_block[j][1]; + } ts_block_value[j][0] =ts_block[j][0]; ts_block_value[j][1] =ts_block[j][1]; } + Map<Integer, Integer> data_map = new HashMap<>(); +// for(int mv=min_value;mv<=max_value;mv++){ +// data_map.put(mv,0); +// } + for (int j = 0; j < block_size; j++) { + if(data_map.containsKey(ts_block[j][1])){ + int tmp = data_map.get(ts_block[j][1]); + tmp++; + data_map.put(ts_block[j][1],tmp); + }else{ + data_map.put(ts_block[j][1],1); + } + } +// System.out.println(data_map); + double[] kernelDensity = calculateKernelDensity(data_map); + + third_value= findMinIndex(kernelDensity); + System.out.println("Minimum point: x=" + (Arrays.toString(third_value))); } else { ts_block = new int[supply_length][2]; ts_block_value = new int[supply_length][2]; @@ -2478,30 +2507,33 @@ public class REGERCompressTest { ts_block_delta_time = ReorderingTimeSeries(ts_block, time_length, theta_time, segment_size, k); int pos_ts_block_partition = 0; - for (int[] datum : ts_block) { - if (datum[1] > third_value[third_value.length - 1]) { - ts_block_partition[pos_ts_block_partition][0] = datum[0]; - ts_block_partition[pos_ts_block_partition][1] = datum[1]; - pos_ts_block_partition++; - } - } - for (int third_i = third_value.length - 1; third_i > 0; third_i--) { + if(third_value.length>0){ for (int[] datum : ts_block) { - if (datum[1] <= third_value[third_i] && datum[1] > third_value[third_i - 1]) { + if (datum[1] > third_value[third_value.length - 1]) { ts_block_partition[pos_ts_block_partition][0] = datum[0]; ts_block_partition[pos_ts_block_partition][1] = datum[1]; pos_ts_block_partition++; } } - } - for (int[] datum : ts_block) { - if (datum[1] <= third_value[0]) { - ts_block_partition[pos_ts_block_partition][0] = datum[0]; - ts_block_partition[pos_ts_block_partition][1] = datum[1]; - pos_ts_block_partition++; + for (int third_i = third_value.length - 1; third_i > 0; third_i--) { + for (int[] datum : ts_block) { + if (datum[1] <= third_value[third_i] && datum[1] > third_value[third_i - 1]) { + ts_block_partition[pos_ts_block_partition][0] = datum[0]; + ts_block_partition[pos_ts_block_partition][1] = datum[1]; + pos_ts_block_partition++; + } + } + } + for (int[] datum : ts_block) { + if (datum[1] <= third_value[0]) { + ts_block_partition[pos_ts_block_partition][0] = datum[0]; + ts_block_partition[pos_ts_block_partition][1] = datum[1]; + pos_ts_block_partition++; + } } } + ts_block_delta_partition = ReorderingTimeSeries(ts_block_partition, raw_length, theta, segment_size, k); Arrays.sort(ts_block_value, (a, b) -> { @@ -2514,21 +2546,6 @@ public class REGERCompressTest { int choose = min3(time_length[0], raw_length[0], reorder_length[0]); -// ArrayList<Integer> min_index = new ArrayList<>(); - -// if (choose == 0) { -// raw_length = time_length; -// -// theta = theta_time; -// } else if (choose == 1) { -// ts_block = ts_block_partition; -// } else { -// raw_length = reorder_length; -// theta = theta_reorder; -// -// } - - int segment_n = (block_size - 1) / segment_size; int[][] bit_width_segments = new int[segment_n][2]; @@ -2671,6 +2688,7 @@ public class REGERCompressTest { // } // } else { // System.out.println("Time"); +// for (int i = 0; i < 1; i++) { for (int i = 0; i < block_num; i++) { encode_pos = REGERBlockEncoder(data, i, block_size, 0, third_value, segment_size, k, encode_pos, encoded_result,best_order); } @@ -2917,8 +2935,8 @@ public class REGERCompressTest { output_path_list.add(output_parent_dir + "/EPM-Education_ratio.csv");//11 // dataset_block_size.add(256); - for (int file_i = 0; file_i < input_path_list.size(); file_i++) { -// for (int file_i = 0; file_i < 1; file_i++) { +// for (int file_i = 0; file_i < input_path_list.size(); file_i++) { + for (int file_i = 0; file_i < 1; file_i++) { String inputPath = input_path_list.get(file_i); String Output = output_path_list.get(file_i); @@ -3005,6 +3023,7 @@ public class REGERCompressTest { String.valueOf(best_order[2]) }; writer.writeRecord(record); + System.out.println(Arrays.toString(best_order)); System.out.println(ratio); // break;
