This is an automated email from the ASF dual-hosted git repository. leirui pushed a commit to branch research/area-visualization in repository https://gitbox.apache.org/repos/asf/iotdb.git
commit 2a22359d4bccd402a123e0bc254c0da0e9a1eafb Author: Lei Rui <[email protected]> AuthorDate: Tue Jan 28 00:38:18 2025 +0800 add --- .../java/org/apache/iotdb/db/query/eBUG/DNSL1.java | 97 ++++++++++++++++++++++ .../java/org/apache/iotdb/db/query/eBUG/DP.java | 57 +++++++------ .../org/apache/iotdb/db/query/eBUG/Test10.java | 62 ++++++++++++++ 3 files changed, 191 insertions(+), 25 deletions(-) diff --git a/server/src/main/java/org/apache/iotdb/db/query/eBUG/DNSL1.java b/server/src/main/java/org/apache/iotdb/db/query/eBUG/DNSL1.java new file mode 100644 index 00000000000..df2c09d35ab --- /dev/null +++ b/server/src/main/java/org/apache/iotdb/db/query/eBUG/DNSL1.java @@ -0,0 +1,97 @@ +package org.apache.iotdb.db.query.eBUG; + +import java.io.FileWriter; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Random; + +import static org.apache.iotdb.db.query.eBUG.DP.dynamic_programming; + +public class DNSL1 { + // 默认使用linear interpolation连接分段首尾点来近似 + // 默认使用L1 error + // 使用divide and conquer算法 + public static List<Point> reducePoints(List<Point> points, int m, boolean debug) { + int n = points.size(); + int k = m - 1; // 分段数 + int intervalPts = (int) Math.pow((double) n / k, (double) 2 / 3); // divide batch点数 + + if (debug) { + System.out.println("interval point length=" + intervalPts); + } + + // divide into intervalPts equal-length intervals + List<Point> allSampledPoints = new ArrayList<>(); + for (int start = 0; start < n; start += intervalPts) { + int end = Math.min(start + intervalPts, n); + List<Point> batch = points.subList(start, end); // 左闭右开 + if (debug) { + System.out.println("Processing batch: [" + start + "," + end + ")"); + } + List<Point> sampledForBatch = dynamic_programming(batch, k, DP.ERROR.L1, false); + allSampledPoints.addAll(sampledForBatch); // 把上一步的结果加到大的容器里 + } + + // 在大的容器上最后执行DP.dynamic_programming得到最后的m个采样点 + if (debug) { + System.out.println("number of points from batch sampling: " + allSampledPoints.size()); + } + List<Point> finalSampledPoints = dynamic_programming(allSampledPoints, k, DP.ERROR.L1, false); + if (debug) { + System.out.println("result point number: " + finalSampledPoints.size()); + } + return finalSampledPoints; + } + + public static void main(String[] args) { + Random rand = new Random(10); + String input = "raw.csv"; + boolean hasHeader = true; + int timeIdx = 0; + int valueIdx = 1; + int N = 1000; +// List<Point> points = Tool.readFromFile(input, hasHeader, timeIdx, valueIdx, N); + Polyline polyline = new Polyline(); + for (int i = 0; i < N; i += 1) { + double v = rand.nextInt(1000); + polyline.addVertex(new Point(i, v)); + } + List<Point> points = polyline.getVertices(); + try (FileWriter writer = new FileWriter("raw.csv")) { + // 写入CSV头部 + writer.append("x,y,z\n"); + + // 写入每个点的数据 + for (Point point : points) { + writer.append(point.x + "," + point.y + "," + point.z + "\n"); + } + System.out.println(points.size() + " Data has been written"); + } catch (IOException e) { + System.out.println("Error writing to CSV file: " + e.getMessage()); + } + +// int m = (int) Math.pow(points.size(), 0.5); + int m = 10; + +// long startTime = System.currentTimeMillis(); +// List<Point> sampled = dynamic_programming(points, m - 1, DP.ERROR.area, false); +// long endTime = System.currentTimeMillis(); +// System.out.println("Time taken: " + (endTime - startTime) + "ms"); +// for (Point p : sampled) { +// System.out.println(p); +// } +// System.out.println(sampled.size()); + + long startTime2 = System.currentTimeMillis(); +// System.out.println(points.size() * m / (Math.pow(points.size() * 1.0 / (m - 1), 2 / 3.0))); +// System.out.println(10000000 * (2 / (Math.pow(10000000 * 1.0 / (2 - 1), 2 / 3.0)))); + List<Point> sampled2 = reducePoints(points, m, true); + long endTime2 = System.currentTimeMillis(); + System.out.println("Time taken: " + (endTime2 - startTime2) + "ms"); +// for (Point p : sampled2) { +// System.out.println(p); +// } +// System.out.println(sampled2.size()); + } +} diff --git a/server/src/main/java/org/apache/iotdb/db/query/eBUG/DP.java b/server/src/main/java/org/apache/iotdb/db/query/eBUG/DP.java index a6a9647fb4b..62c81cd10ea 100644 --- a/server/src/main/java/org/apache/iotdb/db/query/eBUG/DP.java +++ b/server/src/main/java/org/apache/iotdb/db/query/eBUG/DP.java @@ -19,6 +19,7 @@ public class DP { // Dynamic-Programming public static double[][] prepareCostTable(List<Point> points, ERROR errorType, boolean debug) { int N = points.size(); double[][] dists = new double[N][N]; // 默认0,对角线元素已经都是0了 + // 一个 double[47700][47700] 矩阵大约需要 16.94 GB 的内存。 // 外层循环i:range长度=right boundary-left boundary // 内层循环j: 矩阵逐行 @@ -89,12 +90,18 @@ public class DP { // Dynamic-Programming System.out.println(">>>分段数i+1" + (i + 1) + ",end pos j=" + j); } double[] choices = new double[j + 1]; // java从0开始 + int bestIndex = -1; + double bestVal = Double.MAX_VALUE; // 越小越好 for (int xtmp = 0; xtmp < j + 1; xtmp++) { if (errorType == ERROR.L_infy) { choices[xtmp] = Math.max(kSegDist[i - 1][xtmp], dists[xtmp][j]); } else { choices[xtmp] = kSegDist[i - 1][xtmp] + dists[xtmp][j]; } + if (choices[xtmp] < bestVal) { + bestVal = choices[xtmp]; + bestIndex = xtmp; + } } if (debug) { for (int xtmp = 0; xtmp < j + 1; xtmp++) { // 遍历从 0 到 j 的每个元素 @@ -112,12 +119,8 @@ public class DP { // Dynamic-Programming } } - int bestIndex = getIndexOfMin(choices); - double bestVal = choices[bestIndex]; - // Store the sub-problem solution kSegPath[i][j] = bestIndex; - kSegDist[i][j] = bestVal; if (debug) { @@ -162,23 +165,27 @@ public class DP { // Dynamic-Programming return sDs; } - // Helper method to get index of minimum value - private static int getIndexOfMin(double[] array) { - double minVal = array[0]; - int minIndex = 0; - for (int i = 1; i < array.length; i++) { - if (array[i] < minVal) { - minVal = array[i]; - minIndex = i; - } - } - return minIndex; - } +// // Helper method to get index of minimum value +// private static int getIndexOfMin(double[] array) { +// double minVal = array[0]; +// int minIndex = 0; +// for (int i = 1; i < array.length; i++) { +// if (array[i] < minVal) { +// minVal = array[i]; +// minIndex = i; +// } +// } +// return minIndex; +// } // Method to calculate joint segment error based on error type private static double joint_segment_error(List<Point> points, int lx, int rx, ERROR errorType) { // 默认joint=true, residual=true,即使用linear interpolation近似分段 // lx~rx 左闭右闭 + if (lx == rx) { + return 0; + } + if (errorType != ERROR.area) { double x1 = points.get(lx).x; double y1 = points.get(lx).y; @@ -240,14 +247,14 @@ public class DP { // Dynamic-Programming boolean hasHeader = true; int timeIdx = 0; int valueIdx = 1; - int N = -1; - List<Point> points = Tool.readFromFile(input, hasHeader, timeIdx, valueIdx, N); -// Polyline polyline = new Polyline(); -// for (int i = 0; i < N; i += 1) { -// double v = rand.nextInt(1000); -// polyline.addVertex(new Point(i, v)); -// } -// List<Point> points = polyline.getVertices(); + int N = 100; +// List<Point> points = Tool.readFromFile(input, hasHeader, timeIdx, valueIdx, N); + Polyline polyline = new Polyline(); + for (int i = 0; i < N; i += 1) { + double v = rand.nextInt(1000); + polyline.addVertex(new Point(i, v)); + } + List<Point> points = polyline.getVertices(); try (FileWriter writer = new FileWriter("raw.csv")) { // 写入CSV头部 writer.append("x,y,z\n"); @@ -263,7 +270,7 @@ public class DP { // Dynamic-Programming long startTime = System.currentTimeMillis(); // double[][] ad2 = prepareKSegments(points, ERROR.L1, false); - int m = 4; + int m = 10; List<Point> sampled = dynamic_programming(points, m - 1, ERROR.area, false); long endTime = System.currentTimeMillis(); System.out.println("Time taken: " + (endTime - startTime) + "ms"); diff --git a/server/src/main/java/org/apache/iotdb/db/query/eBUG/Test10.java b/server/src/main/java/org/apache/iotdb/db/query/eBUG/Test10.java new file mode 100644 index 00000000000..4dba96d0e3e --- /dev/null +++ b/server/src/main/java/org/apache/iotdb/db/query/eBUG/Test10.java @@ -0,0 +1,62 @@ +package org.apache.iotdb.db.query.eBUG; + +import java.io.FileWriter; +import java.io.IOException; +import java.util.List; +import java.util.Random; + +import static org.apache.iotdb.db.query.eBUG.DP.dynamic_programming; + +public class Test10 { + // 用于验证java DP正确性 + public static void main(String[] args) { + Random rand = new Random(10); + String input = "raw.csv"; + boolean hasHeader = true; + int timeIdx = 0; + int valueIdx = 1; + int N = 100; +// List<Point> points = Tool.readFromFile(input, hasHeader, timeIdx, valueIdx, N); + Polyline polyline = new Polyline(); + for (int i = 0; i < N; i += 1) { + double v = rand.nextInt(1000); + polyline.addVertex(new Point(i, v)); + } + List<Point> points = polyline.getVertices(); + try (FileWriter writer = new FileWriter("raw.csv")) { + // 写入CSV头部 + writer.append("x,y,z\n"); + + // 写入每个点的数据 + for (Point point : points) { + writer.append(point.x + "," + point.y + "," + point.z + "\n"); + } + System.out.println(points.size() + " Data has been written"); + } catch (IOException e) { + System.out.println("Error writing to CSV file: " + e.getMessage()); + } + + + long startTime = System.currentTimeMillis(); +// double[][] ad2 = prepareKSegments(points, ERROR.L1, false); + int m = 10; + List<Point> sampled = dynamic_programming(points, m - 1, DP.ERROR.area, false); + long endTime = System.currentTimeMillis(); + System.out.println("Time taken: " + (endTime - startTime) + "ms"); + + for (Point p : sampled) { + System.out.println(p); + } + System.out.println(sampled.size()); + +// try (PrintWriter writer = new PrintWriter(new File("output.csv"))) { +// // 写入字符串 +// for (int i = 0; i < sampled.size(); i++) { +// writer.println(sampled.get(i).x + "," + sampled.get(i).y); +// } +// +// } catch (FileNotFoundException e) { +// e.printStackTrace(); +// } + } +}
