This is an automated email from the ASF dual-hosted git repository. leirui pushed a commit to branch research/area-visualization in repository https://gitbox.apache.org/repos/asf/iotdb.git
commit e6bd68ed6a2419bf2b0059d38d31343ae55082cd Author: Lei Rui <[email protected]> AuthorDate: Tue Jan 28 07:28:38 2025 +0800 rdp --- server/pom.xml | 6 +- server/sample_minmax-jar-with-dependencies.jar | Bin 40761381 -> 40768753 bytes ...es.jar => sample_rdp-jar-with-dependencies.jar} | Bin 40761381 -> 40768959 bytes .../java/org/apache/iotdb/db/query/eBUG/Rdp.java | 286 +++++++++++++++++++++ .../apache/iotdb/db/query/eBUG/sample_MinMax.java | 3 +- .../eBUG/{sample_MinMax.java => sampled_Rdp.java} | 48 ++-- 6 files changed, 318 insertions(+), 25 deletions(-) diff --git a/server/pom.xml b/server/pom.xml index 1808c5f6791..998fcc8f41f 100644 --- a/server/pom.xml +++ b/server/pom.xml @@ -287,13 +287,15 @@ <plugin> <artifactId>maven-assembly-plugin</artifactId> <configuration> - <finalName>sample_minmax</finalName> + <finalName>sample_rdp</finalName> + <!-- <finalName>sample_minmax</finalName>--> <!-- <finalName>sample_fsw</finalName>--> <!-- <finalName>sample_eBUG</finalName>--> <!-- <finalName>sample_BUYdiff</finalName>--> <archive> <manifest> - <mainClass>org.apache.iotdb.db.query.eBUG.sample_MinMax</mainClass> + <mainClass>org.apache.iotdb.db.query.eBUG.sampled_Rdp</mainClass> + <!-- <mainClass>org.apache.iotdb.db.query.eBUG.sample_MinMax</mainClass>--> <!-- <mainClass>org.apache.iotdb.db.query.eBUG.sample_FSW</mainClass>--> <!-- <mainClass>org.apache.iotdb.db.query.eBUG.sample_eBUG</mainClass>--> <!-- <mainClass>org.apache.iotdb.db.query.eBUG.sample_bottomUpYdiff</mainClass>--> diff --git a/server/sample_minmax-jar-with-dependencies.jar b/server/sample_minmax-jar-with-dependencies.jar index cd09c6c3bbf..1cf9f21e3f1 100644 Binary files a/server/sample_minmax-jar-with-dependencies.jar and b/server/sample_minmax-jar-with-dependencies.jar differ diff --git a/server/sample_minmax-jar-with-dependencies.jar b/server/sample_rdp-jar-with-dependencies.jar similarity index 99% copy from server/sample_minmax-jar-with-dependencies.jar copy to server/sample_rdp-jar-with-dependencies.jar index cd09c6c3bbf..4f69e1781d5 100644 Binary files a/server/sample_minmax-jar-with-dependencies.jar and b/server/sample_rdp-jar-with-dependencies.jar differ diff --git a/server/src/main/java/org/apache/iotdb/db/query/eBUG/Rdp.java b/server/src/main/java/org/apache/iotdb/db/query/eBUG/Rdp.java new file mode 100644 index 00000000000..64cfc12e889 --- /dev/null +++ b/server/src/main/java/org/apache/iotdb/db/query/eBUG/Rdp.java @@ -0,0 +1,286 @@ +package org.apache.iotdb.db.query.eBUG; + +import java.io.*; +import java.util.ArrayList; +import java.util.List; +import java.util.Random; +import java.util.Stack; + +public class Rdp { + // Ramer–Douglas–Peucker algorithm (RDP) + // top-down, for min perpendicular distance + // worst case complexity n2 + // best case complexity nlogn + + public static double point_line_distance(Point point, Point lineStart, Point lineEnd) { + // 计算点到直线的垂直正交距离 + // (x0,y0)到直线(经过(x1,y1)、(x2,y2)点的直线)的正交距离 + double x0 = point.x; + double y0 = point.y; + double x1 = lineStart.x; + double y1 = lineStart.y; + double x2 = lineEnd.x; + double y2 = lineEnd.y; + + double numerator = Math.abs((x2 - x1) * (y1 - y0) - (x1 - x0) * (y2 - y1)); + double denominator = Math.sqrt(Math.pow(x2 - x1, 2) + Math.pow(y2 - y1, 2)); + return numerator / denominator; + } + + // public static List<Point> reducePoints(List<Point> points, double epsilon, Object... kwargs) { + // if (points == null || points.size() < 3) { + // return points; + // } + // + // // 找到距离最远的点 + // double maxDistance = 0; + // int index = 0; + // Point start = points.get(0); + // Point end = points.get(points.size() - 1); + // + // for (int i = 1; i < points.size() - 1; i++) { + // double distance = point_line_distance(points.get(i), start, end); + // if (distance > maxDistance) { + // maxDistance = distance; + // index = i; + // } + // } + // + // List<Point> result = new ArrayList<>(); + // if (maxDistance > epsilon) { + // List<Point> firstPart = reducePoints(points.subList(0, index + 1), epsilon); + // List<Point> secondPart = reducePoints(points.subList(index, points.size()), epsilon); + // + // // 合并结果 + // result.addAll(firstPart.subList(0, firstPart.size() - 1)); // 避免重复添加中间点 + // result.addAll(secondPart); + // } else { + // // 如果最远距离小于阈值,则只保留起点和终点 + // result.add(start); + // result.add(end); + // } + // + // return result; + // } + + /** + * 使用迭代实现 RDP 算法。避免递归深度过大。 + * + * @param points 点集 + * @param epsilon 简化阈值 + * @return 简化后的点集 + */ + public static List<Point> reducePoints(List<Point> points, double epsilon, Object... kwargs) { + if (points.size() <= 2) { + return points; + } + + // 使用栈模拟递归 + Stack<int[]> stack = new Stack<>(); + stack.push(new int[] {0, points.size() - 1}); + + // 用于标记需要保留的点 + boolean[] keep = new boolean[points.size()]; + keep[0] = true; + keep[points.size() - 1] = true; + + while (!stack.isEmpty()) { + int[] range = stack.pop(); + int start = range[0]; + int end = range[1]; + + // 找到距离最远的点 + double maxDistance = 0; + int index = start; + Point startPoint = points.get(start); + Point endPoint = points.get(end); + + for (int i = start + 1; i < end; i++) { + double distance = point_line_distance(points.get(i), startPoint, endPoint); + if (distance > maxDistance) { + maxDistance = distance; + index = i; + } + } + + // 如果最远距离大于阈值,则继续分割 + if (maxDistance > epsilon) { + keep[index] = true; // 保留当前点 + stack.push(new int[] {start, index}); + stack.push(new int[] {index, end}); + } + } + + // 构建简化后的点集 + List<Point> result = new ArrayList<>(); + for (int i = 0; i < points.size(); i++) { + if (keep[i]) { + result.add(points.get(i)); + } + } + + return result; + } + + public static Object[] normalizePoints(List<Point> points) { + // 提取 x 和 y 值 + double[] xValues = points.stream().mapToDouble(p -> p.x).toArray(); + double[] yValues = points.stream().mapToDouble(p -> p.y).toArray(); + + // 计算 x 和 y 的最小值和最大值 + double xMin = Double.MAX_VALUE, xMax = Double.MIN_VALUE; + double yMin = Double.MAX_VALUE, yMax = Double.MIN_VALUE; + + for (double x : xValues) { + if (x < xMin) xMin = x; + if (x > xMax) xMax = x; + } + for (double y : yValues) { + if (y < yMin) yMin = y; + if (y > yMax) yMax = y; + } + + // 标准化 + List<Point> normalizedPoints = new ArrayList<>(); + for (Point p : points) { + double xNormalized = (xMax != xMin) ? (p.x - xMin) / (xMax - xMin) : 0; + double yNormalized = (yMax != yMin) ? (p.y - yMin) / (yMax - yMin) : 0; + normalizedPoints.add(new Point(xNormalized, yNormalized)); + } + + return new Object[] {normalizedPoints, xMin, xMax, yMin, yMax}; + } + + public static List<Point> denormalizePoints(List<Point> points, double[] originalScale) { + double xMin = originalScale[0]; + double xMax = originalScale[1]; + double yMin = originalScale[2]; + double yMax = originalScale[3]; + + List<Point> denormalizedPoints = new ArrayList<>(); + for (Point p : points) { + double xOriginal = p.x * (xMax - xMin) + xMin; + double yOriginal = p.y * (yMax - yMin) + yMin; + denormalizedPoints.add(new Point(xOriginal, yOriginal)); + } + + return denormalizedPoints; + } + + public static void main(String[] args) throws IOException { + Random rand = new Random(10); + String input = "D:\\datasets\\regular\\tmp2.csv"; + boolean hasHeader = true; + int timeIdx = 0; + int valueIdx = 1; + int N = 1000_0000; + List<Point> points = Tool.readFromFile(input, hasHeader, timeIdx, valueIdx, N); + // Polyline polyline = new Polyline(); + // for (int i = 0; i < N; i += 1) { + // double v = rand.nextInt(1000); + // polyline.addVertex(new Point(i, v)); + // } + // List<Point> points = polyline.getVertices(); + try (FileWriter writer = new FileWriter("raw.csv")) { + // 写入CSV头部 + writer.append("x,y,z\n"); + + // 写入每个点的数据 + for (Point point : points) { + writer.append(point.x + "," + point.y + "," + point.z + "\n"); + } + System.out.println(points.size() + " Data has been written"); + } catch (IOException e) { + System.out.println("Error writing to CSV file: " + e.getMessage()); + } + + // 标准化 + Object[] normalizedData = normalizePoints(points); + List<Point> normalizedPoints = (List<Point>) normalizedData[0]; + double xMin = (double) normalizedData[1]; + double xMax = (double) normalizedData[2]; + double yMin = (double) normalizedData[3]; + double yMax = (double) normalizedData[4]; + + int m = 10; + double tolerantPts = 0; + double epsilon = Tool.getParam(normalizedPoints, m, Rdp::reducePoints, tolerantPts); + // double epsilon = 0.001; + + long startTime = System.currentTimeMillis(); + List<Point> sampled_tmp = reducePoints(normalizedPoints, epsilon); + long endTime = System.currentTimeMillis(); + System.out.println("Time taken: " + (endTime - startTime) + "ms"); + + // 还原到原始尺度 + double[] originalScale = {xMin, xMax, yMin, yMax}; + List<Point> sampled = denormalizePoints(sampled_tmp, originalScale); + + // for (Point p : sampled) { + // System.out.println(p); + // } + System.out.println(sampled.size()); + + try (PrintWriter writer = new PrintWriter(new File("output.csv"))) { + // 写入字符串 + for (int i = 0; i < sampled.size(); i++) { + writer.println(sampled.get(i).x + "," + sampled.get(i).y); + } + + } catch (FileNotFoundException e) { + e.printStackTrace(); + } + } + + // public static void reducePoints(List<Point> points, double epsilon, int s, int e, + // List<Point> resultList) { + // double dmax = 0; + // int index = 0; + // + // final int start = s; + // final int end = e - 1; + // for (int i = start + 1; i < end; i++) { + // // Point + // final double px = list.get(i)[0]; + // final double py = list.get(i)[1]; + // // Start + // final double vx = list.get(start)[0]; + // final double vy = list.get(start)[1]; + // // End + // final double wx = list.get(end)[0]; + // final double wy = list.get(end)[1]; + // final double d = perpendicularDistance(px, py, vx, vy, wx, wy); + // if (d > dmax) { + // index = i; + // dmax = d; + // } + // } + // // If max distance is greater than epsilon, recursively simplify + // if (dmax > epsilon) { + // // Recursive call + // douglasPeucker(list, s, index, epsilon, resultList); + // douglasPeucker(list, index, e, epsilon, resultList); + // } else { + // if ((end - start) > 0) { + // resultList.add(list.get(start)); + // resultList.add(list.get(end)); + // } else { + // resultList.add(list.get(start)); + // } + // } + // } + + // /** + // * Given a curve composed of line segments find a similar curve with fewer points. + // * + // * @param list List of Double[] points (x,y) + // * @param epsilon Distance dimension + // * @return Similar curve with fewer points + // */ + // public static final List<Double[]> douglasPeucker(List<Double[]> list, double epsilon) { + // final List<Double[]> resultList = new ArrayList<Double[]>(); + // douglasPeucker(list, 0, list.size(), epsilon, resultList); + // return resultList; + // } + +} diff --git a/server/src/main/java/org/apache/iotdb/db/query/eBUG/sample_MinMax.java b/server/src/main/java/org/apache/iotdb/db/query/eBUG/sample_MinMax.java index 767b7f7151e..5d841a62e87 100644 --- a/server/src/main/java/org/apache/iotdb/db/query/eBUG/sample_MinMax.java +++ b/server/src/main/java/org/apache/iotdb/db/query/eBUG/sample_MinMax.java @@ -49,7 +49,8 @@ public class sample_MinMax { List<Point> points = Tool.readFromFile(input, hasHeader, timeIdx, valueIdx, N); String outputFile = - generateOutputFileName(input, outDir, "-" + bucketType + "-n" + points.size() + "-m" + m); + generateOutputFileName( + input, outDir, "-MinMax-" + bucketType + "-n" + points.size() + "-m" + m); System.out.println("Output file: " + outputFile); // do not modify this hint string log // 采样 diff --git a/server/src/main/java/org/apache/iotdb/db/query/eBUG/sample_MinMax.java b/server/src/main/java/org/apache/iotdb/db/query/eBUG/sampled_Rdp.java similarity index 60% copy from server/src/main/java/org/apache/iotdb/db/query/eBUG/sample_MinMax.java copy to server/src/main/java/org/apache/iotdb/db/query/eBUG/sampled_Rdp.java index 767b7f7151e..834f84d09b7 100644 --- a/server/src/main/java/org/apache/iotdb/db/query/eBUG/sample_MinMax.java +++ b/server/src/main/java/org/apache/iotdb/db/query/eBUG/sampled_Rdp.java @@ -5,13 +5,16 @@ import java.io.FileWriter; import java.io.IOException; import java.util.List; +import static org.apache.iotdb.db.query.eBUG.Rdp.denormalizePoints; +import static org.apache.iotdb.db.query.eBUG.Rdp.normalizePoints; import static org.apache.iotdb.db.query.eBUG.Tool.generateOutputFileName; +import static org.apache.iotdb.db.query.eBUG.Tool.getParam; -public class sample_MinMax { +public class sampled_Rdp { public static void main(String[] args) throws IOException { if (args.length < 7) { System.out.println( - "Usage: Please provide arguments: inputFilePath,hasHeader,timeIdx,valueIdx,N,m,bucketType,(outDir)"); + "Usage: Please provide arguments: inputFilePath,hasHeader,timeIdx,valueIdx,N,m,tolerantRatio,(outDir)"); } String input = args[0]; boolean hasHeader = Boolean.parseBoolean(args[1]); @@ -19,15 +22,7 @@ public class sample_MinMax { int valueIdx = Integer.parseInt(args[3]); int N = Integer.parseInt(args[4]); // N<=0表示读全部行,N>0表示读最多N行 int m = Integer.parseInt(args[5]); - String bucketTypeStr = args[6]; - MinMax.fixedBUCKETtype bucketType; - if (bucketTypeStr.equals("width")) { - bucketType = MinMax.fixedBUCKETtype.width; - } else if (bucketTypeStr.equals("frequency")) { - bucketType = MinMax.fixedBUCKETtype.frequency; - } else { - throw new IOException("please input fixed bucket type as width/frequency"); - } + double tolerantRatio = Double.parseDouble(args[6]); // 查找参数epsilon以使得输出采样点数接近m的程度,比如0.01 String outDir; if (args.length > 7) { outDir = args[7]; // 表示输出文件保存到指定的文件夹 @@ -42,33 +37,42 @@ public class sample_MinMax { System.out.println("Value index: " + valueIdx); System.out.println("N: " + N); System.out.println("m: " + m); - System.out.println("bucketType: " + bucketType); + System.out.println("tolerantRatio: " + tolerantRatio); System.out.println("outDir: " + outDir); // 读取原始序列 List<Point> points = Tool.readFromFile(input, hasHeader, timeIdx, valueIdx, N); - String outputFile = - generateOutputFileName(input, outDir, "-" + bucketType + "-n" + points.size() + "-m" + m); - System.out.println("Output file: " + outputFile); // do not modify this hint string log + generateOutputFileName(input, outDir, "-Rdp" + "-n" + points.size() + "-m" + m); + System.out.println("Output file: " + outputFile); + + // 标准化,避免非常大的时间戳主导perpendicular distance + Object[] normalizedData = normalizePoints(points); + List<Point> normalizedPoints = (List<Point>) normalizedData[0]; + double xMin = (double) normalizedData[1]; + double xMax = (double) normalizedData[2]; + double yMin = (double) normalizedData[3]; + double yMax = (double) normalizedData[4]; + + // 查找epsilon参数使得采样点数接近m + double epsilon = getParam(normalizedPoints, m, Rdp::reducePoints, m * tolerantRatio); // 采样 - List<Point> results; long startTime = System.currentTimeMillis(); - if (bucketType == MinMax.fixedBUCKETtype.width) { - results = MinMax.reducePoints_equalWidthBucket(points, m, false); - } else { - results = MinMax.reducePoints_equalFrequencyBucket(points, m, false); - } + List<Point> results_tmp = Rdp.reducePoints(normalizedPoints, epsilon); long endTime = System.currentTimeMillis(); + // 还原到原始尺度 + double[] originalScale = {xMin, xMax, yMin, yMax}; + List<Point> results = denormalizePoints(results_tmp, originalScale); + System.out.println("result point number: " + results.size()); System.out.println( "n=" + points.size() + ", m=" + m - + ", Time taken to reduce points: " // do not modify this hint string log + + ", Time taken to reduce points: " + (endTime - startTime) + "ms");
