This is an automated email from the ASF dual-hosted git repository.

leirui pushed a commit to branch research/area-visualization
in repository https://gitbox.apache.org/repos/asf/iotdb.git

commit 33a6441223684353d90fae91c22c9e80c5d8b267
Author: Lei Rui <[email protected]>
AuthorDate: Thu Jan 23 16:31:50 2025 +0800

    ok
---
 .../java/org/apache/iotdb/db/query/eBUG/eBUG.java  |   9 +-
 .../eBUG/eBUG_circulateArrayForEliminated.java     | 325 ---------------------
 2 files changed, 5 insertions(+), 329 deletions(-)

diff --git a/server/src/main/java/org/apache/iotdb/db/query/eBUG/eBUG.java 
b/server/src/main/java/org/apache/iotdb/db/query/eBUG/eBUG.java
index 906bc4e2366..408d440a46f 100644
--- a/server/src/main/java/org/apache/iotdb/db/query/eBUG/eBUG.java
+++ b/server/src/main/java/org/apache/iotdb/db/query/eBUG/eBUG.java
@@ -256,10 +256,11 @@ public class eBUG {
     public static void main(String[] args) {
 
 //        int eParam = 0;
-        int[] eParamList = {0, 1, 2, 3, 4, 5, 10, 14, 15, 16, 20, 30, 40, 50, 
100, 200, 500, 1000, 2000, 5000,
-                10000, 50000, 10_0000, 50_0000, 100_0000, 200_0000, 300_0000,
-                500_0000, 800_0000, 1000_0000,
-                1500_0000, 2000_0000, 2500_0000, 3000_0000};
+//        int[] eParamList = {0, 0, 1, 2, 3, 4, 5, 10, 14, 15, 16, 20, 30, 40, 
50, 100, 200, 500, 1000, 2000, 5000,
+//                10000, 50000, 10_0000, 50_0000, 100_0000, 200_0000, 300_0000,
+//                500_0000, 800_0000, 1000_0000,
+//                1500_0000, 2000_0000, 2500_0000, 3000_0000};
+        int[] eParamList = {100_0000};
         for (int eParam : eParamList) {
             try (PrintWriter writer = new PrintWriter(new File("exp_varyN_e" + 
eParam + ".csv"))) {
                 for (int n = 100_0000; n < 2000_0000; n += 200_0000) { // 
超过两千万就都变得慢多了,感觉可能是内存有限的原因,而不是算法复杂度
diff --git 
a/server/src/main/java/org/apache/iotdb/db/query/eBUG/eBUG_circulateArrayForEliminated.java
 
b/server/src/main/java/org/apache/iotdb/db/query/eBUG/eBUG_circulateArrayForEliminated.java
deleted file mode 100644
index 38e9c96ad4e..00000000000
--- 
a/server/src/main/java/org/apache/iotdb/db/query/eBUG/eBUG_circulateArrayForEliminated.java
+++ /dev/null
@@ -1,325 +0,0 @@
-///*
-// * Licensed to the Apache Software Foundation (ASF) under one
-// * or more contributor license agreements.  See the NOTICE file
-// * distributed with this work for additional information
-// * regarding copyright ownership.  The ASF licenses this file
-// * to you under the Apache License, Version 2.0 (the
-// * "License"); you may not use this file except in compliance
-// * with the License.  You may obtain a copy of the License at
-// *
-// *     http://www.apache.org/licenses/LICENSE-2.0
-// *
-// * Unless required by applicable law or agreed to in writing,
-// * software distributed under the License is distributed on an
-// * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// * KIND, either express or implied.  See the License for the
-// * specific language governing permissions and limitations
-// * under the License.
-// */
-//
-//package org.apache.iotdb.db.query.eBUG;
-//
-//import java.io.File;
-//import java.io.FileNotFoundException;
-//import java.io.PrintWriter;
-//import java.util.*;
-//
-//import static org.apache.iotdb.db.query.eBUG.Tool.total_areal_displacement;
-//import static org.apache.iotdb.db.query.eBUG.Tool.triArea;
-//
-//
-//public class eBUG_circulateArrayForEliminated {
-//    public static List<Point> findEliminated(Polyline lineToSimplify, int[] 
recentEliminated,
-//                                             int pa_idx, int pi_idx, int 
pb_idx) {
-//        //  复杂度分析 记c=b-a。
-//        //  如果e<c,那么从最近淘汰的e个点里寻找位于pa~pb之间的,找到的点和pa pi 
pb一起按照时间戳递增排序,后面鞋带公式计算面积:O(e+eloge)
-//        //  否则e>=c,直接取T[a:b],已经排序号了,后面鞋带公式计算面积:O(c)
-//
-//        // 性质:最近淘汰的那一个点一定是位于pa~pb之间,因为正是这个点的淘汰才使得pi的significance要更新!
-//
-//        // pi: the point whose significance needs updating
-//        // pa: left adjacent non-eliminated point of pi
-//        // pb: right adjacent non-eliminated point of pi
-//        // return a list of points, containing pa,p,pb and points between 
pa&pa from the e most recently eliminated points,
-//        // order by time in ascending order
-//
-//        Point pa = lineToSimplify.get(pa_idx);
-//        Point pi = lineToSimplify.get(pi_idx);
-//        Point pb = lineToSimplify.get(pb_idx);
-//
-//        List<Point> res = new ArrayList<>();
-//
-//        // TODO note: here may happen because e=c=0 or just 0<c<=e
-//        if (recentEliminated.length >= (pb_idx - pa_idx - 2)) { // e >= the 
total number of eliminated points in this region
-////            System.out.println("[c=(b-a-2)]<=e, use T directly"); // worst 
case下, k=c
-//            res = lineToSimplify.getVertices().subList(pa_idx, pb_idx + 1); 
// 左闭右开
-//            return res; // 已经按照时间戳递增
-//        }
-//
-//        // TODO note: here may happen because c>e=0 or c>e>0. the latter 
needs search among recentEliminated and sort by timestamp
-//        // 从最近淘汰的e个点里寻找位于pa~pb之间的点,找到的有可能小于e个点
-////        System.out.println("k>=[c=(b-a-2)]>e, search among e");
-//        res.add(pa); // 注意pa pi pb已经按照时间戳递增
-//        res.add(pi);
-//        res.add(pb);
-//
-//        // 包括了c>e=0的情况
-//        for (int idx : recentEliminated) { // e points
-//            if (idx == 0) { // init, not filled by real eliminated points yet
-//                break;
-//            }
-//            Point p = lineToSimplify.get(idx);
-//            if (p.x > pa.x && p.x < pb.x) {
-//                res.add(p);
-//            }
-//        }
-//
-//        // 按照时间戳递增排序,这是为了后面计算total areal displacement需要
-//        if (recentEliminated.length > 0) { // 否则e=0,不需要排序pa pi pb三个点
-//            res.sort(Comparator.comparingDouble(p -> p.x)); // 按时间戳排序 
这样的话时间复杂度变成e*log(e)
-//        }
-//
-//        return res;
-//    }
-//
-//    public static List<Point> buildEffectiveArea(Polyline lineToSimplify, 
int e, boolean debug) {
-//        if (e < 0) {
-//            throw new IllegalArgumentException("Parameter e must be 
non-negative.");
-//        }
-//
-//        List<Point> results = lineToSimplify.getVertices(); // 浅复制
-//
-//        // 用循环数组来记录最近淘汰的点
-//        int[] recentEliminated = new int[e]; // init 0 points to the first 
point, skipped in findEliminated
-//        int recentEliminatedIdx = 0;  // index in the array, circulate
-//
-//        int total = lineToSimplify.size();
-//        if (total < 3) {
-//            return results; // 不足 3 个点无法形成三角形
-//        }
-//
-//        int nTriangles = total - 2;
-//        Triangle[] triangles = new Triangle[nTriangles];
-//
-//        // 创建所有三角形并计算初始面积
-//        for (int i = 1; i < total - 1; i++) {
-//            int index1 = i - 1, index2 = i, index3 = i + 1;
-//            double area = triArea(lineToSimplify.get(index1), 
lineToSimplify.get(index2), lineToSimplify.get(index3));
-//            triangles[i - 1] = new Triangle(index1, index2, index3, area);
-//        }
-//
-//        // 设置三角形的前后关系
-//        for (int i = 0; i < nTriangles; i++) { // TODO 这个可以和上一个循环合并吗??好像不可以
-//            triangles[i].prev = (i == 0 ? null : triangles[i - 1]);
-//            triangles[i].next = (i == nTriangles - 1 ? null : triangles[i + 
1]);
-//        }
-//
-//        // 使用优先队列构建 minHeap
-//        PriorityQueue<Triangle> triangleHeap = new 
PriorityQueue<>(Comparator.comparingDouble(t -> t.area));
-//        Collections.addAll(triangleHeap, triangles); // complexity TODO O(n) 
or O(nlogn)?
-//
-//        double previousEA = -1;
-//        while (!triangleHeap.isEmpty()) {
-//            // 注意peek只需要直接访问该位置的元素,不涉及任何重排或堆化操作
-//            // 而poll是删除堆顶元素,需要重新堆化以维护堆的性质,复杂度是O(logk),k是当前堆的大小
-//            Triangle tri = triangleHeap.poll(); // O(logn)
-//
-//            // 如果该三角形已经被删除,跳过. Avoid using heap.remove(x) as it is O(n) 
complexity
-//            // 而且除了heap里,已经没有其他节点会和它关联了,所有的connections关系已经迁移到新的角色替代节点上
-//            if (tri.isDeleted) {
-//                if (debug) {
-//                    System.out.println(">>>bottom-up, remaining " + 
triangleHeap.size() + " triangles (including those marked for removal)");
-//                }
-//                continue;
-//            }
-//
-//            // 真正的淘汰点
-//            // 记录最近淘汰的点,注意不要重复记录也就是在上面执行之后再确认淘汰
-//            if (debug) {
-//                System.out.println("(1) eliminate " + 
lineToSimplify.get(tri.indices[1]));
-//            }
-//            if (e > 0) { // otherwise e=0
-//                recentEliminated[recentEliminatedIdx] = tri.indices[1];
-//                recentEliminatedIdx = (recentEliminatedIdx + 1) % e; // 
0~e-1 circulate
-//            }
-//            if (debug) {
-//                System.out.println("the e most recently eliminated points:" 
+ Arrays.toString(recentEliminated));
-//            }
-//
-//            // 更新当前点的重要性(z 轴存储effective area,这是一个单调增的指标)
-//            if (tri.area > previousEA) {
-//                previousEA = tri.area;
-//            }
-//            results.get(tri.indices[1]).z = previousEA; // dominated 
significance
-//            if (debug) {
-//                System.out.println(Arrays.toString(tri.indices) + ", 
Dominated Sig=" + previousEA);
-//            }
-//
-//            // 更新相邻三角形
-//            if (tri.prev != null) {
-//                // 
标记为失效点,同时new一个新的对象接管它的一切数据和前后连接关系,然后更新前后连接关系、更新significance、加入heap使其排好序
-//
-//                // 1. 处理旧的tri.prev被标记删除的事情(角色替代)
-//                // triangleHeap.remove(tri.prev); // Avoid using 
heap.remove(x) as it is O(n) complexity!
-//                tri.prev.markDeleted(); // O(1) 
这个点标记为废掉,前后关联都砍断,但是不remove因为那太耗时,只要heap poll到它就跳过就可以
-//
-//                Triangle newPre = new Triangle(tri.prev); // deep copy and 
inherit connection
-//                // tri.prev = newPre; // can omit, because already done by 
new Triangle(tri.prev)
-//
-//                // 2. 处理pi被淘汰引起tri.prev被更新的事情
-//                // 前一个三角形连到后一个三角形
-//                tri.prev.next = tri.next; // ATTENTION!!!: 
这里的tri.next后面可能会因为处理旧的tri.next被标记删除的事情被换掉!到时候要重新赋值!
-//                tri.prev.indices[2] = tri.indices[2];
-//
-//                // e parameter
-//                List<Point> pointsForSig = findEliminated(lineToSimplify, 
recentEliminated,
-//                        tri.prev.indices[0],
-//                        tri.prev.indices[1],
-//                        tri.prev.indices[2]); // TODO complexity ?
-//                if (debug) {
-//                    System.out.println("(2) update point on the left " + 
lineToSimplify.get(tri.prev.indices[1]));
-//                    for (Point point : pointsForSig) {
-//                        System.out.println("\t" + point);
-//                    }
-//                }
-//                List<Point> baseLine = new ArrayList<>();
-//                baseLine.add(pointsForSig.get(0));
-//                baseLine.add(pointsForSig.get(pointsForSig.size() - 1)); // 
直接连接左右两边最近的未被淘汰的点
-//                double sig = total_areal_displacement(pointsForSig, 
baseLine, false);
-//                tri.prev.area = sig;
-//
-//                if (debug) {
-//                    System.out.println("sig=" + sig);
-//                    double tmpTri = 
triArea(lineToSimplify.get(tri.prev.indices[0]),
-//                            lineToSimplify.get(tri.prev.indices[1]),
-//                            lineToSimplify.get(tri.prev.indices[2]));
-//                    System.out.println("\t" + "tri=" + tmpTri + ", " + 
((tmpTri > sig) ? "over-estimated" : "equal/less-estimated"));
-//                }
-//
-//                // 重新加入堆
-//                // 在 Java 的 PriorityQueue 中,修改元素的属性不会自动更新堆的顺序
-//                // 所以必须通过add来显式重新插入修改后的元素
-//                triangleHeap.add(tri.prev); // O(logn) 
注意加入的是一个新的对象isDeleted=false
-//            }
-//
-//            if (tri.next != null) {
-//                // 
标记为失效点,同时new一个新的对象接管它的一切数据和前后连接关系,然后更新前后连接关系、更新significance、加入heap使其排好序
-//
-//                // 1. 处理旧的tri.next被标记删除的事情(角色替代)
-//                // triangleHeap.remove(tri.next); // Avoid using 
heap.remove(x) as it is O(n) complexity
-//                tri.next.markDeleted(); // O(1) 
这个点标记为废掉,前后关联都砍断,但是不remove因为那太耗时,只有poll到它就跳过就可以
-//
-//                Triangle newNext = new Triangle(tri.next); // deep copy and 
inherit connection
-//                // tri.next = newNext; // omit, because already done by new 
Triangle(tri.prev)
-//
-//                if (tri.prev != null) {
-//                    tri.prev.next = tri.next; // ATTENTION!!!: 
这里的tri.next已经被换掉!所以之前的要重新赋值!
-//                }
-//
-//                // 2. 处理pi被淘汰引起tri.next被更新的事情
-//                tri.next.prev = tri.prev; // 
注意此时tri.prev已经是替代后的节点,tri.next也是,从而被标记为废点的前后关联真正砍断
-//                tri.next.indices[0] = tri.indices[0];
-//
-//                // e parameter
-//                List<Point> pointsForSig = findEliminated(lineToSimplify, 
recentEliminated,
-//                        tri.next.indices[0],
-//                        tri.next.indices[1],
-//                        tri.next.indices[2]); // TODO complexity
-//                if (debug) {
-//                    System.out.println("(2) updating point on the right " + 
lineToSimplify.get(tri.next.indices[1]));
-//                    for (Point point : pointsForSig) {
-//                        System.out.println("\t" + point);
-//                    }
-//                }
-//                List<Point> baseLine = new ArrayList<>();
-//                baseLine.add(pointsForSig.get(0));
-//                baseLine.add(pointsForSig.get(pointsForSig.size() - 1)); // 
直接连接左右两边最近的未被淘汰的点
-//                double sig = total_areal_displacement(pointsForSig, 
baseLine, false);
-//                tri.next.area = sig;
-//
-//                if (debug) {
-//                    System.out.println("sig=" + sig);
-//                    double tmpTri = 
triArea(lineToSimplify.get(tri.next.indices[0]),
-//                            lineToSimplify.get(tri.next.indices[1]),
-//                            lineToSimplify.get(tri.next.indices[2]));
-//                    System.out.println("\t" + "tri=" + tmpTri + ", " + 
((tmpTri > sig) ? "over-estimated" : "equal/less-estimated"));
-//                }
-//
-//                // 重新加入堆
-//                // 在 Java 的 PriorityQueue 中,修改元素的属性不会自动更新堆的顺序
-//                // 所以必须通过add 来显式重新插入修改后的元素
-//                triangleHeap.add(tri.next); // 注意加入的是一个新的对象isDeleted=false
-//            }
-//            if (debug) {
-//                System.out.println(">>>bottom-up, remaining " + 
triangleHeap.size() + " triangles (including those marked for removal)");
-//            }
-//        }
-//        return results; // 注意这就是lineToSimplify.getVertices()
-//    }
-//
-//    public static void main(String[] args) {
-//
-//        int eParam = 2000_0000;
-//        try (PrintWriter writer = new PrintWriter(new File("exp_varyN_e" + 
eParam + ".csv"))) {
-//            for (int n = 100_0000; n < 2000_0000; n += 200_0000) { // 
超过两千万就都变得慢多了,感觉可能是内存有限的原因,而不是算法复杂度
-//                // TODO 注意 要有一个点是n=300w
-//
-//                Polyline polyline = new Polyline();
-//                int seed = 1;
-//                Random rand = new Random(seed); // TODO 注意seed在每轮里重设
-//                for (int i = 0; i < n; i++) {
-//                    double v = rand.nextInt(1000000);
-//                    polyline.addVertex(new Point(i, v));
-//                }
-//
-//                long startTime = System.currentTimeMillis();
-//                List<Point> results = buildEffectiveArea(polyline, eParam, 
false);
-//                long endTime = System.currentTimeMillis();
-//
-//                System.out.println("n=" + n + ", e=" + eParam + ", Time 
taken to reduce points: " + (endTime - startTime) + "ms");
-//                writer.println(n + "," + eParam + "," + (endTime - 
startTime));
-//                System.out.println(results.size());
-//
-//                // clear
-//                polyline.clear();
-//                results.clear();
-//                polyline = null;
-//                results = null;
-//                System.gc();
-//            }
-//        } catch (FileNotFoundException e) {
-//            throw new RuntimeException(e);
-//        }
-//
-////        int n = 300_0000;
-////        int seed = 1;
-////        Random rand = new Random(seed); // TODO 注意seed在每轮里重设
-////        Polyline polyline = new Polyline(); // 
注意:如果是固定n变化e实验,这个数据要一次性生成,不然每次随机不一样
-////        for (int i = 0; i < n; i++) {
-////            double v = rand.nextInt(1000000);
-////            polyline.addVertex(new Point(i, v));
-////        }
-////
-////        try (PrintWriter writer = new PrintWriter(new File("exp_varyE_n" + 
n + ".csv"))) {
-////            int[] eParamList = {0, 1, 2, 10, 1000, 10000, 10_0000, 
30_0000, 50_0000, 60_0000, 80_0000, 100_0000, 150_0000, 200_0000};
-////            for (int eParam : eParamList) {
-////                eParam *= 3; // TODO 注意 因为n=300w而不是100w
-////
-////                long startTime = System.currentTimeMillis();
-////                List<Point> results = buildEffectiveArea(polyline, eParam, 
false);
-////                long endTime = System.currentTimeMillis();
-////
-////                System.out.println("n=" + n + ", e=" + eParam + ", Time 
taken to reduce points: " + (endTime - startTime) + "ms");
-////                writer.println(n + "," + eParam + "," + (endTime - 
startTime));
-////                System.out.println(results.size());
-////
-////                // clear 注意不能把原始数据清除,还要接着用
-////                System.gc();
-////            }
-////        } catch (FileNotFoundException e) {
-////            throw new RuntimeException(e);
-////        }
-//
-//        System.out.println("finish");
-//    }
-//}

Reply via email to