This is an automated email from the ASF dual-hosted git repository. jackietien pushed a commit to branch mergemaster0808 in repository https://gitbox.apache.org/repos/asf/iotdb.git
commit 3251e82791c79b5a2c4022cdea5b7346793d50f4 Author: Haonan <[email protected]> AuthorDate: Thu Aug 8 01:20:58 2024 +0800 Remove the usage of lt_downsampling_java8 (#13108) (cherry picked from commit 0051687b13bc913f9762ece76f45988e4b40ad47) --- library-udf/pom.xml | 7 -- .../apache/iotdb/library/dprofile/UDTFSample.java | 24 ++--- .../apache/iotdb/library/dprofile/util/Area.java | 62 ++++++++++++ .../apache/iotdb/library/dprofile/util/Bucket.java | 85 ++++++++++++++++ .../library/dprofile/util/LTThreeBuckets.java | 57 +++++++++++ .../library/dprofile/util/OnePassBucketizer.java | 68 +++++++++++++ .../library/dprofile/util/SlidingCollector.java | 111 +++++++++++++++++++++ .../iotdb/library/dprofile/util/Triangle.java | 59 +++++++++++ pom.xml | 55 ++++------ 9 files changed, 472 insertions(+), 56 deletions(-) diff --git a/library-udf/pom.xml b/library-udf/pom.xml index 2aca5427ad7..f893e0f0dc4 100644 --- a/library-udf/pom.xml +++ b/library-udf/pom.xml @@ -68,13 +68,6 @@ <groupId>com.github.wendykierp</groupId> <artifactId>JTransforms</artifactId> </dependency> - <!-- Sampling --> - <dependency> - <groupId>com.github.ggalmazor</groupId> - <artifactId>lt_downsampling_java8</artifactId> - <!-- This version is only available in the jitpack.io repo --> - <version>0.0.6</version> - </dependency> <dependency> <groupId>com.google.guava</groupId> <artifactId>guava</artifactId> diff --git a/library-udf/src/main/java/org/apache/iotdb/library/dprofile/UDTFSample.java b/library-udf/src/main/java/org/apache/iotdb/library/dprofile/UDTFSample.java index 2d39eef3152..bf014a63de9 100644 --- a/library-udf/src/main/java/org/apache/iotdb/library/dprofile/UDTFSample.java +++ b/library-udf/src/main/java/org/apache/iotdb/library/dprofile/UDTFSample.java @@ -19,6 +19,7 @@ package org.apache.iotdb.library.dprofile; +import org.apache.iotdb.library.dprofile.util.LTThreeBuckets; import org.apache.iotdb.library.util.NoNumberException; import org.apache.iotdb.library.util.Util; import 
org.apache.iotdb.udf.api.UDTF; @@ -33,11 +34,8 @@ import org.apache.iotdb.udf.api.customizer.strategy.RowByRowAccessStrategy; import org.apache.iotdb.udf.api.customizer.strategy.SlidingSizeWindowAccessStrategy; import org.apache.iotdb.udf.api.type.Type; -import com.github.ggalmazor.ltdownsampling.LTThreeBuckets; -import com.github.ggalmazor.ltdownsampling.Point; import org.apache.commons.lang3.tuple.Pair; -import java.math.BigDecimal; import java.util.Arrays; import java.util.LinkedList; import java.util.List; @@ -129,29 +127,29 @@ public class UDTFSample implements UDTF { if (this.k < n) { if (this.method == Method.TRIANGLE) { - List<Point> input = new LinkedList<>(); + List<Pair<Long, Double>> input = new LinkedList<>(); for (int i = 0; i < n; i++) { Row row = rowWindow.getRow(i); - BigDecimal time = BigDecimal.valueOf(row.getTime()); - BigDecimal data = BigDecimal.valueOf(Util.getValueAsDouble(row)); - input.add(new Point(time, data)); + long time = row.getTime(); + double data = Util.getValueAsDouble(row); + input.add(Pair.of(time, data)); } if (k > 2) { // The first and last element will always be sampled so the buckets is k - 2 - List<Point> output = LTThreeBuckets.sorted(input, k - 2); - for (Point p : output) { + List<Pair<Long, Double>> output = LTThreeBuckets.sorted(input, k - 2); + for (Pair<Long, Double> p : output) { switch (dataType) { case INT32: - collector.putInt(p.getX().longValue(), p.getY().intValue()); + collector.putInt(p.getLeft(), p.getRight().intValue()); break; case INT64: - collector.putLong(p.getX().longValue(), p.getY().longValue()); + collector.putLong(p.getLeft(), p.getRight().longValue()); break; case FLOAT: - collector.putFloat(p.getX().longValue(), p.getY().floatValue()); + collector.putFloat(p.getLeft(), p.getRight().floatValue()); break; case DOUBLE: - collector.putDouble(p.getX().longValue(), p.getY().doubleValue()); + collector.putDouble(p.getLeft(), p.getRight()); break; default: throw new NoNumberException(); diff --git 
a/library-udf/src/main/java/org/apache/iotdb/library/dprofile/util/Area.java b/library-udf/src/main/java/org/apache/iotdb/library/dprofile/util/Area.java new file mode 100644 index 00000000000..10023a6294d --- /dev/null +++ b/library-udf/src/main/java/org/apache/iotdb/library/dprofile/util/Area.java @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.iotdb.library.dprofile.util; + +import org.apache.commons.lang3.tuple.Pair; + +import java.util.Arrays; +import java.util.List; + +import static java.lang.Math.abs; + +/** + * this class is copied and modified from <a + * href="https://github.com/ggalmazor/lt_downsampling_java8">...</a> project + */ +class Area<T extends Pair<Long, Double>> { + private final T generator; + private final double value; + + private Area(T generator, double value) { + this.generator = generator; + this.value = value; + } + + static <U extends Pair<Long, Double>> Area<U> ofTriangle( + Pair<Long, Double> a, U b, Pair<Long, Double> c) { + // area of a triangle = |[Ax(By - Cy) + Bx(Cy - Ay) + Cx(Ay - By)] / 2| + List<Double> addends = + Arrays.asList( + a.getLeft() * (b.getRight() - c.getRight()), + b.getLeft() * (c.getRight() - a.getRight()), + c.getLeft() * (a.getRight() - b.getRight())); + double sum = addends.stream().reduce(0d, Double::sum); + double value = abs(sum / 2); + return new Area<>(b, value); + } + + T getGenerator() { + return generator; + } + + public double getValue() { + return value; + } +} diff --git a/library-udf/src/main/java/org/apache/iotdb/library/dprofile/util/Bucket.java b/library-udf/src/main/java/org/apache/iotdb/library/dprofile/util/Bucket.java new file mode 100644 index 00000000000..caa640707cc --- /dev/null +++ b/library-udf/src/main/java/org/apache/iotdb/library/dprofile/util/Bucket.java @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iotdb.library.dprofile.util; + +import org.apache.commons.lang3.tuple.Pair; + +import java.util.Collections; +import java.util.List; +import java.util.function.Function; + +import static java.util.stream.Collectors.toList; + +/** + * this class is copied and modified from <a + * href="https://github.com/ggalmazor/lt_downsampling_java8">...</a> project + */ +class Bucket<T extends Pair<Long, Double>> { + private final List<T> data; + private final T first; + private final T last; + private final Pair<Long, Double> center; + private final T result; + + private Bucket(List<T> data, T first, T last, Pair<Long, Double> center, T result) { + this.data = data; + this.first = first; + this.last = last; + this.center = center; + this.result = result; + } + + static <U extends Pair<Long, Double>> Bucket<U> of(List<U> us) { + U first = us.get(0); + U last = us.get(us.size() - 1); + Pair<Long, Double> center = centerBetween(first, last); + return new Bucket<>(us, first, last, center, first); + } + + static <U extends Pair<Long, Double>> Bucket<U> of(U u) { + return new Bucket<>(Collections.singletonList(u), u, u, u, u); + } + + T getResult() { + return result; + } + + T getFirst() { + return first; + } + + T getLast() { + return last; + } + + Pair<Long, Double> getCenter() { + return center; + } + + <U> List<U> map(Function<T, U> mapper) { + return data.stream().map(mapper).collect(toList()); + } + + static Pair<Long, Double> centerBetween(Pair<Long, Double> a, Pair<Long, Double> b) { + Pair<Long, Double> vector = Pair.of(b.getLeft() 
- a.getLeft(), b.getRight() - a.getRight()); + Pair<Long, Double> halfVector = Pair.of(vector.getLeft() / 2, vector.getRight() / 2); + return Pair.of(a.getLeft() + halfVector.getLeft(), a.getRight() + halfVector.getRight()); + } +} diff --git a/library-udf/src/main/java/org/apache/iotdb/library/dprofile/util/LTThreeBuckets.java b/library-udf/src/main/java/org/apache/iotdb/library/dprofile/util/LTThreeBuckets.java new file mode 100644 index 00000000000..f63cc2b74c1 --- /dev/null +++ b/library-udf/src/main/java/org/apache/iotdb/library/dprofile/util/LTThreeBuckets.java @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.iotdb.library.dprofile.util; + +import org.apache.commons.lang3.tuple.Pair; + +import java.util.ArrayList; +import java.util.List; + +/** + * this class is copied and modified from <a + * href="https://github.com/ggalmazor/lt_downsampling_java8">...</a> project + */ +public final class LTThreeBuckets { + + public static List<Pair<Long, Double>> sorted( + List<Pair<Long, Double>> input, int desiredBuckets) { + return sorted(input, input.size(), desiredBuckets); + } + + public static List<Pair<Long, Double>> sorted( + List<Pair<Long, Double>> input, int inputSize, int desiredBuckets) { + List<Pair<Long, Double>> results = new ArrayList<>(); + + OnePassBucketizer.bucketize(input, inputSize, desiredBuckets).stream() + .collect(new SlidingCollector<>(3, 1)) + .stream() + .map(Triangle::of) + .forEach( + triangle -> { + if (results.isEmpty()) results.add(triangle.getFirst()); + + results.add(triangle.getResult()); + + if (results.size() == desiredBuckets + 1) results.add(triangle.getLast()); + }); + + return results; + } +} diff --git a/library-udf/src/main/java/org/apache/iotdb/library/dprofile/util/OnePassBucketizer.java b/library-udf/src/main/java/org/apache/iotdb/library/dprofile/util/OnePassBucketizer.java new file mode 100644 index 00000000000..1c5eddca379 --- /dev/null +++ b/library-udf/src/main/java/org/apache/iotdb/library/dprofile/util/OnePassBucketizer.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iotdb.library.dprofile.util; + +import org.apache.commons.lang3.tuple.Pair; + +import java.util.ArrayList; +import java.util.List; + +/** + * this class is copied and modified from <a + * href="https://github.com/ggalmazor/lt_downsampling_java8">...</a> project + */ +class OnePassBucketizer { + + static List<Bucket<Pair<Long, Double>>> bucketize( + List<Pair<Long, Double>> input, int inputSize, int desiredBuckets) { + int middleSize = inputSize - 2; + int bucketSize = middleSize / desiredBuckets; + int remainingElements = middleSize % desiredBuckets; + + if (bucketSize == 0) + throw new IllegalArgumentException( + "Can't produce " + + desiredBuckets + + " buckets from an input series of " + + (middleSize + 2) + + " elements"); + + List<Bucket<Pair<Long, Double>>> buckets = new ArrayList<>(); + + // Add first point as the only point in the first bucket + buckets.add(Bucket.of(input.get(0))); + + List<Pair<Long, Double>> rest = input.subList(1, input.size() - 1); + + // Add middle buckets. + // When inputSize is not a multiple of desiredBuckets, remaining elements are equally + // distributed on the first buckets. + while (buckets.size() < desiredBuckets + 1) { + int size = buckets.size() <= remainingElements ? 
bucketSize + 1 : bucketSize; + buckets.add(Bucket.of(rest.subList(0, size))); + rest = rest.subList(size, rest.size()); + } + + // Add last point as the only point in the last bucket + buckets.add(Bucket.of(input.get(input.size() - 1))); + + return buckets; + } +} diff --git a/library-udf/src/main/java/org/apache/iotdb/library/dprofile/util/SlidingCollector.java b/library-udf/src/main/java/org/apache/iotdb/library/dprofile/util/SlidingCollector.java new file mode 100644 index 00000000000..c369fb1d81a --- /dev/null +++ b/library-udf/src/main/java/org/apache/iotdb/library/dprofile/util/SlidingCollector.java @@ -0,0 +1,111 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
/**
 * Collector that groups the elements of a stream into windows of {@code size} elements, starting
 * a new window every {@code step} elements.
 *
 * <p>The sliding state (pending elements and the element count) is kept on the collector itself,
 * not in the accumulation container. It is cleared each time the supplier creates a container, so
 * one instance can be used for several collections one after another. It cannot be used for
 * parallel collection: the combiner always throws.
 *
 * <p>This class is copied and modified from the <a
 * href="https://github.com/ggalmazor/lt_downsampling_java8">lt_downsampling_java8</a> project.
 *
 * @param <T> type of the collected elements
 */
public class SlidingCollector<T> implements Collector<T, List<List<T>>, List<List<T>>> {

  private final int size;
  private final int step;
  private final int window;
  private final Queue<T> buffer = new ArrayDeque<>();
  private int totalIn = 0;

  /**
   * @param size number of elements in each emitted window
   * @param step number of elements dropped from the front after a window is emitted
   */
  public SlidingCollector(int size, int step) {
    this.size = size;
    this.step = step;
    // The buffer must hold whichever is larger before a window can be emitted and shifted.
    this.window = max(size, step);
  }

  /**
   * Returns a supplier of empty result lists. Creating a container also discards whatever a
   * previous collection left in the buffer, so a reused collector starts from a clean state.
   */
  @Override
  public Supplier<List<List<T>>> supplier() {
    return () -> {
      buffer.clear();
      totalIn = 0;
      return new ArrayList<>();
    };
  }

  /** Buffers each element and emits a window whenever the buffer reaches the window length. */
  @Override
  public BiConsumer<List<List<T>>, T> accumulator() {
    return (lists, t) -> {
      buffer.offer(t);
      ++totalIn;
      if (buffer.size() == window) {
        dumpCurrent(lists);
        shiftBy(step);
      }
    };
  }

  /**
   * Emits the leftover elements as a final, possibly shorter, window when fewer windows were
   * produced than {@link #estimateTotalOut()} expects.
   */
  @Override
  public Function<List<List<T>>, List<List<T>>> finisher() {
    return lists -> {
      if (!buffer.isEmpty()) {
        final int totalOut = estimateTotalOut();
        if (totalOut > lists.size()) {
          dumpCurrent(lists);
        }
      }
      return lists;
    };
  }

  /** Number of windows expected for the elements seen so far. */
  private int estimateTotalOut() {
    return max(0, (totalIn + step - size - 1) / step) + 1;
  }

  /** Appends a copy of the first {@code size} buffered elements to {@code lists}. */
  private void dumpCurrent(List<List<T>> lists) {
    final List<T> batch = buffer.stream().limit(size).collect(toList());
    lists.add(batch);
  }

  /** Removes {@code by} elements from the front of the buffer. */
  private void shiftBy(int by) {
    for (int i = 0; i < by; i++) {
      buffer.remove();
    }
  }

  /** Always throws: partial results of this collector cannot be merged. */
  @Override
  public BinaryOperator<List<List<T>>> combiner() {
    return (l1, l2) -> {
      throw new UnsupportedOperationException("Combining not possible");
    };
  }

  @Override
  public Set<Characteristics> characteristics() {
    return EnumSet.noneOf(Characteristics.class);
  }
}
+ */ + +package org.apache.iotdb.library.dprofile.util; + +import org.apache.commons.lang3.tuple.Pair; + +import java.util.List; + +import static java.util.Comparator.comparing; + +/** + * this class is copied and modified from <a + * href="https://github.com/ggalmazor/lt_downsampling_java8">...</a> project + */ +class Triangle<T extends Pair<Long, Double>> { + private final Bucket<T> left, center, right; + + private Triangle(Bucket<T> left, Bucket<T> center, Bucket<T> right) { + this.left = left; + this.center = center; + this.right = right; + } + + static <U extends Pair<Long, Double>> Triangle<U> of(List<Bucket<U>> buckets) { + return new Triangle<>(buckets.get(0), buckets.get(1), buckets.get(2)); + } + + T getFirst() { + return left.getFirst(); + } + + T getLast() { + return right.getLast(); + } + + T getResult() { + return center.map(b -> Area.ofTriangle(left.getResult(), b, right.getCenter())).stream() + .max(comparing(Area::getValue)) + .orElseThrow(() -> new RuntimeException("Can't obtain max area triangle")) + .getGenerator(); + } +} diff --git a/pom.xml b/pom.xml index 75b53374549..2077c3e9635 100644 --- a/pom.xml +++ b/pom.xml @@ -1390,16 +1390,6 @@ <system>Jira</system> <url>https://issues.apache.org/jira/browse/iotdb</url> </issueManagement> - <!-- - Needed for fetching lt_downsampling_java8 (which is used by library-udf) - (Adding this in the root or when building other parts, this transitive dependency can be fetched) - --> - <repositories> - <repository> - <id>jitpack.io</id> - <url>https://jitpack.io</url> - </repository> - </repositories> <!-- Only configure the site distribution as the rest is handled by the apache parent --> <distributionManagement> <site> @@ -1509,6 +1499,25 @@ </activation> <properties> <maven.compiler.release>8</maven.compiler.release> + <!-- + Add argLine for Java 9 and above, due to + [JEP 260: Encapsulate Most Internal APIs], + [JEP 396: Strongly Encapsulate JDK Internals by Default], + [JEP 403: Strongly Encapsulate JDK 
Internals] + --> + <argLine><![CDATA[ + --add-opens=java.base/java.lang=ALL-UNNAMED + --add-opens=java.base/java.util=ALL-UNNAMED + --add-opens=java.base/java.nio=ALL-UNNAMED + --add-opens=java.base/java.io=ALL-UNNAMED + --add-opens=java.base/java.net=ALL-UNNAMED + --add-opens=java.base/java.util.concurrent=ALL-UNNAMED + --add-exports=jdk.compiler/com.sun.tools.javac.util=ALL-UNNAMED + --add-exports=jdk.compiler/com.sun.tools.javac.api=ALL-UNNAMED + --add-exports=jdk.compiler/com.sun.tools.javac.file=ALL-UNNAMED + --add-exports=jdk.compiler/com.sun.tools.javac.parser=ALL-UNNAMED + --add-exports=jdk.compiler/com.sun.tools.javac.tree=ALL-UNNAMED + ]]></argLine> </properties> </profile> <!-- Current version of spotless cannot support JDK11 below --> @@ -1524,32 +1533,6 @@ <spotless.skip>true</spotless.skip> </properties> </profile> - <!-- - Add argLine for Java 16 and above, due to [JEP 396: Strongly Encapsulate JDK Internals by Default] - (https://openjdk.java.net/jeps/396) - --> - <profile> - <id>.java-16</id> - <activation> - <jdk>16</jdk> - </activation> - <properties> - <argLine>--illegal-access=permit --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-exports=jdk.compiler/com.sun.tools.javac.util=ALL-UNNAMED --add-exports=jdk.compiler/com.sun.tools.javac.api=ALL-UNNAMED --add-exports=jdk.compiler/com.sun.tools.java [...] 
- </properties> - </profile> - <!-- - Add argLine for Java 16 and above, due to [JEP 396: Strongly Encapsulate JDK Internals by Default] - (https://openjdk.java.net/jeps/396) - --> - <profile> - <id>.java-17-and-above</id> - <activation> - <jdk>[17,)</jdk> - </activation> - <properties> - <argLine>--add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-exports=jdk.compiler/com.sun.tools.javac.util=ALL-UNNAMED --add-exports=jdk.compiler/com.sun.tools.javac.api=ALL-UNNAMED --add-exports=jdk.compiler/com.sun.tools.javac.file=ALL-UNNAMED --add [...] - </properties> - </profile> <!-- Little helper profile that will disable running the cmake tests when the maven tests are being skipped --> <profile> <id>.skipTests</id>
