This is an automated email from the ASF dual-hosted git repository.
jackietien pushed a commit to branch develop
in repository https://gitbox.apache.org/repos/asf/tsfile.git
The following commit(s) were added to refs/heads/develop by this push:
new 0b286adf feat: Add memory computing logic for some classes (#671)
0b286adf is described below
commit 0b286adf251c7e2b020ab920f658a08c79b41965
Author: Le Yang <[email protected]>
AuthorDate: Tue Dec 23 10:23:54 2025 +0800
feat: Add memory computing logic for some classes (#671)
---
.../tsfile/common/regexp/DenseDfaMatcher.java | 25 ++++++++++++++++++++--
.../apache/tsfile/common/regexp/FjsMatcher.java | 24 ++++++++++++++++++++-
.../apache/tsfile/common/regexp/LikeMatcher.java | 22 ++++++++++++++++++-
.../apache/tsfile/common/regexp/LikePattern.java | 15 ++++++++++++-
.../org/apache/tsfile/common/regexp/Matcher.java | 4 +++-
.../apache/tsfile/common/regexp/NfaMatcher.java | 9 ++++++++
.../apache/tsfile/common/regexp/pattern/Any.java | 9 ++++++++
.../tsfile/common/regexp/pattern/Literal.java | 9 ++++++++
.../tsfile/common/regexp/pattern/Pattern.java | 4 +++-
.../tsfile/common/regexp/pattern/ZeroOrMore.java | 10 +++++++++
.../java/org/apache/tsfile/utils/TimeDuration.java | 3 +++
11 files changed, 127 insertions(+), 7 deletions(-)
diff --git a/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/DenseDfaMatcher.java b/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/DenseDfaMatcher.java
index c020ec4f..e368036f 100644
--- a/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/DenseDfaMatcher.java
+++ b/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/DenseDfaMatcher.java
@@ -23,6 +23,8 @@ import org.apache.tsfile.common.regexp.pattern.Any;
import org.apache.tsfile.common.regexp.pattern.Literal;
import org.apache.tsfile.common.regexp.pattern.Pattern;
import org.apache.tsfile.common.regexp.pattern.ZeroOrMore;
+import org.apache.tsfile.utils.Accountable;
+import org.apache.tsfile.utils.RamUsageEstimator;
import java.util.Arrays;
import java.util.List;
@@ -32,8 +34,10 @@ import static java.util.Objects.requireNonNull;
import static org.apache.tsfile.utils.Preconditions.checkArgument;
public class DenseDfaMatcher implements Matcher {
- public static final int FAIL_STATE = -1;
+ private static final long INSTANCE_SIZE =
+ RamUsageEstimator.shallowSizeOfInstance(DenseDfaMatcher.class);
+ public static final int FAIL_STATE = -1;
private List<Pattern> pattern;
private int start;
private int end;
@@ -63,7 +67,17 @@ public class DenseDfaMatcher implements Matcher {
return matcher.prefixMatch(input, offset, length);
}
- private static class DenseDfa {
+ @Override
+ public long ramBytesUsed() {
+ return INSTANCE_SIZE
+ + RamUsageEstimator.sizeOfObject(pattern)
+ + RamUsageEstimator.sizeOfObject(matcher);
+ }
+
+ private static class DenseDfa implements Accountable {
+ private static final long INSTANCE_SIZE =
+ RamUsageEstimator.shallowSizeOfInstance(DenseDfa.class);
+
// The DFA is encoded as a sequence of transitions for each possible byte value for each state.
// I.e., 256 transitions per state.
// The content of the transitions array is the base offset into
@@ -141,6 +155,13 @@ public class DenseDfaMatcher implements Matcher {
return accept[state >>> 8];
}
+ @Override
+ public long ramBytesUsed() {
+ return INSTANCE_SIZE
+ + RamUsageEstimator.sizeOf(transitions)
+ + RamUsageEstimator.sizeOf(accept);
+ }
+
private static NFA makeNfa(List<Pattern> pattern, int start, int end) {
checkArgument(!pattern.isEmpty(), "pattern is empty");
diff --git a/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/FjsMatcher.java b/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/FjsMatcher.java
index 0ef73177..22826c33 100644
--- a/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/FjsMatcher.java
+++ b/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/FjsMatcher.java
@@ -22,6 +22,8 @@ package org.apache.tsfile.common.regexp;
import org.apache.tsfile.common.regexp.pattern.Any;
import org.apache.tsfile.common.regexp.pattern.Literal;
import org.apache.tsfile.common.regexp.pattern.Pattern;
+import org.apache.tsfile.utils.Accountable;
+import org.apache.tsfile.utils.RamUsageEstimator;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
@@ -31,6 +33,9 @@ import static java.util.Objects.requireNonNull;
import static org.apache.tsfile.utils.Preconditions.checkArgument;
public class FjsMatcher implements Matcher {
+ private static final long INSTANCE_SIZE =
+ RamUsageEstimator.shallowSizeOfInstance(FjsMatcher.class);
+
private final List<Pattern> pattern;
private final int start;
private final int end;
@@ -56,7 +61,16 @@ public class FjsMatcher implements Matcher {
return matcher.match(input, offset, length);
}
- private static class Fjs {
+ @Override
+ public long ramBytesUsed() {
+ return INSTANCE_SIZE
+ + RamUsageEstimator.sizeOfObject(pattern)
+ + RamUsageEstimator.sizeOfObject(matcher);
+ }
+
+ private static class Fjs implements Accountable {
+ private static final long INSTANCE_SIZE = RamUsageEstimator.shallowSizeOfInstance(Fjs.class);
+
private final boolean exact;
private final List<byte[]> patterns = new ArrayList<>();
private final List<int[]> bmsShifts = new ArrayList<>();
@@ -219,5 +233,13 @@ public class FjsMatcher implements Matcher {
return !exact || remaining == 0;
}
+
+ @Override
+ public long ramBytesUsed() {
+ return INSTANCE_SIZE
+ + RamUsageEstimator.sizeOfObject(patterns)
+ + RamUsageEstimator.sizeOfObject(bmsShifts)
+ + RamUsageEstimator.sizeOfObject(kmpShifts);
+ }
}
}
diff --git a/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/LikeMatcher.java b/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/LikeMatcher.java
index 49347c3d..5ad3d4f3 100644
--- a/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/LikeMatcher.java
+++ b/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/LikeMatcher.java
@@ -23,6 +23,8 @@ import org.apache.tsfile.common.regexp.pattern.Any;
import org.apache.tsfile.common.regexp.pattern.Literal;
import org.apache.tsfile.common.regexp.pattern.Pattern;
import org.apache.tsfile.common.regexp.pattern.ZeroOrMore;
+import org.apache.tsfile.utils.Accountable;
+import org.apache.tsfile.utils.RamUsageEstimator;
import java.util.ArrayList;
import java.util.List;
@@ -31,7 +33,10 @@ import java.util.OptionalInt;
import static java.nio.charset.StandardCharsets.UTF_8;
-public class LikeMatcher {
+public class LikeMatcher implements Accountable {
+ private static final long INSTANCE_SIZE =
+ RamUsageEstimator.shallowSizeOfInstance(LikeMatcher.class);
+
private final int minSize;
private final OptionalInt maxSize;
private final byte[] prefix;
@@ -173,6 +178,21 @@ public class LikeMatcher {
return true;
}
+ @Override
+ public long ramBytesUsed() {
+ long size =
+ INSTANCE_SIZE
+ + RamUsageEstimator.shallowSizeOf(maxSize)
+ + RamUsageEstimator.sizeOf(prefix)
+ + RamUsageEstimator.sizeOf(suffix)
+ + RamUsageEstimator.shallowSizeOf(matcher);
+
+ if (matcher.isPresent()) {
+ size += matcher.get().ramBytesUsed();
+ }
+ return size;
+ }
+
private boolean startsWith(byte[] pattern, byte[] input, int offset) {
for (int i = 0; i < pattern.length; i++) {
if (pattern[i] != input[offset + i]) {
diff --git a/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/LikePattern.java b/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/LikePattern.java
index bda6ef3a..762b4ff6 100644
--- a/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/LikePattern.java
+++ b/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/LikePattern.java
@@ -19,12 +19,18 @@
package org.apache.tsfile.common.regexp;
+import org.apache.tsfile.utils.Accountable;
+import org.apache.tsfile.utils.RamUsageEstimator;
+
import java.util.Objects;
import java.util.Optional;
import static java.util.Objects.requireNonNull;
-public class LikePattern {
+public class LikePattern implements Accountable {
+ private static final long INSTANCE_SIZE =
+ RamUsageEstimator.shallowSizeOfInstance(LikePattern.class);
+
private final String pattern;
private final Optional<Character> escape;
private final LikeMatcher matcher;
@@ -81,4 +87,11 @@ public class LikePattern {
+ (escape.map(character -> ", escape=" + character).orElse(""))
+ '}';
}
+
+ @Override
+ public long ramBytesUsed() {
+ long size =
+ INSTANCE_SIZE + RamUsageEstimator.sizeOf(pattern) + RamUsageEstimator.shallowSizeOf(escape);
+ return size + matcher.ramBytesUsed();
+ }
}
diff --git a/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/Matcher.java b/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/Matcher.java
index 49536933..f6676496 100644
--- a/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/Matcher.java
+++ b/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/Matcher.java
@@ -19,6 +19,8 @@
package org.apache.tsfile.common.regexp;
-public interface Matcher {
+import org.apache.tsfile.utils.Accountable;
+
+public interface Matcher extends Accountable {
boolean match(byte[] input, int offset, int length);
}
diff --git a/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/NfaMatcher.java b/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/NfaMatcher.java
index c6d6c477..d0e8308d 100644
--- a/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/NfaMatcher.java
+++ b/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/NfaMatcher.java
@@ -23,11 +23,15 @@ import org.apache.tsfile.common.regexp.pattern.Any;
import org.apache.tsfile.common.regexp.pattern.Literal;
import org.apache.tsfile.common.regexp.pattern.Pattern;
import org.apache.tsfile.common.regexp.pattern.ZeroOrMore;
+import org.apache.tsfile.utils.RamUsageEstimator;
import java.util.Arrays;
import java.util.List;
public class NfaMatcher implements Matcher {
+ private static final long INSTANCE_SIZE =
+ RamUsageEstimator.shallowSizeOfInstance(NfaMatcher.class);
+
private static final int ANY = -1;
private static final int NONE = -2;
private static final int INVALID_CODEPOINT = -1;
@@ -173,4 +177,9 @@ public class NfaMatcher implements Matcher {
return accept;
}
+
+ @Override
+ public long ramBytesUsed() {
+ return INSTANCE_SIZE + RamUsageEstimator.sizeOf(loopback) + RamUsageEstimator.sizeOf(match);
+ }
}
diff --git a/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/pattern/Any.java b/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/pattern/Any.java
index 8c1e650a..ccf84b17 100644
--- a/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/pattern/Any.java
+++ b/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/pattern/Any.java
@@ -19,7 +19,11 @@
package org.apache.tsfile.common.regexp.pattern;
+import org.apache.tsfile.utils.RamUsageEstimator;
+
public class Any implements Pattern {
+ private static final long INSTANCE_SIZE = RamUsageEstimator.shallowSizeOfInstance(Any.class);
+
private final int length;
public Any(int length) {
@@ -41,4 +45,9 @@ public class Any implements Pattern {
}
return sb.toString();
}
+
+ @Override
+ public long ramBytesUsed() {
+ return INSTANCE_SIZE;
+ }
}
diff --git a/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/pattern/Literal.java b/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/pattern/Literal.java
index 8ea9cf05..b41422ba 100644
--- a/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/pattern/Literal.java
+++ b/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/pattern/Literal.java
@@ -19,7 +19,11 @@
package org.apache.tsfile.common.regexp.pattern;
+import org.apache.tsfile.utils.RamUsageEstimator;
+
public class Literal implements Pattern {
+ private static final long INSTANCE_SIZE = RamUsageEstimator.shallowSizeOfInstance(Literal.class);
+
private final String value;
public Literal(String value) {
@@ -34,4 +38,9 @@ public class Literal implements Pattern {
public String toString() {
return value;
}
+
+ @Override
+ public long ramBytesUsed() {
+ return INSTANCE_SIZE + RamUsageEstimator.sizeOf(value);
+ }
}
diff --git a/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/pattern/Pattern.java b/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/pattern/Pattern.java
index 959e5a71..2ed10847 100644
--- a/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/pattern/Pattern.java
+++ b/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/pattern/Pattern.java
@@ -19,4 +19,6 @@
package org.apache.tsfile.common.regexp.pattern;
-public interface Pattern {}
+import org.apache.tsfile.utils.Accountable;
+
+public interface Pattern extends Accountable {}
diff --git a/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/pattern/ZeroOrMore.java b/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/pattern/ZeroOrMore.java
index ac0a0443..17454671 100644
--- a/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/pattern/ZeroOrMore.java
+++ b/java/tsfile/src/main/java/org/apache/tsfile/common/regexp/pattern/ZeroOrMore.java
@@ -19,9 +19,19 @@
package org.apache.tsfile.common.regexp.pattern;
+import org.apache.tsfile.utils.RamUsageEstimator;
+
public class ZeroOrMore implements Pattern {
+ private static final long INSTANCE_SIZE =
+ RamUsageEstimator.shallowSizeOfInstance(ZeroOrMore.class);
+
@Override
public String toString() {
return "%";
}
+
+ @Override
+ public long ramBytesUsed() {
+ return INSTANCE_SIZE;
+ }
}
diff --git a/java/tsfile/src/main/java/org/apache/tsfile/utils/TimeDuration.java b/java/tsfile/src/main/java/org/apache/tsfile/utils/TimeDuration.java
index 042c1d07..9e835423 100644
--- a/java/tsfile/src/main/java/org/apache/tsfile/utils/TimeDuration.java
+++ b/java/tsfile/src/main/java/org/apache/tsfile/utils/TimeDuration.java
@@ -30,6 +30,9 @@ import java.util.TimeZone;
import java.util.concurrent.TimeUnit;
public class TimeDuration implements Serializable {
+ public static final long INSTANCE_SIZE =
+ RamUsageEstimator.shallowSizeOfInstance(TimeDuration.class);
+
// month part of time duration
public final int monthDuration;
// non-month part of time duration, its precision is same as current time_precision