This is an automated email from the ASF dual-hosted git repository.
jiangtian pushed a commit to branch dev/1.1
in repository https://gitbox.apache.org/repos/asf/tsfile.git
The following commit(s) were added to refs/heads/dev/1.1 by this push:
new a489ea04 [To dev/1.1] Correct the retained size calculation for BinaryColumn and BinaryColumnBuilder (#514)
a489ea04 is described below
commit a489ea04a7a0dca256125d9cfbb81ca55b237e98
Author: Jackie Tien <[email protected]>
AuthorDate: Mon Jun 16 18:53:48 2025 +0800
[To dev/1.1] Correct the retained size calculation for BinaryColumn and BinaryColumnBuilder (#514)
* Correct the retained size calculation for BinaryColumn and BinaryColumnBuilder
* Empty-Commit
* remove useless file
---
.../org/apache/tsfile/block/column/Column.java | 7 ++++
.../org/apache/tsfile/utils/RamUsageEstimator.java | 12 ++++++
.../apache/tsfile/read/common/block/TsBlock.java | 24 ++++++++++++
.../tsfile/read/common/block/TsBlockBuilder.java | 4 --
.../read/common/block/column/BinaryColumn.java | 45 +++++++++++++++++++---
.../common/block/column/BinaryColumnBuilder.java | 5 +--
.../read/common/block/column/BooleanColumn.java | 5 +++
.../read/common/block/column/DoubleColumn.java | 5 +++
.../read/common/block/column/FloatColumn.java | 5 +++
.../tsfile/read/common/block/column/IntColumn.java | 5 +++
.../read/common/block/column/LongColumn.java | 5 +++
.../read/common/block/column/NullColumn.java | 5 +++
.../block/column/RunLengthEncodedColumn.java | 5 +++
.../read/common/block/column/TimeColumn.java | 8 +++-
.../read/common/block/column/TsBlockSerde.java | 6 +--
15 files changed, 128 insertions(+), 18 deletions(-)
diff --git a/java/common/src/main/java/org/apache/tsfile/block/column/Column.java b/java/common/src/main/java/org/apache/tsfile/block/column/Column.java
index 161a2c8e..e9093693 100644
--- a/java/common/src/main/java/org/apache/tsfile/block/column/Column.java
+++ b/java/common/src/main/java/org/apache/tsfile/block/column/Column.java
@@ -133,6 +133,13 @@ public interface Column {
*/
long getRetainedSizeInBytes();
+ /**
+ * Returns the size of this Column as if it was compacted, ignoring any over-allocations and any
+ * unloaded nested Columns. For example, in dictionary blocks, this only counts each dictionary
+ * entry once, rather than each time a value is referenced.
+ */
+ long getSizeInBytes();
+
/**
* Returns a column starting at the specified position and extends for the specified length. The
* specified region must be entirely contained within this column.
diff --git a/java/common/src/main/java/org/apache/tsfile/utils/RamUsageEstimator.java b/java/common/src/main/java/org/apache/tsfile/utils/RamUsageEstimator.java
index 7c66e56d..2f1cf3db 100644
--- a/java/common/src/main/java/org/apache/tsfile/utils/RamUsageEstimator.java
+++ b/java/common/src/main/java/org/apache/tsfile/utils/RamUsageEstimator.java
@@ -271,6 +271,18 @@ public final class RamUsageEstimator {
: alignObjectSize(NUM_BYTES_ARRAY_HEADER + (long) Double.BYTES * arr.length);
}
+ public static long sizeOf(Accountable[] arr) {
+ if (arr == null) {
+ return 0;
+ } else {
+ long size = shallowSizeOf(arr);
+ for (Accountable obj : arr) {
+ size += obj != null ? obj.ramBytesUsed() : 0;
+ }
+ return size;
+ }
+ }
+
/** Returns the size in bytes of the String[] object. */
public static long sizeOf(String[] arr) {
if (arr == null) {
diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/TsBlock.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/TsBlock.java
index 14072b7a..0c457158 100644
--- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/TsBlock.java
+++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/TsBlock.java
@@ -65,6 +65,8 @@ public class TsBlock {
private volatile long retainedSizeInBytes = -1;
+ private volatile long sizeInBytes = -1;
+
public TsBlock(int positionCount) {
this(false, positionCount, null, EMPTY_COLUMNS);
}
@@ -117,6 +119,18 @@ public class TsBlock {
return retainedSizeInBytes;
}
+ /**
+ * Returns the size of this block as if it was compacted, ignoring any over-allocations and any
+ * unloaded nested blocks. For example, in dictionary blocks, this only counts each dictionary
+ * entry once, rather than each time a value is referenced.
+ */
+ public long getSizeInBytes() {
+ if (sizeInBytes < 0) {
+ return updateSize();
+ }
+ return sizeInBytes;
+ }
+
/**
* @param positionOffset start offset
* @param length slice length
@@ -504,6 +518,16 @@ public class TsBlock {
return newRetainedSizeInBytes;
}
+ private long updateSize() {
+ long newSizeInBytes = INSTANCE_SIZE;
+ newSizeInBytes += timeColumn.getSizeInBytes();
+ for (Column column : valueColumns) {
+ newSizeInBytes += column.getSizeInBytes();
+ }
+ this.sizeInBytes = newSizeInBytes;
+ return newSizeInBytes;
+ }
+
public int getTotalInstanceSize() {
int totalInstanceSize = INSTANCE_SIZE;
totalInstanceSize += timeColumn.getInstanceSize();
diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/TsBlockBuilder.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/TsBlockBuilder.java
index 568c222c..389d59e6 100644
--- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/TsBlockBuilder.java
+++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/TsBlockBuilder.java
@@ -104,8 +104,6 @@ public class TsBlockBuilder {
valueColumnBuilders = new ColumnBuilder[types.size()];
for (int i = 0; i < valueColumnBuilders.length; i++) {
- // TODO use Type interface to encapsulate createColumnBuilder to each concrete type class
- // instead of switch-case
switch (types.get(i)) {
case BOOLEAN:
valueColumnBuilders[i] =
@@ -176,8 +174,6 @@ public class TsBlockBuilder {
valueColumnBuilders = new ColumnBuilder[types.size()];
int initialExpectedEntries = timeColumnBuilder.getPositionCount();
for (int i = 0; i < valueColumnBuilders.length; i++) {
- // TODO use Type interface to encapsulate createColumnBuilder to each concrete type class
- // instead of switch-case
switch (types.get(i)) {
case BOOLEAN:
valueColumnBuilders[i] =
diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BinaryColumn.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BinaryColumn.java
index beb48c28..ab244311 100644
--- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BinaryColumn.java
+++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BinaryColumn.java
@@ -30,8 +30,8 @@ import java.util.Arrays;
import java.util.Optional;
import static org.apache.tsfile.read.common.block.column.ColumnUtil.checkValidRegion;
+import static org.apache.tsfile.utils.RamUsageEstimator.sizeOf;
import static org.apache.tsfile.utils.RamUsageEstimator.sizeOfBooleanArray;
-import static org.apache.tsfile.utils.RamUsageEstimator.sizeOfObjectArray;
public class BinaryColumn implements Column {
@@ -44,6 +44,7 @@ public class BinaryColumn implements Column {
private final Binary[] values;
private final long retainedSizeInBytes;
+ private final long sizeInBytes;
public BinaryColumn(int positionCount, Optional<boolean[]> valueIsNull, Binary[] values) {
this(0, positionCount, valueIsNull.orElse(null), values);
@@ -69,9 +70,37 @@ public class BinaryColumn implements Column {
}
this.valueIsNull = valueIsNull;
- // TODO we need to sum up all the Binary's retainedSize here
- retainedSizeInBytes =
- INSTANCE_SIZE + sizeOfBooleanArray(positionCount) + sizeOfObjectArray(positionCount);
+ retainedSizeInBytes = INSTANCE_SIZE + sizeOfBooleanArray(positionCount) + sizeOf(values);
+ sizeInBytes = values.length > 0 ? retainedSizeInBytes * positionCount / values.length : 0L;
+ }
+
+ // called by getRegion which already knows the underlying retainedSizeInBytes
+ private BinaryColumn(
+ int arrayOffset,
+ int positionCount,
+ boolean[] valueIsNull,
+ Binary[] values,
+ long retainedSizeInBytes) {
+ if (arrayOffset < 0) {
+ throw new IllegalArgumentException("arrayOffset is negative");
+ }
+ this.arrayOffset = arrayOffset;
+ if (positionCount < 0) {
+ throw new IllegalArgumentException("positionCount is negative");
+ }
+ this.positionCount = positionCount;
+
+ if (values.length - arrayOffset < positionCount) {
+ throw new IllegalArgumentException("values length is less than positionCount");
+ }
+ this.values = values;
+
+ if (valueIsNull != null && valueIsNull.length - arrayOffset < positionCount) {
+ throw new IllegalArgumentException("isNull length is less than positionCount");
+ }
+ this.valueIsNull = valueIsNull;
+ this.retainedSizeInBytes = retainedSizeInBytes;
+ this.sizeInBytes = values.length > 0 ? retainedSizeInBytes * positionCount / values.length : 0L;
}
@Override
@@ -134,10 +163,16 @@ public class BinaryColumn implements Column {
return retainedSizeInBytes;
}
+ @Override
+ public long getSizeInBytes() {
+ return sizeInBytes;
+ }
+
@Override
public Column getRegion(int positionOffset, int length) {
checkValidRegion(getPositionCount(), positionOffset, length);
- return new BinaryColumn(positionOffset + arrayOffset, length, valueIsNull, values);
+ return new BinaryColumn(
+ positionOffset + arrayOffset, length, valueIsNull, values, getRetainedSizeInBytes());
}
@Override
diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BinaryColumnBuilder.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BinaryColumnBuilder.java
index 917c6e5f..fa5d9cdf 100644
--- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BinaryColumnBuilder.java
+++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BinaryColumnBuilder.java
@@ -32,7 +32,6 @@ import java.util.Arrays;
import static java.lang.Math.max;
import static org.apache.tsfile.read.common.block.column.ColumnUtil.calculateBlockResetSize;
-import static org.apache.tsfile.utils.RamUsageEstimator.shallowSizeOf;
import static org.apache.tsfile.utils.RamUsageEstimator.sizeOf;
public class BinaryColumnBuilder implements ColumnBuilder {
@@ -124,7 +123,6 @@ public class BinaryColumnBuilder implements ColumnBuilder {
@Override
public long getRetainedSizeInBytes() {
- // TODO we need to sum up all the Binary's retainedSize here
long size = INSTANCE_SIZE + arraysRetainedSizeInBytes;
if (columnBuilderStatus != null) {
size += ColumnBuilderStatus.INSTANCE_SIZE;
@@ -134,7 +132,6 @@ public class BinaryColumnBuilder implements ColumnBuilder {
@Override
public ColumnBuilder newColumnBuilderLike(ColumnBuilderStatus columnBuilderStatus) {
- // TODO we should take retain size into account here
return new BinaryColumnBuilder(columnBuilderStatus, calculateBlockResetSize(positionCount));
}
@@ -153,6 +150,6 @@ public class BinaryColumnBuilder implements ColumnBuilder {
}
private void updateArraysDataSize() {
- arraysRetainedSizeInBytes = sizeOf(valueIsNull) + shallowSizeOf(values);
+ arraysRetainedSizeInBytes = sizeOf(valueIsNull) + sizeOf(values);
}
}
diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BooleanColumn.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BooleanColumn.java
index f74bcd8e..f502b74e 100644
--- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BooleanColumn.java
+++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BooleanColumn.java
@@ -132,6 +132,11 @@ public class BooleanColumn implements Column {
return retainedSizeInBytes;
}
+ @Override
+ public long getSizeInBytes() {
+ return (long) positionCount * SIZE_IN_BYTES_PER_POSITION;
+ }
+
@Override
public Column getRegion(int positionOffset, int length) {
checkValidRegion(getPositionCount(), positionOffset, length);
diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/DoubleColumn.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/DoubleColumn.java
index 0311830b..ee2caa4c 100644
--- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/DoubleColumn.java
+++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/DoubleColumn.java
@@ -133,6 +133,11 @@ public class DoubleColumn implements Column {
return retainedSizeInBytes;
}
+ @Override
+ public long getSizeInBytes() {
+ return (long) positionCount * SIZE_IN_BYTES_PER_POSITION;
+ }
+
@Override
public Column getRegion(int positionOffset, int length) {
checkValidRegion(getPositionCount(), positionOffset, length);
diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/FloatColumn.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/FloatColumn.java
index 449212eb..ad55c15c 100644
--- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/FloatColumn.java
+++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/FloatColumn.java
@@ -133,6 +133,11 @@ public class FloatColumn implements Column {
return retainedSizeInBytes;
}
+ @Override
+ public long getSizeInBytes() {
+ return (long) positionCount * SIZE_IN_BYTES_PER_POSITION;
+ }
+
@Override
public Column getRegion(int positionOffset, int length) {
checkValidRegion(getPositionCount(), positionOffset, length);
diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/IntColumn.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/IntColumn.java
index 0254690b..9c7a0f73 100644
--- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/IntColumn.java
+++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/IntColumn.java
@@ -133,6 +133,11 @@ public class IntColumn implements Column {
return retainedSizeInBytes;
}
+ @Override
+ public long getSizeInBytes() {
+ return (long) positionCount * SIZE_IN_BYTES_PER_POSITION;
+ }
+
@Override
public Column getRegion(int positionOffset, int length) {
checkValidRegion(getPositionCount(), positionOffset, length);
diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/LongColumn.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/LongColumn.java
index 34cc7dc4..b42c6fa5 100644
--- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/LongColumn.java
+++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/LongColumn.java
@@ -133,6 +133,11 @@ public class LongColumn implements Column {
return retainedSizeInBytes;
}
+ @Override
+ public long getSizeInBytes() {
+ return (long) positionCount * SIZE_IN_BYTES_PER_POSITION;
+ }
+
@Override
public Column getRegion(int positionOffset, int length) {
checkValidRegion(getPositionCount(), positionOffset, length);
diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/NullColumn.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/NullColumn.java
index 8b7e2152..8e56b5ab 100644
--- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/NullColumn.java
+++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/NullColumn.java
@@ -83,6 +83,11 @@ public class NullColumn implements Column {
return retainedSizeInBytes;
}
+ @Override
+ public long getSizeInBytes() {
+ return retainedSizeInBytes;
+ }
+
@Override
public Column getRegion(int positionOffset, int length) {
checkValidRegion(getPositionCount(), positionOffset, length);
diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/RunLengthEncodedColumn.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/RunLengthEncodedColumn.java
index a78e2daa..649bc70e 100644
--- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/RunLengthEncodedColumn.java
+++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/RunLengthEncodedColumn.java
@@ -191,6 +191,11 @@ public class RunLengthEncodedColumn implements Column {
return INSTANCE_SIZE + value.getRetainedSizeInBytes();
}
+ @Override
+ public long getSizeInBytes() {
+ return value.getSizeInBytes();
+ }
+
@Override
public Column getRegion(int positionOffset, int length) {
checkValidRegion(positionCount, positionOffset, length);
diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/TimeColumn.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/TimeColumn.java
index 1783f6a3..a936a3e7 100644
--- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/TimeColumn.java
+++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/TimeColumn.java
@@ -98,8 +98,7 @@ public class TimeColumn implements Column {
@Override
public boolean[] isNull() {
- // todo
- return null;
+ throw new UnsupportedOperationException("isNull is not supported for TimeColumn");
}
@Override
@@ -112,6 +111,11 @@ public class TimeColumn implements Column {
return retainedSizeInBytes;
}
+ @Override
+ public long getSizeInBytes() {
+ return (long) positionCount * SIZE_IN_BYTES_PER_POSITION;
+ }
+
@Override
public Column getRegion(int positionOffset, int length) {
ColumnUtil.checkValidRegion(getPositionCount(), positionOffset, length);
diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/TsBlockSerde.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/TsBlockSerde.java
index 0ef00256..a5074504 100644
--- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/TsBlockSerde.java
+++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/TsBlockSerde.java
@@ -89,12 +89,12 @@ public class TsBlockSerde {
* @return Serialized tsblock.
*/
public ByteBuffer serialize(TsBlock tsBlock) throws IOException {
- if (tsBlock.getRetainedSizeInBytes() > Integer.MAX_VALUE) {
+ if (tsBlock.getSizeInBytes() > Integer.MAX_VALUE) {
throw new IllegalStateException(
- "TsBlock should not be that large: " + tsBlock.getRetainedSizeInBytes());
+ "TsBlock should not be that large: " + tsBlock.getSizeInBytes());
}
ByteArrayOutputStream byteArrayOutputStream =
- new ByteArrayOutputStream((int) tsBlock.getRetainedSizeInBytes());
+ new ByteArrayOutputStream((int) tsBlock.getSizeInBytes());
DataOutputStream dataOutputStream = new DataOutputStream(byteArrayOutputStream);
// Value column count.