This is an automated email from the ASF dual-hosted git repository. jackietien pushed a commit to branch BinaryMemory in repository https://gitbox.apache.org/repos/asf/tsfile.git
commit 97917485c1d084945ec238967866001f075c2ddf Author: Jackie Tien <[email protected]> AuthorDate: Fri Feb 7 09:02:51 2025 +0800 Correct the retained size calculation for BinaryColumn and BinaryColumnBuilder --- .mvn/.develocity/develocity-workspace-id | 1 + .../org/apache/tsfile/block/column/Column.java | 7 ++++ .../org/apache/tsfile/utils/RamUsageEstimator.java | 12 ++++++ .../apache/tsfile/read/common/block/TsBlock.java | 24 ++++++++++++ .../tsfile/read/common/block/TsBlockBuilder.java | 4 -- .../read/common/block/column/BinaryColumn.java | 45 +++++++++++++++++++--- .../common/block/column/BinaryColumnBuilder.java | 5 +-- .../read/common/block/column/BooleanColumn.java | 5 +++ .../read/common/block/column/DoubleColumn.java | 5 +++ .../read/common/block/column/FloatColumn.java | 5 +++ .../tsfile/read/common/block/column/IntColumn.java | 5 +++ .../read/common/block/column/LongColumn.java | 5 +++ .../read/common/block/column/NullColumn.java | 5 +++ .../block/column/RunLengthEncodedColumn.java | 5 +++ .../read/common/block/column/TimeColumn.java | 8 +++- .../read/common/block/column/TsBlockSerde.java | 6 +-- 16 files changed, 129 insertions(+), 18 deletions(-) diff --git a/.mvn/.develocity/develocity-workspace-id b/.mvn/.develocity/develocity-workspace-id new file mode 100644 index 00000000..61f84136 --- /dev/null +++ b/.mvn/.develocity/develocity-workspace-id @@ -0,0 +1 @@ +wcio6zj4nzcp3aw5rha55b4j34 \ No newline at end of file diff --git a/java/common/src/main/java/org/apache/tsfile/block/column/Column.java b/java/common/src/main/java/org/apache/tsfile/block/column/Column.java index 161a2c8e..e9093693 100644 --- a/java/common/src/main/java/org/apache/tsfile/block/column/Column.java +++ b/java/common/src/main/java/org/apache/tsfile/block/column/Column.java @@ -133,6 +133,13 @@ public interface Column { */ long getRetainedSizeInBytes(); + /** + * Returns the size of this Column as if it was compacted, ignoring any over-allocations and any + * unloaded nested Columns. For example, in dictionary blocks, this only counts each dictionary + * entry once, rather than each time a value is referenced. + */ + long getSizeInBytes(); + /** * Returns a column starting at the specified position and extends for the specified length. The * specified region must be entirely contained within this column. diff --git a/java/common/src/main/java/org/apache/tsfile/utils/RamUsageEstimator.java b/java/common/src/main/java/org/apache/tsfile/utils/RamUsageEstimator.java index 7c66e56d..2f1cf3db 100644 --- a/java/common/src/main/java/org/apache/tsfile/utils/RamUsageEstimator.java +++ b/java/common/src/main/java/org/apache/tsfile/utils/RamUsageEstimator.java @@ -271,6 +271,18 @@ public final class RamUsageEstimator { : alignObjectSize(NUM_BYTES_ARRAY_HEADER + (long) Double.BYTES * arr.length); } + public static long sizeOf(Accountable[] arr) { + if (arr == null) { + return 0; + } else { + long size = shallowSizeOf(arr); + for (Accountable obj : arr) { + size += obj != null ? obj.ramBytesUsed() : 0; + } + return size; + } + } + /** Returns the size in bytes of the String[] object. */ public static long sizeOf(String[] arr) { if (arr == null) { diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/TsBlock.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/TsBlock.java index 14072b7a..0c457158 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/TsBlock.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/TsBlock.java @@ -65,6 +65,8 @@ public class TsBlock { private volatile long retainedSizeInBytes = -1; + private volatile long sizeInBytes = -1; + public TsBlock(int positionCount) { this(false, positionCount, null, EMPTY_COLUMNS); } @@ -117,6 +119,18 @@ public class TsBlock { return retainedSizeInBytes; } + /** + * Returns the size of this block as if it was compacted, ignoring any over-allocations and any + * unloaded nested blocks. For example, in dictionary blocks, this only counts each dictionary + * entry once, rather than each time a value is referenced. + */ + public long getSizeInBytes() { + if (sizeInBytes < 0) { + return updateSize(); + } + return sizeInBytes; + } + /** * @param positionOffset start offset * @param length slice length @@ -504,6 +518,16 @@ public class TsBlock { return newRetainedSizeInBytes; } + private long updateSize() { + long newSizeInBytes = INSTANCE_SIZE; + newSizeInBytes += timeColumn.getSizeInBytes(); + for (Column column : valueColumns) { + newSizeInBytes += column.getSizeInBytes(); + } + this.sizeInBytes = newSizeInBytes; + return newSizeInBytes; + } + public int getTotalInstanceSize() { int totalInstanceSize = INSTANCE_SIZE; totalInstanceSize += timeColumn.getInstanceSize(); diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/TsBlockBuilder.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/TsBlockBuilder.java index 568c222c..389d59e6 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/TsBlockBuilder.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/TsBlockBuilder.java @@ -104,8 +104,6 @@ public class TsBlockBuilder { valueColumnBuilders = new ColumnBuilder[types.size()]; for (int i = 0; i < valueColumnBuilders.length; i++) { - // TODO use Type interface to encapsulate createColumnBuilder to each concrete type class - // instead of switch-case switch (types.get(i)) { case BOOLEAN: valueColumnBuilders[i] = @@ -176,8 +174,6 @@ public class TsBlockBuilder { valueColumnBuilders = new ColumnBuilder[types.size()]; int initialExpectedEntries = timeColumnBuilder.getPositionCount(); for (int i = 0; i < valueColumnBuilders.length; i++) { - // TODO use Type interface to encapsulate createColumnBuilder to each concrete type class - // instead of switch-case switch (types.get(i)) { case BOOLEAN: valueColumnBuilders[i] = diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BinaryColumn.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BinaryColumn.java index beb48c28..ab244311 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BinaryColumn.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BinaryColumn.java @@ -30,8 +30,8 @@ import java.util.Arrays; import java.util.Optional; import static org.apache.tsfile.read.common.block.column.ColumnUtil.checkValidRegion; +import static org.apache.tsfile.utils.RamUsageEstimator.sizeOf; import static org.apache.tsfile.utils.RamUsageEstimator.sizeOfBooleanArray; -import static org.apache.tsfile.utils.RamUsageEstimator.sizeOfObjectArray; public class BinaryColumn implements Column { @@ -44,6 +44,7 @@ public class BinaryColumn implements Column { private final Binary[] values; private final long retainedSizeInBytes; + private final long sizeInBytes; public BinaryColumn(int positionCount, Optional<boolean[]> valueIsNull, Binary[] values) { this(0, positionCount, valueIsNull.orElse(null), values); @@ -69,9 +70,37 @@ public class BinaryColumn implements Column { } this.valueIsNull = valueIsNull; - // TODO we need to sum up all the Binary's retainedSize here - retainedSizeInBytes = - INSTANCE_SIZE + sizeOfBooleanArray(positionCount) + sizeOfObjectArray(positionCount); + retainedSizeInBytes = INSTANCE_SIZE + sizeOfBooleanArray(positionCount) + sizeOf(values); + sizeInBytes = values.length > 0 ? retainedSizeInBytes * positionCount / values.length : 0L; + } + + // called by getRegion which already knows the underlying retainedSizeInBytes + private BinaryColumn( + int arrayOffset, + int positionCount, + boolean[] valueIsNull, + Binary[] values, + long retainedSizeInBytes) { + if (arrayOffset < 0) { + throw new IllegalArgumentException("arrayOffset is negative"); + } + this.arrayOffset = arrayOffset; + if (positionCount < 0) { + throw new IllegalArgumentException("positionCount is negative"); + } + this.positionCount = positionCount; + + if (values.length - arrayOffset < positionCount) { + throw new IllegalArgumentException("values length is less than positionCount"); + } + this.values = values; + + if (valueIsNull != null && valueIsNull.length - arrayOffset < positionCount) { + throw new IllegalArgumentException("isNull length is less than positionCount"); + } + this.valueIsNull = valueIsNull; + this.retainedSizeInBytes = retainedSizeInBytes; + this.sizeInBytes = values.length > 0 ? retainedSizeInBytes * positionCount / values.length : 0L; } @Override @@ -134,10 +163,16 @@ public class BinaryColumn implements Column { return retainedSizeInBytes; } + @Override + public long getSizeInBytes() { + return sizeInBytes; + } + @Override public Column getRegion(int positionOffset, int length) { checkValidRegion(getPositionCount(), positionOffset, length); - return new BinaryColumn(positionOffset + arrayOffset, length, valueIsNull, values); + return new BinaryColumn( + positionOffset + arrayOffset, length, valueIsNull, values, getRetainedSizeInBytes()); } @Override diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BinaryColumnBuilder.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BinaryColumnBuilder.java index 917c6e5f..fa5d9cdf 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BinaryColumnBuilder.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BinaryColumnBuilder.java @@ -32,7 +32,6 @@ import java.util.Arrays; import static java.lang.Math.max; import static org.apache.tsfile.read.common.block.column.ColumnUtil.calculateBlockResetSize; -import static org.apache.tsfile.utils.RamUsageEstimator.shallowSizeOf; import static org.apache.tsfile.utils.RamUsageEstimator.sizeOf; public class BinaryColumnBuilder implements ColumnBuilder { @@ -124,7 +123,6 @@ public class BinaryColumnBuilder implements ColumnBuilder { @Override public long getRetainedSizeInBytes() { - // TODO we need to sum up all the Binary's retainedSize here long size = INSTANCE_SIZE + arraysRetainedSizeInBytes; if (columnBuilderStatus != null) { size += ColumnBuilderStatus.INSTANCE_SIZE; @@ -134,7 +132,6 @@ public class BinaryColumnBuilder implements ColumnBuilder { @Override public ColumnBuilder newColumnBuilderLike(ColumnBuilderStatus columnBuilderStatus) { - // TODO we should take retain size into account here return new BinaryColumnBuilder(columnBuilderStatus, calculateBlockResetSize(positionCount)); } @@ -153,6 +150,6 @@ public class BinaryColumnBuilder implements ColumnBuilder { } private void updateArraysDataSize() { - arraysRetainedSizeInBytes = sizeOf(valueIsNull) + shallowSizeOf(values); + arraysRetainedSizeInBytes = sizeOf(valueIsNull) + sizeOf(values); } } diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BooleanColumn.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BooleanColumn.java index f74bcd8e..f502b74e 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BooleanColumn.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BooleanColumn.java @@ -132,6 +132,11 @@ public class BooleanColumn implements Column { return retainedSizeInBytes; } + @Override + public long getSizeInBytes() { + return (long) positionCount * SIZE_IN_BYTES_PER_POSITION; + } + @Override public Column getRegion(int positionOffset, int length) { checkValidRegion(getPositionCount(), positionOffset, length); diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/DoubleColumn.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/DoubleColumn.java index 0311830b..ee2caa4c 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/DoubleColumn.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/DoubleColumn.java @@ -133,6 +133,11 @@ public class DoubleColumn implements Column { return retainedSizeInBytes; } + @Override + public long getSizeInBytes() { + return (long) positionCount * SIZE_IN_BYTES_PER_POSITION; + } + @Override public Column getRegion(int positionOffset, int length) { checkValidRegion(getPositionCount(), positionOffset, length); diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/FloatColumn.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/FloatColumn.java index 449212eb..ad55c15c 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/FloatColumn.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/FloatColumn.java @@ -133,6 +133,11 @@ public class FloatColumn implements Column { return retainedSizeInBytes; } + @Override + public long getSizeInBytes() { + return (long) positionCount * SIZE_IN_BYTES_PER_POSITION; + } + @Override public Column getRegion(int positionOffset, int length) { checkValidRegion(getPositionCount(), positionOffset, length); diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/IntColumn.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/IntColumn.java index 0254690b..9c7a0f73 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/IntColumn.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/IntColumn.java @@ -133,6 +133,11 @@ public class IntColumn implements Column { return retainedSizeInBytes; } + @Override + public long getSizeInBytes() { + return (long) positionCount * SIZE_IN_BYTES_PER_POSITION; + } + @Override public Column getRegion(int positionOffset, int length) { checkValidRegion(getPositionCount(), positionOffset, length); diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/LongColumn.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/LongColumn.java index 34cc7dc4..b42c6fa5 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/LongColumn.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/LongColumn.java @@ -133,6 +133,11 @@ public class LongColumn implements Column { return retainedSizeInBytes; } + @Override + public long getSizeInBytes() { + return (long) positionCount * SIZE_IN_BYTES_PER_POSITION; + } + @Override public Column getRegion(int positionOffset, int length) { checkValidRegion(getPositionCount(), positionOffset, length); diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/NullColumn.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/NullColumn.java index 8b7e2152..8e56b5ab 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/NullColumn.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/NullColumn.java @@ -83,6 +83,11 @@ public class NullColumn implements Column { return retainedSizeInBytes; } + @Override + public long getSizeInBytes() { + return retainedSizeInBytes; + } + @Override public Column getRegion(int positionOffset, int length) { checkValidRegion(getPositionCount(), positionOffset, length); diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/RunLengthEncodedColumn.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/RunLengthEncodedColumn.java index a78e2daa..649bc70e 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/RunLengthEncodedColumn.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/RunLengthEncodedColumn.java @@ -191,6 +191,11 @@ public class RunLengthEncodedColumn implements Column { return INSTANCE_SIZE + value.getRetainedSizeInBytes(); } + @Override + public long getSizeInBytes() { + return value.getSizeInBytes(); + } + @Override public Column getRegion(int positionOffset, int length) { checkValidRegion(positionCount, positionOffset, length); diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/TimeColumn.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/TimeColumn.java index 1783f6a3..a936a3e7 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/TimeColumn.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/TimeColumn.java @@ -98,8 +98,7 @@ public class TimeColumn implements Column { @Override public boolean[] isNull() { - // todo - return null; + throw new UnsupportedOperationException("isNull is not supported for TimeColumn"); } @Override @@ -112,6 +111,11 @@ public class TimeColumn implements Column { return retainedSizeInBytes; } + @Override + public long getSizeInBytes() { + return (long) positionCount * SIZE_IN_BYTES_PER_POSITION; + } + @Override public Column getRegion(int positionOffset, int length) { ColumnUtil.checkValidRegion(getPositionCount(), positionOffset, length); diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/TsBlockSerde.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/TsBlockSerde.java index 0ef00256..a5074504 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/TsBlockSerde.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/TsBlockSerde.java @@ -89,12 +89,12 @@ public class TsBlockSerde { * @return Serialized tsblock. */ public ByteBuffer serialize(TsBlock tsBlock) throws IOException { - if (tsBlock.getRetainedSizeInBytes() > Integer.MAX_VALUE) { + if (tsBlock.getSizeInBytes() > Integer.MAX_VALUE) { throw new IllegalStateException( - "TsBlock should not be that large: " + tsBlock.getRetainedSizeInBytes()); + "TsBlock should not be that large: " + tsBlock.getSizeInBytes()); } ByteArrayOutputStream byteArrayOutputStream = - new ByteArrayOutputStream((int) tsBlock.getRetainedSizeInBytes()); + new ByteArrayOutputStream((int) tsBlock.getSizeInBytes()); DataOutputStream dataOutputStream = new DataOutputStream(byteArrayOutputStream); // Value column count.
