This is an automated email from the ASF dual-hosted git repository.
jackietien pushed a commit to branch develop
in repository https://gitbox.apache.org/repos/asf/tsfile.git
The following commit(s) were added to refs/heads/develop by this push:
new 9f8ab1266 Correct the retained size calculation for BinaryColumn and BinaryColumnBuilder
9f8ab1266 is described below
commit 9f8ab1266de753667d01d87af8cca33280dee7c9
Author: Jackie Tien <[email protected]>
AuthorDate: Fri Feb 7 09:02:51 2025 +0800
Correct the retained size calculation for BinaryColumn and BinaryColumnBuilder
---
.../org/apache/tsfile/block/column/Column.java | 7 ++++
.../org/apache/tsfile/utils/RamUsageEstimator.java | 12 ++++++
.../tsfile/file/header/ChunkGroupHeader.java | 2 -
.../apache/tsfile/read/common/block/TsBlock.java | 24 ++++++++++++
.../tsfile/read/common/block/TsBlockBuilder.java | 4 --
.../read/common/block/column/BinaryColumn.java | 45 +++++++++++++++++++---
.../common/block/column/BinaryColumnBuilder.java | 5 +--
.../read/common/block/column/BooleanColumn.java | 5 +++
.../read/common/block/column/DictionaryColumn.java | 5 +++
.../read/common/block/column/DoubleColumn.java | 5 +++
.../read/common/block/column/FloatColumn.java | 5 +++
.../tsfile/read/common/block/column/IntColumn.java | 5 +++
.../read/common/block/column/LongColumn.java | 5 +++
.../read/common/block/column/NullColumn.java | 5 +++
.../block/column/RunLengthEncodedColumn.java | 5 +++
.../read/common/block/column/TimeColumn.java | 8 +++-
.../read/common/block/column/TsBlockSerde.java | 7 ++--
17 files changed, 133 insertions(+), 21 deletions(-)
diff --git
a/java/common/src/main/java/org/apache/tsfile/block/column/Column.java
b/java/common/src/main/java/org/apache/tsfile/block/column/Column.java
index e45664634..b5105ed6c 100644
--- a/java/common/src/main/java/org/apache/tsfile/block/column/Column.java
+++ b/java/common/src/main/java/org/apache/tsfile/block/column/Column.java
@@ -143,6 +143,13 @@ public interface Column {
*/
long getRetainedSizeInBytes();
+ /**
+ * Returns the size of this Column as if it was compacted, ignoring any over-allocations and any
+ * unloaded nested Columns. For example, in dictionary blocks, this only counts each dictionary
+ * entry once, rather than each time a value is referenced.
+ */
+ long getSizeInBytes();
+
/**
* Returns a column starting at the specified position and extends for the specified length. The
* specified region must be entirely contained within this column.
diff --git
a/java/common/src/main/java/org/apache/tsfile/utils/RamUsageEstimator.java
b/java/common/src/main/java/org/apache/tsfile/utils/RamUsageEstimator.java
index 3a35d2269..d357a42ea 100644
--- a/java/common/src/main/java/org/apache/tsfile/utils/RamUsageEstimator.java
+++ b/java/common/src/main/java/org/apache/tsfile/utils/RamUsageEstimator.java
@@ -271,6 +271,18 @@ public final class RamUsageEstimator {
: alignObjectSize(NUM_BYTES_ARRAY_HEADER + (long) Double.BYTES *
arr.length);
}
+ public static long sizeOf(Accountable[] arr) {
+ if (arr == null) {
+ return 0;
+ } else {
+ long size = shallowSizeOf(arr);
+ for (Accountable obj : arr) {
+ size += obj != null ? obj.ramBytesUsed() : 0;
+ }
+ return size;
+ }
+ }
+
/** Returns the size in bytes of the String[] object. */
public static long sizeOf(String[] arr) {
long size = shallowSizeOf(arr);
diff --git
a/java/tsfile/src/main/java/org/apache/tsfile/file/header/ChunkGroupHeader.java
b/java/tsfile/src/main/java/org/apache/tsfile/file/header/ChunkGroupHeader.java
index b46f0e0c3..ba3371e9d 100644
---
a/java/tsfile/src/main/java/org/apache/tsfile/file/header/ChunkGroupHeader.java
+++
b/java/tsfile/src/main/java/org/apache/tsfile/file/header/ChunkGroupHeader.java
@@ -53,7 +53,6 @@ public class ChunkGroupHeader {
}
private int getSerializedSize(IDeviceID deviceID) {
- // TODO: add an interface in IDeviceID
int length = deviceID.serializedSize();
return Byte.BYTES + ReadWriteForEncodingUtils.varIntSize(length) + length;
}
@@ -73,7 +72,6 @@ public class ChunkGroupHeader {
}
}
- // TODO: add an interface in IDeviceID
final IDeviceID deviceID = deserializeDeviceID(inputStream, versionNumber);
return new ChunkGroupHeader(deviceID);
}
diff --git
a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/TsBlock.java
b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/TsBlock.java
index 5f5edc6bb..b5224580e 100644
--- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/TsBlock.java
+++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/TsBlock.java
@@ -69,6 +69,8 @@ public class TsBlock {
private volatile long retainedSizeInBytes = -1;
+ private volatile long sizeInBytes = -1;
+
public TsBlock(int positionCount) {
this(false, positionCount, null, EMPTY_COLUMNS);
}
@@ -122,6 +124,18 @@ public class TsBlock {
return retainedSizeInBytes;
}
+ /**
+ * Returns the size of this block as if it was compacted, ignoring any over-allocations and any
+ * unloaded nested blocks. For example, in dictionary blocks, this only counts each dictionary
+ * entry once, rather than each time a value is referenced.
+ */
+ public long getSizeInBytes() {
+ if (sizeInBytes < 0) {
+ return updateSize();
+ }
+ return sizeInBytes;
+ }
+
/**
* @param positionOffset start offset
* @param length slice length
@@ -508,6 +522,16 @@ public class TsBlock {
return newRetainedSizeInBytes;
}
+ private long updateSize() {
+ long newSizeInBytes = INSTANCE_SIZE;
+ newSizeInBytes += timeColumn.getSizeInBytes();
+ for (Column column : valueColumns) {
+ newSizeInBytes += column.getSizeInBytes();
+ }
+ this.sizeInBytes = newSizeInBytes;
+ return newSizeInBytes;
+ }
+
public int getTotalInstanceSize() {
int totalInstanceSize = INSTANCE_SIZE;
totalInstanceSize += timeColumn.getInstanceSize();
diff --git
a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/TsBlockBuilder.java
b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/TsBlockBuilder.java
index 918551d9b..028bffc99 100644
---
a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/TsBlockBuilder.java
+++
b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/TsBlockBuilder.java
@@ -104,8 +104,6 @@ public class TsBlockBuilder {
valueColumnBuilders = new ColumnBuilder[types.size()];
for (int i = 0; i < valueColumnBuilders.length; i++) {
- // TODO use Type interface to encapsulate createColumnBuilder to each concrete type class
- // instead of switch-case
switch (types.get(i)) {
case BOOLEAN:
valueColumnBuilders[i] =
@@ -176,8 +174,6 @@ public class TsBlockBuilder {
valueColumnBuilders = new ColumnBuilder[types.size()];
int initialExpectedEntries = timeColumnBuilder.getPositionCount();
for (int i = 0; i < valueColumnBuilders.length; i++) {
- // TODO use Type interface to encapsulate createColumnBuilder to each concrete type class
- // instead of switch-case
switch (types.get(i)) {
case BOOLEAN:
valueColumnBuilders[i] =
diff --git
a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BinaryColumn.java
b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BinaryColumn.java
index ec36fc4f1..8a794508a 100644
---
a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BinaryColumn.java
+++
b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BinaryColumn.java
@@ -32,8 +32,8 @@ import java.util.Optional;
import static
org.apache.tsfile.read.common.block.column.ColumnUtil.checkArrayRange;
import static
org.apache.tsfile.read.common.block.column.ColumnUtil.checkReadablePosition;
import static
org.apache.tsfile.read.common.block.column.ColumnUtil.checkValidRegion;
+import static org.apache.tsfile.utils.RamUsageEstimator.sizeOf;
import static org.apache.tsfile.utils.RamUsageEstimator.sizeOfBooleanArray;
-import static org.apache.tsfile.utils.RamUsageEstimator.sizeOfObjectArray;
public class BinaryColumn implements Column {
@@ -46,6 +46,7 @@ public class BinaryColumn implements Column {
private final Binary[] values;
private final long retainedSizeInBytes;
+ private final long sizeInBytes;
public BinaryColumn(int initialCapacity) {
this(0, 0, null, new Binary[initialCapacity]);
@@ -75,9 +76,37 @@ public class BinaryColumn implements Column {
}
this.valueIsNull = valueIsNull;
- // TODO we need to sum up all the Binary's retainedSize here
- retainedSizeInBytes =
- INSTANCE_SIZE + sizeOfBooleanArray(positionCount) +
sizeOfObjectArray(positionCount);
+ retainedSizeInBytes = INSTANCE_SIZE + sizeOfBooleanArray(positionCount) +
sizeOf(values);
+ sizeInBytes = values.length > 0 ? retainedSizeInBytes * positionCount /
values.length : 0L;
+ }
+
+ // called by getRegion which already knows the underlying retainedSizeInBytes
+ private BinaryColumn(
+ int arrayOffset,
+ int positionCount,
+ boolean[] valueIsNull,
+ Binary[] values,
+ long retainedSizeInBytes) {
+ if (arrayOffset < 0) {
+ throw new IllegalArgumentException("arrayOffset is negative");
+ }
+ this.arrayOffset = arrayOffset;
+ if (positionCount < 0) {
+ throw new IllegalArgumentException("positionCount is negative");
+ }
+ this.positionCount = positionCount;
+
+ if (values.length - arrayOffset < positionCount) {
+ throw new IllegalArgumentException("values length is less than
positionCount");
+ }
+ this.values = values;
+
+ if (valueIsNull != null && valueIsNull.length - arrayOffset <
positionCount) {
+ throw new IllegalArgumentException("isNull length is less than
positionCount");
+ }
+ this.valueIsNull = valueIsNull;
+ this.retainedSizeInBytes = retainedSizeInBytes;
+ this.sizeInBytes = values.length > 0 ? retainedSizeInBytes * positionCount
/ values.length : 0L;
}
@Override
@@ -140,10 +169,16 @@ public class BinaryColumn implements Column {
return retainedSizeInBytes;
}
+ @Override
+ public long getSizeInBytes() {
+ return sizeInBytes;
+ }
+
@Override
public Column getRegion(int positionOffset, int length) {
checkValidRegion(getPositionCount(), positionOffset, length);
- return new BinaryColumn(positionOffset + arrayOffset, length, valueIsNull,
values);
+ return new BinaryColumn(
+ positionOffset + arrayOffset, length, valueIsNull, values,
getRetainedSizeInBytes());
}
@Override
diff --git
a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BinaryColumnBuilder.java
b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BinaryColumnBuilder.java
index a82d82fc0..d9c560dc6 100644
---
a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BinaryColumnBuilder.java
+++
b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BinaryColumnBuilder.java
@@ -32,7 +32,6 @@ import java.util.Arrays;
import static java.lang.Math.max;
import static
org.apache.tsfile.read.common.block.column.ColumnUtil.calculateBlockResetSize;
-import static org.apache.tsfile.utils.RamUsageEstimator.shallowSizeOf;
import static org.apache.tsfile.utils.RamUsageEstimator.sizeOf;
public class BinaryColumnBuilder implements ColumnBuilder {
@@ -129,7 +128,6 @@ public class BinaryColumnBuilder implements ColumnBuilder {
@Override
public long getRetainedSizeInBytes() {
- // TODO we need to sum up all the Binary's retainedSize here
long size = INSTANCE_SIZE + arraysRetainedSizeInBytes;
if (columnBuilderStatus != null) {
size += ColumnBuilderStatus.INSTANCE_SIZE;
@@ -139,7 +137,6 @@ public class BinaryColumnBuilder implements ColumnBuilder {
@Override
public ColumnBuilder newColumnBuilderLike(ColumnBuilderStatus
columnBuilderStatus) {
- // TODO we should take retain size into account here
return new BinaryColumnBuilder(columnBuilderStatus,
calculateBlockResetSize(positionCount));
}
@@ -158,6 +155,6 @@ public class BinaryColumnBuilder implements ColumnBuilder {
}
private void updateArraysDataSize() {
- arraysRetainedSizeInBytes = sizeOf(valueIsNull) + shallowSizeOf(values);
+ arraysRetainedSizeInBytes = sizeOf(valueIsNull) + sizeOf(values);
}
}
diff --git
a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BooleanColumn.java
b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BooleanColumn.java
index 76b3fb6d7..7b9aca747 100644
---
a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BooleanColumn.java
+++
b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/BooleanColumn.java
@@ -138,6 +138,11 @@ public class BooleanColumn implements Column {
return retainedSizeInBytes;
}
+ @Override
+ public long getSizeInBytes() {
+ return (long) positionCount * SIZE_IN_BYTES_PER_POSITION;
+ }
+
@Override
public Column getRegion(int positionOffset, int length) {
checkValidRegion(getPositionCount(), positionOffset, length);
diff --git
a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/DictionaryColumn.java
b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/DictionaryColumn.java
index 13dbd2261..50a2dd1ef 100644
---
a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/DictionaryColumn.java
+++
b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/DictionaryColumn.java
@@ -187,6 +187,11 @@ public final class DictionaryColumn implements Column {
return retainedSizeInBytes + dictionary.getRetainedSizeInBytes();
}
+ @Override
+ public long getSizeInBytes() {
+ return ids.length > 0 ? getRetainedSizeInBytes() * positionCount /
ids.length : 0L;
+ }
+
@Override
public Column getRegion(int positionOffset, int length) {
checkValidRegion(positionCount, positionOffset, length);
diff --git
a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/DoubleColumn.java
b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/DoubleColumn.java
index afc78208e..e0aff8f7a 100644
---
a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/DoubleColumn.java
+++
b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/DoubleColumn.java
@@ -139,6 +139,11 @@ public class DoubleColumn implements Column {
return retainedSizeInBytes;
}
+ @Override
+ public long getSizeInBytes() {
+ return (long) positionCount * SIZE_IN_BYTES_PER_POSITION;
+ }
+
@Override
public Column getRegion(int positionOffset, int length) {
checkValidRegion(getPositionCount(), positionOffset, length);
diff --git
a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/FloatColumn.java
b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/FloatColumn.java
index c008d9353..8a576c0ce 100644
---
a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/FloatColumn.java
+++
b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/FloatColumn.java
@@ -154,6 +154,11 @@ public class FloatColumn implements Column {
return retainedSizeInBytes;
}
+ @Override
+ public long getSizeInBytes() {
+ return (long) positionCount * SIZE_IN_BYTES_PER_POSITION;
+ }
+
@Override
public Column getRegion(int positionOffset, int length) {
checkValidRegion(getPositionCount(), positionOffset, length);
diff --git
a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/IntColumn.java
b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/IntColumn.java
index c2065ee32..6820a83eb 100644
---
a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/IntColumn.java
+++
b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/IntColumn.java
@@ -182,6 +182,11 @@ public class IntColumn implements Column {
return retainedSizeInBytes;
}
+ @Override
+ public long getSizeInBytes() {
+ return (long) positionCount * SIZE_IN_BYTES_PER_POSITION;
+ }
+
@Override
public Column getRegion(int positionOffset, int length) {
checkValidRegion(getPositionCount(), positionOffset, length);
diff --git
a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/LongColumn.java
b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/LongColumn.java
index ecba9013d..03d8af0e6 100644
---
a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/LongColumn.java
+++
b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/LongColumn.java
@@ -154,6 +154,11 @@ public class LongColumn implements Column {
return retainedSizeInBytes;
}
+ @Override
+ public long getSizeInBytes() {
+ return (long) positionCount * SIZE_IN_BYTES_PER_POSITION;
+ }
+
@Override
public Column getRegion(int positionOffset, int length) {
checkValidRegion(getPositionCount(), positionOffset, length);
diff --git
a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/NullColumn.java
b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/NullColumn.java
index 9b999c412..d91359cae 100644
---
a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/NullColumn.java
+++
b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/NullColumn.java
@@ -85,6 +85,11 @@ public class NullColumn implements Column {
return retainedSizeInBytes;
}
+ @Override
+ public long getSizeInBytes() {
+ return retainedSizeInBytes;
+ }
+
@Override
public Column getRegion(int positionOffset, int length) {
checkValidRegion(getPositionCount(), positionOffset, length);
diff --git
a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/RunLengthEncodedColumn.java
b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/RunLengthEncodedColumn.java
index 620378182..148407295 100644
---
a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/RunLengthEncodedColumn.java
+++
b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/RunLengthEncodedColumn.java
@@ -193,6 +193,11 @@ public class RunLengthEncodedColumn implements Column {
return INSTANCE_SIZE + value.getRetainedSizeInBytes();
}
+ @Override
+ public long getSizeInBytes() {
+ return value.getSizeInBytes();
+ }
+
@Override
public Column getRegion(int positionOffset, int length) {
checkValidRegion(positionCount, positionOffset, length);
diff --git
a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/TimeColumn.java
b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/TimeColumn.java
index 3b1880a96..e7108fc85 100644
---
a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/TimeColumn.java
+++
b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/TimeColumn.java
@@ -100,8 +100,7 @@ public class TimeColumn implements Column {
@Override
public boolean[] isNull() {
- // todo
- return null;
+ throw new UnsupportedOperationException("isNull is not supported for
TimeColumn");
}
@Override
@@ -114,6 +113,11 @@ public class TimeColumn implements Column {
return retainedSizeInBytes;
}
+ @Override
+ public long getSizeInBytes() {
+ return (long) positionCount * SIZE_IN_BYTES_PER_POSITION;
+ }
+
@Override
public Column getRegion(int positionOffset, int length) {
ColumnUtil.checkValidRegion(getPositionCount(), positionOffset, length);
diff --git
a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/TsBlockSerde.java
b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/TsBlockSerde.java
index e887a5773..a891553b5 100644
---
a/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/TsBlockSerde.java
+++
b/java/tsfile/src/main/java/org/apache/tsfile/read/common/block/column/TsBlockSerde.java
@@ -67,7 +67,6 @@ public class TsBlockSerde {
}
// Time column.
- // TODO: a TimeColumn will be deserialized as a LongColumn
Column timeColumn =
ColumnEncoderFactory.get(columnEncodings.get(0))
.readColumn(byteBuffer, TSDataType.INT64, positionCount);
@@ -91,12 +90,12 @@ public class TsBlockSerde {
* @return Serialized tsblock.
*/
public ByteBuffer serialize(TsBlock tsBlock) throws IOException {
- if (tsBlock.getRetainedSizeInBytes() > Integer.MAX_VALUE) {
+ if (tsBlock.getSizeInBytes() > Integer.MAX_VALUE) {
throw new IllegalStateException(
- "TsBlock should not be that large: " +
tsBlock.getRetainedSizeInBytes());
+ "TsBlock should not be that large: " + tsBlock.getSizeInBytes());
}
ByteArrayOutputStream byteArrayOutputStream =
- new ByteArrayOutputStream((int) tsBlock.getRetainedSizeInBytes());
+ new ByteArrayOutputStream((int) tsBlock.getSizeInBytes());
DataOutputStream dataOutputStream = new
DataOutputStream(byteArrayOutputStream);
// Value column count.