This is an automated email from the ASF dual-hosted git repository. haonan pushed a commit to branch object_type in repository https://gitbox.apache.org/repos/asf/tsfile.git
commit 93842ba960269e7ab6bd99231736c5464095dd7c Author: HTHou <[email protected]> AuthorDate: Wed Jul 2 18:13:13 2025 +0800 object type write interface --- .../java/org/apache/tsfile/enums/TSDataType.java | 30 ++++- .../org/apache/tsfile/utils/TsPrimitiveType.java | 2 + .../apache/tsfile/common/conf/TSFileConfig.java | 2 + .../tsfile/encoding/encoder/PlainEncoder.java | 1 + .../tsfile/file/metadata/TimeseriesMetadata.java | 12 +- .../tsfile/file/metadata/enums/TSEncoding.java | 2 + .../file/metadata/statistics/ObjectStatistics.java | 125 +++++++++++++++++++++ .../file/metadata/statistics/Statistics.java | 4 + .../apache/tsfile/read/TsFileSequenceReader.java | 3 + .../java/org/apache/tsfile/read/common/Chunk.java | 2 + .../tsfile/read/common/DescReadWriteBatchData.java | 3 + .../java/org/apache/tsfile/read/common/Field.java | 5 + .../write/chunk/AlignedChunkGroupWriterImpl.java | 3 + .../tsfile/write/chunk/AlignedChunkWriterImpl.java | 2 + .../tsfile/write/record/datapoint/DataPoint.java | 1 + .../file/metadata/statistics/StatisticsTest.java | 5 + 16 files changed, 194 insertions(+), 8 deletions(-) diff --git a/java/common/src/main/java/org/apache/tsfile/enums/TSDataType.java b/java/common/src/main/java/org/apache/tsfile/enums/TSDataType.java index cf17aa04..9c2c1ba3 100644 --- a/java/common/src/main/java/org/apache/tsfile/enums/TSDataType.java +++ b/java/common/src/main/java/org/apache/tsfile/enums/TSDataType.java @@ -69,7 +69,10 @@ public enum TSDataType { BLOB((byte) 10), /** STRING */ - STRING((byte) 11); + STRING((byte) 11), + + /** OBJECT */ + OBJECT((byte) 12); private final byte type; private static final Map<TSDataType, Set<TSDataType>> compatibleTypes; @@ -120,6 +123,8 @@ public enum TSDataType { Set<TSDataType> stringCompatibleTypes = new HashSet<>(); stringCompatibleTypes.add(TEXT); compatibleTypes.put(STRING, stringCompatibleTypes); + + compatibleTypes.put(OBJECT, Collections.emptySet()); } TSDataType(byte type) { @@ -166,6 +171,8 @@ public enum TSDataType { return TSDataType.BLOB; case 11: return TSDataType.STRING; + case 12: + return TSDataType.OBJECT; default: throw new IllegalArgumentException("Invalid input: " + type); } @@ -265,6 +272,12 @@ public enum TSDataType { } else { break; } + case OBJECT: + if (sourceType == TSDataType.OBJECT) { + return value; + } else { + break; + } case VECTOR: case UNKNOWN: default: @@ -367,6 +380,12 @@ public enum TSDataType { } else { break; } + case OBJECT: + if (sourceType == TSDataType.OBJECT) { + return array; + } else { + break; + } case VECTOR: case UNKNOWN: default: @@ -414,6 +433,7 @@ public enum TSDataType { case DOUBLE: case VECTOR: case BLOB: + case OBJECT: case STRING: case TIMESTAMP: return 8; @@ -452,6 +472,7 @@ public enum TSDataType { case BOOLEAN: case TEXT: case VECTOR: + case OBJECT: return false; default: throw new UnSupportedDataTypeException(this.toString()); @@ -478,6 +499,7 @@ public enum TSDataType { return true; case VECTOR: case BLOB: + case OBJECT: return false; default: throw new UnSupportedDataTypeException(this.toString()); @@ -485,6 +507,10 @@ public enum TSDataType { } public boolean isBinary() { - return this == TEXT || this == STRING || this == BLOB; + return this == TEXT || this == STRING || this == BLOB || this == OBJECT; + } + + public boolean isBlob() { + return this == BLOB || this == OBJECT; } } diff --git a/java/common/src/main/java/org/apache/tsfile/utils/TsPrimitiveType.java b/java/common/src/main/java/org/apache/tsfile/utils/TsPrimitiveType.java index c3356fc5..4850d1ad 100644 --- a/java/common/src/main/java/org/apache/tsfile/utils/TsPrimitiveType.java +++ b/java/common/src/main/java/org/apache/tsfile/utils/TsPrimitiveType.java @@ -48,6 +48,7 @@ public abstract class TsPrimitiveType implements Serializable { case TEXT: case BLOB: case STRING: + case OBJECT: return new TsPrimitiveType.TsBinary(); case VECTOR: return new TsPrimitiveType.TsVector(); @@ -79,6 +80,7 @@ public abstract class TsPrimitiveType implements Serializable { case TEXT: case BLOB: case STRING: + case OBJECT: return new TsPrimitiveType.TsBinary((Binary) v); case VECTOR: return new TsPrimitiveType.TsVector((TsPrimitiveType[]) v); diff --git a/java/tsfile/src/main/java/org/apache/tsfile/common/conf/TSFileConfig.java b/java/tsfile/src/main/java/org/apache/tsfile/common/conf/TSFileConfig.java index 709af3c0..f20daa5c 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/common/conf/TSFileConfig.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/common/conf/TSFileConfig.java @@ -374,6 +374,7 @@ public class TSFileConfig implements Serializable { case STRING: case BLOB: case TEXT: + case OBJECT: default: return textEncoding; } @@ -402,6 +403,7 @@ public class TSFileConfig implements Serializable { case STRING: case BLOB: case TEXT: + case OBJECT: compressionName = textCompression; break; default: diff --git a/java/tsfile/src/main/java/org/apache/tsfile/encoding/encoder/PlainEncoder.java b/java/tsfile/src/main/java/org/apache/tsfile/encoding/encoder/PlainEncoder.java index 09c28cbe..0b97005d 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/encoding/encoder/PlainEncoder.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/encoding/encoder/PlainEncoder.java @@ -128,6 +128,7 @@ public class PlainEncoder extends Encoder { case TEXT: case STRING: case BLOB: + case OBJECT: // refer to encode(Binary,ByteArrayOutputStream) return 4 + TSFileConfig.BYTE_SIZE_PER_CHAR * maxStringLength; default: diff --git a/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/TimeseriesMetadata.java b/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/TimeseriesMetadata.java index 25b1a127..e0868f1c 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/TimeseriesMetadata.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/TimeseriesMetadata.java @@ -133,8 +133,8 @@ public class TimeseriesMetadata implements ITimeSeriesMetadata { int chunkMetaDataListDataSize = ReadWriteForEncodingUtils.readUnsignedVarInt(buffer); timeseriesMetaData.setDataSizeOfChunkMetaDataList(chunkMetaDataListDataSize); timeseriesMetaData.setStatistics(Statistics.deserialize(buffer, timeseriesMetaData.dataType)); - if ((timeseriesMetaData.getTsDataType() != TSDataType.BLOB && needChunkMetadataForNonBlob) - || (timeseriesMetaData.getTsDataType() == TSDataType.BLOB && needChunkMetadataForBlob)) { + if ((!timeseriesMetaData.getTsDataType().isBlob() && needChunkMetadataForNonBlob) + || (timeseriesMetaData.getTsDataType().isBlob() && needChunkMetadataForBlob)) { ByteBuffer byteBuffer = buffer.slice(); byteBuffer.limit(chunkMetaDataListDataSize); timeseriesMetaData.chunkMetadataList = new ArrayList<>(); @@ -169,8 +169,8 @@ public class TimeseriesMetadata implements ITimeSeriesMetadata { timeseriesMetaData.setStatistics( Statistics.deserialize(inputStream, timeseriesMetaData.dataType)); long startOffset = tsFileInput.position(); - if ((timeseriesMetaData.getTsDataType() != TSDataType.BLOB && needChunkMetadataForNonBlob) - || (timeseriesMetaData.getTsDataType() == TSDataType.BLOB && needChunkMetadataForBlob)) { + if ((!timeseriesMetaData.getTsDataType().isBlob() && needChunkMetadataForNonBlob) + || (timeseriesMetaData.getTsDataType().isBlob() && needChunkMetadataForBlob)) { timeseriesMetaData.chunkMetadataList = new ArrayList<>(); while (tsFileInput.position() < startOffset + chunkMetaDataListDataSize) { timeseriesMetaData.chunkMetadataList.add( @@ -212,8 +212,8 @@ public class TimeseriesMetadata implements ITimeSeriesMetadata { timeseriesMetaData.setStatistics(statistics); if (!excludedMeasurements.contains(measurementID) - && ((tsDataType != TSDataType.BLOB && needChunkMetadataForNonBlob) - || (tsDataType == TSDataType.BLOB && needChunkMetadataForBlob))) { + && ((!tsDataType.isBlob() && needChunkMetadataForNonBlob) + || (tsDataType.isBlob() && needChunkMetadataForBlob))) { // measurement is not in the excluded set and need chunk metadata ByteBuffer byteBuffer = buffer.slice(); byteBuffer.limit(chunkMetaDataListDataSize); diff --git a/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/enums/TSEncoding.java b/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/enums/TSEncoding.java index 20374376..cffd529f 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/enums/TSEncoding.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/enums/TSEncoding.java @@ -91,6 +91,8 @@ public enum TSEncoding { Set<TSEncoding> blobSet = new HashSet<>(); blobSet.add(TSEncoding.PLAIN); TYPE_SUPPORTED_ENCODINGS.put(TSDataType.BLOB, blobSet); + + TYPE_SUPPORTED_ENCODINGS.put(TSDataType.OBJECT, blobSet); } TSEncoding(byte type) { diff --git a/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/statistics/ObjectStatistics.java b/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/statistics/ObjectStatistics.java new file mode 100644 index 00000000..74106f22 --- /dev/null +++ b/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/statistics/ObjectStatistics.java @@ -0,0 +1,125 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.tsfile.file.metadata.statistics; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.ByteBuffer; +import org.apache.tsfile.enums.TSDataType; +import org.apache.tsfile.exception.filter.StatisticsClassException; +import org.apache.tsfile.utils.Binary; +import org.apache.tsfile.utils.RamUsageEstimator; + +public class ObjectStatistics extends Statistics<Binary> { + + public static final long INSTANCE_SIZE = + RamUsageEstimator.shallowSizeOfInstance(ObjectStatistics.class); + + // no statistics for object data type + + @Override + public TSDataType getType() { + return TSDataType.OBJECT; + } + + /** The output of this method should be identical to the method "serializeStats(outputStream)". */ + @Override + public int getStatsSize() { + return 0; + } + + @Override + public long getRetainedSizeInBytes() { + return INSTANCE_SIZE; + } + + @Override + int serializeStats(OutputStream outputStream) throws IOException { + return 0; + } + + public void updateStats(Binary value) { + if (isEmpty) { + isEmpty = false; + } + } + + @Override + public void deserialize(InputStream inputStream) throws IOException { + // do nothing + } + + @Override + public void deserialize(ByteBuffer byteBuffer) { + // do nothing + } + + @Override + public Binary getMinValue() { + throw new StatisticsClassException( + String.format(STATS_UNSUPPORTED_MSG, TSDataType.OBJECT, "min")); + } + + @Override + public Binary getMaxValue() { + throw new StatisticsClassException( + String.format(STATS_UNSUPPORTED_MSG, TSDataType.OBJECT, "max")); + } + + @Override + public Binary getFirstValue() { + throw new StatisticsClassException( + String.format(STATS_UNSUPPORTED_MSG, TSDataType.OBJECT, "first")); + } + + @Override + public Binary getLastValue() { + throw new StatisticsClassException( + String.format(STATS_UNSUPPORTED_MSG, TSDataType.OBJECT, "last")); + } + + @Override + public double getSumDoubleValue() { + throw new StatisticsClassException( + String.format(STATS_UNSUPPORTED_MSG, TSDataType.OBJECT, "sum")); + } + + @Override + public long getSumLongValue() { + + throw new StatisticsClassException( + String.format(STATS_UNSUPPORTED_MSG, TSDataType.OBJECT, "sum")); + } + + @SuppressWarnings("rawtypes") + @Override + protected void mergeStatisticsValue(Statistics stats) { + // do nothing + if (isEmpty) { + isEmpty = false; + } + } + + @Override + public String toString() { + return "ObjectStatistics{}"; + } +} diff --git a/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/statistics/Statistics.java b/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/statistics/Statistics.java index 3fc0f714..fa12c5d2 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/statistics/Statistics.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/file/metadata/statistics/Statistics.java @@ -94,6 +94,8 @@ public abstract class Statistics<T extends Serializable> { return new StringStatistics(); case BLOB: return new BlobStatistics(); + case OBJECT: + return new ObjectStatistics(); default: throw new UnknownColumnTypeException(type.toString()); } @@ -123,6 +125,8 @@ public abstract class Statistics<T extends Serializable> { return StringStatistics.INSTANCE_SIZE; case BLOB: return BlobStatistics.INSTANCE_SIZE; + case OBJECT: + return ObjectStatistics.INSTANCE_SIZE; default: throw new UnknownColumnTypeException(type.toString()); } diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/TsFileSequenceReader.java b/java/tsfile/src/main/java/org/apache/tsfile/read/TsFileSequenceReader.java index 033e3d04..71d259f9 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/TsFileSequenceReader.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/TsFileSequenceReader.java @@ -2276,6 +2276,7 @@ public class TsFileSequenceReader implements AutoCloseable { case TEXT: case BLOB: case STRING: + case OBJECT: chunkStatistics.update(timeStamp, value.getBinary()); break; default: @@ -2315,6 +2316,7 @@ public class TsFileSequenceReader implements AutoCloseable { case TEXT: case BLOB: case STRING: + case OBJECT: chunkStatistics.update(batchData.currentTime(), batchData.getBinary()); break; default: @@ -2545,6 +2547,7 @@ public class TsFileSequenceReader implements AutoCloseable { case TEXT: case BLOB: case STRING: + case OBJECT: chunkStatistics.update(batchData.currentTime(), batchData.getBinary()); break; default: diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/Chunk.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/Chunk.java index a7e2e9ea..c560050d 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/Chunk.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/Chunk.java @@ -284,6 +284,7 @@ public class Chunk { case TEXT: case STRING: case BLOB: + case OBJECT: chunkWriter.write( timestamp, convertedValue == null ? Binary.EMPTY_VALUE : (Binary) convertedValue, @@ -358,6 +359,7 @@ public class Chunk { case TEXT: case STRING: case BLOB: + case OBJECT: chunkWriter.write(timestamp, (Binary) convertedValue); break; default: diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/DescReadWriteBatchData.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/DescReadWriteBatchData.java index 1fd65740..3381e286 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/DescReadWriteBatchData.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/DescReadWriteBatchData.java @@ -75,6 +75,7 @@ public class DescReadWriteBatchData extends DescReadBatchData { case TEXT: case BLOB: case STRING: + case OBJECT: binaryRet = new LinkedList<>(); binaryRet.add(new Binary[capacity]); break; @@ -440,6 +441,7 @@ public class DescReadWriteBatchData extends DescReadBatchData { case TEXT: case BLOB: case STRING: + case OBJECT: for (int i = length() - 1; i >= 0; i--) { outputStream.writeLong(getTimeByIndex(i)); Binary binary = getBinaryByIndex(i); @@ -485,6 +487,7 @@ public class DescReadWriteBatchData extends DescReadBatchData { case TEXT: case BLOB: case STRING: + case OBJECT: Binary binary = value.getBinary(); outputStream.writeInt(binary.getLength()); outputStream.write(binary.getValues()); diff --git a/java/tsfile/src/main/java/org/apache/tsfile/read/common/Field.java b/java/tsfile/src/main/java/org/apache/tsfile/read/common/Field.java index 01d4dcdb..fb6ae1b4 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/read/common/Field.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/read/common/Field.java @@ -71,6 +71,7 @@ public class Field { case TEXT: case BLOB: case STRING: + case OBJECT: out.setBinaryV(field.getBinaryV()); break; default: @@ -182,6 +183,7 @@ public class Field { return String.valueOf(doubleV); case TEXT: case STRING: + case OBJECT: return binaryV.toString(); case BLOB: return BytesUtils.parseBlobByteArrayToString(binaryV.getValues()); @@ -216,6 +218,7 @@ public class Field { case TEXT: case BLOB: case STRING: + case OBJECT: return getBinaryV(); default: throw new UnSupportedDataTypeException(dataType.toString()); @@ -248,6 +251,7 @@ public class Field { case TEXT: case BLOB: case STRING: + case OBJECT: field.setBinaryV((Binary) value); break; default: @@ -278,6 +282,7 @@ public class Field { case TEXT: case BLOB: case STRING: + case OBJECT: field.setBinaryV(value.getBinary()); break; default: diff --git a/java/tsfile/src/main/java/org/apache/tsfile/write/chunk/AlignedChunkGroupWriterImpl.java b/java/tsfile/src/main/java/org/apache/tsfile/write/chunk/AlignedChunkGroupWriterImpl.java index a47df975..b84ae7f7 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/write/chunk/AlignedChunkGroupWriterImpl.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/write/chunk/AlignedChunkGroupWriterImpl.java @@ -186,6 +186,7 @@ public class AlignedChunkGroupWriterImpl implements IChunkGroupWriter { case TEXT: case BLOB: case STRING: + case OBJECT: valueChunkWriter.write(time, (Binary) point.getValue(), isNull); break; default: @@ -278,6 +279,7 @@ public class AlignedChunkGroupWriterImpl implements IChunkGroupWriter { case TEXT: case BLOB: case STRING: + case OBJECT: valueChunkWriter.write(time, ((Binary[]) tablet.getValues()[columnIndex])[row], isNull); break; default: @@ -371,6 +373,7 @@ public class AlignedChunkGroupWriterImpl implements IChunkGroupWriter { case TEXT: case BLOB: case STRING: + case OBJECT: valueChunkWriter.write(-1, null, true); break; default: diff --git a/java/tsfile/src/main/java/org/apache/tsfile/write/chunk/AlignedChunkWriterImpl.java b/java/tsfile/src/main/java/org/apache/tsfile/write/chunk/AlignedChunkWriterImpl.java index 2ec4bd8f..27761e67 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/write/chunk/AlignedChunkWriterImpl.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/write/chunk/AlignedChunkWriterImpl.java @@ -325,6 +325,7 @@ public class AlignedChunkWriterImpl implements IChunkWriter { case TEXT: case BLOB: case STRING: + case OBJECT: writer.write( time, point != null ? point.getBinary() : new Binary("".getBytes(StandardCharsets.UTF_8)), @@ -369,6 +370,7 @@ public class AlignedChunkWriterImpl implements IChunkWriter { case TEXT: case BLOB: case STRING: + case OBJECT: chunkWriter.write(times, column.getBinaries(), column.isNull(), batchSize, arrayOffset); break; case DOUBLE: diff --git a/java/tsfile/src/main/java/org/apache/tsfile/write/record/datapoint/DataPoint.java b/java/tsfile/src/main/java/org/apache/tsfile/write/record/datapoint/DataPoint.java index 80e617ca..fd62e587 100644 --- a/java/tsfile/src/main/java/org/apache/tsfile/write/record/datapoint/DataPoint.java +++ b/java/tsfile/src/main/java/org/apache/tsfile/write/record/datapoint/DataPoint.java @@ -88,6 +88,7 @@ public abstract class DataPoint { case TEXT: case BLOB: case STRING: + case OBJECT: dataPoint = new StringDataPoint(measurementId, new Binary(value, TSFileConfig.STRING_CHARSET)); break; diff --git a/java/tsfile/src/test/java/org/apache/tsfile/file/metadata/statistics/StatisticsTest.java b/java/tsfile/src/test/java/org/apache/tsfile/file/metadata/statistics/StatisticsTest.java index 06aeca38..ba12c635 100644 --- a/java/tsfile/src/test/java/org/apache/tsfile/file/metadata/statistics/StatisticsTest.java +++ b/java/tsfile/src/test/java/org/apache/tsfile/file/metadata/statistics/StatisticsTest.java @@ -108,6 +108,7 @@ public class StatisticsTest { statistics.getLastValue()); break; case BLOB: + case OBJECT: break; default: throw new IllegalArgumentException(statistics.getType().toString()); @@ -162,6 +163,10 @@ public class StatisticsTest { BlobStatistics blobStat = new BlobStatistics(); result = blobStat; break; + case OBJECT: + ObjectStatistics objectStat = new ObjectStatistics(); + result = objectStat; + break; case DATE: DateStatistics dateStat = new DateStatistics(); dateStat.initializeStats(val, val, val, val, val);
