This is an automated email from the ASF dual-hosted git repository.
jiangtian pushed a commit to branch develop
in repository https://gitbox.apache.org/repos/asf/tsfile.git
The following commit(s) were added to refs/heads/develop by this push:
new 9ef47090 Add Tablet.append (#562)
9ef47090 is described below
commit 9ef470900c75f23f3a558c2e5939e38ed1d8034e
Author: Jiang Tian <[email protected]>
AuthorDate: Wed Jul 30 11:39:29 2025 +0800
Add Tablet.append (#562)
* Add Tablet.append
* fix preferred capacity
---
.../main/java/org/apache/tsfile/utils/BitMap.java | 22 +-
.../org/apache/tsfile/write/record/Tablet.java | 174 +++++++++++-
.../org/apache/tsfile/write/record/TabletTest.java | 314 +++++++++++++++++++++
3 files changed, 500 insertions(+), 10 deletions(-)
diff --git a/java/common/src/main/java/org/apache/tsfile/utils/BitMap.java
b/java/common/src/main/java/org/apache/tsfile/utils/BitMap.java
index 8a4c10b4..6f0d0974 100644
--- a/java/common/src/main/java/org/apache/tsfile/utils/BitMap.java
+++ b/java/common/src/main/java/org/apache/tsfile/utils/BitMap.java
@@ -36,8 +36,8 @@ public class BitMap {
(byte) 0X7F // 01111111
};
- private final byte[] bits;
- private final int size;
+ private byte[] bits;
+ private int size;
/** Initialize a BitMap with given size. */
public BitMap(int size) {
@@ -258,4 +258,22 @@ public class BitMap {
public byte[] getTruncatedByteArray(int size) {
return Arrays.copyOf(this.bits, getSizeOfBytes(size));
}
+
+  /**
+   * Copies the first {@code length} bits of {@code another} into this bitmap, starting at
+   * {@code position}: bit {@code i} of {@code another} lands on bit {@code position + i} of this
+   * bitmap. Marked and unmarked source bits are both written, so the previous content of the
+   * target range is overwritten. This method does not call {@code extend} itself.
+   *
+   * @param another the bitmap to read from, beginning at its bit 0
+   * @param position index in this bitmap that receives the first copied bit
+   * @param length number of bits to copy
+   */
+  public void append(BitMap another, int position, int length) {
+    int target = position;
+    for (int source = 0; source < length; source++, target++) {
+      if (!another.isMarked(source)) {
+        unmark(target);
+        continue;
+      }
+      mark(target);
+    }
+  }
+
+  /**
+   * Grows this bitmap to {@code newSize} bits while keeping the existing bytes. Nothing happens
+   * when the bitmap already holds at least {@code newSize} bits, so the bitmap is never shrunk.
+   *
+   * @param newSize the requested size in bits
+   */
+  public void extend(int newSize) {
+    if (newSize > size) {
+      bits = Arrays.copyOf(bits, getSizeOfBytes(newSize));
+      size = newSize;
+    }
+  }
}
diff --git
a/java/tsfile/src/main/java/org/apache/tsfile/write/record/Tablet.java
b/java/tsfile/src/main/java/org/apache/tsfile/write/record/Tablet.java
index ac03949a..c984b422 100644
--- a/java/tsfile/src/main/java/org/apache/tsfile/write/record/Tablet.java
+++ b/java/tsfile/src/main/java/org/apache/tsfile/write/record/Tablet.java
@@ -42,6 +42,7 @@ import java.io.IOException;
import java.nio.ByteBuffer;
import java.time.LocalDate;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -57,6 +58,7 @@ import java.util.Objects;
*
* <p>Notice: The tablet should not have empty cell, please use BitMap to
denote null value
*/
+@SuppressWarnings("SuspiciousSystemArraycopy")
public class Tablet {
private static final int DEFAULT_SIZE = 1024;
@@ -95,7 +97,7 @@ public class Tablet {
private int rowSize;
/** The maximum number of rows for this {@link Tablet} */
- private final int maxRowNumber;
+ private int maxRowNumber;
/**
* Return a {@link Tablet} with default specified row number. This is the
standard constructor
@@ -632,32 +634,36 @@ public class Tablet {
}
private Object createValueColumnOfDataType(TSDataType dataType) {
+ return createValueColumnOfDataType(dataType, maxRowNumber);
+ }
+
+ private Object createValueColumnOfDataType(TSDataType dataType, int
capacity) {
Object valueColumn;
switch (dataType) {
case INT32:
- valueColumn = new int[maxRowNumber];
+ valueColumn = new int[capacity];
break;
case INT64:
case TIMESTAMP:
- valueColumn = new long[maxRowNumber];
+ valueColumn = new long[capacity];
break;
case FLOAT:
- valueColumn = new float[maxRowNumber];
+ valueColumn = new float[capacity];
break;
case DOUBLE:
- valueColumn = new double[maxRowNumber];
+ valueColumn = new double[capacity];
break;
case BOOLEAN:
- valueColumn = new boolean[maxRowNumber];
+ valueColumn = new boolean[capacity];
break;
case TEXT:
case STRING:
case BLOB:
- valueColumn = new Binary[maxRowNumber];
+ valueColumn = new Binary[capacity];
break;
case DATE:
- valueColumn = new LocalDate[maxRowNumber];
+ valueColumn = new LocalDate[capacity];
break;
default:
throw new
UnSupportedDataTypeException(String.format(NOT_SUPPORT_DATATYPE, dataType));
@@ -1320,4 +1326,156 @@ public class Tablet {
}
return true;
}
+
+  /**
+   * Appends all rows of {@code another} to the tail of this tablet without requesting any extra
+   * capacity. Equivalent to {@code append(another, 0)}.
+   *
+   * @param another the tablet whose rows are appended
+   * @return true if the append succeeded, false if the two tablets have inconsistent insert
+   *     targets, schemas or column categories
+   */
+  public boolean append(Tablet another) {
+    final int noPreferredCapacity = 0;
+    return append(another, noPreferredCapacity);
+  }
+
+  /**
+   * Appends all rows of {@code another} to the tail of this tablet and makes sure the capacity
+   * after appending is at least {@code preferredCapacity}. When several tablets are appended one
+   * after another, prefer this method over {@code append(Tablet)} and pass the expected final
+   * size, so that the underlying arrays do not have to be re-allocated by every call.
+   *
+   * @param another the tablet whose rows are appended
+   * @param preferredCapacity if the total size of the two tablets is below this value, this tablet
+   *     is extended to this capacity
+   * @return true if the append succeeded, false (with this tablet left untouched) if the insert
+   *     targets, schemas or column categories of the two tablets differ
+   */
+  public boolean append(Tablet another, int preferredCapacity) {
+    boolean compatible =
+        Objects.equals(insertTargetName, another.insertTargetName)
+            && Objects.equals(schemas, another.schemas)
+            && Objects.equals(columnCategories, another.columnCategories);
+    if (!compatible) {
+      return false;
+    }
+
+    // The length of the timestamp column is used as the capacity of every column; it has to be
+    // read before appendTimestamps possibly re-allocates that array.
+    final int capacityBefore = timestamps.length;
+    final int mergedRowSize = rowSize + another.rowSize;
+
+    appendTimestamps(another, preferredCapacity);
+    appendValues(another, capacityBefore, preferredCapacity);
+    appendBitMaps(another, capacityBefore, preferredCapacity);
+
+    maxRowNumber = Math.max(preferredCapacity, Math.max(maxRowNumber, mergedRowSize));
+    rowSize = mergedRowSize;
+    return true;
+  }
+
+  /**
+   * Copies the timestamps of {@code another} behind the timestamps of this tablet. The timestamp
+   * array is re-allocated first when it cannot hold the rows of both tablets or when it is shorter
+   * than {@code preferredCapacity}.
+   */
+  private void appendTimestamps(Tablet another, int preferredCapacity) {
+    final int requiredSize = rowSize + another.rowSize;
+    final boolean reusable =
+        requiredSize <= timestamps.length && timestamps.length >= preferredCapacity;
+    if (!reusable) {
+      timestamps = Arrays.copyOf(timestamps, Math.max(requiredSize, preferredCapacity));
+    }
+    System.arraycopy(another.timestamps, 0, timestamps, rowSize, another.rowSize);
+  }
+
+  /** Appends every value column of {@code another} to the column with the same index here. */
+  private void appendValues(Tablet another, int prevCapacity, int preferredCapacity) {
+    final int columnCount = schemas.size();
+    int columnIndex = 0;
+    while (columnIndex < columnCount) {
+      TSDataType columnType = schemas.get(columnIndex).getType();
+      appendValue(another, prevCapacity, columnIndex, columnType, preferredCapacity);
+      columnIndex++;
+    }
+  }
+
+  /**
+   * Appends one value column of {@code another} to the same column of this tablet. The existing
+   * array is reused when {@code prevCapacity} covers both the merged rows and
+   * {@code preferredCapacity}; otherwise a new array of the column's data type is created and
+   * both parts are copied into it.
+   *
+   * @param another the tablet providing the rows to append
+   * @param prevCapacity capacity of this tablet's columns before the append started
+   * @param columnIndex index of the column to process
+   * @param dataType data type of that column, used when a new array has to be created
+   * @param preferredCapacity minimum capacity requested by the caller
+   */
+  private void appendValue(
+      Tablet another,
+      int prevCapacity,
+      int columnIndex,
+      TSDataType dataType,
+      int preferredCapacity) {
+    final Object current = values[columnIndex];
+    final Object source = another.values[columnIndex];
+    final int requiredSize = rowSize + another.rowSize;
+
+    final boolean reusable = requiredSize <= prevCapacity && prevCapacity >= preferredCapacity;
+    if (reusable) {
+      System.arraycopy(source, 0, current, rowSize, another.rowSize);
+      return;
+    }
+
+    final int newCapacity = Math.max(requiredSize, preferredCapacity);
+    Object enlarged = createValueColumnOfDataType(dataType, newCapacity);
+    System.arraycopy(current, 0, enlarged, 0, rowSize);
+    System.arraycopy(source, 0, enlarged, rowSize, another.rowSize);
+    values[columnIndex] = enlarged;
+  }
+
+  /**
+   * Appends the null-markers of {@code another} to this tablet, dispatching on which of the two
+   * tablets actually owns a bitmap array. Nothing is done when neither tablet has bitmaps.
+   */
+  private void appendBitMaps(Tablet another, int prevCapacity, int preferredCapacity) {
+    final boolean thisHasBitMaps = bitMaps != null;
+    final boolean thatHasBitMaps = another.bitMaps != null;
+
+    if (thisHasBitMaps && thatHasBitMaps) {
+      appendBitMapsWhenNoNull(another, prevCapacity, preferredCapacity);
+    } else if (thisHasBitMaps) {
+      appendBitMapsWhenThatNull(another, prevCapacity, preferredCapacity);
+    } else if (thatHasBitMaps) {
+      appendBitMapsWhenThisNull(another, prevCapacity, preferredCapacity);
+    }
+  }
+
+  /**
+   * Handles the case where this tablet has no bitmap array but {@code another} has one: a new
+   * bitmap array is created, and for every column that has a bitmap in {@code another} a fresh
+   * bitmap is allocated whose bits behind the current rows are copied from {@code another}.
+   */
+  private void appendBitMapsWhenThisNull(Tablet another, int prevCapacity, int preferredCapacity) {
+    final int offset = rowSize;
+    final int appended = another.rowSize;
+    final int bitMapSize = Math.max(prevCapacity, Math.max(offset + appended, preferredCapacity));
+
+    bitMaps = new BitMap[schemas.size()];
+    for (int col = 0; col < bitMaps.length; col++) {
+      BitMap source = another.bitMaps[col];
+      if (source == null) {
+        continue;
+      }
+      bitMaps[col] = new BitMap(bitMapSize);
+      bitMaps[col].append(source, offset, appended);
+    }
+  }
+
+  /**
+   * Handles the case where this tablet has a bitmap array but {@code another} has none: every
+   * existing bitmap is extended and the bits of the appended rows are unmarked.
+   */
+  private void appendBitMapsWhenThatNull(Tablet another, int prevCapacity, int preferredCapacity) {
+    final int firstAppendedRow = rowSize;
+    final int endRow = firstAppendedRow + another.rowSize;
+    final int bitMapSize = Math.max(prevCapacity, Math.max(endRow, preferredCapacity));
+
+    for (BitMap bitMap : bitMaps) {
+      if (bitMap == null) {
+        continue;
+      }
+      bitMap.extend(bitMapSize);
+      for (int row = firstAppendedRow; row < endRow; row++) {
+        bitMap.unmark(row);
+      }
+    }
+  }
+
+  /**
+   * Handles the case where both tablets own a bitmap array. Each column is treated on its own:
+   *
+   * <ul>
+   *   <li>no bitmap on either side: the column is skipped;
+   *   <li>bitmap only in {@code another}: a new bitmap is created here and the appended range is
+   *       copied from {@code another};
+   *   <li>bitmap only here: it is extended and the appended range is unmarked;
+   *   <li>bitmap on both sides: it is extended and the appended range is copied from
+   *       {@code another}.
+   * </ul>
+   */
+  private void appendBitMapsWhenNoNull(Tablet another, int prevCapacity, int preferredCapacity) {
+    final int offset = rowSize;
+    final int appended = another.rowSize;
+    final int bitMapSize = Math.max(prevCapacity, Math.max(offset + appended, preferredCapacity));
+
+    for (int col = 0; col < bitMaps.length; col++) {
+      final BitMap source = another.bitMaps[col];
+
+      if (bitMaps[col] == null) {
+        if (source != null) {
+          bitMaps[col] = new BitMap(bitMapSize);
+          bitMaps[col].append(source, offset, appended);
+        }
+        continue;
+      }
+
+      bitMaps[col].extend(bitMapSize);
+      if (source != null) {
+        bitMaps[col].append(source, offset, appended);
+      } else {
+        for (int row = 0; row < appended; row++) {
+          bitMaps[col].unmark(offset + row);
+        }
+      }
+    }
+  }
}
diff --git
a/java/tsfile/src/test/java/org/apache/tsfile/write/record/TabletTest.java
b/java/tsfile/src/test/java/org/apache/tsfile/write/record/TabletTest.java
index 07aedad4..25828400 100644
--- a/java/tsfile/src/test/java/org/apache/tsfile/write/record/TabletTest.java
+++ b/java/tsfile/src/test/java/org/apache/tsfile/write/record/TabletTest.java
@@ -20,10 +20,12 @@
package org.apache.tsfile.write.record;
import org.apache.tsfile.common.conf.TSFileConfig;
+import org.apache.tsfile.enums.ColumnCategory;
import org.apache.tsfile.enums.TSDataType;
import org.apache.tsfile.file.metadata.enums.TSEncoding;
import org.apache.tsfile.utils.Binary;
import org.apache.tsfile.utils.BitMap;
+import org.apache.tsfile.utils.Pair;
import org.apache.tsfile.write.schema.IMeasurementSchema;
import org.apache.tsfile.write.schema.MeasurementSchema;
@@ -32,13 +34,19 @@ import org.junit.Test;
import java.io.IOException;
import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
import java.time.LocalDate;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.HashSet;
import java.util.List;
+import java.util.Random;
+import java.util.Set;
import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
public class TabletTest {
@@ -297,4 +305,310 @@ public class TabletTest {
Assert.assertEquals(tablet.getValue(1, 1), deserializeTablet.getValue(1,
1));
Assert.assertTrue(deserializeTablet.isNull(1, 0));
}
+
+  /** Appending must be rejected when table name, column name, type or category differ. */
+  @Test
+  public void testAppendInconsistent() {
+    Tablet base =
+        newTwoColumnTabletForAppendTest(
+            "table1", "tag1", TSDataType.INT32, ColumnCategory.FIELD);
+
+    Tablet otherTable =
+        newTwoColumnTabletForAppendTest(
+            "table2", "tag1", TSDataType.INT32, ColumnCategory.FIELD);
+    assertFalse(base.append(otherTable));
+
+    Tablet otherColumnName =
+        newTwoColumnTabletForAppendTest(
+            "table1", "tag2", TSDataType.INT32, ColumnCategory.FIELD);
+    assertFalse(base.append(otherColumnName));
+
+    Tablet otherColumnType =
+        newTwoColumnTabletForAppendTest(
+            "table1", "tag1", TSDataType.INT64, ColumnCategory.FIELD);
+    assertFalse(base.append(otherColumnType));
+
+    Tablet otherColumnCategory =
+        newTwoColumnTabletForAppendTest("table1", "tag1", TSDataType.INT32, ColumnCategory.TAG);
+    assertFalse(base.append(otherColumnCategory));
+  }
+
+  /**
+   * Builds an empty tablet with a STRING tag column named {@code tagName} followed by a column
+   * named "s1" of the given type and category.
+   */
+  private Tablet newTwoColumnTabletForAppendTest(
+      String tableName, String tagName, TSDataType s1Type, ColumnCategory s1Category) {
+    return new Tablet(
+        tableName,
+        Arrays.asList(tagName, "s1"),
+        Arrays.asList(TSDataType.STRING, s1Type),
+        Arrays.asList(ColumnCategory.TAG, s1Category));
+  }
+
+  /**
+   * Fills rows {@code 0..length-1} of {@code t}. Row {@code i} gets the timestamp
+   * {@code i + valueOffset}, and every column gets a value derived from that same number according
+   * to the column's data type.
+   */
+  private void fillTablet(Tablet t, int valueOffset, int length) {
+    for (int row = 0; row < length; row++) {
+      final int seed = row + valueOffset;
+      t.addTimestamp(row, seed);
+
+      final int columnCount = t.getSchemas().size();
+      for (int col = 0; col < columnCount; col++) {
+        TSDataType type = t.getSchemas().get(col).getType();
+        switch (type) {
+          case INT32:
+            t.addValue(row, col, seed);
+            break;
+          case INT64:
+          case TIMESTAMP:
+            t.addValue(row, col, (long) seed);
+            break;
+          case FLOAT:
+            t.addValue(row, col, seed * 1.0f);
+            break;
+          case DOUBLE:
+            t.addValue(row, col, seed * 1.0);
+            break;
+          case BOOLEAN:
+            t.addValue(row, col, seed % 2 == 0);
+            break;
+          case STRING:
+          case TEXT:
+          case BLOB:
+            t.addValue(row, col, String.valueOf(seed));
+            break;
+          case DATE:
+            t.addValue(row, col, LocalDate.of(seed, 1, 1));
+            break;
+          default:
+            // columns of any other type are left unfilled
+            break;
+        }
+      }
+    }
+  }
+
+ /**
+ * Column names shared by the append tests: one tag column followed by one column per data type,
+ * each named after its type.
+ */
+ private final List<String> colNamesForAppendTest =
+ Arrays.asList(
+ "tag1",
+ TSDataType.INT32.name(),
+ TSDataType.INT64.name(),
+ TSDataType.FLOAT.name(),
+ TSDataType.DOUBLE.name(),
+ TSDataType.BOOLEAN.name(),
+ TSDataType.TEXT.name(),
+ TSDataType.STRING.name(),
+ TSDataType.BLOB.name(),
+ TSDataType.TIMESTAMP.name(),
+ TSDataType.DATE.name());
+ /** Data types of the columns above, in the same order; the tag column is a STRING. */
+ private final List<TSDataType> dataTypesForAppendTest =
+ Arrays.asList(
+ TSDataType.STRING,
+ TSDataType.INT32,
+ TSDataType.INT64,
+ TSDataType.FLOAT,
+ TSDataType.DOUBLE,
+ TSDataType.BOOLEAN,
+ TSDataType.TEXT,
+ TSDataType.STRING,
+ TSDataType.BLOB,
+ TSDataType.TIMESTAMP,
+ TSDataType.DATE);
+ /** Categories of the columns above, in the same order: one TAG followed by ten FIELDs. */
+ private final List<ColumnCategory> categoriesForAppendTest =
+ Arrays.asList(
+ ColumnCategory.TAG,
+ ColumnCategory.FIELD,
+ ColumnCategory.FIELD,
+ ColumnCategory.FIELD,
+ ColumnCategory.FIELD,
+ ColumnCategory.FIELD,
+ ColumnCategory.FIELD,
+ ColumnCategory.FIELD,
+ ColumnCategory.FIELD,
+ ColumnCategory.FIELD,
+ ColumnCategory.FIELD);
+
+  /** Appends one fully filled tablet to another and checks all merged rows. */
+  @Test
+  public void testAppendNoNull() {
+    final int headSize = 100;
+    final int tailSize = 100;
+
+    Tablet head =
+        new Tablet(
+            "table1", colNamesForAppendTest, dataTypesForAppendTest, categoriesForAppendTest);
+    fillTablet(head, 0, headSize);
+
+    Tablet tail =
+        new Tablet(
+            "table1", colNamesForAppendTest, dataTypesForAppendTest, categoriesForAppendTest);
+    fillTablet(tail, headSize, tailSize);
+
+    boolean appended = head.append(tail);
+    assertTrue(appended);
+    checkAppendedTablet(head, headSize + tailSize, null);
+  }
+
+  /** Appends with a preferred capacity and checks that the tablet grows to that capacity. */
+  @Test
+  public void testPreferredCapacity() {
+    final int headSize = 100;
+    final int tailSize = 100;
+
+    Tablet head =
+        new Tablet(
+            "table1", colNamesForAppendTest, dataTypesForAppendTest, categoriesForAppendTest);
+    fillTablet(head, 0, headSize);
+
+    Tablet tail =
+        new Tablet(
+            "table1", colNamesForAppendTest, dataTypesForAppendTest, categoriesForAppendTest);
+    fillTablet(tail, headSize, tailSize);
+
+    boolean appended = head.append(tail, 10000);
+    assertTrue(appended);
+    checkAppendedTablet(head, headSize + tailSize, null);
+    assertEquals(10000, head.getMaxRowNumber());
+  }
+
+  /**
+   * Marks random cells of both tablets as null and verifies that, after appending, exactly those
+   * cells are null in the merged tablet while every other cell keeps its value.
+   */
+  @Test
+  public void testAppendNullPoints() {
+    // (row, column) positions, in the merged tablet, that are expected to be null
+    Set<Pair<Integer, Integer>> nullPositions = new HashSet<>();
+    int nullPointNum = 10;
+    Random random = new Random();
+
+    Tablet t1 =
+        new Tablet(
+            "table1", colNamesForAppendTest, dataTypesForAppendTest, categoriesForAppendTest);
+
+    int t1Size = 100;
+    fillTablet(t1, 0, t1Size);
+    for (int i = 0; i < nullPointNum; i++) {
+      int rowIndex = random.nextInt(t1Size);
+      int columnIndex = random.nextInt(colNamesForAppendTest.size());
+      nullPositions.add(new Pair<>(rowIndex, columnIndex));
+      t1.getBitMaps()[columnIndex].mark(rowIndex);
+    }
+
+    int t2Size = 100;
+    Tablet t2 =
+        new Tablet(
+            "table1", colNamesForAppendTest, dataTypesForAppendTest, categoriesForAppendTest);
+    fillTablet(t2, t1Size, t2Size);
+    for (int i = 0; i < nullPointNum; i++) {
+      // Rows of t2 must be drawn from t2's own size; using t1Size only works while both are equal.
+      int rowIndex = random.nextInt(t2Size);
+      int columnIndex = random.nextInt(colNamesForAppendTest.size());
+      // After the append, row r of t2 becomes row r + t1Size of the merged tablet.
+      nullPositions.add(new Pair<>(rowIndex + t1Size, columnIndex));
+      t2.getBitMaps()[columnIndex].mark(rowIndex);
+    }
+
+    assertTrue(t1.append(t2));
+    checkAppendedTablet(t1, t1Size + t2Size, nullPositions);
+  }
+
+  /**
+   * Removes the bitmaps of some randomly chosen columns in both tablets and checks that appending
+   * still yields a tablet without any null cell.
+   */
+  @Test
+  public void testAppendNullBitMapColumn() {
+    final int droppedBitMaps = 5;
+    final int columnCount = colNamesForAppendTest.size();
+    Random random = new Random();
+
+    Tablet head =
+        new Tablet(
+            "table1", colNamesForAppendTest, dataTypesForAppendTest, categoriesForAppendTest);
+    final int headSize = 100;
+    fillTablet(head, 0, headSize);
+    for (int n = 0; n < droppedBitMaps; n++) {
+      head.getBitMaps()[random.nextInt(columnCount)] = null;
+    }
+
+    Tablet tail =
+        new Tablet(
+            "table1", colNamesForAppendTest, dataTypesForAppendTest, categoriesForAppendTest);
+    final int tailSize = 100;
+    fillTablet(tail, headSize, tailSize);
+    for (int n = 0; n < droppedBitMaps; n++) {
+      tail.getBitMaps()[random.nextInt(columnCount)] = null;
+    }
+
+    assertTrue(head.append(tail));
+    assertEquals(headSize + tailSize, head.getRowSize());
+
+    checkAppendedTablet(head, headSize + tailSize, null);
+  }
+
+  /** Appends to a tablet whose whole bitmap array has been set to null. */
+  @Test
+  public void testAppendThisNullBitMap() {
+    final int headSize = 100;
+    final int tailSize = 100;
+
+    Tablet head =
+        new Tablet(
+            "table1", colNamesForAppendTest, dataTypesForAppendTest, categoriesForAppendTest);
+    fillTablet(head, 0, headSize);
+    head.setBitMaps(null);
+
+    Tablet tail =
+        new Tablet(
+            "table1", colNamesForAppendTest, dataTypesForAppendTest, categoriesForAppendTest);
+    fillTablet(tail, headSize, tailSize);
+
+    boolean appended = head.append(tail);
+    assertTrue(appended);
+    checkAppendedTablet(head, headSize + tailSize, null);
+  }
+
+  /** Appends nine tablets, one after another, to a first one and checks the merged content. */
+  @Test
+  public void testMultipleAppend() {
+    final int tabletNum = 10;
+    final int singleTabletSize = 100;
+
+    List<Tablet> tablets = new ArrayList<>();
+    for (int n = 0; n < tabletNum; n++) {
+      Tablet tablet =
+          new Tablet(
+              "table1", colNamesForAppendTest, dataTypesForAppendTest, categoriesForAppendTest);
+      fillTablet(tablet, n * singleTabletSize, singleTabletSize);
+      tablets.add(tablet);
+    }
+
+    Tablet merged = tablets.get(0);
+    for (Tablet next : tablets.subList(1, tabletNum)) {
+      assertTrue(merged.append(next));
+    }
+    checkAppendedTablet(merged, singleTabletSize * tabletNum, null);
+  }
+
+  /**
+   * Verifies a tablet produced by appending tablets that were filled by {@code fillTablet} with
+   * consecutive offsets: row {@code i} must carry timestamp {@code i} and, in every column, the
+   * value derived from {@code i}.
+   *
+   * @param result the merged tablet
+   * @param totalSize expected number of rows
+   * @param nullPositions (row, column) pairs expected to be null; may be null when no cell is
+   *     expected to be null
+   */
+  private void checkAppendedTablet(
+      Tablet result, int totalSize, Set<Pair<Integer, Integer>> nullPositions) {
+    assertEquals(totalSize, result.getRowSize());
+
+    for (int row = 0; row < totalSize; row++) {
+      assertEquals(row, result.getTimestamp(row));
+
+      final int columnCount = result.getSchemas().size();
+      for (int col = 0; col < columnCount; col++) {
+        boolean expectNull = nullPositions != null && nullPositions.contains(new Pair<>(row, col));
+        if (expectNull) {
+          assertTrue(result.isNull(row, col));
+          continue;
+        }
+        assertFalse(result.isNull(row, col));
+
+        switch (result.getSchemas().get(col).getType()) {
+          case INT32:
+            assertEquals(row, result.getValue(row, col));
+            break;
+          case INT64:
+          case TIMESTAMP:
+            assertEquals((long) row, result.getValue(row, col));
+            break;
+          case FLOAT:
+            assertEquals(row * 1.0f, (float) result.getValue(row, col), 0.0001f);
+            break;
+          case DOUBLE:
+            assertEquals(row * 1.0, (double) result.getValue(row, col), 0.0001);
+            break;
+          case BOOLEAN:
+            assertEquals(row % 2 == 0, result.getValue(row, col));
+            break;
+          case STRING:
+          case TEXT:
+          case BLOB:
+            Binary expectedBinary =
+                new Binary(String.valueOf(row).getBytes(StandardCharsets.UTF_8));
+            assertEquals(expectedBinary, result.getValue(row, col));
+            break;
+          case DATE:
+            assertEquals(LocalDate.of(row, 1, 1), result.getValue(row, col));
+            break;
+          default:
+            // no expectation for other data types
+            break;
+        }
+      }
+    }
+  }
}