This is an automated email from the ASF dual-hosted git repository.

jiangtian pushed a commit to branch develop
in repository https://gitbox.apache.org/repos/asf/tsfile.git


The following commit(s) were added to refs/heads/develop by this push:
     new 9ef47090 Add Tablet.append (#562)
9ef47090 is described below

commit 9ef470900c75f23f3a558c2e5939e38ed1d8034e
Author: Jiang Tian <[email protected]>
AuthorDate: Wed Jul 30 11:39:29 2025 +0800

    Add Tablet.append (#562)
    
    * Add Tablet.append
    
    * fix preferred capacity
---
 .../main/java/org/apache/tsfile/utils/BitMap.java  |  22 +-
 .../org/apache/tsfile/write/record/Tablet.java     | 174 +++++++++++-
 .../org/apache/tsfile/write/record/TabletTest.java | 314 +++++++++++++++++++++
 3 files changed, 500 insertions(+), 10 deletions(-)

diff --git a/java/common/src/main/java/org/apache/tsfile/utils/BitMap.java b/java/common/src/main/java/org/apache/tsfile/utils/BitMap.java
index 8a4c10b4..6f0d0974 100644
--- a/java/common/src/main/java/org/apache/tsfile/utils/BitMap.java
+++ b/java/common/src/main/java/org/apache/tsfile/utils/BitMap.java
@@ -36,8 +36,8 @@ public class BitMap {
         (byte) 0X7F // 01111111
       };
 
-  private final byte[] bits;
-  private final int size;
+  private byte[] bits;
+  private int size;
 
   /** Initialize a BitMap with given size. */
   public BitMap(int size) {
@@ -258,4 +258,22 @@ public class BitMap {
   public byte[] getTruncatedByteArray(int size) {
     return Arrays.copyOf(this.bits, getSizeOfBytes(size));
   }
+
+  public void append(BitMap another, int position, int length) {
+    for (int i = 0; i < length; i++) {
+      if (another.isMarked(i)) {
+        mark(position + i);
+      } else {
+        unmark(position + i);
+      }
+    }
+  }
+
+  public void extend(int newSize) {
+    if (size >= newSize) {
+      return;
+    }
+    bits = Arrays.copyOf(bits, getSizeOfBytes(newSize));
+    size = newSize;
+  }
 }
diff --git a/java/tsfile/src/main/java/org/apache/tsfile/write/record/Tablet.java b/java/tsfile/src/main/java/org/apache/tsfile/write/record/Tablet.java
index ac03949a..c984b422 100644
--- a/java/tsfile/src/main/java/org/apache/tsfile/write/record/Tablet.java
+++ b/java/tsfile/src/main/java/org/apache/tsfile/write/record/Tablet.java
@@ -42,6 +42,7 @@ import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.time.LocalDate;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -57,6 +58,7 @@ import java.util.Objects;
  *
  * <p>Notice: The tablet should not have empty cell, please use BitMap to denote null value
  */
+@SuppressWarnings("SuspiciousSystemArraycopy")
 public class Tablet {
 
   private static final int DEFAULT_SIZE = 1024;
@@ -95,7 +97,7 @@ public class Tablet {
   private int rowSize;
 
   /** The maximum number of rows for this {@link Tablet} */
-  private final int maxRowNumber;
+  private int maxRowNumber;
 
   /**
   * Return a {@link Tablet} with default specified row number. This is the standard constructor
@@ -632,32 +634,36 @@ public class Tablet {
   }
 
   private Object createValueColumnOfDataType(TSDataType dataType) {
+    return createValueColumnOfDataType(dataType, maxRowNumber);
+  }
+
+  private Object createValueColumnOfDataType(TSDataType dataType, int capacity) {
 
     Object valueColumn;
     switch (dataType) {
       case INT32:
-        valueColumn = new int[maxRowNumber];
+        valueColumn = new int[capacity];
         break;
       case INT64:
       case TIMESTAMP:
-        valueColumn = new long[maxRowNumber];
+        valueColumn = new long[capacity];
         break;
       case FLOAT:
-        valueColumn = new float[maxRowNumber];
+        valueColumn = new float[capacity];
         break;
       case DOUBLE:
-        valueColumn = new double[maxRowNumber];
+        valueColumn = new double[capacity];
         break;
       case BOOLEAN:
-        valueColumn = new boolean[maxRowNumber];
+        valueColumn = new boolean[capacity];
         break;
       case TEXT:
       case STRING:
       case BLOB:
-        valueColumn = new Binary[maxRowNumber];
+        valueColumn = new Binary[capacity];
         break;
       case DATE:
-        valueColumn = new LocalDate[maxRowNumber];
+        valueColumn = new LocalDate[capacity];
         break;
       default:
         throw new UnSupportedDataTypeException(String.format(NOT_SUPPORT_DATATYPE, dataType));
@@ -1320,4 +1326,156 @@ public class Tablet {
     }
     return true;
   }
+
+  /**
+   * Append `another` to the tail of this tablet.
+   *
+   * @return true if the append succeeds, false if the tablets have inconsistent insertTarget or
+   *     schemas.
+   */
+  public boolean append(Tablet another) {
+    return append(another, 0);
+  }
+
+  /**
+   * Append `another` to the tail of this tablet, with a preferred capacity after appending. To
+   * avoid frequent memory copy, it is highly recommended to use this method instead of
+   * `append(Tablet)` when multiple appending could be involved.
+   *
+   * @param preferredCapacity if the total size of the two tablets is below this value, this tablet
+   *     will extend to the capacity.
+   * @return true if the append succeeds, false if the tablets have inconsistent insertTarget or
+   *     schemas.
+   */
+  public boolean append(Tablet another, int preferredCapacity) {
+    if (!Objects.equals(insertTargetName, another.insertTargetName)) {
+      return false;
+    }
+
+    if (!Objects.equals(schemas, another.schemas)) {
+      return false;
+    }
+
+    if (!Objects.equals(columnCategories, another.columnCategories)) {
+      return false;
+    }
+
+    int prevCapacity = timestamps.length;
+    appendTimestamps(another, preferredCapacity);
+    appendValues(another, prevCapacity, preferredCapacity);
+    appendBitMaps(another, prevCapacity, preferredCapacity);
+
+    maxRowNumber = Math.max(preferredCapacity, Math.max(maxRowNumber, rowSize + another.rowSize));
+    rowSize = rowSize + another.rowSize;
+    return true;
+  }
+
+  private void appendTimestamps(Tablet another, int preferredCapacity) {
+    int capacity = timestamps.length;
+    int thisSize = rowSize;
+    int thatSize = another.rowSize;
+    int totalSize = Math.max(thisSize + thatSize, preferredCapacity);
+
+    if (thisSize + thatSize <= capacity && capacity >= preferredCapacity) {
+      System.arraycopy(another.timestamps, 0, timestamps, thisSize, thatSize);
+    } else {
+      timestamps = Arrays.copyOf(timestamps, totalSize);
+      System.arraycopy(another.timestamps, 0, timestamps, thisSize, thatSize);
+    }
+  }
+
+  private void appendValues(Tablet another, int prevCapacity, int preferredCapacity) {
+    for (int i = 0; i < schemas.size(); i++) {
+      appendValue(another, prevCapacity, i, schemas.get(i).getType(), preferredCapacity);
+    }
+  }
+
+  private void appendValue(
+      Tablet another,
+      int prevCapacity,
+      int columnIndex,
+      TSDataType dataType,
+      int preferredCapacity) {
+    Object thisCol = values[columnIndex];
+    Object anotherCol = another.values[columnIndex];
+
+    int thisSize = rowSize;
+    int thatSize = another.rowSize;
+    int totalSize = Math.max(thisSize + thatSize, preferredCapacity);
+
+    if (thisSize + thatSize <= prevCapacity && prevCapacity >= preferredCapacity) {
+      System.arraycopy(anotherCol, 0, thisCol, thisSize, thatSize);
+    } else {
+      Object newCol = createValueColumnOfDataType(dataType, totalSize);
+      System.arraycopy(thisCol, 0, newCol, 0, thisSize);
+      System.arraycopy(anotherCol, 0, newCol, thisSize, thatSize);
+      values[columnIndex] = newCol;
+    }
+  }
+
+  private void appendBitMaps(Tablet another, int prevCapacity, int preferredCapacity) {
+    if (bitMaps == null && another.bitMaps == null) {
+      return;
+    }
+
+    if (bitMaps == null) {
+      appendBitMapsWhenThisNull(another, prevCapacity, preferredCapacity);
+    } else if (another.bitMaps == null) {
+      appendBitMapsWhenThatNull(another, prevCapacity, preferredCapacity);
+    } else {
+      appendBitMapsWhenNoNull(another, prevCapacity, preferredCapacity);
+    }
+  }
+
+  private void appendBitMapsWhenThisNull(Tablet another, int prevCapacity, int preferredCapacity) {
+    int thisSize = rowSize;
+    int thatSize = another.rowSize;
+    bitMaps = new BitMap[schemas.size()];
+    int totalSize = Math.max(prevCapacity, Math.max(thisSize + thatSize, preferredCapacity));
+    for (int i = 0; i < bitMaps.length; i++) {
+      if (another.bitMaps[i] != null) {
+        bitMaps[i] = new BitMap(totalSize);
+        bitMaps[i].append(another.bitMaps[i], thisSize, thatSize);
+      }
+    }
+  }
+
+  private void appendBitMapsWhenThatNull(Tablet another, int prevCapacity, int preferredCapacity) {
+    int thisSize = rowSize;
+    int thatSize = another.rowSize;
+    int totalSize = Math.max(prevCapacity, Math.max(thisSize + thatSize, preferredCapacity));
+    for (BitMap bitMap : bitMaps) {
+      if (bitMap != null) {
+        bitMap.extend(totalSize);
+        for (int j = 0; j < thatSize; j++) {
+          bitMap.unmark(j + thisSize);
+        }
+      }
+    }
+  }
+
+  private void appendBitMapsWhenNoNull(Tablet another, int prevCapacity, int preferredCapacity) {
+    int thisSize = rowSize;
+    int thatSize = another.rowSize;
+    int totalSize = Math.max(prevCapacity, Math.max(thisSize + thatSize, preferredCapacity));
+
+    for (int i = 0; i < bitMaps.length; i++) {
+      if (bitMaps[i] == null && another.bitMaps[i] == null) {
+        continue;
+      }
+
+      if (bitMaps[i] == null && another.bitMaps[i] != null) {
+        bitMaps[i] = new BitMap(totalSize);
+        bitMaps[i].append(another.bitMaps[i], thisSize, thatSize);
+      } else if (bitMaps[i] != null && another.bitMaps[i] == null) {
+        bitMaps[i].extend(totalSize);
+        for (int j = 0; j < thatSize; j++) {
+          bitMaps[i].unmark(j + thisSize);
+        }
+      } else if (bitMaps[i] != null && another.bitMaps[i] != null) {
+        bitMaps[i].extend(totalSize);
+        bitMaps[i].append(another.bitMaps[i], thisSize, thatSize);
+      }
+    }
+  }
 }
diff --git a/java/tsfile/src/test/java/org/apache/tsfile/write/record/TabletTest.java b/java/tsfile/src/test/java/org/apache/tsfile/write/record/TabletTest.java
index 07aedad4..25828400 100644
--- a/java/tsfile/src/test/java/org/apache/tsfile/write/record/TabletTest.java
+++ b/java/tsfile/src/test/java/org/apache/tsfile/write/record/TabletTest.java
@@ -20,10 +20,12 @@
 package org.apache.tsfile.write.record;
 
 import org.apache.tsfile.common.conf.TSFileConfig;
+import org.apache.tsfile.enums.ColumnCategory;
 import org.apache.tsfile.enums.TSDataType;
 import org.apache.tsfile.file.metadata.enums.TSEncoding;
 import org.apache.tsfile.utils.Binary;
 import org.apache.tsfile.utils.BitMap;
+import org.apache.tsfile.utils.Pair;
 import org.apache.tsfile.write.schema.IMeasurementSchema;
 import org.apache.tsfile.write.schema.MeasurementSchema;
 
@@ -32,13 +34,19 @@ import org.junit.Test;
 
 import java.io.IOException;
 import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
 import java.time.LocalDate;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.HashSet;
 import java.util.List;
+import java.util.Random;
+import java.util.Set;
 
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 
 public class TabletTest {
@@ -297,4 +305,310 @@ public class TabletTest {
     Assert.assertEquals(tablet.getValue(1, 1), deserializeTablet.getValue(1, 1));
     Assert.assertTrue(deserializeTablet.isNull(1, 0));
   }
+
+  @Test
+  public void testAppendInconsistent() {
+    Tablet t1 =
+        new Tablet(
+            "table1",
+            Arrays.asList("tag1", "s1"),
+            Arrays.asList(TSDataType.STRING, TSDataType.INT32),
+            Arrays.asList(ColumnCategory.TAG, ColumnCategory.FIELD));
+
+    Tablet tWrongTable =
+        new Tablet(
+            "table2",
+            Arrays.asList("tag1", "s1"),
+            Arrays.asList(TSDataType.STRING, TSDataType.INT32),
+            Arrays.asList(ColumnCategory.TAG, ColumnCategory.FIELD));
+    assertFalse(t1.append(tWrongTable));
+
+    Tablet tWrongColName =
+        new Tablet(
+            "table1",
+            Arrays.asList("tag2", "s1"),
+            Arrays.asList(TSDataType.STRING, TSDataType.INT32),
+            Arrays.asList(ColumnCategory.TAG, ColumnCategory.FIELD));
+    assertFalse(t1.append(tWrongColName));
+
+    Tablet tWrongColType =
+        new Tablet(
+            "table1",
+            Arrays.asList("tag1", "s1"),
+            Arrays.asList(TSDataType.STRING, TSDataType.INT64),
+            Arrays.asList(ColumnCategory.TAG, ColumnCategory.FIELD));
+    assertFalse(t1.append(tWrongColType));
+
+    Tablet tWrongColCategory =
+        new Tablet(
+            "table1",
+            Arrays.asList("tag1", "s1"),
+            Arrays.asList(TSDataType.STRING, TSDataType.INT32),
+            Arrays.asList(ColumnCategory.TAG, ColumnCategory.TAG));
+    assertFalse(t1.append(tWrongColCategory));
+  }
+
+  private void fillTablet(Tablet t, int valueOffset, int length) {
+    for (int i = 0; i < length; i++) {
+      t.addTimestamp(i, i + valueOffset);
+      for (int j = 0; j < t.getSchemas().size(); j++) {
+        switch (t.getSchemas().get(j).getType()) {
+          case INT32:
+            t.addValue(i, j, i + valueOffset);
+            break;
+          case TIMESTAMP:
+          case INT64:
+            t.addValue(i, j, (long) (i + valueOffset));
+            break;
+          case FLOAT:
+            t.addValue(i, j, (i + valueOffset) * 1.0f);
+            break;
+          case DOUBLE:
+            t.addValue(i, j, (i + valueOffset) * 1.0);
+            break;
+          case BOOLEAN:
+            t.addValue(i, j, (i + valueOffset) % 2 == 0);
+            break;
+          case TEXT:
+          case STRING:
+          case BLOB:
+            t.addValue(i, j, String.valueOf(i + valueOffset));
+            break;
+          case DATE:
+            t.addValue(i, j, LocalDate.of(i + valueOffset, 1, 1));
+            break;
+        }
+      }
+    }
+  }
+
+  private final List<String> colNamesForAppendTest =
+      Arrays.asList(
+          "tag1",
+          TSDataType.INT32.name(),
+          TSDataType.INT64.name(),
+          TSDataType.FLOAT.name(),
+          TSDataType.DOUBLE.name(),
+          TSDataType.BOOLEAN.name(),
+          TSDataType.TEXT.name(),
+          TSDataType.STRING.name(),
+          TSDataType.BLOB.name(),
+          TSDataType.TIMESTAMP.name(),
+          TSDataType.DATE.name());
+  private final List<TSDataType> dataTypesForAppendTest =
+      Arrays.asList(
+          TSDataType.STRING,
+          TSDataType.INT32,
+          TSDataType.INT64,
+          TSDataType.FLOAT,
+          TSDataType.DOUBLE,
+          TSDataType.BOOLEAN,
+          TSDataType.TEXT,
+          TSDataType.STRING,
+          TSDataType.BLOB,
+          TSDataType.TIMESTAMP,
+          TSDataType.DATE);
+  private final List<ColumnCategory> categoriesForAppendTest =
+      Arrays.asList(
+          ColumnCategory.TAG,
+          ColumnCategory.FIELD,
+          ColumnCategory.FIELD,
+          ColumnCategory.FIELD,
+          ColumnCategory.FIELD,
+          ColumnCategory.FIELD,
+          ColumnCategory.FIELD,
+          ColumnCategory.FIELD,
+          ColumnCategory.FIELD,
+          ColumnCategory.FIELD,
+          ColumnCategory.FIELD);
+
+  @Test
+  public void testAppendNoNull() {
+    Tablet t1 =
+        new Tablet(
+            "table1", colNamesForAppendTest, dataTypesForAppendTest, categoriesForAppendTest);
+
+    int t1Size = 100;
+    fillTablet(t1, 0, t1Size);
+
+    int t2Size = 100;
+    Tablet t2 =
+        new Tablet(
+            "table1", colNamesForAppendTest, dataTypesForAppendTest, categoriesForAppendTest);
+    fillTablet(t2, t1Size, t2Size);
+
+    assertTrue(t1.append(t2));
+    checkAppendedTablet(t1, t1Size + t2Size, null);
+  }
+
+  @Test
+  public void testPreferredCapacity() {
+    Tablet t1 =
+        new Tablet(
+            "table1", colNamesForAppendTest, dataTypesForAppendTest, categoriesForAppendTest);
+
+    int t1Size = 100;
+    fillTablet(t1, 0, t1Size);
+
+    int t2Size = 100;
+    Tablet t2 =
+        new Tablet(
+            "table1", colNamesForAppendTest, dataTypesForAppendTest, categoriesForAppendTest);
+    fillTablet(t2, t1Size, t2Size);
+
+    assertTrue(t1.append(t2, 10000));
+    checkAppendedTablet(t1, t1Size + t2Size, null);
+    assertEquals(10000, t1.getMaxRowNumber());
+  }
+
+  @Test
+  public void testAppendNullPoints() {
+    Set<Pair<Integer, Integer>> nullPositions = new HashSet<>();
+    int nullPointNum = 10;
+    Random random = new Random();
+
+    Tablet t1 =
+        new Tablet(
+            "table1", colNamesForAppendTest, dataTypesForAppendTest, categoriesForAppendTest);
+
+    int t1Size = 100;
+    fillTablet(t1, 0, t1Size);
+    for (int i = 0; i < nullPointNum; i++) {
+      int rowIndex = random.nextInt(t1Size);
+      int columnIndex = random.nextInt(colNamesForAppendTest.size());
+      nullPositions.add(new Pair<>(rowIndex, columnIndex));
+      t1.getBitMaps()[columnIndex].mark(rowIndex);
+    }
+
+    int t2Size = 100;
+    Tablet t2 =
+        new Tablet(
+            "table1", colNamesForAppendTest, dataTypesForAppendTest, categoriesForAppendTest);
+    fillTablet(t2, t1Size, t2Size);
+    for (int i = 0; i < nullPointNum; i++) {
+      int rowIndex = random.nextInt(t1Size);
+      int columnIndex = random.nextInt(colNamesForAppendTest.size());
+      nullPositions.add(new Pair<>(rowIndex + t1Size, columnIndex));
+      t2.getBitMaps()[columnIndex].mark(rowIndex);
+    }
+
+    assertTrue(t1.append(t2));
+    checkAppendedTablet(t1, t1Size + t2Size, nullPositions);
+  }
+
+  @Test
+  public void testAppendNullBitMapColumn() {
+    int nullBitMapNum = 5;
+    Random random = new Random();
+
+    Tablet t1 =
+        new Tablet(
+            "table1", colNamesForAppendTest, dataTypesForAppendTest, categoriesForAppendTest);
+
+    int t1Size = 100;
+    fillTablet(t1, 0, t1Size);
+    for (int i = 0; i < nullBitMapNum; i++) {
+      int columnIndex = random.nextInt(colNamesForAppendTest.size());
+      t1.getBitMaps()[columnIndex] = null;
+    }
+
+    int t2Size = 100;
+    Tablet t2 =
+        new Tablet(
+            "table1", colNamesForAppendTest, dataTypesForAppendTest, categoriesForAppendTest);
+    fillTablet(t2, t1Size, t2Size);
+    for (int i = 0; i < nullBitMapNum; i++) {
+      int columnIndex = random.nextInt(colNamesForAppendTest.size());
+      t2.getBitMaps()[columnIndex] = null;
+    }
+
+    assertTrue(t1.append(t2));
+    assertEquals(t1Size + t2Size, t1.getRowSize());
+
+    checkAppendedTablet(t1, t1Size + t2Size, null);
+  }
+
+  @Test
+  public void testAppendThisNullBitMap() {
+
+    Tablet t1 =
+        new Tablet(
+            "table1", colNamesForAppendTest, dataTypesForAppendTest, categoriesForAppendTest);
+
+    int t1Size = 100;
+    fillTablet(t1, 0, t1Size);
+    t1.setBitMaps(null);
+
+    int t2Size = 100;
+    Tablet t2 =
+        new Tablet(
+            "table1", colNamesForAppendTest, dataTypesForAppendTest, categoriesForAppendTest);
+    fillTablet(t2, t1Size, t2Size);
+
+    assertTrue(t1.append(t2));
+    checkAppendedTablet(t1, t1Size + t2Size, null);
+  }
+
+  @Test
+  public void testMultipleAppend() {
+    List<Tablet> tablets = new ArrayList<>();
+    int tabletNum = 10;
+    int singleTabletSize = 100;
+    for (int i = 0; i < tabletNum; i++) {
+      Tablet tablet =
+          new Tablet(
+              "table1", colNamesForAppendTest, dataTypesForAppendTest, categoriesForAppendTest);
+      fillTablet(tablet, i * singleTabletSize, singleTabletSize);
+      tablets.add(tablet);
+    }
+    for (int i = 1; i < tabletNum; i++) {
+      assertTrue(tablets.get(0).append(tablets.get(i)));
+    }
+    checkAppendedTablet(tablets.get(0), singleTabletSize * tabletNum, null);
+  }
+
+  private void checkAppendedTablet(
+      Tablet result, int totalSize, Set<Pair<Integer, Integer>> nullPositions) {
+    assertEquals(totalSize, result.getRowSize());
+
+    for (int i = 0; i < totalSize; i++) {
+      assertEquals(i, result.getTimestamp(i));
+      for (int j = 0; j < result.getSchemas().size(); j++) {
+        if (nullPositions != null && nullPositions.contains(new Pair<>(i, j))) {
+          assertTrue(result.isNull(i, j));
+          continue;
+        }
+
+        assertFalse(result.isNull(i, j));
+        switch (result.getSchemas().get(j).getType()) {
+          case INT32:
+            assertEquals(i, result.getValue(i, j));
+            break;
+          case TIMESTAMP:
+          case INT64:
+            assertEquals((long) i, result.getValue(i, j));
+            break;
+          case FLOAT:
+            assertEquals(i * 1.0f, (float) result.getValue(i, j), 0.0001f);
+            break;
+          case DOUBLE:
+            assertEquals(i * 1.0, (double) result.getValue(i, j), 0.0001);
+            break;
+          case BOOLEAN:
+            assertEquals(i % 2 == 0, result.getValue(i, j));
+            break;
+          case TEXT:
+          case BLOB:
+          case STRING:
+            assertEquals(
+                new Binary(String.valueOf(i).getBytes(StandardCharsets.UTF_8)),
+                result.getValue(i, j));
+            break;
+          case DATE:
+            assertEquals(LocalDate.of(i, 1, 1), result.getValue(i, j));
+            break;
+        }
+      }
+    }
+  }
 }

Reply via email to