This is an automated email from the ASF dual-hosted git repository.
jiangtian pushed a commit to branch tsFile_v4
in repository https://gitbox.apache.org/repos/asf/tsfile.git
The following commit(s) were added to refs/heads/tsFile_v4 by this push:
new d2d59974 add column type in schema
d2d59974 is described below
commit d2d59974b10649275d0d1fbd87958c212dd027ea
Author: Tian Jiang <[email protected]>
AuthorDate: Tue Apr 9 16:13:14 2024 +0800
add column type in schema
---
.../tsfile/file/metadata/LogicalTableSchema.java | 10 +++++++
.../apache/tsfile/file/metadata/TableSchema.java | 21 ++++++++++++---
.../tsfile/read/query/executor/TsFileExecutor.java | 31 ++++++++++++++++------
3 files changed, 51 insertions(+), 11 deletions(-)
diff --git a/tsfile/src/main/java/org/apache/tsfile/file/metadata/LogicalTableSchema.java b/tsfile/src/main/java/org/apache/tsfile/file/metadata/LogicalTableSchema.java
index e2ccb9fe..d25fb8c6 100644
--- a/tsfile/src/main/java/org/apache/tsfile/file/metadata/LogicalTableSchema.java
+++ b/tsfile/src/main/java/org/apache/tsfile/file/metadata/LogicalTableSchema.java
@@ -22,6 +22,7 @@ package org.apache.tsfile.file.metadata;
import org.apache.tsfile.enums.TSDataType;
import org.apache.tsfile.file.metadata.enums.CompressionType;
import org.apache.tsfile.file.metadata.enums.TSEncoding;
+import org.apache.tsfile.write.record.Tablet.ColumnType;
import org.apache.tsfile.write.schema.MeasurementSchema;
import java.util.ArrayList;
@@ -63,9 +64,18 @@ public class LogicalTableSchema extends TableSchema {
}
List<MeasurementSchema> allColumns = new ArrayList<>(generateIdColumns());
+    List<ColumnType> allColumnTypes = ColumnType.nCopy(ColumnType.ID, allColumns.size());
allColumns.addAll(columnSchemas);
+ allColumnTypes.addAll(columnTypes);
columnSchemas = allColumns;
updatable = false;
return allColumns;
}
+
+ @Override
+ public List<ColumnType> getColumnTypes() {
+ // make sure the columns are finalized
+ getColumnSchemas();
+ return super.getColumnTypes();
+ }
}
diff --git a/tsfile/src/main/java/org/apache/tsfile/file/metadata/TableSchema.java b/tsfile/src/main/java/org/apache/tsfile/file/metadata/TableSchema.java
index b67621d4..f498ed9d 100644
--- a/tsfile/src/main/java/org/apache/tsfile/file/metadata/TableSchema.java
+++ b/tsfile/src/main/java/org/apache/tsfile/file/metadata/TableSchema.java
@@ -20,6 +20,7 @@
package org.apache.tsfile.file.metadata;
import org.apache.tsfile.utils.ReadWriteIOUtils;
+import org.apache.tsfile.write.record.Tablet.ColumnType;
import org.apache.tsfile.write.schema.MeasurementSchema;
import java.io.IOException;
@@ -35,6 +36,7 @@ public class TableSchema {
// key the tableName can be known
protected String tableName;
protected List<MeasurementSchema> columnSchemas;
+ protected List<ColumnType> columnTypes;
protected boolean updatable = false;
// columnName -> pos in columnSchemas;
@@ -43,12 +45,14 @@ public class TableSchema {
public TableSchema(String tableName) {
this.tableName = tableName;
this.columnSchemas = new ArrayList<>();
+ this.columnTypes = new ArrayList<>();
this.updatable = true;
}
- public TableSchema(String tableName, List<MeasurementSchema> columnSchemas) {
+  public TableSchema(String tableName, List<MeasurementSchema> columnSchemas, List<ColumnType> columnTypes) {
this.tableName = tableName;
this.columnSchemas = columnSchemas;
+ this.columnTypes = columnTypes;
}
public Map<String, Integer> getColumnPosIndex() {
@@ -82,6 +86,7 @@ public class TableSchema {
// if the measurement is not found in the column list, add it
if (columnIndex == -1) {
columnSchemas.add(chunkMetadata.toMeasurementSchema());
+ columnTypes.add(ColumnType.MEASUREMENT);
      getColumnPosIndex().put(chunkMetadata.getMeasurementUid(), columnSchemas.size() - 1);
}
}
@@ -91,26 +96,36 @@ public class TableSchema {
return columnSchemas;
}
+ public List<ColumnType> getColumnTypes() {
+ return columnTypes;
+ }
+
public int serialize(OutputStream out) throws IOException {
int cnt = 0;
if (columnSchemas != null) {
cnt += ReadWriteIOUtils.write(columnSchemas.size(), out);
- for (MeasurementSchema columnSchema : columnSchemas) {
+ for (int i = 0; i < columnSchemas.size(); i++) {
+ MeasurementSchema columnSchema = columnSchemas.get(i);
+ ColumnType columnType = columnTypes.get(i);
cnt += columnSchema.serializeTo(out);
+ cnt += ReadWriteIOUtils.write(columnType.ordinal(), out);
}
} else {
cnt += ReadWriteIOUtils.write(0, out);
}
+
return cnt;
}
public static TableSchema deserialize(String tableName, ByteBuffer buffer) {
final int tableNum = buffer.getInt();
List<MeasurementSchema> measurementSchemas = new ArrayList<>(tableNum);
+ List<ColumnType> columnTypes = new ArrayList<>();
for (int i = 0; i < tableNum; i++) {
MeasurementSchema measurementSchema =
MeasurementSchema.deserializeFrom(buffer);
measurementSchemas.add(measurementSchema);
+ columnTypes.add(ColumnType.values()[buffer.getInt()]);
}
- return new TableSchema(tableName, measurementSchemas);
+ return new TableSchema(tableName, measurementSchemas, columnTypes);
}
}
diff --git a/tsfile/src/main/java/org/apache/tsfile/read/query/executor/TsFileExecutor.java b/tsfile/src/main/java/org/apache/tsfile/read/query/executor/TsFileExecutor.java
index 8a41452a..4559c539 100644
--- a/tsfile/src/main/java/org/apache/tsfile/read/query/executor/TsFileExecutor.java
+++ b/tsfile/src/main/java/org/apache/tsfile/read/query/executor/TsFileExecutor.java
@@ -19,11 +19,14 @@
package org.apache.tsfile.read.query.executor;
+import java.util.HashMap;
+import java.util.Map;
import org.apache.tsfile.enums.TSDataType;
import org.apache.tsfile.exception.filter.QueryFilterOptimizationException;
import org.apache.tsfile.exception.write.NoMeasurementException;
import org.apache.tsfile.file.metadata.IChunkMetadata;
import org.apache.tsfile.file.metadata.MetadataIndexNode;
+import org.apache.tsfile.file.metadata.TableSchema;
import org.apache.tsfile.file.metadata.TsFileMetadata;
import org.apache.tsfile.read.common.Path;
import org.apache.tsfile.read.common.TimeRange;
@@ -63,14 +66,13 @@ public class TsFileExecutor implements QueryExecutor {
  public RecordReader query(String tableName, List<String> columns, ExpressionTree timeFilter,
      ExpressionTree idFilter, ExpressionTree measurementFilter) {
TsFileMetadata fileMetadata = metadataQuerier.getWholeFileMetadata();
-    MetadataIndexNode tableIndexNode = fileMetadata.getTableMetadataIndexNodeMap()
+ MetadataIndexNode tableRoot = fileMetadata.getTableMetadataIndexNodeMap()
.get(tableName);
- if (tableIndexNode == null) {
+ TableSchema tableSchema = fileMetadata.getTableSchemaMap().get(tableName);
+ if (tableRoot == null || tableSchema == null) {
return new EmptyRecordReader();
}
-
-
return null;
}
@@ -119,9 +121,9 @@ public class TsFileExecutor implements QueryExecutor {
/**
* Query with the space partition constraint.
*
- * @param queryExpression query expression
+ * @param queryExpression query expression
* @param spacePartitionStartPos the start position of the space partition
- * @param spacePartitionEndPos the end position of the space partition
+ * @param spacePartitionEndPos the end position of the space partition
* @return QueryDataSet
*/
public QueryDataSet execute(
@@ -177,7 +179,7 @@ public class TsFileExecutor implements QueryExecutor {
* has a GlobalTimeExpression, can use multi-way merge.
*
* @param selectedPathList all selected paths
-   * @param timeFilter GlobalTimeExpression that takes effect to all selected paths
+   * @param timeFilter       GlobalTimeExpression that takes effect to all selected paths
* @return DataSet without TimeGenerator
*/
  private QueryDataSet execute(List<Path> selectedPathList, GlobalTimeExpression timeFilter)
@@ -187,7 +189,7 @@ public class TsFileExecutor implements QueryExecutor {
/**
* @param selectedPathList completed path
- * @param timeExpression a GlobalTimeExpression or null
+ * @param timeExpression a GlobalTimeExpression or null
* @return DataSetWithoutTimeGenerator
*/
private QueryDataSet executeMayAttachTimeFiler(
@@ -215,4 +217,17 @@ public class TsFileExecutor implements QueryExecutor {
}
    return new DataSetWithoutTimeGenerator(selectedPathList, dataTypes, readersOfSelectedSeries);
}
+
+ private class ColumnMapping {
+ /**
+     * The same column may occur multiple times in a query, but we surely do not want to read it redundantly.
+     * This mapping is used to put data of the same series into multiple columns.
+ */
+ private Map<String, List<Integer>> columnPosMapping = new HashMap<>();
+ private Map<String, Boolean> isId = new HashMap<>();
+
+    private void add(String columnName, int i, TableSchema schema) throws NoMeasurementException {
+ schema.getColumnSchemas()
+ }
+ }
}