This is an automated email from the ASF dual-hosted git repository.

colinlee pushed a commit to branch bench_mark
in repository https://gitbox.apache.org/repos/asf/tsfile.git

commit 9481b4f3b3c1db781958d5b7ad14a7093f63959b
Author: ColinLee <[email protected]>
AuthorDate: Thu Apr 10 02:29:07 2025 +0800

    add tsfile writing bench mark.
---
 cpp/bench_mark/src/bench_mark.cc                   |   2 +-
 cpp/bench_mark/src/bench_mark_cpp.cc               |   1 +
 java/bench_mark/pom.xml                            |  80 ++++++++++
 .../src/main/java/org/apache/tsfile/BenchMark.java | 169 +++++++++++++++++++++
 .../main/java/org/apache/tsfile/BenchMarkConf.java |  82 ++++++++++
 java/pom.xml                                       |   1 +
 python/bench_mark/bench_mark.py                    | 140 +++++++++++++++++
 python/tsfile/tablet.py                            |   3 +-
 8 files changed, 476 insertions(+), 2 deletions(-)

diff --git a/cpp/bench_mark/src/bench_mark.cc b/cpp/bench_mark/src/bench_mark.cc
index be38d5bb..f7c1ea19 100644
--- a/cpp/bench_mark/src/bench_mark.cc
+++ b/cpp/bench_mark/src/bench_mark.cc
@@ -21,5 +21,5 @@
 
 int main() {
     bench_mark_cpp_write();
-    bench_mark_c_write();
+    //bench_mark_c_write();
 }
diff --git a/cpp/bench_mark/src/bench_mark_cpp.cc b/cpp/bench_mark/src/bench_mark_cpp.cc
index 56d886fa..af47f88b 100644
--- a/cpp/bench_mark/src/bench_mark_cpp.cc
+++ b/cpp/bench_mark/src/bench_mark_cpp.cc
@@ -151,6 +151,7 @@ int bench_mark_cpp_write() {
     }
 
     auto close_start = std::chrono::high_resolution_clock::now();
+    writer->flush();
     writer->close();
     auto close_end = std::chrono::high_resolution_clock::now();
 
diff --git a/java/bench_mark/pom.xml b/java/bench_mark/pom.xml
new file mode 100644
index 00000000..c193d4d7
--- /dev/null
+++ b/java/bench_mark/pom.xml
@@ -0,0 +1,80 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+
+    Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements.  See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership.  The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License.  You may obtain a copy of the License at
+
+        http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing,
+    software distributed under the License is distributed on an
+    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+    KIND, either express or implied.  See the License for the
+    specific language governing permissions and limitations
+    under the License.
+
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+    <parent>
+        <groupId>org.apache.tsfile</groupId>
+        <artifactId>tsfile-java</artifactId>
+        <version>2.1.0-SNAPSHOT</version>
+    </parent>
+    <artifactId>bench_mark</artifactId>
+    <name>TsFile: Java: BenchMark</name>
+    <dependencies>
+        <dependency>
+            <groupId>ch.qos.logback</groupId>
+            <artifactId>logback-classic</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.tsfile</groupId>
+            <artifactId>tsfile</artifactId>
+            <version>2.1.0-SNAPSHOT</version>
+        </dependency>
+    </dependencies>
+    <build>
+        <pluginManagement>
+            <plugins>
+                <plugin>
+                    <groupId>org.apache.maven.plugins</groupId>
+                    <artifactId>maven-compiler-plugin</artifactId>
+                    <configuration>
+                        <source>8</source>
+                        <target>8</target>
+                    </configuration>
+                </plugin>
+                <plugin>
+                    <groupId>org.apache.maven.plugins</groupId>
+                    <artifactId>maven-dependency-plugin</artifactId>
+                    <executions>
+                        <execution>
+                            <id>check-dependencies</id>
+                            <goals>
+                                <goal>analyze-only</goal>
+                            </goals>
+                            <phase>verify</phase>
+                            <configuration>
+                                <skip>true</skip>
+                            </configuration>
+                        </execution>
+                    </executions>
+                </plugin>
+                <plugin>
+                    <groupId>org.apache.maven.plugins</groupId>
+                    <artifactId>maven-enforcer-plugin</artifactId>
+                    <configuration>
+                        <!-- Managing the dependencies of the benchmark module would not provide much benefit, but would be a lot of work -->
+                        <skip>true</skip>
+                    </configuration>
+                </plugin>
+            </plugins>
+        </pluginManagement>
+    </build>
+</project>
diff --git a/java/bench_mark/src/main/java/org/apache/tsfile/BenchMark.java b/java/bench_mark/src/main/java/org/apache/tsfile/BenchMark.java
new file mode 100644
index 00000000..d19c9e9a
--- /dev/null
+++ b/java/bench_mark/src/main/java/org/apache/tsfile/BenchMark.java
@@ -0,0 +1,169 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.tsfile;
+
+import org.apache.tsfile.enums.TSDataType;
+import org.apache.tsfile.exception.write.WriteProcessException;
+import org.apache.tsfile.file.metadata.ColumnSchema;
+import org.apache.tsfile.file.metadata.ColumnSchemaBuilder;
+import org.apache.tsfile.file.metadata.TableSchema;
+import org.apache.tsfile.fileSystem.FSFactoryProducer;
+import org.apache.tsfile.write.record.Tablet;
+import org.apache.tsfile.write.v4.ITsFileWriter;
+import org.apache.tsfile.write.v4.TsFileWriterBuilder;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Write benchmark for the table-model TsFile writer.
+ *
+ * <p>Builds a table schema with two STRING tag columns plus the field columns described by {@link
+ * BenchMarkConf#FIELD_TYPE_VECTOR}, writes {@link BenchMarkConf#TABLET_NUM} tablets to {@code
+ * tsfile_table_write_bench_mark.tsfile} and prints the file size, the time spent preparing and
+ * writing tablets, and the resulting throughput in points per second.
+ */
+public class BenchMark {
+  private static final Logger LOGGER = LoggerFactory.getLogger(BenchMark.class);
+
+  /**
+   * Runs the benchmark once and prints the report to standard output.
+   *
+   * @param args unused
+   * @throws IOException if the previous output file cannot be deleted or the writer fails on I/O
+   */
+  public static void main(String[] args) throws IOException {
+    BenchMarkConf.printConfig();
+    String path = "tsfile_table_write_bench_mark.tsfile";
+    File f = FSFactoryProducer.getFSFactory().getFile(path);
+    if (f.exists()) {
+      Files.delete(f.toPath());
+    }
+    List<String> column_names = new ArrayList<>();
+    List<TSDataType> column_types = new ArrayList<>();
+    List<ColumnSchema> columnSchemas = new ArrayList<>();
+    columnSchemas.add(
+        new ColumnSchemaBuilder()
+            .name("TAG1")
+            .dataType(TSDataType.STRING)
+            .category(Tablet.ColumnCategory.TAG)
+            .build());
+
+    columnSchemas.add(
+        new ColumnSchemaBuilder()
+            .name("TAG2")
+            .dataType(TSDataType.STRING)
+            .category(Tablet.ColumnCategory.TAG)
+            .build());
+    column_names.add("TAG1");
+    column_names.add("TAG2");
+    column_types.add(TSDataType.STRING);
+    column_types.add(TSDataType.STRING);
+
+    // Field columns are numbered after the two tag columns: FIELD2, FIELD3, ...
+    int fieldIndex = 2;
+    for (int i = 0; i < BenchMarkConf.FIELD_TYPE_VECTOR.size(); i++) {
+      int count = BenchMarkConf.FIELD_TYPE_VECTOR.get(i);
+      TSDataType dataType = BenchMarkConf.getTsDataType(i);
+      for (int j = 0; j < count; j++) {
+        columnSchemas.add(
+            new ColumnSchemaBuilder()
+                .name("FIELD" + fieldIndex)
+                .dataType(dataType)
+                .category(Tablet.ColumnCategory.FIELD)
+                .build());
+        column_names.add("FIELD" + fieldIndex);
+        column_types.add(dataType);
+        fieldIndex++;
+      }
+    }
+
+    // One tablet holds TIMESTAMP_PER_TAG rows for every (TAG1, TAG2) combination. The tablet must
+    // be sized for all of them; otherwise each combination would overwrite rows
+    // 0..TIMESTAMP_PER_TAG-1 and the reported point count would not match the data written.
+    int rowsPerTablet =
+        BenchMarkConf.TAG1_NUM * BenchMarkConf.TAG2_NUM * BenchMarkConf.TIMESTAMP_PER_TAG;
+
+    long totalPrepareTimeNs = 0;
+    long totalWriteTimeNs = 0;
+    long start = System.nanoTime();
+    TableSchema tableSchema = new TableSchema("TestTable", columnSchemas);
+    try (ITsFileWriter writer =
+        new TsFileWriterBuilder().file(f).tableSchema(tableSchema).build()) {
+      long timestamp = 0;
+      for (int table_ind = 0; table_ind < BenchMarkConf.TABLET_NUM; table_ind++) {
+        long prepareStartTime = System.nanoTime();
+        Tablet tablet = new Tablet(column_names, column_types, rowsPerTablet);
+        int rowIndex = 0;
+        for (int tag1_ind = 0; tag1_ind < BenchMarkConf.TAG1_NUM; tag1_ind++) {
+          for (int tag2_ind = 0; tag2_ind < BenchMarkConf.TAG2_NUM; tag2_ind++) {
+            for (int row = 0; row < BenchMarkConf.TIMESTAMP_PER_TAG; row++) {
+              tablet.addTimestamp(rowIndex, timestamp + row);
+              tablet.addValue(rowIndex, 0, "tag1_" + tag1_ind);
+              tablet.addValue(rowIndex, 1, "tag2_" + tag2_ind);
+
+              for (int i = 2; i < column_types.size(); i++) {
+                switch (column_types.get(i)) {
+                  case INT32:
+                    tablet.addValue(rowIndex, i, (int) timestamp);
+                    break;
+                  case INT64:
+                    tablet.addValue(rowIndex, i, timestamp);
+                    break;
+                  case FLOAT:
+                    tablet.addValue(rowIndex, i, (float) (timestamp * 1.1));
+                    break;
+                  case DOUBLE:
+                    tablet.addValue(rowIndex, i, (double) timestamp * 1.1);
+                    break;
+                  case BOOLEAN:
+                    tablet.addValue(rowIndex, i, timestamp % 2 == 0);
+                    break;
+                  default:
+                    // Other data types get no value in this benchmark.
+                    break;
+                }
+              }
+              rowIndex++;
+            }
+          }
+        }
+        long prepareEndTime = System.nanoTime();
+
+        totalPrepareTimeNs += (prepareEndTime - prepareStartTime);
+        long writeStartTime = System.nanoTime();
+        writer.write(tablet);
+        long writeEndTime = System.nanoTime();
+        totalWriteTimeNs += (writeEndTime - writeStartTime);
+        timestamp += BenchMarkConf.TIMESTAMP_PER_TAG;
+      }
+    } catch (WriteProcessException e) {
+      LOGGER.error("meet error in TsFileWrite ", e);
+    }
+
+    // Taken after the try-with-resources block, so the total includes closing the writer.
+    long end = System.nanoTime();
+    double totalPrepareTimeSec = totalPrepareTimeNs / 1_000_000_000.0;
+    double totalWriteTimeSec = totalWriteTimeNs / 1_000_000_000.0;
+    double totalTimeSec = (end - start) / 1_000_000_000.0;
+
+    long size = f.length();
+
+    // Print the report.
+    System.out.println("====================");
+    System.out.println("finish bench mark for java");
+    System.out.printf("tsfile size is %d bytes ~ %dKB%n", size, size / 1024);
+
+    System.out.printf("prepare data time is %.6f s%n", totalPrepareTimeSec);
+    System.out.printf("writing data time is %.6f s%n", totalWriteTimeSec);
+
+    // The long cast keeps the product from overflowing int for larger configurations.
+    long totalPoints =
+        (long) BenchMarkConf.TABLET_NUM
+            * BenchMarkConf.TAG1_NUM
+            * BenchMarkConf.TAG2_NUM
+            * BenchMarkConf.TIMESTAMP_PER_TAG
+            * column_names.size();
+    double writingSpeed = totalPoints / totalTimeSec;
+    System.out.printf("writing speed is %d points/s%n", (long) writingSpeed);
+
+    System.out.printf("total time is %.6f s%n", totalTimeSec);
+    System.out.println("====================");
+  }
+}
diff --git a/java/bench_mark/src/main/java/org/apache/tsfile/BenchMarkConf.java b/java/bench_mark/src/main/java/org/apache/tsfile/BenchMarkConf.java
new file mode 100644
index 00000000..7e6e5a2c
--- /dev/null
+++ b/java/bench_mark/src/main/java/org/apache/tsfile/BenchMarkConf.java
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.tsfile;
+
+import org.apache.tsfile.enums.TSDataType;
+
+import java.util.Arrays;
+import java.util.List;
+
+/** Static configuration for the TsFile write benchmark, plus a helper that prints it. */
+public class BenchMarkConf {
+  /** Number of tablets written by the benchmark. */
+  public static final int TABLET_NUM = 1000;
+
+  /** Number of distinct values generated for the TAG1 column. */
+  public static final int TAG1_NUM = 10;
+
+  /** Number of distinct values generated for the TAG2 column. */
+  public static final int TAG2_NUM = 10;
+
+  /** Number of rows generated for every (TAG1, TAG2) combination in one tablet. */
+  public static final int TIMESTAMP_PER_TAG = 1000;
+
+  /**
+   * Number of field columns per data type; position {@code i} uses the type returned by {@link
+   * #getTsDataType(int)} for {@code i}.
+   */
+  public static final List<Integer> FIELD_TYPE_VECTOR = Arrays.asList(1, 1, 1, 1, 1);
+
+  /**
+   * Maps a position in {@link #FIELD_TYPE_VECTOR} to its data type.
+   *
+   * @param index position in {@link #FIELD_TYPE_VECTOR}
+   * @return the data type for that position, or {@link TSDataType#UNKNOWN} when the index is not
+   *     in the range 0..4
+   */
+  public static TSDataType getTsDataType(int index) {
+    switch (index) {
+      case 0:
+        return TSDataType.INT32;
+      case 1:
+        return TSDataType.INT64;
+      case 2:
+        return TSDataType.FLOAT;
+      case 3:
+        return TSDataType.DOUBLE;
+      case 4:
+        return TSDataType.BOOLEAN;
+      default:
+        return TSDataType.UNKNOWN;
+    }
+  }
+
+  /** Display names of the field types, in the same order as {@link #getTsDataType(int)}. */
+  public static final List<String> DATA_TYPES_NAME =
+      Arrays.asList("INT32", "INT64", "FLOAT", "DOUBLE", "BOOLEAN");
+
+  /** Prints the schema layout, the tablet layout and the total number of points to be written. */
+  public static void printConfig() {
+    int columnNum = 0;
+    for (int count : FIELD_TYPE_VECTOR) {
+      columnNum += count;
+    }
+
+    System.out.println("TsFile benchmark");
+    System.out.println("Schema Configuration:");
+    System.out.println("Tag Column num: " + 2);
+    System.out.printf(
+        "TAG1 num: %d TAG2 num: %d%n%n", BenchMarkConf.TAG1_NUM, BenchMarkConf.TAG2_NUM);
+
+    System.out.println("Field Column and types: ");
+    // Bound the loop by both lists so a shorter FIELD_TYPE_VECTOR cannot index out of range.
+    int typeCount = Math.min(DATA_TYPES_NAME.size(), FIELD_TYPE_VECTOR.size());
+    for (int i = 0; i < typeCount; i++) {
+      System.out.printf("%sx%d  ", DATA_TYPES_NAME.get(i), FIELD_TYPE_VECTOR.get(i));
+    }
+
+    System.out.printf("%nTablet num: %d%n", BenchMarkConf.TABLET_NUM);
+    System.out.printf("Tablet row num per tag: %d%n", BenchMarkConf.TIMESTAMP_PER_TAG);
+
+    // The two tag columns are counted as points too, hence columnNum + 2. The long cast keeps the
+    // product from overflowing int for larger configurations.
+    long totalPoints =
+        (long) BenchMarkConf.TABLET_NUM
+            * BenchMarkConf.TAG1_NUM
+            * BenchMarkConf.TAG2_NUM
+            * BenchMarkConf.TIMESTAMP_PER_TAG
+            * (columnNum + 2);
+    System.out.println("Total points is " + totalPoints);
+    System.out.println("====================");
+  }
+}
diff --git a/java/pom.xml b/java/pom.xml
index 8c6be19b..87435a66 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -35,6 +35,7 @@
         <module>tsfile</module>
         <module>examples</module>
         <module>tools</module>
+        <module>bench_mark</module>
     </modules>
     <dependencyManagement>
         <dependencies>
diff --git a/python/bench_mark/bench_mark.py b/python/bench_mark/bench_mark.py
new file mode 100644
index 00000000..d3ba5ed5
--- /dev/null
+++ b/python/bench_mark/bench_mark.py
@@ -0,0 +1,140 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+import os
+from time import perf_counter
+
+from tqdm import tqdm
+
+from tsfile import TSDataType, ColumnCategory
+from tsfile import TableSchema, ColumnSchema
+from tsfile import Tablet
+from tsfile import TsFileTableWriter
+
+# Benchmark parameters shared by print_config() and bench_mark_write().
+bench_mark_conf = {
+    # Iterations of the outermost write loop.
+    "tablet_num": 50,
+    # Number of distinct TAG1 values ("tag1_0", "tag1_1", ...).
+    "tag1_num": 10,
+    # Number of distinct TAG2 values ("tag2_0", "tag2_1", ...).
+    "tag2_num": 10,
+    # Rows filled into each tablet, i.e. per (TAG1, TAG2) combination.
+    "timestamp_per_tag": 1000,
+    # Number of field columns per data type; paired positionally with type_list.
+    "field_type_vector": [1, 1, 1, 1, 1],
+}
+
+# Data type of each field-column group; zipped with "field_type_vector" when building the schema.
+type_list = [TSDataType.INT32, TSDataType.INT64, TSDataType.FLOAT, 
TSDataType.DOUBLE, TSDataType.BOOLEAN]
+
+
+def print_config():
+    data_types_name = ["INT64", "INT32", "FLOAT", "DOUBLE", "BOOLEAN"]
+
+    print("TsFile benchmark")
+    print("Schema Configuration:")
+    print(f"Tag Column num: {2}")
+    print(f"TAG1 num: {bench_mark_conf['tag1_num']} TAG2 num: 
{bench_mark_conf['tag2_num']}\n")
+
+    print("Filed Column and types: ")
+    column_num = 0
+    for i in range(5):
+        print(f"{data_types_name[i]}x{bench_mark_conf['field_type_vector'][i]} 
 ", end="")
+        column_num += bench_mark_conf['field_type_vector'][i]
+
+    print("\n")
+    print(f"Tablet num: {bench_mark_conf['tablet_num']}")
+    print(f"Tablet row num per tag: {bench_mark_conf['timestamp_per_tag']}")
+
+    total_points = (bench_mark_conf['tablet_num'] *
+                    bench_mark_conf['tag1_num'] *
+                    bench_mark_conf['tag2_num'] *
+                    bench_mark_conf['timestamp_per_tag'] *
+                    (column_num + 2))
+    print(f"Total points is {total_points}")
+    print("====================")
+
+
+def bench_mark_write():
+    print_config()
+    column_schema_list = []
+    column_name = []
+    column_datat_type = []
+    column_schema_list.append(ColumnSchema("TAG1", TSDataType.STRING, 
ColumnCategory.TAG))
+    column_name.append("TAG1")
+    column_datat_type.append(TSDataType.STRING)
+    column_schema_list.append(ColumnSchema("TAG2", TSDataType.STRING, 
ColumnCategory.TAG))
+    column_name.append("TAG2")
+    column_datat_type.append(TSDataType.STRING)
+
+    i = 2
+    for count, type in zip(bench_mark_conf["field_type_vector"], type_list):
+        for _ in range(count):
+            column_schema_list.append(ColumnSchema("FIELD" + str(i), type, 
ColumnCategory.FIELD))
+            column_name.append("FIELD" + str(i))
+            column_datat_type.append(type)
+            i = i + 1
+
+    timestamp = 0
+    table_schema = TableSchema("TestTable", column_schema_list)
+    start = perf_counter()
+    prepare_time = 0
+    writing_time = 0
+    with TsFileTableWriter("tsfile_table_write_bench_mark.tsfile", 
table_schema) as writer:
+        for i in tqdm(range(bench_mark_conf["tablet_num"]), desc="Tablets"):
+            for j in range(bench_mark_conf["tag1_num"]):
+                for k in range(bench_mark_conf["tag2_num"]):
+                    prepare_start = perf_counter()
+                    tablet = Tablet(column_name, column_datat_type,
+                                    bench_mark_conf["timestamp_per_tag"] * 
bench_mark_conf["tag1_num"] *
+                                    bench_mark_conf["tag2_num"])
+                    for row in range(bench_mark_conf["timestamp_per_tag"]):
+                        tablet.add_timestamp(row, timestamp + row)
+                        tablet.add_value_by_index(0, row, "tag1_" + str(j))
+                        tablet.add_value_by_index(1, row, "tag2_" + str(k))
+                        for col in range(2, len(column_name)):
+                            if column_datat_type[col] == TSDataType.INT32:
+                                tablet.add_value_by_index(col, row, timestamp)
+                            elif column_datat_type[col] == TSDataType.INT64:
+                                tablet.add_value_by_index(col, row, timestamp)
+                            elif column_datat_type[col] == TSDataType.FLOAT:
+                                tablet.add_value_by_index(col, row, timestamp 
* 1.1)
+                            elif column_datat_type[col] == TSDataType.DOUBLE:
+                                tablet.add_value_by_index(col, row, timestamp 
* 1.1)
+                            elif column_datat_type[col] == TSDataType.BOOLEAN:
+                                tablet.add_value_by_index(col, row, timestamp 
% 2 == 0)
+
+                    prepare_time += perf_counter() - prepare_start
+                    write_start = perf_counter()
+                    writer.write_table(tablet)
+                    writing_time += perf_counter() - write_start
+                    timestamp = timestamp + 
bench_mark_conf["timestamp_per_tag"]
+    end = perf_counter()
+    total_time = end - start
+    size = os.path.getsize("tsfile_table_write_bench_mark.tsfile")
+
+    total_points = bench_mark_conf["tablet_num"] * bench_mark_conf["tag1_num"] 
* bench_mark_conf["tag2_num"] * \
+                   bench_mark_conf["timestamp_per_tag"] * len(column_name)
+
+    print("finish bench mark for python")
+    print(f"tsfile size is {size} bytes ~ {size // 1024}KB")
+
+    print(f"prepare data time is {prepare_time:.6f} s")  # 保留6位小数
+    print(f"writing data time is {writing_time:.6f} s")
+
+    writing_speed = int(total_points / (prepare_time + writing_time))
+    print(f"writing speed is {writing_speed} points/s")
+
+    total_time_seconds = (end - start)
+    print(f"total time is {total_time_seconds:.6f} s")
+
+
+bench_mark_write()
diff --git a/python/tsfile/tablet.py b/python/tsfile/tablet.py
index 2935db09..52e7389c 100644
--- a/python/tsfile/tablet.py
+++ b/python/tsfile/tablet.py
@@ -137,7 +137,8 @@ class Tablet(object):
         if not isinstance(value, expected_type.to_py_type()):
             raise TypeError(f"Expected {expected_type.to_py_type()} got 
{type(value)}")
 
-        self._check_numeric_range(value, expected_type)
+        if expected_type in (TSDataType.INT32, TSDataType.INT64, 
TSDataType.FLOAT, TSDataType.DOUBLE):
+            self._check_numeric_range(value, expected_type)
 
         self.data_list[col_index][row_index] = value
 

Reply via email to