This is an automated email from the ASF dual-hosted git repository.

colinlee pushed a commit to branch bench_mark
in repository https://gitbox.apache.org/repos/asf/tsfile.git
commit 9481b4f3b3c1db781958d5b7ad14a7093f63959b Author: ColinLee <[email protected]> AuthorDate: Thu Apr 10 02:29:07 2025 +0800 add tsfile writing bench mark. --- cpp/bench_mark/src/bench_mark.cc | 2 +- cpp/bench_mark/src/bench_mark_cpp.cc | 1 + java/bench_mark/pom.xml | 80 ++++++++++ .../src/main/java/org/apache/tsfile/BenchMark.java | 169 +++++++++++++++++++++ .../main/java/org/apache/tsfile/BenchMarkConf.java | 82 ++++++++++ java/pom.xml | 1 + python/bench_mark/bench_mark.py | 140 +++++++++++++++++ python/tsfile/tablet.py | 3 +- 8 files changed, 476 insertions(+), 2 deletions(-) diff --git a/cpp/bench_mark/src/bench_mark.cc b/cpp/bench_mark/src/bench_mark.cc index be38d5bb..f7c1ea19 100644 --- a/cpp/bench_mark/src/bench_mark.cc +++ b/cpp/bench_mark/src/bench_mark.cc @@ -21,5 +21,5 @@ int main() { bench_mark_cpp_write(); - bench_mark_c_write(); + //bench_mark_c_write(); } diff --git a/cpp/bench_mark/src/bench_mark_cpp.cc b/cpp/bench_mark/src/bench_mark_cpp.cc index 56d886fa..af47f88b 100644 --- a/cpp/bench_mark/src/bench_mark_cpp.cc +++ b/cpp/bench_mark/src/bench_mark_cpp.cc @@ -151,6 +151,7 @@ int bench_mark_cpp_write() { } auto close_start = std::chrono::high_resolution_clock::now(); + writer->flush(); writer->close(); auto close_end = std::chrono::high_resolution_clock::now(); diff --git a/java/bench_mark/pom.xml b/java/bench_mark/pom.xml new file mode 100644 index 00000000..c193d4d7 --- /dev/null +++ b/java/bench_mark/pom.xml @@ -0,0 +1,80 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +--> +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + <parent> + <groupId>org.apache.tsfile</groupId> + <artifactId>tsfile-java</artifactId> + <version>2.1.0-SNAPSHOT</version> + </parent> + <artifactId>bench_mark</artifactId> + <name>TsFile: Java: BenchMark</name> + <dependencies> + <dependency> + <groupId>ch.qos.logback</groupId> + <artifactId>logback-classic</artifactId> + </dependency> + <dependency> + <groupId>org.apache.tsfile</groupId> + <artifactId>tsfile</artifactId> + <version>2.1.0-SNAPSHOT</version> + </dependency> + </dependencies> + <build> + <pluginManagement> + <plugins> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-compiler-plugin</artifactId> + <configuration> + <source>8</source> + <target>8</target> + </configuration> + </plugin> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-dependency-plugin</artifactId> + <executions> + <execution> + <id>check-dependencies</id> + <goals> + <goal>analyze-only</goal> + </goals> + <phase>verify</phase> + <configuration> + <skip>true</skip> + </configuration> + </execution> + </executions> + </plugin> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-enforcer-plugin</artifactId> + <configuration> + <!-- Managing the dependencies of all examples would not provide much, but be a lot of work --> + <skip>true</skip> + </configuration> + 
</plugin> + </plugins> + </pluginManagement> + </build> +</project> diff --git a/java/bench_mark/src/main/java/org/apache/tsfile/BenchMark.java b/java/bench_mark/src/main/java/org/apache/tsfile/BenchMark.java new file mode 100644 index 00000000..d19c9e9a --- /dev/null +++ b/java/bench_mark/src/main/java/org/apache/tsfile/BenchMark.java @@ -0,0 +1,169 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.tsfile; + +import org.apache.tsfile.enums.TSDataType; +import org.apache.tsfile.exception.write.WriteProcessException; +import org.apache.tsfile.file.metadata.ColumnSchema; +import org.apache.tsfile.file.metadata.ColumnSchemaBuilder; +import org.apache.tsfile.file.metadata.TableSchema; +import org.apache.tsfile.fileSystem.FSFactoryProducer; +import org.apache.tsfile.write.record.Tablet; +import org.apache.tsfile.write.v4.ITsFileWriter; +import org.apache.tsfile.write.v4.TsFileWriterBuilder; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.util.ArrayList; +import java.util.List; + +public class BenchMark { + private static final Logger LOGGER = LoggerFactory.getLogger(BenchMark.class); + + public static void main(String[] args) throws IOException { + BenchMarkConf.printConfig(); + String path = "tsfile_table_write_bench_mark.tsfile"; + File f = FSFactoryProducer.getFSFactory().getFile(path); + if (f.exists()) { + Files.delete(f.toPath()); + } + List<String> column_names = new ArrayList<>(); + List<TSDataType> column_types = new ArrayList<>(); + List<ColumnSchema> columnSchemas = new ArrayList<>(); + columnSchemas.add( + new ColumnSchemaBuilder() + .name("TAG1") + .dataType(TSDataType.STRING) + .category(Tablet.ColumnCategory.TAG) + .build()); + + columnSchemas.add( + new ColumnSchemaBuilder() + .name("TAG2") + .dataType(TSDataType.STRING) + .category(Tablet.ColumnCategory.TAG) + .build()); + column_names.add("TAG1"); + column_names.add("TAG2"); + column_types.add(TSDataType.STRING); + column_types.add(TSDataType.STRING); + + int fieldIndex = 2; + for (int i = 0; i < BenchMarkConf.FIELD_TYPE_VECTOR.size(); i++) { + int count = BenchMarkConf.FIELD_TYPE_VECTOR.get(i); + TSDataType dataType = BenchMarkConf.getTsDataType(i); + for (int j = 0; j < count; j++) { + columnSchemas.add( + new ColumnSchemaBuilder() + .name("FIELD" + fieldIndex) + 
.dataType(dataType) + .category(Tablet.ColumnCategory.FIELD) + .build()); + column_names.add("FIELD" + fieldIndex); + column_types.add(dataType); + fieldIndex++; + } + } + + long totalPrepareTimeNs = 0; + long totalWriteTimeNs = 0; + long start = System.nanoTime(); + TableSchema tableSchema = new TableSchema("TestTable", columnSchemas); + try (ITsFileWriter writer = + new TsFileWriterBuilder().file(f).tableSchema(tableSchema).build()) { + long timestamp = 0; + for (int table_ind = 0; table_ind < BenchMarkConf.TABLET_NUM; table_ind++) { + long prepareStartTime = System.nanoTime(); + Tablet tablet = new Tablet(column_names, column_types); + for (int tag1_ind = 0; tag1_ind < BenchMarkConf.TAG1_NUM; tag1_ind++) { + for (int tag2_ind = 0; tag2_ind < BenchMarkConf.TAG2_NUM; tag2_ind++) { + for (int row = 0; row < BenchMarkConf.TIMESTAMP_PER_TAG; row++) { + tablet.addTimestamp(row, timestamp + row); + tablet.addValue(row, 0, "tag1_" + tag1_ind); + tablet.addValue(row, 1, "tag2_" + tag2_ind); + + for (int i = 2; i < column_types.size(); i++) { + switch (column_types.get(i)) { + case INT32: + tablet.addValue(row, i, (int)timestamp); + break; + case INT64: + tablet.addValue(row, i, timestamp); + break; + case FLOAT: + tablet.addValue(row, i, (float)(timestamp * 1.1)); + break; + case DOUBLE: + tablet.addValue(row, i, (double)timestamp * 1.1); + break; + case BOOLEAN: + tablet.addValue(row, i, timestamp % 2 == 0); + default: + // + } + } + } + } + } + long prepareEndTime = System.nanoTime(); + + totalPrepareTimeNs += (prepareEndTime - prepareStartTime); + long writeStartTime = System.nanoTime(); + writer.write(tablet); + long writeEndTime = System.nanoTime(); + totalWriteTimeNs += (writeEndTime - writeStartTime); + timestamp += BenchMarkConf.TIMESTAMP_PER_TAG; + } + } catch (WriteProcessException e) { + LOGGER.error("meet error in TsFileWrite ", e); + } + + long end = System.nanoTime(); + double totalPrepareTimeSec = totalPrepareTimeNs / 1_000_000_000.0; + double 
totalWriteTimeSec = totalWriteTimeNs / 1_000_000_000.0; + double totalTimeSec = (end - start) / 1_000_000_000.0; + + long size = f.length(); + + // 输出报告 + System.out.println("===================="); + System.out.println("finish bench mark for java"); + System.out.printf("tsfile size is %d bytes ~ %dKB%n", size, size / 1024); + + System.out.printf("prepare data time is %.6f s%n", totalPrepareTimeSec); + System.out.printf("writing data time is %.6f s%n", totalWriteTimeSec); + + long totalPoints = + (long) BenchMarkConf.TABLET_NUM + * BenchMarkConf.TAG1_NUM + * BenchMarkConf.TAG2_NUM + * BenchMarkConf.TIMESTAMP_PER_TAG + * column_names.size(); + double writingSpeed = totalPoints / totalTimeSec; + System.out.printf("writing speed is %d points/s%n", (long) writingSpeed); + + System.out.printf("total time is %.6f s%n", totalTimeSec); + System.out.println("===================="); + } +} diff --git a/java/bench_mark/src/main/java/org/apache/tsfile/BenchMarkConf.java b/java/bench_mark/src/main/java/org/apache/tsfile/BenchMarkConf.java new file mode 100644 index 00000000..7e6e5a2c --- /dev/null +++ b/java/bench_mark/src/main/java/org/apache/tsfile/BenchMarkConf.java @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.tsfile; + +import org.apache.tsfile.enums.TSDataType; + +import java.util.Arrays; +import java.util.List; + +public class BenchMarkConf { + public static final int TABLET_NUM = 1000; + public static final int TAG1_NUM = 10; + public static final int TAG2_NUM = 10; + public static final int TIMESTAMP_PER_TAG = 1000; + public static final List<Integer> FIELD_TYPE_VECTOR = Arrays.asList(1, 1, 1, 1, 1); + + public static TSDataType getTsDataType(int index) { + switch (index) { + case 0: + return TSDataType.INT32; + case 1: + return TSDataType.INT64; + case 2: + return TSDataType.FLOAT; + case 3: + return TSDataType.DOUBLE; + case 4: + return TSDataType.BOOLEAN; + } + return TSDataType.UNKNOWN; + } + + public static final List<String> DATA_TYPES_NAME = + Arrays.asList("INT32", "INT64", "FLOAT", "DOUBLE", "BOOLEAN"); + + public static void printConfig() { + int columnNum = 0; + for (int count : FIELD_TYPE_VECTOR) { + columnNum += count; + } + + System.out.println("TsFile benchmark"); + System.out.println("Schema Configuration:"); + System.out.println("Tag Column num: " + 2); + System.out.printf( + "TAG1 num: %d TAG2 num: %d%n%n", BenchMarkConf.TAG1_NUM, BenchMarkConf.TAG2_NUM); + + System.out.println("Field Column and types: "); + for (int i = 0; i < 5; i++) { + System.out.printf("%sx%d ", DATA_TYPES_NAME.get(i), BenchMarkConf.FIELD_TYPE_VECTOR.get(i)); + } + + System.out.printf("%nTablet num: %d%n", BenchMarkConf.TABLET_NUM); + System.out.printf("Tablet row num per tag: %d%n", BenchMarkConf.TIMESTAMP_PER_TAG); + + long totalPoints = + (long) BenchMarkConf.TABLET_NUM + * BenchMarkConf.TAG1_NUM + * BenchMarkConf.TAG2_NUM + * BenchMarkConf.TIMESTAMP_PER_TAG + * (columnNum + 2); + System.out.println("Total points is " + totalPoints); + System.out.println("===================="); + } +} diff --git a/java/pom.xml b/java/pom.xml index 
8c6be19b..87435a66 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -35,6 +35,7 @@ <module>tsfile</module> <module>examples</module> <module>tools</module> + <module>bench_mark</module> </modules> <dependencyManagement> <dependencies> diff --git a/python/bench_mark/bench_mark.py b/python/bench_mark/bench_mark.py new file mode 100644 index 00000000..d3ba5ed5 --- /dev/null +++ b/python/bench_mark/bench_mark.py @@ -0,0 +1,140 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +import os +from time import perf_counter + +from tqdm import tqdm + +from tsfile import TSDataType, ColumnCategory +from tsfile import TableSchema, ColumnSchema +from tsfile import Tablet +from tsfile import TsFileTableWriter + +bench_mark_conf = { + "tablet_num": 50, + "tag1_num": 10, + "tag2_num": 10, + "timestamp_per_tag": 1000, + "field_type_vector": [1, 1, 1, 1, 1], +} + +type_list = [TSDataType.INT32, TSDataType.INT64, TSDataType.FLOAT, TSDataType.DOUBLE, TSDataType.BOOLEAN] + + +def print_config(): + data_types_name = ["INT64", "INT32", "FLOAT", "DOUBLE", "BOOLEAN"] + + print("TsFile benchmark") + print("Schema Configuration:") + print(f"Tag Column num: {2}") + print(f"TAG1 num: {bench_mark_conf['tag1_num']} TAG2 num: {bench_mark_conf['tag2_num']}\n") + + print("Filed Column and types: ") + column_num = 0 + for i in range(5): + print(f"{data_types_name[i]}x{bench_mark_conf['field_type_vector'][i]} ", end="") + column_num += bench_mark_conf['field_type_vector'][i] + + print("\n") + print(f"Tablet num: {bench_mark_conf['tablet_num']}") + print(f"Tablet row num per tag: {bench_mark_conf['timestamp_per_tag']}") + + total_points = (bench_mark_conf['tablet_num'] * + bench_mark_conf['tag1_num'] * + bench_mark_conf['tag2_num'] * + bench_mark_conf['timestamp_per_tag'] * + (column_num + 2)) + print(f"Total points is {total_points}") + print("====================") + + +def bench_mark_write(): + print_config() + column_schema_list = [] + column_name = [] + column_datat_type = [] + column_schema_list.append(ColumnSchema("TAG1", TSDataType.STRING, ColumnCategory.TAG)) + column_name.append("TAG1") + column_datat_type.append(TSDataType.STRING) + column_schema_list.append(ColumnSchema("TAG2", TSDataType.STRING, ColumnCategory.TAG)) + column_name.append("TAG2") + column_datat_type.append(TSDataType.STRING) + + i = 2 + for count, type in zip(bench_mark_conf["field_type_vector"], type_list): + for _ in range(count): + column_schema_list.append(ColumnSchema("FIELD" + 
str(i), type, ColumnCategory.FIELD)) + column_name.append("FIELD" + str(i)) + column_datat_type.append(type) + i = i + 1 + + timestamp = 0 + table_schema = TableSchema("TestTable", column_schema_list) + start = perf_counter() + prepare_time = 0 + writing_time = 0 + with TsFileTableWriter("tsfile_table_write_bench_mark.tsfile", table_schema) as writer: + for i in tqdm(range(bench_mark_conf["tablet_num"]), desc="Tablets"): + for j in range(bench_mark_conf["tag1_num"]): + for k in range(bench_mark_conf["tag2_num"]): + prepare_start = perf_counter() + tablet = Tablet(column_name, column_datat_type, + bench_mark_conf["timestamp_per_tag"] * bench_mark_conf["tag1_num"] * + bench_mark_conf["tag2_num"]) + for row in range(bench_mark_conf["timestamp_per_tag"]): + tablet.add_timestamp(row, timestamp + row) + tablet.add_value_by_index(0, row, "tag1_" + str(j)) + tablet.add_value_by_index(1, row, "tag2_" + str(k)) + for col in range(2, len(column_name)): + if column_datat_type[col] == TSDataType.INT32: + tablet.add_value_by_index(col, row, timestamp) + elif column_datat_type[col] == TSDataType.INT64: + tablet.add_value_by_index(col, row, timestamp) + elif column_datat_type[col] == TSDataType.FLOAT: + tablet.add_value_by_index(col, row, timestamp * 1.1) + elif column_datat_type[col] == TSDataType.DOUBLE: + tablet.add_value_by_index(col, row, timestamp * 1.1) + elif column_datat_type[col] == TSDataType.BOOLEAN: + tablet.add_value_by_index(col, row, timestamp % 2 == 0) + + prepare_time += perf_counter() - prepare_start + write_start = perf_counter() + writer.write_table(tablet) + writing_time += perf_counter() - write_start + timestamp = timestamp + bench_mark_conf["timestamp_per_tag"] + end = perf_counter() + total_time = end - start + size = os.path.getsize("tsfile_table_write_bench_mark.tsfile") + + total_points = bench_mark_conf["tablet_num"] * bench_mark_conf["tag1_num"] * bench_mark_conf["tag2_num"] * \ + bench_mark_conf["timestamp_per_tag"] * len(column_name) + + 
print("finish bench mark for python") + print(f"tsfile size is {size} bytes ~ {size // 1024}KB") + + print(f"prepare data time is {prepare_time:.6f} s") # 保留6位小数 + print(f"writing data time is {writing_time:.6f} s") + + writing_speed = int(total_points / (prepare_time + writing_time)) + print(f"writing speed is {writing_speed} points/s") + + total_time_seconds = (end - start) + print(f"total time is {total_time_seconds:.6f} s") + + +bench_mark_write() diff --git a/python/tsfile/tablet.py b/python/tsfile/tablet.py index 2935db09..52e7389c 100644 --- a/python/tsfile/tablet.py +++ b/python/tsfile/tablet.py @@ -137,7 +137,8 @@ class Tablet(object): if not isinstance(value, expected_type.to_py_type()): raise TypeError(f"Expected {expected_type.to_py_type()} got {type(value)}") - self._check_numeric_range(value, expected_type) + if expected_type in (TSDataType.INT32, TSDataType.INT64, TSDataType.FLOAT, TSDataType.DOUBLE): + self._check_numeric_range(value, expected_type) self.data_list[col_index][row_index] = value
