This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push:
new b82de68d7e7 [feature][insert]add hive table sink thrift (#32274) (#32360)
b82de68d7e7 is described below
commit b82de68d7e71b10677e1cbb61cb45bb2deef67fb
Author: Mingyu Chen <[email protected]>
AuthorDate: Mon Mar 18 10:46:17 2024 +0800
[feature][insert]add hive table sink thrift (#32274) (#32360)
bp #32274
---
gensrc/thrift/DataSinks.thrift | 34 +++++++++++++++++++++-------------
gensrc/thrift/FrontendService.thrift | 3 +++
gensrc/thrift/Partitions.thrift | 8 +++++++-
gensrc/thrift/PlanNodes.thrift | 4 +++-
4 files changed, 34 insertions(+), 15 deletions(-)
diff --git a/gensrc/thrift/DataSinks.thrift b/gensrc/thrift/DataSinks.thrift
index 7c9d5e8f8c2..e7683c25fd4 100644
--- a/gensrc/thrift/DataSinks.thrift
+++ b/gensrc/thrift/DataSinks.thrift
@@ -38,6 +38,7 @@ enum TDataSinkType {
MULTI_CAST_DATA_STREAM_SINK,
GROUP_COMMIT_OLAP_TABLE_SINK, // deprecated
GROUP_COMMIT_BLOCK_SINK,
+ HIVE_TABLE_SINK,
}
enum TResultSinkType {
@@ -101,7 +102,7 @@ enum TParquetRepetitionType {
struct TParquetSchema {
1: optional TParquetRepetitionType schema_repetition_type
2: optional TParquetDataType schema_data_type
- 3: optional string schema_column_name
+ 3: optional string schema_column_name
4: optional TParquetDataLogicalType schema_data_logical_type
}
@@ -280,6 +281,7 @@ struct TOlapTableSink {
struct THiveLocationParams {
1: optional string write_path
2: optional string target_path
+ 3: optional Types.TFileType file_type
}
struct TSortedColumn {
@@ -298,11 +300,16 @@ struct THiveBucket {
4: optional list<TSortedColumn> sorted_by
}
-enum THiveCompressionType {
- SNAPPY = 3,
- LZ4 = 4,
- ZLIB = 6,
- ZSTD = 7,
+enum THiveColumnType {
+ PARTITION_KEY = 0,
+ REGULAR = 1,
+ SYNTHESIZED = 2
+}
+
+struct THiveColumn {
+ 1: optional string name
+ 2: optional Types.TTypeDesc data_type
+ 3: optional THiveColumnType column_type
}
struct THivePartition {
@@ -314,13 +321,14 @@ struct THivePartition {
struct THiveTableSink {
1: optional string db_name
2: optional string table_name
- 3: optional list<string> data_column_names
- 4: optional list<string> partition_column_names
- 5: optional list<THivePartition> partitions
- 6: optional list<THiveBucket> buckets
- 7: optional PlanNodes.TFileFormatType file_format
- 8: optional THiveCompressionType compression_type
- 9: optional THiveLocationParams location
+ 3: optional list<THiveColumn> columns
+ 4: optional list<THivePartition> partitions
+ 5: optional THiveBucket bucket_info
+ 6: optional PlanNodes.TFileFormatType file_format
+ 7: optional PlanNodes.TFileCompressType compression_type
+ 8: optional THiveLocationParams location
+ 9: optional map<string, string> hadoop_config
+ 10: optional bool overwrite
}
enum TUpdateMode {
diff --git a/gensrc/thrift/FrontendService.thrift b/gensrc/thrift/FrontendService.thrift
index 9f47037c56d..049c8450b23 100644
--- a/gensrc/thrift/FrontendService.thrift
+++ b/gensrc/thrift/FrontendService.thrift
@@ -29,6 +29,7 @@ include "Exprs.thrift"
include "RuntimeProfile.thrift"
include "MasterService.thrift"
include "AgentService.thrift"
+include "DataSinks.thrift"
// These are supporting structs for JniFrontend.java, which serves as the glue
// between our C++ execution environment and the Java frontend.
@@ -481,6 +482,8 @@ struct TReportExecStatusParams {
24: optional TQueryStatistics query_statistics // deprecated
25: optional TReportWorkloadRuntimeStatusParams report_workload_runtime_status
+
+ 26: optional list<DataSinks.THivePartitionUpdate> hive_partition_updates
}
struct TFeResult {
diff --git a/gensrc/thrift/Partitions.thrift b/gensrc/thrift/Partitions.thrift
index 0a7e70c0a4f..4e306c2970b 100644
--- a/gensrc/thrift/Partitions.thrift
+++ b/gensrc/thrift/Partitions.thrift
@@ -43,7 +43,13 @@ enum TPartitionType {
BUCKET_SHFFULE_HASH_PARTITIONED,
// used for shuffle data by parititon and tablet
- TABLET_SINK_SHUFFLE_PARTITIONED
+ TABLET_SINK_SHUFFLE_PARTITIONED,
+
+ // used for shuffle data by hive parititon
+ TABLE_SINK_HASH_PARTITIONED,
+
+ // used for hive unparititoned table
+ TABLE_SINK_RANDOM_PARTITIONED
}
enum TDistributionType {
diff --git a/gensrc/thrift/PlanNodes.thrift b/gensrc/thrift/PlanNodes.thrift
index f9459256548..da3643747fb 100644
--- a/gensrc/thrift/PlanNodes.thrift
+++ b/gensrc/thrift/PlanNodes.thrift
@@ -138,7 +138,9 @@ enum TFileCompressType {
DEFLATE,
LZOP,
LZ4BLOCK,
- SNAPPYBLOCK
+ SNAPPYBLOCK,
+ ZLIB,
+ ZSTD
}
struct THdfsConf {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]