This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch branch-2.1-lakehouse in repository https://gitbox.apache.org/repos/asf/doris.git
commit 7969696be74735f389ec053049885aebac33a0af Author: Mingyu Chen (Rayner) <[email protected]> AuthorDate: Thu Feb 6 22:08:11 2025 +0800 [feature](iceberg) support s3 tables (#47115) Support AWS S3 tables: ``` CREATE CATALOG iceberg_s3 PROPERTIES ( 'type' = 'iceberg', 'iceberg.catalog.type' = 's3tables', 'warehouse' = 'arn:aws:s3tables:us-east-1:xxxx:bucket/doris-s3-table-bucket', 's3.region' = 'us-east-1', 's3.endpoint' = 's3.us-east-1.amazonaws.com', 's3.access_key' = 'ak', 's3.secret_key' = 'sk/lht4rMIfbhVftujO2alFx' ); ``` Support SQL, DDL and DML on s3 table bucket --- be/src/util/s3_util.cpp | 7 +- be/src/util/s3_util.h | 3 + fe/fe-core/pom.xml | 17 +- .../datasource/iceberg/IcebergExternalCatalog.java | 1 + .../iceberg/IcebergExternalCatalogFactory.java | 2 + .../iceberg/IcebergS3TablesExternalCatalog.java | 66 +++ .../s3tables/CustomAwsCredentialsProvider.java | 43 ++ .../datasource/iceberg/source/IcebergScanNode.java | 1 + .../datasource/property/S3ClientBEProperties.java | 8 + .../property/constants/S3Properties.java | 2 + .../org/apache/doris/persist/gson/GsonUtils.java | 3 + .../datasource/property/PropertyConverterTest.java | 48 +- .../doris/datasource/s3tables/S3TablesTest.java | 87 +++ fe/pom.xml | 2 + regression-test/conf/regression-conf.groovy | 9 + .../iceberg/test_s3tables_insert_overwrite.out | Bin 0 -> 23561 bytes .../iceberg/test_s3tables_write_insert.out | Bin 0 -> 41743 bytes .../iceberg/test_s3tables_write_partitions.out | Bin 0 -> 267 bytes .../iceberg/test_s3tables_insert_overwrite.groovy | 648 ++++++++++++++++++++ .../iceberg/test_s3tables_write_insert.groovy | 652 +++++++++++++++++++++ .../iceberg/test_s3tables_write_partitions.groovy | 108 ++++ 21 files changed, 1681 insertions(+), 26 deletions(-) diff --git a/be/src/util/s3_util.cpp b/be/src/util/s3_util.cpp index c9406474526..449d421a078 100644 --- a/be/src/util/s3_util.cpp +++ b/be/src/util/s3_util.cpp @@ -168,7 +168,9 @@ std::shared_ptr<Aws::S3::S3Client> 
S3ClientFactory::create(const S3Conf& s3_conf } Aws::Client::ClientConfiguration aws_config = S3ClientFactory::getClientConfiguration(); - aws_config.endpointOverride = s3_conf.endpoint; + if (s3_conf.need_override_endpoint) { + aws_config.endpointOverride = s3_conf.endpoint; + } aws_config.region = s3_conf.region; std::string ca_cert = get_valid_ca_cert_path(); if ("" != _ca_cert_file_path) { @@ -255,6 +257,9 @@ Status S3ClientFactory::convert_properties_to_s3_conf( s3_conf->connect_timeout_ms = std::atoi(properties.find(S3_CONN_TIMEOUT_MS)->second.c_str()); } + if (auto it = properties.find(S3_NEED_OVERRIDE_ENDPOINT); it != properties.end()) { + s3_conf->need_override_endpoint = (it->second == "true"); + } if (s3_uri.get_bucket() == "") { return Status::InvalidArgument("Invalid S3 URI {}, bucket is not specified", s3_uri.to_string()); diff --git a/be/src/util/s3_util.h b/be/src/util/s3_util.h index 74af0121d20..8d4725f5f0c 100644 --- a/be/src/util/s3_util.h +++ b/be/src/util/s3_util.h @@ -66,6 +66,7 @@ const static std::string S3_TOKEN = "AWS_TOKEN"; const static std::string S3_MAX_CONN_SIZE = "AWS_MAX_CONN_SIZE"; const static std::string S3_REQUEST_TIMEOUT_MS = "AWS_REQUEST_TIMEOUT_MS"; const static std::string S3_CONN_TIMEOUT_MS = "AWS_CONNECTION_TIMEOUT_MS"; +const static std::string S3_NEED_OVERRIDE_ENDPOINT = "AWS_NEED_OVERRIDE_ENDPOINT"; struct S3Conf { std::string ak; @@ -79,6 +80,8 @@ struct S3Conf { int request_timeout_ms = -1; int connect_timeout_ms = -1; bool use_virtual_addressing = true; + // For aws s3, no need to override endpoint + bool need_override_endpoint = true; std::string to_string() const { return fmt::format( diff --git a/fe/fe-core/pom.xml b/fe/fe-core/pom.xml index 6195a1e32cd..89a221d901a 100644 --- a/fe/fe-core/pom.xml +++ b/fe/fe-core/pom.xml @@ -32,7 +32,6 @@ under the License. 
<doris.home>${basedir}/../../</doris.home> <doris.thirdparty>${basedir}/../../thirdparty</doris.thirdparty> <fe_ut_parallel>1</fe_ut_parallel> - <awssdk.version>2.20.131</awssdk.version> <huaweiobs.version>3.1.1-hw-46</huaweiobs.version> <tencentcos.version>8.2.7</tencentcos.version> </properties> @@ -788,6 +787,22 @@ under the License. <artifactId>modernizer-maven-annotations</artifactId> <version>2.4.0</version> </dependency> + <dependency> + <groupId>software.amazon.awssdk</groupId> + <artifactId>s3tables</artifactId> + <version>${awssdk.version}</version> + </dependency> + <dependency> + <groupId>software.amazon.s3tables</groupId> + <artifactId>s3-tables-catalog-for-iceberg</artifactId> + <version>${s3tables.catalog.version}</version> + </dependency> + <!-- AWS SDK Core --> + <dependency> + <groupId>software.amazon.awssdk</groupId> + <artifactId>sdk-core</artifactId> + <version>${awssdk.version}</version> + </dependency> </dependencies> <repositories> <!-- for huawei obs sdk --> diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergExternalCatalog.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergExternalCatalog.java index 6a3265388f3..82ae49152ba 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergExternalCatalog.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergExternalCatalog.java @@ -43,6 +43,7 @@ public abstract class IcebergExternalCatalog extends ExternalCatalog { public static final String ICEBERG_HADOOP = "hadoop"; public static final String ICEBERG_GLUE = "glue"; public static final String ICEBERG_DLF = "dlf"; + public static final String ICEBERG_S3_TABLES = "s3tables"; public static final String EXTERNAL_CATALOG_NAME = "external_catalog.name"; protected String icebergCatalogType; protected Catalog catalog; diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergExternalCatalogFactory.java 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergExternalCatalogFactory.java index e8f593f293c..748c0805393 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergExternalCatalogFactory.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergExternalCatalogFactory.java @@ -41,6 +41,8 @@ public class IcebergExternalCatalogFactory { return new IcebergDLFExternalCatalog(catalogId, name, resource, props, comment); case IcebergExternalCatalog.ICEBERG_HADOOP: return new IcebergHadoopExternalCatalog(catalogId, name, resource, props, comment); + case IcebergExternalCatalog.ICEBERG_S3_TABLES: + return new IcebergS3TablesExternalCatalog(catalogId, name, resource, props, comment); default: throw new DdlException("Unknown " + IcebergExternalCatalog.ICEBERG_CATALOG_TYPE + " value: " + catalogType); diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergS3TablesExternalCatalog.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergS3TablesExternalCatalog.java new file mode 100644 index 00000000000..67e2d7bc982 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergS3TablesExternalCatalog.java @@ -0,0 +1,66 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.datasource.iceberg; + +import org.apache.doris.datasource.CatalogProperty; +import org.apache.doris.datasource.iceberg.s3tables.CustomAwsCredentialsProvider; +import org.apache.doris.datasource.property.PropertyConverter; +import org.apache.doris.datasource.property.constants.S3Properties; + +import com.google.common.collect.Maps; +import org.apache.iceberg.CatalogProperties; +import software.amazon.s3tables.iceberg.S3TablesCatalog; + +import java.util.Map; + +public class IcebergS3TablesExternalCatalog extends IcebergExternalCatalog { + + public IcebergS3TablesExternalCatalog(long catalogId, String name, String resource, Map<String, String> props, + String comment) { + super(catalogId, name, comment); + props = PropertyConverter.convertToMetaProperties(props); + catalogProperty = new CatalogProperty(resource, props); + } + + @Override + protected void initCatalog() { + icebergCatalogType = ICEBERG_S3_TABLES; + S3TablesCatalog s3TablesCatalog = new S3TablesCatalog(); + Map<String, String> s3TablesCatalogProperties = convertToS3TablesCatalogProperties(); + String warehouse = catalogProperty.getHadoopProperties().get(CatalogProperties.WAREHOUSE_LOCATION); + s3TablesCatalogProperties.put(CatalogProperties.WAREHOUSE_LOCATION, warehouse); + s3TablesCatalog.initialize(getName(), s3TablesCatalogProperties); + catalog = s3TablesCatalog; + } + + private Map<String, String> convertToS3TablesCatalogProperties() { + Map<String, String> props = catalogProperty.getProperties(); + Map<String, String> s3Properties = Maps.newHashMap(); + s3Properties.put("client.credentials-provider", CustomAwsCredentialsProvider.class.getName()); + if (props.containsKey(S3Properties.ACCESS_KEY)) { + s3Properties.put("client.credentials-provider.s3.access-key-id", props.get(S3Properties.ACCESS_KEY)); + } + if (props.containsKey(S3Properties.SECRET_KEY)) { + 
s3Properties.put("client.credentials-provider.s3.secret-access-key", props.get(S3Properties.SECRET_KEY)); + } + if (props.containsKey(S3Properties.REGION)) { + s3Properties.put("client.region", props.get(S3Properties.REGION)); + } + return s3Properties; + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/s3tables/CustomAwsCredentialsProvider.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/s3tables/CustomAwsCredentialsProvider.java new file mode 100644 index 00000000000..f76c8c3cc9b --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/s3tables/CustomAwsCredentialsProvider.java @@ -0,0 +1,43 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.datasource.iceberg.s3tables; + +import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; +import software.amazon.awssdk.auth.credentials.AwsCredentials; +import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; + +import java.util.Map; + +public class CustomAwsCredentialsProvider implements AwsCredentialsProvider { + private final String accessKeyId; + private final String secretAccessKey; + + public CustomAwsCredentialsProvider(String accessKeyId, String secretAccessKey) { + this.accessKeyId = accessKeyId; + this.secretAccessKey = secretAccessKey; + } + + @Override + public AwsCredentials resolveCredentials() { + return AwsBasicCredentials.create(accessKeyId, secretAccessKey); + } + + public static CustomAwsCredentialsProvider create(Map<String, String> props) { + return new CustomAwsCredentialsProvider(props.get("s3.access-key-id"), props.get("s3.secret-access-key")); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/source/IcebergScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/source/IcebergScanNode.java index 87392d0ff38..0a9269ce860 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/source/IcebergScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/source/IcebergScanNode.java @@ -121,6 +121,7 @@ public class IcebergScanNode extends FileQueryScanNode { case IcebergExternalCatalog.ICEBERG_DLF: case IcebergExternalCatalog.ICEBERG_GLUE: case IcebergExternalCatalog.ICEBERG_HADOOP: + case IcebergExternalCatalog.ICEBERG_S3_TABLES: source = new IcebergApiSource((IcebergExternalTable) table, desc, columnNameToRange); break; default: diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/S3ClientBEProperties.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/S3ClientBEProperties.java index 7d8c2668fea..55edc48c7e8 100644 --- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/S3ClientBEProperties.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/S3ClientBEProperties.java @@ -17,6 +17,7 @@ package org.apache.doris.datasource.property; +import org.apache.doris.datasource.iceberg.IcebergExternalCatalog; import org.apache.doris.datasource.property.constants.CosProperties; import org.apache.doris.datasource.property.constants.GCSProperties; import org.apache.doris.datasource.property.constants.MinioProperties; @@ -53,6 +54,13 @@ public class S3ClientBEProperties { private static Map<String, String> getBeAWSPropertiesFromS3(Map<String, String> properties) { Map<String, String> beProperties = new HashMap<>(); + if (properties.containsKey(IcebergExternalCatalog.ICEBERG_CATALOG_TYPE) + && properties.get(IcebergExternalCatalog.ICEBERG_CATALOG_TYPE).equals( + IcebergExternalCatalog.ICEBERG_S3_TABLES)) { + beProperties.put(Env.NEED_OVERRIDE_ENDPOINT, "false"); + } else { + beProperties.put(Env.NEED_OVERRIDE_ENDPOINT, "true"); + } String endpoint = properties.get(S3Properties.ENDPOINT); beProperties.put(S3Properties.Env.ENDPOINT, endpoint); String region = PropertyConverter.checkRegion(endpoint, properties.get(S3Properties.Env.REGION), diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/constants/S3Properties.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/constants/S3Properties.java index 8f73e36f73b..41dc3e083eb 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/constants/S3Properties.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/constants/S3Properties.java @@ -111,6 +111,8 @@ public class S3Properties extends BaseProperties { public static final String DEFAULT_MAX_CONNECTIONS = "50"; public static final String DEFAULT_REQUEST_TIMEOUT_MS = "3000"; public static final String DEFAULT_CONNECTION_TIMEOUT_MS = "1000"; + public static final String NEED_OVERRIDE_ENDPOINT = 
"AWS_NEED_OVERRIDE_ENDPOINT"; + public static final List<String> REQUIRED_FIELDS = Arrays.asList(ENDPOINT); public static final List<String> FS_KEYS = Arrays.asList(ENDPOINT, REGION, ACCESS_KEY, SECRET_KEY, TOKEN, ROOT_PATH, BUCKET, MAX_CONNECTIONS, REQUEST_TIMEOUT_MS, CONNECTION_TIMEOUT_MS); diff --git a/fe/fe-core/src/main/java/org/apache/doris/persist/gson/GsonUtils.java b/fe/fe-core/src/main/java/org/apache/doris/persist/gson/GsonUtils.java index d62f056f4ad..3316310b8e6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/persist/gson/GsonUtils.java +++ b/fe/fe-core/src/main/java/org/apache/doris/persist/gson/GsonUtils.java @@ -68,6 +68,7 @@ import org.apache.doris.datasource.iceberg.IcebergGlueExternalCatalog; import org.apache.doris.datasource.iceberg.IcebergHMSExternalCatalog; import org.apache.doris.datasource.iceberg.IcebergHadoopExternalCatalog; import org.apache.doris.datasource.iceberg.IcebergRestExternalCatalog; +import org.apache.doris.datasource.iceberg.IcebergS3TablesExternalCatalog; import org.apache.doris.datasource.infoschema.ExternalInfoSchemaDatabase; import org.apache.doris.datasource.infoschema.ExternalInfoSchemaTable; import org.apache.doris.datasource.infoschema.ExternalMysqlDatabase; @@ -238,6 +239,8 @@ public class GsonUtils { .registerSubtype(IcebergRestExternalCatalog.class, IcebergRestExternalCatalog.class.getSimpleName()) .registerSubtype(IcebergDLFExternalCatalog.class, IcebergDLFExternalCatalog.class.getSimpleName()) .registerSubtype(IcebergHadoopExternalCatalog.class, IcebergHadoopExternalCatalog.class.getSimpleName()) + .registerSubtype(IcebergS3TablesExternalCatalog.class, + IcebergS3TablesExternalCatalog.class.getSimpleName()) .registerSubtype(PaimonExternalCatalog.class, PaimonExternalCatalog.class.getSimpleName()) .registerSubtype(PaimonHMSExternalCatalog.class, PaimonHMSExternalCatalog.class.getSimpleName()) .registerSubtype(PaimonFileExternalCatalog.class, PaimonFileExternalCatalog.class.getSimpleName()) diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/datasource/property/PropertyConverterTest.java b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/PropertyConverterTest.java index 8967ca5fae0..5673f563265 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/datasource/property/PropertyConverterTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/PropertyConverterTest.java @@ -241,7 +241,7 @@ public class PropertyConverterTest extends TestWithFeService { Assertions.assertEquals(analyzedStmt.getTableRefs().size(), 1); TableValuedFunctionRef oldFuncTable = (TableValuedFunctionRef) analyzedStmt.getTableRefs().get(0); S3TableValuedFunction s3Tvf = (S3TableValuedFunction) oldFuncTable.getTableFunction(); - Assertions.assertEquals(s3Tvf.getBrokerDesc().getProperties().size(), 9); + Assertions.assertEquals(10, s3Tvf.getBrokerDesc().getProperties().size()); String queryNew = "select * from s3(\n" + " 'uri' = 'http://s3.us-east-1.amazonaws.com/my-bucket/test.parquet',\n" @@ -254,7 +254,7 @@ public class PropertyConverterTest extends TestWithFeService { Assertions.assertEquals(analyzedStmtNew.getTableRefs().size(), 1); TableValuedFunctionRef newFuncTable = (TableValuedFunctionRef) analyzedStmt.getTableRefs().get(0); S3TableValuedFunction newS3Tvf = (S3TableValuedFunction) newFuncTable.getTableFunction(); - Assertions.assertEquals(newS3Tvf.getBrokerDesc().getProperties().size(), 9); + Assertions.assertEquals(10, newS3Tvf.getBrokerDesc().getProperties().size()); } @Test @@ -288,11 +288,11 @@ public class PropertyConverterTest extends TestWithFeService { CreateCatalogStmt analyzedStmt = createStmt(query); HMSExternalCatalog catalog = createAndGetCatalog(analyzedStmt, "hms_s3"); Map<String, String> properties = catalog.getCatalogProperty().getProperties(); - Assertions.assertEquals(12, properties.size()); + Assertions.assertEquals(13, properties.size()); Map<String, String> hdProps = catalog.getCatalogProperty().getHadoopProperties(); 
Assertions.assertNull(hdProps.get("fs.s3.impl.disable.cache")); - Assertions.assertEquals(20, hdProps.size()); + Assertions.assertEquals(21, hdProps.size()); } @Test @@ -439,11 +439,11 @@ public class PropertyConverterTest extends TestWithFeService { CreateCatalogStmt analyzedStmt = createStmt(queryOld); HMSExternalCatalog catalog = createAndGetCatalog(analyzedStmt, catalogName); Map<String, String> properties = catalog.getProperties(); - Assertions.assertEquals(properties.size(), 21); + Assertions.assertEquals(22, properties.size()); Assertions.assertEquals("s3.us-east-1.amazonaws.com", properties.get(S3Properties.ENDPOINT)); Map<String, String> hdProps = catalog.getCatalogProperty().getHadoopProperties(); - Assertions.assertEquals(29, hdProps.size()); + Assertions.assertEquals(30, hdProps.size()); Assertions.assertNull(hdProps.get("fs.s3.impl.disable.cache")); String query = "create catalog hms_glue properties (\n" @@ -458,12 +458,12 @@ public class PropertyConverterTest extends TestWithFeService { CreateCatalogStmt analyzedStmtNew = createStmt(query); HMSExternalCatalog catalogNew = createAndGetCatalog(analyzedStmtNew, catalogName); Map<String, String> propertiesNew = catalogNew.getProperties(); - Assertions.assertEquals(21, propertiesNew.size()); + Assertions.assertEquals(22, propertiesNew.size()); Assertions.assertEquals("s3.us-east-1.amazonaws.com.cn", propertiesNew.get(S3Properties.ENDPOINT)); Map<String, String> hdPropsNew = catalogNew.getCatalogProperty().getHadoopProperties(); Assertions.assertNull(hdPropsNew.get("fs.s3.impl.disable.cache")); - Assertions.assertEquals(29, hdPropsNew.size()); + Assertions.assertEquals(30, hdPropsNew.size()); } @Test @@ -477,7 +477,7 @@ public class PropertyConverterTest extends TestWithFeService { + " 'cos.secret_key' = 'skk'\n" + ");"; testS3CompatibleCatalogProperties(catalogName0, CosProperties.COS_PREFIX, - "cos.ap-beijing.myqcloud.com", query0, 12, 17); + "cos.ap-beijing.myqcloud.com", query0, 13, 18); String 
catalogName1 = "hms_oss"; String query1 = "create catalog " + catalogName1 + " properties (\n" @@ -488,7 +488,7 @@ public class PropertyConverterTest extends TestWithFeService { + " 'oss.secret_key' = 'skk'\n" + ");"; testS3CompatibleCatalogProperties(catalogName1, OssProperties.OSS_PREFIX, - "oss.oss-cn-beijing.aliyuncs.com", query1, 12, 16); + "oss.oss-cn-beijing.aliyuncs.com", query1, 13, 17); String catalogName2 = "hms_minio"; String query2 = "create catalog " + catalogName2 + " properties (\n" @@ -499,7 +499,7 @@ public class PropertyConverterTest extends TestWithFeService { + " 'minio.secret_key' = 'skk'\n" + ");"; testS3CompatibleCatalogProperties(catalogName2, MinioProperties.MINIO_PREFIX, - "http://127.0.0.1", query2, 12, 20); + "http://127.0.0.1", query2, 13, 21); String catalogName3 = "hms_obs"; String query3 = "create catalog hms_obs properties (\n" @@ -510,7 +510,7 @@ public class PropertyConverterTest extends TestWithFeService { + " 'obs.secret_key' = 'skk'\n" + ");"; testS3CompatibleCatalogProperties(catalogName3, ObsProperties.OBS_PREFIX, - "obs.cn-north-4.myhuaweicloud.com", query3, 12, 16); + "obs.cn-north-4.myhuaweicloud.com", query3, 13, 17); } private void testS3CompatibleCatalogProperties(String catalogName, String prefix, @@ -589,7 +589,7 @@ public class PropertyConverterTest extends TestWithFeService { origProp.put(S3Properties.REGION, "region"); origProp.put(PropertyConverter.USE_PATH_STYLE, "false"); beProperties = S3ClientBEProperties.getBeFSProperties(origProp); - Assertions.assertEquals(5, beProperties.size()); + Assertions.assertEquals(6, beProperties.size()); Assertions.assertEquals("ak", beProperties.get(S3Properties.Env.ACCESS_KEY)); Assertions.assertEquals("sk", beProperties.get(S3Properties.Env.SECRET_KEY)); Assertions.assertEquals("endpoint", beProperties.get(S3Properties.Env.ENDPOINT)); @@ -604,7 +604,7 @@ public class PropertyConverterTest extends TestWithFeService { origProp.put(MinioProperties.REGION, "region"); 
origProp.put(PropertyConverter.USE_PATH_STYLE, "false"); beProperties = S3ClientBEProperties.getBeFSProperties(origProp); - Assertions.assertEquals(5, beProperties.size()); + Assertions.assertEquals(6, beProperties.size()); Assertions.assertEquals("ak", beProperties.get(S3Properties.Env.ACCESS_KEY)); Assertions.assertEquals("sk", beProperties.get(S3Properties.Env.SECRET_KEY)); Assertions.assertEquals("endpoint", beProperties.get(S3Properties.Env.ENDPOINT)); @@ -618,7 +618,7 @@ public class PropertyConverterTest extends TestWithFeService { origProp.put(MinioProperties.ENDPOINT, "endpoint"); origProp.put(PropertyConverter.USE_PATH_STYLE, "false"); beProperties = S3ClientBEProperties.getBeFSProperties(origProp); - Assertions.assertEquals(5, beProperties.size()); + Assertions.assertEquals(6, beProperties.size()); Assertions.assertEquals("ak", beProperties.get(S3Properties.Env.ACCESS_KEY)); Assertions.assertEquals("sk", beProperties.get(S3Properties.Env.SECRET_KEY)); Assertions.assertEquals("endpoint", beProperties.get(S3Properties.Env.ENDPOINT)); @@ -633,7 +633,7 @@ public class PropertyConverterTest extends TestWithFeService { origProp.put(ObsProperties.REGION, "region"); origProp.put(PropertyConverter.USE_PATH_STYLE, "false"); beProperties = S3ClientBEProperties.getBeFSProperties(origProp); - Assertions.assertEquals(5, beProperties.size()); + Assertions.assertEquals(6, beProperties.size()); Assertions.assertEquals("ak", beProperties.get(S3Properties.Env.ACCESS_KEY)); Assertions.assertEquals("sk", beProperties.get(S3Properties.Env.SECRET_KEY)); Assertions.assertEquals("endpoint", beProperties.get(S3Properties.Env.ENDPOINT)); @@ -648,7 +648,7 @@ public class PropertyConverterTest extends TestWithFeService { origProp.put(OssProperties.REGION, "region"); origProp.put(PropertyConverter.USE_PATH_STYLE, "false"); beProperties = S3ClientBEProperties.getBeFSProperties(origProp); - Assertions.assertEquals(5, beProperties.size()); + Assertions.assertEquals(6, 
beProperties.size()); Assertions.assertEquals("ak", beProperties.get(S3Properties.Env.ACCESS_KEY)); Assertions.assertEquals("sk", beProperties.get(S3Properties.Env.SECRET_KEY)); Assertions.assertEquals("endpoint", beProperties.get(S3Properties.Env.ENDPOINT)); @@ -663,7 +663,7 @@ public class PropertyConverterTest extends TestWithFeService { origProp.put(CosProperties.REGION, "region"); origProp.put(PropertyConverter.USE_PATH_STYLE, "false"); beProperties = S3ClientBEProperties.getBeFSProperties(origProp); - Assertions.assertEquals(5, beProperties.size()); + Assertions.assertEquals(6, beProperties.size()); Assertions.assertEquals("ak", beProperties.get(S3Properties.Env.ACCESS_KEY)); Assertions.assertEquals("sk", beProperties.get(S3Properties.Env.SECRET_KEY)); Assertions.assertEquals("endpoint", beProperties.get(S3Properties.Env.ENDPOINT)); @@ -678,7 +678,7 @@ public class PropertyConverterTest extends TestWithFeService { origProp.put(GCSProperties.REGION, "region"); origProp.put(PropertyConverter.USE_PATH_STYLE, "false"); beProperties = S3ClientBEProperties.getBeFSProperties(origProp); - Assertions.assertEquals(5, beProperties.size()); + Assertions.assertEquals(6, beProperties.size()); Assertions.assertEquals("ak", beProperties.get(S3Properties.Env.ACCESS_KEY)); Assertions.assertEquals("sk", beProperties.get(S3Properties.Env.SECRET_KEY)); Assertions.assertEquals("endpoint", beProperties.get(S3Properties.Env.ENDPOINT)); @@ -710,7 +710,7 @@ public class PropertyConverterTest extends TestWithFeService { props.put(DLFProperties.REGION, "cn-beijing"); props.put(DLFProperties.ACCESS_PUBLIC, "false"); Map<String, String> res = PropertyConverter.convertToMetaProperties(new HashMap<>(props)); - Assertions.assertEquals(25, res.size()); + Assertions.assertEquals(26, res.size()); Assertions.assertEquals("akk", res.get(S3Properties.Env.ACCESS_KEY)); Assertions.assertEquals("skk", res.get(S3Properties.Env.SECRET_KEY)); Assertions.assertEquals("akk", 
res.get(DataLakeConfig.CATALOG_ACCESS_KEY_ID)); @@ -722,13 +722,13 @@ public class PropertyConverterTest extends TestWithFeService { props.put(DLFProperties.ACCESS_PUBLIC, "true"); res = PropertyConverter.convertToMetaProperties(new HashMap<>(props)); - Assertions.assertEquals(25, res.size()); + Assertions.assertEquals(26, res.size()); Assertions.assertEquals("oss-cn-beijing.aliyuncs.com", res.get(S3Properties.Env.ENDPOINT)); Assertions.assertEquals("oss-cn-beijing", res.get(S3Properties.Env.REGION)); props.put(OssProperties.OSS_HDFS_ENABLED, "true"); res = PropertyConverter.convertToMetaProperties(new HashMap<>(props)); - Assertions.assertEquals(28, res.size()); + Assertions.assertEquals(29, res.size()); Assertions.assertEquals("com.aliyun.jindodata.oss.JindoOssFileSystem", res.get("fs.oss.impl")); Assertions.assertEquals("com.aliyun.jindodata.oss.OSS", res.get("fs.AbstractFileSystem.oss.impl")); Assertions.assertEquals("false", res.get(DataLakeConfig.CATALOG_CREATE_DEFAULT_DB)); @@ -742,7 +742,7 @@ public class PropertyConverterTest extends TestWithFeService { props2.put("aws.glue.secret-key", "skk"); props2.put("aws.region", "us-east-1"); res = PropertyConverter.convertToMetaProperties(props2); - Assertions.assertEquals(16, res.size()); + Assertions.assertEquals(17, res.size()); Assertions.assertEquals("akk", res.get(S3Properties.Env.ACCESS_KEY)); Assertions.assertEquals("skk", res.get(S3Properties.Env.SECRET_KEY)); Assertions.assertEquals("s3.us-east-1.amazonaws.com", res.get(S3Properties.Env.ENDPOINT)); @@ -754,7 +754,7 @@ public class PropertyConverterTest extends TestWithFeService { props3.put(GlueProperties.ACCESS_KEY, "akk"); props3.put(GlueProperties.SECRET_KEY, "skk"); res = PropertyConverter.convertToMetaProperties(props3); - Assertions.assertEquals(16, res.size()); + Assertions.assertEquals(17, res.size()); Assertions.assertEquals("akk", res.get(S3Properties.Env.ACCESS_KEY)); Assertions.assertEquals("skk", res.get(S3Properties.Env.SECRET_KEY)); 
Assertions.assertEquals("s3.us-east-1.amazonaws.com", res.get(S3Properties.Env.ENDPOINT)); diff --git a/fe/fe-core/src/test/java/org/apache/doris/datasource/s3tables/S3TablesTest.java b/fe/fe-core/src/test/java/org/apache/doris/datasource/s3tables/S3TablesTest.java new file mode 100644 index 00000000000..576033785ed --- /dev/null +++ b/fe/fe-core/src/test/java/org/apache/doris/datasource/s3tables/S3TablesTest.java @@ -0,0 +1,87 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.datasource.s3tables; + +import org.apache.doris.datasource.iceberg.s3tables.CustomAwsCredentialsProvider; + +import org.apache.iceberg.FileScanTask; +import org.apache.iceberg.Table; +import org.apache.iceberg.TableScan; +import org.apache.iceberg.catalog.Namespace; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.io.CloseableIterable; +import org.junit.jupiter.api.Test; +import software.amazon.s3tables.iceberg.S3TablesCatalog; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +public class S3TablesTest { + + @Test + public void testS3TablesCatalog() { + S3TablesCatalog s3TablesCatalog = new S3TablesCatalog(); + Map<String, String> s3Properties = new HashMap<>(); + + // ak, sk + String accessKeyId = ""; + String secretKey = ""; + + s3Properties.put("client.region", "us-east-1"); + s3Properties.put("client.credentials-provider", CustomAwsCredentialsProvider.class.getName()); + s3Properties.put("client.credentials-provider.s3.access-key-id", accessKeyId); + s3Properties.put("client.credentials-provider.s3.secret-access-key", secretKey); + + String warehouse = "arn:aws:s3tables:us-east-1:169698404049:bucket/yy-s3-table-bucket"; + s3Properties.put("warehouse", warehouse); + + try { + s3TablesCatalog.initialize("s3tables", s3Properties); + System.out.println("Successfully initialized S3 Tables catalog!"); + + try { + // 1. list namespaces + List<Namespace> namespaces = s3TablesCatalog.listNamespaces(); + System.out.println("Successfully listed namespaces:"); + for (Namespace namespace : namespaces) { + System.out.println(namespace); + // 2. list tables + List<TableIdentifier> tblIdentifiers = s3TablesCatalog.listTables(namespace); + for (TableIdentifier tblId : tblIdentifiers) { + // 3. 
load table and list files + System.out.println(tblId); + Table tbl = s3TablesCatalog.loadTable(tblId); + System.out.println(tbl.schema()); + TableScan scan = tbl.newScan(); + CloseableIterable<FileScanTask> fileScanTasks = scan.planFiles(); + for (FileScanTask task : fileScanTasks) { + System.out.println(task.file()); + } + } + } + } catch (Exception e) { + e.printStackTrace(); + System.out.println("Note: Could not list namespaces - " + e.getMessage()); + } + } catch (Exception e) { + System.err.println("Error connecting to S3 Tables: " + e.getMessage()); + e.printStackTrace(); + } + } +} diff --git a/fe/pom.xml b/fe/pom.xml index e5de5673536..b6493a54072 100644 --- a/fe/pom.xml +++ b/fe/pom.xml @@ -371,6 +371,8 @@ under the License. <airlift.concurrent.version>202</airlift.concurrent.version> <semver4j.version>5.3.0</semver4j.version> <aliyun-sdk-oss.version>3.15.0</aliyun-sdk-oss.version> + <awssdk.version>2.29.26</awssdk.version> + <s3tables.catalog.version>0.1.4</s3tables.catalog.version> </properties> <profiles> <profile> diff --git a/regression-test/conf/regression-conf.groovy b/regression-test/conf/regression-conf.groovy index 87f56ae5e75..53ee2f60dbb 100644 --- a/regression-test/conf/regression-conf.groovy +++ b/regression-test/conf/regression-conf.groovy @@ -239,3 +239,12 @@ externalEnvIp="127.0.0.1" enableKerberosTest=false kerberosHmsPort=9883 kerberosHdfsPort=8820 + +// External Env +enableExternalIcebergTest = false +// The properties string of iceberg catalog +icebergS3TablesCatalog = "" + +enableExternalHudiTest = false +// The properties string of hudi catalog +hudiEmrCatalog = "" diff --git a/regression-test/data/external_table_p2/iceberg/test_s3tables_insert_overwrite.out b/regression-test/data/external_table_p2/iceberg/test_s3tables_insert_overwrite.out new file mode 100644 index 00000000000..cdd4451c8c9 Binary files /dev/null and b/regression-test/data/external_table_p2/iceberg/test_s3tables_insert_overwrite.out differ diff --git 
a/regression-test/data/external_table_p2/iceberg/test_s3tables_write_insert.out b/regression-test/data/external_table_p2/iceberg/test_s3tables_write_insert.out new file mode 100644 index 00000000000..a0fcbd6816d Binary files /dev/null and b/regression-test/data/external_table_p2/iceberg/test_s3tables_write_insert.out differ diff --git a/regression-test/data/external_table_p2/iceberg/test_s3tables_write_partitions.out b/regression-test/data/external_table_p2/iceberg/test_s3tables_write_partitions.out new file mode 100644 index 00000000000..b206cd3d90d Binary files /dev/null and b/regression-test/data/external_table_p2/iceberg/test_s3tables_write_partitions.out differ diff --git a/regression-test/suites/external_table_p2/iceberg/test_s3tables_insert_overwrite.groovy b/regression-test/suites/external_table_p2/iceberg/test_s3tables_insert_overwrite.groovy new file mode 100644 index 00000000000..89c667719d8 --- /dev/null +++ b/regression-test/suites/external_table_p2/iceberg/test_s3tables_insert_overwrite.groovy @@ -0,0 +1,648 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("test_s3tables_insert_overwrite", "p0,external,iceberg,external_docker,external_docker_iceberg") { + def format_compressions = ["parquet_zstd", "orc_zlib"] + + def q01 = { String format_compression, String catalog_name -> + def parts = format_compression.split("_") + def format = parts[0] + def compression = parts[1] + sql """ DROP TABLE IF EXISTS `iceberg_overwrite_all_types_${format_compression}`; """ + sql """ + CREATE TABLE `iceberg_overwrite_all_types_${format_compression}`( + `boolean_col` boolean, + `int_col` int, + `bigint_col` bigint, + `float_col` float, + `double_col` double, + `decimal_col1` decimal(9,0), + `decimal_col2` decimal(8,4), + `decimal_col3` decimal(18,6), + `decimal_col4` decimal(38,12), + `string_col` string, + `date_col` date, + `timestamp_col1` datetime, + `timestamp_col2` datetime, + `timestamp_col3` datetime, + `t_map_string` map<string,string>, + `t_map_int` map<int,int>, + `t_map_bigint` map<bigint,bigint>, + `t_map_float` map<float,float>, + `t_map_double` map<double,double>, + `t_map_boolean` map<boolean,boolean>, + `t_map_decimal_precision_2` map<decimal(2,1),decimal(2,1)>, + `t_map_decimal_precision_4` map<decimal(4,2),decimal(4,2)>, + `t_map_decimal_precision_8` map<decimal(8,4),decimal(8,4)>, + `t_map_decimal_precision_17` map<decimal(17,8),decimal(17,8)>, + `t_map_decimal_precision_18` map<decimal(18,8),decimal(18,8)>, + `t_map_decimal_precision_38` map<decimal(38,16),decimal(38,16)>, + `t_array_string` array<string>, + `t_array_int` array<int>, + `t_array_bigint` array<bigint>, + `t_array_float` array<float>, + `t_array_double` array<double>, + `t_array_boolean` array<boolean>, + `t_array_decimal_precision_2` array<decimal(2,1)>, + `t_array_decimal_precision_4` array<decimal(4,2)>, + `t_array_decimal_precision_8` array<decimal(8,4)>, + `t_array_decimal_precision_17` array<decimal(17,8)>, + `t_array_decimal_precision_18` array<decimal(18,8)>, + `t_array_decimal_precision_38` array<decimal(38,16)>, + `t_struct_bigint` 
struct<s_bigint:bigint>, + `t_complex` map<string,array<struct<s_int:int>>>, + `t_struct_nested` struct<struct_field:array<string>>, + `t_struct_null` struct<struct_field_null:string,struct_field_null2:string>, + `t_struct_non_nulls_after_nulls` struct<struct_non_nulls_after_nulls1:int,struct_non_nulls_after_nulls2:string>, + `t_nested_struct_non_nulls_after_nulls` struct<struct_field1:int,struct_field2:string,strict_field3:struct<nested_struct_field1:int,nested_struct_field2:string>>, + `t_map_null_value` map<string,string>, + `t_array_string_starting_with_nulls` array<string>, + `t_array_string_with_nulls_in_between` array<string>, + `t_array_string_ending_with_nulls` array<string>, + `t_array_string_all_nulls` array<string>, + `dt` int) ENGINE=iceberg + properties ( + "compression-codec" = ${compression}, + "write-format"=${format} + ) + """ + + sql """ + INSERT OVERWRITE table iceberg_overwrite_all_types_${format_compression} + VALUES ( + 1, -- boolean_col + 2147483647, -- int_col + 9223372036854775807, -- bigint_col + 123.45, -- float_col + 123456.789, -- double_col + CAST(123456789 AS DECIMAL(9,0)), -- decimal_col1 + CAST(1234.5678 AS DECIMAL(8,4)), -- decimal_col2 + CAST(123456.789012 AS DECIMAL(18,6)), -- decimal_col3 + CAST(123456789.012345678901 AS DECIMAL(38,12)), -- decimal_col4 + 'string_value', -- string_col + '2024-03-20', -- date_col + '2024-03-20 12:00:00', -- timestamp_col1 + '2024-03-20 12:00:00.123456789', -- timestamp_col2 + '2024-03-20 12:00:00.123456789', -- timestamp_col3 + MAP('key1', 'value1'), -- t_map_string + MAP(1, 10), -- t_map_int + MAP(1, 100000000000), -- t_map_bigint + MAP(CAST(1.1 AS FLOAT), CAST(10.1 AS FLOAT)), -- t_map_float + MAP(CAST(1.1 AS DOUBLE), CAST(10.1 AS DOUBLE)), -- t_map_double + MAP(TRUE, FALSE), -- t_map_boolean + MAP(CAST(1.1 AS DECIMAL(2,1)), CAST(1.1 AS DECIMAL(2,1))), -- t_map_decimal_precision_2 + MAP(CAST(1.23 AS DECIMAL(4,2)), CAST(1.23 AS DECIMAL(4,2))), -- t_map_decimal_precision_4 + MAP(CAST(1.2345 AS 
DECIMAL(8,4)), CAST(1.2345 AS DECIMAL(8,4))), -- t_map_decimal_precision_8 + MAP(CAST(1.23456789 AS DECIMAL(17,8)), CAST(1.23456789 AS DECIMAL(17,8))), -- t_map_decimal_precision_17 + MAP(CAST(1.23456789 AS DECIMAL(18,8)), CAST(1.23456789 AS DECIMAL(18,8))), -- t_map_decimal_precision_18 + MAP(CAST(1.234567890123456789 AS DECIMAL(38,16)), CAST(1.234567890123456789 AS DECIMAL(38,16))), -- t_map_decimal_precision_38 + ARRAY('string1', 'string2'), -- t_ARRAY_string + ARRAY(1, 2, 3), -- t_ARRAY_int + ARRAY(100000000000, 200000000000), -- t_ARRAY_bigint + ARRAY(CAST(1.1 AS FLOAT), CAST(2.2 AS FLOAT)), -- t_ARRAY_float + ARRAY(CAST(1.123456789 AS DOUBLE), CAST(2.123456789 AS DOUBLE)), -- t_ARRAY_double + ARRAY(TRUE, FALSE), -- t_ARRAY_boolean + ARRAY(CAST(1.1 AS DECIMAL(2,1)), CAST(2.2 AS DECIMAL(2,1))), -- t_ARRAY_decimal_precision_2 + ARRAY(CAST(1.23 AS DECIMAL(4,2)), CAST(2.34 AS DECIMAL(4,2))), -- t_ARRAY_decimal_precision_4 + ARRAY(CAST(1.2345 AS DECIMAL(8,4)), CAST(2.3456 AS DECIMAL(8,4))), -- t_ARRAY_decimal_precision_8 + ARRAY(CAST(1.23456789 AS DECIMAL(17,8)), CAST(2.34567891 AS DECIMAL(17,8))), -- t_ARRAY_decimal_precision_17 + ARRAY(1.23456789, 2.34567891), -- t_ARRAY_decimal_precision_18 + ARRAY(1.234567890123456789, 2.345678901234567890), -- t_ARRAY_decimal_precision_38 + NAMED_STRUCT('s_bigint', 1234567890), -- t_struct_bigint + MAP('key', ARRAY(NAMED_STRUCT('s_int', 123))), -- t_complex + NAMED_STRUCT('struct_field', ARRAY('value1', 'value2')), -- t_struct_nested + NAMED_STRUCT('struct_field_null', null, 'struct_field_null2', null), -- t_struct_null + NAMED_STRUCT('struct_non_nulls_after_nulls1', 123, 'struct_non_nulls_after_nulls2', 'value'), -- t_struct_non_nulls_after_nulls + NAMED_STRUCT('struct_field1', 123, 'struct_field2', 'value', 'strict_field3', NAMED_STRUCT('nested_struct_field1', 123, 'nested_struct_field2', 'nested_value')), -- t_nested_struct_non_nulls_after_nulls + MAP('null_key', null), -- t_map_null_value + ARRAY(null, 'value1', 'value2'), 
-- t_ARRAY_string_starting_with_nulls + ARRAY('value1', null, 'value2'), -- t_ARRAY_string_with_nulls_in_between + ARRAY('value1', 'value2', null), -- t_ARRAY_string_ending_with_nulls + ARRAY(null, null, null), -- t_ARRAY_string_all_nulls + 20240320 -- dt + ); + """ + order_qt_q01 """ select * from iceberg_overwrite_all_types_${format_compression}; + """ + + sql """ + INSERT OVERWRITE table iceberg_overwrite_all_types_${format_compression} + VALUES ( + 1, -- boolean_col + 2147483647, -- int_col + 9223372036854775807, -- bigint_col + CAST(123.45 AS FLOAT), -- float_col + CAST(123456.789 AS DOUBLE), -- double_col + CAST(123456789 AS DECIMAL(9,0)), -- decimal_col1 + CAST(1234.5678 AS DECIMAL(8,4)), -- decimal_col2 + CAST(123456.789012 AS DECIMAL(18,6)), -- decimal_col3 + CAST(123456789.012345678901 AS DECIMAL(38,12)), -- decimal_col4 + 'string_value', -- string_col + '2024-03-20', -- date_col + '2024-03-20 12:00:00', -- timestamp_col1 + '2024-03-20 12:00:00.123456789', -- timestamp_col2 + '2024-03-20 12:00:00.123456789', -- timestamp_col3 + MAP('key1', 'value1'), -- t_map_string + MAP(1, 10), -- t_map_int + MAP(1, 100000000000), -- t_map_bigint + MAP(CAST(1.1 AS FLOAT), CAST(10.1 AS FLOAT)), -- t_map_float + MAP(CAST(1.1 AS DOUBLE), CAST(10.1 AS DOUBLE)), -- t_map_double + MAP(true, false), -- t_map_boolean + MAP(CAST(1.1 AS DECIMAL(2,1)), CAST(1.1 AS DECIMAL(2,1))), -- t_map_decimal_precision_2 + MAP(CAST(1.23 AS DECIMAL(4,2)), CAST(1.23 AS DECIMAL(4,2))), -- t_map_decimal_precision_4 + MAP(CAST(1.2345 AS DECIMAL(8,4)), CAST(1.2345 AS DECIMAL(8,4))), -- t_map_decimal_precision_8 + MAP(CAST(1.23456789 AS DECIMAL(17,8)), CAST(1.23456789 AS DECIMAL(17,8))), -- t_map_decimal_precision_17 + MAP(CAST(1.23456789 AS DECIMAL(18,8)), CAST(1.23456789 AS DECIMAL(18,8))), -- t_map_decimal_precision_18 + MAP(CAST(1.234567890123456789 AS DECIMAL(38,16)), CAST(1.234567890123456789 AS DECIMAL(38,16))), -- t_map_decimal_precision_38 + ARRAY('string1', 'string2'), -- t_ARRAY_string + 
ARRAY(1, 2, 3), -- t_ARRAY_int + ARRAY(100000000000, 200000000000), -- t_ARRAY_bigint + ARRAY(CAST(1.1 AS FLOAT), CAST(2.2 AS FLOAT)), -- t_ARRAY_float + ARRAY(CAST(1.123456789 AS DOUBLE), CAST(2.123456789 AS DOUBLE)), -- t_ARRAY_double + ARRAY(true, false), -- t_ARRAY_boolean + ARRAY(CAST(1.1 AS DECIMAL(2,1)), CAST(2.2 AS DECIMAL(2,1))), -- t_ARRAY_decimal_precision_2 + ARRAY(CAST(1.23 AS DECIMAL(4,2)), CAST(2.34 AS DECIMAL(4,2))), -- t_ARRAY_decimal_precision_4 + ARRAY(CAST(1.2345 AS DECIMAL(8,4)), CAST(2.3456 AS DECIMAL(8,4))), -- t_ARRAY_decimal_precision_8 + ARRAY(CAST(1.23456789 AS DECIMAL(17,8)), CAST(2.34567891 AS DECIMAL(17,8))), -- t_ARRAY_decimal_precision_17 + ARRAY(CAST(1.23456789 AS DECIMAL(18,8)), CAST(2.34567891 AS DECIMAL(18,8))), -- t_ARRAY_decimal_precision_18 + ARRAY(CAST(1.234567890123456789 AS DECIMAL(38,16)), CAST(2.345678901234567890 AS DECIMAL(38,16))), -- t_ARRAY_decimal_precision_38 + NAMED_STRUCT('s_bigint', 1234567890), -- t_struct_bigint + MAP('key', ARRAY(NAMED_STRUCT('s_int', 123))), -- t_complex + NAMED_STRUCT('struct_field', ARRAY('value1', 'value2')), -- t_struct_nested + NAMED_STRUCT('struct_field_null', null, 'struct_field_null2', null), -- t_struct_null + NAMED_STRUCT('struct_non_nulls_after_nulls1', 123, 'struct_non_nulls_after_nulls2', 'value'), -- t_struct_non_nulls_after_nulls + NAMED_STRUCT('struct_field1', 123, 'struct_field2', 'value', 'strict_field3', NAMED_STRUCT('nested_struct_field1', 123, 'nested_struct_field2', 'nested_value')), -- t_nested_struct_non_nulls_after_nulls + MAP('null_key', null), -- t_map_null_value + ARRAY(null, 'value1', 'value2'), -- t_ARRAY_string_starting_with_nulls + ARRAY('value1', null, 'value2'), -- t_ARRAY_string_with_nulls_in_between + ARRAY('value1', 'value2', null), -- t_ARRAY_string_ending_with_nulls + ARRAY(null, null, null), -- t_ARRAY_string_all_nulls + 20240320 -- dt + ), + ( + 0, -- boolean_col + -2147483648, -- int_col + -9223372036854775808, -- bigint_col + CAST(-123.45 AS FLOAT), 
-- float_col + CAST(-123456.789 AS DOUBLE), -- double_col + CAST(-123456789 AS DECIMAL(9,0)), -- decimal_col1 + CAST(-1234.5678 AS DECIMAL(8,4)), -- decimal_col2 + CAST(-123456.789012 AS DECIMAL(18,6)), -- decimal_col3 + CAST(-123456789.012345678901 AS DECIMAL(38,12)), -- decimal_col4 + 'string_value', -- string_col + '2024-03-21', -- date_col + '2024-03-21 12:00:00', -- timestamp_col1 + '2024-03-21 12:00:00.123456789', -- timestamp_col2 + '2024-03-21 12:00:00.123456789', -- timestamp_col3 + MAP('key1', 'value1'), -- t_map_string + MAP(2, 20), -- t_map_int + MAP(2, 200000000000), -- t_map_bigint + MAP(CAST(2.2 AS FLOAT), CAST(20.2 AS FLOAT)), -- t_map_float + MAP(CAST(2.2 AS DOUBLE), CAST(20.2 AS DOUBLE)), -- t_map_double + MAP(false, true), -- t_map_boolean + MAP(CAST(2.2 AS DECIMAL(2,1)), CAST(2.2 AS DECIMAL(2,1))), -- t_map_decimal_precision_2 + MAP(CAST(2.34 AS DECIMAL(4,2)), CAST(2.34 AS DECIMAL(4,2))), -- t_map_decimal_precision_4 + MAP(CAST(2.3456 AS DECIMAL(8,4)), CAST(2.3456 AS DECIMAL(8,4))), -- t_map_decimal_precision_8 + MAP(CAST(2.34567890 AS DECIMAL(17,8)), CAST(2.34567890 AS DECIMAL(17,8))), -- t_map_decimal_precision_17 + MAP(CAST(2.34567890 AS DECIMAL(18,8)), CAST(2.34567890 AS DECIMAL(18,8))), -- t_map_decimal_precision_18 + MAP(CAST(2.345678901234567890 AS DECIMAL(38,16)), CAST(2.345678901234567890 AS DECIMAL(38,16))), -- t_map_decimal_precision_38 + ARRAY('string1', 'string2'), -- t_ARRAY_string + ARRAY(4, 5, 6), -- t_ARRAY_int + ARRAY(300000000000, 400000000000), -- t_ARRAY_bigint + ARRAY(CAST(3.3 AS FLOAT), CAST(4.4 AS FLOAT)), -- t_ARRAY_float + ARRAY(CAST(3.123456789 AS DOUBLE), CAST(4.123456789 AS DOUBLE)), -- t_ARRAY_double + ARRAY(false, true), -- t_ARRAY_boolean + ARRAY(CAST(3.3 AS DECIMAL(2,1)), CAST(4.4 AS DECIMAL(2,1))), -- t_ARRAY_decimal_precision_2 + ARRAY(CAST(3.45 AS DECIMAL(4,2)), CAST(4.56 AS DECIMAL(4,2))), -- t_ARRAY_decimal_precision_4 + ARRAY(CAST(3.4567 AS DECIMAL(8,4)), CAST(4.5678 AS DECIMAL(8,4))), -- 
t_ARRAY_decimal_precision_8 + ARRAY(CAST(3.45678901 AS DECIMAL(17,8)), CAST(4.56789012 AS DECIMAL(17,8))), -- t_ARRAY_decimal_precision_17 + ARRAY(CAST(3.45678901 AS DECIMAL(18,8)), CAST(4.56789012 AS DECIMAL(18,8))), -- t_ARRAY_decimal_precision_18 + ARRAY(CAST(3.456789012345678901 AS DECIMAL(38,16)), CAST(4.567890123456789012 AS DECIMAL(38,16))), -- t_ARRAY_decimal_precision_38 + NAMED_STRUCT('s_bigint', -1234567890), -- t_struct_bigint + MAP('key', ARRAY(NAMED_STRUCT('s_int', -123))), -- t_complex + NAMED_STRUCT('struct_field', ARRAY('value1', 'value2')), -- t_struct_nested + NAMED_STRUCT('struct_field_null', null, 'struct_field_null2', null), -- t_struct_null + NAMED_STRUCT('struct_non_nulls_after_nulls1', -123, 'struct_non_nulls_after_nulls2', 'value'), -- t_struct_non_nulls_after_nulls + NAMED_STRUCT('struct_field1', -123, 'struct_field2', 'value', 'strict_field3', NAMED_STRUCT('nested_struct_field1', -123, 'nested_struct_field2', 'nested_value')), -- t_nested_struct_non_nulls_after_nulls + MAP('null_key', null), -- t_map_null_value + ARRAY(null, 'value1', 'value2'), -- t_ARRAY_string_starting_with_nulls + ARRAY('value1', null, 'value2'), -- t_ARRAY_string_with_nulls_in_between + ARRAY('value1', 'value2', null), -- t_ARRAY_string_ending_with_nulls + ARRAY(null, null, null), -- t_ARRAY_string_all_nulls + 20240321 -- dt + ), + ( + 0, -- boolean_col + -2147483648, -- int_col + -9223372036854775808, -- bigint_col + CAST(-123.45 AS FLOAT), -- float_col + CAST(-123456.789 AS DOUBLE), -- double_col + CAST(-123456789 AS DECIMAL(9,0)), -- decimal_col1 + CAST(-1234.5678 AS DECIMAL(8,4)), -- decimal_col2 + CAST(-123456.789012 AS DECIMAL(18,6)), -- decimal_col3 + CAST(-123456789.012345678901 AS DECIMAL(38,12)), -- decimal_col4 + 'string_value', -- string_col + '2024-03-22', -- date_col + '2024-03-22 12:00:00', -- timestamp_col1 + '2024-03-22 12:00:00.123456789', -- timestamp_col2 + '2024-03-22 12:00:00.123456789', -- timestamp_col3 + MAP('key1', 'value1'), -- 
t_map_string + MAP(3, 20), -- t_map_int + MAP(3, 200000000000), -- t_map_bigint + MAP(CAST(3.2 AS FLOAT), CAST(20.2 AS FLOAT)), -- t_map_float + MAP(CAST(3.2 AS DOUBLE), CAST(20.2 AS DOUBLE)), -- t_map_double + MAP(false, true), -- t_map_boolean + MAP(CAST(3.2 AS DECIMAL(2,1)), CAST(2.2 AS DECIMAL(2,1))), -- t_map_decimal_precision_2 + MAP(CAST(3.34 AS DECIMAL(4,2)), CAST(2.34 AS DECIMAL(4,2))), -- t_map_decimal_precision_4 + MAP(CAST(2.3456 AS DECIMAL(8,4)), CAST(2.3456 AS DECIMAL(8,4))), -- t_map_decimal_precision_8 + MAP(CAST(2.34567890 AS DECIMAL(17,8)), CAST(2.34567890 AS DECIMAL(17,8))), -- t_map_decimal_precision_17 + MAP(CAST(2.34567890 AS DECIMAL(18,8)), CAST(2.34567890 AS DECIMAL(18,8))), -- t_map_decimal_precision_18 + MAP(CAST(3.345678901234567890 AS DECIMAL(38,16)), CAST(2.345678901234567890 AS DECIMAL(38,16))), -- t_map_decimal_precision_38 + ARRAY('string1', 'string2'), -- t_ARRAY_string + ARRAY(4, 5, 6), -- t_ARRAY_int + ARRAY(300000000000, 400000000000), -- t_ARRAY_bigint + ARRAY(CAST(3.3 AS FLOAT), CAST(4.4 AS FLOAT)), -- t_ARRAY_float + ARRAY(CAST(3.123456789 AS DOUBLE), CAST(4.123456789 AS DOUBLE)), -- t_ARRAY_double + ARRAY(false, true), -- t_ARRAY_boolean + ARRAY(CAST(3.3 AS DECIMAL(2,1)), CAST(4.4 AS DECIMAL(2,1))), -- t_ARRAY_decimal_precision_2 + ARRAY(CAST(3.45 AS DECIMAL(4,2)), CAST(4.56 AS DECIMAL(4,2))), -- t_ARRAY_decimal_precision_4 + ARRAY(CAST(8.4567 AS DECIMAL(8,4)), CAST(4.5678 AS DECIMAL(8,4))), -- t_ARRAY_decimal_precision_8 + ARRAY(CAST(3.45678901 AS DECIMAL(17,8)), CAST(4.56789012 AS DECIMAL(17,8))), -- t_ARRAY_decimal_precision_17 + ARRAY(CAST(3.45678901 AS DECIMAL(18,8)), CAST(4.56789012 AS DECIMAL(18,8))), -- t_ARRAY_decimal_precision_18 + ARRAY(CAST(3.456789012345678901 AS DECIMAL(38,16)), CAST(4.567890123456789012 AS DECIMAL(38,16))), -- t_ARRAY_decimal_precision_38 + NAMED_STRUCT('s_bigint', -1234567890), -- t_struct_bigint + MAP('key', ARRAY(NAMED_STRUCT('s_int', -123))), -- t_complex + NAMED_STRUCT('struct_field', 
ARRAY('value1', 'value2')), -- t_struct_nested + NAMED_STRUCT('struct_field_null', null, 'struct_field_null2', null), -- t_struct_null + NAMED_STRUCT('struct_non_nulls_after_nulls1', -123, 'struct_non_nulls_after_nulls2', 'value'), -- t_struct_non_nulls_after_nulls + NAMED_STRUCT('struct_field1', -123, 'struct_field2', 'value', 'strict_field3', NAMED_STRUCT('nested_struct_field1', -123, 'nested_struct_field2', 'nested_value')), -- t_nested_struct_non_nulls_after_nulls + MAP('null_key', null), -- t_map_null_value + ARRAY(null, 'value1', 'value2'), -- t_ARRAY_string_starting_with_nulls + ARRAY('value1', null, 'value2'), -- t_ARRAY_string_with_nulls_in_between + ARRAY('value11', 'value2', null), -- t_ARRAY_string_ending_with_nulls + ARRAY(null, null, null), -- t_ARRAY_string_all_nulls + 20240322 -- dt + ); + """ + order_qt_q02 """ select * from iceberg_overwrite_all_types_${format_compression}; + """ + + sql """ + INSERT OVERWRITE table iceberg_overwrite_all_types_${format_compression}(float_col, t_map_int, t_ARRAY_decimal_precision_8, t_ARRAY_string_starting_with_nulls) + VALUES ( + CAST(123.45 AS FLOAT), -- float_col + MAP(1, 10), -- t_map_int + ARRAY(CAST(1.2345 AS DECIMAL(8,4)), CAST(2.3456 AS DECIMAL(8,4))), -- t_ARRAY_decimal_precision_8 + ARRAY(null, 'value1', 'value2') -- t_ARRAY_string_starting_with_nulls + ); + """ + order_qt_q03 """ select * from iceberg_overwrite_all_types_${format_compression}; + """ + + sql """ DROP TABLE iceberg_overwrite_all_types_${format_compression}; """ + } + + def q03 = { String format_compression, String catalog_name -> + def parts = format_compression.split("_") + def format = parts[0] + def compression = parts[1] + sql """ DROP TABLE IF EXISTS `iceberg_overwrite_types_par_${format_compression}`; """ + sql """ + CREATE TABLE `iceberg_overwrite_types_par_${format_compression}`( + `boolean_col` boolean, + `int_col` int, + `bigint_col` bigint, + `float_col` float, + `double_col` double, + `decimal_col1` decimal(9,0), + 
`decimal_col2` decimal(8,4), + `decimal_col3` decimal(18,6), + `decimal_col4` decimal(38,12), + `string_col` string, + `date_col` date, + `timestamp_col1` datetime, + `timestamp_col2` datetime, + `timestamp_col3` datetime, + `t_map_string` map<string,string>, + `t_map_int` map<int,int>, + `t_map_bigint` map<bigint,bigint>, + `t_map_float` map<float,float>, + `t_map_double` map<double,double>, + `t_map_boolean` map<boolean,boolean>, + `t_map_decimal_precision_2` map<decimal(2,1),decimal(2,1)>, + `t_map_decimal_precision_4` map<decimal(4,2),decimal(4,2)>, + `t_map_decimal_precision_8` map<decimal(8,4),decimal(8,4)>, + `t_map_decimal_precision_17` map<decimal(17,8),decimal(17,8)>, + `t_map_decimal_precision_18` map<decimal(18,8),decimal(18,8)>, + `t_map_decimal_precision_38` map<decimal(38,16),decimal(38,16)>, + `t_array_string` array<string>, + `t_array_int` array<int>, + `t_array_bigint` array<bigint>, + `t_array_float` array<float>, + `t_array_double` array<double>, + `t_array_boolean` array<boolean>, + `t_array_decimal_precision_2` array<decimal(2,1)>, + `t_array_decimal_precision_4` array<decimal(4,2)>, + `t_array_decimal_precision_8` array<decimal(8,4)>, + `t_array_decimal_precision_17` array<decimal(17,8)>, + `t_array_decimal_precision_18` array<decimal(18,8)>, + `t_array_decimal_precision_38` array<decimal(38,16)>, + `t_struct_bigint` struct<s_bigint:bigint>, + `t_complex` map<string,array<struct<s_int:int>>>, + `t_struct_nested` struct<struct_field:array<string>>, + `t_struct_null` struct<struct_field_null:string,struct_field_null2:string>, + `t_struct_non_nulls_after_nulls` struct<struct_non_nulls_after_nulls1:int,struct_non_nulls_after_nulls2:string>, + `t_nested_struct_non_nulls_after_nulls` struct<struct_field1:int,struct_field2:string,strict_field3:struct<nested_struct_field1:int,nested_struct_field2:string>>, + `t_map_null_value` map<string,string>, + `t_array_string_starting_with_nulls` array<string>, + `t_array_string_with_nulls_in_between` 
array<string>, + `t_array_string_ending_with_nulls` array<string>, + `t_array_string_all_nulls` array<string>, + `dt` int) ENGINE=iceberg + PARTITION BY LIST (dt) () + properties ( + "compression-codec" = ${compression}, + "write-format"=${format} + ); + """ + + sql """ + INSERT OVERWRITE TABLE iceberg_overwrite_types_par_${format_compression} + VALUES ( + 1, -- boolean_col + 2147483647, -- int_col + 9223372036854775807, -- bigint_col + 123.45, -- float_col + 123456.789, -- double_col + 123456789, -- decimal_col1 + 1234.5678, -- decimal_col2 + CAST(123456.789012 AS DECIMAL(18,6)), -- decimal_col3 + CAST(123456789.012345678901 AS DECIMAL(38,12)), -- decimal_col4 + 'string_value', -- string_col + '2024-03-20', -- date_col + '2024-03-20 12:00:00', -- timestamp_col1 + '2024-03-20 12:00:00.123456789', -- timestamp_col2 + '2024-03-20 12:00:00.123456789', -- timestamp_col3 + MAP('key1', 'value1'), -- t_map_string + MAP(1, 10), -- t_map_int + MAP(1, 100000000000), -- t_map_bigint + MAP(CAST(1.1 AS FLOAT), CAST(10.1 AS FLOAT)), -- t_map_float + MAP(CAST(1.1 AS DOUBLE), CAST(10.1 AS DOUBLE)), -- t_map_double + MAP(true, false), -- t_map_boolean + MAP(CAST(1.1 AS DECIMAL(2,1)), CAST(1.1 AS DECIMAL(2,1))), -- t_map_decimal_precision_2 + MAP(CAST(1.23 AS DECIMAL(4,2)), CAST(1.23 AS DECIMAL(4,2))), -- t_map_decimal_precision_4 + MAP(CAST(1.2345 AS DECIMAL(8,4)), CAST(1.2345 AS DECIMAL(8,4))), -- t_map_decimal_precision_8 + MAP(CAST(1.23456789 AS DECIMAL(17,8)), CAST(1.23456789 AS DECIMAL(17,8))), -- t_map_decimal_precision_17 + MAP(CAST(1.23456789 AS DECIMAL(18,8)), CAST(1.23456789 AS DECIMAL(18,8))), -- t_map_decimal_precision_18 + MAP(CAST(1.234567890123456789 AS DECIMAL(38,16)), CAST(1.234567890123456789 AS DECIMAL(38,16))), -- t_map_decimal_precision_38 + ARRAY('string1', 'string2'), -- t_ARRAY_string + ARRAY(1, 2, 3), -- t_ARRAY_int + ARRAY(100000000000, 200000000000), -- t_ARRAY_bigint + ARRAY(CAST(1.1 AS FLOAT), CAST(2.2 AS FLOAT)), -- t_ARRAY_float + 
ARRAY(CAST(1.123456789 AS DOUBLE), CAST(2.123456789 AS DOUBLE)), -- t_ARRAY_double + ARRAY(true, false), -- t_ARRAY_boolean + ARRAY(CAST(1.1 AS DECIMAL(2,1)), CAST(2.2 AS DECIMAL(2,1))), -- t_ARRAY_decimal_precision_2 + ARRAY(CAST(1.23 AS DECIMAL(4,2)), CAST(2.34 AS DECIMAL(4,2))), -- t_ARRAY_decimal_precision_4 + ARRAY(CAST(1.2345 AS DECIMAL(8,4)), CAST(2.3456 AS DECIMAL(8,4))), -- t_ARRAY_decimal_precision_8 + ARRAY(CAST(1.23456789 AS DECIMAL(17,8)), CAST(2.34567891 AS DECIMAL(17,8))), -- t_ARRAY_decimal_precision_17 + ARRAY(CAST(1.23456789 AS DECIMAL(18,8)), CAST(2.34567891 AS DECIMAL(18,8))), -- t_ARRAY_decimal_precision_18 + ARRAY(CAST(1.234567890123456789 AS DECIMAL(38,16)), CAST(2.345678901234567890 AS DECIMAL(38,16))), -- t_ARRAY_decimal_precision_38 + NAMED_STRUCT('s_bigint', 1234567890), -- t_struct_bigint + MAP('key', ARRAY(NAMED_STRUCT('s_int', 123))), -- t_complex + NAMED_STRUCT('struct_field', ARRAY('value1', 'value2')), -- t_struct_nested + NAMED_STRUCT('struct_field_null', null, 'struct_field_null2', null), -- t_struct_null + NAMED_STRUCT('struct_non_nulls_after_nulls1', 123, 'struct_non_nulls_after_nulls2', 'value'), -- t_struct_non_nulls_after_nulls + NAMED_STRUCT('struct_field1', 123, 'struct_field2', 'value', 'strict_field3', NAMED_STRUCT('nested_struct_field1', 123, 'nested_struct_field2', 'nested_value')), -- t_nested_struct_non_nulls_after_nulls + MAP('null_key', null), -- t_map_null_value + ARRAY(null, 'value1', 'value2'), -- t_ARRAY_string_starting_with_nulls + ARRAY('value1', null, 'value2'), -- t_ARRAY_string_with_nulls_in_between + ARRAY('value1', 'value2', null), -- t_ARRAY_string_ending_with_nulls + ARRAY(null, null, null), -- t_ARRAY_string_all_nulls + 20240320 -- dt + ); + """ + order_qt_q01 """ select * from iceberg_overwrite_types_par_${format_compression}; + """ + + sql """ + INSERT OVERWRITE TABLE iceberg_overwrite_types_par_${format_compression} + VALUES ( + 1, -- boolean_col + 2147483647, -- int_col + 9223372036854775807, -- 
bigint_col + CAST(123.45 AS FLOAT), -- float_col + CAST(123456.789 AS DOUBLE), -- double_col + CAST(123456789 AS DECIMAL(9,0)), -- decimal_col1 + CAST(1234.5678 AS DECIMAL(8,4)), -- decimal_col2 + CAST(123456.789012 AS DECIMAL(18,6)), -- decimal_col3 + CAST(123456789.012345678901 AS DECIMAL(38,12)), -- decimal_col4 + 'string_value', -- string_col + '2024-03-20', -- date_col + '2024-03-20 12:00:00', -- timestamp_col1 + '2024-03-20 12:00:00.123456789', -- timestamp_col2 + '2024-03-20 12:00:00.123456789', -- timestamp_col3 + MAP('key1', 'value1'), -- t_map_string + MAP(1, 10), -- t_map_int + MAP(1, 100000000000), -- t_map_bigint + MAP(CAST(1.1 AS FLOAT), CAST(10.1 AS FLOAT)), -- t_map_float + MAP(CAST(1.1 AS DOUBLE), CAST(10.1 AS DOUBLE)), -- t_map_double + MAP(true, false), -- t_map_boolean + MAP(CAST(1.1 AS DECIMAL(2,1)), CAST(1.1 AS DECIMAL(2,1))), -- t_map_decimal_precision_2 + MAP(CAST(1.23 AS DECIMAL(4,2)), CAST(1.23 AS DECIMAL(4,2))), -- t_map_decimal_precision_4 + MAP(CAST(1.2345 AS DECIMAL(8,4)), CAST(1.2345 AS DECIMAL(8,4))), -- t_map_decimal_precision_8 + MAP(CAST(1.23456789 AS DECIMAL(17,8)), CAST(1.23456789 AS DECIMAL(17,8))), -- t_map_decimal_precision_17 + MAP(CAST(1.23456789 AS DECIMAL(18,8)), CAST(1.23456789 AS DECIMAL(18,8))), -- t_map_decimal_precision_18 + MAP(CAST(1.234567890123456789 AS DECIMAL(38,16)), CAST(1.234567890123456789 AS DECIMAL(38,16))), -- t_map_decimal_precision_38 + ARRAY('string1', 'string2'), -- t_ARRAY_string + ARRAY(1, 2, 3), -- t_ARRAY_int + ARRAY(100000000000, 200000000000), -- t_ARRAY_bigint + ARRAY(CAST(1.1 AS FLOAT), CAST(2.2 AS FLOAT)), -- t_ARRAY_float + ARRAY(CAST(1.123456789 AS DOUBLE), CAST(2.123456789 AS DOUBLE)), -- t_ARRAY_double + ARRAY(true, false), -- t_ARRAY_boolean + ARRAY(CAST(1.1 AS DECIMAL(2,1)), CAST(2.2 AS DECIMAL(2,1))), -- t_ARRAY_decimal_precision_2 + ARRAY(CAST(1.23 AS DECIMAL(4,2)), CAST(2.34 AS DECIMAL(4,2))), -- t_ARRAY_decimal_precision_4 + ARRAY(CAST(1.2345 AS DECIMAL(8,4)), CAST(2.3456 AS 
DECIMAL(8,4))), -- t_ARRAY_decimal_precision_8 + ARRAY(CAST(1.23456789 AS DECIMAL(17,8)), CAST(2.34567891 AS DECIMAL(17,8))), -- t_ARRAY_decimal_precision_17 + ARRAY(CAST(1.23456789 AS DECIMAL(18,8)), CAST(2.34567891 AS DECIMAL(18,8))), -- t_ARRAY_decimal_precision_18 + ARRAY(CAST(1.234567890123456789 AS DECIMAL(38,16)), CAST(2.345678901234567890 AS DECIMAL(38,16))), -- t_ARRAY_decimal_precision_38 + NAMED_STRUCT('s_bigint', 1234567890), -- t_struct_bigint + MAP('key', ARRAY(NAMED_STRUCT('s_int', 123))), -- t_complex + NAMED_STRUCT('struct_field', ARRAY('value1', 'value2')), -- t_struct_nested + NAMED_STRUCT('struct_field_null', null, 'struct_field_null2', null), -- t_struct_null + NAMED_STRUCT('struct_non_nulls_after_nulls1', 123, 'struct_non_nulls_after_nulls2', 'value'), -- t_struct_non_nulls_after_nulls + NAMED_STRUCT('struct_field1', 123, 'struct_field2', 'value', 'strict_field3', NAMED_STRUCT('nested_struct_field1', 123, 'nested_struct_field2', 'nested_value')), -- t_nested_struct_non_nulls_after_nulls + MAP('null_key', null), -- t_map_null_value + ARRAY(null, 'value1', 'value2'), -- t_ARRAY_string_starting_with_nulls + ARRAY('value1', null, 'value2'), -- t_ARRAY_string_with_nulls_in_between + ARRAY('value1', 'value2', null), -- t_ARRAY_string_ending_with_nulls + ARRAY(null, null, null), -- t_ARRAY_string_all_nulls + 20240320 -- dt + ), + ( + 0, -- boolean_col + -2147483648, -- int_col + -9223372036854775808, -- bigint_col + CAST(-123.45 AS FLOAT), -- float_col + CAST(-123456.789 AS DOUBLE), -- double_col + CAST(-123456789 AS DECIMAL(9,0)), -- decimal_col1 + CAST(-1234.5678 AS DECIMAL(8,4)), -- decimal_col2 + CAST(-123456.789012 AS DECIMAL(18,6)), -- decimal_col3 + CAST(-123456789.012345678901 AS DECIMAL(38,12)), -- decimal_col4 + 'string_value', -- string_col + '2024-03-21', -- date_col + '2024-03-21 12:00:00', -- timestamp_col1 + '2024-03-21 12:00:00.123456789', -- timestamp_col2 + '2024-03-21 12:00:00.123456789', -- timestamp_col3 + MAP('key1', 'value1'), 
-- t_map_string + MAP(2, 20), -- t_map_int + MAP(2, 200000000000), -- t_map_bigint + MAP(CAST(2.2 AS FLOAT), CAST(20.2 AS FLOAT)), -- t_map_float + MAP(CAST(2.2 AS DOUBLE), CAST(20.2 AS DOUBLE)), -- t_map_double + MAP(false, true), -- t_map_boolean + MAP(CAST(2.2 AS DECIMAL(2,1)), CAST(2.2 AS DECIMAL(2,1))), -- t_map_decimal_precision_2 + MAP(CAST(2.34 AS DECIMAL(4,2)), CAST(2.34 AS DECIMAL(4,2))), -- t_map_decimal_precision_4 + MAP(CAST(2.3456 AS DECIMAL(8,4)), CAST(2.3456 AS DECIMAL(8,4))), -- t_map_decimal_precision_8 + MAP(CAST(2.34567890 AS DECIMAL(17,8)), CAST(2.34567890 AS DECIMAL(17,8))), -- t_map_decimal_precision_17 + MAP(CAST(2.34567890 AS DECIMAL(18,8)), CAST(2.34567890 AS DECIMAL(18,8))), -- t_map_decimal_precision_18 + MAP(CAST(2.345678901234567890 AS DECIMAL(38,16)), CAST(2.345678901234567890 AS DECIMAL(38,16))), -- t_map_decimal_precision_38 + ARRAY('string1', 'string2'), -- t_ARRAY_string + ARRAY(4, 5, 6), -- t_ARRAY_int + ARRAY(300000000000, 400000000000), -- t_ARRAY_bigint + ARRAY(CAST(3.3 AS FLOAT), CAST(4.4 AS FLOAT)), -- t_ARRAY_float + ARRAY(CAST(3.123456789 AS DOUBLE), CAST(4.123456789 AS DOUBLE)), -- t_ARRAY_double + ARRAY(false, true), -- t_ARRAY_boolean + ARRAY(CAST(3.3 AS DECIMAL(2,1)), CAST(4.4 AS DECIMAL(2,1))), -- t_ARRAY_decimal_precision_2 + ARRAY(CAST(3.45 AS DECIMAL(4,2)), CAST(4.56 AS DECIMAL(4,2))), -- t_ARRAY_decimal_precision_4 + ARRAY(CAST(3.4567 AS DECIMAL(8,4)), CAST(4.5678 AS DECIMAL(8,4))), -- t_ARRAY_decimal_precision_8 + ARRAY(CAST(3.45678901 AS DECIMAL(17,8)), CAST(4.56789012 AS DECIMAL(17,8))), -- t_ARRAY_decimal_precision_17 + ARRAY(CAST(3.45678901 AS DECIMAL(18,8)), CAST(4.56789012 AS DECIMAL(18,8))), -- t_ARRAY_decimal_precision_18 + ARRAY(CAST(3.456789012345678901 AS DECIMAL(38,16)), CAST(4.567890123456789012 AS DECIMAL(38,16))), -- t_ARRAY_decimal_precision_38 + NAMED_STRUCT('s_bigint', -1234567890), -- t_struct_bigint + MAP('key', ARRAY(NAMED_STRUCT('s_int', -123))), -- t_complex + NAMED_STRUCT('struct_field', 
ARRAY('value1', 'value2')), -- t_struct_nested + NAMED_STRUCT('struct_field_null', null, 'struct_field_null2', null), -- t_struct_null + NAMED_STRUCT('struct_non_nulls_after_nulls1', -123, 'struct_non_nulls_after_nulls2', 'value'), -- t_struct_non_nulls_after_nulls + NAMED_STRUCT('struct_field1', -123, 'struct_field2', 'value', 'strict_field3', NAMED_STRUCT('nested_struct_field1', -123, 'nested_struct_field2', 'nested_value')), -- t_nested_struct_non_nulls_after_nulls + MAP('null_key', null), -- t_map_null_value + ARRAY(null, 'value1', 'value2'), -- t_ARRAY_string_starting_with_nulls + ARRAY('value1', null, 'value2'), -- t_ARRAY_string_with_nulls_in_between + ARRAY('value1', 'value2', null), -- t_ARRAY_string_ending_with_nulls + ARRAY(null, null, null), -- t_ARRAY_string_all_nulls + 20240321 -- dt + ), + ( + 0, -- boolean_col + -2147483648, -- int_col + -9223372036854775808, -- bigint_col + -123.45, -- float_col + -123456.789, -- double_col + CAST(-123456789 AS DECIMAL(9,0)), -- decimal_col1 + CAST(-1234.5678 AS DECIMAL(8,4)), -- decimal_col2 + CAST(-123456.789012 AS DECIMAL(18,6)), -- decimal_col3 + CAST(-123456789.012345678901 AS DECIMAL(38,12)), -- decimal_col4 + 'string_value', -- string_col + '2024-03-22', -- date_col + '2024-03-22 12:00:00', -- timestamp_col1 + '2024-03-22 12:00:00.123456789', -- timestamp_col2 + '2024-03-22 12:00:00.123456789', -- timestamp_col3 + MAP('key1', 'value1'), -- t_map_string + MAP(3, 20), -- t_map_int + MAP(3, 200000000000), -- t_map_bigint + MAP(CAST(3.2 AS FLOAT), CAST(20.2 AS FLOAT)), -- t_map_float + MAP(CAST(3.2 AS DOUBLE), CAST(20.2 AS DOUBLE)), -- t_map_double + MAP(false, true), -- t_map_boolean + MAP(CAST(3.2 AS DECIMAL(2,1)), CAST(2.2 AS DECIMAL(2,1))), -- t_map_decimal_precision_2 + MAP(CAST(3.34 AS DECIMAL(4,2)), CAST(2.34 AS DECIMAL(4,2))), -- t_map_decimal_precision_4 + MAP(CAST(2.3456 AS DECIMAL(8,4)), CAST(2.3456 AS DECIMAL(8,4))), -- t_map_decimal_precision_8 + MAP(CAST(2.34567890 AS DECIMAL(17,8)), 
CAST(2.34567890 AS DECIMAL(17,8))), -- t_map_decimal_precision_17 + MAP(CAST(2.34567890 AS DECIMAL(18,8)), CAST(2.34567890 AS DECIMAL(18,8))), -- t_map_decimal_precision_18 + MAP(CAST(3.345678901234567890 AS DECIMAL(38,16)), CAST(2.345678901234567890 AS DECIMAL(38,16))), -- t_map_decimal_precision_38 + ARRAY('string1', 'string2'), -- t_ARRAY_string + ARRAY(4, 5, 6), -- t_ARRAY_int + ARRAY(300000000000, 400000000000), -- t_ARRAY_bigint + ARRAY(CAST(3.3 AS FLOAT), CAST(4.4 AS FLOAT)), -- t_ARRAY_float + ARRAY(CAST(3.123456789 AS DOUBLE), CAST(4.123456789 AS DOUBLE)), -- t_ARRAY_double + ARRAY(false, true), -- t_ARRAY_boolean + ARRAY(CAST(3.3 AS DECIMAL(2,1)), CAST(4.4 AS DECIMAL(2,1))), -- t_ARRAY_decimal_precision_2 + ARRAY(CAST(3.45 AS DECIMAL(4,2)), CAST(4.56 AS DECIMAL(4,2))), -- t_ARRAY_decimal_precision_4 + ARRAY(CAST(8.4567 AS DECIMAL(8,4)), CAST(4.5678 AS DECIMAL(8,4))), -- t_ARRAY_decimal_precision_8 + ARRAY(CAST(3.45678901 AS DECIMAL(17,8)), CAST(4.56789012 AS DECIMAL(17,8))), -- t_ARRAY_decimal_precision_17 + ARRAY(CAST(3.45678901 AS DECIMAL(18,8)), CAST(4.56789012 AS DECIMAL(18,8))), -- t_ARRAY_decimal_precision_18 + ARRAY(CAST(3.456789012345678901 AS DECIMAL(38,16)), CAST(4.567890123456789012 AS DECIMAL(38,16))), -- t_ARRAY_decimal_precision_38 + NAMED_STRUCT('s_bigint', -1234567890), -- t_struct_bigint + MAP('key', ARRAY(NAMED_STRUCT('s_int', -123))), -- t_complex + NAMED_STRUCT('struct_field', ARRAY('value1', 'value2')), -- t_struct_nested + NAMED_STRUCT('struct_field_null', null, 'struct_field_null2', null), -- t_struct_null + NAMED_STRUCT('struct_non_nulls_after_nulls1', -123, 'struct_non_nulls_after_nulls2', 'value'), -- t_struct_non_nulls_after_nulls + NAMED_STRUCT('struct_field1', -123, 'struct_field2', 'value', 'strict_field3', NAMED_STRUCT('nested_struct_field1', -123, 'nested_struct_field2', 'nested_value')), -- t_nested_struct_non_nulls_after_nulls + MAP('null_key', null), -- t_map_null_value + ARRAY(null, 'value1', 'value2'), -- 
t_ARRAY_string_starting_with_nulls + ARRAY('value1', null, 'value2'), -- t_ARRAY_string_with_nulls_in_between + ARRAY('value11', 'value2', null), -- t_ARRAY_string_ending_with_nulls + ARRAY(null, null, null), -- t_ARRAY_string_all_nulls + 20240322 -- dt + ); + """ + order_qt_q02 """ select * from iceberg_overwrite_types_par_${format_compression}; + """ + + sql """ + INSERT OVERWRITE TABLE iceberg_overwrite_types_par_${format_compression}(float_col, t_map_int, t_ARRAY_decimal_precision_8, t_ARRAY_string_starting_with_nulls, dt) + VALUES ( + 123.45, -- float_col + MAP(1, 10), -- t_map_int + ARRAY(CAST(1.2345 AS DECIMAL(8,4)), CAST(2.3456 AS DECIMAL(8,4))), -- t_ARRAY_decimal_precision_8 + ARRAY(null, 'value1', 'value2'), -- t_ARRAY_string_starting_with_nulls + 20240321 -- dt + ); + """ + order_qt_q03 """ select * from iceberg_overwrite_types_par_${format_compression}; + """ + + sql """ DROP TABLE iceberg_overwrite_types_par_${format_compression}; """ + } + + String enabled = context.config.otherConfigs.get("enableExternalIcebergTest") + if (enabled == null || !enabled.equalsIgnoreCase("true")) { + logger.info("disable iceberg test") + return + } + + String catalog_name = "test_s3tables_insert_overwrite" + String props = context.config.otherConfigs.get("icebergS3TablesCatalog") + sql """drop catalog if exists ${catalog_name};""" + sql """ + create catalog ${catalog_name} properties ( + ${props} + ); + """ + + sql """ switch ${catalog_name};""" + sql """ use my_namespace;""" + sql """ set enable_fallback_to_original_planner=false """ + def tables = sql """ show tables; """ + assertTrue(tables.size() > 0) + + try { + for (String format_compression in format_compressions) { + logger.info("Process format_compression " + format_compression) + q01(format_compression, catalog_name) + q03(format_compression, catalog_name) + } + } finally { + } +} diff --git a/regression-test/suites/external_table_p2/iceberg/test_s3tables_write_insert.groovy 
b/regression-test/suites/external_table_p2/iceberg/test_s3tables_write_insert.groovy new file mode 100644 index 00000000000..711aaac5f98 --- /dev/null +++ b/regression-test/suites/external_table_p2/iceberg/test_s3tables_write_insert.groovy @@ -0,0 +1,652 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("test_s3tables_write_insert", "p2,external,iceberg,external_remote,external_remote_iceberg") { + def format_compressions = ["parquet_zstd", "orc_zlib"] + + def q01 = { String format_compression, String catalog_name -> + def parts = format_compression.split("_") + def format = parts[0] + def compression = parts[1] + sql """ DROP TABLE IF EXISTS `iceberg_all_types_${format_compression}`; """ + sql """ + CREATE TABLE `iceberg_all_types_${format_compression}`( + `boolean_col` boolean, + `int_col` int, + `bigint_col` bigint, + `float_col` float, + `double_col` double, + `decimal_col1` decimal(9,0), + `decimal_col2` decimal(8,4), + `decimal_col3` decimal(18,6), + `decimal_col4` decimal(38,12), + `string_col` string, + `date_col` date, + `timestamp_col1` datetime, + `timestamp_col2` datetime, + `timestamp_col3` datetime, + `t_map_string` map<string,string>, + `t_map_int` map<int,int>, + `t_map_bigint` map<bigint,bigint>, + `t_map_float` map<float,float>, + `t_map_double` map<double,double>, + `t_map_boolean` map<boolean,boolean>, + `t_map_decimal_precision_2` map<decimal(2,1),decimal(2,1)>, + `t_map_decimal_precision_4` map<decimal(4,2),decimal(4,2)>, + `t_map_decimal_precision_8` map<decimal(8,4),decimal(8,4)>, + `t_map_decimal_precision_17` map<decimal(17,8),decimal(17,8)>, + `t_map_decimal_precision_18` map<decimal(18,8),decimal(18,8)>, + `t_map_decimal_precision_38` map<decimal(38,16),decimal(38,16)>, + `t_array_string` array<string>, + `t_array_int` array<int>, + `t_array_bigint` array<bigint>, + `t_array_float` array<float>, + `t_array_double` array<double>, + `t_array_boolean` array<boolean>, + `t_array_decimal_precision_2` array<decimal(2,1)>, + `t_array_decimal_precision_4` array<decimal(4,2)>, + `t_array_decimal_precision_8` array<decimal(8,4)>, + `t_array_decimal_precision_17` array<decimal(17,8)>, + `t_array_decimal_precision_18` array<decimal(18,8)>, + `t_array_decimal_precision_38` array<decimal(38,16)>, + `t_struct_bigint` struct<s_bigint:bigint>, 
+ `t_complex` map<string,array<struct<s_int:int>>>, + `t_struct_nested` struct<struct_field:array<string>>, + `t_struct_null` struct<struct_field_null:string,struct_field_null2:string>, + `t_struct_non_nulls_after_nulls` struct<struct_non_nulls_after_nulls1:int,struct_non_nulls_after_nulls2:string>, + `t_nested_struct_non_nulls_after_nulls` struct<struct_field1:int,struct_field2:string,strict_field3:struct<nested_struct_field1:int,nested_struct_field2:string>>, + `t_map_null_value` map<string,string>, + `t_array_string_starting_with_nulls` array<string>, + `t_array_string_with_nulls_in_between` array<string>, + `t_array_string_ending_with_nulls` array<string>, + `t_array_string_all_nulls` array<string>, + `dt` int) ENGINE=iceberg + properties ( + "compression-codec" = ${compression}, + "write-format"=${format} + ) + """ + + sql """ + INSERT INTO iceberg_all_types_${format_compression} + VALUES ( + 1, -- boolean_col + 2147483647, -- int_col + 9223372036854775807, -- bigint_col + 123.45, -- float_col + 123456.789, -- double_col + CAST(123456789 AS DECIMAL(9,0)), -- decimal_col1 + CAST(1234.5678 AS DECIMAL(8,4)), -- decimal_col2 + CAST(123456.789012 AS DECIMAL(18,6)), -- decimal_col3 + CAST(123456789.012345678901 AS DECIMAL(38,12)), -- decimal_col4 + 'string_value', -- string_col + '2024-03-20', -- date_col + '2024-03-20 12:00:00', -- timestamp_col1 + '2024-03-20 12:00:00.123456789', -- timestamp_col2 + '2024-03-20 12:00:00.123456789', -- timestamp_col3 + MAP('key1', 'value1'), -- t_map_string + MAP(1, 10), -- t_map_int + MAP(1, 100000000000), -- t_map_bigint + MAP(CAST(1.1 AS FLOAT), CAST(10.1 AS FLOAT)), -- t_map_float + MAP(CAST(1.1 AS DOUBLE), CAST(10.1 AS DOUBLE)), -- t_map_double + MAP(TRUE, FALSE), -- t_map_boolean + MAP(CAST(1.1 AS DECIMAL(2,1)), CAST(1.1 AS DECIMAL(2,1))), -- t_map_decimal_precision_2 + MAP(CAST(1.23 AS DECIMAL(4,2)), CAST(1.23 AS DECIMAL(4,2))), -- t_map_decimal_precision_4 + MAP(CAST(1.2345 AS DECIMAL(8,4)), CAST(1.2345 AS DECIMAL(8,4))), 
-- t_map_decimal_precision_8 + MAP(CAST(1.23456789 AS DECIMAL(17,8)), CAST(1.23456789 AS DECIMAL(17,8))), -- t_map_decimal_precision_17 + MAP(CAST(1.23456789 AS DECIMAL(18,8)), CAST(1.23456789 AS DECIMAL(18,8))), -- t_map_decimal_precision_18 + MAP(CAST(1.234567890123456789 AS DECIMAL(38,16)), CAST(1.234567890123456789 AS DECIMAL(38,16))), -- t_map_decimal_precision_38 + ARRAY('string1', 'string2'), -- t_ARRAY_string + ARRAY(1, 2, 3), -- t_ARRAY_int + ARRAY(100000000000, 200000000000), -- t_ARRAY_bigint + ARRAY(CAST(1.1 AS FLOAT), CAST(2.2 AS FLOAT)), -- t_ARRAY_float + ARRAY(CAST(1.123456789 AS DOUBLE), CAST(2.123456789 AS DOUBLE)), -- t_ARRAY_double + ARRAY(TRUE, FALSE), -- t_ARRAY_boolean + ARRAY(CAST(1.1 AS DECIMAL(2,1)), CAST(2.2 AS DECIMAL(2,1))), -- t_ARRAY_decimal_precision_2 + ARRAY(CAST(1.23 AS DECIMAL(4,2)), CAST(2.34 AS DECIMAL(4,2))), -- t_ARRAY_decimal_precision_4 + ARRAY(CAST(1.2345 AS DECIMAL(8,4)), CAST(2.3456 AS DECIMAL(8,4))), -- t_ARRAY_decimal_precision_8 + ARRAY(CAST(1.23456789 AS DECIMAL(17,8)), CAST(2.34567891 AS DECIMAL(17,8))), -- t_ARRAY_decimal_precision_17 + ARRAY(1.23456789, 2.34567891), -- t_ARRAY_decimal_precision_18 + ARRAY(1.234567890123456789, 2.345678901234567890), -- t_ARRAY_decimal_precision_38 + NAMED_STRUCT('s_bigint', 1234567890), -- t_struct_bigint + MAP('key', ARRAY(NAMED_STRUCT('s_int', 123))), -- t_complex + NAMED_STRUCT('struct_field', ARRAY('value1', 'value2')), -- t_struct_nested + NAMED_STRUCT('struct_field_null', null, 'struct_field_null2', null), -- t_struct_null + NAMED_STRUCT('struct_non_nulls_after_nulls1', 123, 'struct_non_nulls_after_nulls2', 'value'), -- t_struct_non_nulls_after_nulls + NAMED_STRUCT('struct_field1', 123, 'struct_field2', 'value', 'strict_field3', NAMED_STRUCT('nested_struct_field1', 123, 'nested_struct_field2', 'nested_value')), -- t_nested_struct_non_nulls_after_nulls + MAP('null_key', null), -- t_map_null_value + ARRAY(null, 'value1', 'value2'), -- t_ARRAY_string_starting_with_nulls + 
ARRAY('value1', null, 'value2'), -- t_ARRAY_string_with_nulls_in_between + ARRAY('value1', 'value2', null), -- t_ARRAY_string_ending_with_nulls + ARRAY(null, null, null), -- t_ARRAY_string_all_nulls + 20240320 -- dt + ); + """ + order_qt_q01 """ select * from iceberg_all_types_${format_compression}; + """ + + sql """ + INSERT INTO iceberg_all_types_${format_compression} + VALUES ( + 1, -- boolean_col + 2147483647, -- int_col + 9223372036854775807, -- bigint_col + CAST(123.45 AS FLOAT), -- float_col + CAST(123456.789 AS DOUBLE), -- double_col + CAST(123456789 AS DECIMAL(9,0)), -- decimal_col1 + CAST(1234.5678 AS DECIMAL(8,4)), -- decimal_col2 + CAST(123456.789012 AS DECIMAL(18,6)), -- decimal_col3 + CAST(123456789.012345678901 AS DECIMAL(38,12)), -- decimal_col4 + 'string_value', -- string_col + '2024-03-20', -- date_col + '2024-03-20 12:00:00', -- timestamp_col1 + '2024-03-20 12:00:00.123456789', -- timestamp_col2 + '2024-03-20 12:00:00.123456789', -- timestamp_col3 + MAP('key1', 'value1'), -- t_map_string + MAP(1, 10), -- t_map_int + MAP(1, 100000000000), -- t_map_bigint + MAP(CAST(1.1 AS FLOAT), CAST(10.1 AS FLOAT)), -- t_map_float + MAP(CAST(1.1 AS DOUBLE), CAST(10.1 AS DOUBLE)), -- t_map_double + MAP(true, false), -- t_map_boolean + MAP(CAST(1.1 AS DECIMAL(2,1)), CAST(1.1 AS DECIMAL(2,1))), -- t_map_decimal_precision_2 + MAP(CAST(1.23 AS DECIMAL(4,2)), CAST(1.23 AS DECIMAL(4,2))), -- t_map_decimal_precision_4 + MAP(CAST(1.2345 AS DECIMAL(8,4)), CAST(1.2345 AS DECIMAL(8,4))), -- t_map_decimal_precision_8 + MAP(CAST(1.23456789 AS DECIMAL(17,8)), CAST(1.23456789 AS DECIMAL(17,8))), -- t_map_decimal_precision_17 + MAP(CAST(1.23456789 AS DECIMAL(18,8)), CAST(1.23456789 AS DECIMAL(18,8))), -- t_map_decimal_precision_18 + MAP(CAST(1.234567890123456789 AS DECIMAL(38,16)), CAST(1.234567890123456789 AS DECIMAL(38,16))), -- t_map_decimal_precision_38 + ARRAY('string1', 'string2'), -- t_ARRAY_string + ARRAY(1, 2, 3), -- t_ARRAY_int + ARRAY(100000000000, 200000000000), -- 
t_ARRAY_bigint + ARRAY(CAST(1.1 AS FLOAT), CAST(2.2 AS FLOAT)), -- t_ARRAY_float + ARRAY(CAST(1.123456789 AS DOUBLE), CAST(2.123456789 AS DOUBLE)), -- t_ARRAY_double + ARRAY(true, false), -- t_ARRAY_boolean + ARRAY(CAST(1.1 AS DECIMAL(2,1)), CAST(2.2 AS DECIMAL(2,1))), -- t_ARRAY_decimal_precision_2 + ARRAY(CAST(1.23 AS DECIMAL(4,2)), CAST(2.34 AS DECIMAL(4,2))), -- t_ARRAY_decimal_precision_4 + ARRAY(CAST(1.2345 AS DECIMAL(8,4)), CAST(2.3456 AS DECIMAL(8,4))), -- t_ARRAY_decimal_precision_8 + ARRAY(CAST(1.23456789 AS DECIMAL(17,8)), CAST(2.34567891 AS DECIMAL(17,8))), -- t_ARRAY_decimal_precision_17 + ARRAY(CAST(1.23456789 AS DECIMAL(18,8)), CAST(2.34567891 AS DECIMAL(18,8))), -- t_ARRAY_decimal_precision_18 + ARRAY(CAST(1.234567890123456789 AS DECIMAL(38,16)), CAST(2.345678901234567890 AS DECIMAL(38,16))), -- t_ARRAY_decimal_precision_38 + NAMED_STRUCT('s_bigint', 1234567890), -- t_struct_bigint + MAP('key', ARRAY(NAMED_STRUCT('s_int', 123))), -- t_complex + NAMED_STRUCT('struct_field', ARRAY('value1', 'value2')), -- t_struct_nested + NAMED_STRUCT('struct_field_null', null, 'struct_field_null2', null), -- t_struct_null + NAMED_STRUCT('struct_non_nulls_after_nulls1', 123, 'struct_non_nulls_after_nulls2', 'value'), -- t_struct_non_nulls_after_nulls + NAMED_STRUCT('struct_field1', 123, 'struct_field2', 'value', 'strict_field3', NAMED_STRUCT('nested_struct_field1', 123, 'nested_struct_field2', 'nested_value')), -- t_nested_struct_non_nulls_after_nulls + MAP('null_key', null), -- t_map_null_value + ARRAY(null, 'value1', 'value2'), -- t_ARRAY_string_starting_with_nulls + ARRAY('value1', null, 'value2'), -- t_ARRAY_string_with_nulls_in_between + ARRAY('value1', 'value2', null), -- t_ARRAY_string_ending_with_nulls + ARRAY(null, null, null), -- t_ARRAY_string_all_nulls + 20240320 -- dt + ), + ( + 0, -- boolean_col + -2147483648, -- int_col + -9223372036854775808, -- bigint_col + CAST(-123.45 AS FLOAT), -- float_col + CAST(-123456.789 AS DOUBLE), -- double_col + 
CAST(-123456789 AS DECIMAL(9,0)), -- decimal_col1 + CAST(-1234.5678 AS DECIMAL(8,4)), -- decimal_col2 + CAST(-123456.789012 AS DECIMAL(18,6)), -- decimal_col3 + CAST(-123456789.012345678901 AS DECIMAL(38,12)), -- decimal_col4 + 'string_value', -- string_col + '2024-03-21', -- date_col + '2024-03-21 12:00:00', -- timestamp_col1 + '2024-03-21 12:00:00.123456789', -- timestamp_col2 + '2024-03-21 12:00:00.123456789', -- timestamp_col3 + MAP('key1', 'value1'), -- t_map_string + MAP(2, 20), -- t_map_int + MAP(2, 200000000000), -- t_map_bigint + MAP(CAST(2.2 AS FLOAT), CAST(20.2 AS FLOAT)), -- t_map_float + MAP(CAST(2.2 AS DOUBLE), CAST(20.2 AS DOUBLE)), -- t_map_double + MAP(false, true), -- t_map_boolean + MAP(CAST(2.2 AS DECIMAL(2,1)), CAST(2.2 AS DECIMAL(2,1))), -- t_map_decimal_precision_2 + MAP(CAST(2.34 AS DECIMAL(4,2)), CAST(2.34 AS DECIMAL(4,2))), -- t_map_decimal_precision_4 + MAP(CAST(2.3456 AS DECIMAL(8,4)), CAST(2.3456 AS DECIMAL(8,4))), -- t_map_decimal_precision_8 + MAP(CAST(2.34567890 AS DECIMAL(17,8)), CAST(2.34567890 AS DECIMAL(17,8))), -- t_map_decimal_precision_17 + MAP(CAST(2.34567890 AS DECIMAL(18,8)), CAST(2.34567890 AS DECIMAL(18,8))), -- t_map_decimal_precision_18 + MAP(CAST(2.345678901234567890 AS DECIMAL(38,16)), CAST(2.345678901234567890 AS DECIMAL(38,16))), -- t_map_decimal_precision_38 + ARRAY('string1', 'string2'), -- t_ARRAY_string + ARRAY(4, 5, 6), -- t_ARRAY_int + ARRAY(300000000000, 400000000000), -- t_ARRAY_bigint + ARRAY(CAST(3.3 AS FLOAT), CAST(4.4 AS FLOAT)), -- t_ARRAY_float + ARRAY(CAST(3.123456789 AS DOUBLE), CAST(4.123456789 AS DOUBLE)), -- t_ARRAY_double + ARRAY(false, true), -- t_ARRAY_boolean + ARRAY(CAST(3.3 AS DECIMAL(2,1)), CAST(4.4 AS DECIMAL(2,1))), -- t_ARRAY_decimal_precision_2 + ARRAY(CAST(3.45 AS DECIMAL(4,2)), CAST(4.56 AS DECIMAL(4,2))), -- t_ARRAY_decimal_precision_4 + ARRAY(CAST(3.4567 AS DECIMAL(8,4)), CAST(4.5678 AS DECIMAL(8,4))), -- t_ARRAY_decimal_precision_8 + ARRAY(CAST(3.45678901 AS DECIMAL(17,8)), 
CAST(4.56789012 AS DECIMAL(17,8))), -- t_ARRAY_decimal_precision_17 + ARRAY(CAST(3.45678901 AS DECIMAL(18,8)), CAST(4.56789012 AS DECIMAL(18,8))), -- t_ARRAY_decimal_precision_18 + ARRAY(CAST(3.456789012345678901 AS DECIMAL(38,16)), CAST(4.567890123456789012 AS DECIMAL(38,16))), -- t_ARRAY_decimal_precision_38 + NAMED_STRUCT('s_bigint', -1234567890), -- t_struct_bigint + MAP('key', ARRAY(NAMED_STRUCT('s_int', -123))), -- t_complex + NAMED_STRUCT('struct_field', ARRAY('value1', 'value2')), -- t_struct_nested + NAMED_STRUCT('struct_field_null', null, 'struct_field_null2', null), -- t_struct_null + NAMED_STRUCT('struct_non_nulls_after_nulls1', -123, 'struct_non_nulls_after_nulls2', 'value'), -- t_struct_non_nulls_after_nulls + NAMED_STRUCT('struct_field1', -123, 'struct_field2', 'value', 'strict_field3', NAMED_STRUCT('nested_struct_field1', -123, 'nested_struct_field2', 'nested_value')), -- t_nested_struct_non_nulls_after_nulls + MAP('null_key', null), -- t_map_null_value + ARRAY(null, 'value1', 'value2'), -- t_ARRAY_string_starting_with_nulls + ARRAY('value1', null, 'value2'), -- t_ARRAY_string_with_nulls_in_between + ARRAY('value1', 'value2', null), -- t_ARRAY_string_ending_with_nulls + ARRAY(null, null, null), -- t_ARRAY_string_all_nulls + 20240321 -- dt + ), + ( + 0, -- boolean_col + -2147483648, -- int_col + -9223372036854775808, -- bigint_col + CAST(-123.45 AS FLOAT), -- float_col + CAST(-123456.789 AS DOUBLE), -- double_col + CAST(-123456789 AS DECIMAL(9,0)), -- decimal_col1 + CAST(-1234.5678 AS DECIMAL(8,4)), -- decimal_col2 + CAST(-123456.789012 AS DECIMAL(18,6)), -- decimal_col3 + CAST(-123456789.012345678901 AS DECIMAL(38,12)), -- decimal_col4 + 'string_value', -- string_col + '2024-03-22', -- date_col + '2024-03-22 12:00:00', -- timestamp_col1 + '2024-03-22 12:00:00.123456789', -- timestamp_col2 + '2024-03-22 12:00:00.123456789', -- timestamp_col3 + MAP('key1', 'value1'), -- t_map_string + MAP(3, 20), -- t_map_int + MAP(3, 200000000000), -- t_map_bigint + 
MAP(CAST(3.2 AS FLOAT), CAST(20.2 AS FLOAT)), -- t_map_float + MAP(CAST(3.2 AS DOUBLE), CAST(20.2 AS DOUBLE)), -- t_map_double + MAP(false, true), -- t_map_boolean + MAP(CAST(3.2 AS DECIMAL(2,1)), CAST(2.2 AS DECIMAL(2,1))), -- t_map_decimal_precision_2 + MAP(CAST(3.34 AS DECIMAL(4,2)), CAST(2.34 AS DECIMAL(4,2))), -- t_map_decimal_precision_4 + MAP(CAST(2.3456 AS DECIMAL(8,4)), CAST(2.3456 AS DECIMAL(8,4))), -- t_map_decimal_precision_8 + MAP(CAST(2.34567890 AS DECIMAL(17,8)), CAST(2.34567890 AS DECIMAL(17,8))), -- t_map_decimal_precision_17 + MAP(CAST(2.34567890 AS DECIMAL(18,8)), CAST(2.34567890 AS DECIMAL(18,8))), -- t_map_decimal_precision_18 + MAP(CAST(3.345678901234567890 AS DECIMAL(38,16)), CAST(2.345678901234567890 AS DECIMAL(38,16))), -- t_map_decimal_precision_38 + ARRAY('string1', 'string2'), -- t_ARRAY_string + ARRAY(4, 5, 6), -- t_ARRAY_int + ARRAY(300000000000, 400000000000), -- t_ARRAY_bigint + ARRAY(CAST(3.3 AS FLOAT), CAST(4.4 AS FLOAT)), -- t_ARRAY_float + ARRAY(CAST(3.123456789 AS DOUBLE), CAST(4.123456789 AS DOUBLE)), -- t_ARRAY_double + ARRAY(false, true), -- t_ARRAY_boolean + ARRAY(CAST(3.3 AS DECIMAL(2,1)), CAST(4.4 AS DECIMAL(2,1))), -- t_ARRAY_decimal_precision_2 + ARRAY(CAST(3.45 AS DECIMAL(4,2)), CAST(4.56 AS DECIMAL(4,2))), -- t_ARRAY_decimal_precision_4 + ARRAY(CAST(8.4567 AS DECIMAL(8,4)), CAST(4.5678 AS DECIMAL(8,4))), -- t_ARRAY_decimal_precision_8 + ARRAY(CAST(3.45678901 AS DECIMAL(17,8)), CAST(4.56789012 AS DECIMAL(17,8))), -- t_ARRAY_decimal_precision_17 + ARRAY(CAST(3.45678901 AS DECIMAL(18,8)), CAST(4.56789012 AS DECIMAL(18,8))), -- t_ARRAY_decimal_precision_18 + ARRAY(CAST(3.456789012345678901 AS DECIMAL(38,16)), CAST(4.567890123456789012 AS DECIMAL(38,16))), -- t_ARRAY_decimal_precision_38 + NAMED_STRUCT('s_bigint', -1234567890), -- t_struct_bigint + MAP('key', ARRAY(NAMED_STRUCT('s_int', -123))), -- t_complex + NAMED_STRUCT('struct_field', ARRAY('value1', 'value2')), -- t_struct_nested + NAMED_STRUCT('struct_field_null', 
null, 'struct_field_null2', null), -- t_struct_null + NAMED_STRUCT('struct_non_nulls_after_nulls1', -123, 'struct_non_nulls_after_nulls2', 'value'), -- t_struct_non_nulls_after_nulls + NAMED_STRUCT('struct_field1', -123, 'struct_field2', 'value', 'strict_field3', NAMED_STRUCT('nested_struct_field1', -123, 'nested_struct_field2', 'nested_value')), -- t_nested_struct_non_nulls_after_nulls + MAP('null_key', null), -- t_map_null_value + ARRAY(null, 'value1', 'value2'), -- t_ARRAY_string_starting_with_nulls + ARRAY('value1', null, 'value2'), -- t_ARRAY_string_with_nulls_in_between + ARRAY('value11', 'value2', null), -- t_ARRAY_string_ending_with_nulls + ARRAY(null, null, null), -- t_ARRAY_string_all_nulls + 20240322 -- dt + ); + """ + order_qt_q02 """ select * from iceberg_all_types_${format_compression}; + """ + + sql """ + INSERT INTO iceberg_all_types_${format_compression}(float_col, t_map_int, t_ARRAY_decimal_precision_8, t_ARRAY_string_starting_with_nulls) + VALUES ( + CAST(123.45 AS FLOAT), -- float_col + MAP(1, 10), -- t_map_int + ARRAY(CAST(1.2345 AS DECIMAL(8,4)), CAST(2.3456 AS DECIMAL(8,4))), -- t_ARRAY_decimal_precision_8 + ARRAY(null, 'value1', 'value2') -- t_ARRAY_string_starting_with_nulls + ); + """ + order_qt_q03 """ select * from iceberg_all_types_${format_compression}; + """ + + sql """ DROP TABLE iceberg_all_types_${format_compression}; """ + } + + def q03 = { String format_compression, String catalog_name -> + def parts = format_compression.split("_") + def format = parts[0] + def compression = parts[1] + sql """ DROP TABLE IF EXISTS `iceberg_all_types_par_${format_compression}`; """ + sql """ + CREATE TABLE `iceberg_all_types_par_${format_compression}`( + `boolean_col` boolean, + `int_col` int, + `bigint_col` bigint, + `float_col` float, + `double_col` double, + `decimal_col1` decimal(9,0), + `decimal_col2` decimal(8,4), + `decimal_col3` decimal(18,6), + `decimal_col4` decimal(38,12), + `string_col` string, + `date_col` date, + `timestamp_col1` 
datetime, + `timestamp_col2` datetime, + `timestamp_col3` datetime, + `t_map_string` map<string,string>, + `t_map_int` map<int,int>, + `t_map_bigint` map<bigint,bigint>, + `t_map_float` map<float,float>, + `t_map_double` map<double,double>, + `t_map_boolean` map<boolean,boolean>, + `t_map_decimal_precision_2` map<decimal(2,1),decimal(2,1)>, + `t_map_decimal_precision_4` map<decimal(4,2),decimal(4,2)>, + `t_map_decimal_precision_8` map<decimal(8,4),decimal(8,4)>, + `t_map_decimal_precision_17` map<decimal(17,8),decimal(17,8)>, + `t_map_decimal_precision_18` map<decimal(18,8),decimal(18,8)>, + `t_map_decimal_precision_38` map<decimal(38,16),decimal(38,16)>, + `t_array_string` array<string>, + `t_array_int` array<int>, + `t_array_bigint` array<bigint>, + `t_array_float` array<float>, + `t_array_double` array<double>, + `t_array_boolean` array<boolean>, + `t_array_decimal_precision_2` array<decimal(2,1)>, + `t_array_decimal_precision_4` array<decimal(4,2)>, + `t_array_decimal_precision_8` array<decimal(8,4)>, + `t_array_decimal_precision_17` array<decimal(17,8)>, + `t_array_decimal_precision_18` array<decimal(18,8)>, + `t_array_decimal_precision_38` array<decimal(38,16)>, + `t_struct_bigint` struct<s_bigint:bigint>, + `t_complex` map<string,array<struct<s_int:int>>>, + `t_struct_nested` struct<struct_field:array<string>>, + `t_struct_null` struct<struct_field_null:string,struct_field_null2:string>, + `t_struct_non_nulls_after_nulls` struct<struct_non_nulls_after_nulls1:int,struct_non_nulls_after_nulls2:string>, + `t_nested_struct_non_nulls_after_nulls` struct<struct_field1:int,struct_field2:string,strict_field3:struct<nested_struct_field1:int,nested_struct_field2:string>>, + `t_map_null_value` map<string,string>, + `t_array_string_starting_with_nulls` array<string>, + `t_array_string_with_nulls_in_between` array<string>, + `t_array_string_ending_with_nulls` array<string>, + `t_array_string_all_nulls` array<string>, + `dt` int) ENGINE=iceberg + PARTITION BY LIST (dt) () 
+ properties ( + "compression-codec" = ${compression}, + "write-format"=${format} + ); + """ + + sql """ + INSERT INTO iceberg_all_types_par_${format_compression} + VALUES ( + 1, -- boolean_col + 2147483647, -- int_col + 9223372036854775807, -- bigint_col + 123.45, -- float_col + 123456.789, -- double_col + 123456789, -- decimal_col1 + 1234.5678, -- decimal_col2 + CAST(123456.789012 AS DECIMAL(18,6)), -- decimal_col3 + CAST(123456789.012345678901 AS DECIMAL(38,12)), -- decimal_col4 + 'string_value', -- string_col + '2024-03-20', -- date_col + '2024-03-20 12:00:00', -- timestamp_col1 + '2024-03-20 12:00:00.123456789', -- timestamp_col2 + '2024-03-20 12:00:00.123456789', -- timestamp_col3 + MAP('key1', 'value1'), -- t_map_string + MAP(1, 10), -- t_map_int + MAP(1, 100000000000), -- t_map_bigint + MAP(CAST(1.1 AS FLOAT), CAST(10.1 AS FLOAT)), -- t_map_float + MAP(CAST(1.1 AS DOUBLE), CAST(10.1 AS DOUBLE)), -- t_map_double + MAP(true, false), -- t_map_boolean + MAP(CAST(1.1 AS DECIMAL(2,1)), CAST(1.1 AS DECIMAL(2,1))), -- t_map_decimal_precision_2 + MAP(CAST(1.23 AS DECIMAL(4,2)), CAST(1.23 AS DECIMAL(4,2))), -- t_map_decimal_precision_4 + MAP(CAST(1.2345 AS DECIMAL(8,4)), CAST(1.2345 AS DECIMAL(8,4))), -- t_map_decimal_precision_8 + MAP(CAST(1.23456789 AS DECIMAL(17,8)), CAST(1.23456789 AS DECIMAL(17,8))), -- t_map_decimal_precision_17 + MAP(CAST(1.23456789 AS DECIMAL(18,8)), CAST(1.23456789 AS DECIMAL(18,8))), -- t_map_decimal_precision_18 + MAP(CAST(1.234567890123456789 AS DECIMAL(38,16)), CAST(1.234567890123456789 AS DECIMAL(38,16))), -- t_map_decimal_precision_38 + ARRAY('string1', 'string2'), -- t_ARRAY_string + ARRAY(1, 2, 3), -- t_ARRAY_int + ARRAY(100000000000, 200000000000), -- t_ARRAY_bigint + ARRAY(CAST(1.1 AS FLOAT), CAST(2.2 AS FLOAT)), -- t_ARRAY_float + ARRAY(CAST(1.123456789 AS DOUBLE), CAST(2.123456789 AS DOUBLE)), -- t_ARRAY_double + ARRAY(true, false), -- t_ARRAY_boolean + ARRAY(CAST(1.1 AS DECIMAL(2,1)), CAST(2.2 AS DECIMAL(2,1))), -- 
t_ARRAY_decimal_precision_2 + ARRAY(CAST(1.23 AS DECIMAL(4,2)), CAST(2.34 AS DECIMAL(4,2))), -- t_ARRAY_decimal_precision_4 + ARRAY(CAST(1.2345 AS DECIMAL(8,4)), CAST(2.3456 AS DECIMAL(8,4))), -- t_ARRAY_decimal_precision_8 + ARRAY(CAST(1.23456789 AS DECIMAL(17,8)), CAST(2.34567891 AS DECIMAL(17,8))), -- t_ARRAY_decimal_precision_17 + ARRAY(CAST(1.23456789 AS DECIMAL(18,8)), CAST(2.34567891 AS DECIMAL(18,8))), -- t_ARRAY_decimal_precision_18 + ARRAY(CAST(1.234567890123456789 AS DECIMAL(38,16)), CAST(2.345678901234567890 AS DECIMAL(38,16))), -- t_ARRAY_decimal_precision_38 + NAMED_STRUCT('s_bigint', 1234567890), -- t_struct_bigint + MAP('key', ARRAY(NAMED_STRUCT('s_int', 123))), -- t_complex + NAMED_STRUCT('struct_field', ARRAY('value1', 'value2')), -- t_struct_nested + NAMED_STRUCT('struct_field_null', null, 'struct_field_null2', null), -- t_struct_null + NAMED_STRUCT('struct_non_nulls_after_nulls1', 123, 'struct_non_nulls_after_nulls2', 'value'), -- t_struct_non_nulls_after_nulls + NAMED_STRUCT('struct_field1', 123, 'struct_field2', 'value', 'strict_field3', NAMED_STRUCT('nested_struct_field1', 123, 'nested_struct_field2', 'nested_value')), -- t_nested_struct_non_nulls_after_nulls + MAP('null_key', null), -- t_map_null_value + ARRAY(null, 'value1', 'value2'), -- t_ARRAY_string_starting_with_nulls + ARRAY('value1', null, 'value2'), -- t_ARRAY_string_with_nulls_in_between + ARRAY('value1', 'value2', null), -- t_ARRAY_string_ending_with_nulls + ARRAY(null, null, null), -- t_ARRAY_string_all_nulls + 20240320 -- dt + ); + """ + order_qt_q01 """ select * from iceberg_all_types_par_${format_compression}; + """ + + sql """ + INSERT INTO iceberg_all_types_par_${format_compression} + VALUES ( + 1, -- boolean_col + 2147483647, -- int_col + 9223372036854775807, -- bigint_col + CAST(123.45 AS FLOAT), -- float_col + CAST(123456.789 AS DOUBLE), -- double_col + CAST(123456789 AS DECIMAL(9,0)), -- decimal_col1 + CAST(1234.5678 AS DECIMAL(8,4)), -- decimal_col2 + CAST(123456.789012 
AS DECIMAL(18,6)), -- decimal_col3 + CAST(123456789.012345678901 AS DECIMAL(38,12)), -- decimal_col4 + 'string_value', -- string_col + '2024-03-20', -- date_col + '2024-03-20 12:00:00', -- timestamp_col1 + '2024-03-20 12:00:00.123456789', -- timestamp_col2 + '2024-03-20 12:00:00.123456789', -- timestamp_col3 + MAP('key1', 'value1'), -- t_map_string + MAP(1, 10), -- t_map_int + MAP(1, 100000000000), -- t_map_bigint + MAP(CAST(1.1 AS FLOAT), CAST(10.1 AS FLOAT)), -- t_map_float + MAP(CAST(1.1 AS DOUBLE), CAST(10.1 AS DOUBLE)), -- t_map_double + MAP(true, false), -- t_map_boolean + MAP(CAST(1.1 AS DECIMAL(2,1)), CAST(1.1 AS DECIMAL(2,1))), -- t_map_decimal_precision_2 + MAP(CAST(1.23 AS DECIMAL(4,2)), CAST(1.23 AS DECIMAL(4,2))), -- t_map_decimal_precision_4 + MAP(CAST(1.2345 AS DECIMAL(8,4)), CAST(1.2345 AS DECIMAL(8,4))), -- t_map_decimal_precision_8 + MAP(CAST(1.23456789 AS DECIMAL(17,8)), CAST(1.23456789 AS DECIMAL(17,8))), -- t_map_decimal_precision_17 + MAP(CAST(1.23456789 AS DECIMAL(18,8)), CAST(1.23456789 AS DECIMAL(18,8))), -- t_map_decimal_precision_18 + MAP(CAST(1.234567890123456789 AS DECIMAL(38,16)), CAST(1.234567890123456789 AS DECIMAL(38,16))), -- t_map_decimal_precision_38 + ARRAY('string1', 'string2'), -- t_ARRAY_string + ARRAY(1, 2, 3), -- t_ARRAY_int + ARRAY(100000000000, 200000000000), -- t_ARRAY_bigint + ARRAY(CAST(1.1 AS FLOAT), CAST(2.2 AS FLOAT)), -- t_ARRAY_float + ARRAY(CAST(1.123456789 AS DOUBLE), CAST(2.123456789 AS DOUBLE)), -- t_ARRAY_double + ARRAY(true, false), -- t_ARRAY_boolean + ARRAY(CAST(1.1 AS DECIMAL(2,1)), CAST(2.2 AS DECIMAL(2,1))), -- t_ARRAY_decimal_precision_2 + ARRAY(CAST(1.23 AS DECIMAL(4,2)), CAST(2.34 AS DECIMAL(4,2))), -- t_ARRAY_decimal_precision_4 + ARRAY(CAST(1.2345 AS DECIMAL(8,4)), CAST(2.3456 AS DECIMAL(8,4))), -- t_ARRAY_decimal_precision_8 + ARRAY(CAST(1.23456789 AS DECIMAL(17,8)), CAST(2.34567891 AS DECIMAL(17,8))), -- t_ARRAY_decimal_precision_17 + ARRAY(CAST(1.23456789 AS DECIMAL(18,8)), CAST(2.34567891 AS 
DECIMAL(18,8))), -- t_ARRAY_decimal_precision_18 + ARRAY(CAST(1.234567890123456789 AS DECIMAL(38,16)), CAST(2.345678901234567890 AS DECIMAL(38,16))), -- t_ARRAY_decimal_precision_38 + NAMED_STRUCT('s_bigint', 1234567890), -- t_struct_bigint + MAP('key', ARRAY(NAMED_STRUCT('s_int', 123))), -- t_complex + NAMED_STRUCT('struct_field', ARRAY('value1', 'value2')), -- t_struct_nested + NAMED_STRUCT('struct_field_null', null, 'struct_field_null2', null), -- t_struct_null + NAMED_STRUCT('struct_non_nulls_after_nulls1', 123, 'struct_non_nulls_after_nulls2', 'value'), -- t_struct_non_nulls_after_nulls + NAMED_STRUCT('struct_field1', 123, 'struct_field2', 'value', 'strict_field3', NAMED_STRUCT('nested_struct_field1', 123, 'nested_struct_field2', 'nested_value')), -- t_nested_struct_non_nulls_after_nulls + MAP('null_key', null), -- t_map_null_value + ARRAY(null, 'value1', 'value2'), -- t_ARRAY_string_starting_with_nulls + ARRAY('value1', null, 'value2'), -- t_ARRAY_string_with_nulls_in_between + ARRAY('value1', 'value2', null), -- t_ARRAY_string_ending_with_nulls + ARRAY(null, null, null), -- t_ARRAY_string_all_nulls + 20240320 -- dt + ), + ( + 0, -- boolean_col + -2147483648, -- int_col + -9223372036854775808, -- bigint_col + CAST(-123.45 AS FLOAT), -- float_col + CAST(-123456.789 AS DOUBLE), -- double_col + CAST(-123456789 AS DECIMAL(9,0)), -- decimal_col1 + CAST(-1234.5678 AS DECIMAL(8,4)), -- decimal_col2 + CAST(-123456.789012 AS DECIMAL(18,6)), -- decimal_col3 + CAST(-123456789.012345678901 AS DECIMAL(38,12)), -- decimal_col4 + 'string_value', -- string_col + '2024-03-21', -- date_col + '2024-03-21 12:00:00', -- timestamp_col1 + '2024-03-21 12:00:00.123456789', -- timestamp_col2 + '2024-03-21 12:00:00.123456789', -- timestamp_col3 + MAP('key1', 'value1'), -- t_map_string + MAP(2, 20), -- t_map_int + MAP(2, 200000000000), -- t_map_bigint + MAP(CAST(2.2 AS FLOAT), CAST(20.2 AS FLOAT)), -- t_map_float + MAP(CAST(2.2 AS DOUBLE), CAST(20.2 AS DOUBLE)), -- t_map_double + 
MAP(false, true), -- t_map_boolean + MAP(CAST(2.2 AS DECIMAL(2,1)), CAST(2.2 AS DECIMAL(2,1))), -- t_map_decimal_precision_2 + MAP(CAST(2.34 AS DECIMAL(4,2)), CAST(2.34 AS DECIMAL(4,2))), -- t_map_decimal_precision_4 + MAP(CAST(2.3456 AS DECIMAL(8,4)), CAST(2.3456 AS DECIMAL(8,4))), -- t_map_decimal_precision_8 + MAP(CAST(2.34567890 AS DECIMAL(17,8)), CAST(2.34567890 AS DECIMAL(17,8))), -- t_map_decimal_precision_17 + MAP(CAST(2.34567890 AS DECIMAL(18,8)), CAST(2.34567890 AS DECIMAL(18,8))), -- t_map_decimal_precision_18 + MAP(CAST(2.345678901234567890 AS DECIMAL(38,16)), CAST(2.345678901234567890 AS DECIMAL(38,16))), -- t_map_decimal_precision_38 + ARRAY('string1', 'string2'), -- t_ARRAY_string + ARRAY(4, 5, 6), -- t_ARRAY_int + ARRAY(300000000000, 400000000000), -- t_ARRAY_bigint + ARRAY(CAST(3.3 AS FLOAT), CAST(4.4 AS FLOAT)), -- t_ARRAY_float + ARRAY(CAST(3.123456789 AS DOUBLE), CAST(4.123456789 AS DOUBLE)), -- t_ARRAY_double + ARRAY(false, true), -- t_ARRAY_boolean + ARRAY(CAST(3.3 AS DECIMAL(2,1)), CAST(4.4 AS DECIMAL(2,1))), -- t_ARRAY_decimal_precision_2 + ARRAY(CAST(3.45 AS DECIMAL(4,2)), CAST(4.56 AS DECIMAL(4,2))), -- t_ARRAY_decimal_precision_4 + ARRAY(CAST(3.4567 AS DECIMAL(8,4)), CAST(4.5678 AS DECIMAL(8,4))), -- t_ARRAY_decimal_precision_8 + ARRAY(CAST(3.45678901 AS DECIMAL(17,8)), CAST(4.56789012 AS DECIMAL(17,8))), -- t_ARRAY_decimal_precision_17 + ARRAY(CAST(3.45678901 AS DECIMAL(18,8)), CAST(4.56789012 AS DECIMAL(18,8))), -- t_ARRAY_decimal_precision_18 + ARRAY(CAST(3.456789012345678901 AS DECIMAL(38,16)), CAST(4.567890123456789012 AS DECIMAL(38,16))), -- t_ARRAY_decimal_precision_38 + NAMED_STRUCT('s_bigint', -1234567890), -- t_struct_bigint + MAP('key', ARRAY(NAMED_STRUCT('s_int', -123))), -- t_complex + NAMED_STRUCT('struct_field', ARRAY('value1', 'value2')), -- t_struct_nested + NAMED_STRUCT('struct_field_null', null, 'struct_field_null2', null), -- t_struct_null + NAMED_STRUCT('struct_non_nulls_after_nulls1', -123, 
'struct_non_nulls_after_nulls2', 'value'), -- t_struct_non_nulls_after_nulls + NAMED_STRUCT('struct_field1', -123, 'struct_field2', 'value', 'strict_field3', NAMED_STRUCT('nested_struct_field1', -123, 'nested_struct_field2', 'nested_value')), -- t_nested_struct_non_nulls_after_nulls + MAP('null_key', null), -- t_map_null_value + ARRAY(null, 'value1', 'value2'), -- t_ARRAY_string_starting_with_nulls + ARRAY('value1', null, 'value2'), -- t_ARRAY_string_with_nulls_in_between + ARRAY('value1', 'value2', null), -- t_ARRAY_string_ending_with_nulls + ARRAY(null, null, null), -- t_ARRAY_string_all_nulls + 20240321 -- dt + ), + ( + 0, -- boolean_col + -2147483648, -- int_col + -9223372036854775808, -- bigint_col + -123.45, -- float_col + -123456.789, -- double_col + CAST(-123456789 AS DECIMAL(9,0)), -- decimal_col1 + CAST(-1234.5678 AS DECIMAL(8,4)), -- decimal_col2 + CAST(-123456.789012 AS DECIMAL(18,6)), -- decimal_col3 + CAST(-123456789.012345678901 AS DECIMAL(38,12)), -- decimal_col4 + 'string_value', -- string_col + '2024-03-22', -- date_col + '2024-03-22 12:00:00', -- timestamp_col1 + '2024-03-22 12:00:00.123456789', -- timestamp_col2 + '2024-03-22 12:00:00.123456789', -- timestamp_col3 + MAP('key1', 'value1'), -- t_map_string + MAP(3, 20), -- t_map_int + MAP(3, 200000000000), -- t_map_bigint + MAP(CAST(3.2 AS FLOAT), CAST(20.2 AS FLOAT)), -- t_map_float + MAP(CAST(3.2 AS DOUBLE), CAST(20.2 AS DOUBLE)), -- t_map_double + MAP(false, true), -- t_map_boolean + MAP(CAST(3.2 AS DECIMAL(2,1)), CAST(2.2 AS DECIMAL(2,1))), -- t_map_decimal_precision_2 + MAP(CAST(3.34 AS DECIMAL(4,2)), CAST(2.34 AS DECIMAL(4,2))), -- t_map_decimal_precision_4 + MAP(CAST(2.3456 AS DECIMAL(8,4)), CAST(2.3456 AS DECIMAL(8,4))), -- t_map_decimal_precision_8 + MAP(CAST(2.34567890 AS DECIMAL(17,8)), CAST(2.34567890 AS DECIMAL(17,8))), -- t_map_decimal_precision_17 + MAP(CAST(2.34567890 AS DECIMAL(18,8)), CAST(2.34567890 AS DECIMAL(18,8))), -- t_map_decimal_precision_18 + 
MAP(CAST(3.345678901234567890 AS DECIMAL(38,16)), CAST(2.345678901234567890 AS DECIMAL(38,16))), -- t_map_decimal_precision_38 + ARRAY('string1', 'string2'), -- t_ARRAY_string + ARRAY(4, 5, 6), -- t_ARRAY_int + ARRAY(300000000000, 400000000000), -- t_ARRAY_bigint + ARRAY(CAST(3.3 AS FLOAT), CAST(4.4 AS FLOAT)), -- t_ARRAY_float + ARRAY(CAST(3.123456789 AS DOUBLE), CAST(4.123456789 AS DOUBLE)), -- t_ARRAY_double + ARRAY(false, true), -- t_ARRAY_boolean + ARRAY(CAST(3.3 AS DECIMAL(2,1)), CAST(4.4 AS DECIMAL(2,1))), -- t_ARRAY_decimal_precision_2 + ARRAY(CAST(3.45 AS DECIMAL(4,2)), CAST(4.56 AS DECIMAL(4,2))), -- t_ARRAY_decimal_precision_4 + ARRAY(CAST(8.4567 AS DECIMAL(8,4)), CAST(4.5678 AS DECIMAL(8,4))), -- t_ARRAY_decimal_precision_8 + ARRAY(CAST(3.45678901 AS DECIMAL(17,8)), CAST(4.56789012 AS DECIMAL(17,8))), -- t_ARRAY_decimal_precision_17 + ARRAY(CAST(3.45678901 AS DECIMAL(18,8)), CAST(4.56789012 AS DECIMAL(18,8))), -- t_ARRAY_decimal_precision_18 + ARRAY(CAST(3.456789012345678901 AS DECIMAL(38,16)), CAST(4.567890123456789012 AS DECIMAL(38,16))), -- t_ARRAY_decimal_precision_38 + NAMED_STRUCT('s_bigint', -1234567890), -- t_struct_bigint + MAP('key', ARRAY(NAMED_STRUCT('s_int', -123))), -- t_complex + NAMED_STRUCT('struct_field', ARRAY('value1', 'value2')), -- t_struct_nested + NAMED_STRUCT('struct_field_null', null, 'struct_field_null2', null), -- t_struct_null + NAMED_STRUCT('struct_non_nulls_after_nulls1', -123, 'struct_non_nulls_after_nulls2', 'value'), -- t_struct_non_nulls_after_nulls + NAMED_STRUCT('struct_field1', -123, 'struct_field2', 'value', 'strict_field3', NAMED_STRUCT('nested_struct_field1', -123, 'nested_struct_field2', 'nested_value')), -- t_nested_struct_non_nulls_after_nulls + MAP('null_key', null), -- t_map_null_value + ARRAY(null, 'value1', 'value2'), -- t_ARRAY_string_starting_with_nulls + ARRAY('value1', null, 'value2'), -- t_ARRAY_string_with_nulls_in_between + ARRAY('value11', 'value2', null), -- t_ARRAY_string_ending_with_nulls + 
ARRAY(null, null, null), -- t_ARRAY_string_all_nulls + 20240322 -- dt + ); + """ + order_qt_q02 """ select * from iceberg_all_types_par_${format_compression}; + """ + + sql """ + INSERT INTO iceberg_all_types_par_${format_compression}(float_col, t_map_int, t_ARRAY_decimal_precision_8, t_ARRAY_string_starting_with_nulls, dt) + VALUES ( + 123.45, -- float_col + MAP(1, 10), -- t_map_int + ARRAY(CAST(1.2345 AS DECIMAL(8,4)), CAST(2.3456 AS DECIMAL(8,4))), -- t_ARRAY_decimal_precision_8 + ARRAY(null, 'value1', 'value2'), -- t_ARRAY_string_starting_with_nulls + 20240321 -- dt + ); + """ + order_qt_q03 """ select * from iceberg_all_types_par_${format_compression}; + """ + + sql """ DROP TABLE iceberg_all_types_par_${format_compression}; """ + } + + String enabled = context.config.otherConfigs.get("enableExternalIcebergTest") + if (enabled == null || !enabled.equalsIgnoreCase("true")) { + logger.info("disable iceberg test") + return + } + + String catalog_name = "test_s3tables_write_insert" + String props = context.config.otherConfigs.get("icebergS3TablesCatalog") + sql """drop catalog if exists ${catalog_name};""" + sql """ + create catalog ${catalog_name} properties ( + ${props} + ); + """ + + sql """ switch ${catalog_name};""" + sql """ use my_namespace;""" + sql """ set enable_fallback_to_original_planner=false """ + def tables = sql """ show tables; """ + assertTrue(tables.size() > 0) + + // 1. 
test querying existing tables + qt_sql01 """select * from my_table order by id;""" + qt_sql01 """select * from partition_table order by ts;""" + + try { + for (String format_compression in format_compressions) { + logger.info("Process format_compression " + format_compression) + q01(format_compression, catalog_name) + q03(format_compression, catalog_name) + } + } finally { + } +} diff --git a/regression-test/suites/external_table_p2/iceberg/test_s3tables_write_partitions.groovy b/regression-test/suites/external_table_p2/iceberg/test_s3tables_write_partitions.groovy new file mode 100644 index 00000000000..a3b23981ab9 --- /dev/null +++ b/regression-test/suites/external_table_p2/iceberg/test_s3tables_write_partitions.groovy @@ -0,0 +1,108 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
// Regression test: writing to an S3Tables-backed Iceberg catalog when the INSERT
// column list is ordered differently from the table's column order, for both a
// plain table and a LIST-partitioned table.
// NOTE(review): this file lives under external_table_p2 but is tagged "p0" — confirm the intended tag.
suite("test_s3tables_write_partitions", "p0,external,iceberg,external_docker,external_docker_iceberg") {
    def format_compressions = ["parquet_snappy", "orc_zlib"]

    // Creates a non-partitioned source table and a LIST-partitioned target table whose
    // physical column order differs from the insert order, inserts one row into each
    // using an explicit (reordered) column list, verifies both with ordered snapshots,
    // then drops the tables.
    def test_columns_out_of_order = { String format_compression, String catalog_name ->
        def parts = format_compression.split("_")
        def format = parts[0]       // e.g. "parquet"
        def compression = parts[1]  // e.g. "snappy"
        sql """ drop table if exists columns_out_of_order_source_tbl_${format_compression} """
        sql """
            CREATE TABLE columns_out_of_order_source_tbl_${format_compression} (
                `col3` bigint,
                `col6` int,
                `col1` bigint,
                `col4` int,
                `col2` bigint,
                `col5` int
            ) ENGINE = iceberg
            properties (
                "compression-codec" = "${compression}",
                "write-format" = "${format}"
            )
        """;
        sql """ drop table if exists columns_out_of_order_target_tbl_${format_compression} """
        sql """
            CREATE TABLE columns_out_of_order_target_tbl_${format_compression} (
                `col1` bigint,
                `col2` bigint,
                `col3` bigint,
                `col4` int,
                `col5` int,
                `col6` int
            ) ENGINE = iceberg
            PARTITION BY LIST (
                col4, col5, col6
            )()
            properties (
                "compression-codec" = "${compression}",
                "write-format" = "${format}"
            )
        """;

        sql """
            INSERT INTO columns_out_of_order_source_tbl_${format_compression} (
                col1, col2, col3, col4, col5, col6
            ) VALUES (1, 2, 3, 4, 5, 6);
        """
        order_qt_columns_out_of_order01 """ SELECT * FROM columns_out_of_order_source_tbl_${format_compression} """

        sql """
            INSERT INTO columns_out_of_order_target_tbl_${format_compression} (
                col1, col2, col3, col4, col5, col6
            ) VALUES (1, 2, 3, 4, 5, 6);
        """
        order_qt_columns_out_of_order02 """ SELECT * FROM columns_out_of_order_target_tbl_${format_compression} """

        sql """ drop table columns_out_of_order_source_tbl_${format_compression} """
        sql """ drop table columns_out_of_order_target_tbl_${format_compression} """
        // NOTE(review): no database named test_columns_out_of_order is created anywhere in
        // this suite; this drop looks like a copy-paste leftover from another test —
        // confirm and remove if so (kept here to preserve behavior).
        sql """ drop database if exists `test_columns_out_of_order` """;
    }

    // The whole suite is gated on external-Iceberg testing being enabled in the
    // regression config; otherwise it is a no-op.
    String enabled = context.config.otherConfigs.get("enableExternalIcebergTest")
    if (enabled == null || !enabled.equalsIgnoreCase("true")) {
        logger.info("disable iceberg test")
        return
    }

    // Catalog properties (ARN warehouse, region, AK/SK, ...) come from the
    // regression config key "icebergS3TablesCatalog".
    String catalog_name = "test_s3tables_write_partitions"
    String props = context.config.otherConfigs.get("icebergS3TablesCatalog")
    sql """drop catalog if exists ${catalog_name};"""
    sql """
        create catalog ${catalog_name} properties (
            ${props}
        );
    """

    sql """ switch ${catalog_name};"""
    sql """ use my_namespace;"""
    sql """ set enable_fallback_to_original_planner=false """
    // Sanity check that the namespace is reachable and non-empty before writing.
    def tables = sql """ show tables; """
    assertTrue(tables.size() > 0)

    try {
        for (String format_compression in format_compressions) {
            logger.info("Process format_compression " + format_compression)
            test_columns_out_of_order(format_compression, catalog_name)
        }
    } finally {
    }

}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]
