This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 05084ed54e2dcad678b11b3e6c8a4446f2f038d4
Author: slothever <[email protected]>
AuthorDate: Thu Jul 13 18:02:15 2023 +0800

    [fix](multi-catalog)support oss-hdfs service (#21504)

    1. support oss-hdfs if it is enabled when use dlf or hms catalog
    2. add docs for aliyun dlf and mc.
---
 docs/en/docs/lakehouse/multi-catalog/dlf.md        |  99 +++++++++----------
 docs/en/docs/lakehouse/multi-catalog/iceberg.md    |   6 +-
 .../en/docs/lakehouse/multi-catalog/max_compute.md |  60 ++++++++++++
 docs/sidebars.json                                 |   3 +-
 docs/zh-CN/docs/lakehouse/multi-catalog/dlf.md     | 105 +++++++++------------
 docs/zh-CN/docs/lakehouse/multi-catalog/iceberg.md |   6 +-
 .../docs/lakehouse/multi-catalog/max_compute.md    |  60 ++++++++++++
 .../java/org/apache/doris/common/util/S3Util.java  |  14 ++-
 .../datasource/property/PropertyConverter.java     |  51 +++++++++-
 .../property/constants/OssProperties.java          |   1 +
 .../doris/planner/external/FileQueryScanNode.java  |   4 +
 11 files changed, 289 insertions(+), 120 deletions(-)

diff --git a/docs/en/docs/lakehouse/multi-catalog/dlf.md b/docs/en/docs/lakehouse/multi-catalog/dlf.md
index 42c302e5c7..763fa9fdd8 100644
--- a/docs/en/docs/lakehouse/multi-catalog/dlf.md
+++ b/docs/en/docs/lakehouse/multi-catalog/dlf.md
@@ -35,28 +35,28 @@ Doris can access DLF the same way as it accesses Hive Metastore.
 
 ## Connect to DLF
 
-### The First Way, Create a Hive Catalog.
+### Create a DLF Catalog
 
 ```sql
-CREATE CATALOG hive_with_dlf PROPERTIES (
+CREATE CATALOG dlf PROPERTIES (
     "type"="hms",
-    "dlf.catalog.proxyMode" = "DLF_ONLY",
     "hive.metastore.type" = "dlf",
-    "dlf.catalog.endpoint" = "dlf.cn-beijing.aliyuncs.com",
-    "dlf.catalog.region" = "cn-beijing",
-    "dlf.catalog.uid" = "uid",
-    "dlf.catalog.accessKeyId" = "ak",
-    "dlf.catalog.accessKeySecret" = "sk"
+    "dlf.proxy.mode" = "DLF_ONLY",
+    "dlf.endpoint" = "datalake-vpc.cn-beijing.aliyuncs.com",
+    "dlf.region" = "cn-beijing",
+    "dlf.uid" = "uid",
+    "dlf.access_key" = "ak",
+    "dlf.secret_key" = "sk"
 );
 ```
 
-`type` should always be `hms`. If you need to access Alibaba Cloud OSS on the public network, can add `"dlf.catalog.accessPublic"="true"`.
+`type` should always be `hms`. If you need to access Alibaba Cloud OSS on the public network, you can add `"dlf.access.public"="true"`.
 
-* `dlf.catalog.endpoint`: DLF Endpoint. See [Regions and Endpoints of DLF](https://www.alibabacloud.com/help/en/data-lake-formation/latest/regions-and-endpoints).
-* `dlf.catalog.region`: DLF Region. See [Regions and Endpoints of DLF](https://www.alibabacloud.com/help/en/data-lake-formation/latest/regions-and-endpoints).
-* `dlf.catalog.uid`: Alibaba Cloud account. You can find the "Account ID" in the upper right corner on the Alibaba Cloud console.
-* `dlf.catalog.accessKeyId`:AccessKey, which you can create and manage on the [Alibaba Cloud console](https://ram.console.aliyun.com/manage/ak).
-* `dlf.catalog.accessKeySecret`:SecretKey, which you can create and manage on the [Alibaba Cloud console](https://ram.console.aliyun.com/manage/ak).
+* `dlf.endpoint`: DLF Endpoint. See [Regions and Endpoints of DLF](https://www.alibabacloud.com/help/en/data-lake-formation/latest/regions-and-endpoints).
+* `dlf.region`: DLF Region. See [Regions and Endpoints of DLF](https://www.alibabacloud.com/help/en/data-lake-formation/latest/regions-and-endpoints).
+* `dlf.uid`: Alibaba Cloud account. You can find the "Account ID" in the upper right corner on the Alibaba Cloud console.
+* `dlf.access_key`: AccessKey, which you can create and manage on the [Alibaba Cloud console](https://ram.console.aliyun.com/manage/ak).
+* `dlf.secret_key`: SecretKey, which you can create and manage on the [Alibaba Cloud console](https://ram.console.aliyun.com/manage/ak).
 
 Other configuration items are fixed and require no modifications.
 
@@ -64,54 +64,43 @@ After the above steps, you can access metadata in DLF the same way as you access
 
 Doris supports accessing Hive/Iceberg/Hudi metadata in DLF.
 
-### The Second Way, Configure the Hive Conf
+### Use OSS-HDFS as the data source
 
-1. Create the `hive-site.xml` file, and put it in the `fe/conf` directory.
+1. Enable OSS-HDFS. See [Grant access to OSS or OSS-HDFS](https://www.alibabacloud.com/help/en/e-mapreduce/latest/oss-hdfsnew).
+2. Download the SDK: [JindoData SDK](https://github.com/aliyun/alibabacloud-jindodata/blob/master/docs/user/5.x/5.0.0-beta7/jindodata_download.md).
+3. Decompress jindosdk.tar.gz, then copy `jindo-core.jar` and `jindo-sdk.jar` from its `lib` directory into both `${DORIS_HOME}/fe/lib` and `${DORIS_HOME}/be/lib/java_extensions`.
+4. Create a DLF Catalog with `oss.hdfs.enabled` set to `true`:
 
-```
-<?xml version="1.0"?>
-<configuration>
-    <!--Set to use dlf client-->
-    <property>
-        <name>hive.metastore.type</name>
-        <value>dlf</value>
-    </property>
-    <property>
-        <name>dlf.catalog.endpoint</name>
-        <value>dlf-vpc.cn-beijing.aliyuncs.com</value>
-    </property>
-    <property>
-        <name>dlf.catalog.region</name>
-        <value>cn-beijing</value>
-    </property>
-    <property>
-        <name>dlf.catalog.proxyMode</name>
-        <value>DLF_ONLY</value>
-    </property>
-    <property>
-        <name>dlf.catalog.uid</name>
-        <value>20000000000000000</value>
-    </property>
-    <property>
-        <name>dlf.catalog.accessKeyId</name>
-        <value>XXXXXXXXXXXXXXX</value>
-    </property>
-    <property>
-        <name>dlf.catalog.accessKeySecret</name>
-        <value>XXXXXXXXXXXXXXXXX</value>
-    </property>
-</configuration>
+```sql
+CREATE CATALOG dlf_oss_hdfs PROPERTIES (
+    "type"="hms",
+    "hive.metastore.type" = "dlf",
+    "dlf.proxy.mode" = "DLF_ONLY",
+    "dlf.endpoint" = "datalake-vpc.cn-beijing.aliyuncs.com",
+    "dlf.region" = "cn-beijing",
+    "dlf.uid" = "uid",
+    "dlf.access_key" = "ak",
+    "dlf.secret_key" = "sk",
+    "oss.hdfs.enabled" = "true"
+);
 ```
 
-2. Restart FE, Doris will read and parse `fe/conf/hive-site.xml`. And then Create Catalog via the `CREATE CATALOG` statement.
+### DLF Iceberg Catalog
 
 ```sql
-CREATE CATALOG hive_with_dlf PROPERTIES (
-    "type"="hms",
-    "hive.metastore.uris" = "thrift://127.0.0.1:9083"
-)
+CREATE CATALOG dlf_iceberg PROPERTIES (
+    "type"="iceberg",
+    "iceberg.catalog.type" = "dlf",
+    "dlf.proxy.mode" = "DLF_ONLY",
+    "dlf.endpoint" = "datalake-vpc.cn-beijing.aliyuncs.com",
+    "dlf.region" = "cn-beijing",
+    "dlf.uid" = "uid",
+    "dlf.access_key" = "ak",
+    "dlf.secret_key" = "sk"
+);
 ```
 
-`type` should always be `hms`; while `hive.metastore.uris` can be arbitary since it is not used in real practice, but it should follow the format of Hive Metastore Thrift URI.
+## Column type mapping
+
+Consistent with Hive Catalog, please refer to the **column type mapping** section in [Hive Catalog](./hive.md).
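
An illustrative usage sketch for the catalogs added above (not part of this patch; `your_db` and `your_table` are placeholder names):

```sql
-- Switch to the DLF catalog created above and browse its metadata.
SWITCH dlf;
SHOW DATABASES;
USE your_db;
-- Metadata is served by DLF; file data is read from OSS, or from
-- OSS-HDFS when the catalog sets "oss.hdfs.enabled" = "true".
SELECT * FROM your_table LIMIT 10;
-- Pick up metadata changes made outside of Doris.
REFRESH CATALOG dlf;
```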
diff --git a/docs/en/docs/lakehouse/multi-catalog/iceberg.md b/docs/en/docs/lakehouse/multi-catalog/iceberg.md
index 6f063ecf0b..18d66a5350 100644
--- a/docs/en/docs/lakehouse/multi-catalog/iceberg.md
+++ b/docs/en/docs/lakehouse/multi-catalog/iceberg.md
@@ -53,7 +53,7 @@ CREATE CATALOG iceberg PROPERTIES (
 
 ### Create Catalog based on Iceberg API
 
-Use the Iceberg API to access metadata, and support services such as Hive, REST, and Glue as Iceberg's Catalog.
+Use the Iceberg API to access metadata, with support for services such as Hive, REST, DLF, and Glue as Iceberg's Catalog.
 
 #### Hive Metastore
 
@@ -85,6 +85,10 @@ CREATE CATALOG glue PROPERTIES (
 
 For Iceberg properties, see [Iceberg Glue Catalog](https://iceberg.apache.org/docs/latest/aws/#glue-catalog)
 
+#### Alibaba Cloud DLF
+
+See [Alibaba Cloud DLF Catalog](dlf.md).
+
 #### REST Catalog
 
 This method needs to provide REST services in advance, and users need to implement the REST interface for obtaining Iceberg metadata.
diff --git a/docs/en/docs/lakehouse/multi-catalog/max_compute.md b/docs/en/docs/lakehouse/multi-catalog/max_compute.md
new file mode 100644
index 0000000000..a2f141df70
--- /dev/null
+++ b/docs/en/docs/lakehouse/multi-catalog/max_compute.md
@@ -0,0 +1,60 @@
+---
+{
+    "title": "Alibaba Cloud MaxCompute",
+    "language": "en"
+}
+---
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+
+# Alibaba Cloud MaxCompute
+
+MaxCompute (previously known as ODPS) is a data warehousing solution that can process terabytes or petabytes of data.
+
+> [What is MaxCompute](https://www.alibabacloud.com/help/en/maxcompute/product-overview/what-is-maxcompute)
+
+## Connect to MaxCompute
+
+```sql
+CREATE CATALOG mc PROPERTIES (
+    "type" = "max_compute",
+    "mc.region" = "cn-beijing",
+    "mc.default.project" = "your-project",
+    "mc.access_key" = "ak",
+    "mc.secret_key" = "sk"
+);
+```
+
+* `mc.region`: MaxCompute Region. You can get the region from [Endpoints](https://www.alibabacloud.com/help/en/maxcompute/user-guide/endpoints).
+* `mc.default.project`: MaxCompute Project. See your [MaxCompute Projects](https://maxcompute.console.aliyun.com/cn-beijing/project-list).
+* `mc.access_key`: AccessKey, which you can create and manage on the [Alibaba Cloud console](https://ram.console.aliyun.com/manage/ak).
+* `mc.secret_key`: SecretKey, which you can create and manage on the [Alibaba Cloud console](https://ram.console.aliyun.com/manage/ak).
+* `mc.public_access`: Set `"mc.public_access"="true"` to enable public network access for testing.
+
+## Quotas
+
+The pay-as-you-go quota has limited concurrency and usage. To request additional resources, see [Manage quotas](https://www.alibabacloud.com/help/en/maxcompute/user-guide/manage-quotas-in-the-new-maxcompute-console).
+
+## Column type mapping
+
+Consistent with Hive Catalog, please refer to the **column type mapping** section in [Hive Catalog](./hive.md).
+
+
diff --git a/docs/sidebars.json b/docs/sidebars.json
index 4e31a3fd91..9af06eded0 100644
--- a/docs/sidebars.json
+++ b/docs/sidebars.json
@@ -207,9 +207,10 @@
                 "lakehouse/multi-catalog/iceberg",
                 "lakehouse/multi-catalog/hudi",
                 "lakehouse/multi-catalog/paimon",
+                "lakehouse/multi-catalog/dlf",
+                "lakehouse/multi-catalog/max_compute",
                 "lakehouse/multi-catalog/es",
                 "lakehouse/multi-catalog/jdbc",
-                "lakehouse/multi-catalog/dlf",
                 "lakehouse/multi-catalog/faq"
             ]
         },
diff --git a/docs/zh-CN/docs/lakehouse/multi-catalog/dlf.md b/docs/zh-CN/docs/lakehouse/multi-catalog/dlf.md
index 02bf1a140b..822ecff1bb 100644
--- a/docs/zh-CN/docs/lakehouse/multi-catalog/dlf.md
+++ b/docs/zh-CN/docs/lakehouse/multi-catalog/dlf.md
@@ -35,28 +35,28 @@ under the License.
 
 ## 连接 DLF
 
-### 方式一:创建Hive Catalog连接DLF
+### 创建DLF Catalog
 
 ```sql
-CREATE CATALOG hive_with_dlf PROPERTIES (
+CREATE CATALOG dlf PROPERTIES (
     "type"="hms",
-    "dlf.catalog.proxyMode" = "DLF_ONLY",
     "hive.metastore.type" = "dlf",
-    "dlf.catalog.endpoint" = "dlf.cn-beijing.aliyuncs.com",
-    "dlf.catalog.region" = "cn-beijing",
-    "dlf.catalog.uid" = "uid",
-    "dlf.catalog.accessKeyId" = "ak",
-    "dlf.catalog.accessKeySecret" = "sk"
+    "dlf.proxy.mode" = "DLF_ONLY",
+    "dlf.endpoint" = "datalake-vpc.cn-beijing.aliyuncs.com",
+    "dlf.region" = "cn-beijing",
+    "dlf.uid" = "uid",
+    "dlf.access_key" = "ak",
+    "dlf.secret_key" = "sk"
 );
 ```
 
-其中 `type` 固定为 `hms`。 如果需要公网访问阿里云对象存储的数据,可以设置 `"dlf.catalog.accessPublic"="true"`
+其中 `type` 固定为 `hms`。 如果需要公网访问阿里云对象存储的数据,可以设置 `"dlf.access.public"="true"`
 
-* `dlf.catalog.endpoint`:DLF Endpoint,参阅:[DLF Region和Endpoint对照表](https://www.alibabacloud.com/help/zh/data-lake-formation/latest/regions-and-endpoints)
-* `dlf.catalog.region`:DLF Region,参阅:[DLF Region和Endpoint对照表](https://www.alibabacloud.com/help/zh/data-lake-formation/latest/regions-and-endpoints)
-* `dlf.catalog.uid`:阿里云账号。即阿里云控制台右上角个人信息的“云账号ID”。
-* `dlf.catalog.accessKeyId`:AccessKey。可以在 [阿里云控制台](https://ram.console.aliyun.com/manage/ak) 中创建和管理。
-* `dlf.catalog.accessKeySecret`:SecretKey。可以在 [阿里云控制台](https://ram.console.aliyun.com/manage/ak) 中创建和管理。
+* `dlf.endpoint`:DLF Endpoint,参阅:[DLF Region和Endpoint对照表](https://www.alibabacloud.com/help/zh/data-lake-formation/latest/regions-and-endpoints)
+* `dlf.region`:DLF Region,参阅:[DLF Region和Endpoint对照表](https://www.alibabacloud.com/help/zh/data-lake-formation/latest/regions-and-endpoints)
+* `dlf.uid`:阿里云账号。即阿里云控制台右上角个人信息的“云账号ID”。
+* `dlf.access_key`:AccessKey。可以在 [阿里云控制台](https://ram.console.aliyun.com/manage/ak) 中创建和管理。
+* `dlf.secret_key`:SecretKey。可以在 [阿里云控制台](https://ram.console.aliyun.com/manage/ak) 中创建和管理。
 
 其他配置项为固定值,无需改动。
 
@@ -64,55 +64,42 @@ CREATE CATALOG hive_with_dlf PROPERTIES (
 
 同 Hive Catalog 一样,支持访问 DLF 中的 Hive/Iceberg/Hudi 的元数据信息。
 
-### 方式二:配置Hive Conf连接DLF
-
-1. 创建 hive-site.xml 文件,并将其放置在 `fe/conf` 目录下。
-
-    ```
-    <?xml version="1.0"?>
-    <configuration>
-        <!--Set to use dlf client-->
-        <property>
-            <name>hive.metastore.type</name>
-            <value>dlf</value>
-        </property>
-        <property>
-            <name>dlf.catalog.endpoint</name>
-            <value>dlf-vpc.cn-beijing.aliyuncs.com</value>
-        </property>
-        <property>
-            <name>dlf.catalog.region</name>
-            <value>cn-beijing</value>
-        </property>
-        <property>
-            <name>dlf.catalog.proxyMode</name>
-            <value>DLF_ONLY</value>
-        </property>
-        <property>
-            <name>dlf.catalog.uid</name>
-            <value>20000000000000000</value>
-        </property>
-        <property>
-            <name>dlf.catalog.accessKeyId</name>
-            <value>XXXXXXXXXXXXXXX</value>
-        </property>
-        <property>
-            <name>dlf.catalog.accessKeySecret</name>
-            <value>XXXXXXXXXXXXXXXXX</value>
-        </property>
-    </configuration>
-    ```
-
-2. 重启 FE,Doris 会读取和解析 fe/conf/hive-site.xml。 并通过 `CREATE CATALOG` 语句创建 catalog。
+### 使用开启了HDFS服务的OSS存储数据
+
+1. 确认OSS开启了HDFS服务。[开通并授权访问OSS-HDFS服务](https://help.aliyun.com/document_detail/419505.html?spm=a2c4g.2357115.0.i0)
+2. 下载SDK。[JindoData SDK下载](https://github.com/aliyun/alibabacloud-jindodata/blob/master/docs/user/5.x/5.0.0-beta7/jindodata_download.md)
+3. 解压下载后的jindosdk.tar.gz,将其lib目录下的`jindo-core.jar、jindo-sdk.jar`放到`${DORIS_HOME}/fe/lib`和`${DORIS_HOME}/be/lib/java_extensions`目录下。
+4. 创建DLF Catalog,并配置`oss.hdfs.enabled`为`true`:
 
 ```sql
-CREATE CATALOG hive_with_dlf PROPERTIES (
+CREATE CATALOG dlf_oss_hdfs PROPERTIES (
     "type"="hms",
-    "hive.metastore.uris" = "thrift://127.0.0.1:9083"
-)
+    "hive.metastore.type" = "dlf",
+    "dlf.proxy.mode" = "DLF_ONLY",
+    "dlf.endpoint" = "datalake-vpc.cn-beijing.aliyuncs.com",
+    "dlf.region" = "cn-beijing",
+    "dlf.uid" = "uid",
+    "dlf.access_key" = "ak",
+    "dlf.secret_key" = "sk",
+    "oss.hdfs.enabled" = "true"
+);
 ```
 
-其中 `type` 固定为 `hms`。`hive.metastore.uris` 的值随意填写即可,实际不会使用。但需要按照标准 hive metastore thrift uri 格式填写。
-
+### 访问DLF Iceberg表
+
+```sql
+CREATE CATALOG dlf_iceberg PROPERTIES (
+    "type"="iceberg",
+    "iceberg.catalog.type" = "dlf",
+    "dlf.proxy.mode" = "DLF_ONLY",
+    "dlf.endpoint" = "datalake-vpc.cn-beijing.aliyuncs.com",
+    "dlf.region" = "cn-beijing",
+    "dlf.uid" = "uid",
+    "dlf.access_key" = "ak",
+    "dlf.secret_key" = "sk"
+);
+```
+
+## 列类型映射
+
+和 Hive Catalog 一致,可参阅 [Hive Catalog](./hive.md) 中 **列类型映射** 一节。
diff --git a/docs/zh-CN/docs/lakehouse/multi-catalog/iceberg.md b/docs/zh-CN/docs/lakehouse/multi-catalog/iceberg.md
index f72553b6d0..bf5333388b 100644
--- a/docs/zh-CN/docs/lakehouse/multi-catalog/iceberg.md
+++ b/docs/zh-CN/docs/lakehouse/multi-catalog/iceberg.md
@@ -53,7 +53,7 @@ CREATE CATALOG iceberg PROPERTIES (
 
 ### 基于Iceberg API创建Catalog
 
-使用Iceberg API访问元数据的方式,支持Hive、REST、Glue等服务作为Iceberg的Catalog。
+使用Iceberg API访问元数据的方式,支持Hive、REST、Glue、DLF等服务作为Iceberg的Catalog。
 
 #### Hive Metastore
 
@@ -85,6 +85,10 @@ CREATE CATALOG glue PROPERTIES (
 
 Iceberg 属性详情参见 [Iceberg Glue Catalog](https://iceberg.apache.org/docs/latest/aws/#glue-catalog)
 
+#### 阿里云 DLF
+
+参见[阿里云DLF Catalog配置](dlf.md)
+
 #### REST Catalog
 
 该方式需要预先提供REST服务,用户需实现获取Iceberg元数据的REST接口。
diff --git a/docs/zh-CN/docs/lakehouse/multi-catalog/max_compute.md b/docs/zh-CN/docs/lakehouse/multi-catalog/max_compute.md
new file mode 100644
index 0000000000..e4b6eacb5f
--- /dev/null
+++ b/docs/zh-CN/docs/lakehouse/multi-catalog/max_compute.md
@@ -0,0 +1,60 @@
+---
+{
+    "title": "阿里云 MaxCompute",
+    "language": "zh-CN"
+}
+---
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+
+# 阿里云 MaxCompute
+
+MaxCompute是阿里云上的企业级SaaS(Software as a Service)模式云数据仓库。
+
+> [什么是 MaxCompute](https://help.aliyun.com/zh/maxcompute/product-overview/what-is-maxcompute?spm=a2c4g.11174283.0.i1)
+
+## 连接 MaxCompute
+
+```sql
+CREATE CATALOG mc PROPERTIES (
+    "type" = "max_compute",
+    "mc.region" = "cn-beijing",
+    "mc.default.project" = "your-project",
+    "mc.access_key" = "ak",
+    "mc.secret_key" = "sk"
+);
+```
+
+* `mc.region`:MaxCompute开通的地域。可以从Endpoint中找到对应的Region,参阅[Endpoints](https://help.aliyun.com/zh/maxcompute/user-guide/endpoints?spm=a2c4g.11186623.0.0)。
+* `mc.default.project`:MaxCompute项目。可以在[MaxCompute项目列表](https://maxcompute.console.aliyun.com/cn-beijing/project-list)中创建和管理。
+* `mc.access_key`:AccessKey。可以在 [阿里云控制台](https://ram.console.aliyun.com/manage/ak) 中创建和管理。
+* `mc.secret_key`:SecretKey。可以在 [阿里云控制台](https://ram.console.aliyun.com/manage/ak) 中创建和管理。
+* `mc.public_access`:当配置了`"mc.public_access"="true"`,可以开启公网访问,建议测试时使用。
+
+## 限额
+
+连接MaxCompute时,按量付费的Quota查询并发和使用量有限,如需增加资源,请参照MaxCompute文档。参见[配额管理](https://help.aliyun.com/zh/maxcompute/user-guide/manage-quotas-in-the-new-maxcompute-console)。
+
+## 列类型映射
+
+和 Hive Catalog 一致,可参阅 [Hive Catalog](./hive.md) 中 **列类型映射** 一节。
+
+
diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/util/S3Util.java b/fe/fe-core/src/main/java/org/apache/doris/common/util/S3Util.java
index e3ae85461d..64c897c306 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/common/util/S3Util.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/S3Util.java
@@ -87,11 +87,23 @@ public class S3Util {
             if (pos == -1) {
                 throw new RuntimeException("No '://' found in location: " + location);
             }
-            location = "s3" + location.substring(pos);
+            if (isHdfsOnOssEndpoint(location)) {
+                // if hdfs service is enabled on oss, use oss location
+                // example: oss://examplebucket.cn-shanghai.oss-dls.aliyuncs.com/dir/file/0000.orc
+                location = "oss" + location.substring(pos);
+            } else {
+                location = "s3" + location.substring(pos);
+            }
         }
         return new Path(location);
     }
 
+    public static boolean isHdfsOnOssEndpoint(String location) {
+        // example: cn-shanghai.oss-dls.aliyuncs.com contains the "oss-dls.aliyuncs".
+        // https://www.alibabacloud.com/help/en/e-mapreduce/latest/oss-kusisurumen
+        return location.contains("oss-dls.aliyuncs");
+    }
+
     public static S3Client buildS3Client(URI endpoint, String region, CloudCredential credential) {
         StaticCredentialsProvider scp;
         AwsCredentials awsCredential;
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/PropertyConverter.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/PropertyConverter.java
index 094871d9f5..8787233e7d 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/PropertyConverter.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/PropertyConverter.java
@@ -17,6 +17,7 @@
 
 package org.apache.doris.datasource.property;
 
+import org.apache.doris.common.util.S3Util;
 import org.apache.doris.common.util.Util;
 import org.apache.doris.datasource.credentials.CloudCredential;
 import org.apache.doris.datasource.credentials.CloudCredentialWithEndpoint;
@@ -253,6 +254,11 @@ public class PropertyConverter {
         ossProperties.put(org.apache.hadoop.fs.aliyun.oss.Constants.ENDPOINT_KEY, endpoint);
         ossProperties.put("fs.oss.impl.disable.cache", "true");
         ossProperties.put("fs.oss.impl", AliyunOSSFileSystem.class.getName());
+        boolean hdfsEnabled = Boolean.parseBoolean(props.getOrDefault(OssProperties.OSS_HDFS_ENABLED, "false"));
+        if (S3Util.isHdfsOnOssEndpoint(endpoint) || hdfsEnabled) {
+            // use endpoint or enable hdfs
+            rewriteHdfsOnOssProperties(ossProperties, endpoint);
+        }
         if (credential.isWhole()) {
             ossProperties.put(org.apache.hadoop.fs.aliyun.oss.Constants.ACCESS_KEY_ID, credential.getAccessKey());
             ossProperties.put(org.apache.hadoop.fs.aliyun.oss.Constants.ACCESS_KEY_SECRET, credential.getSecretKey());
@@ -268,6 +274,22 @@
         return ossProperties;
     }
 
+    private static void rewriteHdfsOnOssProperties(Map<String, String> ossProperties, String endpoint) {
+        if (!S3Util.isHdfsOnOssEndpoint(endpoint)) {
+            // just for robustness here, avoid wrong endpoint when oss-hdfs is enabled.
+            // convert "oss-cn-beijing.aliyuncs.com" to "cn-beijing.oss-dls.aliyuncs.com"
+            // reference link: https://www.alibabacloud.com/help/en/e-mapreduce/latest/oss-kusisurumen
+            String[] endpointSplit = endpoint.split("\\.");
+            if (endpointSplit.length > 0) {
+                String region = endpointSplit[0].replace("oss-", "").replace("-internal", "");
+                ossProperties.put(org.apache.hadoop.fs.aliyun.oss.Constants.ENDPOINT_KEY,
+                        region + ".oss-dls.aliyuncs.com");
+            }
+        }
+        ossProperties.put("fs.oss.impl", "com.aliyun.emr.fs.oss.JindoOssFileSystem");
+        ossProperties.put("fs.AbstractFileSystem.oss.impl", "com.aliyun.emr.fs.oss.OSS");
+    }
+
     private static Map<String, String> convertToCOSProperties(Map<String, String> props, CloudCredential credential) {
         Map<String, String> cosProperties = Maps.newHashMap();
         cosProperties.put(CosNConfigKeys.COSN_ENDPOINT_SUFFIX_KEY, props.get(CosProperties.ENDPOINT));
@@ -368,6 +390,10 @@
         if (Strings.isNullOrEmpty(uid)) {
             throw new IllegalArgumentException("Required dlf property: " + DataLakeConfig.CATALOG_USER_ID);
         }
+        if (!props.containsKey(DLFProperties.ENDPOINT)) {
+            // just display DLFProperties in catalog, and hide DataLakeConfig properties
+            putNewPropertiesForCompatibility(props, credential);
+        }
         // convert to oss property
         if (credential.isWhole()) {
             props.put(OssProperties.ACCESS_KEY, credential.getAccessKey());
@@ -379,11 +405,32 @@
         String publicAccess = props.getOrDefault(DLFProperties.Site.ACCESS_PUBLIC, "false");
         String region = props.getOrDefault(DataLakeConfig.CATALOG_REGION_ID, props.get(DLFProperties.REGION));
         if (!Strings.isNullOrEmpty(region)) {
-            props.put(OssProperties.REGION, "oss-" + region);
-            props.put(OssProperties.ENDPOINT, getOssEndpoint(region, Boolean.parseBoolean(publicAccess)));
+            boolean hdfsEnabled = Boolean.parseBoolean(props.getOrDefault(OssProperties.OSS_HDFS_ENABLED, "false"));
+            if (hdfsEnabled) {
+                props.putIfAbsent("fs.oss.impl", "com.aliyun.emr.fs.oss.JindoOssFileSystem");
+                props.putIfAbsent(OssProperties.REGION, region);
+                // example: cn-shanghai.oss-dls.aliyuncs.com
+                // from https://www.alibabacloud.com/help/en/e-mapreduce/latest/oss-kusisurumen
+                props.putIfAbsent(OssProperties.ENDPOINT, region + ".oss-dls.aliyuncs.com");
+            } else {
+                props.putIfAbsent(OssProperties.REGION, "oss-" + region);
+                props.putIfAbsent(OssProperties.ENDPOINT, getOssEndpoint(region, Boolean.parseBoolean(publicAccess)));
+            }
         }
     }
 
+    private static void putNewPropertiesForCompatibility(Map<String, String> props, CloudCredential credential) {
+        props.put(DLFProperties.UID, props.get(DataLakeConfig.CATALOG_USER_ID));
+        String endpoint = props.get(DataLakeConfig.CATALOG_ENDPOINT);
+        props.put(DLFProperties.ENDPOINT, endpoint);
+        props.put(DLFProperties.REGION, props.getOrDefault(DataLakeConfig.CATALOG_REGION_ID,
+                S3Properties.getRegionOfEndpoint(endpoint)));
+        props.put(DLFProperties.PROXY_MODE, props.getOrDefault(DataLakeConfig.CATALOG_PROXY_MODE, "DLF_ONLY"));
+        props.put(DLFProperties.ACCESS_KEY, credential.getAccessKey());
+        props.put(DLFProperties.SECRET_KEY, credential.getSecretKey());
+        props.put(DLFProperties.ACCESS_PUBLIC, props.getOrDefault(DLFProperties.Site.ACCESS_PUBLIC, "false"));
+    }
+
     private static String getOssEndpoint(String region, boolean publicAccess) {
         String prefix = "http://oss-";
         String suffix = ".aliyuncs.com";
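To make the property rewrite above concrete: with the new `OSS_HDFS_ENABLED` key (defined in OssProperties.java below), a catalog pointing at a plain OSS endpoint can opt into the OSS-HDFS path. A hedged sketch of the user-visible effect — the catalog name, thrift URI, endpoint, and keys are placeholders, and `oss.endpoint` as the endpoint property name is an assumption:

```sql
-- Hive catalog whose data lives on OSS with the HDFS service enabled.
-- Per rewriteHdfsOnOssProperties above, the endpoint "oss-cn-beijing.aliyuncs.com"
-- should be rewritten to "cn-beijing.oss-dls.aliyuncs.com" and fs.oss.impl
-- switched to com.aliyun.emr.fs.oss.JindoOssFileSystem.
CREATE CATALOG hive_on_oss_hdfs PROPERTIES (
    "type" = "hms",
    "hive.metastore.uris" = "thrift://127.0.0.1:9083",
    "oss.endpoint" = "oss-cn-beijing.aliyuncs.com",   -- assumed property name
    "oss.access_key" = "ak",
    "oss.secret_key" = "sk",
    "oss.hdfs.enabled" = "true"
);
```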
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/constants/OssProperties.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/constants/OssProperties.java
index 35c48c2730..210bc5814a 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/property/constants/OssProperties.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/property/constants/OssProperties.java
@@ -33,6 +33,7 @@ public class OssProperties extends BaseProperties {
     public static final String ACCESS_KEY = "oss.access_key";
     public static final String SECRET_KEY = "oss.secret_key";
     public static final String SESSION_TOKEN = "oss.session_token";
+    public static final String OSS_HDFS_ENABLED = "oss.hdfs.enabled";
     public static final List<String> REQUIRED_FIELDS = Arrays.asList(ENDPOINT, ACCESS_KEY, SECRET_KEY);
 
     public static CloudCredential getCredential(Map<String, String> props) {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/external/FileQueryScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/external/FileQueryScanNode.java
index d366cbe90e..7a7dd76ab7 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/external/FileQueryScanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/external/FileQueryScanNode.java
@@ -404,6 +404,10 @@ public abstract class FileQueryScanNode extends FileScanNode {
     protected static Optional<TFileType> getTFileType(String location) {
         if (location != null && !location.isEmpty()) {
             if (S3Util.isObjStorage(location)) {
+                if (S3Util.isHdfsOnOssEndpoint(location)) {
+                    // if hdfs service is enabled on oss, use hdfs lib to access oss.
+                    return Optional.of(TFileType.FILE_HDFS);
+                }
                 return Optional.of(TFileType.FILE_S3);
             } else if (location.startsWith(FeConstants.FS_PREFIX_HDFS)) {
                 return Optional.of(TFileType.FILE_HDFS);

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]
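
P.S. The new MaxCompute catalog from max_compute.md can be exercised like the other multi-catalog sources; an illustrative sketch (not part of this patch; assumes the `mc` catalog from the docs, with `your_db`/`your_table` as placeholders):

```sql
-- Browse and query MaxCompute tables through the new catalog.
SWITCH mc;
SHOW DATABASES;
-- Per the docs, "mc.public_access" = "true" on the catalog is only
-- recommended for testing.
SELECT * FROM your_db.your_table LIMIT 10;
```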
