This is an automated email from the ASF dual-hosted git repository.
ashingau pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 00bcf0d7625 [doc](paimon) add more example for paimon catalog (#28087)
00bcf0d7625 is described below
commit 00bcf0d7625708f2f5148973084e067b06620b5f
Author: Mingyu Chen <[email protected]>
AuthorDate: Thu Dec 7 10:21:02 2023 +0800
[doc](paimon) add more example for paimon catalog (#28087)
---
docs/en/docs/lakehouse/faq.md | 6 +++
docs/en/docs/lakehouse/multi-catalog/paimon.md | 47 +++++++++++++++++++++--
docs/zh-CN/docs/lakehouse/faq.md | 6 +++
docs/zh-CN/docs/lakehouse/multi-catalog/paimon.md | 45 ++++++++++++++++++++--
4 files changed, 97 insertions(+), 7 deletions(-)
diff --git a/docs/en/docs/lakehouse/faq.md b/docs/en/docs/lakehouse/faq.md
index a6c97cfbb6d..ee441961c96 100644
--- a/docs/en/docs/lakehouse/faq.md
+++ b/docs/en/docs/lakehouse/faq.md
@@ -269,6 +269,12 @@ under the License.
Note that the value here is the cumulative value of a single HDFS Client,
not the value of a single query. The same HDFS Client will be reused by
multiple queries.
+3. `Couldn't create proxy provider class
org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider`
+
+ In the start scripts of FE and BE, the environment variable
`HADOOP_CONF_DIR` will be added to CLASSPATH. If `HADOOP_CONF_DIR` is set
incorrectly, such as pointing to a non-existent path or an incorrect path, the
wrong xxx-site.xml file may be loaded and incorrect information may be read.
+
+ You need to check whether `HADOOP_CONF_DIR` is configured correctly, or
unset this environment variable.
+
## DLF Catalog
1. When using DLF Catalog, BE reports `Invalid address` when reading JindoFS
data. Add the domain-name-to-IP mapping that appears in the log to
`/etc/hosts`.
diff --git a/docs/en/docs/lakehouse/multi-catalog/paimon.md
b/docs/en/docs/lakehouse/multi-catalog/paimon.md
index 29c6a364e38..7ff14528b95 100644
--- a/docs/en/docs/lakehouse/multi-catalog/paimon.md
+++ b/docs/en/docs/lakehouse/multi-catalog/paimon.md
@@ -46,6 +46,7 @@ Paimon Catalog Currently supports two types of Metastore
creation catalogs:
> For versions 2.0.1 and earlier, please use the following `Create Catalog
> based on Hive Metastore`.
#### HDFS
+
```sql
CREATE CATALOG `paimon_hdfs` PROPERTIES (
"type" = "paimon",
@@ -58,6 +59,18 @@ CREATE CATALOG `paimon_hdfs` PROPERTIES (
"hadoop.username" = "hadoop"
);
+CREATE CATALOG `paimon_kerberos` PROPERTIES (
+ 'type'='paimon',
+ "warehouse" = "hdfs://HDFS8000871/user/paimon",
+ "dfs.nameservices" = "HDFS8000871",
+ "dfs.ha.namenodes.HDFS8000871" = "nn1,nn2",
+ "dfs.namenode.rpc-address.HDFS8000871.nn1" = "172.21.0.1:4007",
+ "dfs.namenode.rpc-address.HDFS8000871.nn2" = "172.21.0.2:4007",
+ "dfs.client.failover.proxy.provider.HDFS8000871" =
"org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider",
+ 'hadoop.security.authentication' = 'kerberos',
+ 'hadoop.kerberos.keytab' = '/doris/hdfs.keytab',
+ 'hadoop.kerberos.principal' = '[email protected]'
+);
```
#### S3
@@ -66,7 +79,7 @@ CREATE CATALOG `paimon_hdfs` PROPERTIES (
>
> Users need to download
> [paimon-s3-0.5.0-incubating.jar](https://repo.maven.apache.org/maven2/org/apache/paimon/paimon-s3/0.5.0-incubating/paimon-s3-0.5.0-incubating.jar)
>
-> Place it in directory
${DORIS_HOME}/be/lib/java_extensions/preload-extensions and restart be
+> Place it in the directory
`${DORIS_HOME}/be/lib/java_extensions/preload-extensions` and restart BE
>
> Starting from version 2.0.2, this file can be placed in BE's `custom_lib/`
> directory (if it does not exist, just create it manually) to prevent the
> file from being lost due to the replacement of the lib directory when
> upgrading the cluster.
@@ -78,7 +91,6 @@ CREATE CATALOG `paimon_s3` PROPERTIES (
"s3.access_key" = "ak",
"s3.secret_key" = "sk"
);
-
```
#### OSS
@@ -86,7 +98,8 @@ CREATE CATALOG `paimon_s3` PROPERTIES (
>Note that.
>
> Users need to download
> [paimon-oss-0.5.0-incubating.jar](https://repo.maven.apache.org/maven2/org/apache/paimon/paimon-oss/0.5.0-incubating/paimon-oss-0.5.0-incubating.jar)
-> Place it in directory
${DORIS_HOME}/be/lib/java_extensions/preload-extensions and restart be
+>
+> Place it in the directory
`${DORIS_HOME}/be/lib/java_extensions/preload-extensions` and restart BE
```sql
@@ -97,7 +110,6 @@ CREATE CATALOG `paimon_oss` PROPERTIES (
"oss.access_key" = "ak",
"oss.secret_key" = "sk"
);
-
```
### Creating a Catalog Based on Hive Metastore
@@ -116,6 +128,22 @@ CREATE CATALOG `paimon_hms` PROPERTIES (
"hadoop.username" = "hadoop"
);
+CREATE CATALOG `paimon_kerberos` PROPERTIES (
+ "type" = "paimon",
+ "paimon.catalog.type" = "hms",
+ "warehouse" = "hdfs://HDFS8000871/user/zhangdong/paimon2",
+ "hive.metastore.uris" = "thrift://172.21.0.44:7004",
+ "hive.metastore.sasl.enabled" = "true",
+ "hive.metastore.kerberos.principal" = "hive/[email protected]",
+ "dfs.nameservices" = "HDFS8000871",
+ "dfs.ha.namenodes.HDFS8000871" = "nn1,nn2",
+ "dfs.namenode.rpc-address.HDFS8000871.nn1" = "172.21.0.1:4007",
+ "dfs.namenode.rpc-address.HDFS8000871.nn2" = "172.21.0.2:4007",
+ "dfs.client.failover.proxy.provider.HDFS8000871" =
"org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider",
+ "hadoop.security.authentication" = "kerberos",
+ "hadoop.kerberos.principal" = "[email protected]",
+ "hadoop.kerberos.keytab" = "/doris/hdfs.keytab"
+);
```
## Column Type Mapping
@@ -138,3 +166,14 @@ CREATE CATALOG `paimon_hms` PROPERTIES (
| ArrayType | Array | Support
Array nesting |
| VarBinaryType, BinaryType | Binary |
|
+## FAQ
+
+1. Kerberos
+
+ - Make sure principal and keytab are correct.
+ - You need to start a scheduled task (such as crontab) on the BE node that
runs the `kinit -kt your_keytab your_principal` command at a regular interval
(such as every 12 hours) to refresh the Kerberos ticket.
+
+2. Unknown type value: UNSUPPORTED
+
+ This is a compatibility issue between Doris 2.0.2 and Paimon 0.5. You need to
upgrade to 2.0.3 or higher to solve this problem, or apply the
[patch](https://github.com/apache/doris/pull/24985) yourself.
+
diff --git a/docs/zh-CN/docs/lakehouse/faq.md b/docs/zh-CN/docs/lakehouse/faq.md
index c1ab720e9e0..8651784e2e8 100644
--- a/docs/zh-CN/docs/lakehouse/faq.md
+++ b/docs/zh-CN/docs/lakehouse/faq.md
@@ -264,6 +264,12 @@ under the License.
注意,这里的值是单个 HDFS Client 的累计值,而不是单个查询的数值。同一个 HDFS Client 会被多个查询复用。
+3. `Couldn't create proxy provider class
org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider`
+
+ 在 FE 和 BE 的 start 脚本中,会将环境变量 `HADOOP_CONF_DIR` 加入 CLASSPATH。如果
`HADOOP_CONF_DIR` 设置错误,比如指向了不存在的路径或错误路径,则可能加载到错误的 xxx-site.xml 文件,从而读取到错误的信息。
+
+ 需检查 `HADOOP_CONF_DIR` 是否配置正确,或将这个环境变量删除。
+
## DLF Catalog
1. 使用DLF Catalog时,BE读取JindoFS数据出现`Invalid address`,
需要在`/etc/hosts`中添加日志中出现的域名到IP的映射。
diff --git a/docs/zh-CN/docs/lakehouse/multi-catalog/paimon.md
b/docs/zh-CN/docs/lakehouse/multi-catalog/paimon.md
index ab11cc32c7b..8c79943e087 100644
--- a/docs/zh-CN/docs/lakehouse/multi-catalog/paimon.md
+++ b/docs/zh-CN/docs/lakehouse/multi-catalog/paimon.md
@@ -46,6 +46,7 @@ Paimon Catalog 当前支持两种类型的Metastore创建Catalog:
> 2.0.1 及之前版本,请使用后面的 `基于Hive Metastore创建Catalog`。
#### HDFS
+
```sql
CREATE CATALOG `paimon_hdfs` PROPERTIES (
"type" = "paimon",
@@ -58,6 +59,18 @@ CREATE CATALOG `paimon_hdfs` PROPERTIES (
"hadoop.username" = "hadoop"
);
+CREATE CATALOG `paimon_kerberos` PROPERTIES (
+ 'type'='paimon',
+ "warehouse" = "hdfs://HDFS8000871/user/paimon",
+ "dfs.nameservices" = "HDFS8000871",
+ "dfs.ha.namenodes.HDFS8000871" = "nn1,nn2",
+ "dfs.namenode.rpc-address.HDFS8000871.nn1" = "172.21.0.1:4007",
+ "dfs.namenode.rpc-address.HDFS8000871.nn2" = "172.21.0.2:4007",
+ "dfs.client.failover.proxy.provider.HDFS8000871" =
"org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider",
+ 'hadoop.security.authentication' = 'kerberos',
+ 'hadoop.kerberos.keytab' = '/doris/hdfs.keytab',
+ 'hadoop.kerberos.principal' = '[email protected]'
+);
```
#### S3
@@ -66,7 +79,7 @@ CREATE CATALOG `paimon_hdfs` PROPERTIES (
>
> 用户需要手动下载[paimon-s3-0.5.0-incubating.jar](https://repo.maven.apache.org/maven2/org/apache/paimon/paimon-s3/0.5.0-incubating/paimon-s3-0.5.0-incubating.jar)
-> 放在${DORIS_HOME}/be/lib/java_extensions/preload-extensions目录下并重启be。
+> 放在 `${DORIS_HOME}/be/lib/java_extensions/preload-extensions` 目录下并重启be。
>
> 从 2.0.2 版本起,可以将这个文件放置在BE的 `custom_lib/` 目录下(如不存在,手动创建即可),以防止升级集群时因为 lib
> 目录被替换而导致文件丢失。
@@ -78,7 +91,6 @@ CREATE CATALOG `paimon_s3` PROPERTIES (
"s3.access_key" = "ak",
"s3.secret_key" = "sk"
);
-
```
#### OSS
@@ -86,7 +98,7 @@ CREATE CATALOG `paimon_s3` PROPERTIES (
>注意:
>
> 用户需要手动下载[paimon-oss-0.5.0-incubating.jar](https://repo.maven.apache.org/maven2/org/apache/paimon/paimon-oss/0.5.0-incubating/paimon-oss-0.5.0-incubating.jar)
-> 放在${DORIS_HOME}/be/lib/java_extensions/preload-extensions目录下并重启be
+> 放在 `${DORIS_HOME}/be/lib/java_extensions/preload-extensions` 目录下并重启be
```sql
CREATE CATALOG `paimon_oss` PROPERTIES (
@@ -115,8 +127,25 @@ CREATE CATALOG `paimon_hms` PROPERTIES (
"hadoop.username" = "hadoop"
);
+CREATE CATALOG `paimon_kerberos` PROPERTIES (
+ "type" = "paimon",
+ "paimon.catalog.type" = "hms",
+ "warehouse" = "hdfs://HDFS8000871/user/zhangdong/paimon2",
+ "hive.metastore.uris" = "thrift://172.21.0.44:7004",
+ "hive.metastore.sasl.enabled" = "true",
+ "hive.metastore.kerberos.principal" = "hive/[email protected]",
+ "dfs.nameservices" = "HDFS8000871",
+ "dfs.ha.namenodes.HDFS8000871" = "nn1,nn2",
+ "dfs.namenode.rpc-address.HDFS8000871.nn1" = "172.21.0.1:4007",
+ "dfs.namenode.rpc-address.HDFS8000871.nn2" = "172.21.0.2:4007",
+ "dfs.client.failover.proxy.provider.HDFS8000871" =
"org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider",
+ "hadoop.security.authentication" = "kerberos",
+ "hadoop.kerberos.principal" = "[email protected]",
+ "hadoop.kerberos.keytab" = "/doris/hdfs.keytab"
+);
```
+
## 列类型映射
| Paimon Data Type | Doris Data Type | Comment
|
@@ -137,4 +166,14 @@ CREATE CATALOG `paimon_hms` PROPERTIES (
| ArrayType | Array |
支持Array嵌套 |
| VarBinaryType, BinaryType | Binary |
|
+## 常见问题
+
+1. Kerberos 问题
+
+ - 确保 principal 和 keytab 配置正确。
+ - 需在 BE 节点启动定时任务(如 crontab),每隔一定时间(如 12 小时),执行一次
`kinit -kt your_keytab your_principal` 命令。
+
+2. Unknown type value: UNSUPPORTED
+
+ 这是 Doris 2.0.2 版本和 Paimon 0.5 版本的一个兼容性问题,需要升级到 2.0.3 或更高版本解决,或自行
[patch](https://github.com/apache/doris/pull/24985)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]