This is an automated email from the ASF dual-hosted git repository. lzljs3620320 pushed a commit to branch release-1.11 in repository https://gitbox.apache.org/repos/asf/flink.git
The following commit(s) were added to refs/heads/release-1.11 by this push: new 256bd06 [FLINK-17406][doc] Add documentation about dynamic table options 256bd06 is described below commit 256bd06c12ca2cde9c3038f1a66b313313c00e92 Author: Danny Chan <yuzhao....@gmail.com> AuthorDate: Mon Jun 8 16:27:52 2020 +0800 [FLINK-17406][doc] Add documentation about dynamic table options This closes #12319 --- docs/dev/table/config.md | 6 +++ docs/dev/table/config.zh.md | 6 +++ docs/dev/table/sql/hints.md | 88 ++++++++++++++++++++++++++++++++++++++++ docs/dev/table/sql/hints.zh.md | 88 ++++++++++++++++++++++++++++++++++++++++ docs/dev/table/sql/index.md | 1 + docs/dev/table/sql/index.zh.md | 1 + docs/dev/table/sql/queries.md | 11 ++++- docs/dev/table/sql/queries.zh.md | 11 ++++- 8 files changed, 210 insertions(+), 2 deletions(-) diff --git a/docs/dev/table/config.md b/docs/dev/table/config.md index dd312d6..68a73ba 100644 --- a/docs/dev/table/config.md +++ b/docs/dev/table/config.md @@ -104,3 +104,9 @@ The following options can be used to tune the performance of the query execution The following options can be used to adjust the behavior of the query optimizer to get a better execution plan. {% include generated/optimizer_config_configuration.html %} + +### Table Options + +The following options can be used to adjust the behavior of the table planner. + +{% include generated/table_config_configuration.html %} diff --git a/docs/dev/table/config.zh.md b/docs/dev/table/config.zh.md index 9d325e6..3c41bdc 100644 --- a/docs/dev/table/config.zh.md +++ b/docs/dev/table/config.zh.md @@ -96,3 +96,9 @@ configuration.set_string("table.exec.mini-batch.size", "5000"); 以下配置可以用于调整查询优化器的行为以获得更好的执行计划。 {% include generated/optimizer_config_configuration.html %} + +### Planner 配置 + +以下配置可以用于调整 planner 的行为。 + +{% include generated/table_config_configuration.html %} diff --git a/docs/dev/table/sql/hints.md b/docs/dev/table/sql/hints.md new file mode 100644 index 0000000..d8c4c90 --- /dev/null +++ b/docs/dev/table/sql/hints.md @@ -0,0 +1,88 @@ +--- +title: "SQL Hints" +nav-parent_id: sql +nav-pos: 6 +--- +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +--> + +* This will be replaced by the TOC +{:toc} + +SQL hints can be used with SQL statements to alter execution plans. This chapter explains how to use hints to force various approaches. + +Generally a hint can be used to: + +- Enforce planner: there's no perfect planner, so it makes sense to implement hints to +allow user better control the execution; +- Append meta data(or statistics): some statistics like “table index for scan” and +“skew info of some shuffle keys” are somewhat dynamic for the query, it would be very +convenient to config them with hints because our planning metadata from the planner is very +often not that accurate; +- Operator resource constraints: for many cases, we would give a default resource +configuration for the execution operators, i.e. min parallelism or +managed memory (resource consuming UDF) or special resource requirement (GPU or SSD disk) +and so on, it would be very flexible to profile the resource with hints per query(instead of the Job). + +## Dynamic Table Options +Dynamic table options allows to specify or override table options dynamically, different with static table options defined with SQL DDL or connect API, +these options can be specified flexibly in per-table scope within each query. + +Thus it is very suitable to use for the ad-hoc queries in interactive terminal, for example, in the SQL-CLI, +you can specify to ignore the parse error for a CSV source just by adding a dynamic option `/*+ OPTIONS('csv.ignore-parse-errors'='true') */`. + +<b>Note:</b> Dynamic table options default is forbidden to use because it may change the semantics of the query. +You need to set the config option `table.dynamic-table-options.enabled` to be `true` explicitly (default is false), +See the <a href="{{ site.baseurl }}/dev/table/config.html">Configuration</a> for details on how to set up the config options. + +### Syntax +In order to not break the SQL compatibility, we use the Oracle style SQL hint syntax: +{% highlight sql %} +table_path /*+ OPTIONS(key=val [, key=val]*) */ + +key: + stringLiteral +val: + stringLiteral + +{% endhighlight %} + +### Examples + +{% highlight sql %} + +CREATE TABLE kafka_table1 (id BIGINT, name STRING, age INT) WITH (...); +CREATE TABLE kafka_table2 (id BIGINT, name STRING, age INT) WITH (...); + +-- override table options in query source +select id, name from kafka_table1 /*+ OPTIONS('scan.startup.mode'='earliest-offset') */; + +-- override table options in join +select * from + kafka_table1 /*+ OPTIONS('scan.startup.mode'='earliest-offset') */ t1 + join + kafka_table2 /*+ OPTIONS('scan.startup.mode'='earliest-offset') */ t2 + on t1.id = t2.id; + +-- override table options for INSERT target table +insert into kafka_table1 /*+ OPTIONS('sink.partitioner'='round-robin') */ select * from kafka_table2; + +{% endhighlight %} + +{% top %} \ No newline at end of file diff --git a/docs/dev/table/sql/hints.zh.md b/docs/dev/table/sql/hints.zh.md new file mode 100644 index 0000000..d8c4c90 --- /dev/null +++ b/docs/dev/table/sql/hints.zh.md @@ -0,0 +1,88 @@ +--- +title: "SQL Hints" +nav-parent_id: sql +nav-pos: 6 +--- +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +--> + +* This will be replaced by the TOC +{:toc} + +SQL hints can be used with SQL statements to alter execution plans. This chapter explains how to use hints to force various approaches. + +Generally a hint can be used to: + +- Enforce planner: there's no perfect planner, so it makes sense to implement hints to +allow user better control the execution; +- Append meta data(or statistics): some statistics like “table index for scan” and +“skew info of some shuffle keys” are somewhat dynamic for the query, it would be very +convenient to config them with hints because our planning metadata from the planner is very +often not that accurate; +- Operator resource constraints: for many cases, we would give a default resource +configuration for the execution operators, i.e. min parallelism or +managed memory (resource consuming UDF) or special resource requirement (GPU or SSD disk) +and so on, it would be very flexible to profile the resource with hints per query(instead of the Job). + +## Dynamic Table Options +Dynamic table options allows to specify or override table options dynamically, different with static table options defined with SQL DDL or connect API, +these options can be specified flexibly in per-table scope within each query. + +Thus it is very suitable to use for the ad-hoc queries in interactive terminal, for example, in the SQL-CLI, +you can specify to ignore the parse error for a CSV source just by adding a dynamic option `/*+ OPTIONS('csv.ignore-parse-errors'='true') */`. + +<b>Note:</b> Dynamic table options default is forbidden to use because it may change the semantics of the query. +You need to set the config option `table.dynamic-table-options.enabled` to be `true` explicitly (default is false), +See the <a href="{{ site.baseurl }}/dev/table/config.html">Configuration</a> for details on how to set up the config options. + +### Syntax +In order to not break the SQL compatibility, we use the Oracle style SQL hint syntax: +{% highlight sql %} +table_path /*+ OPTIONS(key=val [, key=val]*) */ + +key: + stringLiteral +val: + stringLiteral + +{% endhighlight %} + +### Examples + +{% highlight sql %} + +CREATE TABLE kafka_table1 (id BIGINT, name STRING, age INT) WITH (...); +CREATE TABLE kafka_table2 (id BIGINT, name STRING, age INT) WITH (...); + +-- override table options in query source +select id, name from kafka_table1 /*+ OPTIONS('scan.startup.mode'='earliest-offset') */; + +-- override table options in join +select * from + kafka_table1 /*+ OPTIONS('scan.startup.mode'='earliest-offset') */ t1 + join + kafka_table2 /*+ OPTIONS('scan.startup.mode'='earliest-offset') */ t2 + on t1.id = t2.id; + +-- override table options for INSERT target table +insert into kafka_table1 /*+ OPTIONS('sink.partitioner'='round-robin') */ select * from kafka_table2; + +{% endhighlight %} + +{% top %} \ No newline at end of file diff --git a/docs/dev/table/sql/index.md b/docs/dev/table/sql/index.md index 1affb42..42a1f2a 100644 --- a/docs/dev/table/sql/index.md +++ b/docs/dev/table/sql/index.md @@ -33,6 +33,7 @@ This page lists all the supported statements supported in Flink SQL for now: - [DROP TABLE, DATABASE, FUNCTION](drop.html) - [ALTER TABLE, DATABASE, FUNCTION](alter.html) - [INSERT](insert.html) +- [SQL HINTS](hints.html) ## Data Types diff --git a/docs/dev/table/sql/index.zh.md b/docs/dev/table/sql/index.zh.md index 5cdc8d0..3f7b49f 100644 --- a/docs/dev/table/sql/index.zh.md +++ b/docs/dev/table/sql/index.zh.md @@ -33,6 +33,7 @@ under the License. - [DROP TABLE, DATABASE, FUNCTION](drop.html) - [ALTER TABLE, DATABASE, FUNCTION](alter.html) - [INSERT](insert.html) +- [SQL HINTS](hints.html) ## 数据类型 diff --git a/docs/dev/table/sql/queries.md b/docs/dev/table/sql/queries.md index abffb4d..643ad8c 100644 --- a/docs/dev/table/sql/queries.md +++ b/docs/dev/table/sql/queries.md @@ -198,10 +198,19 @@ tableReference: [ [ AS ] alias [ '(' columnAlias [, columnAlias ]* ')' ] ] tablePrimary: - [ TABLE ] [ [ catalogName . ] schemaName . ] tableName + [ TABLE ] [ [ catalogName . ] schemaName . ] tableName [ dynamicTableOptions ] | LATERAL TABLE '(' functionName '(' expression [, expression ]* ')' ')' | UNNEST '(' expression ')' +dynamicTableOptions: + /*+ OPTIONS(key=val [, key=val]*) */ + +key: + stringLiteral + +val: + stringLiteral + values: VALUES expression [, expression ]* diff --git a/docs/dev/table/sql/queries.zh.md b/docs/dev/table/sql/queries.zh.md index 56ce652..d6b6cff 100644 --- a/docs/dev/table/sql/queries.zh.md +++ b/docs/dev/table/sql/queries.zh.md @@ -198,10 +198,19 @@ tableReference: [ [ AS ] alias [ '(' columnAlias [, columnAlias ]* ')' ] ] tablePrimary: - [ TABLE ] [ [ catalogName . ] schemaName . ] tableName + [ TABLE ] [ [ catalogName . ] schemaName . ] tableName [ dynamicTableOptions ] | LATERAL TABLE '(' functionName '(' expression [, expression ]* ')' ')' | UNNEST '(' expression ')' +dynamicTableOptions: + /*+ OPTIONS(key=val [, key=val]*) */ + +key: + stringLiteral + +val: + stringLiteral + values: VALUES expression [, expression ]*