wuchong commented on a change in pull request #9802: [FLINK-13361][documention] Add documentation for JDBC connector for Table API & SQL URL: https://github.com/apache/flink/pull/9802#discussion_r333314037
########## File path: docs/dev/table/connect.md ########## @@ -1075,6 +1076,143 @@ CREATE TABLE MyUserTable ( {% top %} +### JDBC Connector + +<span class="label label-primary">Source: Batch</span> +<span class="label label-primary">Sink: Batch</span> +<span class="label label-primary">Sink: Streaming Append Mode</span> +<span class="label label-primary">Sink: Streaming Upsert Mode</span> +<span class="label label-primary">Temporal Join: Sync Mode</span> + +The JDBC connector allows for reading from a JDBC client. +The JDBC connector allows for writing into a JDBC client. + +The connector can operate in [upsert mode](#update-modes) for exchanging UPSERT/DELETE messages with the external system using a [key defined by the query](./streaming/dynamic_tables.html#table-to-stream-conversion). + +For append-only queries, the connector can also operate in [append mode](#update-modes) for exchanging only INSERT messages with the external system. + +To use the connector, you need to add a JDBC driver library to your project. For example, to use the MySQL driver, add the following dependency to your project: + +{% highlight xml %} +<dependency> + <groupId>mysql</groupId> + <artifactId>mysql-connector-java</artifactId> + <version>8.0.17</version> +</dependency> +{% endhighlight %} + +**Library support:** Currently, only MySQL, Derby, and PostgreSQL are supported. + +The connector can be defined as follows: + +<div class="codetabs" markdown="1"> +<div data-lang="YAML" markdown="1"> +{% highlight yaml %} +connector: + type: jdbc + url: "jdbc:mysql://localhost:3306/flink-test" # required: JDBC DB url + table: "jdbc_table_name" # required: jdbc table name + driver: "com.mysql.jdbc.Driver" # optional: the class name of the JDBC driver to use to connect to this URL. + # If not set, it will automatically be derived from the URL. 
+ + username: "name" # optional: jdbc user name and password + password: "password" + + read: # scan options, optional, used when reading from table + partition: # These options must all be specified if any of them is specified. In addition, partition.num must be specified. They + # describe how to partition the table when reading in parallel from multiple tasks. partition.column must be a numeric, + # date, or timestamp column from the table in question. Notice that lowerBound and upperBound are just used to decide + # the partition stride, not for filtering the rows in table. So all rows in the table will be partitioned and returned. + # This option applies only to reading. + column: "column_name" # optional, name of the column used for partitioning the input. + num: 50 # optional, the number of partitions. + lower-bound: 500 # optional, the smallest value of the first partition. + upper-bound: 1000 # optional, the largest value of the last partition. + fetch-size: 100 # optional, Gives the reader a hint as to the number of rows that should be fetched + # from the database when reading per round trip. If the value specified is zero, then + # the hint is ignored. The default value is zero. + + lookup: # lookup options, optional, used in temporal join + cache: + max-rows: 5000 # optional, max number of rows of lookup cache, over this value, the oldest rows will + # be eliminated. "cache.max-rows" and "cache.ttl" options must all be specified if any + # of them is specified. Cache is not enabled by default. + ttl: "10s" # optional, the max time to live for each row in lookup cache, over this time, the oldest rows + # will be expired. "cache.max-rows" and "cache.ttl" options must all be specified if any of + # them is specified. Cache is not enabled by default. 
+ max-retries: 3 # optional, max retry times if the database lookup failed + + write: # sink options, optional, used when writing into table + flush: + max-rows: 5000 # optional, flush max size (includes all append, upsert and delete records), + # over this number of records, will flush data. The default value is "5000". + interval: "2s" # optional, the flush interval; once this time has elapsed, asynchronous threads will flush data. + # The default value is "0s", which means no asynchronous flush thread will be scheduled. + max-retries: 3 # optional, max retry times if writing records to database failed. +{% endhighlight %} +</div> + +<div data-lang="DDL" markdown="1"> +{% highlight sql %} +CREATE TABLE MyUserTable ( + ... +) WITH ( + 'connector.type' = 'jdbc', -- required: specify this table type is jdbc + + 'connector.url' = 'jdbc:mysql://localhost:3306/flink-test', -- required: JDBC DB url + + 'connector.table' = 'jdbc_table_name', -- required: jdbc table name + + 'connector.driver' = 'com.mysql.jdbc.Driver', -- optional: the class name of the JDBC driver to use to connect to this URL. + -- If not set, it will automatically be derived from the URL. + + 'connector.username' = 'name', -- optional: jdbc user name and password + 'connector.password' = 'password', + + -- scan options, optional, used when reading from table + + -- These options must all be specified if any of them is specified. In addition, partition.num must be specified. They + -- describe how to partition the table when reading in parallel from multiple tasks. partition.column must be a numeric, + -- date, or timestamp column from the table in question. Notice that lowerBound and upperBound are just used to decide + -- the partition stride, not for filtering the rows in table. So all rows in the table will be partitioned and returned. + -- This option applies only to reading. + 'connector.read.partition.column' = 'column_name', -- optional, name of the column used for partitioning the input. 
+ 'connector.read.partition.num' = '50', -- optional, the largest value of the last partition. Review comment: the number of partitions? ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services