This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
The following commit(s) were added to refs/heads/master by this push: new 4cbcae1 [Spark on Doris] Shade and provide the thrift lib in spark-doris-connector (#3631) 4cbcae1 is described below commit 4cbcae1574a56b5c52795a41258fb9e53b6900eb Author: Mingyu Chen <morningman....@gmail.com> AuthorDate: Tue May 19 14:20:21 2020 +0800 [Spark on Doris] Shade and provide the thrift lib in spark-doris-connector (#3631) Mainly changes: 1. Shade and provide the thrift lib in spark-doris-connector 2. Add a `build.sh` for spark-doris-connector 3. Move the README.md of spark-doris-connector to `docs/` 4. Change the line delimiter of `fe/src/test/java/org/apache/doris/analysis/AggregateTest.java` --- docs/.vuepress/sidebar/en.js | 1 + docs/.vuepress/sidebar/zh-CN.js | 1 + docs/en/extending-doris/spark-doris-connector.md | 158 ++++++++++++++++ .../zh-CN/extending-doris/spark-doris-connector.md | 49 ++--- extension/spark-doris-connector/build.sh | 59 ++++++ extension/spark-doris-connector/pom.xml | 59 +++++- .../org/apache/doris/analysis/AggregateTest.java | 202 ++++++++++----------- 7 files changed, 401 insertions(+), 128 deletions(-) diff --git a/docs/.vuepress/sidebar/en.js b/docs/.vuepress/sidebar/en.js index eb85411..e7ade2c 100644 --- a/docs/.vuepress/sidebar/en.js +++ b/docs/.vuepress/sidebar/en.js @@ -122,6 +122,7 @@ module.exports = [ "doris-on-es", "plugin-development-manual", "user-defined-function", + "spark-doris-connector", ], }, { diff --git a/docs/.vuepress/sidebar/zh-CN.js b/docs/.vuepress/sidebar/zh-CN.js index bb4cc67..880c340 100644 --- a/docs/.vuepress/sidebar/zh-CN.js +++ b/docs/.vuepress/sidebar/zh-CN.js @@ -131,6 +131,7 @@ module.exports = [ "doris-on-es", "plugin-development-manual", "user-defined-function", + "spark-doris-connector", ], }, { diff --git a/docs/en/extending-doris/spark-doris-connector.md b/docs/en/extending-doris/spark-doris-connector.md new file mode 100644 index 0000000..f9b7a8e --- /dev/null +++ 
b/docs/en/extending-doris/spark-doris-connector.md @@ -0,0 +1,158 @@ +--- +{ + "title": "Spark Doris Connector", + "language": "en" +} +--- + +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +--> + +# Spark Doris Connector + +Spark Doris Connector can support reading data stored in Doris through Spark. + +- The current version only supports reading data from `Doris`. +- You can map the `Doris` table to `DataFrame` or `RDD`; it is recommended to use `DataFrame`. +- Supports completing data filtering on the `Doris` side to reduce the amount of data transmitted. + +## Version Compatibility + +| Connector | Spark | Doris | Java | Scala | +| --------- | ----- | ------ | ---- | ----- | +| 1.0.0 | 2.x | 0.12+ | 8 | 2.11 | + + +## Build and Install + +Execute the following command in the directory `extension/spark-doris-connector/`: + +```bash +sh build.sh +``` + +After successful compilation, the file `doris-spark-1.0.0-SNAPSHOT.jar` will be generated in the `output/` directory. Copy this file to `ClassPath` in `Spark` to use `Spark-Doris-Connector`. For example, for `Spark` running in `Local` mode, put this file in the `jars/` folder; for `Spark` running in `Yarn` cluster mode, put this file in the pre-deployment package. 
+ +## Example + +### SQL + +```sql +CREATE TEMPORARY VIEW spark_doris +USING doris +OPTIONS( + "table.identifier"="$YOUR_DORIS_DATABASE_NAME.$YOUR_DORIS_TABLE_NAME", + "fenodes"="$YOUR_DORIS_FE_HOSTNAME:$YOUR_DORIS_FE_RESFUL_PORT", + "user"="$YOUR_DORIS_USERNAME", + "password"="$YOUR_DORIS_PASSWORD" +); + +SELECT * FROM spark_doris; +``` + +### DataFrame + +```scala +val dorisSparkDF = spark.read.format("doris") + .option("doris.table.identifier", "$YOUR_DORIS_DATABASE_NAME.$YOUR_DORIS_TABLE_NAME") + .option("doris.fenodes", "$YOUR_DORIS_FE_HOSTNAME:$YOUR_DORIS_FE_RESFUL_PORT") + .option("user", "$YOUR_DORIS_USERNAME") + .option("password", "$YOUR_DORIS_PASSWORD") + .load() + +dorisSparkDF.show(5) +``` + +### RDD + +```scala +import org.apache.doris.spark._ +val dorisSparkRDD = sc.dorisRDD( + tableIdentifier = Some("$YOUR_DORIS_DATABASE_NAME.$YOUR_DORIS_TABLE_NAME"), + cfg = Some(Map( + "doris.fenodes" -> "$YOUR_DORIS_FE_HOSTNAME:$YOUR_DORIS_FE_RESFUL_PORT", + "doris.request.auth.user" -> "$YOUR_DORIS_USERNAME", + "doris.request.auth.password" -> "$YOUR_DORIS_PASSWORD" + )) +) + +dorisSparkRDD.collect() +``` + +## Configuration + +### General + +| Key | Default Value | Comment | +| -------------------------------- | ----------------- | ------------------------------------------------------------ | +| doris.fenodes | -- | Doris FE http address, support multiple addresses, separated by commas | +| doris.table.identifier | -- | Doris table identifier, eg, db1.tbl1 | +| doris.request.retries | 3 | Number of retries to send requests to Doris | +| doris.request.connect.timeout.ms | 30000 | Connection timeout for sending requests to Doris | +| doris.request.read.timeout.ms | 30000 | Read timeout for sending request to Doris | +| doris.request.query.timeout.s | 3600 | Query the timeout time of doris, the default is 1 hour, -1 means no timeout limit | +| doris.request.tablet.size | Integer.MAX_VALUE | The number of Doris Tablets corresponding to an RDD Partition. 
The smaller this value is set, the more partitions will be generated. This will increase the parallelism on the Spark side, but at the same time will cause greater pressure on Doris. | +| doris.batch.size | 1024 | The maximum number of rows to read data from BE at one time. Increasing this value can reduce the number of connections between Spark and Doris. Thereby reducing the extra time overhead caused by network delay. | +| doris.exec.mem.limit | 2147483648 | Memory limit for a single query. The default is 2GB, in bytes. | +| doris.deserialize.arrow.async | false | Whether to support asynchronous conversion of Arrow format to RowBatch required for spark-doris-connector iteration | +| doris.deserialize.queue.size | 64 | Asynchronous conversion of the internal processing queue in Arrow format takes effect when doris.deserialize.arrow.async is true | + +### SQL & Dataframe Configuration + +| Key | Default Value | Comment | +| ------------------------------- | ------------- | ------------------------------------------------------------ | +| user | -- | Doris username | +| password | -- | Doris password | +| doris.filter.query.in.max.count | 100 | In the predicate pushdown, the maximum number of elements in the in expression value list. If this number is exceeded, the in-expression conditional filtering is processed on the Spark side. | + +### RDD Configuration + +| Key | Default Value | Comment | +| --------------------------- | ------------- | ------------------------------------------------------------ | +| doris.request.auth.user | -- | Doris username | +| doris.request.auth.password | -- | Doris password | +| doris.read.field | -- | List of column names in the Doris table, separated by commas | +| doris.filter.query | -- | Filter expression of the query, which is transparently transmitted to Doris. Doris uses this expression to complete source-side data filtering. 
| + + + +## Doris & Spark Column Type Mapping + +| Doris Type | Spark Type | +| ---------- | -------------------------------- | +| NULL_TYPE | DataTypes.NullType | +| BOOLEAN | DataTypes.BooleanType | +| TINYINT | DataTypes.ByteType | +| SMALLINT | DataTypes.ShortType | +| INT | DataTypes.IntegerType | +| BIGINT | DataTypes.LongType | +| FLOAT | DataTypes.FloatType | +| DOUBLE | DataTypes.DoubleType | +| DATE | DataTypes.StringType<sup>1</sup> | +| DATETIME | DataTypes.StringType<sup>1</sup> | +| BINARY | DataTypes.BinaryType | +| DECIMAL | DecimalType | +| CHAR | DataTypes.StringType | +| LARGEINT | DataTypes.StringType | +| VARCHAR | DataTypes.StringType | +| DECIMALV2 | DecimalType | +| TIME | DataTypes.DoubleType | +| HLL | Unsupported datatype | + +* Note: In the Connector, `DATE` and `DATETIME` are mapped to `String`. Because of the processing logic of the underlying Doris storage engine, the time range covered when the time types are used directly cannot meet the demand, so the `String` type is used to directly return the corresponding human-readable time text. \ No newline at end of file diff --git a/extension/spark-doris-connector/README.md b/docs/zh-CN/extending-doris/spark-doris-connector.md similarity index 75% rename from extension/spark-doris-connector/README.md rename to docs/zh-CN/extending-doris/spark-doris-connector.md index 3c41b93..2fbc54e 100644 --- a/extension/spark-doris-connector/README.md +++ b/docs/zh-CN/extending-doris/spark-doris-connector.md @@ -1,3 +1,10 @@ +--- +{ + "title": "Spark Doris Connector", + "language": "zh-CN" +} +--- + <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file @@ -17,31 +24,32 @@ specific language governing permissions and limitations under the License. 
--> -# Spark-Doris-Connector +# Spark Doris Connector -## Fetures +Spark Doris Connector 可以支持通过 Spark 读取 Doris 中存储的数据。 - 当前版本只支持从`Doris`中读取数据。 - 可以将`Doris`表映射为`DataFrame`或者`RDD`,推荐使用`DataFrame`。 - 支持在`Doris`端完成数据过滤,减少数据传输量。 -## Version Compatibility +## 版本兼容 | Connector | Spark | Doris | Java | Scala | | --------- | ----- | ------ | ---- | ----- | -| 1.0.0 | 2.x | master | 8 | 2.11 | +| 1.0.0 | 2.x | 0.12+ | 8 | 2.11 | +## 编译与安装 -## Building +在 `extension/spark-doris-connector/` 源码目录下执行: ```bash -mvn clean package +sh build.sh ``` -编译成功后,会在`target`目录下生成文件`doris-spark-1.0.0-SNAPSHOT.jar`。将此文件复制到`Spark`的`ClassPath`中即可使用`Spark-Doris-Connector`。例如,`Local`模式运行的`Spark`,将此文件放入`jars`文件夹下。`Yarn`集群模式运行的`Spark`,则将此文件放入预部署包中。 +编译成功后,会在 `output/` 目录下生成文件 `doris-spark-1.0.0-SNAPSHOT.jar`。将此文件复制到 `Spark` 的 `ClassPath` 中即可使用 `Spark-Doris-Connector`。例如,`Local` 模式运行的 `Spark`,将此文件放入 `jars/` 文件夹下。`Yarn`集群模式运行的`Spark`,则将此文件放入预部署包中。 -## QuickStart +## 使用示例 ### SQL @@ -87,44 +95,43 @@ val dorisSparkRDD = sc.dorisRDD( dorisSparkRDD.collect() ``` -## Configuration +## 配置 -### General +### 通用配置项 | Key | Default Value | Comment | | -------------------------------- | ----------------- | ------------------------------------------------------------ | -| doris.fenodes | -- | Doris Restful接口地址,支持多个地址,使用逗号分隔 | -| doris.table.identifier | -- | DataFame/RDD对应的Doris表名 | +| doris.fenodes | -- | Doris FE http 地址,支持多个地址,使用逗号分隔 | +| doris.table.identifier | -- | Doris 表名,如:db1.tbl1 | | doris.request.retries | 3 | 向Doris发送请求的重试次数 | | doris.request.connect.timeout.ms | 30000 | 向Doris发送请求的连接超时时间 | | doris.request.read.timeout.ms | 30000 | 向Doris发送请求的读取超时时间 | | doris.request.query.timeout.s | 3600 | 查询doris的超时时间,默认值为1小时,-1表示无超时限制 | -| doris.request.tablet.size | Integer.MAX_VALUE | 一个RDD Partition对应的Doris Tablet个数。<br />此数值设置越小,则会生成越多的Partition。<br />从而提升Spark侧的并行度,但同时会对Doris造成更大的压力。 | -| doris.batch.size | 1024 | 一次从BE读取数据的最大行数。<br />增大此数值可减少Spark与Doris之间建立连接的次数。<br />从而减轻网络延迟所带来的的额外时间开销。 | +| 
doris.request.tablet.size | Integer.MAX_VALUE | 一个RDD Partition对应的Doris Tablet个数。<br />此数值设置越小,则会生成越多的Partition。从而提升Spark侧的并行度,但同时会对Doris造成更大的压力。 | +| doris.batch.size | 1024 | 一次从BE读取数据的最大行数。增大此数值可减少Spark与Doris之间建立连接的次数。<br />从而减轻网络延迟所带来的的额外时间开销。 | | doris.exec.mem.limit | 2147483648 | 单个查询的内存限制。默认为 2GB,单位为字节 | | doris.deserialize.arrow.async | false | 是否支持异步转换Arrow格式到spark-doris-connector迭代所需的RowBatch | | doris.deserialize.queue.size | 64 | 异步转换Arrow格式的内部处理队列,当doris.deserialize.arrow.async为true时生效 | -### SQL and Dataframe Only +### SQL 和 Dataframe 专有配置 | Key | Default Value | Comment | | ------------------------------- | ------------- | ------------------------------------------------------------ | | user | -- | 访问Doris的用户名 | | password | -- | 访问Doris的密码 | -| doris.filter.query.in.max.count | 100 | 谓词下推中,in表达式value列表元素最大数量。<br />超过此数量,则in表达式条件过滤在Spark侧处理。 | +| doris.filter.query.in.max.count | 100 | 谓词下推中,in表达式value列表元素最大数量。超过此数量,则in表达式条件过滤在Spark侧处理。 | -### RDD Only +### RDD 专有配置 | Key | Default Value | Comment | | --------------------------- | ------------- | ------------------------------------------------------------ | | doris.request.auth.user | -- | 访问Doris的用户名 | | doris.request.auth.password | -- | 访问Doris的密码 | | doris.read.field | -- | 读取Doris表的列名列表,多列之间使用逗号分隔 | -| doris.filter.query | -- | 过滤读取数据的表达式,此表达式透传给Doris。<br />Doris使用此表达式完成源端数据过滤。 | - +| doris.filter.query | -- | 过滤读取数据的表达式,此表达式透传给Doris。Doris使用此表达式完成源端数据过滤。 | -## Doris Data Type - Spark Data Type Mapping +## Doris 和 Spark 列类型映射关系 | Doris Type | Spark Type | | ---------- | -------------------------------- | @@ -147,4 +154,4 @@ dorisSparkRDD.collect() | TIME | DataTypes.DoubleType | | HLL | Unsupported datatype | -<sup>1</sup>: Connector中,将`DATE`和`DATETIME`映射为`String`。由于`Doris`底层存储引擎处理逻辑,直接使用时间类型时,覆盖的时间范围无法满足需求。所以使用`String`类型直接返回对应的时间可读文本。 \ No newline at end of file +* 注:Connector中,将`DATE`和`DATETIME`映射为`String`。由于`Doris`底层存储引擎处理逻辑,直接使用时间类型时,覆盖的时间范围无法满足需求。所以使用 `String` 类型直接返回对应的时间可读文本。 \ No newline 
at end of file diff --git a/extension/spark-doris-connector/build.sh b/extension/spark-doris-connector/build.sh new file mode 100755 index 0000000..9119841 --- /dev/null +++ b/extension/spark-doris-connector/build.sh @@ -0,0 +1,59 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +############################################################## +# This script is used to compile Spark-Doris-Connector +# Usage: +# sh build.sh +# +############################################################## + +set -eo pipefail + +ROOT=`dirname "$0"` +ROOT=`cd "$ROOT"; pwd` + +export DORIS_HOME=${ROOT}/../../ + +# include custom environment variables +if [[ -f ${DORIS_HOME}/custom_env.sh ]]; then + . ${DORIS_HOME}/custom_env.sh +fi + +# check maven +MVN_CMD=mvn +if [[ ! -z ${CUSTOM_MVN} ]]; then + MVN_CMD=${CUSTOM_MVN} +fi +if ! 
${MVN_CMD} --version; then + echo "Error: mvn is not found" + exit 1 +fi +export MVN_CMD + +${MVN_CMD} clean package + + +mkdir -p output/ +cp target/doris-spark-1.0.0-SNAPSHOT.jar ./output/ + +echo "*****************************************" +echo "Successfully build Spark-Doris-Connector" +echo "*****************************************" + +exit 0 diff --git a/extension/spark-doris-connector/pom.xml b/extension/spark-doris-connector/pom.xml index 35986ad..cdf1055 100644 --- a/extension/spark-doris-connector/pom.xml +++ b/extension/spark-doris-connector/pom.xml @@ -36,6 +36,50 @@ <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> </properties> + <profiles> + <!-- for custom internal repository --> + <profile> + <id>custom-env</id> + <activation> + <property> + <name>env.CUSTOM_MAVEN_REPO</name> + </property> + </activation> + + <repositories> + <repository> + <id>custom-nexus</id> + <url>${env.CUSTOM_MAVEN_REPO}</url> + </repository> + </repositories> + + <pluginRepositories> + <pluginRepository> + <id>custom-nexus</id> + <url>${env.CUSTOM_MAVEN_REPO}</url> + </pluginRepository> + </pluginRepositories> + </profile> + + <!-- for general repository --> + <profile> + <id>general-env</id> + <activation> + <property> + <name>!env.CUSTOM_MAVEN_REPO</name> + </property> + </activation> + + <repositories> + <repository> + <id>central</id> + <name>central maven repo https</name> + <url>https://repo.maven.apache.org/maven2</url> + </repository> + </repositories> + </profile> + </profiles> + <dependencies> <dependency> <groupId>org.apache.spark</groupId> @@ -53,7 +97,6 @@ <groupId>org.apache.thrift</groupId> <artifactId>libthrift</artifactId> <version>${libthrift.version}</version> - <scope>provided</scope> </dependency> <dependency> <groupId>org.apache.arrow</groupId> @@ -150,23 +193,27 @@ <relocations> <relocation> <pattern>org.apache.arrow</pattern> - <shadedPattern>org.apache.doris.arrow</shadedPattern> + 
<shadedPattern>org.apache.doris.shaded.org.apache.arrow</shadedPattern> </relocation> <relocation> <pattern>io.netty</pattern> - <shadedPattern>org.apache.doris.netty</shadedPattern> + <shadedPattern>org.apache.doris.shaded.io.netty</shadedPattern> </relocation> <relocation> <pattern>com.fasterxml.jackson</pattern> - <shadedPattern>org.apache.doris.jackson</shadedPattern> + <shadedPattern>org.apache.doris.shaded.com.fasterxml.jackson</shadedPattern> </relocation> <relocation> <pattern>org.apache.commons.codec</pattern> - <shadedPattern>org.apache.doris.commons.codec</shadedPattern> + <shadedPattern>org.apache.doris.shaded.org.apache.commons.codec</shadedPattern> </relocation> <relocation> <pattern>com.google.flatbuffers</pattern> - <shadedPattern>org.apache.doris.flatbuffers</shadedPattern> + <shadedPattern>org.apache.doris.shaded.com.google.flatbuffers</shadedPattern> + </relocation> + <relocation> + <pattern>org.apache.thrift</pattern> + <shadedPattern>org.apache.doris.shaded.org.apache.thrift</shadedPattern> </relocation> </relocations> </configuration> diff --git a/fe/src/test/java/org/apache/doris/analysis/AggregateTest.java b/fe/src/test/java/org/apache/doris/analysis/AggregateTest.java index 328aa2b..722a932 100644 --- a/fe/src/test/java/org/apache/doris/analysis/AggregateTest.java +++ b/fe/src/test/java/org/apache/doris/analysis/AggregateTest.java @@ -1,102 +1,102 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.analysis; - -import org.apache.commons.io.filefilter.FalseFileFilter; -import org.apache.doris.common.AnalysisException; -import org.apache.doris.common.FeConstants; -import org.apache.doris.qe.ConnectContext; -import org.apache.doris.utframe.DorisAssert; -import org.apache.doris.utframe.UtFrameUtils; - -import org.junit.AfterClass; -import org.junit.Assert; -import org.junit.BeforeClass; -import org.junit.Test; - -import javax.validation.constraints.AssertTrue; -import java.util.UUID; - -public class AggregateTest { - - private static String baseDir = "fe"; - private static String runningDir = baseDir + "/mocked/AggregateTest/" - + UUID.randomUUID().toString() + "/"; - private static final String TABLE_NAME = "table1"; - private static final String DB_NAME = "db1"; - private static DorisAssert dorisAssert; - - @BeforeClass - public static void beforeClass() throws Exception{ - FeConstants.runningUnitTest = true; - UtFrameUtils.createMinDorisCluster(runningDir); - dorisAssert = new DorisAssert(); - dorisAssert.withDatabase(DB_NAME).useDatabase(DB_NAME); - String createTableSQL = "create table " + DB_NAME + "." 
+ TABLE_NAME + " (empid int, name varchar, " + - "deptno int, salary int, commission int) " - + "distributed by hash(empid) buckets 3 properties('replication_num' = '1');"; - dorisAssert.withTable(createTableSQL); - } - - /** - * ISSUE-3492 - */ - @Test - public void testCountDisintctAnalysisException() throws Exception { - ConnectContext ctx = UtFrameUtils.createDefaultCtx(); - - // NOT support mix distinct, one DistinctAggregationFunction has one column, the other DistinctAggregationFunction has some columns. - do { - String query = "select count(distinct empid), count(distinct salary), count(distinct empid, salary) from " + DB_NAME + "." + TABLE_NAME; - try { - UtFrameUtils.parseAndAnalyzeStmt(query, ctx); - } catch (AnalysisException e) { - Assert.assertTrue(e.getMessage().contains("The query contains multi count distinct or sum distinct, each can't have multi columns.")); - break; - } catch (Exception e) { - Assert.fail("must be AnalysisException."); - } - Assert.fail("must be AnalysisException."); - } while (false); - - // support multi DistinctAggregationFunction, but each DistinctAggregationFunction only has one column - do { - String query = "select count(distinct empid), count(distinct salary) from " + DB_NAME + "." + TABLE_NAME; - try { - UtFrameUtils.parseAndAnalyzeStmt(query, ctx); - } catch (Exception e) { - Assert.fail("should be query, no exception"); - } - } while (false); - - // support 1 DistinctAggregationFunction with some columns - do { - String query = "select count(distinct salary, empid) from " + DB_NAME + "." + TABLE_NAME; - try { - UtFrameUtils.parseAndAnalyzeStmt(query, ctx); - } catch (Exception e) { - Assert.fail("should be query, no exception"); - } - } while (false); - } - - @AfterClass - public static void afterClass() throws Exception { - UtFrameUtils.cleanDorisFeDir(baseDir); - } +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.analysis; + +import org.apache.commons.io.filefilter.FalseFileFilter; +import org.apache.doris.common.AnalysisException; +import org.apache.doris.common.FeConstants; +import org.apache.doris.qe.ConnectContext; +import org.apache.doris.utframe.DorisAssert; +import org.apache.doris.utframe.UtFrameUtils; + +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; + +import javax.validation.constraints.AssertTrue; +import java.util.UUID; + +public class AggregateTest { + + private static String baseDir = "fe"; + private static String runningDir = baseDir + "/mocked/AggregateTest/" + + UUID.randomUUID().toString() + "/"; + private static final String TABLE_NAME = "table1"; + private static final String DB_NAME = "db1"; + private static DorisAssert dorisAssert; + + @BeforeClass + public static void beforeClass() throws Exception{ + FeConstants.runningUnitTest = true; + UtFrameUtils.createMinDorisCluster(runningDir); + dorisAssert = new DorisAssert(); + dorisAssert.withDatabase(DB_NAME).useDatabase(DB_NAME); + String createTableSQL = "create table " + DB_NAME + "." 
+ TABLE_NAME + " (empid int, name varchar, " + + "deptno int, salary int, commission int) " + + "distributed by hash(empid) buckets 3 properties('replication_num' = '1');"; + dorisAssert.withTable(createTableSQL); + } + + /** + * ISSUE-3492 + */ + @Test + public void testCountDisintctAnalysisException() throws Exception { + ConnectContext ctx = UtFrameUtils.createDefaultCtx(); + + // NOT support mix distinct, one DistinctAggregationFunction has one column, the other DistinctAggregationFunction has some columns. + do { + String query = "select count(distinct empid), count(distinct salary), count(distinct empid, salary) from " + DB_NAME + "." + TABLE_NAME; + try { + UtFrameUtils.parseAndAnalyzeStmt(query, ctx); + } catch (AnalysisException e) { + Assert.assertTrue(e.getMessage().contains("The query contains multi count distinct or sum distinct, each can't have multi columns.")); + break; + } catch (Exception e) { + Assert.fail("must be AnalysisException."); + } + Assert.fail("must be AnalysisException."); + } while (false); + + // support multi DistinctAggregationFunction, but each DistinctAggregationFunction only has one column + do { + String query = "select count(distinct empid), count(distinct salary) from " + DB_NAME + "." + TABLE_NAME; + try { + UtFrameUtils.parseAndAnalyzeStmt(query, ctx); + } catch (Exception e) { + Assert.fail("should be query, no exception"); + } + } while (false); + + // support 1 DistinctAggregationFunction with some columns + do { + String query = "select count(distinct salary, empid) from " + DB_NAME + "." 
+ TABLE_NAME; + try { + UtFrameUtils.parseAndAnalyzeStmt(query, ctx); + } catch (Exception e) { + Assert.fail("should be query, no exception"); + } + } while (false); + } + + @AfterClass + public static void afterClass() throws Exception { + UtFrameUtils.cleanDorisFeDir(baseDir); + } } \ No newline at end of file --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org