Re: [PR] [SPARK-48354][SQL] JDBC Connectors predicate pushdown testing [spark]
urosstan-db commented on code in PR #46642: URL: https://github.com/apache/spark/pull/46642#discussion_r1610243753 ## connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCPushdownTest.scala: ## @@ -0,0 +1,385 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.jdbc.v2 + +import scala.collection.immutable.Seq + +import org.apache.spark.sql.{DataFrame, QueryTest, Row} +import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LocalLimit} +import org.apache.spark.sql.execution.FilterExec +import org.apache.spark.sql.test.SharedSparkSession + +trait V2JDBCPushdownTest extends SharedSparkSession { + protected def isFilterRemoved(df: DataFrame): Boolean = { +df.queryExecution.sparkPlan.collectFirst { + case f: FilterExec => f +}.isEmpty + } + + protected def isAggregateRemoved(df: DataFrame): Boolean = { +df.queryExecution.optimizedPlan.collect { + case agg: Aggregate => agg +}.isEmpty + } + + private def isLimitPushed(df: DataFrame): Boolean = { +df.queryExecution.optimizedPlan.collect { + case lim: LocalLimit => lim +}.isEmpty + } + + protected val catalog: String + + protected val tablePrefix: String + + protected val schema: String + + protected def executeUpdate(sql: String): Unit + + protected def commonAssertionOnDataFrame(df: DataFrame): Unit + + protected def prepareTable(): Unit = { +executeUpdate( + s"""CREATE SCHEMA "$schema +) + +executeUpdate( + s"""CREATE TABLE "$schema"."$tablePrefix" + | (id INTEGER, st STRING, num_col INT);""".stripMargin +) + +executeUpdate( + s"""CREATE TABLE "$schema"."${tablePrefix}_coalesce" + | (id INTEGER, col1 VARCHAR(128), col2 INT);""".stripMargin +) + +executeUpdate( + s"""CREATE TABLE "$schema"."${tablePrefix}_string_test" + | (id INTEGER, st STRING, num_col INT);""".stripMargin +) + +executeUpdate( + s"""CREATE TABLE "$schema"."${tablePrefix}_with_nulls" + | (id INTEGER, st STRING);""".stripMargin +) + +executeUpdate( + s"""CREATE TABLE "$schema"."${tablePrefix}_numeric_test" + | (id INTEGER, dec_col DECIMAL(10, 2));""".stripMargin +) + } + + protected def prepareData(): Unit = { + +prepareTable() + +executeUpdate(s"""INSERT INTO "$schema"."${tablePrefix}_coalesce" VALUES (1, NULL, 1)""") +executeUpdate(s"""INSERT INTO "$schema"."${tablePrefix}_coalesce" VALUES (2, '2', NULL)""") +executeUpdate(s"""INSERT INTO "$schema"."${tablePrefix}_coalesce" VALUES (3, NULL, NULL)""") + +executeUpdate( + s"""INSERT INTO "$schema"."${tablePrefix}_with_nulls" VALUES (1, 'first')""") +executeUpdate( + s"""INSERT INTO "$schema"."${tablePrefix}_with_nulls" VALUES (2, 'second')""") +executeUpdate( + s"""INSERT INTO "$schema"."${tablePrefix}_with_nulls" VALUES (3, 'third')""") +executeUpdate( + s"""INSERT INTO "$schema"."${tablePrefix}_with_nulls" VALUES (NULL, 'null')""") + +executeUpdate( + s"""INSERT INTO "$schema"."${tablePrefix}_string_test" VALUES (0, 'ab''', 1000)""") +executeUpdate( + s"""INSERT INTO "$schema"."${tablePrefix}_string_test" VALUES (0, 'FiRs''T', 1000)""") +executeUpdate( + s"""INSERT INTO "$schema"."${tablePrefix}_string_test" VALUES (0, 'sE Co nD', 1000)""") +executeUpdate( + s"""INSERT INTO "$schema"."${tablePrefix}_string_test" VALUES (0, ' forth ', 1000)""") + +executeUpdate(s"""INSERT INTO "$schema"."$tablePrefix" VALUES (1, 'ab', 1000)""") Review Comment: On spark, nulls should be ignored, I am not sure how other databases handles null in aggregates. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail:
Re: [PR] [SPARK-48354][SQL] JDBC Connectors predicate pushdown testing [spark]
urosstan-db commented on code in PR #46642: URL: https://github.com/apache/spark/pull/46642#discussion_r1610235091 ## connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCPushdownTest.scala: ## @@ -0,0 +1,385 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.jdbc.v2 + +import scala.collection.immutable.Seq + +import org.apache.spark.sql.{DataFrame, QueryTest, Row} +import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LocalLimit} +import org.apache.spark.sql.execution.FilterExec +import org.apache.spark.sql.test.SharedSparkSession + +trait V2JDBCPushdownTest extends SharedSparkSession { + protected def isFilterRemoved(df: DataFrame): Boolean = { +df.queryExecution.sparkPlan.collectFirst { + case f: FilterExec => f +}.isEmpty + } + + protected def isAggregateRemoved(df: DataFrame): Boolean = { +df.queryExecution.optimizedPlan.collect { + case agg: Aggregate => agg +}.isEmpty + } + + private def isLimitPushed(df: DataFrame): Boolean = { +df.queryExecution.optimizedPlan.collect { + case lim: LocalLimit => lim +}.isEmpty + } + + protected val catalog: String + + protected val tablePrefix: String + + protected val schema: String + + protected def executeUpdate(sql: String): Unit + + protected def commonAssertionOnDataFrame(df: DataFrame): Unit + + protected def prepareTable(): Unit = { +executeUpdate( + s"""CREATE SCHEMA "$schema +) + +executeUpdate( + s"""CREATE TABLE "$schema"."$tablePrefix" + | (id INTEGER, st STRING, num_col INT);""".stripMargin +) + +executeUpdate( + s"""CREATE TABLE "$schema"."${tablePrefix}_coalesce" + | (id INTEGER, col1 VARCHAR(128), col2 INT);""".stripMargin +) + +executeUpdate( + s"""CREATE TABLE "$schema"."${tablePrefix}_string_test" + | (id INTEGER, st STRING, num_col INT);""".stripMargin +) + +executeUpdate( + s"""CREATE TABLE "$schema"."${tablePrefix}_with_nulls" + | (id INTEGER, st STRING);""".stripMargin +) + +executeUpdate( + s"""CREATE TABLE "$schema"."${tablePrefix}_numeric_test" + | (id INTEGER, dec_col DECIMAL(10, 2));""".stripMargin +) + } + + protected def prepareData(): Unit = { + +prepareTable() + +executeUpdate(s"""INSERT INTO "$schema"."${tablePrefix}_coalesce" VALUES (1, NULL, 1)""") +executeUpdate(s"""INSERT INTO "$schema"."${tablePrefix}_coalesce" VALUES (2, '2', NULL)""") +executeUpdate(s"""INSERT INTO "$schema"."${tablePrefix}_coalesce" VALUES (3, NULL, NULL)""") + +executeUpdate( + s"""INSERT INTO "$schema"."${tablePrefix}_with_nulls" VALUES (1, 'first')""") +executeUpdate( + s"""INSERT INTO "$schema"."${tablePrefix}_with_nulls" VALUES (2, 'second')""") +executeUpdate( + s"""INSERT INTO "$schema"."${tablePrefix}_with_nulls" VALUES (3, 'third')""") +executeUpdate( + s"""INSERT INTO "$schema"."${tablePrefix}_with_nulls" VALUES (NULL, 'null')""") + +executeUpdate( + s"""INSERT INTO "$schema"."${tablePrefix}_string_test" VALUES (0, 'ab''', 1000)""") +executeUpdate( + s"""INSERT INTO "$schema"."${tablePrefix}_string_test" VALUES (0, 'FiRs''T', 1000)""") +executeUpdate( + s"""INSERT INTO "$schema"."${tablePrefix}_string_test" VALUES (0, 'sE Co nD', 1000)""") +executeUpdate( + s"""INSERT INTO "$schema"."${tablePrefix}_string_test" VALUES (0, ' forth ', 1000)""") + +executeUpdate(s"""INSERT INTO "$schema"."$tablePrefix" VALUES (1, 'ab', 1000)""") Review Comment: It is maybe good to insert one null int col, to test aggregate functions -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
Re: [PR] [SPARK-48354][SQL] JDBC Connectors predicate pushdown testing [spark]
andrej-db commented on code in PR #46642: URL: https://github.com/apache/spark/pull/46642#discussion_r1608401795 ## connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCPushdownTest.scala: ## @@ -0,0 +1,385 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.jdbc.v2 + +import scala.collection.immutable.Seq + +import org.apache.spark.sql.{DataFrame, QueryTest, Row} +import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LocalLimit} +import org.apache.spark.sql.execution.FilterExec +import org.apache.spark.sql.test.SharedSparkSession + +trait V2JDBCPushdownTest extends SharedSparkSession { + protected def isFilterRemoved(df: DataFrame): Boolean = { +df.queryExecution.sparkPlan.collectFirst { + case f: FilterExec => f +}.isEmpty + } + + protected def isAggregateRemoved(df: DataFrame): Boolean = { +df.queryExecution.optimizedPlan.collect { + case agg: Aggregate => agg +}.isEmpty + } + + private def isLimitPushed(df: DataFrame): Boolean = { +df.queryExecution.optimizedPlan.collect { + case lim: LocalLimit => lim +}.isEmpty + } + + protected val catalog: String + + protected val tablePrefix: String + + protected val schema: String + + protected def executeUpdate(sql: String): Unit + + protected def commonAssertionOnDataFrame(df: DataFrame): Unit + + protected def prepareTable(): Unit = { +executeUpdate( + s"""CREATE SCHEMA "$schema +) + +executeUpdate( + s"""CREATE TABLE "$schema"."$tablePrefix" + | (id INTEGER, st STRING, num_col INT);""".stripMargin +) + +executeUpdate( + s"""CREATE TABLE "$schema"."${tablePrefix}_coalesce" + | (id INTEGER, col1 VARCHAR(128), col2 INT);""".stripMargin +) + +executeUpdate( + s"""CREATE TABLE "$schema"."${tablePrefix}_string_test" + | (id INTEGER, st STRING, num_col INT);""".stripMargin +) + +executeUpdate( + s"""CREATE TABLE "$schema"."${tablePrefix}_with_nulls" + | (id INTEGER, st STRING);""".stripMargin +) + +executeUpdate( + s"""CREATE TABLE "$schema"."${tablePrefix}_numeric_test" + | (id INTEGER, dec_col DECIMAL(10, 2));""".stripMargin +) + } + + protected def prepareData(): Unit = { + +prepareTable() + +executeUpdate(s"""INSERT INTO "$schema"."${tablePrefix}_coalesce" VALUES (1, NULL, 1)""") +executeUpdate(s"""INSERT INTO "$schema"."${tablePrefix}_coalesce" VALUES (2, '2', NULL)""") +executeUpdate(s"""INSERT INTO "$schema"."${tablePrefix}_coalesce" VALUES (3, NULL, NULL)""") + +executeUpdate( + s"""INSERT INTO "$schema"."${tablePrefix}_with_nulls" VALUES (1, 'first')""") +executeUpdate( + s"""INSERT INTO "$schema"."${tablePrefix}_with_nulls" VALUES (2, 'second')""") +executeUpdate( + s"""INSERT INTO "$schema"."${tablePrefix}_with_nulls" VALUES (3, 'third')""") +executeUpdate( + s"""INSERT INTO "$schema"."${tablePrefix}_with_nulls" VALUES (NULL, 'null')""") + +executeUpdate( + s"""INSERT INTO "$schema"."${tablePrefix}_string_test" VALUES (0, 'ab''', 1000)""") +executeUpdate( + s"""INSERT INTO "$schema"."${tablePrefix}_string_test" VALUES (0, 'FiRs''T', 1000)""") +executeUpdate( + s"""INSERT INTO "$schema"."${tablePrefix}_string_test" VALUES (0, 'sE Co nD', 1000)""") +executeUpdate( + s"""INSERT INTO "$schema"."${tablePrefix}_string_test" VALUES (0, ' forth ', 1000)""") + +executeUpdate(s"""INSERT INTO "$schema"."$tablePrefix" VALUES (1, 'ab', 1000)""") +executeUpdate(s"""INSERT INTO "$schema"."$tablePrefix" VALUES (2, 'aba', NULL)""") +executeUpdate(s"""INSERT INTO "$schema"."$tablePrefix" VALUES (3, 'abb', 800)""") +executeUpdate(s"""INSERT INTO "$schema"."$tablePrefix" VALUES (4, 'abc', NULL)""") +executeUpdate(s"""INSERT INTO "$schema"."$tablePrefix" VALUES (5, 'abd', 1200)""") +executeUpdate(s"""INSERT INTO "$schema"."$tablePrefix" VALUES (6, 'abe', 1250)""") +executeUpdate(s"""INSERT INTO "$schema"."$tablePrefix" VALUES (7, 'abf', 1200)""") +executeUpdate(s"""INSERT INTO "$schema"."$tablePrefix" VALUES (8, 'abg',
Re: [PR] [SPARK-48354][SQL] JDBC Connectors predicate pushdown testing [spark]
andrej-db commented on code in PR #46642: URL: https://github.com/apache/spark/pull/46642#discussion_r1608291743 ## connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCPushdownTest.scala: ## @@ -0,0 +1,385 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.jdbc.v2 + +import scala.collection.immutable.Seq + +import org.apache.spark.sql.{DataFrame, QueryTest, Row} +import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LocalLimit} +import org.apache.spark.sql.execution.FilterExec +import org.apache.spark.sql.test.SharedSparkSession + +trait V2JDBCPushdownTest extends SharedSparkSession { + protected def isFilterRemoved(df: DataFrame): Boolean = { +df.queryExecution.sparkPlan.collectFirst { + case f: FilterExec => f +}.isEmpty + } + + protected def isAggregateRemoved(df: DataFrame): Boolean = { +df.queryExecution.optimizedPlan.collect { + case agg: Aggregate => agg +}.isEmpty + } + + private def isLimitPushed(df: DataFrame): Boolean = { +df.queryExecution.optimizedPlan.collect { + case lim: LocalLimit => lim +}.isEmpty + } + + protected val catalog: String + + protected val tablePrefix: String + + protected val schema: String + + protected def executeUpdate(sql: String): Unit + + protected def commonAssertionOnDataFrame(df: DataFrame): Unit + + protected def prepareTable(): Unit = { +executeUpdate( + s"""CREATE SCHEMA "$schema +) + +executeUpdate( + s"""CREATE TABLE "$schema"."$tablePrefix" + | (id INTEGER, st STRING, num_col INT);""".stripMargin +) + +executeUpdate( + s"""CREATE TABLE "$schema"."${tablePrefix}_coalesce" + | (id INTEGER, col1 VARCHAR(128), col2 INT);""".stripMargin +) + +executeUpdate( + s"""CREATE TABLE "$schema"."${tablePrefix}_string_test" + | (id INTEGER, st STRING, num_col INT);""".stripMargin +) + +executeUpdate( + s"""CREATE TABLE "$schema"."${tablePrefix}_with_nulls" + | (id INTEGER, st STRING);""".stripMargin +) + +executeUpdate( + s"""CREATE TABLE "$schema"."${tablePrefix}_numeric_test" + | (id INTEGER, dec_col DECIMAL(10, 2));""".stripMargin +) + } + + protected def prepareData(): Unit = { + +prepareTable() + +executeUpdate(s"""INSERT INTO "$schema"."${tablePrefix}_coalesce" VALUES (1, NULL, 1)""") +executeUpdate(s"""INSERT INTO "$schema"."${tablePrefix}_coalesce" VALUES (2, '2', NULL)""") +executeUpdate(s"""INSERT INTO "$schema"."${tablePrefix}_coalesce" VALUES (3, NULL, NULL)""") + +executeUpdate( + s"""INSERT INTO "$schema"."${tablePrefix}_with_nulls" VALUES (1, 'first')""") +executeUpdate( + s"""INSERT INTO "$schema"."${tablePrefix}_with_nulls" VALUES (2, 'second')""") +executeUpdate( + s"""INSERT INTO "$schema"."${tablePrefix}_with_nulls" VALUES (3, 'third')""") +executeUpdate( + s"""INSERT INTO "$schema"."${tablePrefix}_with_nulls" VALUES (NULL, 'null')""") + +executeUpdate( + s"""INSERT INTO "$schema"."${tablePrefix}_string_test" VALUES (0, 'ab''', 1000)""") +executeUpdate( + s"""INSERT INTO "$schema"."${tablePrefix}_string_test" VALUES (0, 'FiRs''T', 1000)""") +executeUpdate( + s"""INSERT INTO "$schema"."${tablePrefix}_string_test" VALUES (0, 'sE Co nD', 1000)""") +executeUpdate( + s"""INSERT INTO "$schema"."${tablePrefix}_string_test" VALUES (0, ' forth ', 1000)""") + +executeUpdate(s"""INSERT INTO "$schema"."$tablePrefix" VALUES (1, 'ab', 1000)""") +executeUpdate(s"""INSERT INTO "$schema"."$tablePrefix" VALUES (2, 'aba', NULL)""") +executeUpdate(s"""INSERT INTO "$schema"."$tablePrefix" VALUES (3, 'abb', 800)""") +executeUpdate(s"""INSERT INTO "$schema"."$tablePrefix" VALUES (4, 'abc', NULL)""") +executeUpdate(s"""INSERT INTO "$schema"."$tablePrefix" VALUES (5, 'abd', 1200)""") +executeUpdate(s"""INSERT INTO "$schema"."$tablePrefix" VALUES (6, 'abe', 1250)""") +executeUpdate(s"""INSERT INTO "$schema"."$tablePrefix" VALUES (7, 'abf', 1200)""") +executeUpdate(s"""INSERT INTO "$schema"."$tablePrefix" VALUES (8, 'abg',
Re: [PR] [SPARK-48354][SQL] JDBC Connectors predicate pushdown testing [spark]
stefanbuk-db commented on code in PR #46642: URL: https://github.com/apache/spark/pull/46642#discussion_r1606936510 ## connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerPushdownIntegrationSuite.scala: ## @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.jdbc.v2 + +import java.sql.Connection + +import test.scala.org.apache.spark.sql.jdbc.v2.V2JDBCPushdownTest + +import org.apache.spark.SparkConf +import org.apache.spark.sql.DataFrame +import org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog +import org.apache.spark.sql.jdbc.{DatabaseOnDocker, DockerJDBCIntegrationSuite, MsSQLServerDatabaseOnDocker} + +class MsSqlServerPushdownIntegrationSuite Review Comment: Same answer as above -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
Re: [PR] [SPARK-48354][SQL] JDBC Connectors predicate pushdown testing [spark]
stefanbuk-db commented on code in PR #46642: URL: https://github.com/apache/spark/pull/46642#discussion_r1606935984 ## connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCPushdownTest.scala: ## @@ -0,0 +1,387 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package test.scala.org.apache.spark.sql.jdbc.v2 + +import scala.collection.immutable.Seq + +import org.apache.spark.sql.{DataFrame, QueryTest, Row} +import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LocalLimit} +import org.apache.spark.sql.execution.FilterExec +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.tags.DockerTest + +@DockerTest Review Comment: Well, it seemed as a fitting place, but it can be used for more than docker integration tests, what is a fitting place for this trait? Also, we could add some way to filterout tests here, but we already use `override def excluded` from `SparkFunSuite`, as suites implementing this trait are extended from there, do we need another method for that in this trait? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
Re: [PR] [SPARK-48354][SQL] JDBC Connectors predicate pushdown testing [spark]
stefanbuk-db commented on code in PR #46642: URL: https://github.com/apache/spark/pull/46642#discussion_r1606926117 ## sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala: ## @@ -141,6 +160,9 @@ private case class MsSqlServerDialect() extends JdbcDialect { case ShortType if !SQLConf.get.legacyMsSqlServerNumericMappingEnabled => Some(JdbcType("SMALLINT", java.sql.Types.SMALLINT)) case ByteType => Some(JdbcType("SMALLINT", java.sql.Types.TINYINT)) +case LongType => Some(JdbcType("BIGINT", java.sql.Types.BIGINT)) +case DoubleType => Some(JdbcType("FLOAT", java.sql.Types.FLOAT)) +case _ if !SQLConf.get.legacyMsSqlServerNumericMappingEnabled => JdbcUtils.getCommonJDBCType(dt) Review Comment: If question is about a `!SQLConf.get.legacyMsSqlServerNumericMappingEnabled`, it is added here because there is this config, false by default, and when it is set, some type mapping shouldn't be supported (not sure which, or why there is this config), but if we didn't have this check here, some tests with this config would fail, as we would, for example convert ShortType to `SMALLINT` even tho we shouldn't. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
Re: [PR] [SPARK-48354][SQL] JDBC Connectors predicate pushdown testing [spark]
stefanbuk-db commented on code in PR #46642: URL: https://github.com/apache/spark/pull/46642#discussion_r1606922807 ## sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala: ## @@ -155,7 +162,8 @@ private case class PostgresDialect() extends JdbcDialect with SQLConfHelper { getJDBCType(et).map(_.databaseTypeDefinition) .orElse(JdbcUtils.getCommonJDBCType(et).map(_.databaseTypeDefinition)) .map(typeName => JdbcType(s"$typeName[]", java.sql.Types.ARRAY)) -case _ => None +case LongType => Some(JdbcType("BIGINT", Types.BIGINT)) +case _ => JdbcUtils.getCommonJDBCType(dt) Review Comment: Not sure if this is what you mean but in visitCast we have `getJDBCType(dataType).map(_.databaseTypeDefinition).getOrElse(dataType.typeName)`, so if we return None, `JdbcUtils.getCommonJDBCType` won't be called here. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
Re: [PR] [SPARK-48354][SQL] JDBC Connectors predicate pushdown testing [spark]
stefanbuk-db commented on code in PR #46642: URL: https://github.com/apache/spark/pull/46642#discussion_r1606917652 ## connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySqlPushdownIntegrationSuite.scala: ## @@ -0,0 +1,130 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.jdbc.v2 + +import java.sql.Connection + +import test.scala.org.apache.spark.sql.jdbc.v2.V2JDBCPushdownTest + +import org.apache.spark.SparkConf +import org.apache.spark.sql.DataFrame +import org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog +import org.apache.spark.sql.jdbc.{DatabaseOnDocker, DockerJDBCIntegrationSuite, MySQLDatabaseOnDocker} + +class MySqlPushdownIntegrationSuite + extends DockerJDBCIntegrationSuite +with V2JDBCPushdownTest { Review Comment: We could. That would run all tests from `V2JDBCTest` as well? Not sure we want that? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
Re: [PR] [SPARK-48354][SQL] JDBC Connectors predicate pushdown testing [spark]
milastdbx commented on code in PR #46642: URL: https://github.com/apache/spark/pull/46642#discussion_r1606871003 ## connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerPushdownIntegrationSuite.scala: ## @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.jdbc.v2 + +import java.sql.Connection + +import test.scala.org.apache.spark.sql.jdbc.v2.V2JDBCPushdownTest + +import org.apache.spark.SparkConf +import org.apache.spark.sql.DataFrame +import org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog +import org.apache.spark.sql.jdbc.{DatabaseOnDocker, DockerJDBCIntegrationSuite, MsSQLServerDatabaseOnDocker} + +class MsSqlServerPushdownIntegrationSuite Review Comment: same here ## connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySqlPushdownIntegrationSuite.scala: ## @@ -0,0 +1,130 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.jdbc.v2 + +import java.sql.Connection + +import test.scala.org.apache.spark.sql.jdbc.v2.V2JDBCPushdownTest + +import org.apache.spark.SparkConf +import org.apache.spark.sql.DataFrame +import org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog +import org.apache.spark.sql.jdbc.{DatabaseOnDocker, DockerJDBCIntegrationSuite, MySQLDatabaseOnDocker} + +class MySqlPushdownIntegrationSuite + extends DockerJDBCIntegrationSuite +with V2JDBCPushdownTest { Review Comment: why don't you just extend existing MySqlIntegrationSuite ? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
Re: [PR] [SPARK-48354][SQL] JDBC Connectors predicate pushdown testing [spark]
milastdbx commented on code in PR #46642: URL: https://github.com/apache/spark/pull/46642#discussion_r1606868106 ## connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCPushdownTest.scala: ## @@ -0,0 +1,387 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package test.scala.org.apache.spark.sql.jdbc.v2 + +import scala.collection.immutable.Seq + +import org.apache.spark.sql.{DataFrame, QueryTest, Row} +import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LocalLimit} +import org.apache.spark.sql.execution.FilterExec +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.tags.DockerTest + +@DockerTest Review Comment: Should we add a way to filterout tests? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
Re: [PR] [SPARK-48354][SQL] JDBC Connectors predicate pushdown testing [spark]
milastdbx commented on code in PR #46642: URL: https://github.com/apache/spark/pull/46642#discussion_r1606852425 ## connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCPushdownTest.scala: ## @@ -0,0 +1,387 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package test.scala.org.apache.spark.sql.jdbc.v2 + +import scala.collection.immutable.Seq + +import org.apache.spark.sql.{DataFrame, QueryTest, Row} +import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LocalLimit} +import org.apache.spark.sql.execution.FilterExec +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.tags.DockerTest + +@DockerTest Review Comment: Why did you put this in docker-integration tests folder ? this seems generic to jdbc ## sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala: ## @@ -155,7 +162,8 @@ private case class PostgresDialect() extends JdbcDialect with SQLConfHelper { getJDBCType(et).map(_.databaseTypeDefinition) .orElse(JdbcUtils.getCommonJDBCType(et).map(_.databaseTypeDefinition)) .map(typeName => JdbcType(s"$typeName[]", java.sql.Types.ARRAY)) -case _ => None +case LongType => Some(JdbcType("BIGINT", Types.BIGINT)) +case _ => JdbcUtils.getCommonJDBCType(dt) Review Comment: none code path returns this as well ? ## connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCPushdownTest.scala: ## @@ -0,0 +1,387 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package test.scala.org.apache.spark.sql.jdbc.v2 + +import scala.collection.immutable.Seq + +import org.apache.spark.sql.{DataFrame, QueryTest, Row} +import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LocalLimit} +import org.apache.spark.sql.execution.FilterExec +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.tags.DockerTest + +@DockerTest Review Comment: remove this ## sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala: ## @@ -155,7 +162,8 @@ private case class PostgresDialect() extends JdbcDialect with SQLConfHelper { getJDBCType(et).map(_.databaseTypeDefinition) .orElse(JdbcUtils.getCommonJDBCType(et).map(_.databaseTypeDefinition)) .map(typeName => JdbcType(s"$typeName[]", java.sql.Types.ARRAY)) -case _ => None +case LongType => Some(JdbcType("BIGINT", Types.BIGINT)) +case _ => JdbcUtils.getCommonJDBCType(dt) Review Comment: When you return None I mean, `JdbcUtils.getCommonJDBCType` should be called ? ## sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala: ## @@ -141,6 +160,9 @@ private case class MsSqlServerDialect() extends JdbcDialect { case ShortType if !SQLConf.get.legacyMsSqlServerNumericMappingEnabled => Some(JdbcType("SMALLINT", java.sql.Types.SMALLINT)) case ByteType => Some(JdbcType("SMALLINT", java.sql.Types.TINYINT)) +case LongType => Some(JdbcType("BIGINT", java.sql.Types.BIGINT)) +case DoubleType => Some(JdbcType("FLOAT", java.sql.Types.FLOAT)) +case _ if !SQLConf.get.legacyMsSqlServerNumericMappingEnabled => JdbcUtils.getCommonJDBCType(dt) Review Comment: why this ? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the