Re: [PR] [SPARK-48354][SQL] JDBC Connectors predicate pushdown testing [spark]

2024-05-22 Thread via GitHub


urosstan-db commented on code in PR #46642:
URL: https://github.com/apache/spark/pull/46642#discussion_r1610243753


##
connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCPushdownTest.scala:
##
@@ -0,0 +1,385 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.jdbc.v2
+
+import scala.collection.immutable.Seq
+
+import org.apache.spark.sql.{DataFrame, QueryTest, Row}
+import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LocalLimit}
+import org.apache.spark.sql.execution.FilterExec
+import org.apache.spark.sql.test.SharedSparkSession
+
+trait V2JDBCPushdownTest extends SharedSparkSession {
+  protected def isFilterRemoved(df: DataFrame): Boolean = {
+    df.queryExecution.sparkPlan.collectFirst {
+      case f: FilterExec => f
+    }.isEmpty
+  }
+
+  protected def isAggregateRemoved(df: DataFrame): Boolean = {
+    df.queryExecution.optimizedPlan.collect {
+      case agg: Aggregate => agg
+    }.isEmpty
+  }
+
+  private def isLimitPushed(df: DataFrame): Boolean = {
+    df.queryExecution.optimizedPlan.collect {
+      case lim: LocalLimit => lim
+    }.isEmpty
+  }
+
+  protected val catalog: String
+
+  protected val tablePrefix: String
+
+  protected val schema: String
+
+  protected def executeUpdate(sql: String): Unit
+
+  protected def commonAssertionOnDataFrame(df: DataFrame): Unit
+
+  protected def prepareTable(): Unit = {
+    executeUpdate(
+      s"""CREATE SCHEMA "$schema""""
+    )
+
+    executeUpdate(
+      s"""CREATE TABLE "$schema"."$tablePrefix"
+         | (id INTEGER, st STRING, num_col INT);""".stripMargin
+    )
+
+    executeUpdate(
+      s"""CREATE TABLE "$schema"."${tablePrefix}_coalesce"
+         | (id INTEGER, col1 VARCHAR(128), col2 INT);""".stripMargin
+    )
+
+    executeUpdate(
+      s"""CREATE TABLE "$schema"."${tablePrefix}_string_test"
+         | (id INTEGER, st STRING, num_col INT);""".stripMargin
+    )
+
+    executeUpdate(
+      s"""CREATE TABLE "$schema"."${tablePrefix}_with_nulls"
+         | (id INTEGER, st STRING);""".stripMargin
+    )
+
+    executeUpdate(
+      s"""CREATE TABLE "$schema"."${tablePrefix}_numeric_test"
+         | (id INTEGER, dec_col DECIMAL(10, 2));""".stripMargin
+    )
+  }
+
+  protected def prepareData(): Unit = {
+
+    prepareTable()
+
+    executeUpdate(s"""INSERT INTO "$schema"."${tablePrefix}_coalesce" VALUES (1, NULL, 1)""")
+    executeUpdate(s"""INSERT INTO "$schema"."${tablePrefix}_coalesce" VALUES (2, '2', NULL)""")
+    executeUpdate(s"""INSERT INTO "$schema"."${tablePrefix}_coalesce" VALUES (3, NULL, NULL)""")
+
+    executeUpdate(
+      s"""INSERT INTO "$schema"."${tablePrefix}_with_nulls" VALUES (1, 'first')""")
+    executeUpdate(
+      s"""INSERT INTO "$schema"."${tablePrefix}_with_nulls" VALUES (2, 'second')""")
+    executeUpdate(
+      s"""INSERT INTO "$schema"."${tablePrefix}_with_nulls" VALUES (3, 'third')""")
+    executeUpdate(
+      s"""INSERT INTO "$schema"."${tablePrefix}_with_nulls" VALUES (NULL, 'null')""")
+
+    executeUpdate(
+      s"""INSERT INTO "$schema"."${tablePrefix}_string_test" VALUES (0, 'ab''', 1000)""")
+    executeUpdate(
+      s"""INSERT INTO "$schema"."${tablePrefix}_string_test" VALUES (0, 'FiRs''T', 1000)""")
+    executeUpdate(
+      s"""INSERT INTO "$schema"."${tablePrefix}_string_test" VALUES (0, 'sE Co nD', 1000)""")
+    executeUpdate(
+      s"""INSERT INTO "$schema"."${tablePrefix}_string_test" VALUES (0, '   forth   ', 1000)""")
+
+    executeUpdate(s"""INSERT INTO "$schema"."$tablePrefix" VALUES (1, 'ab', 1000)""")

Review Comment:
   In Spark, NULLs should be ignored by aggregates; I am not sure how other databases handle NULLs in aggregates.
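   For reference, a minimal spark-shell sketch (not part of the PR) of the Spark behaviour I mean; it assumes `spark.implicits._` is in scope:

```scala
import org.apache.spark.sql.functions.{count, sum}

// Most Spark aggregates skip NULL inputs, so over 1000, NULL, 800
// SUM yields 1800 and COUNT(num_col) yields 2.
val df = Seq((1, Some(1000)), (2, None), (3, Some(800))).toDF("id", "num_col")
df.agg(sum("num_col"), count("num_col")).show()
```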




Re: [PR] [SPARK-48354][SQL] JDBC Connectors predicate pushdown testing [spark]

2024-05-22 Thread via GitHub


urosstan-db commented on code in PR #46642:
URL: https://github.com/apache/spark/pull/46642#discussion_r1610235091


##
connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCPushdownTest.scala:
##
@@ -0,0 +1,385 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.jdbc.v2
+
+import scala.collection.immutable.Seq
+
+import org.apache.spark.sql.{DataFrame, QueryTest, Row}
+import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LocalLimit}
+import org.apache.spark.sql.execution.FilterExec
+import org.apache.spark.sql.test.SharedSparkSession
+
+trait V2JDBCPushdownTest extends SharedSparkSession {
+  protected def isFilterRemoved(df: DataFrame): Boolean = {
+    df.queryExecution.sparkPlan.collectFirst {
+      case f: FilterExec => f
+    }.isEmpty
+  }
+
+  protected def isAggregateRemoved(df: DataFrame): Boolean = {
+    df.queryExecution.optimizedPlan.collect {
+      case agg: Aggregate => agg
+    }.isEmpty
+  }
+
+  private def isLimitPushed(df: DataFrame): Boolean = {
+    df.queryExecution.optimizedPlan.collect {
+      case lim: LocalLimit => lim
+    }.isEmpty
+  }
+
+  protected val catalog: String
+
+  protected val tablePrefix: String
+
+  protected val schema: String
+
+  protected def executeUpdate(sql: String): Unit
+
+  protected def commonAssertionOnDataFrame(df: DataFrame): Unit
+
+  protected def prepareTable(): Unit = {
+    executeUpdate(
+      s"""CREATE SCHEMA "$schema""""
+    )
+
+    executeUpdate(
+      s"""CREATE TABLE "$schema"."$tablePrefix"
+         | (id INTEGER, st STRING, num_col INT);""".stripMargin
+    )
+
+    executeUpdate(
+      s"""CREATE TABLE "$schema"."${tablePrefix}_coalesce"
+         | (id INTEGER, col1 VARCHAR(128), col2 INT);""".stripMargin
+    )
+
+    executeUpdate(
+      s"""CREATE TABLE "$schema"."${tablePrefix}_string_test"
+         | (id INTEGER, st STRING, num_col INT);""".stripMargin
+    )
+
+    executeUpdate(
+      s"""CREATE TABLE "$schema"."${tablePrefix}_with_nulls"
+         | (id INTEGER, st STRING);""".stripMargin
+    )
+
+    executeUpdate(
+      s"""CREATE TABLE "$schema"."${tablePrefix}_numeric_test"
+         | (id INTEGER, dec_col DECIMAL(10, 2));""".stripMargin
+    )
+  }
+
+  protected def prepareData(): Unit = {
+
+    prepareTable()
+
+    executeUpdate(s"""INSERT INTO "$schema"."${tablePrefix}_coalesce" VALUES (1, NULL, 1)""")
+    executeUpdate(s"""INSERT INTO "$schema"."${tablePrefix}_coalesce" VALUES (2, '2', NULL)""")
+    executeUpdate(s"""INSERT INTO "$schema"."${tablePrefix}_coalesce" VALUES (3, NULL, NULL)""")
+
+    executeUpdate(
+      s"""INSERT INTO "$schema"."${tablePrefix}_with_nulls" VALUES (1, 'first')""")
+    executeUpdate(
+      s"""INSERT INTO "$schema"."${tablePrefix}_with_nulls" VALUES (2, 'second')""")
+    executeUpdate(
+      s"""INSERT INTO "$schema"."${tablePrefix}_with_nulls" VALUES (3, 'third')""")
+    executeUpdate(
+      s"""INSERT INTO "$schema"."${tablePrefix}_with_nulls" VALUES (NULL, 'null')""")
+
+    executeUpdate(
+      s"""INSERT INTO "$schema"."${tablePrefix}_string_test" VALUES (0, 'ab''', 1000)""")
+    executeUpdate(
+      s"""INSERT INTO "$schema"."${tablePrefix}_string_test" VALUES (0, 'FiRs''T', 1000)""")
+    executeUpdate(
+      s"""INSERT INTO "$schema"."${tablePrefix}_string_test" VALUES (0, 'sE Co nD', 1000)""")
+    executeUpdate(
+      s"""INSERT INTO "$schema"."${tablePrefix}_string_test" VALUES (0, '   forth   ', 1000)""")
+
+    executeUpdate(s"""INSERT INTO "$schema"."$tablePrefix" VALUES (1, 'ab', 1000)""")

Review Comment:
   It might be good to insert one row with a NULL int column, to test aggregate functions.
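   For example, a hypothetical extra row (values made up, not from the PR) in `prepareData` would make aggregates over `num_col` see a NULL:

```scala
executeUpdate(
  s"""INSERT INTO "$schema"."${tablePrefix}_string_test" VALUES (0, 'with null', NULL)""")
```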




Re: [PR] [SPARK-48354][SQL] JDBC Connectors predicate pushdown testing [spark]

2024-05-21 Thread via GitHub


andrej-db commented on code in PR #46642:
URL: https://github.com/apache/spark/pull/46642#discussion_r1608401795


##
connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCPushdownTest.scala:
##
@@ -0,0 +1,385 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.jdbc.v2
+
+import scala.collection.immutable.Seq
+
+import org.apache.spark.sql.{DataFrame, QueryTest, Row}
+import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LocalLimit}
+import org.apache.spark.sql.execution.FilterExec
+import org.apache.spark.sql.test.SharedSparkSession
+
+trait V2JDBCPushdownTest extends SharedSparkSession {
+  protected def isFilterRemoved(df: DataFrame): Boolean = {
+    df.queryExecution.sparkPlan.collectFirst {
+      case f: FilterExec => f
+    }.isEmpty
+  }
+
+  protected def isAggregateRemoved(df: DataFrame): Boolean = {
+    df.queryExecution.optimizedPlan.collect {
+      case agg: Aggregate => agg
+    }.isEmpty
+  }
+
+  private def isLimitPushed(df: DataFrame): Boolean = {
+    df.queryExecution.optimizedPlan.collect {
+      case lim: LocalLimit => lim
+    }.isEmpty
+  }
+
+  protected val catalog: String
+
+  protected val tablePrefix: String
+
+  protected val schema: String
+
+  protected def executeUpdate(sql: String): Unit
+
+  protected def commonAssertionOnDataFrame(df: DataFrame): Unit
+
+  protected def prepareTable(): Unit = {
+    executeUpdate(
+      s"""CREATE SCHEMA "$schema""""
+    )
+
+    executeUpdate(
+      s"""CREATE TABLE "$schema"."$tablePrefix"
+         | (id INTEGER, st STRING, num_col INT);""".stripMargin
+    )
+
+    executeUpdate(
+      s"""CREATE TABLE "$schema"."${tablePrefix}_coalesce"
+         | (id INTEGER, col1 VARCHAR(128), col2 INT);""".stripMargin
+    )
+
+    executeUpdate(
+      s"""CREATE TABLE "$schema"."${tablePrefix}_string_test"
+         | (id INTEGER, st STRING, num_col INT);""".stripMargin
+    )
+
+    executeUpdate(
+      s"""CREATE TABLE "$schema"."${tablePrefix}_with_nulls"
+         | (id INTEGER, st STRING);""".stripMargin
+    )
+
+    executeUpdate(
+      s"""CREATE TABLE "$schema"."${tablePrefix}_numeric_test"
+         | (id INTEGER, dec_col DECIMAL(10, 2));""".stripMargin
+    )
+  }
+
+  protected def prepareData(): Unit = {
+
+    prepareTable()
+
+    executeUpdate(s"""INSERT INTO "$schema"."${tablePrefix}_coalesce" VALUES (1, NULL, 1)""")
+    executeUpdate(s"""INSERT INTO "$schema"."${tablePrefix}_coalesce" VALUES (2, '2', NULL)""")
+    executeUpdate(s"""INSERT INTO "$schema"."${tablePrefix}_coalesce" VALUES (3, NULL, NULL)""")
+
+    executeUpdate(
+      s"""INSERT INTO "$schema"."${tablePrefix}_with_nulls" VALUES (1, 'first')""")
+    executeUpdate(
+      s"""INSERT INTO "$schema"."${tablePrefix}_with_nulls" VALUES (2, 'second')""")
+    executeUpdate(
+      s"""INSERT INTO "$schema"."${tablePrefix}_with_nulls" VALUES (3, 'third')""")
+    executeUpdate(
+      s"""INSERT INTO "$schema"."${tablePrefix}_with_nulls" VALUES (NULL, 'null')""")
+
+    executeUpdate(
+      s"""INSERT INTO "$schema"."${tablePrefix}_string_test" VALUES (0, 'ab''', 1000)""")
+    executeUpdate(
+      s"""INSERT INTO "$schema"."${tablePrefix}_string_test" VALUES (0, 'FiRs''T', 1000)""")
+    executeUpdate(
+      s"""INSERT INTO "$schema"."${tablePrefix}_string_test" VALUES (0, 'sE Co nD', 1000)""")
+    executeUpdate(
+      s"""INSERT INTO "$schema"."${tablePrefix}_string_test" VALUES (0, '   forth   ', 1000)""")
+
+    executeUpdate(s"""INSERT INTO "$schema"."$tablePrefix" VALUES (1, 'ab', 1000)""")
+    executeUpdate(s"""INSERT INTO "$schema"."$tablePrefix" VALUES (2, 'aba', NULL)""")
+    executeUpdate(s"""INSERT INTO "$schema"."$tablePrefix" VALUES (3, 'abb', 800)""")
+    executeUpdate(s"""INSERT INTO "$schema"."$tablePrefix" VALUES (4, 'abc', NULL)""")
+    executeUpdate(s"""INSERT INTO "$schema"."$tablePrefix" VALUES (5, 'abd', 1200)""")
+    executeUpdate(s"""INSERT INTO "$schema"."$tablePrefix" VALUES (6, 'abe', 1250)""")
+    executeUpdate(s"""INSERT INTO "$schema"."$tablePrefix" VALUES (7, 'abf', 1200)""")
+    executeUpdate(s"""INSERT INTO "$schema"."$tablePrefix" VALUES (8, 'abg',

Re: [PR] [SPARK-48354][SQL] JDBC Connectors predicate pushdown testing [spark]

2024-05-21 Thread via GitHub


andrej-db commented on code in PR #46642:
URL: https://github.com/apache/spark/pull/46642#discussion_r1608291743


##
connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCPushdownTest.scala:
##
@@ -0,0 +1,385 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.jdbc.v2
+
+import scala.collection.immutable.Seq
+
+import org.apache.spark.sql.{DataFrame, QueryTest, Row}
+import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LocalLimit}
+import org.apache.spark.sql.execution.FilterExec
+import org.apache.spark.sql.test.SharedSparkSession
+
+trait V2JDBCPushdownTest extends SharedSparkSession {
+  protected def isFilterRemoved(df: DataFrame): Boolean = {
+    df.queryExecution.sparkPlan.collectFirst {
+      case f: FilterExec => f
+    }.isEmpty
+  }
+
+  protected def isAggregateRemoved(df: DataFrame): Boolean = {
+    df.queryExecution.optimizedPlan.collect {
+      case agg: Aggregate => agg
+    }.isEmpty
+  }
+
+  private def isLimitPushed(df: DataFrame): Boolean = {
+    df.queryExecution.optimizedPlan.collect {
+      case lim: LocalLimit => lim
+    }.isEmpty
+  }
+
+  protected val catalog: String
+
+  protected val tablePrefix: String
+
+  protected val schema: String
+
+  protected def executeUpdate(sql: String): Unit
+
+  protected def commonAssertionOnDataFrame(df: DataFrame): Unit
+
+  protected def prepareTable(): Unit = {
+    executeUpdate(
+      s"""CREATE SCHEMA "$schema""""
+    )
+
+    executeUpdate(
+      s"""CREATE TABLE "$schema"."$tablePrefix"
+         | (id INTEGER, st STRING, num_col INT);""".stripMargin
+    )
+
+    executeUpdate(
+      s"""CREATE TABLE "$schema"."${tablePrefix}_coalesce"
+         | (id INTEGER, col1 VARCHAR(128), col2 INT);""".stripMargin
+    )
+
+    executeUpdate(
+      s"""CREATE TABLE "$schema"."${tablePrefix}_string_test"
+         | (id INTEGER, st STRING, num_col INT);""".stripMargin
+    )
+
+    executeUpdate(
+      s"""CREATE TABLE "$schema"."${tablePrefix}_with_nulls"
+         | (id INTEGER, st STRING);""".stripMargin
+    )
+
+    executeUpdate(
+      s"""CREATE TABLE "$schema"."${tablePrefix}_numeric_test"
+         | (id INTEGER, dec_col DECIMAL(10, 2));""".stripMargin
+    )
+  }
+
+  protected def prepareData(): Unit = {
+
+    prepareTable()
+
+    executeUpdate(s"""INSERT INTO "$schema"."${tablePrefix}_coalesce" VALUES (1, NULL, 1)""")
+    executeUpdate(s"""INSERT INTO "$schema"."${tablePrefix}_coalesce" VALUES (2, '2', NULL)""")
+    executeUpdate(s"""INSERT INTO "$schema"."${tablePrefix}_coalesce" VALUES (3, NULL, NULL)""")
+
+    executeUpdate(
+      s"""INSERT INTO "$schema"."${tablePrefix}_with_nulls" VALUES (1, 'first')""")
+    executeUpdate(
+      s"""INSERT INTO "$schema"."${tablePrefix}_with_nulls" VALUES (2, 'second')""")
+    executeUpdate(
+      s"""INSERT INTO "$schema"."${tablePrefix}_with_nulls" VALUES (3, 'third')""")
+    executeUpdate(
+      s"""INSERT INTO "$schema"."${tablePrefix}_with_nulls" VALUES (NULL, 'null')""")
+
+    executeUpdate(
+      s"""INSERT INTO "$schema"."${tablePrefix}_string_test" VALUES (0, 'ab''', 1000)""")
+    executeUpdate(
+      s"""INSERT INTO "$schema"."${tablePrefix}_string_test" VALUES (0, 'FiRs''T', 1000)""")
+    executeUpdate(
+      s"""INSERT INTO "$schema"."${tablePrefix}_string_test" VALUES (0, 'sE Co nD', 1000)""")
+    executeUpdate(
+      s"""INSERT INTO "$schema"."${tablePrefix}_string_test" VALUES (0, '   forth   ', 1000)""")
+
+    executeUpdate(s"""INSERT INTO "$schema"."$tablePrefix" VALUES (1, 'ab', 1000)""")
+    executeUpdate(s"""INSERT INTO "$schema"."$tablePrefix" VALUES (2, 'aba', NULL)""")
+    executeUpdate(s"""INSERT INTO "$schema"."$tablePrefix" VALUES (3, 'abb', 800)""")
+    executeUpdate(s"""INSERT INTO "$schema"."$tablePrefix" VALUES (4, 'abc', NULL)""")
+    executeUpdate(s"""INSERT INTO "$schema"."$tablePrefix" VALUES (5, 'abd', 1200)""")
+    executeUpdate(s"""INSERT INTO "$schema"."$tablePrefix" VALUES (6, 'abe', 1250)""")
+    executeUpdate(s"""INSERT INTO "$schema"."$tablePrefix" VALUES (7, 'abf', 1200)""")
+    executeUpdate(s"""INSERT INTO "$schema"."$tablePrefix" VALUES (8, 'abg',

Re: [PR] [SPARK-48354][SQL] JDBC Connectors predicate pushdown testing [spark]

2024-05-20 Thread via GitHub


stefanbuk-db commented on code in PR #46642:
URL: https://github.com/apache/spark/pull/46642#discussion_r1606936510


##
connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerPushdownIntegrationSuite.scala:
##
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.jdbc.v2
+
+import java.sql.Connection
+
+import test.scala.org.apache.spark.sql.jdbc.v2.V2JDBCPushdownTest
+
+import org.apache.spark.SparkConf
+import org.apache.spark.sql.DataFrame
+import org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog
+import org.apache.spark.sql.jdbc.{DatabaseOnDocker, DockerJDBCIntegrationSuite, MsSQLServerDatabaseOnDocker}
+
+class MsSqlServerPushdownIntegrationSuite

Review Comment:
   Same answer as above






Re: [PR] [SPARK-48354][SQL] JDBC Connectors predicate pushdown testing [spark]

2024-05-20 Thread via GitHub


stefanbuk-db commented on code in PR #46642:
URL: https://github.com/apache/spark/pull/46642#discussion_r1606935984


##
connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCPushdownTest.scala:
##
@@ -0,0 +1,387 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package test.scala.org.apache.spark.sql.jdbc.v2
+
+import scala.collection.immutable.Seq
+
+import org.apache.spark.sql.{DataFrame, QueryTest, Row}
+import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LocalLimit}
+import org.apache.spark.sql.execution.FilterExec
+import org.apache.spark.sql.test.SharedSparkSession
+import org.apache.spark.tags.DockerTest
+
+@DockerTest

Review Comment:
   Well, it seemed like a fitting place, but the trait can be used for more than Docker integration tests; what would be a fitting place for it?
   Also, we could add a way to filter out tests here, but suites implementing this trait already extend `SparkFunSuite` and use its `override def excluded`, so do we need another mechanism for that in this trait?
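   For context, this is roughly how a concrete suite already drops individual tests today via `SparkFunSuite` (the test names below are made up):

```scala
override def excluded: Seq[String] = Seq(
  "hypothetical pushdown test to skip",
  "another hypothetical test to skip")
```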






Re: [PR] [SPARK-48354][SQL] JDBC Connectors predicate pushdown testing [spark]

2024-05-20 Thread via GitHub


stefanbuk-db commented on code in PR #46642:
URL: https://github.com/apache/spark/pull/46642#discussion_r1606926117


##
sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala:
##
@@ -141,6 +160,9 @@ private case class MsSqlServerDialect() extends JdbcDialect {
     case ShortType if !SQLConf.get.legacyMsSqlServerNumericMappingEnabled =>
       Some(JdbcType("SMALLINT", java.sql.Types.SMALLINT))
     case ByteType => Some(JdbcType("SMALLINT", java.sql.Types.TINYINT))
+    case LongType => Some(JdbcType("BIGINT", java.sql.Types.BIGINT))
+    case DoubleType => Some(JdbcType("FLOAT", java.sql.Types.FLOAT))
+    case _ if !SQLConf.get.legacyMsSqlServerNumericMappingEnabled =>
+      JdbcUtils.getCommonJDBCType(dt)

Review Comment:
   If the question is about `!SQLConf.get.legacyMsSqlServerNumericMappingEnabled`: it is added here because this config exists (false by default), and when it is enabled some type mappings shouldn't be applied (I'm not sure which ones, or why the config exists). Without this check, some tests that enable the config would fail, because we would, for example, convert ShortType to `SMALLINT` even though we shouldn't.
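   To illustrate, a rough sketch (assuming the config key `spark.sql.legacy.mssqlserver.numericMapping.enabled`, the usual `withSQLConf` test helper, and `jdbcUrl` from the Docker suite):

```scala
withSQLConf("spark.sql.legacy.mssqlserver.numericMapping.enabled" -> "true") {
  // With the legacy mapping enabled, ShortType must not be written as SMALLINT,
  // which is why the new fallback is guarded by the same flag.
  val df = spark.range(1).selectExpr("CAST(id AS SHORT) AS s")
  df.write.jdbc(jdbcUrl, s"$schema.legacy_mapping_check", new java.util.Properties)
}
```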






Re: [PR] [SPARK-48354][SQL] JDBC Connectors predicate pushdown testing [spark]

2024-05-20 Thread via GitHub


stefanbuk-db commented on code in PR #46642:
URL: https://github.com/apache/spark/pull/46642#discussion_r1606922807


##
sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala:
##
@@ -155,7 +162,8 @@ private case class PostgresDialect() extends JdbcDialect with SQLConfHelper {
       getJDBCType(et).map(_.databaseTypeDefinition)
         .orElse(JdbcUtils.getCommonJDBCType(et).map(_.databaseTypeDefinition))
         .map(typeName => JdbcType(s"$typeName[]", java.sql.Types.ARRAY))
-    case _ => None
+    case LongType => Some(JdbcType("BIGINT", Types.BIGINT))
+    case _ => JdbcUtils.getCommonJDBCType(dt)

Review Comment:
   Not sure if this is what you mean, but in `visitCast` we have `getJDBCType(dataType).map(_.databaseTypeDefinition).getOrElse(dataType.typeName)`, so if we return None here, `JdbcUtils.getCommonJDBCType` won't be called.
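   In other words, the fallback being asked for would look roughly like this (a sketch, not the current code):

```scala
// Current shape: returning None falls straight through to Spark's type name.
getJDBCType(dataType).map(_.databaseTypeDefinition).getOrElse(dataType.typeName)

// Sketch of the alternative being discussed: consult the common JDBC mapping first.
getJDBCType(dataType)
  .orElse(JdbcUtils.getCommonJDBCType(dataType))
  .map(_.databaseTypeDefinition)
  .getOrElse(dataType.typeName)
```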






Re: [PR] [SPARK-48354][SQL] JDBC Connectors predicate pushdown testing [spark]

2024-05-20 Thread via GitHub


stefanbuk-db commented on code in PR #46642:
URL: https://github.com/apache/spark/pull/46642#discussion_r1606917652


##
connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySqlPushdownIntegrationSuite.scala:
##
@@ -0,0 +1,130 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.jdbc.v2
+
+import java.sql.Connection
+
+import test.scala.org.apache.spark.sql.jdbc.v2.V2JDBCPushdownTest
+
+import org.apache.spark.SparkConf
+import org.apache.spark.sql.DataFrame
+import org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog
+import org.apache.spark.sql.jdbc.{DatabaseOnDocker, DockerJDBCIntegrationSuite, MySQLDatabaseOnDocker}
+
+class MySqlPushdownIntegrationSuite
+  extends DockerJDBCIntegrationSuite
+    with V2JDBCPushdownTest {

Review Comment:
   We could, but that would run all tests from `V2JDBCTest` as well; I'm not sure we want that.
   






Re: [PR] [SPARK-48354][SQL] JDBC Connectors predicate pushdown testing [spark]

2024-05-20 Thread via GitHub


milastdbx commented on code in PR #46642:
URL: https://github.com/apache/spark/pull/46642#discussion_r1606871003


##
connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerPushdownIntegrationSuite.scala:
##
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.jdbc.v2
+
+import java.sql.Connection
+
+import test.scala.org.apache.spark.sql.jdbc.v2.V2JDBCPushdownTest
+
+import org.apache.spark.SparkConf
+import org.apache.spark.sql.DataFrame
+import org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog
+import org.apache.spark.sql.jdbc.{DatabaseOnDocker, DockerJDBCIntegrationSuite, MsSQLServerDatabaseOnDocker}
+
+class MsSqlServerPushdownIntegrationSuite

Review Comment:
   same here



##
connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySqlPushdownIntegrationSuite.scala:
##
@@ -0,0 +1,130 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.jdbc.v2
+
+import java.sql.Connection
+
+import test.scala.org.apache.spark.sql.jdbc.v2.V2JDBCPushdownTest
+
+import org.apache.spark.SparkConf
+import org.apache.spark.sql.DataFrame
+import org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog
+import org.apache.spark.sql.jdbc.{DatabaseOnDocker, DockerJDBCIntegrationSuite, MySQLDatabaseOnDocker}
+
+class MySqlPushdownIntegrationSuite
+  extends DockerJDBCIntegrationSuite
+    with V2JDBCPushdownTest {

Review Comment:
   Why don't you just extend the existing MySqlIntegrationSuite?






Re: [PR] [SPARK-48354][SQL] JDBC Connectors predicate pushdown testing [spark]

2024-05-20 Thread via GitHub


milastdbx commented on code in PR #46642:
URL: https://github.com/apache/spark/pull/46642#discussion_r1606868106


##
connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCPushdownTest.scala:
##
@@ -0,0 +1,387 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package test.scala.org.apache.spark.sql.jdbc.v2
+
+import scala.collection.immutable.Seq
+
+import org.apache.spark.sql.{DataFrame, QueryTest, Row}
+import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LocalLimit}
+import org.apache.spark.sql.execution.FilterExec
+import org.apache.spark.sql.test.SharedSparkSession
+import org.apache.spark.tags.DockerTest
+
+@DockerTest

Review Comment:
   Should we add a way to filter out tests?






Re: [PR] [SPARK-48354][SQL] JDBC Connectors predicate pushdown testing [spark]

2024-05-20 Thread via GitHub


milastdbx commented on code in PR #46642:
URL: https://github.com/apache/spark/pull/46642#discussion_r1606852425


##
connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCPushdownTest.scala:
##
@@ -0,0 +1,387 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package test.scala.org.apache.spark.sql.jdbc.v2
+
+import scala.collection.immutable.Seq
+
+import org.apache.spark.sql.{DataFrame, QueryTest, Row}
+import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LocalLimit}
+import org.apache.spark.sql.execution.FilterExec
+import org.apache.spark.sql.test.SharedSparkSession
+import org.apache.spark.tags.DockerTest
+
+@DockerTest

Review Comment:
   Why did you put this in the docker-integration-tests folder? This seems generic to JDBC.



##
sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala:
##
@@ -155,7 +162,8 @@ private case class PostgresDialect() extends JdbcDialect with SQLConfHelper {
       getJDBCType(et).map(_.databaseTypeDefinition)
         .orElse(JdbcUtils.getCommonJDBCType(et).map(_.databaseTypeDefinition))
         .map(typeName => JdbcType(s"$typeName[]", java.sql.Types.ARRAY))
-    case _ => None
+    case LongType => Some(JdbcType("BIGINT", Types.BIGINT))
+    case _ => JdbcUtils.getCommonJDBCType(dt)

Review Comment:
   Doesn't the `None` code path return this as well?



##
connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCPushdownTest.scala:
##
@@ -0,0 +1,387 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package test.scala.org.apache.spark.sql.jdbc.v2
+
+import scala.collection.immutable.Seq
+
+import org.apache.spark.sql.{DataFrame, QueryTest, Row}
+import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LocalLimit}
+import org.apache.spark.sql.execution.FilterExec
+import org.apache.spark.sql.test.SharedSparkSession
+import org.apache.spark.tags.DockerTest
+
+@DockerTest

Review Comment:
   remove this



##
sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala:
##
@@ -155,7 +162,8 @@ private case class PostgresDialect() extends JdbcDialect with SQLConfHelper {
       getJDBCType(et).map(_.databaseTypeDefinition)
         .orElse(JdbcUtils.getCommonJDBCType(et).map(_.databaseTypeDefinition))
         .map(typeName => JdbcType(s"$typeName[]", java.sql.Types.ARRAY))
-    case _ => None
+    case LongType => Some(JdbcType("BIGINT", Types.BIGINT))
+    case _ => JdbcUtils.getCommonJDBCType(dt)

Review Comment:
   I mean, when you return None, shouldn't `JdbcUtils.getCommonJDBCType` be called?



##
sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala:
##
@@ -141,6 +160,9 @@ private case class MsSqlServerDialect() extends JdbcDialect {
     case ShortType if !SQLConf.get.legacyMsSqlServerNumericMappingEnabled =>
       Some(JdbcType("SMALLINT", java.sql.Types.SMALLINT))
     case ByteType => Some(JdbcType("SMALLINT", java.sql.Types.TINYINT))
+    case LongType => Some(JdbcType("BIGINT", java.sql.Types.BIGINT))
+    case DoubleType => Some(JdbcType("FLOAT", java.sql.Types.FLOAT))
+    case _ if !SQLConf.get.legacyMsSqlServerNumericMappingEnabled =>
+      JdbcUtils.getCommonJDBCType(dt)

Review Comment:
   Why this?


