GitHub user MaxGekk commented on a diff in the pull request: https://github.com/apache/spark/pull/21657#discussion_r198863283 --- Diff: sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala --- @@ -1579,4 +1579,16 @@ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils with Te } } } + + test("SPARK-24676 project required data from parsed data when columnPruning disabled") { + withSQLConf(SQLConf.CSV_PARSER_COLUMN_PRUNING.key -> "false") { + withTempPath { path => + val dir = path.getAbsolutePath + spark.range(10).selectExpr("id % 2 AS p", "id AS c0", "id AS c1").write.partitionBy("p") + .option("header", "true").csv(dir) + val df = spark.read.option("header", "true").csv(dir).selectExpr("sum(p)", "avg(c0)") + checkAnswer(df, Row(5, 4.5)) --- End diff -- Could you use something other than `avg`? I would avoid exact-equality checks on floating-point values. You can easily get `4.49999999999` instead of `4.5`.
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org