Github user gatorsmile commented on a diff in the pull request: https://github.com/apache/spark/pull/22566#discussion_r221158906 --- Diff: sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala --- @@ -653,6 +653,49 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto } } + test("collecting statistics for all columns") { + val table = "update_col_stats_table" + withTable(table) { + sql(s"CREATE TABLE $table (c1 INT, c2 STRING, c3 DOUBLE)") + sql(s"ANALYZE TABLE $table COMPUTE STATISTICS FOR ALL COLUMNS") + val fetchedStats0 = + checkTableStats(table, hasSizeInBytes = true, expectedRowCounts = Some(0)) + assert(fetchedStats0.get.colStats == Map( + "c1" -> CatalogColumnStat(distinctCount = Some(0), min = None, max = None, + nullCount = Some(0), avgLen = Some(4), maxLen = Some(4)), + "c3" -> CatalogColumnStat(distinctCount = Some(0), min = None, max = None, + nullCount = Some(0), avgLen = Some(8), maxLen = Some(8)), + "c2" -> CatalogColumnStat(distinctCount = Some(0), min = None, max = None, + nullCount = Some(0), avgLen = Some(20), maxLen = Some(20)))) + + // Insert new data and analyze: have the latest column stats. 
+ sql(s"INSERT INTO TABLE $table SELECT 1, 'a', 10.0") + sql(s"INSERT INTO TABLE $table SELECT 1, 'b', null") + + sql(s"ANALYZE TABLE $table COMPUTE STATISTICS FOR ALL COLUMNS") + val fetchedStats1 = + checkTableStats(table, hasSizeInBytes = true, expectedRowCounts = Some(2)) + assert(fetchedStats1.get.colStats == Map( + "c1" -> CatalogColumnStat(distinctCount = Some(1), min = Some("1"), max = Some("1"), + nullCount = Some(0), avgLen = Some(4), maxLen = Some(4)), + "c3" -> CatalogColumnStat(distinctCount = Some(1), min = Some("10.0"), max = Some("10.0"), + nullCount = Some(1), avgLen = Some(8), maxLen = Some(8)), + "c2" -> CatalogColumnStat(distinctCount = Some(2), min = None, max = None, + nullCount = Some(0), avgLen = Some(1), maxLen = Some(1)))) + + val e1 = intercept[IllegalArgumentException] { + AnalyzeColumnCommand(TableIdentifier(table), Option(Seq("c1")), true).run(spark) + } + assert(e1.getMessage.contains("Parameter `columnNames` or `allColumns` are" + + " mutually exclusive")) + val e2 = intercept[IllegalArgumentException] { + AnalyzeColumnCommand(TableIdentifier(table), None, false).run(spark) + } + assert(e2.getMessage.contains("Parameter `columnNames` or `allColumns` are" + + " mutually exclusive")) --- End diff -- Could we create a separate test case between lines 686 and 695?
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org