[ https://issues.apache.org/jira/browse/SPARK-34075?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Hyukjin Kwon updated SPARK-34075: --------------------------------- Target Version/s: 3.1.1 > Hidden directories are being listed for partition inference > ----------------------------------------------------------- > > Key: SPARK-34075 > URL: https://issues.apache.org/jira/browse/SPARK-34075 > Project: Spark > Issue Type: Bug > Components: SQL > Affects Versions: 3.1.0 > Reporter: Burak Yavuz > Priority: Blocker > > Marking this as a blocker since it seems to be a regression. We are running > Delta's tests against Spark 3.1 as part of QA here: > [https://github.com/delta-io/delta/pull/579] > > We have noticed that one of our tests regressed with: > {code:java} > java.lang.AssertionError: assertion failed: Conflicting directory structures > detected. Suspicious paths: > [info] > file:/private/var/folders/_2/xn1c9yr11_93wjdk2vkvmwm00000gp/t/spark-18706bcc-23ea-4853-b8bc-c4cc2a5ed551 > [info] > file:/private/var/folders/_2/xn1c9yr11_93wjdk2vkvmwm00000gp/t/spark-18706bcc-23ea-4853-b8bc-c4cc2a5ed551/_delta_log > [info] > [info] If provided paths are partition directories, please set "basePath" in > the options of the data source to specify the root directory of the table. If > there are multiple root directories, please load them separately and then > union them. 
> [info] at scala.Predef$.assert(Predef.scala:223) > [info] at > org.apache.spark.sql.execution.datasources.PartitioningUtils$.parsePartitions(PartitioningUtils.scala:172) > [info] at > org.apache.spark.sql.execution.datasources.PartitioningUtils$.parsePartitions(PartitioningUtils.scala:104) > [info] at > org.apache.spark.sql.execution.datasources.PartitioningAwareFileIndex.inferPartitioning(PartitioningAwareFileIndex.scala:158) > [info] at > org.apache.spark.sql.execution.datasources.InMemoryFileIndex.partitionSpec(InMemoryFileIndex.scala:73) > [info] at > org.apache.spark.sql.execution.datasources.PartitioningAwareFileIndex.partitionSchema(PartitioningAwareFileIndex.scala:50) > [info] at > org.apache.spark.sql.execution.datasources.DataSource.getOrInferFileFormatSchema(DataSource.scala:167) > [info] at > org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:418) > [info] at > org.apache.spark.sql.execution.datasources.ResolveSQLOnFile$$anonfun$apply$1.applyOrElse(rules.scala:62) > [info] at > org.apache.spark.sql.execution.datasources.ResolveSQLOnFile$$anonfun$apply$1.applyOrElse(rules.scala:45) > [info] at > org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.$anonfun$resolveOperatorsDown$2(AnalysisHelper.scala:108) > [info] at > org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:73) > [info] at > org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.$anonfun$resolveOperatorsDown$1(AnalysisHelper.scala:108) > [info] at > org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.allowInvokingTransformsInAnalyzer(AnalysisHelper.scala:221) > [info] at > org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperatorsDown(AnalysisHelper.scala:106) > [info] at > org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperatorsDown$(AnalysisHelper.scala:104) > [info] at > org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperatorsDown(LogicalPlan.scala:29) > 
[info] at > org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperators(AnalysisHelper.scala:73) > [info] at > org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.resolveOperators$(AnalysisHelper.scala:72) > [info] at > org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperators(LogicalPlan.scala:29) > [info] at > org.apache.spark.sql.execution.datasources.ResolveSQLOnFile.apply(rules.scala:45) > [info] at > org.apache.spark.sql.execution.datasources.ResolveSQLOnFile.apply(rules.scala:40) > [info] at > org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$2(RuleExecutor.scala:216) > [info] at > scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126) > [info] at > scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122) > [info] at scala.collection.immutable.List.foldLeft(List.scala:89) > [info] at > org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$1(RuleExecutor.scala:213) > [info] at > org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$1$adapted(RuleExecutor.scala:205) > [info] at scala.collection.immutable.List.foreach(List.scala:392) > [info] at > org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:205) > [info] at > org.apache.spark.sql.catalyst.analysis.Analyzer.org$apache$spark$sql$catalyst$analysis$Analyzer$$executeSameContext(Analyzer.scala:195) > [info] at > org.apache.spark.sql.catalyst.analysis.Analyzer.execute(Analyzer.scala:189) > {code} > It seems like a hidden directory is not being filtered out, when it actually > should be. -- This message was sent by Atlassian Jira (v8.3.4#803005) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org For additional commands, e-mail: issues-help@spark.apache.org