Github user rdblue commented on a diff in the pull request:

    https://github.com/apache/spark/pull/20726#discussion_r172317377

    --- Diff: sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2ScanExec.scala ---
    @@ -46,34 +48,46 @@ case class DataSourceV2ScanExec(
               new DataSourcePartitioning(
                 s.outputPartitioning(), AttributeMap(output.map(a => a -> a.name)))

    +    case _ if readerFactories.size == 1 => SinglePartition
    +
         case _ => super.outputPartitioning
       }

    -  private lazy val readerFactories: java.util.List[DataReaderFactory[UnsafeRow]] = reader match {
    -    case r: SupportsScanUnsafeRow => r.createUnsafeRowReaderFactories()
    +  private lazy val readerFactories: Seq[DataReaderFactory[_]] = reader match {
    --- End diff --

    Why not separate the cases for columnar batch and unsafe rows? That would avoid needing to cast this later to `Seq[DataReaderFactory[UnsafeRow]]` and `Seq[DataReaderFactory[ColumnarBatch]]`, which isn't a very clean solution.
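To make the suggested separation concrete, here is a minimal, self-contained sketch of the two shapes being compared. The types below are toy stand-ins for Spark's DataSource V2 interfaces (so the snippet compiles without Spark on the classpath), and the field names `unsafeRowFactories` / `batchFactories` are hypothetical, not taken from the PR:

```scala
// Toy stand-ins for Spark's DataSource V2 reader interfaces.
trait DataReaderFactory[T]
class UnsafeRow
class ColumnarBatch

trait DataSourceReader
trait SupportsScanUnsafeRow extends DataSourceReader {
  def createUnsafeRowReaderFactories(): Seq[DataReaderFactory[UnsafeRow]]
}
trait SupportsScanColumnarBatch extends DataSourceReader {
  def createBatchDataReaderFactories(): Seq[DataReaderFactory[ColumnarBatch]]
}

class ScanExec(reader: DataSourceReader) {
  // Shape in the PR: one wildcard-typed field. The element type is erased
  // at the declaration, so downstream code must recover it with an
  // unchecked cast, e.g.
  //   readerFactories.asInstanceOf[Seq[DataReaderFactory[UnsafeRow]]]
  lazy val readerFactories: Seq[DataReaderFactory[_]] = reader match {
    case r: SupportsScanColumnarBatch => r.createBatchDataReaderFactories()
    case r: SupportsScanUnsafeRow     => r.createUnsafeRowReaderFactories()
    case _                            => Seq.empty
  }

  // Shape the comment suggests: separate, precisely typed fields. Each one
  // keeps its element type, so no asInstanceOf is needed at the use sites.
  lazy val unsafeRowFactories: Seq[DataReaderFactory[UnsafeRow]] = reader match {
    case r: SupportsScanUnsafeRow => r.createUnsafeRowReaderFactories()
    case _                        => Seq.empty
  }
  lazy val batchFactories: Seq[DataReaderFactory[ColumnarBatch]] = reader match {
    case r: SupportsScanColumnarBatch => r.createBatchDataReaderFactories()
    case _                            => Seq.empty
  }
}
```

With the single wildcard-typed field, the compiler loses the element type at the declaration and every consumer must reassert it with an unchecked cast; with split fields, the element type is preserved where each list is built and the casts disappear.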