Repository: spark Updated Branches: refs/heads/master a7ab7f234 -> 4427a96bc
[SPARK-25806][SQL] The instance of FileSplit is redundant ## What changes were proposed in this pull request? The instance of `FileSplit` is redundant in the `ParquetFileFormat` and `hive/orc/OrcFileFormat` classes. ## How was this patch tested? Existing unit tests in `ParquetQuerySuite.scala` and `HiveOrcQuerySuite.scala`. Closes #22802 from 10110346/FileSplitnotneed. Authored-by: liuxian <liu.xi...@zte.com.cn> Signed-off-by: Sean Owen <sean.o...@databricks.com> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4427a96b Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4427a96b Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4427a96b Branch: refs/heads/master Commit: 4427a96bcea625bc51fc5e0e999f170ad537a2fc Parents: a7ab7f2 Author: liuxian <liu.xi...@zte.com.cn> Authored: Sun Oct 28 17:39:16 2018 -0500 Committer: Sean Owen <sean.o...@databricks.com> Committed: Sun Oct 28 17:39:16 2018 -0500 ---------------------------------------------------------------------- .../datasources/parquet/ParquetFileFormat.scala | 13 +++++-------- .../org/apache/spark/sql/hive/orc/OrcFileFormat.scala | 3 +-- 2 files changed, 6 insertions(+), 10 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/4427a96b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala index ea4f159..f04502d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala @@ -352,17 
+352,14 @@ class ParquetFileFormat (file: PartitionedFile) => { assert(file.partitionValues.numFields == partitionSchema.size) - val fileSplit = - new FileSplit(new Path(new URI(file.filePath)), file.start, file.length, Array.empty) - val filePath = fileSplit.getPath - + val filePath = new Path(new URI(file.filePath)) val split = new org.apache.parquet.hadoop.ParquetInputSplit( filePath, - fileSplit.getStart, - fileSplit.getStart + fileSplit.getLength, - fileSplit.getLength, - fileSplit.getLocations, + file.start, + file.start + file.length, + file.length, + Array.empty, null) val sharedConf = broadcastedHadoopConf.value.value http://git-wip-us.apache.org/repos/asf/spark/blob/4427a96b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala ---------------------------------------------------------------------- diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala index de8085f..89e6ea8 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala @@ -154,13 +154,12 @@ class OrcFileFormat extends FileFormat with DataSourceRegister with Serializable val job = Job.getInstance(conf) FileInputFormat.setInputPaths(job, file.filePath) - val fileSplit = new FileSplit(filePath, file.start, file.length, Array.empty) // Custom OrcRecordReader is used to get // ObjectInspector during recordReader creation itself and can // avoid NameNode call in unwrapOrcStructs per file. // Specifically would be helpful for partitioned datasets. 
val orcReader = OrcFile.createReader(filePath, OrcFile.readerOptions(conf)) - new SparkOrcNewRecordReader(orcReader, conf, fileSplit.getStart, fileSplit.getLength) + new SparkOrcNewRecordReader(orcReader, conf, file.start, file.length) } val recordsIterator = new RecordReaderIterator[OrcStruct](orcRecordReader) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org