spark git commit: [SPARK-25806][SQL] The instance of FileSplit is redundant

srowen Sun, 28 Oct 2018 15:39:25 -0700

Repository: spark
Updated Branches:
  refs/heads/master a7ab7f234 -> 4427a96bc



[SPARK-25806][SQL] The instance of FileSplit is redundant

## What changes were proposed in this pull request?

The instance of `FileSplit` is redundant in the `ParquetFileFormat` and
`hive/orc/OrcFileFormat` classes.

## How was this patch tested?
Existing unit tests in `ParquetQuerySuite.scala` and `HiveOrcQuerySuite.scala`.

Closes #22802 from 10110346/FileSplitnotneed.

Authored-by: liuxian <liu.xi...@zte.com.cn>
Signed-off-by: Sean Owen <sean.o...@databricks.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4427a96b
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4427a96b
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4427a96b

Branch: refs/heads/master
Commit: 4427a96bcea625bc51fc5e0e999f170ad537a2fc
Parents: a7ab7f2
Author: liuxian <liu.xi...@zte.com.cn>
Authored: Sun Oct 28 17:39:16 2018 -0500
Committer: Sean Owen <sean.o...@databricks.com>
Committed: Sun Oct 28 17:39:16 2018 -0500

----------------------------------------------------------------------
 .../datasources/parquet/ParquetFileFormat.scala        | 13 +++++--------
 .../org/apache/spark/sql/hive/orc/OrcFileFormat.scala  |  3 +--
 2 files changed, 6 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/4427a96b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
index ea4f159..f04502d 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
@@ -352,17 +352,14 @@ class ParquetFileFormat
     (file: PartitionedFile) => {
       assert(file.partitionValues.numFields == partitionSchema.size)
 
-      val fileSplit =
-        new FileSplit(new Path(new URI(file.filePath)), file.start, 
file.length, Array.empty)
-      val filePath = fileSplit.getPath
-
+      val filePath = new Path(new URI(file.filePath))
       val split =
         new org.apache.parquet.hadoop.ParquetInputSplit(
           filePath,
-          fileSplit.getStart,
-          fileSplit.getStart + fileSplit.getLength,
-          fileSplit.getLength,
-          fileSplit.getLocations,
+          file.start,
+          file.start + file.length,
+          file.length,
+          Array.empty,
           null)
 
       val sharedConf = broadcastedHadoopConf.value.value

http://git-wip-us.apache.org/repos/asf/spark/blob/4427a96b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala
----------------------------------------------------------------------
diff --git 
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala 
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala
index de8085f..89e6ea8 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala
@@ -154,13 +154,12 @@ class OrcFileFormat extends FileFormat with 
DataSourceRegister with Serializable
           val job = Job.getInstance(conf)
           FileInputFormat.setInputPaths(job, file.filePath)
 
-          val fileSplit = new FileSplit(filePath, file.start, file.length, 
Array.empty)
           // Custom OrcRecordReader is used to get
           // ObjectInspector during recordReader creation itself and can
           // avoid NameNode call in unwrapOrcStructs per file.
           // Specifically would be helpful for partitioned datasets.
           val orcReader = OrcFile.createReader(filePath, 
OrcFile.readerOptions(conf))
-          new SparkOrcNewRecordReader(orcReader, conf, fileSplit.getStart, 
fileSplit.getLength)
+          new SparkOrcNewRecordReader(orcReader, conf, file.start, file.length)
         }
 
         val recordsIterator = new 
RecordReaderIterator[OrcStruct](orcRecordReader)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: [SPARK-25806][SQL] The instance of FileSplit is redundant

Reply via email to