Repository: spark
Updated Branches:
  refs/heads/master 52d128180 -> 77da5be6f
http://git-wip-us.apache.org/repos/asf/spark/blob/77da5be6/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveNativeCommand.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveNativeCommand.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveNativeCommand.scala
index 87f8e3f..41b645b 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveNativeCommand.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveNativeCommand.scala
@@ -17,11 +17,11 @@
 
 package org.apache.spark.sql.hive.execution
 
-import org.apache.spark.sql.catalyst.expressions.{AttributeReference, InternalRow}
+import org.apache.spark.sql.catalyst.expressions.AttributeReference
 import org.apache.spark.sql.execution.RunnableCommand
 import org.apache.spark.sql.hive.HiveContext
-import org.apache.spark.sql.SQLContext
 import org.apache.spark.sql.types.StringType
+import org.apache.spark.sql.{Row, SQLContext}
 
 private[hive]
 case class HiveNativeCommand(sql: String) extends RunnableCommand {
@@ -29,6 +29,6 @@ case class HiveNativeCommand(sql: String) extends RunnableCommand {
   override def output: Seq[AttributeReference] =
     Seq(AttributeReference("result", StringType, nullable = false)())
 
-  override def run(sqlContext: SQLContext): Seq[InternalRow] =
-    sqlContext.asInstanceOf[HiveContext].runSqlHive(sql).map(InternalRow(_))
+  override def run(sqlContext: SQLContext): Seq[Row] =
+    sqlContext.asInstanceOf[HiveContext].runSqlHive(sql).map(Row(_))
 }
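A note on the contract being restored here: RunnableCommand.run goes back to returning external Rows, with the engine converting results to the internal representation using the command's declared output. A minimal sketch of a command written against that contract (EchoCommand is hypothetical, not part of this commit; since RunnableCommand is package-private, a real version would have to live under org.apache.spark.sql):

import org.apache.spark.sql.{Row, SQLContext}
import org.apache.spark.sql.catalyst.expressions.AttributeReference
import org.apache.spark.sql.execution.RunnableCommand
import org.apache.spark.sql.types.StringType

// Hypothetical command mirroring the HiveNativeCommand shape above:
// one non-null string column named "result".
case class EchoCommand(message: String) extends RunnableCommand {

  override def output: Seq[AttributeReference] =
    Seq(AttributeReference("result", StringType, nullable = false)())

  // After this commit, run hands back external Rows; commands no longer
  // build InternalRows themselves.
  override def run(sqlContext: SQLContext): Seq[Row] = Seq(Row(message))
}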
http://git-wip-us.apache.org/repos/asf/spark/blob/77da5be6/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScan.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScan.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScan.scala
index 1f5e4af..f4c8c9a 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScan.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScan.scala
@@ -123,7 +123,7 @@ case class HiveTableScan(
 
       // Only partitioned values are needed here, since the predicate has already been bound to
       // partition key attribute references.
-      val row = new GenericRow(castedValues.toArray)
+      val row = InternalRow.fromSeq(castedValues)
       shouldKeep.eval(row).asInstanceOf[Boolean]
     }
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/77da5be6/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/ScriptTransformation.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/ScriptTransformation.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/ScriptTransformation.scala
index 9d8872a..6118880 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/ScriptTransformation.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/ScriptTransformation.scala
@@ -129,11 +129,11 @@ case class ScriptTransformation(
           val prevLine = curLine
           curLine = reader.readLine()
           if (!ioschema.schemaLess) {
-            new GenericRow(CatalystTypeConverters.convertToCatalyst(
+            new GenericInternalRow(CatalystTypeConverters.convertToCatalyst(
               prevLine.split(ioschema.outputRowFormatMap("TOK_TABLEROWFORMATFIELD")))
               .asInstanceOf[Array[Any]])
           } else {
-            new GenericRow(CatalystTypeConverters.convertToCatalyst(
+            new GenericInternalRow(CatalystTypeConverters.convertToCatalyst(
               prevLine.split(ioschema.outputRowFormatMap("TOK_TABLEROWFORMATFIELD"), 2))
               .asInstanceOf[Array[Any]])
           }
@@ -167,7 +167,8 @@ case class ScriptTransformation(
         outputStream.write(data)
       } else {
-        val writable = inputSerde.serialize(row.asInstanceOf[GenericRow].values, inputSoi)
+        val writable = inputSerde.serialize(
+          row.asInstanceOf[GenericInternalRow].values, inputSoi)
         prepareWritable(writable).write(dataOutputStream)
       }
     }
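The GenericRow -> GenericInternalRow swap above means each script output line is materialized directly in the internal representation (strings end up as UTF8String). A standalone sketch of that split-convert-wrap step, relying on the same convertToCatalyst array behavior the diff itself uses (era-specific; treat the exact converter semantics as an assumption):

import org.apache.spark.sql.catalyst.CatalystTypeConverters
import org.apache.spark.sql.catalyst.expressions.GenericInternalRow

object ScriptOutputRowSketch {
  def main(args: Array[String]): Unit = {
    // A line as a transform script might emit it; "\t" stands in for
    // ioschema.outputRowFormatMap("TOK_TABLEROWFORMATFIELD").
    val prevLine = "alice\t30"
    // Same shape as ScriptTransformation above: split on the field delimiter,
    // convert each field to its catalyst form, wrap in a GenericInternalRow.
    val row = new GenericInternalRow(
      CatalystTypeConverters.convertToCatalyst(prevLine.split("\t"))
        .asInstanceOf[Array[Any]])
    println(row) // one internal row holding two UTF8String fields
  }
}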
http://git-wip-us.apache.org/repos/asf/spark/blob/77da5be6/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala
index aad58bf..71fa3e9 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala
@@ -17,15 +17,14 @@
 
 package org.apache.spark.sql.hive.execution
 
-import org.apache.spark.sql.AnalysisException
+import org.apache.spark.sql._
 import org.apache.spark.sql.catalyst.analysis.EliminateSubQueries
-import org.apache.spark.sql.catalyst.util._
-import org.apache.spark.sql.sources._
-import org.apache.spark.sql.{SaveMode, DataFrame, SQLContext}
-import org.apache.spark.sql.catalyst.expressions.{Attribute, InternalRow}
+import org.apache.spark.sql.catalyst.expressions.Attribute
 import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+import org.apache.spark.sql.catalyst.util._
 import org.apache.spark.sql.execution.RunnableCommand
 import org.apache.spark.sql.hive.HiveContext
+import org.apache.spark.sql.sources._
 import org.apache.spark.sql.types._
 import org.apache.spark.util.Utils
 
@@ -39,9 +38,9 @@ import org.apache.spark.util.Utils
 private[hive]
 case class AnalyzeTable(tableName: String) extends RunnableCommand {
 
-  override def run(sqlContext: SQLContext): Seq[InternalRow] = {
+  override def run(sqlContext: SQLContext): Seq[Row] = {
     sqlContext.asInstanceOf[HiveContext].analyze(tableName)
-    Seq.empty[InternalRow]
+    Seq.empty[Row]
   }
 }
 
@@ -53,7 +52,7 @@ case class DropTable(
     tableName: String,
     ifExists: Boolean) extends RunnableCommand {
 
-  override def run(sqlContext: SQLContext): Seq[InternalRow] = {
+  override def run(sqlContext: SQLContext): Seq[Row] = {
     val hiveContext = sqlContext.asInstanceOf[HiveContext]
     val ifExistsClause = if (ifExists) "IF EXISTS " else ""
     try {
@@ -70,7 +69,7 @@ case class DropTable(
     hiveContext.invalidateTable(tableName)
     hiveContext.runSqlHive(s"DROP TABLE $ifExistsClause$tableName")
     hiveContext.catalog.unregisterTable(Seq(tableName))
-    Seq.empty[InternalRow]
+    Seq.empty[Row]
   }
 }
 
@@ -83,7 +82,7 @@ case class AddJar(path: String) extends RunnableCommand {
     schema.toAttributes
   }
 
-  override def run(sqlContext: SQLContext): Seq[InternalRow] = {
+  override def run(sqlContext: SQLContext): Seq[Row] = {
     val hiveContext = sqlContext.asInstanceOf[HiveContext]
     val currentClassLoader = Utils.getContextOrSparkClassLoader
 
@@ -105,18 +104,18 @@ case class AddJar(path: String) extends RunnableCommand {
     // Add jar to executors
     hiveContext.sparkContext.addJar(path)
 
-    Seq(InternalRow(0))
+    Seq(Row(0))
  }
 }
 
 private[hive]
 case class AddFile(path: String) extends RunnableCommand {
 
-  override def run(sqlContext: SQLContext): Seq[InternalRow] = {
+  override def run(sqlContext: SQLContext): Seq[Row] = {
     val hiveContext = sqlContext.asInstanceOf[HiveContext]
     hiveContext.runSqlHive(s"ADD FILE $path")
     hiveContext.sparkContext.addFile(path)
-    Seq.empty[InternalRow]
+    Seq.empty[Row]
   }
 }
 
@@ -129,12 +128,12 @@ case class CreateMetastoreDataSource(
     allowExisting: Boolean,
     managedIfNoPath: Boolean) extends RunnableCommand {
 
-  override def run(sqlContext: SQLContext): Seq[InternalRow] = {
+  override def run(sqlContext: SQLContext): Seq[Row] = {
     val hiveContext = sqlContext.asInstanceOf[HiveContext]
 
     if (hiveContext.catalog.tableExists(tableName :: Nil)) {
       if (allowExisting) {
-        return Seq.empty[InternalRow]
+        return Seq.empty[Row]
       } else {
         throw new AnalysisException(s"Table $tableName already exists.")
       }
@@ -157,7 +156,7 @@ case class CreateMetastoreDataSource(
       optionsWithPath,
       isExternal)
 
-    Seq.empty[InternalRow]
+    Seq.empty[Row]
   }
 }
 
@@ -170,7 +169,7 @@ case class CreateMetastoreDataSourceAsSelect(
     options: Map[String, String],
     query: LogicalPlan) extends RunnableCommand {
 
-  override def run(sqlContext: SQLContext): Seq[InternalRow] = {
+  override def run(sqlContext: SQLContext): Seq[Row] = {
     val hiveContext = sqlContext.asInstanceOf[HiveContext]
     var createMetastoreTable = false
     var isExternal = true
@@ -194,7 +193,7 @@ case class CreateMetastoreDataSourceAsSelect(
             s"Or, if you are using SQL CREATE TABLE, you need to drop $tableName first.")
         case SaveMode.Ignore =>
           // Since the table already exists and the save mode is Ignore, we will just return.
-          return Seq.empty[InternalRow]
+          return Seq.empty[Row]
         case SaveMode.Append =>
           // Check if the specified data source match the data source of the existing table.
          val resolved = ResolvedDataSource(
@@ -259,6 +258,6 @@ case class CreateMetastoreDataSourceAsSelect(
 
     // Refresh the cache of the table in the catalog.
     hiveContext.refreshTable(tableName)
-    Seq.empty[InternalRow]
+    Seq.empty[Row]
   }
 }
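Why returning values like Seq(Row(0)) is enough: the physical operator that executes these commands converts each external Row to the internal format using the command's schema. That wiring is not part of this diff, so the following standalone sketch of the conversion step is an assumption-level illustration, not quoted code:

import org.apache.spark.sql.Row
import org.apache.spark.sql.catalyst.CatalystTypeConverters
import org.apache.spark.sql.types.{IntegerType, StructField, StructType}

object CommandResultConversionSketch {
  def main(args: Array[String]): Unit = {
    // AddJar above returns Seq(Row(0)): one non-null int status column.
    val schema = StructType(Seq(StructField("result", IntegerType, nullable = false)))
    // Build an external-to-catalyst converter for that schema and apply it
    // to each Row the command returned.
    val toCatalyst = CatalystTypeConverters.createToCatalystConverter(schema)
    val internalRows = Seq(Row(0)).map(toCatalyst)
    println(internalRows)
  }
}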
http://git-wip-us.apache.org/repos/asf/spark/blob/77da5be6/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcRelation.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcRelation.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcRelation.scala
index 0fd7b3a..300f83d 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcRelation.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcRelation.scala
@@ -190,7 +190,7 @@ private[sql] class OrcRelation(
       filters: Array[Filter],
       inputPaths: Array[FileStatus]): RDD[Row] = {
     val output = StructType(requiredColumns.map(dataSchema(_))).toAttributes
-    OrcTableScan(output, this, filters, inputPaths).execute()
+    OrcTableScan(output, this, filters, inputPaths).execute().map(_.asInstanceOf[Row])
   }
 
   override def prepareJobForWrite(job: Job): OutputWriterFactory = {
@@ -234,13 +234,13 @@ private[orc] case class OrcTableScan(
     HiveShim.appendReadColumns(conf, sortedIds, sortedNames)
   }
 
-  // Transform all given raw `Writable`s into `Row`s.
+  // Transform all given raw `Writable`s into `InternalRow`s.
   private def fillObject(
       path: String,
       conf: Configuration,
       iterator: Iterator[Writable],
       nonPartitionKeyAttrs: Seq[(Attribute, Int)],
-      mutableRow: MutableRow): Iterator[Row] = {
+      mutableRow: MutableRow): Iterator[InternalRow] = {
     val deserializer = new OrcSerde
     val soi = OrcFileOperator.getObjectInspector(path, Some(conf))
     val (fieldRefs, fieldOrdinals) = nonPartitionKeyAttrs.map {
@@ -261,11 +261,11 @@ private[orc] case class OrcTableScan(
         }
         i += 1
       }
-      mutableRow: Row
+      mutableRow: InternalRow
     }
   }
 
-  def execute(): RDD[Row] = {
+  def execute(): RDD[InternalRow] = {
     val job = new Job(sqlContext.sparkContext.hadoopConfiguration)
     val conf = job.getConfiguration

http://git-wip-us.apache.org/repos/asf/spark/blob/77da5be6/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveInspectorSuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveInspectorSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveInspectorSuite.scala
index aff0456..a93acb9 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveInspectorSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveInspectorSuite.scala
@@ -202,9 +202,9 @@ class HiveInspectorSuite extends SparkFunSuite with HiveInspectors {
     val dt = StructType(dataTypes.zipWithIndex.map {
       case (t, idx) => StructField(s"c_$idx", t)
     })
-
+    val inspector = toInspector(dt)
     checkValues(row,
-      unwrap(wrap(Row.fromSeq(row), toInspector(dt)), toInspector(dt)).asInstanceOf[InternalRow])
+      unwrap(wrap(InternalRow.fromSeq(row), inspector), inspector).asInstanceOf[InternalRow])
     checkValue(null, unwrap(wrap(null, toInspector(dt)), toInspector(dt)))
   }
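Finally, a caveat the fillObject change above preserves: the scan reuses one MutableRow for every record, so each step of the iterator overwrites the previous row and consumers must copy rows before buffering them. A standalone sketch of that reuse pattern (class and package names follow the 1.5-era catalyst tree; treat them as assumptions):

import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.types.IntegerType

object MutableRowReuseSketch {
  def main(args: Array[String]): Unit = {
    // One mutable row allocated up front, as fillObject receives it.
    val mutableRow: MutableRow = new SpecificMutableRow(Seq(IntegerType))
    val rows = (1 to 3).iterator.map { i =>
      mutableRow.setInt(0, i)   // overwrite in place; no per-record allocation
      mutableRow: InternalRow   // widen via type ascription, as in fillObject
    }
    // Because the underlying object is shared, copy before collecting.
    println(rows.map(_.copy()).toList)
  }
}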