spark git commit: [SQL] minor - internal doc improvement for InsertIntoTable.
Repository: spark Updated Branches: refs/heads/branch-2.1 bc7f05f5f -> 71104c9c9 [SQL] minor - internal doc improvement for InsertIntoTable. ## What changes were proposed in this pull request? I was reading this part of the code and was really confused by the "partition" parameter. This patch adds some documentation for it to reduce confusion in the future. I also looked around other logical plans but most of them are either already documented, or pretty self-evident to people that know Spark SQL. ## How was this patch tested? N/A - doc change only. Author: Reynold Xin. Closes #15749 from rxin/doc-improvement. (cherry picked from commit 0ea5d5b24c1f7b29efeac0e72d271aba279523f7) Signed-off-by: Reynold Xin Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/71104c9c Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/71104c9c Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/71104c9c Branch: refs/heads/branch-2.1 Commit: 71104c9c97a648c94e6619279ad49752c01c89c3 Parents: bc7f05f Author: Reynold Xin Authored: Thu Nov 3 02:45:54 2016 -0700 Committer: Reynold Xin Committed: Thu Nov 3 02:46:01 2016 -0700 -- .../plans/logical/basicLogicalOperators.scala | 16 ++ .../hive/execution/InsertIntoHiveTable.scala | 31 2 files changed, 42 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/71104c9c/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala index 7a15c22..65ceab2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala @@ -360,6 +360,22 @@ case class 
OverwriteOptions( } } +/** + * Insert some data into a table. + * + * @param table the logical plan representing the table. In the future this should be a + * [[org.apache.spark.sql.catalyst.catalog.CatalogTable]] once we converge Hive tables + * and data source tables. + * @param partition a map from the partition key to the partition value (optional). If the partition + * value is optional, dynamic partition insert will be performed. + * As an example, `INSERT INTO tbl PARTITION (a=1, b=2) AS ...` would have + * Map('a' -> Some('1'), 'b' -> Some('2')), + * and `INSERT INTO tbl PARTITION (a=1, b) AS ...` + * would have Map('a' -> Some('1'), 'b' -> None). + * @param child the logical plan representing data to write to. + * @param overwrite overwrite existing table or partitions. + * @param ifNotExists If true, only write if the table or partition does not exist. + */ case class InsertIntoTable( table: LogicalPlan, partition: Map[String, Option[String]], http://git-wip-us.apache.org/repos/asf/spark/blob/71104c9c/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala -- diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala index 05164d7..15be12c 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala @@ -35,13 +35,35 @@ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.physical.Partitioning import org.apache.spark.sql.execution.{SparkPlan, UnaryExecNode} -import org.apache.spark.sql.execution.command.{AlterTableAddPartitionCommand, AlterTableDropPartitionCommand} import org.apache.spark.sql.hive._ import org.apache.spark.sql.hive.HiveShim.{ShimFileSinkDesc => FileSinkDesc} 
import org.apache.spark.SparkException import org.apache.spark.util.SerializableJobConf +/** + * Command for writing data out to a Hive table. + * + * This class is mostly a mess, for legacy reasons (since it evolved in organic ways and had to + * follow Hive's internal implementations closely, which itself was a mess too). Please don't + * blame Reynold for this! He was just moving code around! + * + * In the future we
spark git commit: [SQL] minor - internal doc improvement for InsertIntoTable.
Repository: spark Updated Branches: refs/heads/master 937af592e -> 0ea5d5b24 [SQL] minor - internal doc improvement for InsertIntoTable. ## What changes were proposed in this pull request? I was reading this part of the code and was really confused by the "partition" parameter. This patch adds some documentation for it to reduce confusion in the future. I also looked around other logical plans but most of them are either already documented, or pretty self-evident to people that know Spark SQL. ## How was this patch tested? N/A - doc change only. Author: Reynold Xin. Closes #15749 from rxin/doc-improvement. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0ea5d5b2 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0ea5d5b2 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0ea5d5b2 Branch: refs/heads/master Commit: 0ea5d5b24c1f7b29efeac0e72d271aba279523f7 Parents: 937af59 Author: Reynold Xin Authored: Thu Nov 3 02:45:54 2016 -0700 Committer: Reynold Xin Committed: Thu Nov 3 02:45:54 2016 -0700 -- .../plans/logical/basicLogicalOperators.scala | 16 ++ .../hive/execution/InsertIntoHiveTable.scala | 31 2 files changed, 42 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/0ea5d5b2/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala index 7a15c22..65ceab2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala @@ -360,6 +360,22 @@ case class OverwriteOptions( } } +/** + * Insert some data into a table. 
+ * + * @param table the logical plan representing the table. In the future this should be a + * [[org.apache.spark.sql.catalyst.catalog.CatalogTable]] once we converge Hive tables + * and data source tables. + * @param partition a map from the partition key to the partition value (optional). If the partition + * value is optional, dynamic partition insert will be performed. + * As an example, `INSERT INTO tbl PARTITION (a=1, b=2) AS ...` would have + * Map('a' -> Some('1'), 'b' -> Some('2')), + * and `INSERT INTO tbl PARTITION (a=1, b) AS ...` + * would have Map('a' -> Some('1'), 'b' -> None). + * @param child the logical plan representing data to write to. + * @param overwrite overwrite existing table or partitions. + * @param ifNotExists If true, only write if the table or partition does not exist. + */ case class InsertIntoTable( table: LogicalPlan, partition: Map[String, Option[String]], http://git-wip-us.apache.org/repos/asf/spark/blob/0ea5d5b2/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala -- diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala index 05164d7..15be12c 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala @@ -35,13 +35,35 @@ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.physical.Partitioning import org.apache.spark.sql.execution.{SparkPlan, UnaryExecNode} -import org.apache.spark.sql.execution.command.{AlterTableAddPartitionCommand, AlterTableDropPartitionCommand} import org.apache.spark.sql.hive._ import org.apache.spark.sql.hive.HiveShim.{ShimFileSinkDesc => FileSinkDesc} import org.apache.spark.SparkException import 
org.apache.spark.util.SerializableJobConf +/** + * Command for writing data out to a Hive table. + * + * This class is mostly a mess, for legacy reasons (since it evolved in organic ways and had to + * follow Hive's internal implementations closely, which itself was a mess too). Please don't + * blame Reynold for this! He was just moving code around! + * + * In the future we should converge the write path for Hive with the normal data source write path, + * as defined in