spark git commit: [SPARK-15583][SQL] Disallow altering datasource properties
Repository: spark Updated Branches: refs/heads/branch-2.0 702755f92 -> 8e26b74fc [SPARK-15583][SQL] Disallow altering datasource properties ## What changes were proposed in this pull request? Certain table properties (and SerDe properties) are in the protected namespace `spark.sql.sources.`, which we use internally for datasource tables. The user should not be allowed to (1) Create a Hive table setting these properties (2) Alter these properties in an existing table Previously, we threw an exception if the user tried to alter the properties of an existing datasource table. However, this is overly restrictive for datasource tables and does not do anything for Hive tables. ## How was this patch tested? DDLSuite Author: Andrew Or. Closes #13341 from andrewor14/alter-table-props. (cherry picked from commit 3fca635b4ed322208debcd89a539e42cdde6bbd4) Signed-off-by: Yin Huai Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8e26b74f Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8e26b74f Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8e26b74f Branch: refs/heads/branch-2.0 Commit: 8e26b74fccc8e7b52db1011f6d6e295c6ba0c5aa Parents: 702755f Author: Andrew Or Authored: Thu May 26 20:11:09 2016 -0700 Committer: Yin Huai Committed: Thu May 26 20:11:19 2016 -0700 -- .../command/createDataSourceTables.scala| 17 +++ .../spark/sql/execution/command/ddl.scala | 37 +++-- .../spark/sql/execution/command/tables.scala| 2 + .../spark/sql/execution/command/DDLSuite.scala | 148 --- .../spark/sql/hive/execution/HiveDDLSuite.scala | 2 +- 5 files changed, 139 insertions(+), 67 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/8e26b74f/sql/core/src/main/scala/org/apache/spark/sql/execution/command/createDataSourceTables.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/createDataSourceTables.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/createDataSourceTables.scala index 6ca66a2..deedb68 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/createDataSourceTables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/createDataSourceTables.scala @@ -255,6 +255,23 @@ case class CreateDataSourceTableAsSelectCommand( object CreateDataSourceTableUtils extends Logging { + + // TODO: Actually replace usages with these variables (SPARK-15584) + + val DATASOURCE_PREFIX = "spark.sql.sources." + val DATASOURCE_PROVIDER = DATASOURCE_PREFIX + "provider" + val DATASOURCE_WRITEJOBUUID = DATASOURCE_PREFIX + "writeJobUUID" + val DATASOURCE_OUTPUTPATH = DATASOURCE_PREFIX + "output.path" + val DATASOURCE_SCHEMA_PREFIX = DATASOURCE_PREFIX + "schema." + val DATASOURCE_SCHEMA_NUMPARTS = DATASOURCE_SCHEMA_PREFIX + "numParts" + val DATASOURCE_SCHEMA_NUMPARTCOLS = DATASOURCE_SCHEMA_PREFIX + "numPartCols" + val DATASOURCE_SCHEMA_NUMBUCKETS = DATASOURCE_SCHEMA_PREFIX + "numBuckets" + val DATASOURCE_SCHEMA_NUMBUCKETCOLS = DATASOURCE_SCHEMA_PREFIX + "numBucketCols" + val DATASOURCE_SCHEMA_PART_PREFIX = DATASOURCE_SCHEMA_PREFIX + "part." + val DATASOURCE_SCHEMA_PARTCOL_PREFIX = DATASOURCE_SCHEMA_PREFIX + "partCol." + val DATASOURCE_SCHEMA_BUCKETCOL_PREFIX = DATASOURCE_SCHEMA_PREFIX + "bucketCol." + val DATASOURCE_SCHEMA_SORTCOL_PREFIX = DATASOURCE_SCHEMA_PREFIX + "sortCol." + /** * Checks if the given name conforms the Hive standard ("[a-zA-z_0-9]+"), * i.e. if this name only contains characters, numbers, and _. 
http://git-wip-us.apache.org/repos/asf/spark/blob/8e26b74f/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala index 7ce7bb9..15eba3b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala @@ -25,6 +25,7 @@ import org.apache.spark.sql.catalyst.catalog.{CatalogDatabase, CatalogTable} import org.apache.spark.sql.catalyst.catalog.{CatalogTablePartition, CatalogTableType, SessionCatalog} import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} +import org.apache.spark.sql.execution.command.CreateDataSourceTableUtils.DATASOURCE_PREFIX import org.apache.spark.sql.execution.datasources.BucketSpec
spark git commit: [SPARK-15583][SQL] Disallow altering datasource properties
Repository: spark Updated Branches: refs/heads/master 6ab973ec5 -> 3fca635b4 [SPARK-15583][SQL] Disallow altering datasource properties ## What changes were proposed in this pull request? Certain table properties (and SerDe properties) are in the protected namespace `spark.sql.sources.`, which we use internally for datasource tables. The user should not be allowed to (1) Create a Hive table setting these properties (2) Alter these properties in an existing table Previously, we threw an exception if the user tried to alter the properties of an existing datasource table. However, this is overly restrictive for datasource tables and does not do anything for Hive tables. ## How was this patch tested? DDLSuite Author: Andrew Or. Closes #13341 from andrewor14/alter-table-props. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3fca635b Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3fca635b Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3fca635b Branch: refs/heads/master Commit: 3fca635b4ed322208debcd89a539e42cdde6bbd4 Parents: 6ab973e Author: Andrew Or Authored: Thu May 26 20:11:09 2016 -0700 Committer: Yin Huai Committed: Thu May 26 20:11:09 2016 -0700 -- .../command/createDataSourceTables.scala| 17 +++ .../spark/sql/execution/command/ddl.scala | 37 +++-- .../spark/sql/execution/command/tables.scala| 2 + .../spark/sql/execution/command/DDLSuite.scala | 148 --- .../spark/sql/hive/execution/HiveDDLSuite.scala | 2 +- 5 files changed, 139 insertions(+), 67 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/3fca635b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/createDataSourceTables.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/createDataSourceTables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/createDataSourceTables.scala index 6ca66a2..deedb68 100644 --- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/createDataSourceTables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/createDataSourceTables.scala @@ -255,6 +255,23 @@ case class CreateDataSourceTableAsSelectCommand( object CreateDataSourceTableUtils extends Logging { + + // TODO: Actually replace usages with these variables (SPARK-15584) + + val DATASOURCE_PREFIX = "spark.sql.sources." + val DATASOURCE_PROVIDER = DATASOURCE_PREFIX + "provider" + val DATASOURCE_WRITEJOBUUID = DATASOURCE_PREFIX + "writeJobUUID" + val DATASOURCE_OUTPUTPATH = DATASOURCE_PREFIX + "output.path" + val DATASOURCE_SCHEMA_PREFIX = DATASOURCE_PREFIX + "schema." + val DATASOURCE_SCHEMA_NUMPARTS = DATASOURCE_SCHEMA_PREFIX + "numParts" + val DATASOURCE_SCHEMA_NUMPARTCOLS = DATASOURCE_SCHEMA_PREFIX + "numPartCols" + val DATASOURCE_SCHEMA_NUMBUCKETS = DATASOURCE_SCHEMA_PREFIX + "numBuckets" + val DATASOURCE_SCHEMA_NUMBUCKETCOLS = DATASOURCE_SCHEMA_PREFIX + "numBucketCols" + val DATASOURCE_SCHEMA_PART_PREFIX = DATASOURCE_SCHEMA_PREFIX + "part." + val DATASOURCE_SCHEMA_PARTCOL_PREFIX = DATASOURCE_SCHEMA_PREFIX + "partCol." + val DATASOURCE_SCHEMA_BUCKETCOL_PREFIX = DATASOURCE_SCHEMA_PREFIX + "bucketCol." + val DATASOURCE_SCHEMA_SORTCOL_PREFIX = DATASOURCE_SCHEMA_PREFIX + "sortCol." + /** * Checks if the given name conforms the Hive standard ("[a-zA-z_0-9]+"), * i.e. if this name only contains characters, numbers, and _. 
http://git-wip-us.apache.org/repos/asf/spark/blob/3fca635b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala index 7ce7bb9..15eba3b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala @@ -25,6 +25,7 @@ import org.apache.spark.sql.catalyst.catalog.{CatalogDatabase, CatalogTable} import org.apache.spark.sql.catalyst.catalog.{CatalogTablePartition, CatalogTableType, SessionCatalog} import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} +import org.apache.spark.sql.execution.command.CreateDataSourceTableUtils.DATASOURCE_PREFIX import org.apache.spark.sql.execution.datasources.BucketSpec import org.apache.spark.sql.types._ @@ -228,15 +229,13 @@ case class AlterTableSetPropertiesCommand( extends