spark git commit: [SPARK-17571][SQL] AssertOnQuery.condition should always return Boolean value
Repository: spark
Updated Branches: refs/heads/branch-2.0 151f808a1 -> 27ce39cf2

[SPARK-17571][SQL] AssertOnQuery.condition should always return Boolean value

## What changes were proposed in this pull request?

AssertOnQuery has two apply constructors: one that accepts a closure returning Boolean, and another that accepts a closure returning Unit. This is confusing because developers could mistakenly think that AssertOnQuery always requires a Boolean return type and verifies the result, when in fact the value of the last statement is ignored in one of the constructors. This pull request makes the two constructors consistent by always requiring a Boolean return value, which overall makes the test suites more robust against developer errors. As evidence of the confusing behavior, this change also identified a bug in an existing test case caused by file system timestamp granularity; this pull request fixes that test case as well.

## How was this patch tested?

This is a test-only change.

Author: petermaxlee

Closes #15127 from petermaxlee/SPARK-17571.

(cherry picked from commit 8f0c35a4d0dd458719627be5f524792bf244d70a)
Signed-off-by: Reynold Xin

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/27ce39cf
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/27ce39cf
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/27ce39cf
Branch: refs/heads/branch-2.0
Commit: 27ce39cf207eba46502ed11fcbfd51bed3e68f2b
Parents: 151f808
Author: petermaxlee
Authored: Sun Sep 18 15:22:01 2016 -0700
Committer: Reynold Xin
Committed: Sun Sep 18 15:22:08 2016 -0700

--
 .../apache/spark/sql/streaming/FileStreamSourceSuite.scala | 7 +--
 .../scala/org/apache/spark/sql/streaming/StreamTest.scala  | 4 ++--
 .../spark/sql/streaming/StreamingQueryListenerSuite.scala  | 3 +++
 3 files changed, 10 insertions(+), 4 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/27ce39cf/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala
index 886f7be..a02a36c 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala
@@ -354,7 +354,9 @@ class FileStreamSourceSuite extends FileStreamSourceTest {
       CheckAnswer("a", "b"),
       // SLeeps longer than 5ms (maxFileAge)
-      AssertOnQuery { _ => Thread.sleep(10); true },
+      // Unfortunately since a lot of file system does not have modification time granularity
+      // finer grained than 1 sec, we need to use 1 sec here.
+      AssertOnQuery { _ => Thread.sleep(1000); true },
       AddTextFileData("c\nd", src, tmp),
       CheckAnswer("a", "b", "c", "d"),
@@ -363,7 +365,8 @@ class FileStreamSourceSuite extends FileStreamSourceTest {
       val source = streamExecution.logicalPlan.collect {
         case e: StreamingExecutionRelation => e.source.asInstanceOf[FileStreamSource]
       }.head
-      source.seenFiles.size == 1
+      assert(source.seenFiles.size == 1)
+      true
       }
     )
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/27ce39cf/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala
index af2b581..6c5b170 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala
@@ -188,8 +188,8 @@ trait StreamTest extends QueryTest with SharedSQLContext with Timeouts {
       new AssertOnQuery(condition, message)
     }

-    def apply(message: String)(condition: StreamExecution => Unit): AssertOnQuery = {
-      new AssertOnQuery(s => { condition(s); true }, message)
+    def apply(message: String)(condition: StreamExecution => Boolean): AssertOnQuery = {
+      new AssertOnQuery(condition, message)
     }
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/27ce39cf/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryListenerSuite.scala

--

diff --git
spark git commit: [SPARK-17571][SQL] AssertOnQuery.condition should always return Boolean value
Repository: spark
Updated Branches: refs/heads/master 1dbb725db -> 8f0c35a4d

[SPARK-17571][SQL] AssertOnQuery.condition should always return Boolean value

## What changes were proposed in this pull request?

AssertOnQuery has two apply constructors: one that accepts a closure returning Boolean, and another that accepts a closure returning Unit. This is confusing because developers could mistakenly think that AssertOnQuery always requires a Boolean return type and verifies the result, when in fact the value of the last statement is ignored in one of the constructors. This pull request makes the two constructors consistent by always requiring a Boolean return value, which overall makes the test suites more robust against developer errors. As evidence of the confusing behavior, this change also identified a bug in an existing test case caused by file system timestamp granularity; this pull request fixes that test case as well.

## How was this patch tested?

This is a test-only change.

Author: petermaxlee

Closes #15127 from petermaxlee/SPARK-17571.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8f0c35a4
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8f0c35a4
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8f0c35a4
Branch: refs/heads/master
Commit: 8f0c35a4d0dd458719627be5f524792bf244d70a
Parents: 1dbb725
Author: petermaxlee
Authored: Sun Sep 18 15:22:01 2016 -0700
Committer: Reynold Xin
Committed: Sun Sep 18 15:22:01 2016 -0700

--
 .../apache/spark/sql/streaming/FileStreamSourceSuite.scala | 7 +--
 .../scala/org/apache/spark/sql/streaming/StreamTest.scala  | 4 ++--
 .../spark/sql/streaming/StreamingQueryListenerSuite.scala  | 3 +++
 3 files changed, 10 insertions(+), 4 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/8f0c35a4/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala
index 886f7be..a02a36c 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala
@@ -354,7 +354,9 @@ class FileStreamSourceSuite extends FileStreamSourceTest {
       CheckAnswer("a", "b"),
       // SLeeps longer than 5ms (maxFileAge)
-      AssertOnQuery { _ => Thread.sleep(10); true },
+      // Unfortunately since a lot of file system does not have modification time granularity
+      // finer grained than 1 sec, we need to use 1 sec here.
+      AssertOnQuery { _ => Thread.sleep(1000); true },
       AddTextFileData("c\nd", src, tmp),
       CheckAnswer("a", "b", "c", "d"),
@@ -363,7 +365,8 @@ class FileStreamSourceSuite extends FileStreamSourceTest {
       val source = streamExecution.logicalPlan.collect {
         case e: StreamingExecutionRelation => e.source.asInstanceOf[FileStreamSource]
       }.head
-      source.seenFiles.size == 1
+      assert(source.seenFiles.size == 1)
+      true
       }
     )
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/8f0c35a4/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala
index af2b581..6c5b170 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala
@@ -188,8 +188,8 @@ trait StreamTest extends QueryTest with SharedSQLContext with Timeouts {
       new AssertOnQuery(condition, message)
     }

-    def apply(message: String)(condition: StreamExecution => Unit): AssertOnQuery = {
-      new AssertOnQuery(s => { condition(s); true }, message)
+    def apply(message: String)(condition: StreamExecution => Boolean): AssertOnQuery = {
+      new AssertOnQuery(condition, message)
     }
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/8f0c35a4/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryListenerSuite.scala

--

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryListenerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryListenerSuite.scala index
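As an illustration of the trap this commit removes, here is a self-contained sketch (hypothetical names, not the Spark test harness itself) showing how a Unit-returning closure parameter silently discards the result of a comparison, while a Boolean-returning one actually verifies it:

```scala
object AssertOnQueryDemo extends App {
  // Mimics the removed Unit-style overload: any final expression in the
  // closure is discarded, and the "assertion" unconditionally returns true.
  def assertUnitStyle(condition: Int => Unit): Boolean = { condition(42); true }

  // Mimics the fixed Boolean-style overload: the closure's result is checked.
  def assertBooleanStyle(condition: Int => Boolean): Boolean = condition(42)

  // This *looks* like a real check, but `v == 0` is adapted to Unit and its
  // value thrown away, so the assertion always "passes":
  println(assertUnitStyle { v => v == 0 })     // true

  // With the Boolean overload, the failing comparison is surfaced:
  println(assertBooleanStyle { v => v == 0 })  // false
}
```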
spark git commit: [SPARK-16462][SPARK-16460][SPARK-15144][SQL] Make CSV cast null values properly
Repository: spark
Updated Branches: refs/heads/branch-2.0 6c67d86f2 -> 151f808a1

[SPARK-16462][SPARK-16460][SPARK-15144][SQL] Make CSV cast null values properly

## Problem

CSV in Spark 2.0.0:
- does not read null values back correctly for certain data types such as `Boolean`, `TimestampType`, `DateType` -- this is a regression compared to 1.6;
- does not read empty values (specified by `options.nullValue`) as `null`s for `StringType` -- this is compatible with 1.6 but leads to problems like SPARK-16903.

## What changes were proposed in this pull request?

This patch makes changes to read all empty values back as `null`s.

## How was this patch tested?

New test cases.

Author: Liwei Lin

Closes #14118 from lw-lin/csv-cast-null.

(cherry picked from commit 1dbb725dbef30bf7633584ce8efdb573f2d92bca)
Signed-off-by: Sean Owen

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/151f808a
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/151f808a
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/151f808a
Branch: refs/heads/branch-2.0
Commit: 151f808a181333daa6300c7d5d7c49c3cec3307c
Parents: 6c67d86
Author: Liwei Lin
Authored: Sun Sep 18 19:25:58 2016 +0100
Committer: Sean Owen
Committed: Sun Sep 18 19:26:08 2016 +0100

--
 python/pyspark/sql/readwriter.py                | 3 +-
 python/pyspark/sql/streaming.py                 | 3 +-
 .../org/apache/spark/sql/DataFrameReader.scala  | 3 +-
 .../datasources/csv/CSVInferSchema.scala        | 108 +--
 .../spark/sql/streaming/DataStreamReader.scala  | 3 +-
 .../execution/datasources/csv/CSVSuite.scala    | 2 +-
 .../datasources/csv/CSVTypeCastSuite.scala      | 54 ++
 7 files changed, 93 insertions(+), 83 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/151f808a/python/pyspark/sql/readwriter.py

diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py
index 3da6f49..dc13a81 100644
--- a/python/pyspark/sql/readwriter.py
+++ b/python/pyspark/sql/readwriter.py
@@ -327,7 +327,8 @@ class DataFrameReader(OptionUtils):
                being read should be skipped. If None is set, it uses
                the default value, ``false``.
         :param nullValue: sets the string representation of a null value. If None is set, it uses
-               the default value, empty string.
+               the default value, empty string. Since 2.0.1, this ``nullValue`` param
+               applies to all supported types including the string type.
         :param nanValue: sets the string representation of a non-number value. If None is set, it
                uses the default value, ``NaN``.
         :param positiveInf: sets the string representation of a positive infinity value. If None

http://git-wip-us.apache.org/repos/asf/spark/blob/151f808a/python/pyspark/sql/streaming.py

diff --git a/python/pyspark/sql/streaming.py b/python/pyspark/sql/streaming.py
index 9487f9d..38c19e2 100644
--- a/python/pyspark/sql/streaming.py
+++ b/python/pyspark/sql/streaming.py
@@ -495,7 +495,8 @@ class DataStreamReader(OptionUtils):
                being read should be skipped. If None is set, it uses
                the default value, ``false``.
         :param nullValue: sets the string representation of a null value. If None is set, it uses
-               the default value, empty string.
+               the default value, empty string. Since 2.0.1, this ``nullValue`` param
+               applies to all supported types including the string type.
         :param nanValue: sets the string representation of a non-number value. If None is set, it
                uses the default value, ``NaN``.
         :param positiveInf: sets the string representation of a positive infinity value. If None

http://git-wip-us.apache.org/repos/asf/spark/blob/151f808a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
index 410cb20..fe3da25 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
@@ -377,7 +377,8 @@ class DataFrameReader private[sql](sparkSession: SparkSession)
spark git commit: [SPARK-16462][SPARK-16460][SPARK-15144][SQL] Make CSV cast null values properly
Repository: spark
Updated Branches: refs/heads/master 7151011b3 -> 1dbb725db

[SPARK-16462][SPARK-16460][SPARK-15144][SQL] Make CSV cast null values properly

## Problem

CSV in Spark 2.0.0:
- does not read null values back correctly for certain data types such as `Boolean`, `TimestampType`, `DateType` -- this is a regression compared to 1.6;
- does not read empty values (specified by `options.nullValue`) as `null`s for `StringType` -- this is compatible with 1.6 but leads to problems like SPARK-16903.

## What changes were proposed in this pull request?

This patch makes changes to read all empty values back as `null`s.

## How was this patch tested?

New test cases.

Author: Liwei Lin

Closes #14118 from lw-lin/csv-cast-null.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1dbb725d
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1dbb725d
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1dbb725d
Branch: refs/heads/master
Commit: 1dbb725dbef30bf7633584ce8efdb573f2d92bca
Parents: 7151011
Author: Liwei Lin
Authored: Sun Sep 18 19:25:58 2016 +0100
Committer: Sean Owen
Committed: Sun Sep 18 19:25:58 2016 +0100

--
 python/pyspark/sql/readwriter.py                | 3 +-
 python/pyspark/sql/streaming.py                 | 3 +-
 .../org/apache/spark/sql/DataFrameReader.scala  | 3 +-
 .../datasources/csv/CSVInferSchema.scala        | 108 +--
 .../spark/sql/streaming/DataStreamReader.scala  | 3 +-
 .../execution/datasources/csv/CSVSuite.scala    | 2 +-
 .../datasources/csv/CSVTypeCastSuite.scala      | 54 ++
 7 files changed, 93 insertions(+), 83 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/1dbb725d/python/pyspark/sql/readwriter.py

diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py
index 3d79e0c..a6860ef 100644
--- a/python/pyspark/sql/readwriter.py
+++ b/python/pyspark/sql/readwriter.py
@@ -329,7 +329,8 @@ class DataFrameReader(OptionUtils):
                being read should be skipped. If None is set, it uses
                the default value, ``false``.
         :param nullValue: sets the string representation of a null value. If None is set, it uses
-               the default value, empty string.
+               the default value, empty string. Since 2.0.1, this ``nullValue`` param
+               applies to all supported types including the string type.
         :param nanValue: sets the string representation of a non-number value. If None is set, it
                uses the default value, ``NaN``.
         :param positiveInf: sets the string representation of a positive infinity value. If None

http://git-wip-us.apache.org/repos/asf/spark/blob/1dbb725d/python/pyspark/sql/streaming.py

diff --git a/python/pyspark/sql/streaming.py b/python/pyspark/sql/streaming.py
index 67375f6..0136451 100644
--- a/python/pyspark/sql/streaming.py
+++ b/python/pyspark/sql/streaming.py
@@ -497,7 +497,8 @@ class DataStreamReader(OptionUtils):
                being read should be skipped. If None is set, it uses
                the default value, ``false``.
         :param nullValue: sets the string representation of a null value. If None is set, it uses
-               the default value, empty string.
+               the default value, empty string. Since 2.0.1, this ``nullValue`` param
+               applies to all supported types including the string type.
         :param nanValue: sets the string representation of a non-number value. If None is set, it
                uses the default value, ``NaN``.
         :param positiveInf: sets the string representation of a positive infinity value. If None

http://git-wip-us.apache.org/repos/asf/spark/blob/1dbb725d/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
index d29d90c..30f39c7 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
@@ -376,7 +376,8 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging {
    * from values being read should be skipped.
    * `ignoreTrailingWhiteSpace` (default `false`): defines
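To see the documented behavior in action, here is a minimal sketch assuming Spark 2.0.1+, a local SparkSession, and a made-up input file `/tmp/people.csv`; the schema and data are invented for illustration:

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.types._

object CsvNullDemo extends App {
  val spark = SparkSession.builder().master("local[*]").appName("csv-null").getOrCreate()

  // Hypothetical contents of /tmp/people.csv:
  //   alice,true,2016-01-01
  //   bob,,
  val schema = StructType(Seq(
    StructField("name", StringType),
    StructField("registered", BooleanType),
    StructField("since", DateType)))

  val df = spark.read
    .schema(schema)
    .option("nullValue", "")  // empty fields now come back as null for ALL types
    .csv("/tmp/people.csv")

  // bob's Boolean and Date columns are read as null instead of failing to parse,
  // and his empty string field is also null (no longer "").
  df.show()
  spark.stop()
}
```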
spark git commit: [SPARK-17586][BUILD] Do not call static member via instance reference
Repository: spark
Updated Branches: refs/heads/branch-2.0 5619f095b -> 6c67d86f2

[SPARK-17586][BUILD] Do not call static member via instance reference

## What changes were proposed in this pull request?

This PR fixes the warning message below:

```
[WARNING] .../UnsafeInMemorySorter.java:284: warning: [static] static method should be qualified by type name, TaskMemoryManager, instead of by an expression
[WARNING] currentPageNumber = memoryManager.decodePageNumber(recordPointer)
```

by referencing the static member via the class rather than an instance reference.

## How was this patch tested?

Existing tests should cover this - Jenkins tests.

Author: hyukjinkwon

Closes #15141 from HyukjinKwon/SPARK-17586.

(cherry picked from commit 7151011b38a841d9d4bc2e453b9a7cfe42f74f8f)
Signed-off-by: Sean Owen

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6c67d86f
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6c67d86f
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6c67d86f
Branch: refs/heads/branch-2.0
Commit: 6c67d86f2f0a24764146827ec5c42969194cb11d
Parents: 5619f09
Author: hyukjinkwon
Authored: Sun Sep 18 19:18:49 2016 +0100
Committer: Sean Owen
Committed: Sun Sep 18 19:18:59 2016 +0100

--
 .../spark/util/collection/unsafe/sort/UnsafeInMemorySorter.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/6c67d86f/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorter.java

diff --git a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorter.java b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorter.java
index 9710529..b517371 100644
--- a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorter.java
+++ b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorter.java
@@ -258,7 +258,7 @@ public final class UnsafeInMemorySorter {
     public void loadNext() {
       // This pointer points to a 4-byte record length, followed by the record's bytes
       final long recordPointer = array.get(offset + position);
-      currentPageNumber = memoryManager.decodePageNumber(recordPointer);
+      currentPageNumber = TaskMemoryManager.decodePageNumber(recordPointer);
       baseObject = memoryManager.getPage(recordPointer);
       baseOffset = memoryManager.getOffsetInPage(recordPointer) + 4; // Skip over record length
       recordLength = Platform.getInt(baseObject, baseOffset - 4);
spark git commit: [SPARK-17586][BUILD] Do not call static member via instance reference
Repository: spark
Updated Branches: refs/heads/master 342c0e65b -> 7151011b3

[SPARK-17586][BUILD] Do not call static member via instance reference

## What changes were proposed in this pull request?

This PR fixes the warning message below:

```
[WARNING] .../UnsafeInMemorySorter.java:284: warning: [static] static method should be qualified by type name, TaskMemoryManager, instead of by an expression
[WARNING] currentPageNumber = memoryManager.decodePageNumber(recordPointer)
```

by referencing the static member via the class rather than an instance reference.

## How was this patch tested?

Existing tests should cover this - Jenkins tests.

Author: hyukjinkwon

Closes #15141 from HyukjinKwon/SPARK-17586.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7151011b
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7151011b
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7151011b
Branch: refs/heads/master
Commit: 7151011b38a841d9d4bc2e453b9a7cfe42f74f8f
Parents: 342c0e6
Author: hyukjinkwon
Authored: Sun Sep 18 19:18:49 2016 +0100
Committer: Sean Owen
Committed: Sun Sep 18 19:18:49 2016 +0100

--
 .../spark/util/collection/unsafe/sort/UnsafeInMemorySorter.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/7151011b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorter.java

diff --git a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorter.java b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorter.java
index be38295..3b1ece4 100644
--- a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorter.java
+++ b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorter.java
@@ -281,7 +281,7 @@ public final class UnsafeInMemorySorter {
     public void loadNext() {
       // This pointer points to a 4-byte record length, followed by the record's bytes
      final long recordPointer = array.get(offset + position);
-      currentPageNumber = memoryManager.decodePageNumber(recordPointer);
+      currentPageNumber = TaskMemoryManager.decodePageNumber(recordPointer);
       baseObject = memoryManager.getPage(recordPointer);
       baseOffset = memoryManager.getOffsetInPage(recordPointer) + 4; // Skip over record length
       recordLength = Platform.getInt(baseObject, baseOffset - 4);
spark git commit: [SPARK-17546][DEPLOY] start-* scripts should use hostname -f
Repository: spark
Updated Branches: refs/heads/branch-2.0 cf728b0f2 -> 5619f095b

[SPARK-17546][DEPLOY] start-* scripts should use hostname -f

## What changes were proposed in this pull request?

Call `hostname -f` to get the fully qualified host name.

## How was this patch tested?

Jenkins tests, and also verified the output of the command on OS X and Linux.

Author: Sean Owen

Closes #15129 from srowen/SPARK-17546.

(cherry picked from commit 342c0e65bec4b9a715017089ab6ea127f3c46540)
Signed-off-by: Sean Owen

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5619f095
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5619f095
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5619f095
Branch: refs/heads/branch-2.0
Commit: 5619f095bfac76009758b4f4a4f8c9e319eeb5b1
Parents: cf728b0
Author: Sean Owen
Authored: Sun Sep 18 16:22:31 2016 +0100
Committer: Sean Owen
Committed: Sun Sep 18 16:22:40 2016 +0100

--
 sbin/start-master.sh           | 2 +-
 sbin/start-mesos-dispatcher.sh | 2 +-
 sbin/start-slaves.sh           | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/5619f095/sbin/start-master.sh

diff --git a/sbin/start-master.sh b/sbin/start-master.sh
index 981cb15..d970fcc 100755
--- a/sbin/start-master.sh
+++ b/sbin/start-master.sh
@@ -48,7 +48,7 @@ if [ "$SPARK_MASTER_PORT" = "" ]; then
 fi

 if [ "$SPARK_MASTER_HOST" = "" ]; then
-  SPARK_MASTER_HOST=`hostname`
+  SPARK_MASTER_HOST=`hostname -f`
 fi

 if [ "$SPARK_MASTER_WEBUI_PORT" = "" ]; then

http://git-wip-us.apache.org/repos/asf/spark/blob/5619f095/sbin/start-mesos-dispatcher.sh

diff --git a/sbin/start-mesos-dispatcher.sh b/sbin/start-mesos-dispatcher.sh
index 06a966d..ef65fb9 100755
--- a/sbin/start-mesos-dispatcher.sh
+++ b/sbin/start-mesos-dispatcher.sh
@@ -34,7 +34,7 @@ if [ "$SPARK_MESOS_DISPATCHER_PORT" = "" ]; then
 fi

 if [ "$SPARK_MESOS_DISPATCHER_HOST" = "" ]; then
-  SPARK_MESOS_DISPATCHER_HOST=`hostname`
+  SPARK_MESOS_DISPATCHER_HOST=`hostname -f`
 fi

 if [ "$SPARK_MESOS_DISPATCHER_NUM" = "" ]; then

http://git-wip-us.apache.org/repos/asf/spark/blob/5619f095/sbin/start-slaves.sh

diff --git a/sbin/start-slaves.sh b/sbin/start-slaves.sh
index 0fa1605..7d88712 100755
--- a/sbin/start-slaves.sh
+++ b/sbin/start-slaves.sh
@@ -32,7 +32,7 @@ if [ "$SPARK_MASTER_PORT" = "" ]; then
 fi

 if [ "$SPARK_MASTER_HOST" = "" ]; then
-  SPARK_MASTER_HOST="`hostname`"
+  SPARK_MASTER_HOST="`hostname -f`"
 fi

 # Launch the slaves
spark git commit: [SPARK-17546][DEPLOY] start-* scripts should use hostname -f
Repository: spark
Updated Branches: refs/heads/master 5d3f4615f -> 342c0e65b

[SPARK-17546][DEPLOY] start-* scripts should use hostname -f

## What changes were proposed in this pull request?

Call `hostname -f` to get the fully qualified host name.

## How was this patch tested?

Jenkins tests, and also verified the output of the command on OS X and Linux.

Author: Sean Owen

Closes #15129 from srowen/SPARK-17546.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/342c0e65
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/342c0e65
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/342c0e65
Branch: refs/heads/master
Commit: 342c0e65bec4b9a715017089ab6ea127f3c46540
Parents: 5d3f461
Author: Sean Owen
Authored: Sun Sep 18 16:22:31 2016 +0100
Committer: Sean Owen
Committed: Sun Sep 18 16:22:31 2016 +0100

--
 sbin/start-master.sh           | 2 +-
 sbin/start-mesos-dispatcher.sh | 2 +-
 sbin/start-slaves.sh           | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/342c0e65/sbin/start-master.sh

diff --git a/sbin/start-master.sh b/sbin/start-master.sh
index 981cb15..d970fcc 100755
--- a/sbin/start-master.sh
+++ b/sbin/start-master.sh
@@ -48,7 +48,7 @@ if [ "$SPARK_MASTER_PORT" = "" ]; then
 fi

 if [ "$SPARK_MASTER_HOST" = "" ]; then
-  SPARK_MASTER_HOST=`hostname`
+  SPARK_MASTER_HOST=`hostname -f`
 fi

 if [ "$SPARK_MASTER_WEBUI_PORT" = "" ]; then

http://git-wip-us.apache.org/repos/asf/spark/blob/342c0e65/sbin/start-mesos-dispatcher.sh

diff --git a/sbin/start-mesos-dispatcher.sh b/sbin/start-mesos-dispatcher.sh
index 06a966d..ef65fb9 100755
--- a/sbin/start-mesos-dispatcher.sh
+++ b/sbin/start-mesos-dispatcher.sh
@@ -34,7 +34,7 @@ if [ "$SPARK_MESOS_DISPATCHER_PORT" = "" ]; then
 fi

 if [ "$SPARK_MESOS_DISPATCHER_HOST" = "" ]; then
-  SPARK_MESOS_DISPATCHER_HOST=`hostname`
+  SPARK_MESOS_DISPATCHER_HOST=`hostname -f`
 fi

 if [ "$SPARK_MESOS_DISPATCHER_NUM" = "" ]; then

http://git-wip-us.apache.org/repos/asf/spark/blob/342c0e65/sbin/start-slaves.sh

diff --git a/sbin/start-slaves.sh b/sbin/start-slaves.sh
index 0fa1605..7d88712 100755
--- a/sbin/start-slaves.sh
+++ b/sbin/start-slaves.sh
@@ -32,7 +32,7 @@ if [ "$SPARK_MASTER_PORT" = "" ]; then
 fi

 if [ "$SPARK_MASTER_HOST" = "" ]; then
-  SPARK_MASTER_HOST="`hostname`"
+  SPARK_MASTER_HOST="`hostname -f`"
 fi

 # Launch the slaves
spark git commit: [SPARK-17506][SQL] Improve the check double values equality rule.
Repository: spark
Updated Branches: refs/heads/master 3fe630d31 -> 5d3f4615f

[SPARK-17506][SQL] Improve the check double values equality rule.

## What changes were proposed in this pull request?

In `ExpressionEvalHelper`, we check the equality of two double values by testing whether the expected value is within the range [target - tolerance, target + tolerance], but this can cause a false negative when the compared numbers are very large.

Before:
```
val1 = 1.6358558070241E306
val2 = 1.6358558070240974E306
ExpressionEvalHelper.compareResults(val1, val2)
false
```

In fact, `val1` and `val2` are the same value but with different precisions; we should tolerate this case by comparing within a relative (percentage) range, e.g., checking that the expected value is within the range [target - target * tolerance_percentage, target + target * tolerance_percentage].

After:
```
val1 = 1.6358558070241E306
val2 = 1.6358558070240974E306
ExpressionEvalHelper.compareResults(val1, val2)
true
```

## How was this patch tested?

Existing test cases.

Author: jiangxingbo

Closes #15059 from jiangxb1987/deq.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5d3f4615
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5d3f4615
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5d3f4615
Branch: refs/heads/master
Commit: 5d3f4615f8d0a19b97cde5ae603f74aef2cc2fd2
Parents: 3fe630d
Author: jiangxingbo
Authored: Sun Sep 18 16:04:37 2016 +0100
Committer: Sean Owen
Committed: Sun Sep 18 16:04:37 2016 +0100

--
 .../expressions/ArithmeticExpressionSuite.scala | 8 ++
 .../expressions/ExpressionEvalHelper.scala      | 29 ++--
 2 files changed, 30 insertions(+), 7 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/5d3f4615/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ArithmeticExpressionSuite.scala

diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ArithmeticExpressionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ArithmeticExpressionSuite.scala
index 6873875..5c98242 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ArithmeticExpressionSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ArithmeticExpressionSuite.scala
@@ -170,11 +170,9 @@ class ArithmeticExpressionSuite extends SparkFunSuite with ExpressionEvalHelper
     checkEvaluation(Remainder(positiveLongLit, positiveLongLit), 0L)
     checkEvaluation(Remainder(negativeLongLit, negativeLongLit), 0L)

-    // TODO: the following lines would fail the test due to inconsistency result of interpret
-    // and codegen for remainder between giant values, seems like a numeric stability issue
-    // DataTypeTestUtils.numericTypeWithoutDecimal.foreach { tpe =>
-    //   checkConsistencyBetweenInterpretedAndCodegen(Remainder, tpe, tpe)
-    // }
+    DataTypeTestUtils.numericTypeWithoutDecimal.foreach { tpe =>
+      checkConsistencyBetweenInterpretedAndCodegen(Remainder, tpe, tpe)
+    }
   }

   test("Abs") {

http://git-wip-us.apache.org/repos/asf/spark/blob/5d3f4615/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala

diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala
index 668543a..f0c149c 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala
@@ -19,6 +19,7 @@ package org.apache.spark.sql.catalyst.expressions

 import org.scalacheck.Gen
 import org.scalactic.TripleEqualsSupport.Spread
+import org.scalatest.exceptions.TestFailedException
 import org.scalatest.prop.GeneratorDrivenPropertyChecks

 import org.apache.spark.SparkFunSuite
@@ -289,13 +290,37 @@ trait ExpressionEvalHelper extends GeneratorDrivenPropertyChecks {
     (result, expected) match {
       case (result: Array[Byte], expected: Array[Byte]) =>
         java.util.Arrays.equals(result, expected)
-      case (result: Double, expected: Spread[Double @unchecked]) =>
-        expected.asInstanceOf[Spread[Double]].isWithin(result)
       case (result: Double, expected: Double) if result.isNaN && expected.isNaN =>
         true
+      case (result: Double, expected: Double) =>
+        relativeErrorComparison(result, expected)
       case (result: Float, expected: Float) if
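The idea behind the fix can be sketched standalone as follows; `relativeErrorComparison` here is a simplified stand-in for the helper the diff adds to `ExpressionEvalHelper` (its exact signature and the default tolerance are assumptions):

```scala
// Compare doubles by relative error rather than absolute tolerance, so that
// values of very large magnitude (e.g. ~1e306) that differ only in the last
// few bits of precision still compare as equal.
def relativeErrorComparison(result: Double, expected: Double,
                            tolerance: Double = 1e-8): Boolean = {
  if (expected == 0.0) math.abs(result) <= tolerance
  else math.abs(result - expected) <= tolerance * math.abs(expected)
}

val val1 = 1.6358558070241E306
val val2 = 1.6358558070240974E306
// Relative difference is ~1.6e-15, so the values compare as equal:
assert(relativeErrorComparison(val1, val2))
// Genuinely different values still fail the comparison:
assert(!relativeErrorComparison(1.0, 2.0))
```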
spark git commit: [SPARK-17541][SQL] fix some DDL bugs about table management when same-name temp view exists
Repository: spark
Updated Branches: refs/heads/branch-2.0 5fd354b2d -> cf728b0f2

[SPARK-17541][SQL] fix some DDL bugs about table management when same-name temp view exists

## What changes were proposed in this pull request?

In `SessionCatalog`, we have several operations (`tableExists`, `dropTable`, `lookupRelation`, etc.) that handle both temp views and metastore tables/views. This brings some bugs to DDL commands that want to handle temp views only or metastore tables/views only. These bugs are:

1. `CREATE TABLE USING` will fail if a same-name temp view exists
2. `Catalog.dropTempView` will un-cache and drop a metastore table if a same-name table exists
3. `saveAsTable` will fail or have unexpected behaviour if a same-name temp view exists

These bug fixes are pulled out from https://github.com/apache/spark/pull/14962 and target both master and the 2.0 branch.

## How was this patch tested?

New regression tests.

Author: Wenchen Fan

Closes #15099 from cloud-fan/fix-view.

(cherry picked from commit 3fe630d314cf50d69868b7707ac8d8d2027080b8)
Signed-off-by: Wenchen Fan

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/cf728b0f
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/cf728b0f
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/cf728b0f
Branch: refs/heads/branch-2.0
Commit: cf728b0f2dc7c1e9f62a8984122d3bf91e6ba439
Parents: 5fd354b
Author: Wenchen Fan
Authored: Sun Sep 18 21:15:35 2016 +0800
Committer: Wenchen Fan
Committed: Sun Sep 18 21:50:05 2016 +0800

--
 .../sql/catalyst/catalog/SessionCatalog.scala   | 32 ++---
 .../catalyst/catalog/SessionCatalogSuite.scala  | 24 +++
 .../org/apache/spark/sql/DataFrameWriter.scala  | 9 ++-
 .../command/createDataSourceTables.scala        | 30 
 .../apache/spark/sql/internal/CatalogImpl.scala | 6 +-
 .../org/apache/spark/sql/SQLQuerySuite.scala    | 11 +++
 .../spark/sql/internal/CatalogSuite.scala       | 11 +++
 .../sql/test/DataFrameReaderWriterSuite.scala   | 76 
 .../sql/hive/MetastoreDataSourcesSuite.scala    | 13 ++-
 .../sql/sources/HadoopFsRelationTest.scala      | 10 ++-
 10 files changed, 172 insertions(+), 50 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/cf728b0f/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
index 2448513..ecb4dab 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
@@ -332,9 +332,9 @@ class SessionCatalog(
     new Path(new Path(dbLocation), formatTableName(tableIdent.table)).toString
   }

-  // -
-  // | Methods that interact with temporary and metastore tables |
-  // -
+  // --
+  // | Methods that interact with temp views only |
+  // --

   /**
    * Create a temporary table.
@@ -351,6 +351,24 @@ class SessionCatalog(
   }

   /**
+   * Return a temporary view exactly as it was stored.
+   */
+  def getTempView(name: String): Option[LogicalPlan] = synchronized {
+    tempTables.get(formatTableName(name))
+  }
+
+  /**
+   * Drop a temporary view.
+   */
+  def dropTempView(name: String): Unit = synchronized {
+    tempTables.remove(formatTableName(name))
+  }
+
+  // -
+  // | Methods that interact with temporary and metastore tables |
+  // -
+
+  /**
    * Rename a table.
    *
    * If a database is specified in `oldName`, this will rename the table in that database.
@@ -506,14 +524,6 @@ class SessionCatalog(
     tempTables.clear()
   }

-  /**
-   * Return a temporary table exactly as it was stored.
-   * For testing only.
-   */
-  private[catalog] def getTempTable(name: String): Option[LogicalPlan] = synchronized {
-    tempTables.get(formatTableName(name))
-  }
-
   //
   // Partitions
   //

http://git-wip-us.apache.org/repos/asf/spark/blob/cf728b0f/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala
spark git commit: [SPARK-17541][SQL] fix some DDL bugs about table management when same-name temp view exists
Repository: spark
Updated Branches: refs/heads/master 3a3c9ffbd -> 3fe630d31

[SPARK-17541][SQL] fix some DDL bugs about table management when same-name temp view exists

## What changes were proposed in this pull request?

In `SessionCatalog`, we have several operations (`tableExists`, `dropTable`, `lookupRelation`, etc.) that handle both temp views and metastore tables/views. This brings some bugs to DDL commands that want to handle temp views only or metastore tables/views only. These bugs are:

1. `CREATE TABLE USING` will fail if a same-name temp view exists
2. `Catalog.dropTempView` will un-cache and drop a metastore table if a same-name table exists
3. `saveAsTable` will fail or have unexpected behaviour if a same-name temp view exists

These bug fixes are pulled out from https://github.com/apache/spark/pull/14962 and target both master and the 2.0 branch.

## How was this patch tested?

New regression tests.

Author: Wenchen Fan

Closes #15099 from cloud-fan/fix-view.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3fe630d3
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3fe630d3
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3fe630d3
Branch: refs/heads/master
Commit: 3fe630d314cf50d69868b7707ac8d8d2027080b8
Parents: 3a3c9ff
Author: Wenchen Fan
Authored: Sun Sep 18 21:15:35 2016 +0800
Committer: Wenchen Fan
Committed: Sun Sep 18 21:15:35 2016 +0800

--
 .../sql/catalyst/catalog/SessionCatalog.scala   | 32 ++---
 .../catalyst/catalog/SessionCatalogSuite.scala  | 24 +++
 .../org/apache/spark/sql/DataFrameWriter.scala  | 9 ++-
 .../command/createDataSourceTables.scala        | 22 --
 .../apache/spark/sql/internal/CatalogImpl.scala | 8 ++-
 .../org/apache/spark/sql/SQLQuerySuite.scala    | 11 +++
 .../spark/sql/internal/CatalogSuite.scala       | 11 +++
 .../sql/test/DataFrameReaderWriterSuite.scala   | 76 
 .../sql/hive/MetastoreDataSourcesSuite.scala    | 13 ++-
 .../sql/sources/HadoopFsRelationTest.scala      | 10 ++-
 10 files changed, 170 insertions(+), 46 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/3fe630d3/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
index 9fb5db5..574c3d7 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
@@ -325,9 +325,9 @@ class SessionCatalog(
     new Path(new Path(dbLocation), formatTableName(tableIdent.table)).toString
   }

-  // -
-  // | Methods that interact with temporary and metastore tables |
-  // -
+  // --
+  // | Methods that interact with temp views only |
+  // --

   /**
    * Create a temporary table.
@@ -344,6 +344,24 @@ class SessionCatalog(
   }

   /**
+   * Return a temporary view exactly as it was stored.
+   */
+  def getTempView(name: String): Option[LogicalPlan] = synchronized {
+    tempTables.get(formatTableName(name))
+  }
+
+  /**
+   * Drop a temporary view.
+   */
+  def dropTempView(name: String): Unit = synchronized {
+    tempTables.remove(formatTableName(name))
+  }
+
+  // -
+  // | Methods that interact with temporary and metastore tables |
+  // -
+
+  /**
    * Rename a table.
    *
    * If a database is specified in `oldName`, this will rename the table in that database.
@@ -492,14 +510,6 @@ class SessionCatalog(
     tempTables.clear()
   }

-  /**
-   * Return a temporary table exactly as it was stored.
-   * For testing only.
-   */
-  private[catalog] def getTempTable(name: String): Option[LogicalPlan] = synchronized {
-    tempTables.get(formatTableName(name))
-  }
-
   //
   // Partitions
   //

http://git-wip-us.apache.org/repos/asf/spark/blob/3fe630d3/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala

--

diff --git
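As I read the bug list in the commit message, the intended post-fix semantics are roughly as in this hedged sketch (assuming a local SparkSession with a default warehouse; the table name and data are made up):

```scala
import org.apache.spark.sql.SparkSession

object TempViewVsTableDemo extends App {
  val spark = SparkSession.builder().master("local[*]").appName("temp-view-demo").getOrCreate()
  import spark.implicits._

  val df = Seq((1, "a"), (2, "b")).toDF("id", "value")

  // A temp view named "t"...
  df.createOrReplaceTempView("t")
  // ...no longer blocks creating a metastore table of the same name:
  df.write.saveAsTable("t")

  // dropTempView now removes ONLY the temp view; the metastore table survives:
  spark.catalog.dropTempView("t")
  assert(spark.table("t").count() == 2)  // "t" now resolves to the metastore table

  spark.stop()
}
```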
spark git commit: [SPARK-17518][SQL] Block Users to Specify the Internal Data Source Provider Hive
Repository: spark
Updated Branches: refs/heads/master 8faa5217b -> 3a3c9ffbd

[SPARK-17518][SQL] Block Users to Specify the Internal Data Source Provider Hive

### What changes were proposed in this pull request?

In Spark 2.1, we introduced a new internal provider, `hive`, for distinguishing Hive serde tables from data source tables. This PR blocks users from specifying it in the `DataFrameWriter` and SQL APIs.

### How was this patch tested?

Added a test case.

Author: gatorsmile

Closes #15073 from gatorsmile/formatHive.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3a3c9ffb
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3a3c9ffb
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3a3c9ffb
Branch: refs/heads/master
Commit: 3a3c9ffbd282244407e9437c2b02ae7e062dd183
Parents: 8faa521
Author: gatorsmile
Authored: Sun Sep 18 15:37:15 2016 +0800
Committer: Wenchen Fan
Committed: Sun Sep 18 15:37:15 2016 +0800

--
 .../org/apache/spark/sql/DataFrameWriter.scala  | 3 ++
 .../spark/sql/execution/SparkSqlParser.scala    | 5 +-
 .../apache/spark/sql/internal/CatalogImpl.scala | 2 +-
 .../spark/sql/internal/CatalogSuite.scala       | 7 +++
 .../apache/spark/sql/hive/HiveStrategies.scala  | 2 +-
 .../sql/hive/MetastoreDataSourcesSuite.scala    | 51 
 6 files changed, 67 insertions(+), 3 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/3a3c9ffb/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
index e137f07..64d3422 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
@@ -357,6 +357,9 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) {
   }

   private def saveAsTable(tableIdent: TableIdentifier): Unit = {
+    if (source.toLowerCase == "hive") {
+      throw new AnalysisException("Cannot create hive serde table with saveAsTable API")
+    }
     val tableExists = df.sparkSession.sessionState.catalog.tableExists(tableIdent)

http://git-wip-us.apache.org/repos/asf/spark/blob/3a3c9ffb/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
index 7ba1a9f..5359ced 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
@@ -22,7 +22,7 @@ import scala.collection.JavaConverters._

 import org.antlr.v4.runtime.{ParserRuleContext, Token}
 import org.antlr.v4.runtime.tree.TerminalNode

-import org.apache.spark.sql.SaveMode
+import org.apache.spark.sql.{AnalysisException, SaveMode}
 import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier}
 import org.apache.spark.sql.catalyst.catalog._
 import org.apache.spark.sql.catalyst.parser._
@@ -316,6 +316,9 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder {
     }
     val options = Option(ctx.tablePropertyList).map(visitPropertyKeyValues).getOrElse(Map.empty)
     val provider = ctx.tableProvider.qualifiedName.getText
+    if (provider.toLowerCase == "hive") {
+      throw new AnalysisException("Cannot create hive serde table with CREATE TABLE USING")
+    }
     val schema = Option(ctx.colTypeList()).map(createStructType)
     val partitionColumnNames =
       Option(ctx.partitionColumnNames)

http://git-wip-us.apache.org/repos/asf/spark/blob/3a3c9ffb/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala
index 1f87f0e..78ad710 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala
@@ -258,7 +258,7 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog {
     source: String,
     schema: StructType,
     options: Map[String, String]): DataFrame = {
-    if (source == "hive") {
+    if (source.toLowerCase == "hive") {
       throw new AnalysisException("Cannot create hive serde table with createExternalTable API.")
     }
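A minimal sketch of the new guard in use, assuming an active SparkSession named `spark`; the table names are made up, and the error strings are taken from the diff above:

```scala
import org.apache.spark.sql.AnalysisException

val df = spark.range(10).toDF("id")

// DataFrameWriter path: the "hive" provider is rejected before any write happens.
try df.write.format("hive").saveAsTable("t1")
catch {
  case e: AnalysisException =>
    println(e.getMessage)  // "Cannot create hive serde table with saveAsTable API"
}

// SQL DDL path: the parser rejects the provider as well.
try spark.sql("CREATE TABLE t2(id INT) USING hive")
catch {
  case e: AnalysisException =>
    println(e.getMessage)  // "Cannot create hive serde table with CREATE TABLE USING"
}
```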