spark git commit: [SPARK-19866][ML][PYSPARK] Add local version of Word2Vec findSynonyms for spark.ml: Python API
Repository: spark Updated Branches: refs/heads/master 8598d03a0 -> 31c74fec2 [SPARK-19866][ML][PYSPARK] Add local version of Word2Vec findSynonyms for spark.ml: Python API https://issues.apache.org/jira/browse/SPARK-19866 ## What changes were proposed in this pull request? Add Python API for findSynonymsArray matching Scala API. ## How was this patch tested? Manual test `./python/run-tests --python-executables=python2.7 --modules=pyspark-ml` Author: Xin RenAuthor: Xin Ren Author: Xin Ren Closes #17451 from keypointt/SPARK-19866. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/31c74fec Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/31c74fec Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/31c74fec Branch: refs/heads/master Commit: 31c74fec24ae3bc8b9eb4ecd90896de459c3cc22 Parents: 8598d03 Author: Xin Ren Authored: Fri Sep 8 12:09:00 2017 -0700 Committer: Holden Karau Committed: Fri Sep 8 12:09:00 2017 -0700 -- .../scala/org/apache/spark/ml/feature/Word2Vec.scala | 2 +- python/pyspark/ml/feature.py | 15 +++ 2 files changed, 16 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/31c74fec/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala -- diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala index d4c8e4b..f6095e2 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala @@ -229,7 +229,7 @@ class Word2VecModel private[ml] ( * Find "num" number of words closest in similarity to the given word, not * including the word itself. * @return a dataframe with columns "word" and "similarity" of the word and the cosine - * similarities between the synonyms and the given word vector. + * similarities between the synonyms and the given word. 
*/ @Since("1.5.0") def findSynonyms(word: String, num: Int): DataFrame = { http://git-wip-us.apache.org/repos/asf/spark/blob/31c74fec/python/pyspark/ml/feature.py -- diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py index 050537b..232ae3e 100755 --- a/python/pyspark/ml/feature.py +++ b/python/pyspark/ml/feature.py @@ -2751,6 +2751,8 @@ class Word2Vec(JavaEstimator, HasStepSize, HasMaxIter, HasSeed, HasInputCol, Has | c|[-0.3794820010662...| +++ ... +>>> model.findSynonymsArray("a", 2) +[(u'b', 0.25053444504737854), (u'c', -0.6980510950088501)] >>> from pyspark.sql.functions import format_number as fmt >>> model.findSynonyms("a", 2).select("word", fmt("similarity", 5).alias("similarity")).show() ++--+ @@ -2927,6 +2929,19 @@ class Word2VecModel(JavaModel, JavaMLReadable, JavaMLWritable): word = _convert_to_vector(word) return self._call_java("findSynonyms", word, num) +@since("2.3.0") +def findSynonymsArray(self, word, num): +""" +Find "num" number of words closest in similarity to "word". +word can be a string or vector representation. +Returns an array with two fields word and similarity (which +gives the cosine similarity). +""" +if not isinstance(word, basestring): +word = _convert_to_vector(word) +tuples = self._java_obj.findSynonymsArray(word, num) +return list(map(lambda st: (st._1(), st._2()), list(tuples))) + @inherit_doc class PCA(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable): - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-15243][ML][SQL][PYTHON] Add missing support for unicode in Param methods & functions in dataframe
Repository: spark Updated Branches: refs/heads/master 8a4f228dc -> 8598d03a0 [SPARK-15243][ML][SQL][PYTHON] Add missing support for unicode in Param methods & functions in dataframe ## What changes were proposed in this pull request? This PR proposes to support unicodes in Param methods in ML, other missed functions in DataFrame. For example, this causes a `ValueError` in Python 2.x when param is a unicode string: ```python >>> from pyspark.ml.classification import LogisticRegression >>> lr = LogisticRegression() >>> lr.hasParam("threshold") True >>> lr.hasParam(u"threshold") Traceback (most recent call last): ... raise TypeError("hasParam(): paramName must be a string") TypeError: hasParam(): paramName must be a string ``` This PR is based on https://github.com/apache/spark/pull/13036 ## How was this patch tested? Unit tests in `python/pyspark/ml/tests.py` and `python/pyspark/sql/tests.py`. Author: hyukjinkwonAuthor: sethah Closes #17096 from HyukjinKwon/SPARK-15243. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8598d03a Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8598d03a Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8598d03a Branch: refs/heads/master Commit: 8598d03a00a39dd23646bf752f9fed5d28e271c6 Parents: 8a4f228 Author: hyukjinkwon Authored: Fri Sep 8 11:57:33 2017 -0700 Committer: Holden Karau Committed: Fri Sep 8 11:57:33 2017 -0700 -- python/pyspark/ml/param/__init__.py | 4 ++-- python/pyspark/ml/tests.py | 15 +++ python/pyspark/sql/dataframe.py | 22 +++--- python/pyspark/sql/tests.py | 22 ++ 4 files changed, 42 insertions(+), 21 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/8598d03a/python/pyspark/ml/param/__init__.py -- diff --git a/python/pyspark/ml/param/__init__.py b/python/pyspark/ml/param/__init__.py index 1334207..043c25c 100644 --- a/python/pyspark/ml/param/__init__.py +++ b/python/pyspark/ml/param/__init__.py @@ -330,7 +330,7 @@ 
class Params(Identifiable): Tests whether this instance contains a param with a given (string) name. """ -if isinstance(paramName, str): +if isinstance(paramName, basestring): p = getattr(self, paramName, None) return isinstance(p, Param) else: @@ -413,7 +413,7 @@ class Params(Identifiable): if isinstance(param, Param): self._shouldOwn(param) return param -elif isinstance(param, str): +elif isinstance(param, basestring): return self.getParam(param) else: raise ValueError("Cannot resolve %r as a param." % param) http://git-wip-us.apache.org/repos/asf/spark/blob/8598d03a/python/pyspark/ml/tests.py -- diff --git a/python/pyspark/ml/tests.py b/python/pyspark/ml/tests.py index 6076b3c..509698f 100755 --- a/python/pyspark/ml/tests.py +++ b/python/pyspark/ml/tests.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- # # Licensed to the Apache Software Foundation (ASF) under one or more # contributor license agreements. See the NOTICE file distributed with @@ -352,6 +353,20 @@ class ParamTests(PySparkTestCase): testParams = TestParams() self.assertTrue(all([testParams.hasParam(p.name) for p in testParams.params])) self.assertFalse(testParams.hasParam("notAParameter")) +self.assertTrue(testParams.hasParam(u"maxIter")) + +def test_resolveparam(self): +testParams = TestParams() +self.assertEqual(testParams._resolveParam(testParams.maxIter), testParams.maxIter) +self.assertEqual(testParams._resolveParam("maxIter"), testParams.maxIter) + +self.assertEqual(testParams._resolveParam(u"maxIter"), testParams.maxIter) +if sys.version_info[0] >= 3: +# In Python 3, it is allowed to get/set attributes with non-ascii characters. 
+e_cls = AttributeError +else: +e_cls = UnicodeEncodeError +self.assertRaises(e_cls, lambda: testParams._resolveParam(u"ì")) def test_params(self): testParams = TestParams() http://git-wip-us.apache.org/repos/asf/spark/blob/8598d03a/python/pyspark/sql/dataframe.py -- diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index 1cea130..8f88545 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -748,7 +748,7 @@ class DataFrame(object):
spark git commit: [SPARK-21128][R][BACKPORT-2.2] Remove both "spark-warehouse" and "metastore_db" before listing files in R tests
Repository: spark Updated Branches: refs/heads/branch-2.2 9ae7c96ce -> 987682160 [SPARK-21128][R][BACKPORT-2.2] Remove both "spark-warehouse" and "metastore_db" before listing files in R tests ## What changes were proposed in this pull request? This PR proposes to list the files in test _after_ removing both "spark-warehouse" and "metastore_db" so that the next run of R tests pass fine. This is sometimes a bit annoying. ## How was this patch tested? Manually running multiple times R tests via `./R/run-tests.sh`. **Before** Second run: ``` SparkSQL functions: Spark package found in SPARK_HOME: .../spark ... ... ... ... ... 1234... Failed - 1. Failure: No extra files are created in SPARK_HOME by starting session and making calls (test_sparkSQL.R#3384) length(list1) not equal to length(list2). 1/1 mismatches [1] 25 - 23 == 2 2. Failure: No extra files are created in SPARK_HOME by starting session and making calls (test_sparkSQL.R#3384) sort(list1, na.last = TRUE) not equal to sort(list2, na.last = TRUE). 10/25 mismatches x[16]: "metastore_db" y[16]: "pkg" x[17]: "pkg" y[17]: "R" x[18]: "R" y[18]: "README.md" x[19]: "README.md" y[19]: "run-tests.sh" x[20]: "run-tests.sh" y[20]: "SparkR_2.2.0.tar.gz" x[21]: "metastore_db" y[21]: "pkg" x[22]: "pkg" y[22]: "R" x[23]: "R" y[23]: "README.md" x[24]: "README.md" y[24]: "run-tests.sh" x[25]: "run-tests.sh" y[25]: "SparkR_2.2.0.tar.gz" 3. Failure: No extra files are created in SPARK_HOME by starting session and making calls (test_sparkSQL.R#3388) length(list1) not equal to length(list2). 1/1 mismatches [1] 25 - 23 == 2 4. Failure: No extra files are created in SPARK_HOME by starting session and making calls (test_sparkSQL.R#3388) sort(list1, na.last = TRUE) not equal to sort(list2, na.last = TRUE). 
10/25 mismatches x[16]: "metastore_db" y[16]: "pkg" x[17]: "pkg" y[17]: "R" x[18]: "R" y[18]: "README.md" x[19]: "README.md" y[19]: "run-tests.sh" x[20]: "run-tests.sh" y[20]: "SparkR_2.2.0.tar.gz" x[21]: "metastore_db" y[21]: "pkg" x[22]: "pkg" y[22]: "R" x[23]: "R" y[23]: "README.md" x[24]: "README.md" y[24]: "run-tests.sh" x[25]: "run-tests.sh" y[25]: "SparkR_2.2.0.tar.gz" DONE === ``` **After** Second run: ``` SparkSQL functions: Spark package found in SPARK_HOME: .../spark ... ... ... ... ... ... ``` Author: hyukjinkwon Closes #18335 from HyukjinKwon/SPARK-21128. Author: hyukjinkwonCloses #19166 from felixcheung/rbackport21128. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/98768216 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/98768216 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/98768216 Branch: refs/heads/branch-2.2 Commit: 9876821603ec12e77ee58e8ef6f5841c9c310c93 Parents: 9ae7c96 Author: hyukjinkwon Authored: Fri Sep 8 09:47:45 2017 -0700 Committer: Felix Cheung Committed: Fri Sep 8 09:47:45 2017 -0700
spark git commit: [SPARK-21946][TEST] fix flaky test: "alter table: rename cached table" in InMemoryCatalogedDDLSuite
Repository: spark Updated Branches: refs/heads/branch-2.2 08cb06af2 -> 9ae7c96ce [SPARK-21946][TEST] fix flaky test: "alter table: rename cached table" in InMemoryCatalogedDDLSuite ## What changes were proposed in this pull request? This PR fixes flaky test `InMemoryCatalogedDDLSuite "alter table: rename cached table"`. Since this test validates distributed DataFrame, the result should be checked by using `checkAnswer`. The original version used `df.collect().Seq` method that does not guaranty an order of each element of the result. ## How was this patch tested? Use existing test case Author: Kazuaki IshizakiCloses #19159 from kiszk/SPARK-21946. (cherry picked from commit 8a4f228dc0afed7992695486ecab6bc522f1e392) Signed-off-by: gatorsmile Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9ae7c96c Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9ae7c96c Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9ae7c96c Branch: refs/heads/branch-2.2 Commit: 9ae7c96ce33d3d67f49059b5b83ef1d9d3d8e8e5 Parents: 08cb06a Author: Kazuaki Ishizaki Authored: Fri Sep 8 09:39:20 2017 -0700 Committer: gatorsmile Committed: Fri Sep 8 09:39:32 2017 -0700 -- .../scala/org/apache/spark/sql/execution/command/DDLSuite.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/9ae7c96c/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala -- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala index 56d2937..5109c64 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala @@ -758,7 +758,7 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { val df = (1 to 2).map { i => (i, i.toString) 
}.toDF("age", "name") df.write.insertInto("students") spark.catalog.cacheTable("students") -assume(spark.table("students").collect().toSeq == df.collect().toSeq, "bad test: wrong data") +checkAnswer(spark.table("students"), df) assume(spark.catalog.isCached("students"), "bad test: table was not cached in the first place") sql("ALTER TABLE students RENAME TO teachers") sql("CREATE TABLE students (age INT, name STRING) USING parquet") @@ -767,7 +767,7 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { assert(!spark.catalog.isCached("students")) assert(spark.catalog.isCached("teachers")) assert(spark.table("students").collect().isEmpty) -assert(spark.table("teachers").collect().toSeq == df.collect().toSeq) +checkAnswer(spark.table("teachers"), df) } test("rename temporary table - destination table with database name") { - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-21946][TEST] fix flaky test: "alter table: rename cached table" in InMemoryCatalogedDDLSuite
Repository: spark Updated Branches: refs/heads/master 0dfc1ec59 -> 8a4f228dc [SPARK-21946][TEST] fix flaky test: "alter table: rename cached table" in InMemoryCatalogedDDLSuite ## What changes were proposed in this pull request? This PR fixes flaky test `InMemoryCatalogedDDLSuite "alter table: rename cached table"`. Since this test validates distributed DataFrame, the result should be checked by using `checkAnswer`. The original version used `df.collect().Seq` method that does not guaranty an order of each element of the result. ## How was this patch tested? Use existing test case Author: Kazuaki IshizakiCloses #19159 from kiszk/SPARK-21946. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8a4f228d Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8a4f228d Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8a4f228d Branch: refs/heads/master Commit: 8a4f228dc0afed7992695486ecab6bc522f1e392 Parents: 0dfc1ec Author: Kazuaki Ishizaki Authored: Fri Sep 8 09:39:20 2017 -0700 Committer: gatorsmile Committed: Fri Sep 8 09:39:20 2017 -0700 -- .../scala/org/apache/spark/sql/execution/command/DDLSuite.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/8a4f228d/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala -- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala index ad6fc20..d19cfee 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala @@ -783,7 +783,7 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { val df = (1 to 2).map { i => (i, i.toString) }.toDF("age", "name") df.write.insertInto("students") spark.catalog.cacheTable("students") 
-assume(spark.table("students").collect().toSeq == df.collect().toSeq, "bad test: wrong data") +checkAnswer(spark.table("students"), df) assume(spark.catalog.isCached("students"), "bad test: table was not cached in the first place") sql("ALTER TABLE students RENAME TO teachers") sql("CREATE TABLE students (age INT, name STRING) USING parquet") @@ -792,7 +792,7 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { assert(!spark.catalog.isCached("students")) assert(spark.catalog.isCached("teachers")) assert(spark.table("students").collect().isEmpty) -assert(spark.table("teachers").collect().toSeq == df.collect().toSeq) +checkAnswer(spark.table("teachers"), df) } test("rename temporary table - destination table with database name") { - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-21936][SQL][2.2] backward compatibility test framework for HiveExternalCatalog
Repository: spark Updated Branches: refs/heads/branch-2.2 781a1f83c -> 08cb06af2 [SPARK-21936][SQL][2.2] backward compatibility test framework for HiveExternalCatalog backport https://github.com/apache/spark/pull/19148 to 2.2 Author: Wenchen FanCloses #19163 from cloud-fan/test. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/08cb06af Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/08cb06af Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/08cb06af Branch: refs/heads/branch-2.2 Commit: 08cb06af20f87d40b78b521f82774cf1b6f9c80a Parents: 781a1f8 Author: Wenchen Fan Authored: Fri Sep 8 09:35:41 2017 -0700 Committer: gatorsmile Committed: Fri Sep 8 09:35:41 2017 -0700 -- sql/hive/pom.xml| 4 + ...ernalCatalogBackwardCompatibilitySuite.scala | 264 --- .../hive/HiveExternalCatalogVersionsSuite.scala | 194 ++ .../spark/sql/hive/HiveSparkSubmitSuite.scala | 77 +- .../sql/hive/MetastoreDataSourcesSuite.scala| 27 -- .../spark/sql/hive/SparkSubmitTestUtils.scala | 101 +++ 6 files changed, 301 insertions(+), 366 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/08cb06af/sql/hive/pom.xml -- diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index 3dca866..616f7cd 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -163,6 +163,10 @@ libfb303 + org.apache.derby + derby + + org.scalacheck scalacheck_${scala.binary.version} test http://git-wip-us.apache.org/repos/asf/spark/blob/08cb06af/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogBackwardCompatibilitySuite.scala -- diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogBackwardCompatibilitySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogBackwardCompatibilitySuite.scala deleted file mode 100644 index 705d43f..000 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogBackwardCompatibilitySuite.scala +++ /dev/null @@ -1,264 +0,0 
@@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - *http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.hive - -import java.net.URI - -import org.apache.hadoop.fs.Path -import org.scalatest.BeforeAndAfterEach - -import org.apache.spark.sql.QueryTest -import org.apache.spark.sql.catalyst.TableIdentifier -import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType} -import org.apache.spark.sql.hive.client.HiveClient -import org.apache.spark.sql.hive.test.TestHiveSingleton -import org.apache.spark.sql.test.SQLTestUtils -import org.apache.spark.sql.types.StructType -import org.apache.spark.util.Utils - - -class HiveExternalCatalogBackwardCompatibilitySuite extends QueryTest - with SQLTestUtils with TestHiveSingleton with BeforeAndAfterEach { - - // To test `HiveExternalCatalog`, we need to read/write the raw table meta from/to hive client. 
- val hiveClient: HiveClient = -spark.sharedState.externalCatalog.asInstanceOf[HiveExternalCatalog].client - - val tempDir = Utils.createTempDir().getCanonicalFile - val tempDirUri = tempDir.toURI - val tempDirStr = tempDir.getAbsolutePath - - override def beforeEach(): Unit = { -sql("CREATE DATABASE test_db") -for ((tbl, _) <- rawTablesAndExpectations) { - hiveClient.createTable(tbl, ignoreIfExists = false) -} - } - - override def afterEach(): Unit = { -Utils.deleteRecursively(tempDir) -hiveClient.dropDatabase("test_db", ignoreIfNotExists = false, cascade = true) - } - - private def getTableMetadata(tableName: String): CatalogTable = { -spark.sharedState.externalCatalog.getTable("test_db", tableName) - } - - private def defaultTableURI(tableName: String): URI = { -
spark git commit: [SPARK-21726][SQL][FOLLOW-UP] Check for structural integrity of the plan in Optimizer in test mode
Repository: spark Updated Branches: refs/heads/master dbb824125 -> 0dfc1ec59 [SPARK-21726][SQL][FOLLOW-UP] Check for structural integrity of the plan in Optimzer in test mode ## What changes were proposed in this pull request? The condition in `Optimizer.isPlanIntegral` is wrong. We should always return `true` if not in test mode. ## How was this patch tested? Manually test. Author: Liang-Chi HsiehCloses #19161 from viirya/SPARK-21726-followup. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0dfc1ec5 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0dfc1ec5 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0dfc1ec5 Branch: refs/heads/master Commit: 0dfc1ec59e45c836cb968bc9b77c69bf0e917b06 Parents: dbb8241 Author: Liang-Chi Hsieh Authored: Fri Sep 8 20:21:37 2017 +0900 Committer: hyukjinkwon Committed: Fri Sep 8 20:21:37 2017 +0900 -- .../scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/0dfc1ec5/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 2426a8b..a602894 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -41,7 +41,7 @@ abstract class Optimizer(sessionCatalog: SessionCatalog) // Check for structural integrity of the plan in test mode. Currently we only check if a plan is // still resolved after the execution of each rule. 
override protected def isPlanIntegral(plan: LogicalPlan): Boolean = { -Utils.isTesting && plan.resolved +!Utils.isTesting || plan.resolved } protected def fixedPoint = FixedPoint(SQLConf.get.optimizerMaxIterations) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[2/5] spark-website git commit: Use HTTPS for all apache.org links
http://git-wip-us.apache.org/repos/asf/spark-website/blob/a1f847ef/site/releases/spark-release-2-1-1.html -- diff --git a/site/releases/spark-release-2-1-1.html b/site/releases/spark-release-2-1-1.html index 2f5e4e0..bbfc09d 100644 --- a/site/releases/spark-release-2-1-1.html +++ b/site/releases/spark-release-2-1-1.html @@ -139,14 +139,14 @@ -http://www.apache.org/; class="dropdown-toggle" data-toggle="dropdown"> +https://www.apache.org/; class="dropdown-toggle" data-toggle="dropdown"> Apache Software Foundation - http://www.apache.org/;>Apache Homepage - http://www.apache.org/licenses/;>License - http://www.apache.org/foundation/sponsorship.html;>Sponsorship - http://www.apache.org/foundation/thanks.html;>Thanks - http://www.apache.org/security/;>Security + https://www.apache.org/;>Apache Homepage + https://www.apache.org/licenses/;>License + https://www.apache.org/foundation/sponsorship.html;>Sponsorship + https://www.apache.org/foundation/thanks.html;>Thanks + https://www.apache.org/security/;>Security @@ -217,7 +217,7 @@ Apache Spark, Spark, Apache, and the Spark logo are trademarks of - http://www.apache.org;>The Apache Software Foundation. + https://www.apache.org;>The Apache Software Foundation. 
http://git-wip-us.apache.org/repos/asf/spark-website/blob/a1f847ef/site/releases/spark-release-2-2-0.html -- diff --git a/site/releases/spark-release-2-2-0.html b/site/releases/spark-release-2-2-0.html index d39e0dc..4e7d31a 100644 --- a/site/releases/spark-release-2-2-0.html +++ b/site/releases/spark-release-2-2-0.html @@ -139,14 +139,14 @@ -http://www.apache.org/; class="dropdown-toggle" data-toggle="dropdown"> +https://www.apache.org/; class="dropdown-toggle" data-toggle="dropdown"> Apache Software Foundation - http://www.apache.org/;>Apache Homepage - http://www.apache.org/licenses/;>License - http://www.apache.org/foundation/sponsorship.html;>Sponsorship - http://www.apache.org/foundation/thanks.html;>Thanks - http://www.apache.org/security/;>Security + https://www.apache.org/;>Apache Homepage + https://www.apache.org/licenses/;>License + https://www.apache.org/foundation/sponsorship.html;>Sponsorship + https://www.apache.org/foundation/thanks.html;>Thanks + https://www.apache.org/security/;>Security @@ -429,7 +429,7 @@ ALeksander Eskilson, Aaditya Ramesh, Adam Budde, Adam Roberts, Adrian Ionescu, A Apache Spark, Spark, Apache, and the Spark logo are trademarks of - http://www.apache.org;>The Apache Software Foundation. + https://www.apache.org;>The Apache Software Foundation. 
http://git-wip-us.apache.org/repos/asf/spark-website/blob/a1f847ef/site/research.html -- diff --git a/site/research.html b/site/research.html index 010eac0..173 100644 --- a/site/research.html +++ b/site/research.html @@ -139,14 +139,14 @@ -http://www.apache.org/; class="dropdown-toggle" data-toggle="dropdown"> +https://www.apache.org/; class="dropdown-toggle" data-toggle="dropdown"> Apache Software Foundation - http://www.apache.org/;>Apache Homepage - http://www.apache.org/licenses/;>License - http://www.apache.org/foundation/sponsorship.html;>Sponsorship - http://www.apache.org/foundation/thanks.html;>Thanks - http://www.apache.org/security/;>Security + https://www.apache.org/;>Apache Homepage + https://www.apache.org/licenses/;>License + https://www.apache.org/foundation/sponsorship.html;>Sponsorship + https://www.apache.org/foundation/thanks.html;>Thanks + https://www.apache.org/security/;>Security @@ -261,7 +261,7 @@ Spark offers an abstraction called http://people.csail.mit.edu/matei/pa Apache Spark, Spark, Apache, and the Spark logo are trademarks of - http://www.apache.org;>The Apache Software Foundation. + https://www.apache.org;>The Apache Software Foundation. http://git-wip-us.apache.org/repos/asf/spark-website/blob/a1f847ef/site/screencasts/1-first-steps-with-spark.html -- diff --git a/site/screencasts/1-first-steps-with-spark.html b/site/screencasts/1-first-steps-with-spark.html index 77194b2..cc7ae10 100644 --- a/site/screencasts/1-first-steps-with-spark.html +++ b/site/screencasts/1-first-steps-with-spark.html @@ -139,14 +139,14 @@ -
[4/5] spark-website git commit: Use HTTPS for all apache.org links
http://git-wip-us.apache.org/repos/asf/spark-website/blob/a1f847ef/site/news/spark-1-2-2-released.html -- diff --git a/site/news/spark-1-2-2-released.html b/site/news/spark-1-2-2-released.html index e68dd8c..a1f64c7 100644 --- a/site/news/spark-1-2-2-released.html +++ b/site/news/spark-1-2-2-released.html @@ -139,14 +139,14 @@ -http://www.apache.org/; class="dropdown-toggle" data-toggle="dropdown"> +https://www.apache.org/; class="dropdown-toggle" data-toggle="dropdown"> Apache Software Foundation - http://www.apache.org/;>Apache Homepage - http://www.apache.org/licenses/;>License - http://www.apache.org/foundation/sponsorship.html;>Sponsorship - http://www.apache.org/foundation/thanks.html;>Thanks - http://www.apache.org/security/;>Security + https://www.apache.org/;>Apache Homepage + https://www.apache.org/licenses/;>License + https://www.apache.org/foundation/sponsorship.html;>Sponsorship + https://www.apache.org/foundation/thanks.html;>Thanks + https://www.apache.org/security/;>Security @@ -215,7 +215,7 @@ Apache Spark, Spark, Apache, and the Spark logo are trademarks of - http://www.apache.org;>The Apache Software Foundation. + https://www.apache.org;>The Apache Software Foundation. 
http://git-wip-us.apache.org/repos/asf/spark-website/blob/a1f847ef/site/news/spark-1-3-0-released.html -- diff --git a/site/news/spark-1-3-0-released.html b/site/news/spark-1-3-0-released.html index ea97ff1..d651cae 100644 --- a/site/news/spark-1-3-0-released.html +++ b/site/news/spark-1-3-0-released.html @@ -139,14 +139,14 @@ -http://www.apache.org/; class="dropdown-toggle" data-toggle="dropdown"> +https://www.apache.org/; class="dropdown-toggle" data-toggle="dropdown"> Apache Software Foundation - http://www.apache.org/;>Apache Homepage - http://www.apache.org/licenses/;>License - http://www.apache.org/foundation/sponsorship.html;>Sponsorship - http://www.apache.org/foundation/thanks.html;>Thanks - http://www.apache.org/security/;>Security + https://www.apache.org/;>Apache Homepage + https://www.apache.org/licenses/;>License + https://www.apache.org/foundation/sponsorship.html;>Sponsorship + https://www.apache.org/foundation/thanks.html;>Thanks + https://www.apache.org/security/;>Security @@ -215,7 +215,7 @@ Apache Spark, Spark, Apache, and the Spark logo are trademarks of - http://www.apache.org;>The Apache Software Foundation. + https://www.apache.org;>The Apache Software Foundation. 
http://git-wip-us.apache.org/repos/asf/spark-website/blob/a1f847ef/site/news/spark-1-4-0-released.html -- diff --git a/site/news/spark-1-4-0-released.html b/site/news/spark-1-4-0-released.html index ac3690e..4f1909b 100644 --- a/site/news/spark-1-4-0-released.html +++ b/site/news/spark-1-4-0-released.html @@ -139,14 +139,14 @@ -http://www.apache.org/; class="dropdown-toggle" data-toggle="dropdown"> +https://www.apache.org/; class="dropdown-toggle" data-toggle="dropdown"> Apache Software Foundation - http://www.apache.org/;>Apache Homepage - http://www.apache.org/licenses/;>License - http://www.apache.org/foundation/sponsorship.html;>Sponsorship - http://www.apache.org/foundation/thanks.html;>Thanks - http://www.apache.org/security/;>Security + https://www.apache.org/;>Apache Homepage + https://www.apache.org/licenses/;>License + https://www.apache.org/foundation/sponsorship.html;>Sponsorship + https://www.apache.org/foundation/thanks.html;>Thanks + https://www.apache.org/security/;>Security @@ -215,7 +215,7 @@ Apache Spark, Spark, Apache, and the Spark logo are trademarks of - http://www.apache.org;>The Apache Software Foundation. + https://www.apache.org;>The Apache Software Foundation. http://git-wip-us.apache.org/repos/asf/spark-website/blob/a1f847ef/site/news/spark-1-4-1-released.html -- diff --git a/site/news/spark-1-4-1-released.html b/site/news/spark-1-4-1-released.html index 839209d..fa50b1d 100644 --- a/site/news/spark-1-4-1-released.html +++ b/site/news/spark-1-4-1-released.html @@ -139,14 +139,14 @@ -http://www.apache.org/; class="dropdown-toggle" data-toggle="dropdown"> +https://www.apache.org/; class="dropdown-toggle" data-toggle="dropdown">
[1/5] spark-website git commit: Added CVE-2017-12612
Repository: spark-website Updated Branches: refs/heads/asf-site 434db70b4 -> a1f847efc Added CVE-2017-12612 Project: http://git-wip-us.apache.org/repos/asf/spark-website/repo Commit: http://git-wip-us.apache.org/repos/asf/spark-website/commit/6d90ff44 Tree: http://git-wip-us.apache.org/repos/asf/spark-website/tree/6d90ff44 Diff: http://git-wip-us.apache.org/repos/asf/spark-website/diff/6d90ff44 Branch: refs/heads/asf-site Commit: 6d90ff44d674c716e93f811cc9308144cb67f083 Parents: 434db70 Author: Sean OwenAuthored: Fri Sep 8 08:12:58 2017 +0100 Committer: Sean Owen Committed: Fri Sep 8 08:12:58 2017 +0100 -- security.md| 25 + site/security.html | 27 +++ 2 files changed, 52 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark-website/blob/6d90ff44/security.md -- diff --git a/security.md b/security.md index a26f1d1..c5e6dbd 100644 --- a/security.md +++ b/security.md @@ -17,6 +17,31 @@ non-public list that will reach the Spark PMC. Messages to `secur...@apache.org` Known Security Issues +CVE-2017-12612 Unsafe deserialization in Apache Spark launcher API + +Severity: Medium + +Vendor: The Apache Software Foundation + +Versions Affected: +Versions of Apache Spark from 1.6.0 until 2.1.1 + +Description: +In Apache Spark 1.6.0 until 2.1.1, the launcher API performs unsafe +deserialization of data received by its socket. This makes applications +launched programmatically using the launcher API potentially +vulnerable to arbitrary code execution by an attacker with access to any user +account on the local machine. It does not affect apps run by spark-submit or +spark-shell. The attacker would be able to execute code as the user that ran +the Spark application. Users are encouraged to update to version 2.2.0 or +later. + +Mitigation: +Update to Apache Spark 2.2.0 or later. 
+ +Credit: +- Aditya Sharad, Semmle + CVE-2017-7678 Apache Spark XSS web UI MHTML vulnerability Severity: Low http://git-wip-us.apache.org/repos/asf/spark-website/blob/6d90ff44/site/security.html -- diff --git a/site/security.html b/site/security.html index 31496f8..4b71319 100644 --- a/site/security.html +++ b/site/security.html @@ -204,6 +204,33 @@ non-public list that will reach the Spark PMC. Messages to security@apache Known Security Issues +CVE-2017-12612 Unsafe deserialization in Apache Spark launcher API + +Severity: Medium + +Vendor: The Apache Software Foundation + +Versions Affected: +Versions of Apache Spark from 1.6.0 until 2.1.1 + +Description: +In Apache Spark 1.6.0 until 2.1.1, the launcher API performs unsafe +deserialization of data received by its socket. This makes applications +launched programmatically using the launcher API potentially +vulnerable to arbitrary code execution by an attacker with access to any user +account on the local machine. It does not affect apps run by spark-submit or +spark-shell. The attacker would be able to execute code as the user that ran +the Spark application. Users are encouraged to update to version 2.2.0 or +later. + +Mitigation: +Update to Apache Spark 2.2.0 or later. + +Credit: + + Aditya Sharad, Semmle + + CVE-2017-7678 Apache Spark XSS web UI MHTML vulnerability Severity: Low - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[3/5] spark-website git commit: Use HTTPS for all apache.org links
http://git-wip-us.apache.org/repos/asf/spark-website/blob/a1f847ef/site/news/submit-talks-to-spark-summit-2016.html -- diff --git a/site/news/submit-talks-to-spark-summit-2016.html b/site/news/submit-talks-to-spark-summit-2016.html index 2aef7d2..5299202 100644 --- a/site/news/submit-talks-to-spark-summit-2016.html +++ b/site/news/submit-talks-to-spark-summit-2016.html @@ -139,14 +139,14 @@ -http://www.apache.org/; class="dropdown-toggle" data-toggle="dropdown"> +https://www.apache.org/; class="dropdown-toggle" data-toggle="dropdown"> Apache Software Foundation - http://www.apache.org/;>Apache Homepage - http://www.apache.org/licenses/;>License - http://www.apache.org/foundation/sponsorship.html;>Sponsorship - http://www.apache.org/foundation/thanks.html;>Thanks - http://www.apache.org/security/;>Security + https://www.apache.org/;>Apache Homepage + https://www.apache.org/licenses/;>License + https://www.apache.org/foundation/sponsorship.html;>Sponsorship + https://www.apache.org/foundation/thanks.html;>Thanks + https://www.apache.org/security/;>Security @@ -214,7 +214,7 @@ Apache Spark, Spark, Apache, and the Spark logo are trademarks of - http://www.apache.org;>The Apache Software Foundation. + https://www.apache.org;>The Apache Software Foundation. 
http://git-wip-us.apache.org/repos/asf/spark-website/blob/a1f847ef/site/news/submit-talks-to-spark-summit-east-2016.html -- diff --git a/site/news/submit-talks-to-spark-summit-east-2016.html b/site/news/submit-talks-to-spark-summit-east-2016.html index 1f76cee..f31dbea 100644 --- a/site/news/submit-talks-to-spark-summit-east-2016.html +++ b/site/news/submit-talks-to-spark-summit-east-2016.html @@ -139,14 +139,14 @@ -http://www.apache.org/; class="dropdown-toggle" data-toggle="dropdown"> +https://www.apache.org/; class="dropdown-toggle" data-toggle="dropdown"> Apache Software Foundation - http://www.apache.org/;>Apache Homepage - http://www.apache.org/licenses/;>License - http://www.apache.org/foundation/sponsorship.html;>Sponsorship - http://www.apache.org/foundation/thanks.html;>Thanks - http://www.apache.org/security/;>Security + https://www.apache.org/;>Apache Homepage + https://www.apache.org/licenses/;>License + https://www.apache.org/foundation/sponsorship.html;>Sponsorship + https://www.apache.org/foundation/thanks.html;>Thanks + https://www.apache.org/security/;>Security @@ -213,7 +213,7 @@ Apache Spark, Spark, Apache, and the Spark logo are trademarks of - http://www.apache.org;>The Apache Software Foundation. + https://www.apache.org;>The Apache Software Foundation. 
http://git-wip-us.apache.org/repos/asf/spark-website/blob/a1f847ef/site/news/submit-talks-to-spark-summit-eu-2016.html -- diff --git a/site/news/submit-talks-to-spark-summit-eu-2016.html b/site/news/submit-talks-to-spark-summit-eu-2016.html index 1db021f..b88509c 100644 --- a/site/news/submit-talks-to-spark-summit-eu-2016.html +++ b/site/news/submit-talks-to-spark-summit-eu-2016.html @@ -139,14 +139,14 @@ -http://www.apache.org/; class="dropdown-toggle" data-toggle="dropdown"> +https://www.apache.org/; class="dropdown-toggle" data-toggle="dropdown"> Apache Software Foundation - http://www.apache.org/;>Apache Homepage - http://www.apache.org/licenses/;>License - http://www.apache.org/foundation/sponsorship.html;>Sponsorship - http://www.apache.org/foundation/thanks.html;>Thanks - http://www.apache.org/security/;>Security + https://www.apache.org/;>Apache Homepage + https://www.apache.org/licenses/;>License + https://www.apache.org/foundation/sponsorship.html;>Sponsorship + https://www.apache.org/foundation/thanks.html;>Thanks + https://www.apache.org/security/;>Security @@ -213,7 +213,7 @@ Apache Spark, Spark, Apache, and the Spark logo are trademarks of - http://www.apache.org;>The Apache Software Foundation. + https://www.apache.org;>The Apache Software Foundation. http://git-wip-us.apache.org/repos/asf/spark-website/blob/a1f847ef/site/news/two-weeks-to-spark-summit-2014.html -- diff --git a/site/news/two-weeks-to-spark-summit-2014.html b/site/news/two-weeks-to-spark-summit-2014.html index ca97cc2..4292f96 100644 ---
[5/5] spark-website git commit: Use HTTPS for all apache.org links
Use HTTPS for all apache.org links Project: http://git-wip-us.apache.org/repos/asf/spark-website/repo Commit: http://git-wip-us.apache.org/repos/asf/spark-website/commit/a1f847ef Tree: http://git-wip-us.apache.org/repos/asf/spark-website/tree/a1f847ef Diff: http://git-wip-us.apache.org/repos/asf/spark-website/diff/a1f847ef Branch: refs/heads/asf-site Commit: a1f847efc1223e83bee5178f909ebe6930d28d09 Parents: 6d90ff4 Author: Sean OwenAuthored: Fri Sep 8 08:21:23 2017 +0100 Committer: Sean Owen Committed: Fri Sep 8 08:21:23 2017 +0100 -- _layouts/global.html | 14 +++--- site/committers.html | 14 +++--- site/community.html | 14 +++--- site/contributing.html| 14 +++--- site/developer-tools.html | 14 +++--- site/documentation.html | 14 +++--- site/downloads.html | 14 +++--- site/examples.html| 14 +++--- site/faq.html | 14 +++--- site/graphx/index.html| 14 +++--- site/improvement-proposals.html | 14 +++--- site/index.html | 14 +++--- site/mailing-lists.html | 14 +++--- site/mllib/index.html | 14 +++--- site/news/amp-camp-2013-registration-ope.html | 14 +++--- site/news/announcing-the-first-spark-summit.html | 14 +++--- site/news/fourth-spark-screencast-published.html | 14 +++--- site/news/index.html | 14 +++--- site/news/nsdi-paper.html | 14 +++--- site/news/one-month-to-spark-summit-2015.html | 14 +++--- site/news/proposals-open-for-spark-summit-east.html | 14 +++--- .../news/registration-open-for-spark-summit-east.html | 14 +++--- site/news/run-spark-and-shark-on-amazon-emr.html | 14 +++--- site/news/spark-0-6-1-and-0-5-2-released.html | 14 +++--- site/news/spark-0-6-2-released.html | 14 +++--- site/news/spark-0-7-0-released.html | 14 +++--- site/news/spark-0-7-2-released.html | 14 +++--- site/news/spark-0-7-3-released.html | 14 +++--- site/news/spark-0-8-0-released.html | 14 +++--- site/news/spark-0-8-1-released.html | 14 +++--- site/news/spark-0-9-0-released.html | 14 +++--- site/news/spark-0-9-1-released.html | 14 +++--- site/news/spark-0-9-2-released.html | 14 
+++--- site/news/spark-1-0-0-released.html | 14 +++--- site/news/spark-1-0-1-released.html | 14 +++--- site/news/spark-1-0-2-released.html | 14 +++--- site/news/spark-1-1-0-released.html | 14 +++--- site/news/spark-1-1-1-released.html | 14 +++--- site/news/spark-1-2-0-released.html | 14 +++--- site/news/spark-1-2-1-released.html | 14 +++--- site/news/spark-1-2-2-released.html | 14 +++--- site/news/spark-1-3-0-released.html | 14 +++--- site/news/spark-1-4-0-released.html | 14 +++--- site/news/spark-1-4-1-released.html | 14 +++--- site/news/spark-1-5-0-released.html | 14 +++--- site/news/spark-1-5-1-released.html | 14 +++--- site/news/spark-1-5-2-released.html | 14 +++--- site/news/spark-1-6-0-released.html | 14 +++--- site/news/spark-1-6-1-released.html | 14 +++--- site/news/spark-1-6-2-released.html | 14 +++--- site/news/spark-1-6-3-released.html | 14 +++--- site/news/spark-2-0-0-released.html | 14 +++--- site/news/spark-2-0-1-released.html | 14 +++--- site/news/spark-2-0-2-released.html | 14 +++--- site/news/spark-2-1-0-released.html | 14 +++--- site/news/spark-2-1-1-released.html | 14 +++--- site/news/spark-2-2-0-released.html | 14 +++--- site/news/spark-2.0.0-preview.html| 14
spark git commit: [SPARK-21915][ML][PYSPARK] Model 1 and Model 2 ParamMaps Missing
Repository: spark Updated Branches: refs/heads/branch-2.2 4304d0bf0 -> 781a1f83c [SPARK-21915][ML][PYSPARK] Model 1 and Model 2 ParamMaps Missing dongjoon-hyun HyukjinKwon Error in PySpark example code: /examples/src/main/python/ml/estimator_transformer_param_example.py The original Scala code says println("Model 2 was fit using parameters: " + model2.parent.extractParamMap) The parent is lr There is no method for accessing parent as is done in Scala. This code has been tested in Python, and returns values consistent with Scala ## What changes were proposed in this pull request? Proposing to call the lr variable instead of model1 or model2 ## How was this patch tested? This patch was tested with Spark 2.1.0 comparing the Scala and PySpark results. Pyspark returns nothing at present for those two print lines. The output for model2 in PySpark should be {Param(parent='LogisticRegression_4187be538f744d5a9090', name='tol', doc='the convergence tolerance for iterative algorithms (>= 0).'): 1e-06, Param(parent='LogisticRegression_4187be538f744d5a9090', name='elasticNetParam', doc='the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.'): 0.0, Param(parent='LogisticRegression_4187be538f744d5a9090', name='predictionCol', doc='prediction column name.'): 'prediction', Param(parent='LogisticRegression_4187be538f744d5a9090', name='featuresCol', doc='features column name.'): 'features', Param(parent='LogisticRegression_4187be538f744d5a9090', name='labelCol', doc='label column name.'): 'label', Param(parent='LogisticRegression_4187be538f744d5a9090', name='probabilityCol', doc='Column name for predicted class conditional probabilities. Note: Not all models output well-calibrated probability estimates! 
These probabilities should be treated as confidences, not precise probabilities.'): 'myProbability', Param(parent='LogisticRegression_4187be538f744d5a9090', name='rawPredictionCol', doc='raw prediction (a.k.a. confidence) column name.'): 'rawPrediction', Param(parent='LogisticRegression_4187be538f744d5a9090', name='family', doc='The name of family which is a description of the label distribution to be used in the model. Supported options: auto, binomial, multinomial'): 'auto', Param(parent='LogisticRegression_4187be538f744d5a9090', name='fitIntercept', doc='whether to fit an intercept term.'): True, Param(parent='LogisticRegression_4187be538f744d5a9090', name='threshold', doc='Threshold in binary classification prediction, in range [0, 1]. If threshold and thresholds are both set, they must match.e.g. if threshold is p, then thresholds must be equal to [1-p, p].'): 0.55, Param(parent='LogisticRegression_4187be538f744d5a9090', name='aggregationDepth', doc='suggested depth for treeAggregate (>= 2).'): 2, Param(parent='LogisticRegression_4187be538f744d5a9090', name='maxIter', doc='max number of iterations (>= 0).'): 30, Param(parent='LogisticRegression_4187be538f744d5a9090', name='regParam', doc='regularization parameter (>= 0).'): 0.1, Param(parent='LogisticRegression_4187be538f744d5a9090', name='standardization', doc='whether to standardize the training features before fitting the model.'): True} Please review http://spark.apache.org/contributing.html before opening a pull request. Author: MarkTab marktab.netCloses #19152 from marktab/branch-2.2. 
Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/781a1f83 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/781a1f83 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/781a1f83 Branch: refs/heads/branch-2.2 Commit: 781a1f83c538a80ce1f1876e4786b02cb7984e16 Parents: 4304d0b Author: MarkTab marktab.net Authored: Fri Sep 8 08:08:09 2017 +0100 Committer: Sean Owen Committed: Fri Sep 8 08:08:09 2017 +0100 -- .../src/main/python/ml/estimator_transformer_param_example.py| 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/781a1f83/examples/src/main/python/ml/estimator_transformer_param_example.py -- diff --git a/examples/src/main/python/ml/estimator_transformer_param_example.py b/examples/src/main/python/ml/estimator_transformer_param_example.py index eb21051..929bd25 100644 --- a/examples/src/main/python/ml/estimator_transformer_param_example.py +++ b/examples/src/main/python/ml/estimator_transformer_param_example.py @@ -53,7 +53,7 @@ if __name__ == "__main__": # This prints the parameter (name: value) pairs, where names are unique IDs for this # LogisticRegression instance. print("Model 1 was fit using parameters: ")
spark git commit: [SPARK-21936][SQL] backward compatibility test framework for HiveExternalCatalog
Repository: spark Updated Branches: refs/heads/master 6e37524a1 -> dbb824125 [SPARK-21936][SQL] backward compatibility test framework for HiveExternalCatalog ## What changes were proposed in this pull request? `HiveExternalCatalog` is a semi-public interface. When creating tables, `HiveExternalCatalog` converts the table metadata to hive table format and save into hive metastore. It's very import to guarantee backward compatibility here, i.e., tables created by previous Spark versions should still be readable in newer Spark versions. Previously we find backward compatibility issues manually, which is really easy to miss bugs. This PR introduces a test framework to automatically test `HiveExternalCatalog` backward compatibility, by downloading Spark binaries with different versions, and create tables with these Spark versions, and read these tables with current Spark version. ## How was this patch tested? test-only change Author: Wenchen FanCloses #19148 from cloud-fan/test. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/dbb82412 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/dbb82412 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/dbb82412 Branch: refs/heads/master Commit: dbb824125d4d31166d9a47c330f8d51f5d159515 Parents: 6e37524 Author: Wenchen Fan Authored: Thu Sep 7 23:21:49 2017 -0700 Committer: gatorsmile Committed: Thu Sep 7 23:21:49 2017 -0700 -- sql/hive/pom.xml| 4 + ...ernalCatalogBackwardCompatibilitySuite.scala | 260 --- .../hive/HiveExternalCatalogVersionsSuite.scala | 194 ++ .../spark/sql/hive/HiveSparkSubmitSuite.scala | 77 +- .../sql/hive/MetastoreDataSourcesSuite.scala| 27 -- .../spark/sql/hive/SparkSubmitTestUtils.scala | 101 +++ 6 files changed, 301 insertions(+), 362 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/dbb82412/sql/hive/pom.xml -- diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index a649daf..66fad85 100644 --- 
a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -177,6 +177,10 @@ libfb303 + org.apache.derby + derby + + org.scala-lang scala-compiler test http://git-wip-us.apache.org/repos/asf/spark/blob/dbb82412/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogBackwardCompatibilitySuite.scala -- diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogBackwardCompatibilitySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogBackwardCompatibilitySuite.scala deleted file mode 100644 index 3bd3d0d..000 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogBackwardCompatibilitySuite.scala +++ /dev/null @@ -1,260 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - *http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.spark.sql.hive - -import java.net.URI - -import org.apache.hadoop.fs.Path -import org.scalatest.BeforeAndAfterEach - -import org.apache.spark.sql.QueryTest -import org.apache.spark.sql.catalyst.TableIdentifier -import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType} -import org.apache.spark.sql.hive.client.HiveClient -import org.apache.spark.sql.hive.test.TestHiveSingleton -import org.apache.spark.sql.test.SQLTestUtils -import org.apache.spark.sql.types.StructType -import org.apache.spark.util.Utils - - -class HiveExternalCatalogBackwardCompatibilitySuite extends QueryTest - with SQLTestUtils with TestHiveSingleton with BeforeAndAfterEach { - - val tempDir = Utils.createTempDir().getCanonicalFile - val tempDirUri = tempDir.toURI - val tempDirStr = tempDir.getAbsolutePath - - override def beforeEach(): Unit = { -sql("CREATE DATABASE test_db") -for ((tbl, _) <-
spark git commit: [SPARK-21726][SQL] Check for structural integrity of the plan in Optimizer in test mode.
Repository: spark Updated Branches: refs/heads/master f62b20f39 -> 6e37524a1 [SPARK-21726][SQL] Check for structural integrity of the plan in Optimzer in test mode. ## What changes were proposed in this pull request? We have many optimization rules now in `Optimzer`. Right now we don't have any checks in the optimizer to check for the structural integrity of the plan (e.g. resolved). When debugging, it is difficult to identify which rules return invalid plans. It would be great if in test mode, we can check whether a plan is still resolved after the execution of each rule, so we can catch rules that return invalid plans. ## How was this patch tested? Added tests. Author: Liang-Chi HsiehCloses #18956 from viirya/SPARK-21726. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6e37524a Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6e37524a Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6e37524a Branch: refs/heads/master Commit: 6e37524a1fd26bbfe5034ecf971472931d1d47a9 Parents: f62b20f Author: Liang-Chi Hsieh Authored: Thu Sep 7 23:12:18 2017 -0700 Committer: gatorsmile Committed: Thu Sep 7 23:12:18 2017 -0700 -- .../sql/catalyst/optimizer/Optimizer.scala | 7 +++ .../spark/sql/catalyst/rules/RuleExecutor.scala | 15 + .../expressions/ExpressionEvalHelper.scala | 6 +- ...timizerStructuralIntegrityCheckerSuite.scala | 60 .../sql/catalyst/trees/RuleExecutorSuite.scala | 17 ++ 5 files changed, 103 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/6e37524a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 02d6778..2426a8b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -29,6 +29,7 @@ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ +import org.apache.spark.util.Utils /** * Abstract class all optimizers should inherit of, contains the standard batches (extending @@ -37,6 +38,12 @@ import org.apache.spark.sql.types._ abstract class Optimizer(sessionCatalog: SessionCatalog) extends RuleExecutor[LogicalPlan] { + // Check for structural integrity of the plan in test mode. Currently we only check if a plan is + // still resolved after the execution of each rule. + override protected def isPlanIntegral(plan: LogicalPlan): Boolean = { +Utils.isTesting && plan.resolved + } + protected def fixedPoint = FixedPoint(SQLConf.get.optimizerMaxIterations) def batches: Seq[Batch] = { http://git-wip-us.apache.org/repos/asf/spark/blob/6e37524a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleExecutor.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleExecutor.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleExecutor.scala index 0e89d1c..7e4b784 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleExecutor.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleExecutor.scala @@ -64,6 +64,14 @@ abstract class RuleExecutor[TreeType <: TreeNode[_]] extends Logging { protected def batches: Seq[Batch] /** + * Defines a check function that checks for structural integrity of the plan after the execution + * of each rule. For example, we can check whether a plan is still resolved after each rule in + * `Optimizer`, so we can catch rules that return invalid plans. The check function returns + * `false` if the given plan doesn't pass the structural integrity check. 
+ */ + protected def isPlanIntegral(plan: TreeType): Boolean = true + + /** * Executes the batches of rules defined by the subclass. The batches are executed serially * using the defined execution strategy. Within each batch, rules are also executed serially. */ @@ -93,6 +101,13 @@ abstract class RuleExecutor[TreeType <: TreeNode[_]] extends Logging { """.stripMargin) } +// Run the structural integrity checker against the plan after each rule. +if (!isPlanIntegral(result)) { + val message =
spark git commit: [SPARK-21949][TEST] Tables created in unit tests should be dropped after use
Repository: spark Updated Branches: refs/heads/master 57bc1e9eb -> f62b20f39 [SPARK-21949][TEST] Tables created in unit tests should be dropped after use ## What changes were proposed in this pull request? Tables should be dropped after use in unit tests. ## How was this patch tested? N/A Author: liuxianCloses #19155 from 10110346/droptable. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f62b20f3 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f62b20f3 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f62b20f3 Branch: refs/heads/master Commit: f62b20f39c5e44ad6de535117e076060fef3f9ec Parents: 57bc1e9 Author: liuxian Authored: Thu Sep 7 23:09:26 2017 -0700 Committer: gatorsmile Committed: Thu Sep 7 23:09:26 2017 -0700 -- .../sql/hive/InsertIntoHiveTableSuite.scala | 116 --- .../sql/hive/execution/HiveQuerySuite.scala | 59 ++-- .../sql/hive/execution/HiveTableScanSuite.scala | 18 +- .../spark/sql/hive/execution/HiveUDFSuite.scala | 42 +-- .../sql/hive/execution/SQLQuerySuite.scala | 326 ++- 5 files changed, 301 insertions(+), 260 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/f62b20f3/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertIntoHiveTableSuite.scala -- diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertIntoHiveTableSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertIntoHiveTableSuite.scala index cc80f2e..e93c654 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertIntoHiveTableSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertIntoHiveTableSuite.scala @@ -50,47 +50,53 @@ class InsertIntoHiveTableSuite extends QueryTest with TestHiveSingleton with Bef } test("insertInto() HiveTable") { -sql("CREATE TABLE createAndInsertTest (key int, value string)") - -// Add some data. 
-testData.write.mode(SaveMode.Append).insertInto("createAndInsertTest") - -// Make sure the table has also been updated. -checkAnswer( - sql("SELECT * FROM createAndInsertTest"), - testData.collect().toSeq -) - -// Add more data. -testData.write.mode(SaveMode.Append).insertInto("createAndInsertTest") - -// Make sure the table has been updated. -checkAnswer( - sql("SELECT * FROM createAndInsertTest"), - testData.toDF().collect().toSeq ++ testData.toDF().collect().toSeq -) - -// Now overwrite. -testData.write.mode(SaveMode.Overwrite).insertInto("createAndInsertTest") - -// Make sure the registered table has also been updated. -checkAnswer( - sql("SELECT * FROM createAndInsertTest"), - testData.collect().toSeq -) +withTable("createAndInsertTest") { + sql("CREATE TABLE createAndInsertTest (key int, value string)") + + // Add some data. + testData.write.mode(SaveMode.Append).insertInto("createAndInsertTest") + + // Make sure the table has also been updated. + checkAnswer( +sql("SELECT * FROM createAndInsertTest"), +testData.collect().toSeq + ) + + // Add more data. + testData.write.mode(SaveMode.Append).insertInto("createAndInsertTest") + + // Make sure the table has been updated. + checkAnswer( +sql("SELECT * FROM createAndInsertTest"), +testData.toDF().collect().toSeq ++ testData.toDF().collect().toSeq + ) + + // Now overwrite. + testData.write.mode(SaveMode.Overwrite).insertInto("createAndInsertTest") + + // Make sure the registered table has also been updated. 
+ checkAnswer( +sql("SELECT * FROM createAndInsertTest"), +testData.collect().toSeq + ) +} } test("Double create fails when allowExisting = false") { -sql("CREATE TABLE doubleCreateAndInsertTest (key int, value string)") - -intercept[AnalysisException] { +withTable("doubleCreateAndInsertTest") { sql("CREATE TABLE doubleCreateAndInsertTest (key int, value string)") + + intercept[AnalysisException] { +sql("CREATE TABLE doubleCreateAndInsertTest (key int, value string)") + } } } test("Double create does not fail when allowExisting = true") { -sql("CREATE TABLE doubleCreateAndInsertTest (key int, value string)") -sql("CREATE TABLE IF NOT EXISTS doubleCreateAndInsertTest (key int, value string)") +withTable("doubleCreateAndInsertTest") { + sql("CREATE TABLE doubleCreateAndInsertTest (key int, value string)") + sql("CREATE TABLE IF NOT EXISTS doubleCreateAndInsertTest (key int, value string)") +} } test("SPARK-4052: scala.collection.Map as value