spark git commit: [SPARK-19064][PYSPARK] Fix pip installing of sub components
Repository: spark Updated Branches: refs/heads/branch-2.1 97d3353ef -> a5c10ff23 [SPARK-19064][PYSPARK] Fix pip installing of sub components ## What changes were proposed in this pull request? Fix installation of mllib and ml sub components, and more eagerly clean up cache files during test script & make-distribution. ## How was this patch tested? Updated sanity test script to import mllib and ml sub-components. Author: Holden Karau Closes #16465 from holdenk/SPARK-19064-fix-pip-install-sub-components. (cherry picked from commit 965c82d8c4b7f2d4dfbc45ec4d47d6b6588094c3) Signed-off-by: Holden Karau Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a5c10ff2 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a5c10ff2 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a5c10ff2 Branch: refs/heads/branch-2.1 Commit: a5c10ff238e4a117f597e017b7d746404aaa1663 Parents: 97d3353 Author: Holden Karau Authored: Wed Jan 25 14:43:39 2017 -0800 Committer: Holden Karau Committed: Wed Jan 25 14:44:50 2017 -0800 -- dev/make-distribution.sh | 2 ++ dev/pip-sanity-check.py | 2 ++ dev/requirements.txt | 1 + dev/run-pip-tests| 7 +-- python/setup.py | 5 + 5 files changed, 15 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/a5c10ff2/dev/make-distribution.sh -- diff --git a/dev/make-distribution.sh b/dev/make-distribution.sh index 6ea319e..00e0580 100755 --- a/dev/make-distribution.sh +++ b/dev/make-distribution.sh @@ -213,6 +213,8 @@ cp -r "$SPARK_HOME/data" "$DISTDIR" if [ "$MAKE_PIP" == "true" ]; then echo "Building python distribution package" pushd "$SPARK_HOME/python" > /dev/null + # Delete the egg info file if it exists, this can cache older setup files. 
+ rm -rf pyspark.egg-info || echo "No existing egg info file, skipping deletion" python setup.py sdist popd > /dev/null else http://git-wip-us.apache.org/repos/asf/spark/blob/a5c10ff2/dev/pip-sanity-check.py -- diff --git a/dev/pip-sanity-check.py b/dev/pip-sanity-check.py index 430c2ab..c491005 100644 --- a/dev/pip-sanity-check.py +++ b/dev/pip-sanity-check.py @@ -18,6 +18,8 @@ from __future__ import print_function from pyspark.sql import SparkSession +from pyspark.ml.param import Params +from pyspark.mllib.linalg import * import sys if __name__ == "__main__": http://git-wip-us.apache.org/repos/asf/spark/blob/a5c10ff2/dev/requirements.txt -- diff --git a/dev/requirements.txt b/dev/requirements.txt index bf042d2..7978227 100644 --- a/dev/requirements.txt +++ b/dev/requirements.txt @@ -1,3 +1,4 @@ jira==1.0.3 PyGithub==1.26.0 Unidecode==0.04.19 +pypandoc==1.3.3 http://git-wip-us.apache.org/repos/asf/spark/blob/a5c10ff2/dev/run-pip-tests -- diff --git a/dev/run-pip-tests b/dev/run-pip-tests index e1da18e..af1b1fe 100755 --- a/dev/run-pip-tests +++ b/dev/run-pip-tests @@ -78,11 +78,14 @@ for python in "${PYTHON_EXECS[@]}"; do mkdir -p "$VIRTUALENV_PATH" virtualenv --python=$python "$VIRTUALENV_PATH" source "$VIRTUALENV_PATH"/bin/activate -# Upgrade pip -pip install --upgrade pip +# Upgrade pip & friends +pip install --upgrade pip pypandoc wheel +pip install numpy # Needed so we can verify mllib imports echo "Creating pip installable source dist" cd "$FWDIR"/python +# Delete the egg info file if it exists, this can cache the setup file. 
+rm -rf pyspark.egg-info || echo "No existing egg info file, skipping deletion" $python setup.py sdist http://git-wip-us.apache.org/repos/asf/spark/blob/a5c10ff2/python/setup.py -- diff --git a/python/setup.py b/python/setup.py index bc2eb4c..47eab98 100644 --- a/python/setup.py +++ b/python/setup.py @@ -162,7 +162,12 @@ try: url='https://github.com/apache/spark/tree/master/python', packages=['pyspark', 'pyspark.mllib', + 'pyspark.mllib.linalg', + 'pyspark.mllib.stat', 'pyspark.ml', + 'pyspark.ml.linalg', + 'pyspark.ml.param', + 'pyspark.ml.stat', 'pyspark.sql', 'pyspark.streaming', 'pyspark.bin', - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional
spark git commit: [SPARK-19064][PYSPARK] Fix pip installing of sub components
Repository: spark Updated Branches: refs/heads/master 92afaa93a -> 965c82d8c [SPARK-19064][PYSPARK] Fix pip installing of sub components ## What changes were proposed in this pull request? Fix installation of mllib and ml sub components, and more eagerly clean up cache files during test script & make-distribution. ## How was this patch tested? Updated sanity test script to import mllib and ml sub-components. Author: Holden Karau Closes #16465 from holdenk/SPARK-19064-fix-pip-install-sub-components. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/965c82d8 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/965c82d8 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/965c82d8 Branch: refs/heads/master Commit: 965c82d8c4b7f2d4dfbc45ec4d47d6b6588094c3 Parents: 92afaa9 Author: Holden Karau Authored: Wed Jan 25 14:43:39 2017 -0800 Committer: Holden Karau Committed: Wed Jan 25 14:43:39 2017 -0800 -- dev/make-distribution.sh | 2 ++ dev/pip-sanity-check.py | 2 ++ dev/requirements.txt | 1 + dev/run-pip-tests| 7 +-- python/setup.py | 5 + 5 files changed, 15 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/965c82d8/dev/make-distribution.sh -- diff --git a/dev/make-distribution.sh b/dev/make-distribution.sh index 47ff504..6fb25f3 100755 --- a/dev/make-distribution.sh +++ b/dev/make-distribution.sh @@ -220,6 +220,8 @@ cp -r "$SPARK_HOME/data" "$DISTDIR" if [ "$MAKE_PIP" == "true" ]; then echo "Building python distribution package" pushd "$SPARK_HOME/python" > /dev/null + # Delete the egg info file if it exists, this can cache older setup files. 
+ rm -rf pyspark.egg-info || echo "No existing egg info file, skipping deletion" python setup.py sdist popd > /dev/null else http://git-wip-us.apache.org/repos/asf/spark/blob/965c82d8/dev/pip-sanity-check.py -- diff --git a/dev/pip-sanity-check.py b/dev/pip-sanity-check.py index 430c2ab..c491005 100644 --- a/dev/pip-sanity-check.py +++ b/dev/pip-sanity-check.py @@ -18,6 +18,8 @@ from __future__ import print_function from pyspark.sql import SparkSession +from pyspark.ml.param import Params +from pyspark.mllib.linalg import * import sys if __name__ == "__main__": http://git-wip-us.apache.org/repos/asf/spark/blob/965c82d8/dev/requirements.txt -- diff --git a/dev/requirements.txt b/dev/requirements.txt index bf042d2..7978227 100644 --- a/dev/requirements.txt +++ b/dev/requirements.txt @@ -1,3 +1,4 @@ jira==1.0.3 PyGithub==1.26.0 Unidecode==0.04.19 +pypandoc==1.3.3 http://git-wip-us.apache.org/repos/asf/spark/blob/965c82d8/dev/run-pip-tests -- diff --git a/dev/run-pip-tests b/dev/run-pip-tests index e1da18e..af1b1fe 100755 --- a/dev/run-pip-tests +++ b/dev/run-pip-tests @@ -78,11 +78,14 @@ for python in "${PYTHON_EXECS[@]}"; do mkdir -p "$VIRTUALENV_PATH" virtualenv --python=$python "$VIRTUALENV_PATH" source "$VIRTUALENV_PATH"/bin/activate -# Upgrade pip -pip install --upgrade pip +# Upgrade pip & friends +pip install --upgrade pip pypandoc wheel +pip install numpy # Needed so we can verify mllib imports echo "Creating pip installable source dist" cd "$FWDIR"/python +# Delete the egg info file if it exists, this can cache the setup file. 
+rm -rf pyspark.egg-info || echo "No existing egg info file, skipping deletion" $python setup.py sdist http://git-wip-us.apache.org/repos/asf/spark/blob/965c82d8/python/setup.py -- diff --git a/python/setup.py b/python/setup.py index bc2eb4c..47eab98 100644 --- a/python/setup.py +++ b/python/setup.py @@ -162,7 +162,12 @@ try: url='https://github.com/apache/spark/tree/master/python', packages=['pyspark', 'pyspark.mllib', + 'pyspark.mllib.linalg', + 'pyspark.mllib.stat', 'pyspark.ml', + 'pyspark.ml.linalg', + 'pyspark.ml.param', + 'pyspark.ml.stat', 'pyspark.sql', 'pyspark.streaming', 'pyspark.bin', - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org