[ https://issues.apache.org/jira/browse/AMATERASU-28?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16525900#comment-16525900 ]
ASF GitHub Bot commented on AMATERASU-28:
roadan closed pull request #21: AMATERASU-28 Miniconda version pulling away from code
URL: https://github.com/apache/incubator-amaterasu/pull/21
This is a PR merged from a forked repository. As GitHub hides the original diff on merge, it is displayed below for the sake of provenance:
diff --git a/executor/src/main/scala/org/apache/amaterasu/executor/execution/actions/runners/spark/PySpark/PySparkRunner.scala b/executor/src/main/scala/org/apache/amaterasu/executor/execution/actions/runners/spark/PySpark/PySparkRunner.scala
index 94b8056..5897e1d 100755
--- a/executor/src/main/scala/org/apache/amaterasu/executor/execution/actions/runners/spark/PySpark/PySparkRunner.scala
+++ b/executor/src/main/scala/org/apache/amaterasu/executor/execution/actions/runners/spark/PySpark/PySparkRunner.scala
@@ -175,7 +175,7 @@ object PySparkRunner {
     * Installs Anaconda and then links it with the local spark that was installed on the executor.
     */
   private def installAnacondaOnNode(): Unit = {
-    Seq("bash", "-c", "sh Miniconda2-latest-Linux-x86_64.sh -b -p $PWD/miniconda")
+    Seq("bash", "-c", "sh miniconda-install.sh -b -p $PWD/miniconda")
     Seq("bash", "-c", "$PWD/miniconda/bin/python -m conda install -y conda-build")
     Seq("bash", "-c", "ln -s $PWD/spark-2.2.1-bin-hadoop2.7/python/pyspark $PWD/miniconda/pkgs/pyspark")
   }
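
For context, commands like the ones above are executed through Scala's scala.sys.process API, which is also what the removed SparkRunnersProvider methods in the next diff pipe into shellLoger. A minimal, self-contained sketch of that pattern follows; the logger definition here is illustrative and not the project's code:

    import scala.sys.process._

    // Illustrative stand-in for the shellLoger referenced in the diff below:
    // a ProcessLogger that forwards the child process's stdout and stderr.
    val shellLoger = ProcessLogger(out => println(out), err => Console.err.println(err))

    // Each step runs through bash. Note the point of the change: the installer
    // script name is now version-agnostic ("miniconda-install.sh") instead of
    // hard-coding "Miniconda2-latest-Linux-x86_64.sh".
    Seq("bash", "-c", "sh miniconda-install.sh -b -p $PWD/miniconda") ! shellLoger
    Seq("bash", "-c", "$PWD/miniconda/bin/python -m conda install -y conda-build") ! shellLoger

The "!" operator runs the command and returns its exit code, which the code above discards.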
diff --git a/executor/src/main/scala/org/apache/amaterasu/executor/execution/actions/runners/spark/SparkRunnersProvider.scala b/executor/src/main/scala/org/apache/amaterasu/executor/execution/actions/runners/spark/SparkRunnersProvider.scala
index ff56d8c..3d33e8e 100644
--- a/executor/src/main/scala/org/apache/amaterasu/executor/execution/actions/runners/spark/SparkRunnersProvider.scala
+++ b/executor/src/main/scala/org/apache/amaterasu/executor/execution/actions/runners/spark/SparkRunnersProvider.scala
@@ -67,11 +67,6 @@ class SparkRunnersProvider extends RunnersProvider with Logging {
       jars ++= getDependencies(execData.deps)
     }
-    if (execData.pyDeps != null &&
-      execData.pyDeps.packages.nonEmpty) {
-      loadPythonDependencies(execData.pyDeps, notifier)
-    }
-
     conf = execData.configurations.get("spark")
     executorEnv = execData.configurations.get("spark_exec_env")
     val sparkAppName = s"job_${jobId}_executor_$executorId"
@@ -85,56 +80,21 @@ class SparkRunnersProvider extends RunnersProvider with Logging {
     runners.put(sparkScalaRunner.getIdentifier, sparkScalaRunner)
     // TODO: get rid of hard-coded version
-    lazy val pySparkRunner = PySparkRunner(execData.env, jobId, notifier, spark,
-      s"${config.spark.home}/python:${config.spark.home}/python/pyspark:${config.spark.home}/python/pyspark/build:${config.spark.home}/python/pyspark/lib/py4j-0.10.4-src.zip",
-      execData.pyDeps, config)
+    lazy val pySparkRunner =
+      PySparkRunner(
+        execData.env,
+        jobId,
+        notifier,
+        spark,
+        s"${config.spark.home}/python:${config.spark.home}/python/pyspark",
+        execData.pyDeps,
+        config)
     runners.put(pySparkRunner.getIdentifier, pySparkRunner)
     lazy val sparkSqlRunner = SparkSqlRunner(execData.env, jobId, notifier, spark)
     runners.put(sparkSqlRunner.getIdentifier, sparkSqlRunner)
   }
-  private def installAnacondaPackage(pythonPackage: PythonPackage): Unit = {
-    val channel = pythonPackage.channel.getOrElse("anaconda")
-    if (channel == "anaconda") {
-      Seq("bash", "-c", s"$$PWD/miniconda/bin/python -m conda install -y ${pythonPackage.packageId}") ! shellLoger
-    } else {
-      Seq("bash", "-c", s"$$PWD/miniconda/bin/python -m conda install -y -c $channel ${pythonPackage.packageId}") ! shellLoger
-    }
-  }
-
-  private def installAnacondaOnNode(): Unit = {
-    // TODO: get rid of hard-coded version
-    Seq("bash", "-c", "sh Miniconda2-latest-Linux-x86_64.sh -b -p $PWD/miniconda") ! shellLoger
-    Seq("bash", "-c", "$PWD/miniconda/bin/python -m conda install -y conda-build") ! shellLoger
-    Seq("bash", "-c", "ln -s $PWD/spark-2.2.1-bin-hadoop2.7/python/pyspark $PWD/miniconda/pkgs/pyspark") ! shellLoger
-  }
-
-  private def loadPythonDependencies(deps: PythonDependencies, notifier: Notifier): Unit = {
-    notifier.info("loading anaconda evn")
-    installAnacondaOnNode()
-    val codegenPackage = PythonPackage("codegen", channel = Option("auto"))
-    installAnacondaPackage(codegenPackage)
-    try {
-      // notifier.info("loadPythonDependencies #5")
-      deps.packages.foreach(pack => {
-        pack.index.getOrElse("anaconda").toLowerCase match {
-
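
The quoted diff cuts off at this point in the comment. For readers tracing the removed code: installAnacondaPackage branches on the package's conda channel, passing "-c" only for non-default channels. A hedged sketch of that logic, with the PythonPackage shape and shellLoger assumed from the diff above:

    import scala.sys.process._

    // Assumed shape, matching only the fields referenced in the diff above.
    case class PythonPackage(packageId: String,
                             channel: Option[String] = None,
                             index: Option[String] = None)

    val shellLoger = ProcessLogger(out => println(out), err => Console.err.println(err))

    // Sketch of the removed installAnacondaPackage: default to the main
    // "anaconda" channel, and add "-c <channel>" only for custom channels,
    // mirroring the if/else in the removed method.
    def installCondaPackage(pkg: PythonPackage): Unit = {
      val channelFlag = pkg.channel.filter(_ != "anaconda").fold("")(c => s"-c $c ")
      Seq("bash", "-c",
        s"$$PWD/miniconda/bin/python -m conda install -y $channelFlag${pkg.packageId}") ! shellLoger
    }

    // Example, mirroring the codegen bootstrap in the removed loadPythonDependencies:
    // installCondaPackage(PythonPackage("codegen", channel = Some("auto")))

Note the "$$" in the interpolated string, which escapes to a literal "$" so that $PWD is expanded by bash rather than by Scala.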