This is an automated email from the ASF dual-hosted git repository.

zjffdu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/zeppelin.git
The following commit(s) were added to refs/heads/master by this push:
     new d8d18f5  [ZEPPELIN-5576] Improve Zeppelin CI
d8d18f5 is described below

commit d8d18f59b2926c90b34b5cfb27a3c2c3174f62a5
Author: Jeff Zhang <zjf...@apache.org>
AuthorDate: Tue Feb 22 09:57:42 2022 +0100

    [ZEPPELIN-5576] Improve Zeppelin CI

    ### What is this PR for?
    Restructure the CI in `core.yml`:
    * Split the interpreters CI into 5 groups:
      * jupyter-interpreter, rlang, python
      * non-core interpreters (sh, markdown, etc.)
      * spark
      * flink
      * livy
    * Run the spark tests of the different spark profiles in one build; only the python version varies across builds.
    * Refine the hadoop2 and hadoop3 profiles; use different hadoop profiles in the integration tests (`SparkIntegrationTest`, `ZeppelinSparkClusterTest`).

    ### What type of PR is it?
    [Improvement]

    ### Todos
    * [ ] - Task

    ### What is the Jira issue?
    * https://issues.apache.org/jira/browse/ZEPPELIN-5576

    ### How should this be tested?
    * CI passes, except the frontend e2e test (not related)

    ### Screenshots (if appropriate)

    ### Questions:
    * Do the license files need to be updated? NO
    * Are there breaking changes for older versions? NO
    * Does this need documentation? NO

    Author: Jeff Zhang <zjf...@apache.org>
    Author: Philipp Dallig <philipp.dal...@gmail.com>

    Closes #4266 from zjffdu/ZEPPELIN-5576 and squashes the following commits:

    1010d043d4 [Philipp Dallig] Adjust interpreter pom.xml
    76117056b3 [Jeff Zhang] address comments
    e5477a2a80 [Jeff Zhang] update on mamba
    6fab322589 [Jeff Zhang] use mamba
    ed7b431bed [Jeff Zhang] Remove hadoop-client-api in zeppelin-interpreter
    8eca79545b [Jeff Zhang] address comment
    458040cadf [Jeff Zhang] address comments
    7c4ca219da [Jeff Zhang] address comments
    19c9f89300 [Jeff Zhang] address comment
    6ce636853d [Jeff Zhang] address comments
    916d86ebfc [Jeff Zhang] address comment
    25cdd68b0d [Jeff Zhang] use mmaba
    cba304f45b [Jeff Zhang] update
    175d75e944 [Jeff Zhang] [ZEPPELIN-5576] Improve Zeppelin CI
---
 .github/workflows/core.yml                          | 297 +++++-------
 .github/workflows/frontend.yml                      |   7 +
 alluxio/pom.xml                                     |   7 +
 pom.xml                                             | 505 ++++++++++++++++++---
 rlang/pom.xml                                       |  15 +-
 scalding/pom.xml                                    |   6 +
 spark/interpreter/pom.xml                           |  15 +-
 .../zeppelin/spark/SparkIRInterpreterTest.java      |  84 ++--
 .../zeppelin/spark/SparkRInterpreterTest.java       |   3 +-
 spark/pom.xml                                       |   1 -
 spark/spark-dependencies/pom.xml                    |  20 +-
 spark/spark-scala-parent/pom.xml                    |  15 +
 spark/spark-shims/pom.xml                           |   2 +-
 testing/env_python_3_with_flink_110.yml             |  27 --
 testing/env_python_3_with_flink_111.yml             |  28 --
 zeppelin-interpreter-integration/pom.xml            | 140 ++++--
 .../zeppelin/integration/FlinkIntegrationTest.java  |   6 +-
 .../zeppelin/integration/MiniHadoopCluster.java     |   4 +-
 .../zeppelin/integration/SparkIntegrationTest.java  |  19 +
 .../integration/SparkIntegrationTest24.java         |   4 +-
 .../integration/ZeppelinSparkClusterTest.java       | 109 +++--
 zeppelin-interpreter/pom.xml                        |  61 ++-
 zeppelin-plugins/launcher/yarn/pom.xml              |  68 ++-
 zeppelin-plugins/notebookrepo/filesystem/pom.xml    |  59 ++-
 zeppelin-server/pom.xml                             |  59 +--
 zeppelin-zengine/pom.xml                            |  56 ++-
 26 files changed, 1095 insertions(+), 522 deletions(-)

diff --git a/.github/workflows/core.yml b/.github/workflows/core.yml
index b6426284..808356d 100644
--- a/.github/workflows/core.yml
+++ b/.github/workflows/core.yml
@@ -26,13 +26,14 @@ defaults:
     shell: bash -l {0}
 jobs:
-  test-core-modules:
+  # test on core-modules (zeppelin-interpreter,zeppelin-zengine,zeppelin-server),
+  # some interpreters are included, because zeppelin-server test depends on them: spark, shell & markdown
+  core-modules:
runs-on: ubuntu-20.04 strategy: fail-fast: false matrix: hadoop: [hadoop2, hadoop3] - python: [3.7, 3.8] steps: - name: Checkout uses: actions/checkout@v2 @@ -52,29 +53,34 @@ jobs: key: ${{ runner.os }}-zeppelin-${{ hashFiles('**/pom.xml') }} restore-keys: | ${{ runner.os }}-zeppelin- - - name: Setup conda environment with python ${{ matrix.python }} and R + - name: install application with some interpreter + run: ./mvnw install -Pbuild-distr -DskipRat -DskipTests -pl zeppelin-server,zeppelin-web,spark-submit,spark/spark-dependencies,markdown,angular,shell -am -Phelium-dev -Pexamples -P${{ matrix.hadoop }} -B + - name: install and test plugins + run: ./mvnw package -DskipRat -pl zeppelin-plugins -amd -B + - name: Setup conda environment with python 3.7 and R uses: conda-incubator/setup-miniconda@v2 with: activate-environment: python_3_with_R - environment-file: testing/env_python_${{ matrix.python }}_with_R.yml - python-version: ${{ matrix.python }} + environment-file: testing/env_python_3.7_with_R.yml + python-version: 3.7 + mamba-version: "*" + channels: conda-forge,defaults + channel-priority: true auto-activate-base: false - channel-priority: strict + use-mamba: true - name: Make IRkernel available to Jupyter run: | R -e "IRkernel::installspec()" conda list conda info - - name: install application with some interpreter - run: ./mvnw install -Pbuild-distr -DskipRat -DskipTests -pl zeppelin-server,zeppelin-web,spark-submit,spark/spark-dependencies,markdown,angular,shell -am -Phelium-dev -Pexamples -P${{ matrix.hadoop }} -B - - name: install and test plugins - run: ./mvnw package -DskipRat -pl zeppelin-plugins -amd -B - - name: run tests with ${{ matrix.hadoop }} + - name: run tests with ${{ matrix.hadoop }} # skip spark test because we would run them in other CI run: ./mvnw verify -Pusing-packaged-distr -DskipRat -pl zeppelin-server,zeppelin-web,spark-submit,spark/spark-dependencies,markdown,angular,shell -am -Phelium-dev -Pexamples -P${{ matrix.hadoop }} -Dtests.to.exclude=**/org/apache/zeppelin/spark/* -DfailIfNoTests=false - test-interpreter-modules: + + # test interpreter modules except spark, flink, python, rlang, jupyter + interpreter-test-non-core: runs-on: ubuntu-20.04 env: - INTERPRETERS: 'beam,hbase,pig,jdbc,file,flink,flink-cmd,ignite,kylin,cassandra,elasticsearch,bigquery,alluxio,scio,livy,groovy,sap,java,geode,neo4j,hazelcastjet,submarine,sparql,mongodb,influxdb,ksql,scalding' + INTERPRETERS: 'beam,hbase,pig,jdbc,file,flink-cmd,ignite,kylin,cassandra,elasticsearch,bigquery,alluxio,scio,livy,groovy,sap,java,geode,neo4j,hazelcastjet,submarine,sparql,mongodb,influxdb,ksql,scalding' steps: - name: Checkout uses: actions/checkout@v2 @@ -94,97 +100,29 @@ jobs: key: ${{ runner.os }}-zeppelin-${{ hashFiles('**/pom.xml') }} restore-keys: | ${{ runner.os }}-zeppelin- + - name: install environment + run: ./mvnw install -DskipTests -DskipRat -am -pl .,zeppelin-interpreter,zeppelin-interpreter-shaded,${INTERPRETERS} -Pscala-2.10 -B - name: Setup conda environment with python 3.7 and R uses: conda-incubator/setup-miniconda@v2 with: activate-environment: python_3_with_R_and_tensorflow environment-file: testing/env_python_3_with_R_and_tensorflow.yml python-version: 3.7 + mamba-version: "*" + channels: conda-forge,defaults + channel-priority: true auto-activate-base: false - - name: Make IRkernel available to Jupyter - run: | - R -e "IRkernel::installspec()" + use-mamba: true - name: verify interpreter - run: ./mvnw verify -DskipRat -am -pl 
.,zeppelin-interpreter,zeppelin-interpreter-shaded,${INTERPRETERS} -Pscala-2.10 -B - test-zeppelin-client-integration-test: - runs-on: ubuntu-20.04 - steps: - - name: Checkout - uses: actions/checkout@v2 - - name: Tune Runner VM - uses: ./.github/actions/tune-runner-vm - - name: Set up JDK 8 - uses: actions/setup-java@v2 - with: - distribution: 'adopt' - java-version: 8 - - name: Cache local Maven repository - uses: actions/cache@v2 - with: - path: | - ~/.m2/repository - !~/.m2/repository/org/apache/zeppelin/ - key: ${{ runner.os }}-zeppelin-${{ hashFiles('**/pom.xml') }} - restore-keys: | - ${{ runner.os }}-zeppelin- - - name: Setup conda environment with python 3.7 and R - uses: conda-incubator/setup-miniconda@v2 - with: - activate-environment: python_3_with_R - environment-file: testing/env_python_3_with_R.yml - python-version: 3.7 - auto-activate-base: false - - name: Make IRkernel available to Jupyter - run: | - R -e "IRkernel::installspec()" - - name: install environment - run: | - ./mvnw install -DskipTests -DskipRat -Pintegration -pl zeppelin-interpreter-integration,zeppelin-web,spark-submit,spark/spark-dependencies,markdown,flink-cmd,flink/flink-scala-2.11,flink/flink-scala-2.12,jdbc,shell -am - ./mvnw package -DskipRat -pl zeppelin-plugins -amd -DskipTests -B - - name: run tests - run: ./mvnw test -DskipRat -pl zeppelin-interpreter-integration -Pintegration -DfailIfNoTests=false -Dtest=ZeppelinClientIntegrationTest,ZeppelinClientWithAuthIntegrationTest,ZSessionIntegrationTest + run: ./mvnw verify -DskipRat -pl ${INTERPRETERS} -Pscala-2.10 -B - test-flink-and-flink-integration-test: + # test interpreter modules for jupyter, python, rlang + interpreter-test-jupyter-python-rlang: runs-on: ubuntu-20.04 strategy: fail-fast: false matrix: - flink: [112, 113, 114] - steps: - - name: Checkout - uses: actions/checkout@v2 - - name: Tune Runner VM - uses: ./.github/actions/tune-runner-vm - - name: Set up JDK 8 - uses: actions/setup-java@v2 - with: - distribution: 'adopt' - java-version: 8 - - name: Cache local Maven repository - uses: actions/cache@v2 - with: - path: | - ~/.m2/repository - !~/.m2/repository/org/apache/zeppelin/ - key: ${{ runner.os }}-zeppelin-${{ hashFiles('**/pom.xml') }} - restore-keys: | - ${{ runner.os }}-zeppelin- - - name: Setup conda environment with python 3.7 and - uses: conda-incubator/setup-miniconda@v2 - with: - activate-environment: python_3_with_flink - environment-file: testing/env_python_3_with_flink_${{ matrix.flink }}.yml - python-version: 3.7 - auto-activate-base: false - - name: install environment - run: | - ./mvnw install -DskipTests -DskipRat -am -pl flink/flink-scala-2.11,flink/flink-scala-2.12,flink-cmd,zeppelin-interpreter-integration -Pflink-${{ matrix.flink }} -Pintegration -B - ./mvnw clean package -pl zeppelin-plugins -amd -DskipTests -B - - name: run tests - run: ./mvnw test -DskipRat -pl flink/flink-scala-2.11,flink/flink-scala-2.12,flink-cmd,zeppelin-interpreter-integration -Pflink-${{ matrix.flink }} -Pintegration -DfailIfNoTests=false -B -Dtest=org.apache.zeppelin.flink.*,FlinkIntegrationTest${{ matrix.flink }} - - run-spark-intergration-test: - runs-on: ubuntu-20.04 + python: [ 3.7, 3.8 ] steps: - name: Checkout uses: actions/checkout@v2 @@ -204,24 +142,29 @@ jobs: key: ${{ runner.os }}-zeppelin-${{ hashFiles('**/pom.xml') }} restore-keys: | ${{ runner.os }}-zeppelin- - - name: Setup conda environment with python 3.7 and R + - name: Setup conda environment with python ${{ matrix.python }} and R uses: conda-incubator/setup-miniconda@v2 
with: activate-environment: python_3_with_R - environment-file: testing/env_python_3_with_R.yml - python-version: 3.7 + environment-file: testing/env_python_${{ matrix.python }}_with_R.yml + python-version: ${{ matrix.python }} + mamba-version: "*" + channels: conda-forge,defaults + channel-priority: true auto-activate-base: false + use-mamba: true - name: Make IRkernel available to Jupyter run: | R -e "IRkernel::installspec()" - name: install environment run: | - ./mvnw install -DskipTests -DskipRat -pl zeppelin-interpreter-integration,zeppelin-web,spark-submit,spark/spark-dependencies,markdown -am -Phadoop2 -Pintegration -B - ./mvnw clean package -pl zeppelin-plugins -amd -DskipTests -B - - name: run tests - run: ./mvnw test -DskipRat -pl zeppelin-interpreter-integration,zeppelin-web,spark-submit,spark/spark-dependencies,markdown -am -Phadoop2 -Pintegration -B -Dtest=SparkSubmitIntegrationTest,ZeppelinSparkClusterTest24,SparkIntegrationTest24,ZeppelinSparkClusterTest30,SparkIntegrationTest30,ZeppelinSparkClusterTest31,SparkIntegrationTest31,ZeppelinSparkClusterTest32,SparkIntegrationTest32 -DfailIfNoTests=false + ./mvnw install -DskipTests -DskipRat -pl python,rlang,zeppelin-jupyter-interpreter -am -Phadoop2 -B + - name: run tests with ${{ matrix.python }} + run: | + ./mvnw test -DskipRat -pl python,rlang,zeppelin-jupyter-interpreter -DfailIfNoTests=false -B - jdbcIntegrationTest-and-unit-test-of-Spark-2-4-with-Scala-2-11: + # zeppelin integration test except Spark & Flink + zeppelin-integration-test: runs-on: ubuntu-20.04 steps: # user/password => root/root @@ -245,66 +188,33 @@ jobs: key: ${{ runner.os }}-zeppelin-${{ hashFiles('**/pom.xml') }} restore-keys: | ${{ runner.os }}-zeppelin- - - name: Setup conda environment with python 3.7 and R - uses: conda-incubator/setup-miniconda@v2 - with: - activate-environment: python_3_with_R - environment-file: testing/env_python_3_with_R.yml - python-version: 3.7 - auto-activate-base: false - - name: Make IRkernel available to Jupyter - run: | - R -e "IRkernel::installspec()" - name: install environment run: | - ./mvnw install -DskipTests -DskipRat -pl zeppelin-interpreter-integration,jdbc,zeppelin-web,spark-submit,spark/spark-dependencies,markdown -am -Pspark-2.4 -Pspark-scala-2.11 -Phadoop2 -Pintegration -B - ./mvnw clean package -pl zeppelin-plugins -amd -DskipTests -B - - name: run tests - run: ./mvnw test -DskipRat -pl zeppelin-interpreter-integration,jdbc,zeppelin-web,spark-submit,spark/spark-dependencies,markdown -am -Pspark-2.4 -Pspark-scala-2.11 -Phadoop2 -Pintegration -B -Dtest=JdbcIntegrationTest,org.apache.zeppelin.spark.*,org.apache.zeppelin.kotlin.* -DfailIfNoTests=false - - spark-2-4-and-scala-2-12: - runs-on: ubuntu-20.04 - steps: - - name: Checkout - uses: actions/checkout@v2 - - name: Tune Runner VM - uses: ./.github/actions/tune-runner-vm - - name: Set up JDK 8 - uses: actions/setup-java@v2 - with: - distribution: 'adopt' - java-version: 8 - - name: Cache local Maven repository - uses: actions/cache@v2 - with: - path: | - ~/.m2/repository - !~/.m2/repository/org/apache/zeppelin/ - key: ${{ runner.os }}-zeppelin-${{ hashFiles('**/pom.xml') }} - restore-keys: | - ${{ runner.os }}-zeppelin- + ./mvnw install -DskipTests -DskipRat -Phadoop2 -Pintegration -pl zeppelin-interpreter-integration,zeppelin-web,spark-submit,spark/spark-dependencies,markdown,flink-cmd,flink/flink-scala-2.11,flink/flink-scala-2.12,jdbc,shell -am + ./mvnw package -DskipRat -pl zeppelin-plugins -amd -DskipTests -B - name: Setup conda environment with python 
3.7 and R uses: conda-incubator/setup-miniconda@v2 with: activate-environment: python_3_with_R environment-file: testing/env_python_3_with_R.yml python-version: 3.7 + mamba-version: "*" + channels: conda-forge,defaults + channel-priority: true auto-activate-base: false + use-mamba: true - name: Make IRkernel available to Jupyter run: | R -e "IRkernel::installspec()" - - name: install environment - run: | - ./mvnw install -DskipTests -DskipRat -pl spark-submit,spark/spark-dependencies -am -Pspark-2.4 -Pspark-scala-2.12 -Phadoop2 -B - name: run tests - run: ./mvnw test -DskipRat -pl spark-submit,spark/spark-dependencies -am -Pspark-2.4 -Pspark-scala-2.12 -Phadoop2 -B -Dtest=org.apache.zeppelin.spark.*,org.apache.zeppelin.kotlin.* -DfailIfNoTests=false + run: ./mvnw test -DskipRat -pl zeppelin-interpreter-integration -Phadoop2 -Pintegration -DfailIfNoTests=false -Dtest=ZeppelinClientIntegrationTest,ZeppelinClientWithAuthIntegrationTest,ZSessionIntegrationTest,ShellIntegrationTest,JdbcIntegrationTest - spark-3-0-and-scala-2-12-and-other-interpreter: + flink-test-and-flink-integration-test: runs-on: ubuntu-20.04 strategy: fail-fast: false matrix: - python: [ 3.7, 3.8 ] + flink: [112, 113, 114] steps: - name: Checkout uses: actions/checkout@v2 @@ -324,28 +234,26 @@ jobs: key: ${{ runner.os }}-zeppelin-${{ hashFiles('**/pom.xml') }} restore-keys: | ${{ runner.os }}-zeppelin- - - name: Setup conda environment with python ${{ matrix.python }} and R + - name: install environment + run: | + ./mvnw install -DskipTests -DskipRat -am -pl flink/flink-scala-2.11,flink/flink-scala-2.12,flink-cmd,zeppelin-interpreter-integration -Pflink-${{ matrix.flink }} -Phadoop2 -Pintegration -B + ./mvnw clean package -pl zeppelin-plugins -amd -DskipTests -B + - name: Setup conda environment with python 3.7 and uses: conda-incubator/setup-miniconda@v2 with: - activate-environment: python_3_with_R - environment-file: testing/env_python_${{ matrix.python }}_with_R.yml - python-version: ${{ matrix.python }} + activate-environment: python_3_with_flink + environment-file: testing/env_python_3_with_flink_${{ matrix.flink }}.yml + python-version: 3.7 + mamba-version: "*" + channels: conda-forge,defaults + channel-priority: true auto-activate-base: false - - name: Make IRkernel available to Jupyter - run: | - R -e "IRkernel::installspec()" - - name: install environment - run: | - ./mvnw install -DskipTests -DskipRat -pl spark-submit,spark/spark-dependencies -am -Pspark-3.0 -Pspark-scala-2.12 -Phadoop2 -B - - name: run tests with ${{ matrix.python }} - run: ./mvnw test -DskipRat -pl spark-submit,spark/spark-dependencies -am -Pspark-3.0 -Pspark-scala-2.12 -Phadoop2 -B -Dtest=org.apache.zeppelin.spark.*,apache.zeppelin.python.*,apache.zeppelin.jupyter.*,apache.zeppelin.r.* -DfailIfNoTests=false + use-mamba: true + - name: run tests + run: ./mvnw test -DskipRat -pl flink/flink-scala-2.11,flink/flink-scala-2.12,flink-cmd,zeppelin-interpreter-integration -Pflink-${{ matrix.flink }} -Phadoop2 -Pintegration -DfailIfNoTests=false -B -Dtest=org.apache.zeppelin.flink.*,FlinkIntegrationTest${{ matrix.flink }} - spark-3-1-and-scala-2-12-and-other-interpreter: + spark-integration-test: runs-on: ubuntu-20.04 - strategy: - fail-fast: false - matrix: - python: [ 3.7, 3.8 ] steps: - name: Checkout uses: actions/checkout@v2 @@ -365,22 +273,33 @@ jobs: key: ${{ runner.os }}-zeppelin-${{ hashFiles('**/pom.xml') }} restore-keys: | ${{ runner.os }}-zeppelin- - - name: Setup conda environment with python ${{ matrix.python }} and R + - name: install 
environment + run: | + ./mvnw install -DskipTests -DskipRat -pl zeppelin-interpreter-integration,zeppelin-web,spark-submit,spark/spark-dependencies,markdown -am -Phadoop2 -Pintegration -B + ./mvnw clean package -pl zeppelin-plugins -amd -DskipTests -B + - name: Setup conda environment with python 3.7 and R uses: conda-incubator/setup-miniconda@v2 with: activate-environment: python_3_with_R - environment-file: testing/env_python_${{ matrix.python }}_with_R.yml - python-version: ${{ matrix.python }} + environment-file: testing/env_python_3_with_R.yml + python-version: 3.7 + mamba-version: "*" + channels: conda-forge,defaults + channel-priority: true auto-activate-base: false + use-mamba: true - name: Make IRkernel available to Jupyter run: | R -e "IRkernel::installspec()" - - name: install environment - run: ./mvnw install -DskipTests -DskipRat -pl spark-submit,spark/spark-dependencies -am -Pspark-3.1 -Pspark-scala-2.12 -Phadoop2 -B - - name: run tests with ${{ matrix.python }} - run: ./mvnw test -DskipRat -pl spark-submit,spark/spark-dependencies -am -Pspark-3.1 -Pspark-scala-2.12 -Phadoop2 -B -Dtest=org.apache.zeppelin.spark.*,apache.zeppelin.python.*,apache.zeppelin.jupyter.*,apache.zeppelin.r.* -DfailIfNoTests=false + - name: run tests on hadoop2 + run: ./mvnw test -DskipRat -pl zeppelin-interpreter-integration -Phadoop2 -Pintegration -B -Dtest=SparkSubmitIntegrationTest,ZeppelinSparkClusterTest24,SparkIntegrationTest24,ZeppelinSparkClusterTest30,SparkIntegrationTest30,ZeppelinSparkClusterTest31,SparkIntegrationTest31,ZeppelinSparkClusterTest32,SparkIntegrationTest32 -DfailIfNoTests=false + - name: run tests on hadoop3 + run: | + rm -rf spark/interpreter/metastore_db + ./mvnw test -DskipRat -pl zeppelin-interpreter-integration -Phadoop3 -Pintegration -B -Dtest=SparkSubmitIntegrationTest,ZeppelinSparkClusterTest24,SparkIntegrationTest24,ZeppelinSparkClusterTest30,SparkIntegrationTest30,ZeppelinSparkClusterTest31,SparkIntegrationTest31,ZeppelinSparkClusterTest32,SparkIntegrationTest32 -DfailIfNoTests=false - spark-3-2-and-scala-2-12-and-other-interpreter: + # test on spark for each spark version & scala version + spark-test: runs-on: ubuntu-20.04 strategy: fail-fast: false @@ -405,22 +324,44 @@ jobs: key: ${{ runner.os }}-zeppelin-${{ hashFiles('**/pom.xml') }} restore-keys: | ${{ runner.os }}-zeppelin- + - name: install environment + run: ./mvnw install -DskipTests -DskipRat -pl spark-submit,spark/spark-dependencies -am -Phadoop2 -B - name: Setup conda environment with python ${{ matrix.python }} and R uses: conda-incubator/setup-miniconda@v2 with: activate-environment: python_3_with_R environment-file: testing/env_python_${{ matrix.python }}_with_R.yml python-version: ${{ matrix.python }} + mamba-version: "*" + channels: conda-forge,defaults + channel-priority: true auto-activate-base: false + use-mamba: true - name: Make IRkernel available to Jupyter run: | R -e "IRkernel::installspec()" - - name: install environment - run: ./mvnw install -DskipTests -DskipRat -pl spark-submit,spark/spark-dependencies -am -Pspark-3.2 -Pspark-scala-2.12 -Phadoop2 -B - - name: run tests with ${{ matrix.python }} - run: ./mvnw test -DskipRat -pl spark-submit,spark/spark-dependencies -am -Pspark-3.2 -Pspark-scala-2.12 -Phadoop2 -B -Dtest=org.apache.zeppelin.spark.*,apache.zeppelin.python.*,apache.zeppelin.jupyter.*,apache.zeppelin.r.* -DfailIfNoTests=false + - name: run spark-2.4 tests with scala-2.11 and python-${{ matrix.python }} + if: matrix.python == '3.7' # Spark 2.4 doesn't support python 3.8 + run: 
./mvnw test -DskipRat -pl spark-submit,spark/interpreter -Pspark-2.4 -Pspark-scala-2.11 -DfailIfNoTests=false -B + - name: run spark-2.4 tests with scala-2.12 and python-${{ matrix.python }} + if: matrix.python == '3.7' # Spark 2.4 doesn't support python 3.8 + run: | + rm -rf spark/interpreter/metastore_db + ./mvnw test -DskipRat -pl spark-submit,spark/interpreter -Pspark-2.4 -Pspark-scala-2.12 -Phadoop2 -Pintegration -B -DfailIfNoTests=false + - name: run spark-3.0 tests with scala-2.12 and python-${{ matrix.python }} + run: | + rm -rf spark/interpreter/metastore_db + ./mvnw test -DskipRat -pl spark-submit,spark/interpreter -Pspark-3.0 -Pspark-scala-2.12 -Phadoop2 -Pintegration -B -DfailIfNoTests=false + - name: run spark-3.1 tests with scala-2.12 and python-${{ matrix.python }} + run: | + rm -rf spark/interpreter/metastore_db + ./mvnw test -DskipRat -pl spark-submit,spark/interpreter -Pspark-3.1 -Pspark-scala-2.12 -Phadoop2 -Pintegration -B -DfailIfNoTests=false + - name: run spark-3.2 tests with scala-2.12 and python-${{ matrix.python }} + run: | + rm -rf spark/interpreter/metastore_db + ./mvnw test -DskipRat -pl spark-submit,spark/interpreter -Pspark-3.1 -Pspark-scala-2.12 -Phadoop2 -Pintegration -B -DfailIfNoTests=false - test-livy-0-5-with-spark-2-2-0-under-python3: + livy-0-5-with-spark-2-2-0-under-python3: runs-on: ubuntu-20.04 steps: - name: Checkout @@ -441,20 +382,24 @@ jobs: key: ${{ runner.os }}-zeppelin-${{ hashFiles('**/pom.xml') }} restore-keys: | ${{ runner.os }}-zeppelin- + - name: install environment + run: | + ./mvnw install -DskipTests -DskipRat -pl livy -am -B + ./testing/downloadSpark.sh "2.2.0" "2.6" + ./testing/downloadLivy.sh "0.5.0-incubating" - name: Setup conda environment with python 3.7 and R uses: conda-incubator/setup-miniconda@v2 with: activate-environment: python_3_with_R environment-file: testing/env_python_3_with_R.yml python-version: 3.7 + mamba-version: "*" + channels: conda-forge,defaults + channel-priority: true auto-activate-base: false + use-mamba: true - name: Make IRkernel available to Jupyter run: | R -e "IRkernel::installspec()" - - name: install environment - run: | - ./mvnw install -DskipTests -DskipRat -pl livy -am -B - ./testing/downloadSpark.sh "2.2.0" "2.6" - ./testing/downloadLivy.sh "0.5.0-incubating" - name: run tests run: ./mvnw verify -DskipRat -pl livy -am -B diff --git a/.github/workflows/frontend.yml b/.github/workflows/frontend.yml index 6d1dd90..20f4aff 100644 --- a/.github/workflows/frontend.yml +++ b/.github/workflows/frontend.yml @@ -47,6 +47,10 @@ jobs: run: ./mvnw -B install -DskipTests -DskipRat -pl ${INTERPRETERS} -Phadoop2 -Pscala-2.11 - name: Run headless test run: xvfb-run --auto-servernum --server-args="-screen 0 1024x768x24" ./mvnw verify -DskipRat -pl zeppelin-web -Phadoop2 -Pscala-2.11 -Pweb-e2e -B + - name: Print zeppelin logs + if: always() + run: if [ -d "logs" ]; then cat logs/*; fi + run-tests-in-zeppelin-web-angular: runs-on: ubuntu-20.04 steps: @@ -112,3 +116,6 @@ jobs: ./testing/downloadSpark.sh "2.4.7" "2.7" - name: run tests run: xvfb-run --auto-servernum --server-args="-screen 0 1600x1024x16" ./mvnw verify -DskipRat -Pspark-2.4 -Phadoop2 -Phelium-dev -Pexamples -Pintegration -Pspark-scala-2.11 -B -pl zeppelin-integration -DfailIfNoTests=false + - name: Print zeppelin logs + if: always() + run: if [ -d "logs" ]; then cat logs/*; fi \ No newline at end of file diff --git a/alluxio/pom.xml b/alluxio/pom.xml index 36e5dda..c64993f 100644 --- a/alluxio/pom.xml +++ b/alluxio/pom.xml @@ -100,6 +100,13 @@ 
<version>${alluxio.version}</version> <scope>test</scope> </dependency> + + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-common</artifactId> + <version>2.7.7</version> + <scope>test</scope> + </dependency> </dependencies> <build> diff --git a/pom.xml b/pom.xml index 379f33c..8d0754c 100644 --- a/pom.xml +++ b/pom.xml @@ -151,8 +151,11 @@ <hadoop3.1.version>3.1.3</hadoop3.1.version> <hadoop3.2.version>3.2.0</hadoop3.2.version> <hadoop.version>${hadoop2.7.version}</hadoop.version> - <hadoop.deps.scope>provided</hadoop.deps.scope> + <hadoop-client-api.artifact>hadoop-client</hadoop-client-api.artifact> + <hadoop-client-runtime.artifact>hadoop-yarn-api</hadoop-client-runtime.artifact> + <hadoop-client-minicluster.artifact>hadoop-client</hadoop-client-minicluster.artifact> + <quartz.scheduler.version>2.3.2</quartz.scheduler.version> <jettison.version>1.4.0</jettison.version> <jsoup.version>1.13.1</jsoup.version> @@ -399,11 +402,19 @@ <dependency> <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-client</artifactId> + <artifactId>${hadoop-client-api.artifact}</artifactId> <version>${hadoop.version}</version> <scope>${hadoop.deps.scope}</scope> <exclusions> <exclusion> + <groupId>org.apache.zookeeper</groupId> + <artifactId>zookeeper</artifactId> + </exclusion> + <exclusion> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-common</artifactId> + </exclusion> + <exclusion> <groupId>com.sun.jersey</groupId> <artifactId>jersey-core</artifactId> </exclusion> @@ -533,6 +544,129 @@ <dependency> <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-yarn-common</artifactId> + <version>${hadoop.version}</version> + <scope>${hadoop.deps.scope}</scope> + <exclusions> + <exclusion> + <groupId>asm</groupId> + <artifactId>asm</artifactId> + </exclusion> + <exclusion> + <groupId>org.ow2.asm</groupId> + <artifactId>asm</artifactId> + </exclusion> + <exclusion> + <groupId>org.jboss.netty</groupId> + <artifactId>netty</artifactId> + </exclusion> + <exclusion> + <groupId>javax.servlet</groupId> + <artifactId>servlet-api</artifactId> + </exclusion> + <exclusion> + <groupId>commons-logging</groupId> + <artifactId>commons-logging</artifactId> + </exclusion> + <exclusion> + <groupId>com.sun.jersey</groupId> + <artifactId>*</artifactId> + </exclusion> + <exclusion> + <groupId>com.sun.jersey.jersey-test-framework</groupId> + <artifactId>*</artifactId> + </exclusion> + <exclusion> + <groupId>com.sun.jersey.contribs</groupId> + <artifactId>*</artifactId> + </exclusion> + <exclusion> + <groupId>com.google.guava</groupId> + <artifactId>guava</artifactId> + </exclusion> + <exclusion> + <groupId>org.apache.commons</groupId> + <artifactId>commons-compress</artifactId> + </exclusion> + </exclusions> + </dependency> + + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-yarn-client</artifactId> + <version>${hadoop.version}</version> + <scope>${hadoop.deps.scope}</scope> + <exclusions> + <exclusion> + <groupId>javax.servlet</groupId> + <artifactId>servlet-api</artifactId> + </exclusion> + <exclusion> + <groupId>org.apache.avro</groupId> + <artifactId>avro</artifactId> + </exclusion> + <exclusion> + <groupId>org.apache.jackrabbit</groupId> + <artifactId>jackrabbit-webdav</artifactId> + </exclusion> + <exclusion> + <groupId>io.netty</groupId> + <artifactId>netty</artifactId> + </exclusion> + <exclusion> + <groupId>commons-httpclient</groupId> + <artifactId>commons-httpclient</artifactId> + </exclusion> + <exclusion> + <groupId>org.eclipse.jgit</groupId> + 
<artifactId>org.eclipse.jgit</artifactId> + </exclusion> + <exclusion> + <groupId>com.jcraft</groupId> + <artifactId>jsch</artifactId> + </exclusion> + <exclusion> + <groupId>org.apache.commons</groupId> + <artifactId>commons-compress</artifactId> + </exclusion> + <exclusion> + <groupId>xml-apis</groupId> + <artifactId>xml-apis</artifactId> + </exclusion> + <exclusion> + <groupId>xerces</groupId> + <artifactId>xercesImpl</artifactId> + </exclusion> + <exclusion> + <groupId>org.codehaus.jackson</groupId> + <artifactId>jackson-mapper-asl</artifactId> + </exclusion> + <exclusion> + <groupId>org.codehaus.jackson</groupId> + <artifactId>jackson-core-asl</artifactId> + </exclusion> + <exclusion> + <groupId>com.google.guava</groupId> + <artifactId>guava</artifactId> + </exclusion> + <exclusion> + <groupId>com.google.code.findbugs</groupId> + <artifactId>jsr305</artifactId> + </exclusion> + <exclusion> + <groupId>org.apache.commons</groupId> + <artifactId>commons-math3</artifactId> + </exclusion> + <!-- using jcl-over-slf4j instead --> + <exclusion> + <groupId>commons-logging</groupId> + <artifactId>commons-logging</artifactId> + </exclusion> + </exclusions> + </dependency> + + <dependency> + <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-yarn-api</artifactId> <version>${hadoop.version}</version> <scope>${hadoop.deps.scope}</scope> @@ -605,22 +739,32 @@ </exclusions> </dependency> + + <dependency> <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-hdfs</artifactId> + <artifactId>hadoop-yarn-server-tests</artifactId> <version>${hadoop.version}</version> <classifier>tests</classifier> <scope>test</scope> <exclusions> <exclusion> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-yarn-common</artifactId> + </exclusion> + <exclusion> <groupId>com.sun.jersey</groupId> - <artifactId>jersey-json</artifactId> + <artifactId>jersey-core</artifactId> </exclusion> <exclusion> <groupId>com.sun.jersey</groupId> <artifactId>jersey-client</artifactId> </exclusion> <exclusion> + <groupId>com.sun.jersey</groupId> + <artifactId>jersey-server</artifactId> + </exclusion> + <exclusion> <groupId>javax.servlet</groupId> <artifactId>servlet-api</artifactId> </exclusion> @@ -661,26 +805,66 @@ <artifactId>xercesImpl</artifactId> </exclusion> <exclusion> + <groupId>org.codehaus.jackson</groupId> + <artifactId>jackson-core-asl</artifactId> + </exclusion> + <exclusion> + <groupId>org.codehaus.jackson</groupId> + <artifactId>jackson-jaxrs</artifactId> + </exclusion> + <exclusion> + <groupId>org.codehaus.jackson</groupId> + <artifactId>jackson-xc</artifactId> + </exclusion> + <exclusion> + <groupId>org.codehaus.jackson</groupId> + <artifactId>jackson-mapper-asl</artifactId> + </exclusion> + <exclusion> <groupId>com.google.guava</groupId> <artifactId>guava</artifactId> </exclusion> <exclusion> - <groupId>io.netty</groupId> - <artifactId>netty-all</artifactId> + <groupId>javax.xml.bind</groupId> + <artifactId>jaxb-api</artifactId> + </exclusion> + <exclusion> + <groupId>com.fasterxml.jackson.core</groupId> + <artifactId>jackson-core</artifactId> </exclusion> <exclusion> <groupId>org.eclipse.jetty</groupId> <artifactId>jetty-util</artifactId> </exclusion> <exclusion> + <groupId>com.zaxxer</groupId> + <artifactId>HikariCP-java7</artifactId> + </exclusion> + <exclusion> <groupId>com.fasterxml.jackson.core</groupId> <artifactId>jackson-annotations</artifactId> </exclusion> + <exclusion> + <groupId>com.fasterxml.jackson.module</groupId> + <artifactId>jackson-module-jaxb-annotations</artifactId> + </exclusion> 
<!-- using jcl-over-slf4j instead --> <exclusion> <groupId>commons-logging</groupId> <artifactId>commons-logging</artifactId> </exclusion> + <exclusion> + <groupId>io.dropwizard.metrics</groupId> + <artifactId>metrics-core</artifactId> + </exclusion> + <exclusion> + <groupId>com.google.guava</groupId> + <artifactId>guava</artifactId> + </exclusion> + <exclusion> + <groupId>com.fasterxml.jackson.core</groupId> + <artifactId>jackson-databind</artifactId> + </exclusion> </exclusions> </dependency> @@ -688,8 +872,7 @@ <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-common</artifactId> <version>${hadoop.version}</version> - <classifier>tests</classifier> - <scope>test</scope> + <scope>${hadoop.deps.scope}</scope> <exclusions> <exclusion> <groupId>com.sun.jersey</groupId> @@ -772,6 +955,10 @@ <artifactId>commons-beanutils</artifactId> </exclusion> <exclusion> + <groupId> commons-beanutils</groupId> + <artifactId>commons-beanutils-core</artifactId> + </exclusion> + <exclusion> <groupId>org.apache.commons</groupId> <artifactId>commons-configuration2</artifactId> </exclusion> @@ -808,12 +995,20 @@ <groupId>commons-logging</groupId> <artifactId>commons-logging</artifactId> </exclusion> + <exclusion> + <groupId>org.ow2.asm</groupId> + <artifactId>asm</artifactId> + </exclusion> + <exclusion> + <groupId>com.jamesmurty.utils</groupId> + <artifactId>java-xmlbuilder</artifactId> + </exclusion> </exclusions> </dependency> <dependency> <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-yarn-server-tests</artifactId> + <artifactId>hadoop-common</artifactId> <version>${hadoop.version}</version> <classifier>tests</classifier> <scope>test</scope> @@ -824,6 +1019,10 @@ </exclusion> <exclusion> <groupId>com.sun.jersey</groupId> + <artifactId>jersey-json</artifactId> + </exclusion> + <exclusion> + <groupId>com.sun.jersey</groupId> <artifactId>jersey-client</artifactId> </exclusion> <exclusion> @@ -872,57 +1071,69 @@ </exclusion> <exclusion> <groupId>org.codehaus.jackson</groupId> - <artifactId>jackson-core-asl</artifactId> + <artifactId>jackson-mapper-asl</artifactId> </exclusion> <exclusion> <groupId>org.codehaus.jackson</groupId> - <artifactId>jackson-jaxrs</artifactId> + <artifactId>jackson-core-asl</artifactId> </exclusion> <exclusion> - <groupId>org.codehaus.jackson</groupId> - <artifactId>jackson-xc</artifactId> + <groupId>com.google.guava</groupId> + <artifactId>guava</artifactId> </exclusion> <exclusion> - <groupId>org.codehaus.jackson</groupId> - <artifactId>jackson-mapper-asl</artifactId> + <groupId>com.google.code.findbugs</groupId> + <artifactId>jsr305</artifactId> </exclusion> <exclusion> - <groupId>com.google.guava</groupId> - <artifactId>guava</artifactId> + <groupId>org.apache.commons</groupId> + <artifactId>commons-math3</artifactId> </exclusion> <exclusion> - <groupId>javax.xml.bind</groupId> - <artifactId>jaxb-api</artifactId> + <groupId>commons-beanutils</groupId> + <artifactId>commons-beanutils</artifactId> </exclusion> <exclusion> - <groupId>com.fasterxml.jackson.core</groupId> - <artifactId>jackson-core</artifactId> + <groupId>org.apache.commons</groupId> + <artifactId>commons-configuration2</artifactId> + </exclusion> + <exclusion> + <groupId>org.apache.zookeeper</groupId> + <artifactId>zookeeper</artifactId> + </exclusion> + <exclusion> + <groupId>org.eclipse.jetty</groupId> + <artifactId>jetty-servlet</artifactId> </exclusion> <exclusion> <groupId>org.eclipse.jetty</groupId> <artifactId>jetty-util</artifactId> </exclusion> <exclusion> - <groupId>com.zaxxer</groupId> - 
<artifactId>HikariCP-java7</artifactId> + <groupId>org.eclipse.jetty</groupId> + <artifactId>jetty-webapp</artifactId> </exclusion> <exclusion> - <groupId>com.fasterxml.jackson.core</groupId> - <artifactId>jackson-annotations</artifactId> + <groupId>org.eclipse.jetty</groupId> + <artifactId>jetty-server</artifactId> </exclusion> <exclusion> - <groupId>com.fasterxml.jackson.module</groupId> - <artifactId>jackson-module-jaxb-annotations</artifactId> + <groupId>com.nimbusds</groupId> + <artifactId>nimbus-jose-jwt</artifactId> + </exclusion> + <exclusion> + <groupId>com.fasterxml.jackson.core</groupId> + <artifactId>jackson-databind</artifactId> </exclusion> <!-- using jcl-over-slf4j instead --> <exclusion> <groupId>commons-logging</groupId> <artifactId>commons-logging</artifactId> </exclusion> -<!-- <exclusion>--> -<!-- <groupId>com.google.inject.extensions</groupId>--> -<!-- <artifactId>guice-servlet</artifactId>--> -<!-- </exclusion>--> + <exclusion> + <groupId>org.ow2.asm</groupId> + <artifactId>asm</artifactId> + </exclusion> </exclusions> </dependency> @@ -996,6 +1207,195 @@ <version>${testcontainers.version}</version> <scope>test</scope> </dependency> + + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-hdfs</artifactId> + <version>${hadoop.version}</version> + <scope>test</scope> + <exclusions> + <exclusion> + <groupId>com.sun.jersey</groupId> + <artifactId>jersey-json</artifactId> + </exclusion> + <exclusion> + <groupId>com.sun.jersey</groupId> + <artifactId>jersey-client</artifactId> + </exclusion> + <exclusion> + <groupId>javax.servlet</groupId> + <artifactId>servlet-api</artifactId> + </exclusion> + <exclusion> + <groupId>org.apache.avro</groupId> + <artifactId>avro</artifactId> + </exclusion> + <exclusion> + <groupId>org.apache.jackrabbit</groupId> + <artifactId>jackrabbit-webdav</artifactId> + </exclusion> + <exclusion> + <groupId>io.netty</groupId> + <artifactId>netty</artifactId> + </exclusion> + <exclusion> + <groupId>commons-httpclient</groupId> + <artifactId>commons-httpclient</artifactId> + </exclusion> + <exclusion> + <groupId>org.eclipse.jgit</groupId> + <artifactId>org.eclipse.jgit</artifactId> + </exclusion> + <exclusion> + <groupId>com.jcraft</groupId> + <artifactId>jsch</artifactId> + </exclusion> + <exclusion> + <groupId>org.apache.commons</groupId> + <artifactId>commons-compress</artifactId> + </exclusion> + <exclusion> + <groupId>xml-apis</groupId> + <artifactId>xml-apis</artifactId> + </exclusion> + <exclusion> + <groupId>xerces</groupId> + <artifactId>xercesImpl</artifactId> + </exclusion> + <exclusion> + <groupId>com.google.guava</groupId> + <artifactId>guava</artifactId> + </exclusion> + <exclusion> + <groupId>io.netty</groupId> + <artifactId>netty-all</artifactId> + </exclusion> + <exclusion> + <groupId>org.eclipse.jetty</groupId> + <artifactId>jetty-util</artifactId> + </exclusion> + <exclusion> + <groupId>com.fasterxml.jackson.core</groupId> + <artifactId>jackson-annotations</artifactId> + </exclusion> + <!-- using jcl-over-slf4j instead --> + <exclusion> + <groupId>commons-logging</groupId> + <artifactId>commons-logging</artifactId> + </exclusion> + <exclusion> + <groupId>com.fasterxml.jackson.core</groupId> + <artifactId>jackson-databind</artifactId> + </exclusion> + </exclusions> + </dependency> + + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-hdfs</artifactId> + <version>${hadoop.version}</version> + <classifier>tests</classifier> + <scope>test</scope> + <exclusions> + <exclusion> + 
<groupId>com.sun.jersey</groupId> + <artifactId>jersey-json</artifactId> + </exclusion> + <exclusion> + <groupId>com.sun.jersey</groupId> + <artifactId>jersey-client</artifactId> + </exclusion> + <exclusion> + <groupId>javax.servlet</groupId> + <artifactId>servlet-api</artifactId> + </exclusion> + <exclusion> + <groupId>org.apache.avro</groupId> + <artifactId>avro</artifactId> + </exclusion> + <exclusion> + <groupId>org.apache.jackrabbit</groupId> + <artifactId>jackrabbit-webdav</artifactId> + </exclusion> + <exclusion> + <groupId>io.netty</groupId> + <artifactId>netty</artifactId> + </exclusion> + <exclusion> + <groupId>commons-httpclient</groupId> + <artifactId>commons-httpclient</artifactId> + </exclusion> + <exclusion> + <groupId>org.eclipse.jgit</groupId> + <artifactId>org.eclipse.jgit</artifactId> + </exclusion> + <exclusion> + <groupId>com.jcraft</groupId> + <artifactId>jsch</artifactId> + </exclusion> + <exclusion> + <groupId>org.apache.commons</groupId> + <artifactId>commons-compress</artifactId> + </exclusion> + <exclusion> + <groupId>xml-apis</groupId> + <artifactId>xml-apis</artifactId> + </exclusion> + <exclusion> + <groupId>xerces</groupId> + <artifactId>xercesImpl</artifactId> + </exclusion> + <exclusion> + <groupId>com.google.guava</groupId> + <artifactId>guava</artifactId> + </exclusion> + <exclusion> + <groupId>io.netty</groupId> + <artifactId>netty-all</artifactId> + </exclusion> + <exclusion> + <groupId>org.eclipse.jetty</groupId> + <artifactId>jetty-util</artifactId> + </exclusion> + <exclusion> + <groupId>com.fasterxml.jackson.core</groupId> + <artifactId>jackson-annotations</artifactId> + </exclusion> + <!-- using jcl-over-slf4j instead --> + <exclusion> + <groupId>commons-logging</groupId> + <artifactId>commons-logging</artifactId> + </exclusion> + <exclusion> + <groupId>com.fasterxml.jackson.core</groupId> + <artifactId>jackson-databind</artifactId> + </exclusion> + </exclusions> + </dependency> + + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>${hadoop-client-runtime.artifact}</artifactId> + <version>${hadoop.version}</version> + <scope>${hadoop.deps.scope}</scope> + <exclusions> + <exclusion> + <groupId>commons-logging</groupId> + <artifactId>commons-logging</artifactId> + </exclusion> + <exclusion> + <groupId>com.google.code.findbugs</groupId> + <artifactId>jsr305</artifactId> + </exclusion> + </exclusions> + </dependency> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>${hadoop-client-minicluster.artifact}</artifactId> + <version>${hadoop.version}</version> + <scope>test</scope> + </dependency> + </dependencies> </dependencyManagement> @@ -1012,32 +1412,6 @@ </configuration> </plugin> -<!-- <plugin>--> -<!-- <groupId>org.codehaus.mojo</groupId>--> -<!-- <artifactId>flatten-maven-plugin</artifactId>--> -<!-- <version>1.0.0</version>--> -<!-- <configuration>--> -<!-- <flattenMode>ossrh</flattenMode>--> -<!-- <updatePomFile>true</updatePomFile>--> -<!-- </configuration>--> -<!-- <executions>--> -<!-- <execution>--> -<!-- <id>flatten</id>--> -<!-- <phase>process-resources</phase>--> -<!-- <goals>--> -<!-- <goal>flatten</goal>--> -<!-- </goals>--> -<!-- </execution>--> -<!-- <execution>--> -<!-- <id>flatten.clean</id>--> -<!-- <phase>clean</phase>--> -<!-- <goals>--> -<!-- <goal>clean</goal>--> -<!-- </goals>--> -<!-- </execution>--> -<!-- </executions>--> -<!-- </plugin>--> - <!-- (TODO zjffdu disable it temporary to make CI pass)Test coverage plugin --> <!-- <plugin>--> @@ -1885,23 +2259,6 @@ </build> </profile> - <profile> 
- <id>hadoop2</id> - <properties> - <hadoop.version>${hadoop2.7.version}</hadoop.version> - <curator.version>2.13.0</curator.version> - </properties> - </profile> - - <profile> - <id>hadoop3</id> - <properties> - <hadoop.version>${hadoop3.2.version}</hadoop.version> - <curator.version>2.13.0</curator.version> - <kerberos-client.version>2.0.0-M15</kerberos-client.version> - </properties> - </profile> - </profiles> </project> diff --git a/rlang/pom.xml b/rlang/pom.xml index 51125d8..0037d7d 100644 --- a/rlang/pom.xml +++ b/rlang/pom.xml @@ -107,16 +107,23 @@ </dependency> <dependency> + <groupId>org.jsoup</groupId> + <artifactId>jsoup</artifactId> + <version>${jsoup.version}</version> + </dependency> + + <dependency> <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-client</artifactId> - <version>2.7.7</version> + <version>${hadoop.version}</version> <scope>compile</scope> </dependency> <dependency> - <groupId>org.jsoup</groupId> - <artifactId>jsoup</artifactId> - <version>${jsoup.version}</version> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-common</artifactId> + <version>${hadoop.version}</version> + <scope>compile</scope> </dependency> <dependency> diff --git a/scalding/pom.xml b/scalding/pom.xml index bd74e81..4dc5914 100644 --- a/scalding/pom.xml +++ b/scalding/pom.xml @@ -135,6 +135,12 @@ <version>${hadoop.version}</version> </dependency> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-common</artifactId> + <version>${hadoop.version}</version> + </dependency> + </dependencies> <build> diff --git a/spark/interpreter/pom.xml b/spark/interpreter/pom.xml index 0ac70a9..3a22650 100644 --- a/spark/interpreter/pom.xml +++ b/spark/interpreter/pom.xml @@ -198,11 +198,24 @@ <dependency> <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-client</artifactId> - <version>2.6.0</version> + <version>${hadoop.version}</version> <scope>provided</scope> </dependency> <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-common</artifactId> + <version>${hadoop.version}</version> + <scope>provided</scope> + <exclusions> + <exclusion> + <groupId>com.google.protobuf</groupId> + <artifactId>protobuf-java</artifactId> + </exclusion> + </exclusions> + </dependency> + + <dependency> <groupId>org.apache.spark</groupId> <artifactId>spark-hive_${spark.scala.binary.version}</artifactId> <version>${spark.version}</version> diff --git a/spark/interpreter/src/test/java/org/apache/zeppelin/spark/SparkIRInterpreterTest.java b/spark/interpreter/src/test/java/org/apache/zeppelin/spark/SparkIRInterpreterTest.java index 12cd6b0..0e7b67f 100644 --- a/spark/interpreter/src/test/java/org/apache/zeppelin/spark/SparkIRInterpreterTest.java +++ b/spark/interpreter/src/test/java/org/apache/zeppelin/spark/SparkIRInterpreterTest.java @@ -63,6 +63,7 @@ public class SparkIRInterpreterTest extends IRInterpreterTest { properties.setProperty("zeppelin.spark.maxResult", "100"); properties.setProperty("spark.r.backendConnectionTimeout", "10"); properties.setProperty("zeppelin.spark.deprecatedMsg.show", "false"); + properties.setProperty("spark.sql.execution.arrow.sparkr.enabled", "false"); InterpreterContext context = getInterpreterContext(); InterpreterContext.set(context); @@ -90,54 +91,43 @@ public class SparkIRInterpreterTest extends IRInterpreterTest { context = getInterpreterContext(); result = interpreter.interpret("sparkR.version()", context); - if (result.code() == InterpreterResult.Code.ERROR) { - // Spark 1.x has no api for Spark.version() - // spark 1.x - 
context = getInterpreterContext(); - result = interpreter.interpret("df <- createDataFrame(sqlContext, faithful)\nhead(df)", context); - interpreterResultMessages = context.out.toInterpreterResultMessage(); - assertEquals(InterpreterResult.Code.SUCCESS, result.code()); - assertTrue(interpreterResultMessages.get(0).getData().contains(">eruptions</th>")); - // spark job url is sent - verify(mockRemoteIntpEventClient, atLeastOnce()).onParaInfosReceived(any(Map.class)); - } else { - assertEquals(InterpreterResult.Code.SUCCESS, result.code()); - interpreterResultMessages = context.out.toInterpreterResultMessage(); - if (interpreterResultMessages.get(0).getData().contains("2.2")) { - ENABLE_GOOGLEVIS_TEST = false; - } - context = getInterpreterContext(); - result = interpreter.interpret("df <- as.DataFrame(faithful)\nhead(df)", context); - interpreterResultMessages = context.out.toInterpreterResultMessage(); - assertEquals(InterpreterResult.Code.SUCCESS, result.code()); - assertTrue(interpreterResultMessages.get(0).getData().contains(">eruptions</th>")); - // spark job url is sent - verify(mockRemoteIntpEventClient, atLeastOnce()).onParaInfosReceived(any(Map.class)); - - // cancel - final InterpreterContext context2 = getInterpreterContext(); - Thread thread = new Thread() { - @Override - public void run() { - try { - InterpreterResult result = interpreter.interpret("ldf <- dapplyCollect(\n" + - " df,\n" + - " function(x) {\n" + - " Sys.sleep(3)\n" + - " x <- cbind(x, \"waiting_secs\" = x$waiting * 60)\n" + - " })\n" + - "head(ldf, 3)", context2); - assertTrue(result.message().get(0).getData().contains("cancelled")); - } catch (InterpreterException e) { - fail("Should not throw InterpreterException"); - } - } - }; - thread.setName("Cancel-Thread"); - thread.start(); - Thread.sleep(1000); - interpreter.cancel(context2); + + assertEquals(InterpreterResult.Code.SUCCESS, result.code()); + interpreterResultMessages = context.out.toInterpreterResultMessage(); + if (interpreterResultMessages.get(0).getData().contains("2.2")) { + ENABLE_GOOGLEVIS_TEST = false; } + context = getInterpreterContext(); + result = interpreter.interpret("df <- as.DataFrame(faithful)\nhead(df)", context); + interpreterResultMessages = context.out.toInterpreterResultMessage(); + assertEquals(context.out.toString(), InterpreterResult.Code.SUCCESS, result.code()); + assertTrue(interpreterResultMessages.get(0).getData().contains(">eruptions</th>")); + // spark job url is sent + verify(mockRemoteIntpEventClient, atLeastOnce()).onParaInfosReceived(any(Map.class)); + + // cancel + final InterpreterContext context2 = getInterpreterContext(); + Thread thread = new Thread() { + @Override + public void run() { + try { + InterpreterResult result = interpreter.interpret("ldf <- dapplyCollect(\n" + + " df,\n" + + " function(x) {\n" + + " Sys.sleep(3)\n" + + " x <- cbind(x, \"waiting_secs\" = x$waiting * 60)\n" + + " })\n" + + "head(ldf, 3)", context2); + assertTrue(result.message().get(0).getData().contains("cancelled")); + } catch (InterpreterException e) { + fail("Should not throw InterpreterException"); + } + } + }; + thread.setName("Cancel-Thread"); + thread.start(); + Thread.sleep(1000); + interpreter.cancel(context2); } @Override diff --git a/spark/interpreter/src/test/java/org/apache/zeppelin/spark/SparkRInterpreterTest.java b/spark/interpreter/src/test/java/org/apache/zeppelin/spark/SparkRInterpreterTest.java index 7087be2..921cd36 100644 --- a/spark/interpreter/src/test/java/org/apache/zeppelin/spark/SparkRInterpreterTest.java +++ 
b/spark/interpreter/src/test/java/org/apache/zeppelin/spark/SparkRInterpreterTest.java @@ -57,6 +57,7 @@ public class SparkRInterpreterTest { properties.setProperty("zeppelin.R.knitr", "true"); properties.setProperty("spark.r.backendConnectionTimeout", "10"); properties.setProperty("zeppelin.spark.deprecatedMsg.show", "false"); + properties.setProperty("spark.sql.execution.arrow.sparkr.enabled", "false"); InterpreterContext context = getInterpreterContext(); InterpreterContext.set(context); @@ -88,7 +89,7 @@ public class SparkRInterpreterTest { result = sparkRInterpreter.interpret("df <- as.DataFrame(faithful)\nhead(df)", getInterpreterContext()); assertEquals(InterpreterResult.Code.SUCCESS, result.code()); - assertTrue(result.message().get(0).getData().contains("eruptions waiting")); + assertTrue(result.toString(), result.message().get(0).getData().contains("eruptions waiting")); // spark job url is sent verify(mockRemoteIntpEventClient, atLeastOnce()).onParaInfosReceived(any(Map.class)); diff --git a/spark/pom.xml b/spark/pom.xml index 748d77b..a5a5a9d 100644 --- a/spark/pom.xml +++ b/spark/pom.xml @@ -109,7 +109,6 @@ </execution> </executions> <configuration> - <scalaVersion>${scala.compile.version}</scalaVersion> <args> <arg>-unchecked</arg> <arg>-deprecation</arg> diff --git a/spark/spark-dependencies/pom.xml b/spark/spark-dependencies/pom.xml index 81d9bb0..0a8e71e 100644 --- a/spark/spark-dependencies/pom.xml +++ b/spark/spark-dependencies/pom.xml @@ -118,7 +118,14 @@ <version>${spark.version}</version> </dependency> - <!-- hadoop --> + <dependency> + <groupId>org.apache.spark</groupId> + <artifactId>spark-yarn_${spark.scala.binary.version}</artifactId> + <version>${spark.version}</version> + </dependency> + + <!-- Include hadoop 2.7 into spark-dependencies jar. 
+ Explicit specify compile scope, otherwise it would use provided defined in root pom.xml --> <dependency> <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-client</artifactId> @@ -127,15 +134,16 @@ </dependency> <dependency> - <groupId>org.apache.spark</groupId> - <artifactId>spark-yarn_${spark.scala.binary.version}</artifactId> - <version>${spark.version}</version> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-yarn-api</artifactId> + <version>${hadoop.version}</version> + <scope>compile</scope> </dependency> <dependency> <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-yarn-api</artifactId> - <version>${yarn.version}</version> + <artifactId>hadoop-common</artifactId> + <version>${hadoop.version}</version> <scope>compile</scope> </dependency> diff --git a/spark/spark-scala-parent/pom.xml b/spark/spark-scala-parent/pom.xml index a947f4e..1cc02a3 100644 --- a/spark/spark-scala-parent/pom.xml +++ b/spark/spark-scala-parent/pom.xml @@ -80,6 +80,21 @@ <scope>provided</scope> </dependency> + <!-- Use provided scope, hadoop dependencies are only for compilation --> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-client</artifactId> + <version>${hadoop.version}</version> + <scope>provided</scope> + </dependency> + + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-common</artifactId> + <version>${hadoop.version}</version> + <scope>provided</scope> + </dependency> + <dependency> <groupId>org.scala-lang</groupId> <artifactId>scala-compiler</artifactId> diff --git a/spark/spark-shims/pom.xml b/spark/spark-shims/pom.xml index f4cf30e..8ec8b30 100644 --- a/spark/spark-shims/pom.xml +++ b/spark/spark-shims/pom.xml @@ -39,7 +39,7 @@ <dependency> <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-common</artifactId> - <version>${hadoop2.6.version}</version> + <version>${hadoop.version}</version> <scope>provided</scope> </dependency> </dependencies> diff --git a/testing/env_python_3_with_flink_110.yml b/testing/env_python_3_with_flink_110.yml deleted file mode 100644 index 855378a..0000000 --- a/testing/env_python_3_with_flink_110.yml +++ /dev/null @@ -1,27 +0,0 @@ -name: python_3_with_flink -channels: - - conda-forge - - defaults -dependencies: - - pycodestyle - - scipy - - numpy=1.19.5 - - grpcio - - protobuf - - pandasql - - ipython - - ipython_genutils - - ipykernel - - jupyter_client=5 - - hvplot - - plotnine - - seaborn - - intake - - intake-parquet - - intake-xarray - - altair - - vega_datasets - - plotly - - pip - - pip: - - apache-flink==1.10.2 diff --git a/testing/env_python_3_with_flink_111.yml b/testing/env_python_3_with_flink_111.yml deleted file mode 100644 index 889d723..0000000 --- a/testing/env_python_3_with_flink_111.yml +++ /dev/null @@ -1,28 +0,0 @@ -name: python_3_with_flink -channels: - - conda-forge - - defaults -dependencies: - - pycodestyle - - scipy - - numpy=1.19.5 - - grpcio - - protobuf - - pandasql - - ipython - - ipython_genutils - - ipykernel - - jupyter_client=5 - - hvplot - - plotnine - - seaborn - - intake - - intake-parquet - - intake-xarray - - altair - - vega_datasets - - plotly - - pip - - pip: - - apache-flink==1.11.3 - diff --git a/zeppelin-interpreter-integration/pom.xml b/zeppelin-interpreter-integration/pom.xml index d9302fa..f751241 100644 --- a/zeppelin-interpreter-integration/pom.xml +++ b/zeppelin-interpreter-integration/pom.xml @@ -24,7 +24,6 @@ <artifactId>zeppelin</artifactId> <groupId>org.apache.zeppelin</groupId> <version>0.11.0-SNAPSHOT</version> - 
     <relativePath>..</relativePath>
   </parent>
   <artifactId>zeppelin-interpreter-integration</artifactId>
@@ -95,17 +94,6 @@
     </dependency>
 
     <dependency>
-      <groupId>org.apache.hadoop</groupId>
-      <artifactId>hadoop-client</artifactId>
-    </dependency>
-
-    <dependency>
-      <groupId>org.apache.hadoop</groupId>
-      <artifactId>hadoop-yarn-api</artifactId>
-    </dependency>
-
-    <!--test libraries-->
-    <dependency>
       <groupId>org.apache.zeppelin</groupId>
       <artifactId>zeppelin-zengine</artifactId>
       <version>${project.version}</version>
@@ -128,27 +116,6 @@
     </dependency>
 
     <dependency>
-      <groupId>org.apache.hadoop</groupId>
-      <artifactId>hadoop-yarn-server-tests</artifactId>
-      <classifier>tests</classifier>
-      <scope>test</scope>
-    </dependency>
-
-    <dependency>
-      <groupId>org.apache.hadoop</groupId>
-      <artifactId>hadoop-hdfs</artifactId>
-      <classifier>tests</classifier>
-      <scope>test</scope>
-    </dependency>
-
-    <dependency>
-      <groupId>org.apache.hadoop</groupId>
-      <artifactId>hadoop-common</artifactId>
-      <classifier>tests</classifier>
-      <scope>test</scope>
-    </dependency>
-
-    <dependency>
       <groupId>junit</groupId>
       <artifactId>junit</artifactId>
       <scope>test</scope>
@@ -172,6 +139,7 @@
   <build>
     <plugins>
       <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
         <artifactId>maven-failsafe-plugin</artifactId>
         <executions>
           <execution>
@@ -186,6 +154,7 @@
         </configuration>
       </plugin>
       <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
         <artifactId>maven-surefire-plugin</artifactId>
         <configuration>
           <forkCount>1</forkCount>
@@ -203,4 +172,109 @@
     </plugins>
   </build>
 
+  <profiles>
+    <profile>
+      <id>hadoop2</id>
+
+      <activation>
+        <activeByDefault>true</activeByDefault>
+      </activation>
+
+      <properties>
+        <hadoop.version>${hadoop2.7.version}</hadoop.version>
+        <hadoop-client-api.artifact>hadoop-client</hadoop-client-api.artifact>
+        <hadoop-client-runtime.artifact>hadoop-yarn-api</hadoop-client-runtime.artifact>
+        <hadoop-client-minicluster.artifact>hadoop-client</hadoop-client-minicluster.artifact>
+      </properties>
+
+      <dependencies>
+        <dependency>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-common</artifactId>
+          <scope>test</scope>
+          <version>${hadoop.version}</version>
+        </dependency>
+
+        <dependency>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-common</artifactId>
+          <version>${hadoop.version}</version>
+          <classifier>tests</classifier>
+          <scope>test</scope>
+        </dependency>
+
+        <dependency>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-yarn-client</artifactId>
+          <version>${hadoop.version}</version>
+          <scope>test</scope>
+        </dependency>
+
+        <dependency>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-yarn-api</artifactId>
+          <version>${hadoop.version}</version>
+          <scope>test</scope>
+        </dependency>
+
+        <dependency>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-yarn-common</artifactId>
+          <version>${hadoop.version}</version>
+          <scope>test</scope>
+        </dependency>
+
+        <dependency>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-hdfs</artifactId>
+          <version>${hadoop.version}</version>
+          <scope>test</scope>
+        </dependency>
+
+        <dependency>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-hdfs</artifactId>
+          <version>${hadoop.version}</version>
+          <classifier>tests</classifier>
+          <scope>test</scope>
+        </dependency>
+
+        <dependency>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-yarn-server-tests</artifactId>
+          <version>${hadoop.version}</version>
+          <classifier>tests</classifier>
+          <scope>test</scope>
+        </dependency>
+
+      </dependencies>
+
+    </profile>
+    <profile>
+      <id>hadoop3</id>
+
+      <properties>
+        <hadoop.version>${hadoop3.2.version}</hadoop.version>
+        <hadoop-client-runtime.artifact>hadoop-client-runtime</hadoop-client-runtime.artifact>
+        <hadoop-client-minicluster.artifact>hadoop-client-minicluster</hadoop-client-minicluster.artifact>
+      </properties>
+
+      <dependencies>
+
+        <dependency>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>${hadoop-client-runtime.artifact}</artifactId>
+          <scope>test</scope>
+        </dependency>
+
+        <dependency>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>${hadoop-client-minicluster.artifact}</artifactId>
+          <scope>test</scope>
+        </dependency>
+
+      </dependencies>
+    </profile>
+  </profiles>
+
 </project>
diff --git a/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/FlinkIntegrationTest.java b/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/FlinkIntegrationTest.java
index 7240181..44d64f5 100644
--- a/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/FlinkIntegrationTest.java
+++ b/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/FlinkIntegrationTest.java
@@ -95,15 +95,15 @@ public abstract class FlinkIntegrationTest {
     InterpreterContext context = new InterpreterContext.Builder().setNoteId("note1").setParagraphId("paragraph_1").build();
     InterpreterResult interpreterResult = flinkInterpreter.interpret("1+1", context);
-    assertEquals(InterpreterResult.Code.SUCCESS, interpreterResult.code());
+    assertEquals(interpreterResult.toString(), InterpreterResult.Code.SUCCESS, interpreterResult.code());
     assertTrue(interpreterResult.message().get(0).getData().contains("2"));
 
     interpreterResult = flinkInterpreter.interpret("val data = benv.fromElements(1, 2, 3)\ndata.collect()", context);
-    assertEquals(InterpreterResult.Code.SUCCESS, interpreterResult.code());
+    assertEquals(interpreterResult.toString(), InterpreterResult.Code.SUCCESS, interpreterResult.code());
     assertTrue(interpreterResult.message().get(0).getData().contains("1, 2, 3"));
 
     interpreterResult = flinkInterpreter.interpret("val data = senv.fromElements(1, 2, 3)\ndata.print()", context);
-    assertEquals(InterpreterResult.Code.SUCCESS, interpreterResult.code());
+    assertEquals(interpreterResult.toString(), InterpreterResult.Code.SUCCESS, interpreterResult.code());
 
     // check spark weburl in zeppelin-server side
     InterpreterSetting flinkInterpreterSetting = interpreterSettingManager.getByName("flink");
diff --git a/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/MiniHadoopCluster.java b/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/MiniHadoopCluster.java
index eb11819..1b65ff2 100644
--- a/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/MiniHadoopCluster.java
+++ b/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/MiniHadoopCluster.java
@@ -67,7 +67,9 @@ public class MiniHadoopCluster {
     YarnConfiguration baseConfig = new YarnConfiguration(hadoopConf);
     baseConfig.set("yarn.nodemanager.disk-health-checker.max-disk-utilization-per-disk-percentage", "98");
     baseConfig.set("yarn.scheduler.capacity.maximum-am-resource-percent", "1.0");
-
+    // the following 2 config are required by hadoop3
+    baseConfig.set("yarn.scheduler.capacity.root.queues", "default");
+    baseConfig.set("yarn.scheduler.capacity.root.default.capacity", "100");
     this.yarnCluster = new MiniYARNCluster(getClass().getName(), 2, 1, 1);
     yarnCluster.init(baseConfig);
 
diff --git a/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/SparkIntegrationTest.java b/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/SparkIntegrationTest.java
index 647f844..fad2e54 100644
--- a/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/SparkIntegrationTest.java
+++ b/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/SparkIntegrationTest.java
@@ -53,6 +53,7 @@ import java.util.stream.Collectors;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNotEquals;
 import static org.junit.Assert.assertTrue;
+import static org.junit.Assume.assumeTrue;
 
 public abstract class SparkIntegrationTest {
 
@@ -64,12 +65,14 @@ public abstract class SparkIntegrationTest {
   protected static InterpreterSettingManager interpreterSettingManager;
 
   private String sparkVersion;
+  private String hadoopVersion;
   private String sparkHome;
 
   public SparkIntegrationTest(String sparkVersion, String hadoopVersion) {
     LOGGER.info("Testing Spark Version: " + sparkVersion);
     LOGGER.info("Testing Hadoop Version: " + hadoopVersion);
     this.sparkVersion = sparkVersion;
+    this.hadoopVersion = hadoopVersion;
     this.sparkHome = DownloadUtils.downloadSpark(sparkVersion, hadoopVersion);
   }
 
@@ -98,6 +101,12 @@ public abstract class SparkIntegrationTest {
     // sub class can customize spark interpreter setting.
   }
 
+  private boolean isHadoopVersionMatch() {
+    String version = org.apache.hadoop.util.VersionInfo.getVersion();
+    String majorVersion = version.split("\\.")[0];
+    return majorVersion.equals(hadoopVersion.split("\\.")[0]);
+  }
+
   private void testInterpreterBasics() throws IOException, InterpreterException, XmlPullParserException {
     // add jars & packages for testing
     InterpreterSetting sparkInterpreterSetting = interpreterSettingManager.getInterpreterSettingByName("spark");
@@ -161,6 +170,8 @@ public abstract class SparkIntegrationTest {
 
   @Test
   public void testLocalMode() throws IOException, YarnException, InterpreterException, XmlPullParserException {
+    assumeTrue("Hadoop version mismatch, skip test", isHadoopVersionMatch());
+
     InterpreterSetting sparkInterpreterSetting = interpreterSettingManager.getInterpreterSettingByName("spark");
     sparkInterpreterSetting.setProperty("spark.master", "local[*]");
     sparkInterpreterSetting.setProperty("SPARK_HOME", sparkHome);
@@ -186,6 +197,8 @@ public abstract class SparkIntegrationTest {
 
   @Test
   public void testYarnClientMode() throws IOException, YarnException, InterruptedException, InterpreterException, XmlPullParserException {
+    assumeTrue("Hadoop version mismatch, skip test", isHadoopVersionMatch());
+
     InterpreterSetting sparkInterpreterSetting = interpreterSettingManager.getInterpreterSettingByName("spark");
     sparkInterpreterSetting.setProperty("spark.master", "yarn-client");
     sparkInterpreterSetting.setProperty("HADOOP_CONF_DIR", hadoopCluster.getConfigPath());
@@ -236,6 +249,8 @@ public abstract class SparkIntegrationTest {
 
   @Test
   public void testYarnClusterMode() throws IOException, YarnException, InterruptedException, InterpreterException, XmlPullParserException {
+    assumeTrue("Hadoop version mismatch, skip test", isHadoopVersionMatch());
+
     InterpreterSetting sparkInterpreterSetting = interpreterSettingManager.getInterpreterSettingByName("spark");
     sparkInterpreterSetting.setProperty("spark.master", "yarn-cluster");
     sparkInterpreterSetting.setProperty("HADOOP_CONF_DIR", hadoopCluster.getConfigPath());
@@ -281,6 +296,8 @@ public abstract class SparkIntegrationTest {
 
   @Test
   public void testSparkSubmit() throws InterpreterException {
+    assumeTrue("Hadoop version mismatch, skip test", isHadoopVersionMatch());
+
     try {
       InterpreterSetting sparkSubmitInterpreterSetting = interpreterSettingManager.getInterpreterSettingByName("spark-submit");
       sparkSubmitInterpreterSetting.setProperty("SPARK_HOME", sparkHome);
@@ -297,6 +314,8 @@ public abstract class SparkIntegrationTest {
 
   @Test
   public void testScopedMode() throws InterpreterException {
+    assumeTrue("Hadoop version mismatch, skip test", isHadoopVersionMatch());
+
     InterpreterSetting sparkInterpreterSetting = interpreterSettingManager.getInterpreterSettingByName("spark");
     try {
       sparkInterpreterSetting.setProperty("spark.master", "local[*]");
diff --git a/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/SparkIntegrationTest24.java b/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/SparkIntegrationTest24.java
index 9a2391d..df5eb2f 100644
--- a/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/SparkIntegrationTest24.java
+++ b/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/SparkIntegrationTest24.java
@@ -29,7 +29,7 @@ import java.util.Arrays;
 import java.util.List;
 
 @RunWith(value = Parameterized.class)
-public class SparkIntegrationTest24 extends SparkIntegrationTest{
+public class SparkIntegrationTest24 extends SparkIntegrationTest {
 
   public SparkIntegrationTest24(String sparkVersion, String hadoopVersion) {
     super(sparkVersion, hadoopVersion);
@@ -38,7 +38,7 @@ public class SparkIntegrationTest24 extends SparkIntegrationTest{
   @Parameterized.Parameters
   public static List<Object[]> data() {
     return Arrays.asList(new Object[][]{
-        {"2.4.7", "2.7"}
+        {"2.4.8", "2.7"}
     });
   }
diff --git a/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/ZeppelinSparkClusterTest.java b/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/ZeppelinSparkClusterTest.java
index 24a7be6..9ba356b 100644
--- a/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/ZeppelinSparkClusterTest.java
+++ b/zeppelin-interpreter-integration/src/test/java/org/apache/zeppelin/integration/ZeppelinSparkClusterTest.java
@@ -60,6 +60,8 @@ import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
+import static org.junit.Assume.assumeTrue;
+
 
 /**
  * Test against spark cluster.
@@ -78,12 +80,14 @@ public abstract class ZeppelinSparkClusterTest extends AbstractTestRestApi {
 
   private String sparkVersion;
   private String sparkHome;
+  private String hadoopVersion;
   private AuthenticationInfo anonymous = new AuthenticationInfo("anonymous");
 
   public ZeppelinSparkClusterTest(String sparkVersion, String hadoopVersion) throws Exception {
     this.sparkVersion = sparkVersion;
     LOGGER.info("Testing SparkVersion: " + sparkVersion);
     this.sparkHome = DownloadUtils.downloadSpark(sparkVersion, hadoopVersion);
+    this.hadoopVersion = hadoopVersion;
     if (!verifiedSparkVersions.contains(sparkVersion)) {
       verifiedSparkVersions.add(sparkVersion);
       setupSparkInterpreter(sparkHome);
@@ -91,6 +95,12 @@ public abstract class ZeppelinSparkClusterTest extends AbstractTestRestApi {
     }
   }
 
+  private boolean isHadoopVersionMatch() {
+    String version = org.apache.hadoop.util.VersionInfo.getVersion();
+    String majorVersion = version.split("\\.")[0];
+    return majorVersion.equals(hadoopVersion.split("\\.")[0]);
+  }
+
   public void setupSparkInterpreter(String sparkHome) throws InterpreterException {
     InterpreterSetting sparkIntpSetting = TestUtils.getInstance(Notebook.class).getInterpreterSettingManager()
         .getInterpreterSettingByName("spark");
@@ -155,6 +165,8 @@ public abstract class ZeppelinSparkClusterTest extends AbstractTestRestApi {
 
   @Test
   public void scalaOutputTest() throws IOException, InterruptedException {
+    assumeTrue("Hadoop version mismatch, skip test", isHadoopVersionMatch());
+
     String noteId = null;
     try {
       // create new note
@@ -212,7 +224,10 @@ public abstract class ZeppelinSparkClusterTest extends AbstractTestRestApi {
 
   @Test
   public void basicRDDTransformationAndActionTest() throws IOException {
+    assumeTrue("Hadoop version mismatch, skip test", isHadoopVersionMatch());
+
     String noteId = null;
+
     try {
       noteId = TestUtils.getInstance(Notebook.class).createNote("note1", anonymous);
       TestUtils.getInstance(Notebook.class).processNote(noteId,
@@ -233,6 +248,8 @@ public abstract class ZeppelinSparkClusterTest extends AbstractTestRestApi {
 
   @Test
   public void sparkReadJSONTest() throws IOException {
+    assumeTrue("Hadoop version mismatch, skip test", isHadoopVersionMatch());
+
     String noteId = null;
     try {
       noteId = TestUtils.getInstance(Notebook.class).createNote("note1", anonymous);
@@ -269,10 +286,7 @@ public abstract class ZeppelinSparkClusterTest extends AbstractTestRestApi {
 
   @Test
   public void sparkReadCSVTest() throws IOException {
-    if (isSpark1()) {
-      // csv if not supported in spark 1.x natively
-      return;
-    }
+    assumeTrue("Hadoop version mismatch, skip test", isHadoopVersionMatch());
 
     String noteId = null;
     try {
@@ -300,6 +314,8 @@ public abstract class ZeppelinSparkClusterTest extends AbstractTestRestApi {
 
   @Test
   public void sparkSQLTest() throws IOException {
+    assumeTrue("Hadoop version mismatch, skip test", isHadoopVersionMatch());
+
     String noteId = null;
     try {
       noteId = TestUtils.getInstance(Notebook.class).createNote("note1", anonymous);
@@ -401,6 +417,8 @@ public abstract class ZeppelinSparkClusterTest extends AbstractTestRestApi {
 
   @Test
   public void sparkRTest() throws IOException {
+    assumeTrue("Hadoop version mismatch, skip test", isHadoopVersionMatch());
+
     String noteId = null;
     try {
       noteId = TestUtils.getInstance(Notebook.class).createNote("note1", anonymous);
@@ -438,12 +456,15 @@ public abstract class ZeppelinSparkClusterTest extends AbstractTestRestApi {
 
   @Test
   public void pySparkTest() throws IOException {
+    assumeTrue("Hadoop version mismatch, skip test", isHadoopVersionMatch());
+
     // create new note
     String noteId = null;
     try {
-      noteId = TestUtils.getInstance(Notebook.class).createNote("note1", anonymous);
-      TestUtils.getInstance(Notebook.class).processNote(noteId,
-        note -> {
+      noteId = TestUtils.getInstance(Notebook.class).createNote("note1", anonymous);
+      TestUtils.getInstance(Notebook.class).processNote(noteId,
+        note -> {
+          // run markdown paragraph, again
           Paragraph p = note.addNewParagraph(anonymous);
           p.setText("%spark.pyspark sc.parallelize(range(1, 11)).reduce(lambda a, b: a + b)");
@@ -463,53 +484,7 @@ public abstract class ZeppelinSparkClusterTest extends AbstractTestRestApi {
           List<InterpreterCompletion> completions = note.completion(p.getId(), code, code.length(), AuthenticationInfo.ANONYMOUS);
           assertTrue(completions.size() > 0);
 
-          if (isSpark1()) {
-            // run sqlContext test
-            p = note.addNewParagraph(anonymous);
-            p.setText("%pyspark from pyspark.sql import Row\n" +
-                "df=sqlContext.createDataFrame([Row(id=1, age=20)])\n" +
-                "df.collect()");
-            note.run(p.getId(), true);
-            assertEquals(Status.FINISHED, p.getStatus());
-            assertEquals("[Row(age=20, id=1)]\n", p.getReturn().message().get(0).getData());
-
-            // test display Dataframe
-            p = note.addNewParagraph(anonymous);
-            p.setText("%pyspark from pyspark.sql import Row\n" +
-                "df=sqlContext.createDataFrame([Row(id=1, age=20)])\n" +
-                "z.show(df)");
-            note.run(p.getId(), true);
-            waitForFinish(p);
-            assertEquals(Status.FINISHED, p.getStatus());
-            assertEquals(InterpreterResult.Type.TABLE, p.getReturn().message().get(0).getType());
-            // TODO(zjffdu), one more \n is appended, need to investigate why.
-            assertEquals("age\tid\n20\t1\n", p.getReturn().message().get(0).getData());
-
-            // test udf
-            p = note.addNewParagraph(anonymous);
-            p.setText("%pyspark sqlContext.udf.register(\"f1\", lambda x: len(x))\n" +
-                "sqlContext.sql(\"select f1(\\\"abc\\\") as len\").collect()");
-            note.run(p.getId(), true);
-            assertEquals(Status.FINISHED, p.getStatus());
-            assertTrue("[Row(len=u'3')]\n".equals(p.getReturn().message().get(0).getData()) ||
-                "[Row(len='3')]\n".equals(p.getReturn().message().get(0).getData()));
-
-            // test exception
-            p = note.addNewParagraph(anonymous);
-            /*
-             %pyspark
-             a=1
-
-             print(a2)
-             */
-            p.setText("%pyspark a=1\n\nprint(a2)");
-            note.run(p.getId(), true);
-            assertEquals(Status.ERROR, p.getStatus());
-            assertTrue(p.getReturn().message().get(0).getData()
-                .contains("Fail to execute line 3: print(a2)"));
-            assertTrue(p.getReturn().message().get(0).getData()
-                .contains("name 'a2' is not defined"));
-          } else if (isSpark2()){
+          if (isSpark2()){
             // run SparkSession test
             p = note.addNewParagraph(anonymous);
             p.setText("%pyspark from pyspark.sql import Row\n" +
@@ -559,6 +534,8 @@ public abstract class ZeppelinSparkClusterTest extends AbstractTestRestApi {
 
   @Test
   public void zRunTest() throws IOException, InterruptedException {
+    assumeTrue("Hadoop version mismatch, skip test", isHadoopVersionMatch());
+
     String noteId = null;
     String note2Id = null;
     try {
@@ -653,6 +630,8 @@ public abstract class ZeppelinSparkClusterTest extends AbstractTestRestApi {
 
   @Test
   public void testZeppelinContextResource() throws IOException {
+    assumeTrue("Hadoop version mismatch, skip test", isHadoopVersionMatch());
+
     String noteId = null;
     try {
       noteId = TestUtils.getInstance(Notebook.class).createNote("note1", anonymous);
@@ -701,6 +680,8 @@ public abstract class ZeppelinSparkClusterTest extends AbstractTestRestApi {
 
   @Test
   public void testZeppelinContextHook() throws IOException {
+    assumeTrue("Hadoop version mismatch, skip test", isHadoopVersionMatch());
+
     String noteId = null;
     String note2Id = null;
     try {
@@ -775,10 +756,6 @@ public abstract class ZeppelinSparkClusterTest extends AbstractTestRestApi {
     }
   }
 
-  private boolean isSpark1() {
-    return sparkVersion.startsWith("1.");
-  }
-
   private boolean isSpark2() {
     return sparkVersion.startsWith("2.");
   }
@@ -789,6 +766,8 @@ public abstract class ZeppelinSparkClusterTest extends AbstractTestRestApi {
 
   @Test
   public void testSparkZeppelinContextDynamicForms() throws IOException {
+    assumeTrue("Hadoop version mismatch, skip test", isHadoopVersionMatch());
+
     String noteId = null;
     try {
       noteId = TestUtils.getInstance(Notebook.class).createNote("note1", anonymous);
@@ -832,6 +811,8 @@ public abstract class ZeppelinSparkClusterTest extends AbstractTestRestApi {
 
   @Test
   public void testPySparkZeppelinContextDynamicForms() throws IOException {
+    assumeTrue("Hadoop version mismatch, skip test", isHadoopVersionMatch());
+
     String noteId = null;
     try {
       noteId = TestUtils.getInstance(Notebook.class).createNote("note1", anonymous);
@@ -873,6 +854,8 @@ public abstract class ZeppelinSparkClusterTest extends AbstractTestRestApi {
 
   @Test
   public void testAngularObjects() throws IOException, InterpreterNotFoundException {
+    assumeTrue("Hadoop version mismatch, skip test", isHadoopVersionMatch());
+
     String noteId = null;
     try {
       noteId = TestUtils.getInstance(Notebook.class).createNote("note1", anonymous);
@@ -988,6 +971,8 @@ public abstract class ZeppelinSparkClusterTest extends AbstractTestRestApi {
 
   @Test
   public void testScalaNoteDynamicForms() throws IOException {
+    assumeTrue("Hadoop version mismatch, skip test", isHadoopVersionMatch());
+
     String noteId = null;
     try {
       noteId = TestUtils.getInstance(Notebook.class).createNote("note1", anonymous);
@@ -1056,6 +1041,8 @@ public abstract class ZeppelinSparkClusterTest extends AbstractTestRestApi {
 
   @Test
   public void testPythonNoteDynamicForms() throws IOException {
+    assumeTrue("Hadoop version mismatch, skip test", isHadoopVersionMatch());
+
     String noteId = null;
     try {
       noteId = TestUtils.getInstance(Notebook.class).createNote("note1", anonymous);
@@ -1124,6 +1111,8 @@ public abstract class ZeppelinSparkClusterTest extends AbstractTestRestApi {
 
   @Test
   public void testRNoteDynamicForms() throws IOException {
+    assumeTrue("Hadoop version mismatch, skip test", isHadoopVersionMatch());
+
     String noteId = null;
     try {
       noteId = TestUtils.getInstance(Notebook.class).createNote("note1", anonymous);
@@ -1158,6 +1147,8 @@ public abstract class ZeppelinSparkClusterTest extends AbstractTestRestApi {
 
   @Test
   public void testConfInterpreter() throws IOException {
+    assumeTrue("Hadoop version mismatch, skip test", isHadoopVersionMatch());
+
     String noteId = null;
     try {
       TestUtils.getInstance(Notebook.class).getInterpreterSettingManager().close();
@@ -1197,6 +1188,8 @@ public abstract class ZeppelinSparkClusterTest extends AbstractTestRestApi {
 
   @Test
   public void testFailtoLaunchSpark() throws IOException {
+    assumeTrue("Hadoop version mismatch, skip test", isHadoopVersionMatch());
+
     String noteId = null;
     try {
       TestUtils.getInstance(Notebook.class).getInterpreterSettingManager().close();
diff --git a/zeppelin-interpreter/pom.xml b/zeppelin-interpreter/pom.xml
index fdbc678..bd84210 100644
--- a/zeppelin-interpreter/pom.xml
+++ b/zeppelin-interpreter/pom.xml
@@ -205,13 +205,6 @@
     </dependency>
 
     <dependency>
-      <groupId>org.apache.hadoop</groupId>
-      <artifactId>hadoop-client</artifactId>
-      <!-- Should always use provided, yarn container (YarnInterpreterLauncher) will provide all the hadoop jars -->
-      <scope>provided</scope>
-    </dependency>
-
-    <dependency>
      <groupId>junit</groupId>
       <artifactId>junit</artifactId>
       <scope>test</scope>
@@ -233,4 +226,58 @@
     </resources>
   </build>
 
+  <profiles>
+    <profile>
+      <id>hadoop2</id>
+
+      <activation>
+        <activeByDefault>true</activeByDefault>
+      </activation>
+      <properties>
+        <hadoop.version>${hadoop2.7.version}</hadoop.version>
+        <hadoop-client-api.artifact>hadoop-client</hadoop-client-api.artifact>
+        <hadoop-client-runtime.artifact>hadoop-yarn-api</hadoop-client-runtime.artifact>
+        <hadoop-client-minicluster.artifact>hadoop-client</hadoop-client-minicluster.artifact>
+      </properties>
+
+      <dependencies>
+        <dependency>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-common</artifactId>
+        </dependency>
+
+        <dependency>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-yarn-client</artifactId>
+        </dependency>
+      </dependencies>
+
+    </profile>
+    <profile>
+      <id>hadoop3</id>
+
+      <properties>
+        <hadoop.version>${hadoop3.2.version}</hadoop.version>
+        <hadoop-client-api.artifact>hadoop-client-api</hadoop-client-api.artifact>
+        <hadoop-client-runtime.artifact>hadoop-client-runtime</hadoop-client-runtime.artifact>
+        <hadoop-client-minicluster.artifact>hadoop-client-minicluster</hadoop-client-minicluster.artifact>
+      </properties>
+
+      <dependencies>
+
+        <dependency>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>${hadoop-client-runtime.artifact}</artifactId>
+        </dependency>
+
+        <dependency>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>${hadoop-client-minicluster.artifact}</artifactId>
+          <scope>test</scope>
+        </dependency>
+
+      </dependencies>
+    </profile>
+  </profiles>
+
 </project>
diff --git a/zeppelin-plugins/launcher/yarn/pom.xml b/zeppelin-plugins/launcher/yarn/pom.xml
index 63cdc46..64d579a 100644
--- a/zeppelin-plugins/launcher/yarn/pom.xml
+++ b/zeppelin-plugins/launcher/yarn/pom.xml
@@ -38,19 +38,6 @@
     <plugin.name>Launcher/YarnInterpreterLauncher</plugin.name>
   </properties>
 
-  <dependencies>
-    <dependency>
-      <groupId>org.apache.hadoop</groupId>
-      <artifactId>hadoop-client</artifactId>
-    </dependency>
-
-    <dependency>
-      <groupId>org.apache.hadoop</groupId>
-      <artifactId>hadoop-yarn-api</artifactId>
-    </dependency>
-
-  </dependencies>
-
   <build>
     <testResources>
       <testResource>
@@ -90,4 +77,59 @@
       </resource>
     </resources>
   </build>
+
+  <profiles>
+    <profile>
+      <id>hadoop2</id>
+
+      <activation>
+        <activeByDefault>true</activeByDefault>
+      </activation>
+
+      <properties>
+        <hadoop.version>${hadoop2.7.version}</hadoop.version>
+      </properties>
+      <dependencies>
+        <dependency>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-common</artifactId>
+          <scope>provided</scope>
+          <version>${hadoop.version}</version>
+        </dependency>
+
+        <dependency>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-client</artifactId>
+          <scope>provided</scope>
+          <version>${hadoop.version}</version>
+        </dependency>
+      </dependencies>
+
+    </profile>
+    <profile>
+      <id>hadoop3</id>
+
+      <properties>
+        <hadoop.version>${hadoop3.2.version}</hadoop.version>
+        <hadoop-client-runtime.artifact>hadoop-client-runtime</hadoop-client-runtime.artifact>
+      </properties>
+
+      <dependencies>
+        <dependency>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-client</artifactId>
+          <scope>provided</scope>
+          <version>${hadoop.version}</version>
+        </dependency>
+
+        <dependency>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-client-runtime</artifactId>
+          <scope>provided</scope>
+          <version>${hadoop.version}</version>
+        </dependency>
+      </dependencies>
+    </profile>
+  </profiles>
+
 </project>
diff --git a/zeppelin-plugins/notebookrepo/filesystem/pom.xml b/zeppelin-plugins/notebookrepo/filesystem/pom.xml
index 6c88cc5..baa7ae7 100644
--- a/zeppelin-plugins/notebookrepo/filesystem/pom.xml
+++ b/zeppelin-plugins/notebookrepo/filesystem/pom.xml
@@ -38,13 +38,6 @@
     <plugin.name>NotebookRepo/FileSystemNotebookRepo</plugin.name>
   </properties>
 
-  <dependencies>
-    <dependency>
-      <groupId>org.apache.hadoop</groupId>
-      <artifactId>hadoop-client</artifactId>
-    </dependency>
-  </dependencies>
-
   <build>
     <plugins>
       <plugin>
@@ -56,6 +49,58 @@
 
   <profiles>
     <profile>
+      <id>hadoop2</id>
+      <activation>
+        <activeByDefault>true</activeByDefault>
+      </activation>
+      <properties>
+        <hadoop.version>${hadoop2.7.version}</hadoop.version>
+      </properties>
+      <dependencies>
+        <dependency>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-common</artifactId>
+          <scope>provided</scope>
+          <version>${hadoop.version}</version>
+        </dependency>
+        <dependency>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-client</artifactId>
+          <scope>provided</scope>
+          <version>${hadoop.version}</version>
+        </dependency>
+      </dependencies>
+    </profile>
+
+    <profile>
+      <id>hadoop3</id>
+      <properties>
+        <hadoop.version>${hadoop3.2.version}</hadoop.version>
+      </properties>
+
+      <dependencies>
+        <dependency>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-client</artifactId>
+          <scope>provided</scope>
+          <version>${hadoop.version}</version>
+        </dependency>
+        <dependency>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-client-runtime</artifactId>
+          <scope>provided</scope>
+          <version>${hadoop.version}</version>
+          <exclusions>
+            <exclusion>
+              <groupId>com.google.code.findbugs</groupId>
+              <artifactId>jsr305</artifactId>
+            </exclusion>
+          </exclusions>
+        </dependency>
+      </dependencies>
+    </profile>
+
+    <profile>
       <id>hadoop2-azure</id>
       <properties>
         <hadoop.version>${hadoop2.7.version}</hadoop.version>
diff --git a/zeppelin-server/pom.xml b/zeppelin-server/pom.xml
index 2ecf4a8..2099047 100644
--- a/zeppelin-server/pom.xml
+++ b/zeppelin-server/pom.xml
@@ -40,6 +40,7 @@
     <jna.version>4.1.0</jna.version>
     <jackson.version>2.9.10.6</jackson.version>
     <nimbus.version>9.13</nimbus.version>
+    <kerberos.version>2.0.0-M15</kerberos.version>
 
     <!--test library versions-->
     <selenium.java.version>2.48.2</selenium.java.version>
@@ -293,30 +294,9 @@
     </dependency>
 
     <dependency>
-      <groupId>org.apache.hadoop</groupId>
-      <artifactId>hadoop-client</artifactId>
-      <exclusions>
-        <!-- Provided by javax-websocket-server-impl -->
-        <exclusion>
-          <groupId>org.ow2.asm</groupId>
-          <artifactId>asm</artifactId>
-        </exclusion>
-      </exclusions>
-    </dependency>
-
-    <!--test libraries-->
-    <dependency>
-      <groupId>org.apache.hadoop</groupId>
-      <artifactId>hadoop-common</artifactId>
-      <classifier>tests</classifier>
-      <scope>test</scope>
-      <exclusions>
-        <!-- Provided by javax-websocket-server-impl -->
-        <exclusion>
-          <groupId>org.ow2.asm</groupId>
-          <artifactId>asm</artifactId>
-        </exclusion>
-      </exclusions>
+      <groupId>org.apache.directory.server</groupId>
+      <artifactId>apacheds-kerberos-codec</artifactId>
+      <version>${kerberos.version}</version>
     </dependency>
 
     <dependency>
@@ -498,17 +478,42 @@
         </zeppelin.daemon.package.base>
       </properties>
     </profile>
+
+    <profile>
+      <id>hadoop2</id>
+
+      <properties>
+        <hadoop.version>${hadoop2.7.version}</hadoop.version>
+      </properties>
+      <dependencies>
+        <dependency>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-common</artifactId>
+          <version>${hadoop.version}</version>
+          <scope>${hadoop.deps.scope}</scope>
+        </dependency>
+      </dependencies>
+
+    </profile>
     <profile>
       <id>hadoop3</id>
+
+      <properties>
+        <hadoop.version>${hadoop3.2.version}</hadoop.version>
+        <hadoop-client-runtime.artifact>hadoop-client-runtime</hadoop-client-runtime.artifact>
+      </properties>
+
       <dependencies>
         <dependency>
-          <groupId>org.apache.directory.server</groupId>
-          <artifactId>kerberos-client</artifactId>
-          <version>${kerberos-client.version}</version>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>${hadoop-client-runtime.artifact}</artifactId>
+          <version>${hadoop.version}</version>
           <scope>${hadoop.deps.scope}</scope>
         </dependency>
       </dependencies>
+
     </profile>
+
   </profiles>
 </project>
diff --git a/zeppelin-zengine/pom.xml b/zeppelin-zengine/pom.xml
index f0aa0d5..515e7ac 100644
--- a/zeppelin-zengine/pom.xml
+++ b/zeppelin-zengine/pom.xml
@@ -213,11 +213,6 @@
     </dependency>
 
     <dependency>
-      <groupId>org.apache.hadoop</groupId>
-      <artifactId>hadoop-client</artifactId>
-    </dependency>
-
-    <dependency>
       <groupId>junit</groupId>
       <artifactId>junit</artifactId>
       <scope>test</scope>
@@ -301,4 +296,55 @@
       </plugin>
     </plugins>
   </build>
+
+  <profiles>
+    <profile>
+      <id>hadoop2</id>
+
+      <activation>
+        <activeByDefault>true</activeByDefault>
+      </activation>
+
+      <properties>
+        <hadoop.version>${hadoop2.7.version}</hadoop.version>
+      </properties>
+      <dependencies>
+        <dependency>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-common</artifactId>
+        </dependency>
+
+        <dependency>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-yarn-client</artifactId>
+        </dependency>
+      </dependencies>
+
+    </profile>
+    <profile>
+      <id>hadoop3</id>
+
+      <properties>
+        <hadoop.version>${hadoop3.2.version}</hadoop.version>
+        <hadoop-client-api.artifact>hadoop-client-api</hadoop-client-api.artifact>
+        <hadoop-client-runtime.artifact>hadoop-client-runtime</hadoop-client-runtime.artifact>
+        <hadoop-client-minicluster.artifact>hadoop-client-minicluster</hadoop-client-minicluster.artifact>
+
+      </properties>
+
+      <dependencies>
+        <dependency>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>${hadoop-client-runtime.artifact}</artifactId>
+        </dependency>
+
+        <dependency>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>${hadoop-client-minicluster.artifact}</artifactId>
+          <scope>test</scope>
+          <version>${hadoop.version}</version>
+        </dependency>
+      </dependencies>
+    </profile>
+  </profiles>
 </project>
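
For anyone trying the new Hadoop profiles locally: hadoop2 is marked activeByDefault in most of the modules above, and hadoop3 has to be selected explicitly. As a minimal sketch that relies only on standard Maven profile selection (the authoritative invocations are the ones in .github/workflows/core.yml), the integration tests can be exercised against either Hadoop line roughly like this:

    # Hadoop 3.2 line
    mvn verify -pl zeppelin-interpreter-integration -am -Phadoop3

    # Hadoop 2.7 line (hadoop2 is the default profile, so -Phadoop2 is optional)
    mvn verify -pl zeppelin-interpreter-integration -am

With the guard added to SparkIntegrationTest and ZeppelinSparkClusterTest, assumeTrue("Hadoop version mismatch, skip test", isHadoopVersionMatch()) skips, rather than fails, any parameterized case whose expected Hadoop major version does not match the Hadoop version actually on the test classpath.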