This is an automated email from the ASF dual-hosted git repository.

stevel pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/hadoop-release-support.git
commit 64748ebec1413e6030dff9e91a9058088187f0b5
Author: Steve Loughran <ste...@cloudera.com>
AuthorDate: Sat Dec 3 13:11:15 2022 +0000

    HADOOP-18470. more work on validating the RC0

    * fetching artifacts (mvn pom)
    * building other modules (can't get parquet to work tho')
    * docs
---
 README.md                                          | 128 ++++++++++++++++++---
 build.xml                                          |  65 ++++++++++-
 pom.xml                                            |  91 ++++++++++++++-
 .../steveloughran/validator/TestRuntimeValid.java  |  19 +++
 4 files changed, 279 insertions(+), 24 deletions(-)

diff --git a/README.md b/README.md
index 35ceb05..a3f8b6d 100644
--- a/README.md
+++ b/README.md
@@ -158,14 +158,14 @@ In build properties, declare `hadoop.version`, `rc` and `http.source`
 
 ```properties
 hadoop.version=3.3.5
-rc=1
+rc=0
 http.source=https://dist.apache.org/repos/dist/dev/hadoop/hadoop-${hadoop.version}-RC${rc}/
 ```
 
 targets of relevance
 
-| target             | action                     |
-|--------------------|----------------------------|
+| target               | action                     |
+|----------------------|----------------------------|
 | `release.fetch.http` | fetch artifacts            |
 | `release.dir.check`  | verify release dir exists  |
 | `release.src.untar`  | untar retrieved artifacts  |
@@ -173,7 +173,7 @@ targets of relevance
 | `release.src.test`   | build and test the source  |
 | `gpg.keys`           | import the hadoop KEYS     |
 | `gpg.verify `        | verify the D/L'd artifacts |
-|                      |                            |
+|                      |                            |
 
 set `release.native.binaries` to false to skip native binary checks
 on platforms without them
@@ -199,9 +199,9 @@ A lot of the targets build maven projects from the staged maven artifacts.
 
 For this to work
 
-1. check out the relevant projects somewhere
-2. set their location in the `build.properties` file
-3. make sure that the branch checked out is the one you want to build.
+1. Check out the relevant projects somewhere
+2. Set their location in the `build.properties` file
+3. Make sure that the branch checked out is the one you want to build.
    This matters for anyone who works on those other projects
    on their own branches.
 4. Some projects need java11.
@@ -214,22 +214,27 @@ ant purge-from-maven
 
 ## Cloudstore
 
+[cloudstore](https://github.com/steveloughran/cloudstore).
+
 No tests, sorry.
 
-```
+```bash
 ant cloudstore.build
 ```
 
 ## Google GCS
 
-This is java11 only.
+
+[Big Data Interop](https://github.com/GoogleCloudPlatform/bigdata-interop).
+
+This is java 11+ only.
 
 Ideally, you should run the tests, or even better,
 run them before the RC is up for review.
 
 Building the libraries.
 Do this only if you aren't running the tests.
 
-```
+```bash
 ant gcs.build
 ```
@@ -241,8 +246,14 @@ Validates hadoop client artifacts; the cloud tests cover hadoop cloud storage cl
 ant spark.build
 ```
 
-Then followup cloud examples if you are set up
+### Spark cloud integration tests
+
+Then followup cloud integration tests if you are set up to build.
+Spark itself does not include any integration tests of the object store connectors.
+This independent module tests the s3a, gcs and abfs connectors,
+and associated committers, through the spark RDD and SQL APIs.
+[cloud integration](https://github.com/hortonworks-spark/cloud-integration)
 
 ```bash
 ant cloud-examples.build
 ant cloud-examples.test
@@ -250,35 +261,118 @@ ant cloud-examples.test
 
 ## HBase filesystem
 
+[hbase-filesystem](https://github.com/apache/hbase-filesystem.git)
+
+Adds zookeeper-based locking on those filesystem API calls for which
+atomic access is required.
+
+Integration tests will go through S3A connector.
+
 ```bash
 ant hboss.build
 ```
 
-## building the site
+## building the Hadoop site
 
-set `hadoop.site.dir` to be the path to where the git
-clone of the asf site repo is
+Set `hadoop.site.dir` to be the path to where the git
+clone of the ASF site repo is
 
 ```properties
 hadoop.site.dir=/Users/stevel/hadoop/release/hadoop-site
 ```
 
-prepare the site with the following targets
+Prepare the site with the following targets
 
 ```bash
 ant release.site.announcement
 ant release.site.docs
 ```
 
-review the annoucement.
+Review the annoucement.
+
+### Manually link the current/stable symlinks to the new release
 
 In the hadoop site dir
 
 ```bash
+
+# review current status
+ls -l
+
+# symlink current
 rm current3
 ln -s r.3.3.5 current3
-ls -l
+
+# symlink stable
 rm stable3
 ln -s r3.3.5 stable
 ln -s r3.3.5 stable3
+
+# review new status
+ls -l
+```
+
+Finally, *commit*
+
+## Adding a global staging profile `asf-staging`
+
+Many projects have a profile to use a staging repository, especially the ASF one.
+
+Not all do -these builds are likely to fail.
+Here is a profile, `asf-staging` which can be used to enable this.
+The paths to the repository can be changed too, if desired.
+
+Some of the maven builds invoked rely on this profile (e.g. avro).
+For some unknown reason the parquet build doesn't seem to cope.
+
+```xml
+  <profile>
+    <id>asf-staging</id>
+    <properties>
+      <!-- override point for ASF staging/snapshot repos -->
+      <asf.staging>https://repository.apache.org/content/groups/staging/</asf.staging>
+      <asf.snapshots>https://repository.apache.org/content/repositories/snapshots/</asf.snapshots>
+    </properties>
+
+    <pluginRepositories>
+      <pluginRepository>
+        <id>ASF Staging</id>
+        <url>${asf.staging}</url>
+      </pluginRepository>
+      <pluginRepository>
+        <id>ASF Snapshots</id>
+        <url>${asf.snapshots}</url>
+        <snapshots>
+          <enabled>true</enabled>
+        </snapshots>
+        <releases>
+          <enabled>false</enabled>
+        </releases>
+      </pluginRepository>
+
+    </pluginRepositories>
+    <repositories>
+      <repository>
+        <id>ASF Staging</id>
+        <url>${asf.staging}</url>
+        <snapshots>
+          <enabled>true</enabled>
+        </snapshots>
+        <releases>
+          <enabled>true</enabled>
+        </releases>
+      </repository>
+      <repository>
+        <id>ASF Snapshots</id>
+        <url>${asf.snapshots}</url>
+        <snapshots>
+          <enabled>true</enabled>
+        </snapshots>
+        <releases>
+          <enabled>true</enabled>
+        </releases>
+      </repository>
+    </repositories>
+  </profile>
+
 ```

diff --git a/build.xml b/build.xml
index 31fa2ed..6dd0e3e 100644
--- a/build.xml
+++ b/build.xml
@@ -42,8 +42,9 @@
 
 
-  <property name="dist.dir" location="${target}/dist"/>
-  <property name="incoming.dir" location="${target}/incoming"/>
+  <property name="downloads.dir" location="downloads"/>
+  <property name="dist.dir" location="${downloads.dir}/dist"/>
+  <property name="incoming.dir" location="${downloads.dir}/incoming"/>
 
 
   <!-- base name of a release -->
@@ -66,13 +67,13 @@
   <property name="release" value="hadoop-${hadoop.version}"/>
   <property name="rc-dirname" value="${release}-${rc}"/>
-  <property name="release.dir" location="${target}/${rc-dirname}"/>
+  <property name="release.dir" location="${downloads.dir}/${rc-dirname}"/>
 
   <property name="staged.artifacts.dir" location="${staging.dir}/${rc.name}"/>
 
   <property name="tag.name" value="release-${rc.name}"/>
 
   <!--  <property name="nexus.staging.url" value=""/>-->
-  <property name="release.untar.dir" location="${target}/untar"/>
+  <property name="release.untar.dir" location="${downloads.dir}/untar"/>
   <property name="release.source.dir" location="${release.untar.dir}/source"/>
name="release.site.dir" location="${release.untar.dir}/site"/> <property name="site.dir" location="${release.untar.dir}/site/r${hadoop.version}"/> @@ -121,7 +122,7 @@ </macrodef> - <mkdir dir="${target}"/> + <mkdir dir="${downloads.dir}"/> <property name="scp.source" value="${scp.user}@${scp.hostname}:${scp.hadoop.dir}/target/artifacts"/> @@ -161,6 +162,7 @@ description="clean up target/ dir"> <!-- Delete the ${dist} directory trees --> <delete dir="${target}"/> + <delete dir="${downloads.dir}"/> </target> <target name="ant"> @@ -452,11 +454,11 @@ Message is in file ${message.out} <mvn dir="${hboss.dir}"> <arg value="-T 1C"/> <arg value="-Pasf-staging"/> - <arg value="-DskipTests"/> <arg value="-Dhadoop.version=${hadoop.version}"/> <arg value="-Dhadoop33.version=${hadoop.version}"/> <arg value="clean"/> <arg value="install"/> + <arg value="-DskipTests"/> </mvn> </target> <target name="hboss.test" if="hboss.dir" @@ -516,6 +518,57 @@ Message is in file ${message.out} </mvn> </target> + <target name="parquet.build" if="parquet.dir" + depends="init" + description="Build parquet"> + <echo> + Build the parquet jars. + There's no profile for using ASF staging as a source for artifacts. + Run this after other builds so the files are already present + </echo> + <mvn dir="${parquet.dir}"> + <arg value="-Dhadoop.version=${hadoop.version}"/> + <arg value="-Pasf-staging"/> + <arg value="--pl"/> + <arg value="parquet-hadoop"/> + <arg value="clean"/> + <arg value="install"/> + <arg value="-DskipTests"/> + </mvn> + </target> + + <target name="parquet.test" if="parquet.dir" + depends="init" + description="Build and test the parquet-hadoop module"> + <echo> + Build and test parquet-hadoop. + There's no profile for using ASF staging as a source for artifacts. + Run this after other builds so the files are already present + </echo> + <mvn dir="${parquet.dir}"> + <arg value="-Dhadoop.version=${hadoop.version}"/> + <arg value="--pl"/> + <arg value="parquet-hadoop"/> + <arg value="install"/> + </mvn> + </target> + + <target name="avro.build" if="avro.dir" + depends="init" + description="Build avro"> + <echo> + Build avro. + Relies on the user having an asf-staging profile. + </echo> + <mvn dir="${avro.dir}/lang/java"> + <arg value="-Dhadoop.version=${hadoop.version}"/> + <arg value="-Pasf-staging"/> + <arg value="clean"/> + <arg value="install"/> + <arg value="-DskipTests"/> + </mvn> + </target> + <!-- Fetch the artifacts from an http repo, for validating someone else's release. the download is into incoming.dir, then after a cleanup copied into release.dir; --> diff --git a/pom.xml b/pom.xml index 6af88d3..d66bb02 100644 --- a/pom.xml +++ b/pom.xml @@ -23,6 +23,10 @@ mvn clean test -Pstaging -U + This imports a lot so that the artifacts can + all be retrieved from staging and stored locally so that + other projects can find them. 
+
   </description>
@@ -40,7 +44,7 @@
 
     <!-- SLF4J/LOG4J version -->
     <slf4j.version>1.7.36</slf4j.version>
-    <reload4j.version>1.2.18.3</reload4j.version>
+    <reload4j.version>1.2.22</reload4j.version>
 
   </properties>
 
@@ -59,6 +63,91 @@
       <version>${hadoop.version}</version>
     </dependency>
 
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-common</artifactId>
+      <version>${hadoop.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-common</artifactId>
+      <version>${hadoop.version}</version>
+      <type>test-jar</type>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-mapreduce-client-core</artifactId>
+      <version>${hadoop.version}</version>
+
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-yarn-server-tests</artifactId>
+      <version>${hadoop.version}</version>
+      <type>test-jar</type>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-mapreduce-client-hs</artifactId>
+      <version>${hadoop.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-mapreduce-examples</artifactId>
+      <version>${hadoop.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-hdfs</artifactId>
+      <version>${hadoop.version}</version>
+      <type>test-jar</type>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-distcp</artifactId>
+      <version>${hadoop.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-distcp</artifactId>
+      <version>${hadoop.version}</version>
+      <type>test-jar</type>
+    </dependency>
+    <!-- artifacts needed to bring up a Mini MR Yarn cluster-->
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-mapreduce-client-app</artifactId>
+      <version>${hadoop.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-mapreduce-client-app</artifactId>
+      <type>test-jar</type>
+      <version>${hadoop.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
+      <version>${hadoop.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
+      <version>${hadoop.version}</version>
+      <type>test-jar</type>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-minikdc</artifactId>
+      <version>${hadoop.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-cloud-storage</artifactId>
+      <version>${hadoop.version}</version>
+    </dependency>
+
+
     <dependency>
       <groupId>junit</groupId>
       <artifactId>junit</artifactId>

diff --git a/src/test/java/com/github/steveloughran/validator/TestRuntimeValid.java b/src/test/java/com/github/steveloughran/validator/TestRuntimeValid.java
index 751e500..204b423 100644
--- a/src/test/java/com/github/steveloughran/validator/TestRuntimeValid.java
+++ b/src/test/java/com/github/steveloughran/validator/TestRuntimeValid.java
@@ -20,6 +20,10 @@ package com.github.steveloughran.validator;
 
 import org.junit.Test;
 
+import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem;
+import org.apache.hadoop.fs.s3a.S3AFileSystem;
+import org.apache.hadoop.hdfs.DistributedFileSystem;
+
 /**
  * Let's test that runtime.
  */
@@ -30,4 +34,19 @@ public class TestRuntimeValid {
     final CompileFS compileFS = new CompileFS();
     compileFS.run();
   }
+
+  @Test
+  public void testS3AConstructor() throws Throwable {
+    new S3AFileSystem();
+  }
+
+  @Test
+  public void testHDFSConstructor() throws Throwable {
+    new DistributedFileSystem();
+  }
+  @Test
+  public void testABFSConstructor() throws Throwable {
+    new AzureBlobFileSystem();
+  }
+
 }

---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-commits-h...@hadoop.apache.org