This is an automated email from the ASF dual-hosted git repository. agrove pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/arrow-datafusion-python.git
The following commit(s) were added to refs/heads/master by this push: new 7f2cb01 Add release scripts, bump version to 0.7.0 (#74) 7f2cb01 is described below commit 7f2cb0144b0e3b3b401341b43f3fb9f8dd87d59a Author: Andy Grove <andygrov...@gmail.com> AuthorDate: Tue Nov 15 08:51:55 2022 -0700 Add release scripts, bump version to 0.7.0 (#74) * prepare for next release * rat * update Cargo.lock --- Cargo.lock | 2 +- Cargo.toml | 8 +- dev/release/README.md | 36 +++++ dev/release/create-tarball.sh | 136 ++++++++++++++++++ dev/release/release-tarball.sh | 74 ++++++++++ dev/release/update_change_log-datafusion-python.sh | 33 +++++ dev/release/update_change_log.sh | 87 ++++++++++++ dev/release/verify-release-candidate.sh | 155 +++++++++++++++++++++ 8 files changed, 526 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 90bbf7c..db3a43e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -654,7 +654,7 @@ dependencies = [ [[package]] name = "datafusion-python" -version = "0.6.0" +version = "0.7.0" dependencies = [ "async-trait", "datafusion", diff --git a/Cargo.toml b/Cargo.toml index b67cbc5..0e8db9c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,11 +17,11 @@ [package] name = "datafusion-python" -version = "0.6.0" -homepage = "https://github.com/apache/arrow" -repository = "https://github.com/apache/arrow" +version = "0.7.0" +homepage = "https://github.com/apache/arrow-datafusion-python" +repository = "https://github.com/apache/arrow-datafusion-python" authors = ["Apache Arrow <d...@arrow.apache.org>"] -description = "Build and run queries against data" +description = "Apache Arrow DataFusion DataFrame and SQL Query Engine" readme = "README.md" license = "Apache-2.0" edition = "2021" diff --git a/dev/release/README.md b/dev/release/README.md new file mode 100644 index 0000000..6e4fc9a --- /dev/null +++ b/dev/release/README.md @@ -0,0 +1,36 @@ +<!--- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. 
See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + +# DataFusion Python Release Process + +This is a work-in-progress that will be updated as we work through the next release. + +## Preparing a Release Candidate + +- Update the version number in Cargo.toml +- Generate changelog +- Tag the repo with an rc tag e.g. `0.7.0-rc1` +- Create tarball and upload to ASF +- Start the vote + +## Releasing Artifacts + +```bash +maturin publish +``` \ No newline at end of file diff --git a/dev/release/create-tarball.sh b/dev/release/create-tarball.sh new file mode 100755 index 0000000..64150f5 --- /dev/null +++ b/dev/release/create-tarball.sh @@ -0,0 +1,136 @@ +#!/bin/bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. +# + +# Adapted from https://github.com/apache/arrow-rs/tree/master/dev/release/create-tarball.sh + +# This script creates a signed tarball in +# dev/dist/apache-arrow-datafusion-python-<version>-rc<rc>/apache-arrow-datafusion-python-<version>.tar.gz and uploads it to +# the "dev" area of the dist.apache.arrow repository and prepares an +# email for sending to the d...@arrow.apache.org list for a formal +# vote. +# +# See release/README.md for full release instructions +# +# Requirements: +# +# 1. gpg setup for signing and have uploaded your public +# signature to https://pgp.mit.edu/ +# +# 2. Logged into the apache svn server with the appropriate +# credentials +# +# 3. Install the requests python package +# +# +# Based in part on 02-source.sh from apache/arrow +# + +set -e + +SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +SOURCE_TOP_DIR="$(cd "${SOURCE_DIR}/../../" && pwd)" + +if [ "$#" -ne 2 ]; then + echo "Usage: $0 <version> <rc>" + echo "ex. 
$0 4.1.0 2" + exit +fi + +if [[ -z "${GH_TOKEN}" ]]; then + echo "Please set personal github token through GH_TOKEN environment variable" + exit +fi + +version=$1 +rc=$2 +tag="${version}-rc${rc}" + +echo "Attempting to create ${tarball} from tag ${tag}" +release_hash=$(cd "${SOURCE_TOP_DIR}" && git rev-list --max-count=1 ${tag}) + +release=apache-arrow-datafusion-python-${version} +distdir=${SOURCE_TOP_DIR}/dev/dist/${release}-rc${rc} +tarname=${release}.tar.gz +tarball=${distdir}/${tarname} +url="https://dist.apache.org/repos/dist/dev/arrow/${release}-rc${rc}" + +if [ -z "$release_hash" ]; then + echo "Cannot continue: unknown git tag: ${tag}" +fi + +echo "Draft email for d...@arrow.apache.org mailing list" +echo "" +echo "---------------------------------------------------------" +cat <<MAIL +To: d...@arrow.apache.org +Subject: [VOTE][RUST][DataFusion] Release DataFusion Python Bindings ${version} RC${rc} +Hi, + +I would like to propose a release of Apache Arrow DataFusion Python Bindings, +version ${version}. + +This release candidate is based on commit: ${release_hash} [1] +The proposed release tarball and signatures are hosted at [2]. +The changelog is located at [3]. + +Please download, verify checksums and signatures, run the unit tests, and vote +on the release. The vote will be open for at least 72 hours. + +Only votes from PMC members are binding, but all members of the community are +encouraged to test the release and vote with "(non-binding)". + +The standard verification procedure is documented at https://github.com/apache/arrow-datafusion-python/blob/master/dev/release/README.md#verifying-release-candidates. + +[ ] +1 Release this as Apache Arrow DataFusion Python ${version} +[ ] +0 +[ ] -1 Do not release this as Apache Arrow DataFusion Python ${version} because... 
+ +Here is my vote: + ++1 + +[1]: https://github.com/apache/arrow-datafusion-python/tree/${release_hash} +[2]: ${url} +[3]: https://github.com/apache/arrow-datafusion-python/blob/${release_hash}/CHANGELOG.md +MAIL +echo "---------------------------------------------------------" + + +# create <tarball> containing the files in git at $release_hash +# the files in the tarball are prefixed with {version} (e.g. 4.0.1) +mkdir -p ${distdir} +(cd "${SOURCE_TOP_DIR}" && git archive ${release_hash} --prefix ${release}/ | gzip > ${tarball}) + +echo "Running rat license checker on ${tarball}" +${SOURCE_DIR}/run-rat.sh ${tarball} + +echo "Signing tarball and creating checksums" +gpg --armor --output ${tarball}.asc --detach-sig ${tarball} +# create signing with relative path of tarball +# so that they can be verified with a command such as +# shasum --check apache-arrow-datafusion-python-4.1.0-rc2.tar.gz.sha512 +(cd ${distdir} && shasum -a 256 ${tarname}) > ${tarball}.sha256 +(cd ${distdir} && shasum -a 512 ${tarname}) > ${tarball}.sha512 + + +echo "Uploading to apache dist/dev to ${url}" +svn co --depth=empty https://dist.apache.org/repos/dist/dev/arrow ${SOURCE_TOP_DIR}/dev/dist +svn add ${distdir} +svn ci -m "Apache Arrow DataFusion Python ${version} ${rc}" ${distdir} diff --git a/dev/release/release-tarball.sh b/dev/release/release-tarball.sh new file mode 100755 index 0000000..15aa85e --- /dev/null +++ b/dev/release/release-tarball.sh @@ -0,0 +1,74 @@ +#!/bin/bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +# Adapted from https://github.com/apache/arrow-rs/tree/master/dev/release/release-tarball.sh + +# This script copies a tarball from the "dev" area of the +# dist.apache.arrow repository to the "release" area +# +# This script should only be run after the release has been approved +# by the arrow PMC committee. +# +# See release/README.md for full release instructions +# +# Based in part on post-01-upload.sh from apache/arrow + + +set -e +set -u + +if [ "$#" -ne 2 ]; then + echo "Usage: $0 <version> <rc-num>" + echo "ex. $0 4.1.0 2" + exit +fi + +version=$1 +rc=$2 + +tmp_dir=tmp-apache-arrow-datafusion-python-dist + +echo "Recreate temporary directory: ${tmp_dir}" +rm -rf ${tmp_dir} +mkdir -p ${tmp_dir} + +echo "Clone dev dist repository" +svn \ + co \ + https://dist.apache.org/repos/dist/dev/arrow/apache-arrow-datafusion-python-${version}-rc${rc} \ + ${tmp_dir}/dev + +echo "Clone release dist repository" +svn co https://dist.apache.org/repos/dist/release/arrow ${tmp_dir}/release + +echo "Copy ${version}-rc${rc} to release working copy" +release_version=arrow-datafusion-${version} +mkdir -p ${tmp_dir}/release/${release_version} +cp -r ${tmp_dir}/dev/* ${tmp_dir}/release/${release_version}/ +svn add ${tmp_dir}/release/${release_version} + +echo "Commit release" +svn ci -m "Apache Arrow DataFusion Python ${version}" ${tmp_dir}/release + +echo "Clean up" +rm -rf ${tmp_dir} + +echo "Success! 
The release is available here:" +echo " https://dist.apache.org/repos/dist/release/arrow/${release_version}" diff --git a/dev/release/update_change_log-datafusion-python.sh b/dev/release/update_change_log-datafusion-python.sh new file mode 100755 index 0000000..d993536 --- /dev/null +++ b/dev/release/update_change_log-datafusion-python.sh @@ -0,0 +1,33 @@ +#!/bin/bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +# Usage: +# CHANGELOG_GITHUB_TOKEN=<TOKEN> ./update_change_log-datafusion-python.sh master 8.0.0 7.1.0 +# CHANGELOG_GITHUB_TOKEN=<TOKEN> ./update_change_log-datafusion-python.sh maint-7.x 7.1.0 7.0.0 + +RELEASE_BRANCH=$1 +RELEASE_TAG=$2 +BASE_TAG=$3 + +SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +${SOURCE_DIR}/update_change_log.sh \ + "${BASE_TAG}" \ + --future-release "${RELEASE_TAG}" \ + --release-branch "${RELEASE_BRANCH}" diff --git a/dev/release/update_change_log.sh b/dev/release/update_change_log.sh new file mode 100755 index 0000000..a0b3981 --- /dev/null +++ b/dev/release/update_change_log.sh @@ -0,0 +1,87 @@ +#!/bin/bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +# Adapted from https://github.com/apache/arrow-rs/tree/master/dev/release/update_change_log.sh + +# invokes the changelog generator from +# https://github.com/github-changelog-generator/github-changelog-generator +# +# With the config located in +# arrow-datafusion/.github_changelog_generator +# +# Usage: +# CHANGELOG_GITHUB_TOKEN=<TOKEN> ./update_change_log.sh <SINCE_TAG> <EXTRA_ARGS...> + +set -e + +SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +SOURCE_TOP_DIR="$(cd "${SOURCE_DIR}/../../" && pwd)" + +if [[ "$#" -lt 1 ]]; then + echo "USAGE: $0 SINCE_TAG EXTRA_ARGS..." 
+ exit 1 +fi + +SINCE_TAG=$1 +shift 1 + +OUTPUT_PATH="CHANGELOG.md" + +pushd ${SOURCE_TOP_DIR} + +# reset content in changelog +git checkout "${SINCE_TAG}" "${OUTPUT_PATH}" +# remove license header so github-changelog-generator has a clean base to append +sed -i.bak '1,18d' "${OUTPUT_PATH}" + +docker run -it --rm \ + --cpus "0.1" \ + -e CHANGELOG_GITHUB_TOKEN=$CHANGELOG_GITHUB_TOKEN \ + -v "$(pwd)":/usr/local/src/your-app \ + githubchangeloggenerator/github-changelog-generator \ + --user apache \ + --project arrow-datafusion-python \ + --since-tag "${SINCE_TAG}" \ + --base "${OUTPUT_PATH}" \ + --output "${OUTPUT_PATH}" \ + "$@" + +sed -i.bak "s/\\\n/\n\n/" "${OUTPUT_PATH}" + +echo '<!--- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> +' | cat - "${OUTPUT_PATH}" > "${OUTPUT_PATH}".tmp +mv "${OUTPUT_PATH}".tmp "${OUTPUT_PATH}" diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh new file mode 100755 index 0000000..fee276c --- /dev/null +++ b/dev/release/verify-release-candidate.sh @@ -0,0 +1,155 @@ +#!/bin/bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +case $# in + 2) VERSION="$1" + RC_NUMBER="$2" + ;; + *) echo "Usage: $0 X.Y.Z RC_NUMBER" + exit 1 + ;; +esac + +set -e +set -x +set -o pipefail + +SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" +ARROW_DIR="$(dirname $(dirname ${SOURCE_DIR}))" +ARROW_DIST_URL='https://dist.apache.org/repos/dist/dev/arrow' + +download_dist_file() { + curl \ + --silent \ + --show-error \ + --fail \ + --location \ + --remote-name $ARROW_DIST_URL/$1 +} + +download_rc_file() { + download_dist_file apache-arrow-datafusion-python-${VERSION}-rc${RC_NUMBER}/$1 +} + +import_gpg_keys() { + download_dist_file KEYS + gpg --import KEYS +} + +if type shasum >/dev/null 2>&1; then + sha256_verify="shasum -a 256 -c" + sha512_verify="shasum -a 512 -c" +else + sha256_verify="sha256sum -c" + sha512_verify="sha512sum -c" +fi + +fetch_archive() { + local dist_name=$1 + download_rc_file ${dist_name}.tar.gz + download_rc_file ${dist_name}.tar.gz.asc + download_rc_file ${dist_name}.tar.gz.sha256 + download_rc_file ${dist_name}.tar.gz.sha512 + verify_dir_artifact_signatures +} + +verify_dir_artifact_signatures() { + # verify the signature and the checksums of each artifact + find . 
-name '*.asc' | while read sigfile; do + artifact=${sigfile/.asc/} + gpg --verify $sigfile $artifact || exit 1 + + # go into the directory because the checksum files contain only the + # basename of the artifact + pushd $(dirname $artifact) + base_artifact=$(basename $artifact) + ${sha256_verify} $base_artifact.sha256 || exit 1 + ${sha512_verify} $base_artifact.sha512 || exit 1 + popd + done +} + +setup_tempdir() { + cleanup() { + if [ "${TEST_SUCCESS}" = "yes" ]; then + rm -fr "${ARROW_TMPDIR}" + else + echo "Failed to verify release candidate. See ${ARROW_TMPDIR} for details." + fi + } + + if [ -z "${ARROW_TMPDIR}" ]; then + # clean up automatically if ARROW_TMPDIR is not defined + ARROW_TMPDIR=$(mktemp -d -t "$1.XXXXX") + trap cleanup EXIT + else + # don't clean up automatically + mkdir -p "${ARROW_TMPDIR}" + fi +} + +test_source_distribution() { + # install rust toolchain in a similar fashion like test-miniconda + export RUSTUP_HOME=$PWD/test-rustup + export CARGO_HOME=$PWD/test-rustup + + curl https://sh.rustup.rs -sSf | sh -s -- -y --no-modify-path + + export PATH=$RUSTUP_HOME/bin:$PATH + source $RUSTUP_HOME/env + + # build and test rust + + # raises on any formatting errors + rustup component add rustfmt --toolchain stable + cargo fmt --all -- --check + + # Clone testing repositories into the expected location + git clone https://github.com/apache/arrow-testing.git testing + git clone https://github.com/apache/parquet-testing.git parquet-testing + + cargo build + cargo test --all + + if ( find -iname 'Cargo.toml' | xargs grep SNAPSHOT ); then + echo "Cargo.toml version should not contain SNAPSHOT for releases" + exit 1 + fi + + cargo publish --dry-run +} + +TEST_SUCCESS=no + +setup_tempdir "arrow-${VERSION}" +echo "Working in sandbox ${ARROW_TMPDIR}" +cd ${ARROW_TMPDIR} + +dist_name="apache-arrow-datafusion-python-${VERSION}" +import_gpg_keys +fetch_archive ${dist_name} +tar xf ${dist_name}.tar.gz +pushd ${dist_name} + test_source_distribution +popd + 
+TEST_SUCCESS=yes +echo 'Release candidate looks good!' +exit 0