This is an automated email from the ASF dual-hosted git repository. tvalentyn pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/beam.git
The following commit(s) were added to refs/heads/master by this push: new 2e9b3b7dcd8 Reduce the number of layers in Python base image. (#27006) 2e9b3b7dcd8 is described below commit 2e9b3b7dcd801fd83c722e857e07573c25b217a3 Author: tvalentyn <tvalen...@users.noreply.github.com> AuthorDate: Tue Jun 6 05:03:52 2023 -0700 Reduce the number of layers in Python base image. (#27006) --- sdks/python/container/Dockerfile | 69 +++++++++++++++++++--------------------- 1 file changed, 33 insertions(+), 36 deletions(-) diff --git a/sdks/python/container/Dockerfile b/sdks/python/container/Dockerfile index 6d64fff9883..83340643cf4 100644 --- a/sdks/python/container/Dockerfile +++ b/sdks/python/container/Dockerfile @@ -22,8 +22,17 @@ LABEL Author "Apache Beam <d...@beam.apache.org>" ARG TARGETOS ARG TARGETARCH -# Install native bindings required for dependencies. -RUN apt-get update && \ +COPY target/base_image_requirements.txt /tmp/base_image_requirements.txt +COPY target/apache-beam.tar.gz /opt/apache/beam/tars/ +COPY target/launcher/${TARGETOS}_${TARGETARCH}/boot target/LICENSE target/NOTICE target/LICENSE.python /opt/apache/beam/ + +ENV CLOUDSDK_CORE_DISABLE_PROMPTS yes +ENV PATH $PATH:/usr/local/gcloud/google-cloud-sdk/bin + +# Use one RUN command to reduce the number of layers. +RUN \ + # Install native bindings required for dependencies. + apt-get update && \ apt-get install -y \ # Required by python-snappy libsnappy-dev \ @@ -32,56 +41,44 @@ RUN apt-get update && \ # This is used to speed up the re-installation of the sdk. ccache \ && \ - rm -rf /var/lib/apt/lists/* + rm -rf /var/lib/apt/lists/* && \ -#### -# Install required packages for Beam Python SDK and common dependencies used by users. -#### + pip install --upgrade setuptools && \ -COPY target/base_image_requirements.txt /tmp/base_image_requirements.txt -RUN \ + # Install required packages for Beam Python SDK and common dependencies used by users. # use --no-deps to ensure the list includes all transitive dependencies. pip install --no-deps -r /tmp/base_image_requirements.txt && \ + rm -rf /tmp/base_image_requirements.txt && \ python -c "import nltk; nltk.download('stopwords')" && \ rm /root/nltk_data/corpora/stopwords.zip && \ + # Check that the protobuf upb(also called micro protobuf) is used. python -c "from google.protobuf.internal import api_implementation; assert api_implementation._implementation_type == 'upb'; print ('Verified fast protobuf used.')" && \ - # Remove pip cache. - rm -rf /root/.cache/pip && \ - rm -rf /tmp/base_image_requirements.txt - -RUN pip install --upgrade pip setuptools -# Install Google Cloud SDK. -ENV CLOUDSDK_CORE_DISABLE_PROMPTS yes -ENV PATH $PATH:/usr/local/gcloud/google-cloud-sdk/bin -RUN mkdir -p /usr/local/gcloud && \ + # Install Google Cloud SDK. + mkdir -p /usr/local/gcloud && \ cd /usr/local/gcloud && \ curl -s -O https://dl.google.com/dl/cloudsdk/release/google-cloud-sdk.tar.gz && \ tar -xf google-cloud-sdk.tar.gz && \ /usr/local/gcloud/google-cloud-sdk/install.sh && \ rm -rf /usr/local/gcloud/google-cloud-sdk/.install/.backup && \ - rm google-cloud-sdk.tar.gz + rm google-cloud-sdk.tar.gz && \ -# Configure ccache prior to installing Beam SDK. -RUN ln -s /usr/bin/ccache /usr/local/bin/gcc -# These parameters are needed as pip compiles artifacts in random temporary directories. -RUN ccache --set-config=sloppiness=file_macro && ccache --set-config=hash_dir=false + # Configure ccache prior to installing Beam SDK. This speeds up wheels compilation when installing the SDK from sources. + ln -s /usr/bin/ccache /usr/local/bin/gcc && \ + # These parameters are needed as pip compiles artifacts in random temporary directories. + ccache --set-config=sloppiness=file_macro && ccache --set-config=hash_dir=false && \ -#### -# Install Apache Beam SDK. Use --no-deps and pip check to verify that all -# necessary dependencies are specified in base_image_requirements.txt. -#### -COPY target/apache-beam.tar.gz /opt/apache/beam/tars/ -RUN pip install --no-deps -v /opt/apache/beam/tars/apache-beam.tar.gz[gcp] -RUN pip check || (echo "Container does not include required Beam dependencies or has conflicting dependencies. If Beam dependencies have changed, you need to regenerate base_image_requirements.txt files. See: https://s.apache.org/beam-python-requirements-generate" && exit 1) -# Log complete list of what exact packages and versions are installed. -RUN pip freeze --all - -COPY target/LICENSE /opt/apache/beam/ -COPY target/LICENSE.python /opt/apache/beam/ -COPY target/NOTICE /opt/apache/beam/ -COPY target/launcher/${TARGETOS}_${TARGETARCH}/boot /opt/apache/beam/ + # Install Apache Beam SDK. Use --no-deps and pip check to verify that all + # necessary dependencies are specified in base_image_requirements.txt. + pip install --no-deps -v /opt/apache/beam/tars/apache-beam.tar.gz[gcp] && \ + pip check || (echo "Container does not include required Beam dependencies or has conflicting dependencies. If Beam dependencies have changed, you need to regenerate base_image_requirements.txt files. See: https://s.apache.org/beam-python-requirements-generate" && exit 1) && \ + + # Log complete list of what exact packages and versions are installed. + pip freeze --all && \ + + # Remove pip cache. + rm -rf /root/.cache/pip ENTRYPOINT ["/opt/apache/beam/boot"]