This is an automated email from the ASF dual-hosted git repository.

tvalentyn pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
     new 2e9b3b7dcd8 Reduce the number of layers in Python base image. (#27006)
2e9b3b7dcd8 is described below

commit 2e9b3b7dcd801fd83c722e857e07573c25b217a3
Author: tvalentyn <tvalen...@users.noreply.github.com>
AuthorDate: Tue Jun 6 05:03:52 2023 -0700

    Reduce the number of layers in Python base image. (#27006)
---
 sdks/python/container/Dockerfile | 69 +++++++++++++++++++---------------------
 1 file changed, 33 insertions(+), 36 deletions(-)

diff --git a/sdks/python/container/Dockerfile b/sdks/python/container/Dockerfile
index 6d64fff9883..83340643cf4 100644
--- a/sdks/python/container/Dockerfile
+++ b/sdks/python/container/Dockerfile
@@ -22,8 +22,17 @@ LABEL Author "Apache Beam <dev@beam.apache.org>"
 ARG TARGETOS
 ARG TARGETARCH
 
-# Install native bindings required for dependencies.
-RUN apt-get update && \
+COPY target/base_image_requirements.txt /tmp/base_image_requirements.txt
+COPY target/apache-beam.tar.gz /opt/apache/beam/tars/
+COPY target/launcher/${TARGETOS}_${TARGETARCH}/boot target/LICENSE target/NOTICE target/LICENSE.python /opt/apache/beam/
+
+ENV CLOUDSDK_CORE_DISABLE_PROMPTS yes
+ENV PATH $PATH:/usr/local/gcloud/google-cloud-sdk/bin
+
+# Use one RUN command to reduce the number of layers.
+RUN  \
+    # Install native bindings required for dependencies.
+    apt-get update && \
     apt-get install -y \
        # Required by python-snappy
        libsnappy-dev \
@@ -32,56 +41,44 @@ RUN apt-get update && \
        # This is used to speed up the re-installation of the sdk.
        ccache \
        && \
-    rm -rf /var/lib/apt/lists/*
+    rm -rf /var/lib/apt/lists/* && \
 
-####
-# Install required packages for Beam Python SDK and common dependencies used by users.
-####
+    pip install --upgrade setuptools && \
 
-COPY target/base_image_requirements.txt /tmp/base_image_requirements.txt
-RUN \
+    # Install required packages for Beam Python SDK and common dependencies used by users.
     # use --no-deps to ensure the list includes all transitive dependencies.
     pip install --no-deps -r /tmp/base_image_requirements.txt && \
+    rm -rf /tmp/base_image_requirements.txt && \
     python -c "import nltk; nltk.download('stopwords')" && \
     rm /root/nltk_data/corpora/stopwords.zip && \
+
     # Check that the protobuf upb(also called micro protobuf) is used.
     python -c "from google.protobuf.internal import api_implementation; assert api_implementation._implementation_type == 'upb'; print ('Verified fast protobuf used.')" && \
-    # Remove pip cache.
-    rm -rf /root/.cache/pip && \
-    rm -rf /tmp/base_image_requirements.txt
-
-RUN pip install --upgrade pip setuptools
 
-# Install Google Cloud SDK.
-ENV CLOUDSDK_CORE_DISABLE_PROMPTS yes
-ENV PATH $PATH:/usr/local/gcloud/google-cloud-sdk/bin
-RUN mkdir -p /usr/local/gcloud && \
+    # Install Google Cloud SDK.
+    mkdir -p /usr/local/gcloud && \
     cd /usr/local/gcloud && \
     curl -s -O https://dl.google.com/dl/cloudsdk/release/google-cloud-sdk.tar.gz && \
     tar -xf google-cloud-sdk.tar.gz && \
     /usr/local/gcloud/google-cloud-sdk/install.sh && \
     rm -rf /usr/local/gcloud/google-cloud-sdk/.install/.backup && \
-    rm google-cloud-sdk.tar.gz
+    rm google-cloud-sdk.tar.gz && \
 
-# Configure ccache prior to installing Beam SDK.
-RUN ln -s /usr/bin/ccache /usr/local/bin/gcc
-# These parameters are needed as pip compiles artifacts in random temporary directories.
-RUN ccache --set-config=sloppiness=file_macro && ccache --set-config=hash_dir=false
+    # Configure ccache prior to installing Beam SDK. This speeds up wheels compilation when installing the SDK from sources.
+    ln -s /usr/bin/ccache /usr/local/bin/gcc && \
+    # These parameters are needed as pip compiles artifacts in random temporary directories.
+    ccache --set-config=sloppiness=file_macro && ccache --set-config=hash_dir=false && \
 
-####
-# Install Apache Beam SDK. Use --no-deps and pip check to verify that all
-# necessary dependencies are specified in base_image_requirements.txt.
-####
-COPY target/apache-beam.tar.gz /opt/apache/beam/tars/
-RUN pip install --no-deps -v /opt/apache/beam/tars/apache-beam.tar.gz[gcp]
-RUN pip check || (echo "Container does not include required Beam dependencies or has conflicting dependencies. If Beam dependencies have changed, you need to regenerate base_image_requirements.txt files. See: https://s.apache.org/beam-python-requirements-generate" && exit 1)
-# Log complete list of what exact packages and versions are installed.
-RUN pip freeze --all
-
-COPY target/LICENSE /opt/apache/beam/
-COPY target/LICENSE.python /opt/apache/beam/
-COPY target/NOTICE /opt/apache/beam/
-COPY target/launcher/${TARGETOS}_${TARGETARCH}/boot /opt/apache/beam/
+    # Install Apache Beam SDK. Use --no-deps and pip check to verify that all
+    # necessary dependencies are specified in base_image_requirements.txt.
+    pip install --no-deps -v /opt/apache/beam/tars/apache-beam.tar.gz[gcp] && \
+    pip check || (echo "Container does not include required Beam dependencies or has conflicting dependencies. If Beam dependencies have changed, you need to regenerate base_image_requirements.txt files. See: https://s.apache.org/beam-python-requirements-generate" && exit 1) && \
+
+    # Log complete list of what exact packages and versions are installed.
+    pip freeze --all && \
+
+    # Remove pip cache.
+    rm -rf /root/.cache/pip
 
 ENTRYPOINT ["/opt/apache/beam/boot"]
 

Reply via email to