This is an automated email from the ASF dual-hosted git repository.
kunwp1 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/texera.git
The following commit(s) were added to refs/heads/main by this push:
new ec025091aa chore: remove R support from Docker images for license
compliance (#4385)
ec025091aa is described below
commit ec025091aa7d24ace6c741566eedeecedf090a2a
Author: Jiadong Bai <[email protected]>
AuthorDate: Wed Apr 15 15:35:52 2026 -0700
chore: remove R support from Docker images for license compliance (#4385)
### What changes were proposed in this PR?
Remove R support from `computing-unit-master` and
`computing-unit-worker` Docker images. R itself is GPLv2, and its
install chain required `gnupg`/`dirmngr`/`software-properties-common`
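(GPLv3/v2) — all ASF Category X. Also drops `curl` (which the removed R
install step used to fetch the CRAN signing key) along with the unused
`git` and `unzip` (master image only) from the runtime stage (JGit reads
`.git` directly; no git CLI needed).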
R UDF Scala/Python/frontend code is left intact. `executor_manager.py`
already raises a clear ImportError when the optional `texera-rudf`
plugin is missing, so users who need R can build their own image on top.
### Any related issues, documentation, discussions?
Part of #4371.
### How was this PR tested?
Images built locally without R. Non-R services are untouched.
### Was this PR authored or co-authored using generative AI tooling?
Co-authored with: Claude Code (claude-opus-4-6)
Co-authored-by: Claude Opus 4.6 (1M context) <[email protected]>
Co-authored-by: Chen Li <[email protected]>
---
.github/workflows/build-and-push-images.yml | 13 -------
bin/build-images.sh | 34 ++++-------------
bin/computing-unit-master.dockerfile | 57 +----------------------------
bin/computing-unit-worker.dockerfile | 54 +--------------------------
4 files changed, 10 insertions(+), 148 deletions(-)
diff --git a/.github/workflows/build-and-push-images.yml
b/.github/workflows/build-and-push-images.yml
index f0ded5ffa8..e2d019bf13 100644
--- a/.github/workflows/build-and-push-images.yml
+++ b/.github/workflows/build-and-push-images.yml
@@ -49,11 +49,6 @@ on:
- both
- amd64
- arm64
- with_r_support:
- description: 'Enable R support for workflow-execution-coordinator'
- required: false
- default: false
- type: boolean
schedule:
# Run nightly at 2:00 AM UTC
- cron: '0 2 * * *'
@@ -76,7 +71,6 @@ jobs:
docker_registry: ${{ steps.set-params.outputs.docker_registry }}
services: ${{ steps.set-params.outputs.services }}
platforms: ${{ steps.set-params.outputs.platforms }}
- with_r_support: ${{ steps.set-params.outputs.with_r_support }}
steps:
- name: Set build parameters
id: set-params
@@ -91,7 +85,6 @@ jobs:
echo "docker_registry=ghcr.io/apache" >> $GITHUB_OUTPUT
echo "services=*" >> $GITHUB_OUTPUT
echo "platforms=both" >> $GITHUB_OUTPUT
- echo "with_r_support=false" >> $GITHUB_OUTPUT
else
echo "Manual workflow_dispatch - using user inputs"
BRANCH="${{ github.event.inputs.branch || 'main' }}"
@@ -109,7 +102,6 @@ jobs:
echo "docker_registry=${{ github.event.inputs.docker_registry ||
'ghcr.io/apache' }}" >> $GITHUB_OUTPUT
echo "services=${{ github.event.inputs.services || '*' }}" >>
$GITHUB_OUTPUT
echo "platforms=${{ github.event.inputs.platforms || 'both' }}" >>
$GITHUB_OUTPUT
- echo "with_r_support=${{ github.event.inputs.with_r_support ||
'false' }}" >> $GITHUB_OUTPUT
fi
# Step 1: Generate JOOQ code once and share it
@@ -380,8 +372,6 @@ jobs:
tags: ${{ env.DOCKER_REGISTRY }}/${{ matrix.image_name }}:${{
needs.set-parameters.outputs.image_tag }}-amd64
cache-from: type=gha,scope=${{ matrix.image_name }}-amd64
cache-to: type=gha,mode=max,scope=${{ matrix.image_name }}-amd64
- build-args: |
- ${{ (matrix.service == 'computing-unit-master' || matrix.service
== 'computing-unit-worker') && needs.set-parameters.outputs.with_r_support ==
'true' && 'WITH_R_SUPPORT=true' || '' }}
labels: |
org.opencontainers.image.title=${{ matrix.image_name }}
org.opencontainers.image.description=Apache Texera ${{
matrix.image_name }} (AMD64)
@@ -468,8 +458,6 @@ jobs:
tags: ${{ env.DOCKER_REGISTRY }}/${{ matrix.image_name }}:${{
needs.set-parameters.outputs.image_tag }}-arm64
cache-from: type=gha,scope=${{ matrix.image_name }}-arm64
cache-to: type=gha,mode=max,scope=${{ matrix.image_name }}-arm64
- build-args: |
- ${{ (matrix.service == 'computing-unit-master' || matrix.service
== 'computing-unit-worker') && needs.set-parameters.outputs.with_r_support ==
'true' && 'WITH_R_SUPPORT=true' || '' }}
labels: |
org.opencontainers.image.title=${{ matrix.image_name }}
org.opencontainers.image.description=Apache Texera ${{
matrix.image_name }} (ARM64)
@@ -531,7 +519,6 @@ jobs:
echo "- **Tag:** \`${{ needs.set-parameters.outputs.image_tag }}\`"
>> $GITHUB_STEP_SUMMARY
echo "- **Services:** ${{ needs.set-parameters.outputs.services }}"
>> $GITHUB_STEP_SUMMARY
echo "- **Platforms:** ${{ needs.set-parameters.outputs.platforms
}}" >> $GITHUB_STEP_SUMMARY
- echo "- **R Support:** ${{
needs.set-parameters.outputs.with_r_support }}" >> $GITHUB_STEP_SUMMARY
echo "" >> $GITHUB_STEP_SUMMARY
echo "## Build Method" >> $GITHUB_STEP_SUMMARY
echo "**Parallel platform builds** (faster)" >> $GITHUB_STEP_SUMMARY
diff --git a/bin/build-images.sh b/bin/build-images.sh
index 8c55656db9..a762bea1db 100755
--- a/bin/build-images.sh
+++ b/bin/build-images.sh
@@ -20,7 +20,6 @@ set -e
# Default values
DEFAULT_TAG="latest"
DEFAULT_SERVICES="*"
-WITH_R_SUPPORT="false"
# Parse command-line arguments
while [[ $# -gt 0 ]]; do
@@ -33,21 +32,16 @@ while [[ $# -gt 0 ]]; do
SERVICES_INPUT="$2"
shift 2
;;
- --with-r-support)
- WITH_R_SUPPORT="true"
- shift
- ;;
--help|-h)
echo "Usage: $0 [OPTIONS]"
echo ""
echo "Options:"
echo " -t, --tag TAG Base tag for the images (default:
latest)"
echo " -s, --services SERVICES Services to build, comma-separated or
'*' for all (default: *)"
- echo " --with-r-support Enable R support for
computing-unit-master (sets WITH_R_SUPPORT=true)"
echo " -h, --help Show this help message"
echo ""
echo "Examples:"
- echo " $0 --tag v1.0.0 --services '*' --with-r-support"
+ echo " $0 --tag v1.0.0 --services '*'"
echo " $0 -t latest -s 'gui,computing-unit-master'"
echo " $0 # Interactive mode"
exit 0
@@ -107,9 +101,6 @@ fi
FULL_TAG="${BASE_TAG}-${TAG_SUFFIX}"
echo "🔍 Detected architecture: $ARCH -> Building for $PLATFORM with tag
:$FULL_TAG"
-if [[ "$WITH_R_SUPPORT" == "true" ]]; then
- echo "🔍 R support enabled for computing-unit-master"
-fi
# Ensure Buildx is ready
docker buildx create --name texera-builder --use --bootstrap > /dev/null 2>&1
|| docker buildx use texera-builder
@@ -137,23 +128,12 @@ for dockerfile in "${dockerfiles[@]}"; do
image="texera/$service_name:$FULL_TAG"
echo "👉 Building $image from $dockerfile"
- # Add WITH_R_SUPPORT build arg for computing-unit-master
- if [[ "$service_name" == "computing-unit-master" && "$WITH_R_SUPPORT" ==
"true" ]]; then
- docker buildx build \
- --platform "$PLATFORM" \
- -f "$dockerfile" \
- -t "$image" \
- --build-arg WITH_R_SUPPORT=true \
- --push \
- ..
- else
- docker buildx build \
- --platform "$PLATFORM" \
- -f "$dockerfile" \
- -t "$image" \
- --push \
- ..
- fi
+ docker buildx build \
+ --platform "$PLATFORM" \
+ -f "$dockerfile" \
+ -t "$image" \
+ --push \
+ ..
done
# Build pylsp service (directory: pylsp)
diff --git a/bin/computing-unit-master.dockerfile
b/bin/computing-unit-master.dockerfile
index f9df6e4f9f..fa079558f4 100644
--- a/bin/computing-unit-master.dockerfile
+++ b/bin/computing-unit-master.dockerfile
@@ -44,83 +44,30 @@ RUN unzip amber/target/universal/amber-*.zip -d
amber/target/
FROM eclipse-temurin:11-jdk-jammy AS runtime
-# Build argument to enable/disable R support (default: false)
-ARG WITH_R_SUPPORT=false
-
WORKDIR /texera/amber
COPY --from=build /texera/amber/requirements.txt /tmp/requirements.txt
COPY --from=build /texera/amber/operator-requirements.txt
/tmp/operator-requirements.txt
-# Install Python runtime dependencies (always) and R runtime dependencies
(conditional)
+# Install Python runtime dependencies
RUN apt-get update && apt-get install -y \
python3-pip \
python3-dev \
libpq-dev \
- curl \
- unzip \
- gnupg \
- software-properties-common \
- dirmngr \
- git \
- $(if [ "$WITH_R_SUPPORT" = "true" ]; then echo "\
- gfortran \
- libxml2-dev \
- libssl-dev \
- libcurl4-openssl-dev"; fi) \
&& apt-get clean
-# Install R from CRAN repository (pre-built, much faster than source
compilation)
-RUN if [ "$WITH_R_SUPPORT" = "true" ]; then \
- # Add CRAN GPG key and repository
- curl -fsSL
https://cloud.r-project.org/bin/linux/ubuntu/marutter_pubkey.asc | \
- gpg --dearmor -o /usr/share/keyrings/cran-ubuntu-keyring.gpg && \
- echo "deb [signed-by=/usr/share/keyrings/cran-ubuntu-keyring.gpg]
https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/" | \
- tee /etc/apt/sources.list.d/cran.list && \
- apt-get update && \
- apt-get install -y r-base r-base-dev && \
- R --version; \
- fi
-
# Install Python packages
RUN pip3 install --upgrade pip setuptools wheel && \
pip3 install -r /tmp/requirements.txt && \
pip3 install -r /tmp/operator-requirements.txt
-# Install texera-rudf and its dependencies (conditional)
-RUN if [ "$WITH_R_SUPPORT" = "true" ]; then \
- pip3 install git+https://github.com/Texera/texera-rudf.git; \
- fi
-
-# Install R packages with pinned versions for texera-rudf (conditional)
-RUN if [ "$WITH_R_SUPPORT" = "true" ]; then \
- Rscript -e "options(repos = c(CRAN = 'https://cran.r-project.org')); \
- if (!requireNamespace('remotes', quietly=TRUE)) \
- install.packages('remotes', Ncpus =
parallel::detectCores()); \
- remotes::install_version('arrow', version='14.0.2.1', \
- repos='https://cran.r-project.org', upgrade='never', \
- Ncpus = parallel::detectCores()); \
- remotes::install_version('coro', version='1.1.0', \
- repos='https://cran.r-project.org', upgrade='never', \
- Ncpus = parallel::detectCores()); \
- remotes::install_version('aws.s3', version='0.3.22', \
- repos='https://cran.r-project.org', upgrade='never', \
- Ncpus = parallel::detectCores()); \
- cat('R package versions:\n'); \
- cat(' arrow: ', as.character(packageVersion('arrow')),
'\n'); \
- cat(' coro: ', as.character(packageVersion('coro')),
'\n'); \
- cat(' aws.s3: ', as.character(packageVersion('aws.s3')),
'\n')"; \
- fi
-
-ENV LD_LIBRARY_PATH=/usr/lib/R/lib:$LD_LIBRARY_PATH
-
# Copy the built texera binary from the build phase
COPY --from=build /texera/.git /texera/amber/.git
COPY --from=build /texera/amber/target/amber-* /texera/amber/
# Copy resources directories from build phase
COPY --from=build /texera/common/config/src/main/resources
/texera/amber/common/config/src/main/resources
COPY --from=build /texera/amber/src/main/resources
/texera/amber/src/main/resources
-# Copy code for python & R UDF
+# Copy code for python UDF
COPY --from=build /texera/amber/src/main/python /texera/amber/src/main/python
# Copy ASF licensing files
COPY --from=build /texera/LICENSE /texera/NOTICE /texera/DISCLAIMER-WIP
/texera/
diff --git a/bin/computing-unit-worker.dockerfile
b/bin/computing-unit-worker.dockerfile
index c938da7835..9fd013f384 100644
--- a/bin/computing-unit-worker.dockerfile
+++ b/bin/computing-unit-worker.dockerfile
@@ -44,43 +44,18 @@ RUN unzip amber/target/universal/amber-*.zip -d
amber/target/
FROM eclipse-temurin:11-jre-jammy AS runtime
-# Build argument to enable/disable R support (default: false)
-ARG WITH_R_SUPPORT=false
-
WORKDIR /texera/amber
COPY --from=build /texera/amber/requirements.txt /tmp/requirements.txt
COPY --from=build /texera/amber/operator-requirements.txt
/tmp/operator-requirements.txt
-# Install Python runtime dependencies (always) and R runtime dependencies
(conditional)
+# Install Python runtime dependencies
RUN apt-get update && apt-get install -y \
python3-pip \
python3-dev \
libpq-dev \
- curl \
- gnupg \
- software-properties-common \
- dirmngr \
- git \
- $(if [ "$WITH_R_SUPPORT" = "true" ]; then echo "\
- gfortran \
- libxml2-dev \
- libssl-dev \
- libcurl4-openssl-dev"; fi) \
&& apt-get clean
-# Install R from CRAN repository (pre-built, much faster than source
compilation)
-RUN if [ "$WITH_R_SUPPORT" = "true" ]; then \
- # Add CRAN GPG key and repository
- curl -fsSL
https://cloud.r-project.org/bin/linux/ubuntu/marutter_pubkey.asc | \
- gpg --dearmor -o /usr/share/keyrings/cran-ubuntu-keyring.gpg && \
- echo "deb [signed-by=/usr/share/keyrings/cran-ubuntu-keyring.gpg]
https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/" | \
- tee /etc/apt/sources.list.d/cran.list && \
- apt-get update && \
- apt-get install -y r-base r-base-dev && \
- R --version; \
- fi
-
# Install Python packages
RUN pip3 install --upgrade pip setuptools wheel && \
pip3 install python-lsp-server python-lsp-server[websockets] && \
@@ -88,33 +63,6 @@ RUN pip3 install --upgrade pip setuptools wheel && \
(pip3 install --no-cache-dir --find-links https://pypi.org/simple/ -r
/tmp/operator-requirements.txt || \
pip3 install --no-cache-dir wordcloud==1.9.2)
-# Install texera-rudf and its dependencies (conditional)
-RUN if [ "$WITH_R_SUPPORT" = "true" ]; then \
- pip3 install git+https://github.com/Texera/texera-rudf.git; \
- fi
-
-# Install R packages with pinned versions for texera-rudf (conditional)
-RUN if [ "$WITH_R_SUPPORT" = "true" ]; then \
- Rscript -e "options(repos = c(CRAN = 'https://cran.r-project.org')); \
- if (!requireNamespace('remotes', quietly=TRUE)) \
- install.packages('remotes', Ncpus =
parallel::detectCores()); \
- remotes::install_version('arrow', version='22.0.0.1', \
- repos='https://cran.r-project.org', upgrade='never', \
- Ncpus = parallel::detectCores()); \
- remotes::install_version('coro', version='1.1.0', \
- repos='https://cran.r-project.org', upgrade='never', \
- Ncpus = parallel::detectCores()); \
- remotes::install_version('aws.s3', version='0.3.22', \
- repos='https://cran.r-project.org', upgrade='never', \
- Ncpus = parallel::detectCores()); \
- cat('R package versions:\n'); \
- cat(' arrow: ', as.character(packageVersion('arrow')),
'\n'); \
- cat(' coro: ', as.character(packageVersion('coro')),
'\n'); \
- cat(' aws.s3: ', as.character(packageVersion('aws.s3')),
'\n')"; \
- fi
-
-ENV LD_LIBRARY_PATH=/usr/lib/R/lib:$LD_LIBRARY_PATH
-
# Copy the built texera binary from the build phase
COPY --from=build /texera/amber/target/amber-* /texera/amber/
# Copy resources directories from build phase