From 11c6df590f5f6ba31415db23aa14c4e11dd9478f Mon Sep 17 00:00:00 2001 From: Ruifeng Zheng Date: Sat, 7 Feb 2026 17:25:27 +0800 Subject: [PATCH 1/2] nit --- .github/workflows/build_and_test.yml | 4 +-- .../python-minimum/Dockerfile | 26 +++++++++++------- .../python-ps-minimum/Dockerfile | 27 ++++++++++++------- 3 files changed, 36 insertions(+), 21 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 97cb5e922782a..9dcb2f2d31358 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -41,14 +41,14 @@ on: description: Additional environment variables to set when running the tests. Should be in JSON format. required: false type: string - default: '{"PYSPARK_IMAGE_TO_TEST": "python-312", "PYTHON_TO_TEST": "python3.12"}' + default: '{"PYSPARK_IMAGE_TO_TEST": "python-minimum", "PYTHON_TO_TEST": "python3.10"}' jobs: description: >- Jobs to run, and should be in JSON format. The values should be matched with the job's key defined in this file, e.g., build. See precondition job below. required: false type: string - default: '' + default: '{"pyspark": "true", "pyspark-pandas": "true"}' secrets: codecov_token: description: The upload token of codecov. diff --git a/dev/spark-test-image/python-minimum/Dockerfile b/dev/spark-test-image/python-minimum/Dockerfile index 4f671c2229d52..011618a36d3f9 100644 --- a/dev/spark-test-image/python-minimum/Dockerfile +++ b/dev/spark-test-image/python-minimum/Dockerfile @@ -15,16 +15,16 @@ # limitations under the License. # -# Image for building and testing Spark branches. Based on Ubuntu 22.04. +# Image for building and testing Spark branches. Based on Ubuntu 24.04. # See also in https://hub.docker.com/_/ubuntu -FROM ubuntu:jammy-20240911.1 +FROM ubuntu:noble LABEL org.opencontainers.image.authors="Apache Spark project " LABEL org.opencontainers.image.licenses="Apache-2.0" LABEL org.opencontainers.image.ref.name="Apache Spark Infra Image For PySpark with old dependencies" # Overwrite this label to avoid exposing the underlying Ubuntu OS version label LABEL org.opencontainers.image.version="" -ENV FULL_REFRESH_DATE=20260127 +ENV FULL_REFRESH_DATE=20260206 ENV DEBIAN_FRONTEND=noninteractive ENV DEBCONF_NONINTERACTIVE_SEEN=true @@ -43,20 +43,28 @@ RUN apt-get update && apt-get install -y \ libssl-dev \ openjdk-17-jdk-headless \ pkg-config \ - python3.10 \ - python3-psutil \ tzdata \ software-properties-common \ - zlib1g-dev \ + zlib1g-dev + +# Install Python 3.10 +RUN add-apt-repository ppa:deadsnakes/ppa +RUN apt-get update && apt-get install -y \ + python3.10 \ && apt-get autoremove --purge -y \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* -ARG BASIC_PIP_PKGS="numpy==1.22.4 pyarrow==18.0.0 pandas==2.2.0 six==1.16.0 scipy scikit-learn coverage unittest-xml-reporting" -# Python deps for Spark Connect -ARG CONNECT_PIP_PKGS="grpcio==1.76.0 grpcio-status==1.76.0 googleapis-common-protos==1.71.0 zstandard==0.25.0 graphviz==0.20 protobuf==6.33.5" +# Setup virtual environment +ENV VIRTUAL_ENV=/opt/spark-venv +RUN python3.10 -m venv --without-pip $VIRTUAL_ENV +ENV PATH="$VIRTUAL_ENV/bin:$PATH" # Install Python 3.10 packages RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.10 + +ARG BASIC_PIP_PKGS="numpy==1.22.4 pyarrow==18.0.0 pandas==2.2.0 six==1.16.0 scipy scikit-learn coverage unittest-xml-reporting psutil" +ARG CONNECT_PIP_PKGS="grpcio==1.76.0 grpcio-status==1.76.0 googleapis-common-protos==1.71.0 zstandard==0.25.0 graphviz==0.20 protobuf==6.33.5" + RUN python3.10 -m pip install --force $BASIC_PIP_PKGS $CONNECT_PIP_PKGS && \ python3.10 -m pip cache purge diff --git a/dev/spark-test-image/python-ps-minimum/Dockerfile b/dev/spark-test-image/python-ps-minimum/Dockerfile index ca1bd092a2fa8..5daecd379498e 100644 --- a/dev/spark-test-image/python-ps-minimum/Dockerfile +++ b/dev/spark-test-image/python-ps-minimum/Dockerfile @@ -15,16 +15,16 @@ # limitations under the License. # -# Image for building and testing Spark branches. Based on Ubuntu 22.04. +# Image for building and testing Spark branches. Based on Ubuntu 24.04. # See also in https://hub.docker.com/_/ubuntu -FROM ubuntu:jammy-20240911.1 +FROM ubuntu:noble LABEL org.opencontainers.image.authors="Apache Spark project " LABEL org.opencontainers.image.licenses="Apache-2.0" LABEL org.opencontainers.image.ref.name="Apache Spark Infra Image For Pandas API on Spark with old dependencies" # Overwrite this label to avoid exposing the underlying Ubuntu OS version label LABEL org.opencontainers.image.version="" -ENV FULL_REFRESH_DATE=20260127 +ENV FULL_REFRESH_DATE=20260206 ENV DEBIAN_FRONTEND=noninteractive ENV DEBCONF_NONINTERACTIVE_SEEN=true @@ -43,21 +43,28 @@ RUN apt-get update && apt-get install -y \ libssl-dev \ openjdk-17-jdk-headless \ pkg-config \ - python3.10 \ - python3-psutil \ tzdata \ software-properties-common \ - zlib1g-dev \ + zlib1g-dev + +# Install Python 3.10 +RUN add-apt-repository ppa:deadsnakes/ppa +RUN apt-get update && apt-get install -y \ + python3.10 \ && apt-get autoremove --purge -y \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* - -ARG BASIC_PIP_PKGS="pyarrow==18.0.0 pandas==2.2.0 six==1.16.0 numpy scipy coverage unittest-xml-reporting" -# Python deps for Spark Connect -ARG CONNECT_PIP_PKGS="grpcio==1.76.0 grpcio-status==1.76.0 googleapis-common-protos==1.71.0 zstandard==0.25.0 graphviz==0.20 protobuf==6.33.5" +# Setup virtual environment +ENV VIRTUAL_ENV=/opt/spark-venv +RUN python3.10 -m venv --without-pip $VIRTUAL_ENV +ENV PATH="$VIRTUAL_ENV/bin:$PATH" # Install Python 3.10 packages RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.10 + +ARG BASIC_PIP_PKGS="pyarrow==18.0.0 pandas==2.2.0 six==1.16.0 numpy scipy coverage unittest-xml-reporting psutil" +ARG CONNECT_PIP_PKGS="grpcio==1.76.0 grpcio-status==1.76.0 googleapis-common-protos==1.71.0 zstandard==0.25.0 graphviz==0.20 protobuf==6.33.5" + RUN python3.10 -m pip install --force $BASIC_PIP_PKGS $CONNECT_PIP_PKGS && \ python3.10 -m pip cache purge From 8c2605a1f1db7f1920daeea04f2e0747d542b4a5 Mon Sep 17 00:00:00 2001 From: Ruifeng Zheng Date: Sun, 8 Feb 2026 11:41:29 +0800 Subject: [PATCH 2/2] test ps mini --- .github/workflows/build_and_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 9dcb2f2d31358..64b5a41e63b4f 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -41,7 +41,7 @@ on: description: Additional environment variables to set when running the tests. Should be in JSON format. required: false type: string - default: '{"PYSPARK_IMAGE_TO_TEST": "python-minimum", "PYTHON_TO_TEST": "python3.10"}' + default: '{"PYSPARK_IMAGE_TO_TEST": "python-ps-minimum", "PYTHON_TO_TEST": "python3.10"}' jobs: description: >- Jobs to run, and should be in JSON format. The values should be matched with the job's key defined