# Patch to .github/workflows/test_cuda.yml:
#   1. Switches the job's container image from nvidia/cuda 12.6.2 to 12.9.1.
#   2. After the editable install, upgrades nvidia-cublas-cu12 to >=12.9.0.13
#      (see the jax issue linked in the added comment).
#   3. Adds `if: false` immediately before the "Convert models" step.
# NOTE(review): the leading indentation of every hunk line was reconstructed
# from the usual GitHub Actions layout; verify it against the target file
# before applying. `if: false` is placed at step level here, which would skip
# the step whose env block precedes it -- confirm that is intended.
diff --git a/.github/workflows/test_cuda.yml b/.github/workflows/test_cuda.yml
index a934401a20..355b5cff4f 100644
--- a/.github/workflows/test_cuda.yml
+++ b/.github/workflows/test_cuda.yml
@@ -19,7 +19,7 @@ jobs:
     runs-on: nvidia
     # https://github.com/deepmodeling/deepmd-kit/pull/2884#issuecomment-1744216845
     container:
-      image: nvidia/cuda:12.6.2-cudnn-devel-ubuntu22.04
+      image: nvidia/cuda:12.9.1-cudnn-devel-ubuntu22.04
       options: --gpus all
     if: github.repository_owner == 'deepmodeling' && (github.event_name == 'pull_request' && github.event.label && github.event.label.name == 'Test CUDA' || github.event_name == 'workflow_dispatch' || github.event_name == 'merge_group')
     steps:
@@ -49,6 +49,8 @@ jobs:
           export TENSORFLOW_ROOT=$(python -c 'import importlib.util,pathlib;print(pathlib.Path(importlib.util.find_spec("tensorflow").origin).parent)')
           pip install --find-links "https://www.paddlepaddle.org.cn/packages/nightly/cu126/paddlepaddle-gpu/" --index-url https://pypi.org/simple "paddlepaddle-gpu==3.3.0.dev20251204"
           source/install/uv_with_retry.sh pip install --system -v -e .[gpu,test,lmp,cu12,torch,jax] mpi4py --reinstall-package deepmd-kit
+          # See https://github.com/jax-ml/jax/issues/29042
+          source/install/uv_with_retry.sh pip install --system -U 'nvidia-cublas-cu12>=12.9.0.13'
         env:
           DP_VARIANT: cuda
           DP_ENABLE_NATIVE_OPTIMIZATION: 1
@@ -62,6 +64,7 @@ jobs:
           XLA_PYTHON_CLIENT_PREALLOCATE: false
           XLA_PYTHON_CLIENT_ALLOCATOR: platform
           FLAGS_use_stride_compute_kernel: 0
+        if: false
       - name: Convert models
         run: source/tests/infer/convert-models.sh
       - run: |