From 1fecb54ae045300681d85f623a2a429fab8d7087 Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@ustc.edu.cn>
Date: Thu, 18 Dec 2025 22:47:31 +0800
Subject: [PATCH 1/4] CI: bump CUDA image to 12.9.1

Signed-off-by: Jinzhe Zeng <jinzhe.zeng@ustc.edu.cn>
---
 .github/workflows/test_cuda.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/test_cuda.yml b/.github/workflows/test_cuda.yml
index a934401a20..71bdc6503f 100644
--- a/.github/workflows/test_cuda.yml
+++ b/.github/workflows/test_cuda.yml
@@ -19,7 +19,7 @@ jobs:
     runs-on: nvidia
     # https://github.com/deepmodeling/deepmd-kit/pull/2884#issuecomment-1744216845
     container:
-      image: nvidia/cuda:12.6.2-cudnn-devel-ubuntu22.04
+      image: nvidia/cuda:12.9.1-cudnn-devel-ubuntu22.04
       options: --gpus all
     if: github.repository_owner == 'deepmodeling' && (github.event_name == 'pull_request' && github.event.label && github.event.label.name == 'Test CUDA' || github.event_name == 'workflow_dispatch' || github.event_name == 'merge_group')
     steps:

From 8244877af0a7b4db7f4112178d37bf7d6a37e08c Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@ustc.edu.cn>
Date: Fri, 19 Dec 2025 15:49:46 +0800
Subject: [PATCH 2/4] CI: debug GPU (set TF_CPP_MAX_VLOG_LEVEL=3, disable one step)

Signed-off-by: Jinzhe Zeng <jinzhe.zeng@ustc.edu.cn>
---
 .github/workflows/test_cuda.yml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/.github/workflows/test_cuda.yml b/.github/workflows/test_cuda.yml
index 71bdc6503f..db09662e87 100644
--- a/.github/workflows/test_cuda.yml
+++ b/.github/workflows/test_cuda.yml
@@ -62,6 +62,8 @@ jobs:
         XLA_PYTHON_CLIENT_PREALLOCATE: false
         XLA_PYTHON_CLIENT_ALLOCATOR: platform
         FLAGS_use_stride_compute_kernel: 0
+        TF_CPP_MAX_VLOG_LEVEL: 3
+      if: false
     - name: Convert models
       run: source/tests/infer/convert-models.sh
     - run: |
@@ -74,6 +76,7 @@ jobs:
         CMAKE_GENERATOR: Ninja
         DP_VARIANT: cuda
         DP_USE_MPICH2: 1
+        TF_CPP_MAX_VLOG_LEVEL: 3
     - run: |
         export LD_LIBRARY_PATH=$CUDA_PATH/lib64:/usr/lib/x86_64-linux-gnu/:$GITHUB_WORKSPACE/dp_test/lib:$LD_LIBRARY_PATH
         export PATH=$GITHUB_WORKSPACE/dp_test/bin:$PATH
@@ -88,6 +91,7 @@ jobs:
         TF_INTER_OP_PARALLELISM_THREADS: 1
         LAMMPS_PLUGIN_PATH: ${{ github.workspace }}/dp_test/lib/deepmd_lmp
         CUDA_VISIBLE_DEVICES: 0
+        TF_CPP_MAX_VLOG_LEVEL: 3
   pass:
     name: Pass testing on CUDA
     needs: [test_cuda]

From 55a1d37a2d22b64c5a58dab32217fb15ced4964c Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@ustc.edu.cn>
Date: Fri, 19 Dec 2025 17:06:43 +0800
Subject: [PATCH 3/4] CI: bump cublas version and remove TF_CPP_MAX_VLOG_LEVEL debug settings

Signed-off-by: Jinzhe Zeng <jinzhe.zeng@ustc.edu.cn>
---
 .github/workflows/test_cuda.yml | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/test_cuda.yml b/.github/workflows/test_cuda.yml
index db09662e87..cdc08c803a 100644
--- a/.github/workflows/test_cuda.yml
+++ b/.github/workflows/test_cuda.yml
@@ -49,6 +49,8 @@ jobs:
         export TENSORFLOW_ROOT=$(python -c 'import importlib.util,pathlib;print(pathlib.Path(importlib.util.find_spec("tensorflow").origin).parent)')
         pip install --find-links "https://www.paddlepaddle.org.cn/packages/nightly/cu126/paddlepaddle-gpu/" --index-url https://pypi.org/simple "paddlepaddle-gpu==3.3.0.dev20251204"
         source/install/uv_with_retry.sh pip install --system -v -e .[gpu,test,lmp,cu12,torch,jax] mpi4py --reinstall-package deepmd-kit
+        # See https://github.com/jax-ml/jax/issues/29042
+        source/install/uv_with_retry.sh pip install -U nvidia-cublas-cu12>=12.9.0.13
       env:
         DP_VARIANT: cuda
         DP_ENABLE_NATIVE_OPTIMIZATION: 1
@@ -62,7 +64,6 @@ jobs:
         XLA_PYTHON_CLIENT_PREALLOCATE: false
         XLA_PYTHON_CLIENT_ALLOCATOR: platform
         FLAGS_use_stride_compute_kernel: 0
-        TF_CPP_MAX_VLOG_LEVEL: 3
       if: false
     - name: Convert models
       run: source/tests/infer/convert-models.sh
@@ -76,7 +77,6 @@ jobs:
         CMAKE_GENERATOR: Ninja
         DP_VARIANT: cuda
         DP_USE_MPICH2: 1
-        TF_CPP_MAX_VLOG_LEVEL: 3
     - run: |
         export LD_LIBRARY_PATH=$CUDA_PATH/lib64:/usr/lib/x86_64-linux-gnu/:$GITHUB_WORKSPACE/dp_test/lib:$LD_LIBRARY_PATH
         export PATH=$GITHUB_WORKSPACE/dp_test/bin:$PATH
@@ -91,7 +91,6 @@ jobs:
         TF_INTER_OP_PARALLELISM_THREADS: 1
         LAMMPS_PLUGIN_PATH: ${{ github.workspace }}/dp_test/lib/deepmd_lmp
         CUDA_VISIBLE_DEVICES: 0
-        TF_CPP_MAX_VLOG_LEVEL: 3
   pass:
     name: Pass testing on CUDA
     needs: [test_cuda]

From 67f15f3795a4054aaa6bfc72a099f528a7ce5486 Mon Sep 17 00:00:00 2001
From: Jinzhe Zeng <jinzhe.zeng@ustc.edu.cn>
Date: Fri, 19 Dec 2025 17:46:59 +0800
Subject: [PATCH 4/4] CI: pass --system and quote the cublas version specifier

Signed-off-by: Jinzhe Zeng <jinzhe.zeng@ustc.edu.cn>
---
 .github/workflows/test_cuda.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/test_cuda.yml b/.github/workflows/test_cuda.yml
index cdc08c803a..355b5cff4f 100644
--- a/.github/workflows/test_cuda.yml
+++ b/.github/workflows/test_cuda.yml
@@ -50,7 +50,7 @@ jobs:
         pip install --find-links "https://www.paddlepaddle.org.cn/packages/nightly/cu126/paddlepaddle-gpu/" --index-url https://pypi.org/simple "paddlepaddle-gpu==3.3.0.dev20251204"
         source/install/uv_with_retry.sh pip install --system -v -e .[gpu,test,lmp,cu12,torch,jax] mpi4py --reinstall-package deepmd-kit
         # See https://github.com/jax-ml/jax/issues/29042
-        source/install/uv_with_retry.sh pip install -U nvidia-cublas-cu12>=12.9.0.13
+        source/install/uv_with_retry.sh pip install --system -U 'nvidia-cublas-cu12>=12.9.0.13'
       env:
         DP_VARIANT: cuda
         DP_ENABLE_NATIVE_OPTIMIZATION: 1