-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsetup_gpu.sh
More file actions
executable file
·33 lines (28 loc) · 1.48 KB
/
Copy pathsetup_gpu.sh
File metadata and controls
executable file
·33 lines (28 loc) · 1.48 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
#!/usr/bin/env bash
# One-time GPU env setup on the AutoDL box (sm_120 Blackwell, RTX PRO 6000). Phase -1.1.
#
# Installs, in order: an upgraded pip, this project in editable mode, the TRL
# training stack, and vLLM. The script first changes to the parent of the
# directory that contains it, so it can be invoked from any working directory.
#
# Environment:
#   PIP_MIRROR  Package index URL passed to `pip install -i`.
#               Default: https://pypi.tuna.tsinghua.edu.cn/simple
#
# AutoDL notes: disable the academic proxy (it breaks domestic mirrors), install via a
# domestic pip mirror, sm_120 needs CUDA 12.9+/driver >= 575 for vLLM.
set -euo pipefail
cd -- "$(dirname -- "$0")/.."

# Drop proxy settings for this process only. Best-effort on purpose: `unset`
# can fail (e.g. on a readonly variable) and that must not abort the setup.
unset http_proxy https_proxy HTTP_PROXY HTTPS_PROXY 2>/dev/null || true

MIRROR="${PIP_MIRROR:-https://pypi.tuna.tsinghua.edu.cn/simple}"

# Keep the install command in an array rather than a string so the mirror URL is
# never word-split or glob-expanded, and always go through `python -m pip` so
# packages land in the same interpreter that is version-checked below.
pip_install=(python -m pip install -i "$MIRROR")

echo "== python =="
python --version
"${pip_install[@]}" -U pip

echo "== sqlrl (offline core, editable) =="
"${pip_install[@]}" -e .

echo "== TRL training stack =="
"${pip_install[@]}" transformers trl peft accelerate datasets openai

echo "== vLLM (pulls a matching torch; sm_120 needs a recent build) =="
"${pip_install[@]}" vllm
# Print the installed version of every stack component, then what CUDA device
# (if any) torch can see. The heredoc delimiter is quoted so the shell passes
# the Python source through without expanding anything in it.
echo "== versions + cuda =="
python - <<'PY'
import importlib
import sys

# A failed import is reported and skipped so one broken package does not hide
# the status of the others.
for name in ["torch", "transformers", "trl", "peft", "accelerate", "datasets", "vllm", "openai"]:
    try:
        mod = importlib.import_module(name)
        print(f"{name:14s}", getattr(mod, "__version__", "?"))
    except Exception as e:
        print(f"{name:14s} MISSING: {e}")

# torch is required for the CUDA report; exit non-zero with a readable message
# instead of a raw traceback when it cannot be imported.
try:
    import torch
except Exception as e:
    sys.exit(f"torch import failed, cannot report CUDA status: {e}")

# Query availability once and reuse it for all three fields.
has_cuda = torch.cuda.is_available()
print("cuda:", has_cuda,
      "| cap:", torch.cuda.get_device_capability() if has_cuda else "-",
      "| dev:", torch.cuda.get_device_name(0) if has_cuda else "cpu")
PY
# Tell the operator which two commands to run next, one hint per line.
printf '%s\n' \
  "setup done. Next: python scripts/dump_modules.py (Phase -1.2: confirm LoRA targets)" \
  "then: python scripts/train_grpo.py --config configs/smoke_train.yaml (Phase -1.4 green gate)"