Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions .github/unittest/linux_libs/scripts_botorch/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
channels:
- pytorch
- defaults
dependencies:
- pip
- pip:
- hypothesis
- future
- cloudpickle
- pytest
- pytest-cov
- pytest-mock
- pytest-instafail
- pytest-rerunfailures
- pytest-json-report
- pytest-error-for-skips
- expecttest
- pybind11[global]
- pyyaml
- scipy
- botorch
- gpytorch
- psutil
53 changes: 53 additions & 0 deletions .github/unittest/linux_libs/scripts_botorch/install.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#!/usr/bin/env bash
# Install PyTorch (nightly or stable, CPU or CUDA), tensordict, and torchrl
# into the ./env conda environment created by setup_env.sh.
#
# Required env vars:
#   TORCH_VERSION - "nightly" or "stable"
# Optional env vars:
#   CU_VERSION    - "cpu" or a cuNNN wheel tag such as "cu118"/"cu128"

unset PYTORCH_VERSION

set -euxo pipefail

eval "$(./conda/bin/conda shell.bash hook)"
conda activate ./env

if [ "${CU_VERSION:-}" == cpu ] ; then
  version="cpu"
else
  # CU_VERSION is expected in the "cuNNN" form: cu118 -> 11.8, cu128 -> 12.8.
  if [[ ${#CU_VERSION} -eq 4 ]]; then
    CUDA_VERSION="${CU_VERSION:2:1}.${CU_VERSION:3:1}"
  elif [[ ${#CU_VERSION} -eq 5 ]]; then
    CUDA_VERSION="${CU_VERSION:2:2}.${CU_VERSION:4:1}"
  fi
  echo "Using CUDA $CUDA_VERSION as determined by CU_VERSION ($CU_VERSION)"
  version="$(python -c "print('.'.join(\"${CUDA_VERSION}\".split('.')[:2]))")"
fi

# submodules
git submodule sync && git submodule update --init --recursive

# Derive the wheel index from CU_VERSION instead of hard-coding cu128, so the
# script keeps working when the CI matrix moves to another CUDA version.
cuda_tag="${CU_VERSION:-cu128}"
printf "Installing PyTorch for %s\n" "${cuda_tag}"
if [[ "${TORCH_VERSION:-}" == "nightly" ]]; then
  if [ "${CU_VERSION:-}" == cpu ] ; then
    pip3 install --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu -U
  else
    pip3 install --pre torch --index-url "https://download.pytorch.org/whl/nightly/${cuda_tag}" -U
  fi
elif [[ "${TORCH_VERSION:-}" == "stable" ]]; then
  if [ "${CU_VERSION:-}" == cpu ] ; then
    pip3 install torch --index-url https://download.pytorch.org/whl/cpu -U
  else
    # -U added for consistency with the other branches.
    pip3 install torch --index-url "https://download.pytorch.org/whl/${cuda_tag}" -U
  fi
else
  # Diagnostics belong on stderr; also report what we actually received.
  printf "Failed to install pytorch: TORCH_VERSION must be 'nightly' or 'stable' (got '%s')\n" "${TORCH_VERSION:-}" >&2
  exit 1
fi

# install tensordict
pip install git+https://github.com/pytorch/tensordict.git --progress-bar off

# smoke test
python -c "import functorch;import tensordict"

printf "* Installing torchrl\n"
python -m pip install -e . --no-build-isolation

# smoke test
python -c "import torchrl"
6 changes: 6 additions & 0 deletions .github/unittest/linux_libs/scripts_botorch/post_process.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/usr/bin/env bash
# Post-processing hook run after the botorch test suite.
# Currently it only re-activates the test environment; coverage upload and
# artifact handling are done by the calling workflow.

set -e

# Make the project-local conda available in this shell, then activate ./env.
eval "$(./conda/bin/conda shell.bash hook)"
conda activate ./env
33 changes: 33 additions & 0 deletions .github/unittest/linux_libs/scripts_botorch/run_test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#!/usr/bin/env bash
# Run the PILCO (botorch/gpytorch) unit tests under coverage, emitting a JSON
# report used for flaky-test tracking.

set -euxo pipefail

eval "$(./conda/bin/conda shell.bash hook)"
conda activate ./env

export PYTORCH_TEST_WITH_SLOW='1'
export LAZY_LEGACY_OP=False

python -m torch.utils.collect_env
git config --global --add safe.directory '*'

root_dir="$(git rev-parse --show-toplevel)"

export MKL_THREADING_LAYER=GNU

# smoke test
python -c "import botorch; print('botorch', botorch.__version__)"
python -c "import gpytorch; print('gpytorch', gpytorch.__version__)"

# JSON report for flaky test tracking. Use an array instead of an unquoted
# string so the arguments survive paths containing spaces (ShellCheck SC2086).
json_report_dir="${RUNNER_ARTIFACT_DIR:-${root_dir}}"
json_report_args=(
  --json-report
  --json-report-file="${json_report_dir}/test-results-botorch.json"
  --json-report-indent=2
)

python .github/unittest/helpers/coverage_run_parallel.py -m pytest test/test_objectives.py \
  "${json_report_args[@]}" --instafail -v --durations 200 --capture no \
  -k TestGPWorldModel --error-for-skips
coverage combine -q
coverage xml -i

# Upload test results with metadata for flaky tracking
python .github/unittest/helpers/upload_test_results.py || echo "Warning: Failed to process test results for flaky tracking"
44 changes: 44 additions & 0 deletions .github/unittest/linux_libs/scripts_botorch/setup_env.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#!/usr/bin/env bash
# Bootstrap a self-contained Miniconda at ./conda and a test environment at
# ./env, then install the conda/pip dependencies from environment.yml.
#
# Required env vars:
#   PYTHON_VERSION - python version for the test env, e.g. "3.10"

set -euxo pipefail

apt-get update && apt-get upgrade -y && apt-get install -y git cmake
git config --global --add safe.directory '*'
apt-get install -y wget gcc g++

this_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
root_dir="$(git rev-parse --show-toplevel)"
conda_dir="${root_dir}/conda"
env_dir="${root_dir}/env"

cd "${root_dir}"

case "$(uname -s)" in
    Darwin*) os=MacOSX;;
    *) os=Linux
esac

# 1. Install conda at ./conda
if [ ! -d "${conda_dir}" ]; then
    printf "* Installing conda\n"
    # Use HTTPS and the current official host (repo.continuum.io is legacy and
    # plain http is susceptible to tampering).
    wget -O miniconda.sh "https://repo.anaconda.com/miniconda/Miniconda3-latest-${os}-x86_64.sh"
    bash ./miniconda.sh -b -f -p "${conda_dir}"
fi
eval "$(${conda_dir}/bin/conda shell.bash hook)"

# 2. Create test environment at ./env
printf "python: ${PYTHON_VERSION}\n"
if [ ! -d "${env_dir}" ]; then
    printf "* Creating a test environment\n"
    conda create --prefix "${env_dir}" -y python="$PYTHON_VERSION"
fi
conda activate "${env_dir}"

# 3. Install Conda dependencies
printf "* Installing dependencies (except PyTorch)\n"
# Pin the python version in environment.yml, but only once: a plain '>>'
# append would duplicate the line every time this script is re-run.
if ! grep -q "python=${PYTHON_VERSION}" "${this_dir}/environment.yml"; then
    echo "  - python=${PYTHON_VERSION}" >> "${this_dir}/environment.yml"
fi
cat "${this_dir}/environment.yml"

pip install pip --upgrade

conda env update --file "${this_dir}/environment.yml" --prune
38 changes: 38 additions & 0 deletions .github/workflows/test-linux-libs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,44 @@ jobs:

bash .github/unittest/linux_libs/scripts_brax/run_all.sh

# CI job running the PILCO/botorch test suite on a single-GPU runner.
# Only runs on push/workflow events or PRs labeled 'Modules'/'Objectives'.
unittests-botorch:
strategy:
matrix:
python_version: ["3.10"]
cuda_arch_version: ["12.8"]
if: ${{ github.event_name == 'push' || github.event_name == 'workflow_call' || github.event_name == 'workflow_dispatch' || contains(github.event.pull_request.labels.*.name, 'Modules') || contains(github.event.pull_request.labels.*.name, 'Objectives') }}
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
with:
repository: pytorch/rl
runner: "linux.g5.4xlarge.nvidia.gpu"
gpu-arch-type: cuda
gpu-arch-version: "12.8"
# NOTE(review): docker image is CUDA 12.4 while gpu-arch-version is 12.8 —
# presumably intentional (driver-compat), but worth confirming.
docker-image: "nvidia/cuda:12.4.0-devel-ubuntu22.04"
timeout: 120
script: |
if [[ "${{ github.ref }}" =~ release/* ]]; then
export RELEASE=1
export TORCH_VERSION=stable
else
export RELEASE=0
export TORCH_VERSION=nightly
fi

set -euo pipefail
export PYTHON_VERSION="3.10"
# NOTE(review): install.sh parses CU_VERSION in the "cuNNN" form (e.g.
# "cu128"); "12.8" here will not match that parser — confirm the expected
# format against the other scripts_* jobs.
export CU_VERSION="12.8"
export TAR_OPTIONS="--no-same-owner"
export UPLOAD_CHANNEL="nightly"
export TF_CPP_MIN_LOG_LEVEL=0
export TD_GET_DEFAULTS_TO_NONE=1

nvidia-smi

# Setup -> install -> test -> post-process, in order; each step is fatal.
bash .github/unittest/linux_libs/scripts_botorch/setup_env.sh
bash .github/unittest/linux_libs/scripts_botorch/install.sh
bash .github/unittest/linux_libs/scripts_botorch/run_test.sh
bash .github/unittest/linux_libs/scripts_botorch/post_process.sh

# unittests-d4rl:
# strategy:
# matrix:
Expand Down
1 change: 1 addition & 0 deletions docs/source/reference/envs_api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,7 @@ Domain-specific
ModelBasedEnvBase
model_based.dreamer.DreamerEnv
model_based.dreamer.DreamerDecoder
model_based.imagined.ImaginedEnv

Helpers
-------
Expand Down
1 change: 1 addition & 0 deletions docs/source/reference/envs_transforms.rst
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,7 @@ Available Transforms
Hash
InitTracker
LineariseRewards
MeanActionSelector
ModuleTransform
MultiAction
NoopResetEnv
Expand Down
12 changes: 12 additions & 0 deletions docs/source/reference/modules_models.rst
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,15 @@ Modules for model-based reinforcement learning, including world models and dynam
RSSMPosterior
RSSMPrior
RSSMRollout

PILCO
-----

Components for moment-matching model-based policy search (PILCO).

.. autosummary::
:toctree: generated/
:template: rl_template_noinherit.rst

GPWorldModel
RBFController
1 change: 1 addition & 0 deletions docs/source/reference/objectives_other.rst
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,4 @@ Additional loss modules for specialized algorithms.
DreamerActorLoss
DreamerModelLoss
DreamerValueLoss
ExponentialQuadraticCost
4 changes: 4 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,10 @@ marl = ["vmas>=1.2.10", "pettingzoo>=1.24.1", "dm-meltingpot; python_version>='3
open_spiel = ["open_spiel>=1.5"]
brax = ["jax>=0.7.0; python_version>='3.11'", "brax; python_version>='3.11'"]
procgen = ["procgen"]
pilco = [
"botorch",
"gpytorch",
]
# Base LLM dependencies (no inference backend - use llm-vllm or llm-sglang)
llm = [
"transformers",
Expand Down
35 changes: 30 additions & 5 deletions setup-and-run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ REPO_DIR="/root/rl"
VENV_DIR="/root/torchrl_venv"
MODE="isaac" # "isaac" or "dmcontrol"
BUILD_ONLY=false
GPUS="" # explicit GPU set, e.g. "3,4,5"
EXTRA_ARGS=() # extra Hydra overrides forwarded to the training script

# ---- Parse arguments --------------------------------------------------------
Expand All @@ -38,22 +39,46 @@ for arg in "$@"; do
--build-only) BUILD_ONLY=true ;;
--dmcontrol) MODE="dmcontrol" ;;
--isaac) MODE="isaac" ;;
--gpus=*) GPUS="${arg#--gpus=}" ;;
*) EXTRA_ARGS+=("$arg") ;;
esac
done

# Avoid "'': unknown terminal type" in headless containers
export TERM="${TERM:-xterm}"

# Resolve GPU set early so we can use it for zombie cleanup
if [[ -n "$GPUS" ]]; then
export CUDA_VISIBLE_DEVICES="$GPUS"
elif [[ "$MODE" == "isaac" ]]; then
export CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:-0,1,2}"
fi

echo "============================================================"
echo " setup-and-run.sh"
echo " mode=$MODE build_only=$BUILD_ONLY"
echo " gpus=${CUDA_VISIBLE_DEVICES:-<all>}"
echo " extra_args=${EXTRA_ARGS[*]:-<none>}"
echo "============================================================"

# ---- 0) Kill zombie Python processes from previous runs ---------------------
echo "* Killing leftover Python processes..."
pkill -9 -f python || true
# ---- 0) Kill zombie Python processes on the SAME GPUs ----------------------
# Only kill dreamer processes whose CUDA_VISIBLE_DEVICES matches ours,
# so that a second experiment on different GPUs is left untouched.
echo "* Killing leftover dreamer processes on GPUs=${CUDA_VISIBLE_DEVICES:-<all>}..."
if [[ -n "${CUDA_VISIBLE_DEVICES:-}" ]]; then
# Find dreamer_isaac.py PIDs whose /proc/<pid>/environ contains our GPU set
for pid in $(pgrep -f "dreamer_isaac.py|dreamer.py" 2>/dev/null || true); do
proc_env=$(tr '\0' '\n' < /proc/$pid/environ 2>/dev/null || true)
proc_gpus=$(echo "$proc_env" | grep '^CUDA_VISIBLE_DEVICES=' | head -1 | cut -d= -f2)
if [[ "$proc_gpus" == "$CUDA_VISIBLE_DEVICES" ]] || [[ -z "$proc_gpus" ]]; then
echo " Killing PID $pid (CUDA_VISIBLE_DEVICES=$proc_gpus)"
kill -9 "$pid" 2>/dev/null || true
fi
done
else
# No GPU constraint — kill all dreamer processes
pkill -9 -f "dreamer_isaac.py|dreamer.py" || true
fi
sleep 1

# ---- 1) System dependencies ------------------------------------------------
Expand Down Expand Up @@ -203,8 +228,8 @@ echo "============================================================"
cd "$REPO_DIR"

if [[ "$MODE" == "isaac" ]]; then
# Expose 3 GPUs: GPU 0 = sim, GPU 1 = training, GPU 2 = eval (rendering)
export CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:-0,1,2}"
# GPUs already set above: GPU0 = sim, GPU1 = training, GPU2 = eval (rendering)
echo " CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES"
$PYTHON "sota-implementations/dreamer/dreamer_isaac.py" "${EXTRA_ARGS[@]}"
else
export MUJOCO_GL=egl
Expand Down
26 changes: 26 additions & 0 deletions sota-check/run_pilco.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#!/bin/bash
# SLURM job: run the PILCO sota-implementation example and record
# success/failure in report.log for the sota-check aggregator.

#SBATCH --job-name=pilco
#SBATCH --ntasks=32
#SBATCH --cpus-per-task=1
#SBATCH --gres=gpu:1
#SBATCH --output=slurm_logs/pilco_%j.txt
#SBATCH --error=slurm_errors/pilco_%j.txt

current_commit=$(git rev-parse --short HEAD)
project_name="torchrl-example-check-${current_commit}"
group_name="pilco"
# Quote the nested dirname/$PWD expansions so paths with spaces work
# (ShellCheck SC2046/SC2086).
export PYTHONPATH="$(dirname "$(dirname "$PWD")")"
python "$PYTHONPATH/sota-implementations/pilco/pilco.py" \
  logger.backend=wandb \
  logger.project_name="$project_name" \
  logger.group_name="$group_name"

# Capture the exit status of the Python command
exit_status=$?
# Write the exit status to a file
if [ $exit_status -eq 0 ]; then
  echo "${group_name}_${SLURM_JOB_ID}=success" >> report.log
else
  echo "${group_name}_${SLURM_JOB_ID}=error" >> report.log
fi
18 changes: 18 additions & 0 deletions sota-implementations/pilco/config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
env:
env_name: InvertedPendulum-v5
library: gym
device: null
logger:
backend: wandb
project_name: torchrl_pilco
group_name: null
video: True
optim:
policy_lr: 5e-3
pilco:
horizon: 40
initial_rollout_length: 200
max_rollout_length: 350
epochs: 3
policy_training_steps: 100
policy_n_basis: 10
Loading
Loading