Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions .github/unittest/linux_libs/scripts_botorch/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
channels:
- pytorch
- defaults
dependencies:
- pip
- pip:
- hypothesis
- future
- cloudpickle
- pytest
- pytest-cov
- pytest-mock
- pytest-instafail
- pytest-rerunfailures
- pytest-json-report
- pytest-error-for-skips
- expecttest
- pybind11[global]
- pyyaml
- scipy
- botorch
- gpytorch
- psutil
53 changes: 53 additions & 0 deletions .github/unittest/linux_libs/scripts_botorch/install.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#!/usr/bin/env bash
# Install PyTorch (nightly or stable, CPU or CUDA), tensordict, and torchrl
# into the ./env conda environment created by setup_env.sh.
#
# Required env vars:
#   TORCH_VERSION - "nightly" or "stable"
# Optional env vars:
#   CU_VERSION    - "cpu" or a cuNNN wheel tag such as "cu118"/"cu128"

unset PYTORCH_VERSION

set -euxo pipefail

eval "$(./conda/bin/conda shell.bash hook)"
conda activate ./env

if [ "${CU_VERSION:-}" == cpu ] ; then
  version="cpu"
else
  # CU_VERSION is expected in the "cuNNN" form: cu118 -> 11.8, cu128 -> 12.8.
  if [[ ${#CU_VERSION} -eq 4 ]]; then
    CUDA_VERSION="${CU_VERSION:2:1}.${CU_VERSION:3:1}"
  elif [[ ${#CU_VERSION} -eq 5 ]]; then
    CUDA_VERSION="${CU_VERSION:2:2}.${CU_VERSION:4:1}"
  fi
  echo "Using CUDA $CUDA_VERSION as determined by CU_VERSION ($CU_VERSION)"
  version="$(python -c "print('.'.join(\"${CUDA_VERSION}\".split('.')[:2]))")"
fi

# submodules
git submodule sync && git submodule update --init --recursive

# Derive the wheel index from CU_VERSION instead of hard-coding cu128, so the
# script keeps working when the CI matrix moves to another CUDA version.
cuda_tag="${CU_VERSION:-cu128}"
printf "Installing PyTorch for %s\n" "${cuda_tag}"
if [[ "${TORCH_VERSION:-}" == "nightly" ]]; then
  if [ "${CU_VERSION:-}" == cpu ] ; then
    pip3 install --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu -U
  else
    pip3 install --pre torch --index-url "https://download.pytorch.org/whl/nightly/${cuda_tag}" -U
  fi
elif [[ "${TORCH_VERSION:-}" == "stable" ]]; then
  if [ "${CU_VERSION:-}" == cpu ] ; then
    pip3 install torch --index-url https://download.pytorch.org/whl/cpu -U
  else
    # -U added for consistency with the other branches.
    pip3 install torch --index-url "https://download.pytorch.org/whl/${cuda_tag}" -U
  fi
else
  # Diagnostics belong on stderr; also report what we actually received.
  printf "Failed to install pytorch: TORCH_VERSION must be 'nightly' or 'stable' (got '%s')\n" "${TORCH_VERSION:-}" >&2
  exit 1
fi

# install tensordict
pip install git+https://github.com/pytorch/tensordict.git --progress-bar off

# smoke test
python -c "import functorch;import tensordict"

printf "* Installing torchrl\n"
python -m pip install -e . --no-build-isolation

# smoke test
python -c "import torchrl"
6 changes: 6 additions & 0 deletions .github/unittest/linux_libs/scripts_botorch/post_process.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/usr/bin/env bash
# Post-processing hook run after the botorch test suite.
# Currently it only re-activates the test environment; coverage upload and
# artifact handling are done by the calling workflow.

set -e

# Make the project-local conda available in this shell, then activate ./env.
eval "$(./conda/bin/conda shell.bash hook)"
conda activate ./env
33 changes: 33 additions & 0 deletions .github/unittest/linux_libs/scripts_botorch/run_test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#!/usr/bin/env bash
# Run the PILCO (botorch/gpytorch) unit tests under coverage, emitting a JSON
# report used for flaky-test tracking.

set -euxo pipefail

eval "$(./conda/bin/conda shell.bash hook)"
conda activate ./env

export PYTORCH_TEST_WITH_SLOW='1'
export LAZY_LEGACY_OP=False

python -m torch.utils.collect_env
git config --global --add safe.directory '*'

root_dir="$(git rev-parse --show-toplevel)"

export MKL_THREADING_LAYER=GNU

# smoke test
python -c "import botorch; print('botorch', botorch.__version__)"
python -c "import gpytorch; print('gpytorch', gpytorch.__version__)"

# JSON report for flaky test tracking. Use an array instead of an unquoted
# string so the arguments survive paths containing spaces (ShellCheck SC2086).
json_report_dir="${RUNNER_ARTIFACT_DIR:-${root_dir}}"
json_report_args=(
  --json-report
  --json-report-file="${json_report_dir}/test-results-botorch.json"
  --json-report-indent=2
)

python .github/unittest/helpers/coverage_run_parallel.py -m pytest test/test_objectives.py \
  "${json_report_args[@]}" --instafail -v --durations 200 --capture no \
  -k TestGPWorldModel --error-for-skips
coverage combine -q
coverage xml -i

# Upload test results with metadata for flaky tracking
python .github/unittest/helpers/upload_test_results.py || echo "Warning: Failed to process test results for flaky tracking"
44 changes: 44 additions & 0 deletions .github/unittest/linux_libs/scripts_botorch/setup_env.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#!/usr/bin/env bash
# Bootstrap a self-contained Miniconda at ./conda and a test environment at
# ./env, then install the conda/pip dependencies from environment.yml.
#
# Required env vars:
#   PYTHON_VERSION - python version for the test env, e.g. "3.10"

set -euxo pipefail

apt-get update && apt-get upgrade -y && apt-get install -y git cmake
git config --global --add safe.directory '*'
apt-get install -y wget gcc g++

this_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
root_dir="$(git rev-parse --show-toplevel)"
conda_dir="${root_dir}/conda"
env_dir="${root_dir}/env"

cd "${root_dir}"

case "$(uname -s)" in
    Darwin*) os=MacOSX;;
    *) os=Linux
esac

# 1. Install conda at ./conda
if [ ! -d "${conda_dir}" ]; then
    printf "* Installing conda\n"
    # Use HTTPS and the current official host (repo.continuum.io is legacy and
    # plain http is susceptible to tampering).
    wget -O miniconda.sh "https://repo.anaconda.com/miniconda/Miniconda3-latest-${os}-x86_64.sh"
    bash ./miniconda.sh -b -f -p "${conda_dir}"
fi
eval "$(${conda_dir}/bin/conda shell.bash hook)"

# 2. Create test environment at ./env
printf "python: ${PYTHON_VERSION}\n"
if [ ! -d "${env_dir}" ]; then
    printf "* Creating a test environment\n"
    conda create --prefix "${env_dir}" -y python="$PYTHON_VERSION"
fi
conda activate "${env_dir}"

# 3. Install Conda dependencies
printf "* Installing dependencies (except PyTorch)\n"
# Pin the python version in environment.yml, but only once: a plain '>>'
# append would duplicate the line every time this script is re-run.
if ! grep -q "python=${PYTHON_VERSION}" "${this_dir}/environment.yml"; then
    echo "  - python=${PYTHON_VERSION}" >> "${this_dir}/environment.yml"
fi
cat "${this_dir}/environment.yml"

pip install pip --upgrade

conda env update --file "${this_dir}/environment.yml" --prune
38 changes: 38 additions & 0 deletions .github/workflows/test-linux-libs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,44 @@ jobs:

bash .github/unittest/linux_libs/scripts_brax/run_all.sh

# CI job running the PILCO/botorch test suite on a single-GPU runner.
# Only runs on push/workflow events or PRs labeled 'Modules'/'Objectives'.
unittests-botorch:
strategy:
matrix:
python_version: ["3.10"]
cuda_arch_version: ["12.8"]
if: ${{ github.event_name == 'push' || github.event_name == 'workflow_call' || github.event_name == 'workflow_dispatch' || contains(github.event.pull_request.labels.*.name, 'Modules') || contains(github.event.pull_request.labels.*.name, 'Objectives') }}
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
with:
repository: pytorch/rl
runner: "linux.g5.4xlarge.nvidia.gpu"
gpu-arch-type: cuda
gpu-arch-version: "12.8"
# NOTE(review): docker image is CUDA 12.4 while gpu-arch-version is 12.8 —
# presumably intentional (driver-compat), but worth confirming.
docker-image: "nvidia/cuda:12.4.0-devel-ubuntu22.04"
timeout: 120
script: |
if [[ "${{ github.ref }}" =~ release/* ]]; then
export RELEASE=1
export TORCH_VERSION=stable
else
export RELEASE=0
export TORCH_VERSION=nightly
fi

set -euo pipefail
export PYTHON_VERSION="3.10"
# NOTE(review): install.sh parses CU_VERSION in the "cuNNN" form (e.g.
# "cu128"); "12.8" here will not match that parser — confirm the expected
# format against the other scripts_* jobs.
export CU_VERSION="12.8"
export TAR_OPTIONS="--no-same-owner"
export UPLOAD_CHANNEL="nightly"
export TF_CPP_MIN_LOG_LEVEL=0
export TD_GET_DEFAULTS_TO_NONE=1

nvidia-smi

# Setup -> install -> test -> post-process, in order; each step is fatal.
bash .github/unittest/linux_libs/scripts_botorch/setup_env.sh
bash .github/unittest/linux_libs/scripts_botorch/install.sh
bash .github/unittest/linux_libs/scripts_botorch/run_test.sh
bash .github/unittest/linux_libs/scripts_botorch/post_process.sh

# unittests-d4rl:
# strategy:
# matrix:
Expand Down
1 change: 1 addition & 0 deletions docs/source/reference/envs_api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,7 @@ Domain-specific
ModelBasedEnvBase
model_based.dreamer.DreamerEnv
model_based.dreamer.DreamerDecoder
model_based.imagined.ImaginedEnv

Helpers
-------
Expand Down
1 change: 1 addition & 0 deletions docs/source/reference/envs_transforms.rst
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,7 @@ Available Transforms
Hash
InitTracker
LineariseRewards
MeanActionSelector
ModuleTransform
MultiAction
NoopResetEnv
Expand Down
12 changes: 12 additions & 0 deletions docs/source/reference/modules_models.rst
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,15 @@ Modules for model-based reinforcement learning, including world models and dynam
RSSMPosterior
RSSMPrior
RSSMRollout

PILCO
-----

Components for moment-matching model-based policy search (PILCO).

.. autosummary::
:toctree: generated/
:template: rl_template_noinherit.rst

GPWorldModel
RBFController
1 change: 1 addition & 0 deletions docs/source/reference/objectives_other.rst
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,4 @@ Additional loss modules for specialized algorithms.
DreamerActorLoss
DreamerModelLoss
DreamerValueLoss
ExponentialQuadraticCost
4 changes: 4 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,10 @@ marl = ["vmas>=1.2.10", "pettingzoo>=1.24.1", "dm-meltingpot; python_version>='3
open_spiel = ["open_spiel>=1.5"]
brax = ["jax>=0.7.0; python_version>='3.11'", "brax; python_version>='3.11'"]
procgen = ["procgen"]
pilco = [
"botorch",
"gpytorch",
]
# Base LLM dependencies (no inference backend - use llm-vllm or llm-sglang)
llm = [
"transformers",
Expand Down
35 changes: 30 additions & 5 deletions setup-and-run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ REPO_DIR="/root/rl"
VENV_DIR="/root/torchrl_venv"
MODE="isaac" # "isaac" or "dmcontrol"
BUILD_ONLY=false
GPUS="" # explicit GPU set, e.g. "3,4,5"
EXTRA_ARGS=() # extra Hydra overrides forwarded to the training script

# ---- Parse arguments --------------------------------------------------------
Expand All @@ -38,22 +39,46 @@ for arg in "$@"; do
--build-only) BUILD_ONLY=true ;;
--dmcontrol) MODE="dmcontrol" ;;
--isaac) MODE="isaac" ;;
--gpus=*) GPUS="${arg#--gpus=}" ;;
*) EXTRA_ARGS+=("$arg") ;;
esac
done

# Avoid "'': unknown terminal type" in headless containers
export TERM="${TERM:-xterm}"

# Resolve GPU set early so we can use it for zombie cleanup
if [[ -n "$GPUS" ]]; then
export CUDA_VISIBLE_DEVICES="$GPUS"
elif [[ "$MODE" == "isaac" ]]; then
export CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:-0,1,2}"
fi

echo "============================================================"
echo " setup-and-run.sh"
echo " mode=$MODE build_only=$BUILD_ONLY"
echo " gpus=${CUDA_VISIBLE_DEVICES:-<all>}"
echo " extra_args=${EXTRA_ARGS[*]:-<none>}"
echo "============================================================"

# ---- 0) Kill zombie Python processes from previous runs ---------------------
echo "* Killing leftover Python processes..."
pkill -9 -f python || true
# ---- 0) Kill zombie Python processes on the SAME GPUs ----------------------
# Only kill dreamer processes whose CUDA_VISIBLE_DEVICES matches ours,
# so that a second experiment on different GPUs is left untouched.
echo "* Killing leftover dreamer processes on GPUs=${CUDA_VISIBLE_DEVICES:-<all>}..."
if [[ -n "${CUDA_VISIBLE_DEVICES:-}" ]]; then
# Find dreamer_isaac.py PIDs whose /proc/<pid>/environ contains our GPU set
for pid in $(pgrep -f "dreamer_isaac.py|dreamer.py" 2>/dev/null || true); do
proc_env=$(tr '\0' '\n' < /proc/$pid/environ 2>/dev/null || true)
proc_gpus=$(echo "$proc_env" | grep '^CUDA_VISIBLE_DEVICES=' | head -1 | cut -d= -f2)
if [[ "$proc_gpus" == "$CUDA_VISIBLE_DEVICES" ]] || [[ -z "$proc_gpus" ]]; then
echo " Killing PID $pid (CUDA_VISIBLE_DEVICES=$proc_gpus)"
kill -9 "$pid" 2>/dev/null || true
fi
done
else
# No GPU constraint — kill all dreamer processes
pkill -9 -f "dreamer_isaac.py|dreamer.py" || true
fi
sleep 1

# ---- 1) System dependencies ------------------------------------------------
Expand Down Expand Up @@ -203,8 +228,8 @@ echo "============================================================"
cd "$REPO_DIR"

if [[ "$MODE" == "isaac" ]]; then
# Expose 3 GPUs: GPU 0 = sim, GPU 1 = training, GPU 2 = eval (rendering)
export CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:-0,1,2}"
# GPUs already set above: GPU0 = sim, GPU1 = training, GPU2 = eval (rendering)
echo " CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES"
$PYTHON "sota-implementations/dreamer/dreamer_isaac.py" "${EXTRA_ARGS[@]}"
else
export MUJOCO_GL=egl
Expand Down
26 changes: 26 additions & 0 deletions sota-check/run_pilco.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#!/bin/bash
# SLURM job: run the PILCO sota-implementation example and record
# success/failure in report.log for the sota-check aggregator.

#SBATCH --job-name=pilco
#SBATCH --ntasks=32
#SBATCH --cpus-per-task=1
#SBATCH --gres=gpu:1
#SBATCH --output=slurm_logs/pilco_%j.txt
#SBATCH --error=slurm_errors/pilco_%j.txt

current_commit=$(git rev-parse --short HEAD)
project_name="torchrl-example-check-${current_commit}"
group_name="pilco"
# Quote the nested dirname/$PWD expansions so paths with spaces work
# (ShellCheck SC2046/SC2086).
export PYTHONPATH="$(dirname "$(dirname "$PWD")")"
python "$PYTHONPATH/sota-implementations/pilco/pilco.py" \
  logger.backend=wandb \
  logger.project_name="$project_name" \
  logger.group_name="$group_name"

# Capture the exit status of the Python command
exit_status=$?
# Write the exit status to a file
if [ $exit_status -eq 0 ]; then
  echo "${group_name}_${SLURM_JOB_ID}=success" >> report.log
else
  echo "${group_name}_${SLURM_JOB_ID}=error" >> report.log
fi
18 changes: 18 additions & 0 deletions sota-implementations/pilco/config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
env:
env_name: InvertedPendulum-v5
library: gym
device: null
logger:
backend: wandb
project_name: torchrl_pilco
group_name: null
video: True
optim:
policy_lr: 5e-3
pilco:
horizon: 40
initial_rollout_length: 200
max_rollout_length: 350
epochs: 3
policy_training_steps: 100
policy_n_basis: 10
Loading
Loading