Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions .github/labeler.yml
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,20 @@
- any-glob-to-any-file:
- 'torchrl/data/datasets/**'

# =============================================================================
# Integrations Sub-Labels (trigger specific tests in test-linux-libs.yml)
# =============================================================================
# The unittests-torch_geometric job in test-linux-libs.yml runs on pull
# requests that carry either this label or the parent "Integrations" label.
"Integrations/torch_geometric":
- changed-files:
- any-glob-to-any-file: ['torchrl/collectors/**', 'torchrl/modules/**']

# Parent Integrations label (any integration-related change)
# Uses the same two globs as the torch_geometric sub-label above.
"Integrations":
- changed-files:
- any-glob-to-any-file:
- 'torchrl/collectors/**'
- 'torchrl/modules/**'

# =============================================================================
# LLM (triggers test-linux-llm.yml)
# =============================================================================
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Conda environment spec for a library-integration test job.
# Every test dependency below is installed through pip; PyTorch is not listed
# here.
# NOTE(review): setup_env.sh appends a " - python=<PYTHON_VERSION>" entry to
# the end of the environment.yml in its own directory before running
# `conda env update` -- presumably this file; confirm.
channels:
- pytorch
- defaults
dependencies:
- pip
- pip:
- hypothesis
- future
- cloudpickle
- pytest
- pytest-cov
- pytest-mock
- pytest-instafail
- pytest-rerunfailures
- pytest-error-for-skips
- expecttest
- pybind11[global]
- pyyaml
- scipy
- torch_geometric
57 changes: 57 additions & 0 deletions .github/unittest/linux_libs/scripts_torch_geometric/install.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#!/usr/bin/env bash
#
# Install PyTorch, tensordict and torchrl into the conda environment at ./env.
# Must be run from the directory that contains ./conda and ./env.
#
# Required environment variables:
#   CU_VERSION     "cpu", a "cuXYZ" tag (e.g. cu128) or a dotted CUDA version
#                  (e.g. 12.8).
#   TORCH_VERSION  "nightly" or "stable"; any other value aborts the script.
#   RELEASE        "0" installs tensordict from its git repository, any other
#                  value installs the published package.

unset PYTORCH_VERSION

set -euxo pipefail

# Fail early with a readable message instead of a bare "unbound variable"
# half-way through the installation.
: "${CU_VERSION?CU_VERSION must be set (cpu, cuXYZ or X.Y)}"
: "${TORCH_VERSION?TORCH_VERSION must be set (nightly or stable)}"
: "${RELEASE?RELEASE must be set (0 installs tensordict from git)}"

eval "$(./conda/bin/conda shell.bash hook)"
conda activate ./env

if [[ "${CU_VERSION}" == cpu ]]; then
  wheel_channel="cpu"
else
  # Accept both spellings of the CUDA version. Slicing by string length only
  # works for the "cuXYZ" form and turns a dotted value such as "12.8" into
  # "..8", so parse each form explicitly.
  if [[ "${CU_VERSION}" =~ ^cu([0-9]+)([0-9])$ ]]; then
    CUDA_VERSION="${BASH_REMATCH[1]}.${BASH_REMATCH[2]}"
  elif [[ "${CU_VERSION}" =~ ^[0-9]+\.[0-9]+$ ]]; then
    CUDA_VERSION="${CU_VERSION}"
  else
    printf 'Unsupported CU_VERSION: "%s" (expected cpu, cuXYZ or X.Y)\n' \
      "${CU_VERSION}" >&2
    exit 1
  fi
  echo "Using CUDA $CUDA_VERSION as determined by CU_VERSION ($CU_VERSION)"
  # CUDA wheels are always taken from the cu128 index, independent of the
  # CUDA version derived above.
  wheel_channel="cu128"
fi

# submodules
# Two separate commands: in an "a && b" list a failure of "a" does not trigger
# errexit, so a failed sync would otherwise go unnoticed.
git submodule sync
git submodule update --init --recursive

printf 'Installing PyTorch (%s) from the %s index\n' \
  "${TORCH_VERSION}" "${wheel_channel}"
case "${TORCH_VERSION}" in
  nightly)
    pip3 install --pre torch \
      --index-url "https://download.pytorch.org/whl/nightly/${wheel_channel}" -U
    ;;
  stable)
    pip3 install torch \
      --index-url "https://download.pytorch.org/whl/${wheel_channel}"
    ;;
  *)
    printf 'Failed to install pytorch: unsupported TORCH_VERSION "%s"\n' \
      "${TORCH_VERSION}" >&2
    exit 1
    ;;
esac

# install tensordict
if [[ "${RELEASE}" == 0 ]]; then
  pip3 install git+https://github.com/pytorch/tensordict.git
else
  pip3 install tensordict
fi

# smoke test
python -c "import functorch;import tensordict"

printf '* Installing torchrl\n'
python -m pip install -e . --no-build-isolation

# smoke test
python -c "import torchrl"
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/usr/bin/env bash
#
# Post-processing step of the test pipeline. As written it only re-activates
# the conda environment at ./env and performs no further work.
# Must be run from the directory that contains ./conda and ./env.

set -e

# Load conda's shell integration so that `conda activate` is available.
eval "$(./conda/bin/conda shell.bash hook)"
conda activate ./env
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#!/usr/bin/env bash
#
# Run the whole test pipeline by executing, in order, the setup_env.sh,
# install.sh, run_test.sh and post_process.sh scripts that live next to this
# file. The first failing step aborts the run.

set -euxo pipefail

this_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"

# The script path is quoted so that a checkout location containing spaces or
# glob characters is passed to bash as a single argument.
for step in setup_env install run_test post_process; do
  bash "${this_dir}/${step}.sh"
done
23 changes: 23 additions & 0 deletions .github/unittest/linux_libs/scripts_torch_geometric/run_test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#!/usr/bin/env bash
#
# Run the TestTorchGeometric tests from test/test_libs.py inside the conda
# environment at ./env and write a combined coverage XML report.
# Must be run from the directory that contains ./conda and ./env.

set -euxo pipefail

eval "$(./conda/bin/conda shell.bash hook)"
conda activate ./env

export PYTORCH_TEST_WITH_SLOW='1'
export LAZY_LEGACY_OP=False
python -m torch.utils.collect_env
git config --global --add safe.directory '*'

# Re-activate the environment. These are two separate commands on purpose:
# in "conda deactivate && conda activate" a failing deactivate would not
# trigger errexit, and the tests would then run without the environment.
conda deactivate
conda activate ./env

# Fail fast if the library under test is not importable.
python -c "import torch_geometric"

python .github/unittest/helpers/coverage_run_parallel.py -m pytest test/test_libs.py \
  --instafail -v --durations 200 --capture no -k TestTorchGeometric --error-for-skips
coverage combine -q
coverage xml -i
44 changes: 44 additions & 0 deletions .github/unittest/linux_libs/scripts_torch_geometric/setup_env.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#!/usr/bin/env bash
#
# Prepare the test machine:
#   0. install the required system packages with apt-get,
#   1. install Miniconda at <repo>/conda (skipped if the directory exists),
#   2. create a conda environment at <repo>/env (skipped if it exists),
#   3. install the dependencies listed in the environment.yml next to this
#      script.
#
# Required environment variables:
#   PYTHON_VERSION  Python version of the test environment (e.g. 3.10).

set -euxo pipefail

# Fail before touching the system if the Python version is missing.
: "${PYTHON_VERSION?PYTHON_VERSION must be set}"

# Separate commands instead of an "&&" chain: errexit ignores a failure of any
# command but the last one in such a chain, which would hide a broken
# `apt-get update` or `apt-get upgrade`.
apt-get update
apt-get upgrade -y
apt-get install -y git cmake
git config --global --add safe.directory '*'
apt-get install -y wget gcc g++

this_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
root_dir="$(git rev-parse --show-toplevel)"
conda_dir="${root_dir}/conda"
env_dir="${root_dir}/env"
env_file="${this_dir}/environment.yml"

cd "${root_dir}"

case "$(uname -s)" in
  Darwin*) os=MacOSX ;;
  *) os=Linux ;;
esac

# 1. Install conda at ./conda
if [[ ! -d "${conda_dir}" ]]; then
  printf '* Installing conda\n'
  # Download the installer over HTTPS from the current Anaconda host; the
  # installer is executed right away, so it must not travel over plain HTTP.
  wget -O miniconda.sh \
    "https://repo.anaconda.com/miniconda/Miniconda3-latest-${os}-x86_64.sh"
  bash ./miniconda.sh -b -f -p "${conda_dir}"
fi
eval "$("${conda_dir}/bin/conda" shell.bash hook)"

# 2. Create test environment at ./env
printf 'python: %s\n' "${PYTHON_VERSION}"
if [[ ! -d "${env_dir}" ]]; then
  printf '* Creating a test environment\n'
  conda create --prefix "${env_dir}" -y python="$PYTHON_VERSION"
fi
conda activate "${env_dir}"

# 3. Install Conda dependencies
printf '* Installing dependencies (except PyTorch)\n'
# Pin the Python version in the environment file. The pin is appended only
# when it is not already present, so re-running the script on the same
# checkout does not accumulate duplicate entries.
python_pin=" - python=${PYTHON_VERSION}"
if ! grep -qxF -- "${python_pin}" "${env_file}"; then
  echo "${python_pin}" >> "${env_file}"
fi
cat "${env_file}"

pip install pip --upgrade

conda env update --file "${env_file}" --prune
36 changes: 36 additions & 0 deletions .github/workflows/test-linux-libs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -794,3 +794,39 @@ jobs:
bash .github/unittest/linux_libs/scripts_vmas/install.sh
bash .github/unittest/linux_libs/scripts_vmas/run_test.sh
bash .github/unittest/linux_libs/scripts_vmas/post_process.sh

# Runs the torch_geometric integration tests on a GPU runner. Triggered on
# push, workflow_call, workflow_dispatch, or on pull requests labelled
# "Integrations" or "Integrations/torch_geometric".
unittests-torch_geometric:
  strategy:
    matrix:
      python_version: ["3.10"]
      cuda_arch_version: ["12.8"]
  if: ${{ github.event_name == 'push' || github.event_name == 'workflow_call' || github.event_name == 'workflow_dispatch' || contains(github.event.pull_request.labels.*.name, 'Integrations') || contains(github.event.pull_request.labels.*.name, 'Integrations/torch_geometric') }}
  uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
  with:
    repository: pytorch/rl
    runner: "linux.g5.4xlarge.nvidia.gpu"
    gpu-arch-type: cuda
    gpu-arch-version: "12.8"
    # NOTE(review): the image is CUDA 12.4 while gpu-arch-version and
    # CU_VERSION say 12.8 -- confirm this mismatch is intended.
    docker-image: "nvidia/cuda:12.4.0-devel-ubuntu22.04"
    timeout: 120
    script: |
      # Enable strict mode before anything else runs.
      set -euo pipefail

      # "=~" takes a regular expression, not a glob: "release/*" would mean
      # "release" followed by zero or more slashes and match any ref that
      # merely contains "release". Require the literal "release/" instead.
      if [[ "${{ github.ref }}" =~ release/ ]]; then
        export RELEASE=1
        export TORCH_VERSION=stable
      else
        export RELEASE=0
        export TORCH_VERSION=nightly
      fi

      export PYTHON_VERSION="3.10"
      export CU_VERSION="12.8"
      export TAR_OPTIONS="--no-same-owner"
      export UPLOAD_CHANNEL="nightly"
      export TF_CPP_MIN_LOG_LEVEL=0
      export BATCHED_PIPE_TIMEOUT=60
      export TD_GET_DEFAULTS_TO_NONE=1

      nvidia-smi

      bash .github/unittest/linux_libs/scripts_torch_geometric/run_all.sh
105 changes: 105 additions & 0 deletions test/test_libs.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,7 @@
importlib.util.find_spec("mujoco") is not None
or importlib.util.find_spec("mujoco_py") is not None
)
# True when the optional torch_geometric package can be found; used to skip
# TestTorchGeometric when it is not installed.
_has_torch_geometric = importlib.util.find_spec("torch_geometric") is not None


def _has_atari_for_gym():
Expand Down Expand Up @@ -6038,6 +6039,110 @@ def test_procgen_start_level_num_levels(self):
env.close()


@pytest.mark.skipif(not _has_torch_geometric, reason="torch_geometric not installed")
class TestTorchGeometric:
    """Tests for torch_geometric compatibility with torchrl (issue #2679).

    The primary concern is that torch_geometric modules override __deepcopy__
    in a way that conflicts with torchrl's collector parameter-mapping logic.
    """

    def _make_pyg_module(self, in_features=10, hidden=32, out_features=4):
        """Build a small module mixing a torch_geometric layer with a torch one.

        Args:
            in_features: size of the last input dimension.
            hidden: width of the torch_geometric ``Linear`` layer.
            out_features: size of the last output dimension.

        Returns:
            An ``nn.Module`` computing ``head(relu(pyg_linear(x)))``.
        """
        from torch_geometric.nn import Linear as PyGLinear

        class PyGModule(nn.Module):
            def __init__(self):
                super().__init__()
                self.pyg_linear = PyGLinear(in_features, hidden)
                self.head = nn.Linear(hidden, out_features)

            def forward(self, x):
                return self.head(torch.relu(self.pyg_linear(x)))

        return PyGModule()

    def _check_collector(self, **collector_kwargs):
        """Collect one batch with a PyG-based policy and check it has an action.

        Extra keyword arguments are forwarded to ``Collector``. The collector
        is shut down even when the assertion fails, so a failing test does not
        leave it running.
        """
        from torchrl.testing.mocking_classes import ContinuousActionVecMockEnv

        # Presumably 7 matches the mock env's observation and action sizes.
        in_features = 7
        act_features = 7
        module = self._make_pyg_module(
            in_features=in_features, hidden=32, out_features=act_features
        )
        policy = TensorDictModule(module, in_keys=["observation"], out_keys=["action"])

        collector = Collector(
            create_env_fn=ContinuousActionVecMockEnv,
            policy=policy,
            total_frames=20,
            frames_per_batch=10,
            device="cpu",
            **collector_kwargs,
        )
        try:
            for data in collector:
                assert "action" in data
                break
        finally:
            collector.shutdown()

    def test_deepcopy(self):
        """A deep copy must run and produce the same outputs as the original."""
        module = self._make_pyg_module()
        module_copy = copy.deepcopy(module)
        x = torch.randn(5, 10)
        out_orig = module(x)
        out_copy = module_copy(x)
        assert out_orig.shape == out_copy.shape == (5, 4)
        # Comparing shapes alone would accept a copy with re-initialised
        # weights; the copied parameters must give identical results.
        torch.testing.assert_close(out_orig, out_copy)

    def test_deepcopy_meta_device(self):
        """Reproduce the collector's internal deepcopy pattern that triggers #2679."""
        module = self._make_pyg_module()
        param_and_buf = TensorDict.from_module(module, as_module=True)

        # Deep-copy the module while its parameters are swapped for meta
        # tensors, then load the real parameters into the copy.
        with param_and_buf.data.to("meta").to_module(module):
            module_copy = copy.deepcopy(module)

        param_and_buf.to_module(module_copy)

        x = torch.randn(5, 10)
        out = module_copy(x)
        assert out.shape == (5, 4)

    @pytest.mark.skipif(
        not (torch.cuda.is_available() and torch.cuda.device_count()),
        reason="CUDA required for collector device-mapping test",
    )
    def test_collector_with_pyg_policy(self):
        """Collector with the policy on CUDA and the collector device on CPU."""
        self._check_collector(policy_device="cuda:0")

    def test_collector_with_pyg_policy_same_device(self):
        """Collector with no separate policy device requested."""
        self._check_collector()

    def test_tensordict_module_wrap(self):
        """A PyG-based module can be wrapped in and called via TensorDictModule."""
        module = self._make_pyg_module()
        td_module = TensorDictModule(
            module, in_keys=["observation"], out_keys=["action"]
        )
        td = TensorDict({"observation": torch.randn(3, 10)})
        out = td_module(td)
        assert "action" in out
        assert out["action"].shape == (3, 4)


# Allow running this file directly: command-line arguments that argparse does
# not recognise are forwarded to pytest, which stops at the first failure.
if __name__ == "__main__":
args, unknown = argparse.ArgumentParser().parse_known_args()
pytest.main([__file__, "--capture", "no", "--exitfirst"] + unknown)
Loading