diff --git a/.github/workflows/build-cpu.yml b/.github/workflows/build-cpu.yml
index 6ae2a3db1..98060175f 100644
--- a/.github/workflows/build-cpu.yml
+++ b/.github/workflows/build-cpu.yml
@@ -34,4 +34,4 @@ jobs:
         pip install -r build-requirements.txt
 
         # Build monarch (No tensor engine, CPU version)
-        USE_TENSOR_ENGINE=0 python setup.py bdist_wheel
+        MONARCH_FEATURES=core python setup.py bdist_wheel -v
diff --git a/.github/workflows/build-cuda.yml b/.github/workflows/build-cuda.yml
index 088599020..09e23c3dd 100644
--- a/.github/workflows/build-cuda.yml
+++ b/.github/workflows/build-cuda.yml
@@ -43,5 +43,5 @@ jobs:
 
         export CUDA_LIB_DIR=/usr/lib64
 
-        # Build monarch (CUDA version)
-        python setup.py bdist_wheel
+        # Build monarch (CUDA version with all features)
+        MONARCH_FEATURES=full python setup.py bdist_wheel -v
diff --git a/.github/workflows/doc_build.yml b/.github/workflows/doc_build.yml
index f0bb0ad8d..04649675e 100644
--- a/.github/workflows/doc_build.yml
+++ b/.github/workflows/doc_build.yml
@@ -43,7 +43,6 @@ jobs:
 
         # Set environment variables for CUDA build
         export USE_CUDA=1
-        export USE_TENSOR_ENGINE=1
         export RUSTFLAGS="-Zthreads=16 ${RUSTFLAGS:-}"
         export _GLIBCXX_USE_CXX11_ABI=1
         export CUDA_LIB_DIR=/usr/lib64
diff --git a/.github/workflows/publish_release.yml b/.github/workflows/publish_release.yml
index 269ae1aee..6c8807233 100644
--- a/.github/workflows/publish_release.yml
+++ b/.github/workflows/publish_release.yml
@@ -50,7 +50,7 @@ jobs:
         export MONARCH_PACKAGE_NAME="torchmonarch"
         export CUDA_LIB_DIR=/usr/lib64
         export MONARCH_VERSION="${{ github.event.inputs.version }}"
-        python setup.py bdist_wheel
+        MONARCH_FEATURES=full python setup.py bdist_wheel -v
 
         # hacky until the right distribution wheel can be made...
         find dist -name "*linux_x86_64.whl" -type f -exec bash -c 'mv "$1" "${1/linux_x86_64.whl/manylinux2014_x86_64.whl}"' _ {} \;
diff --git a/.github/workflows/test-cpu-python.yml b/.github/workflows/test-cpu-python.yml
index d2f63ffc5..f8a6e3c01 100644
--- a/.github/workflows/test-cpu-python.yml
+++ b/.github/workflows/test-cpu-python.yml
@@ -28,9 +28,6 @@ jobs:
         # Setup test environment
         setup_conda_environment
 
-        # Disable tensor engine
-        export USE_TENSOR_ENGINE=0
-
         # Install PyTorch nightly
         pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu
 
diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml
index ebf1c89a5..27fb82d8e 100644
--- a/.github/workflows/wheels.yml
+++ b/.github/workflows/wheels.yml
@@ -51,7 +51,7 @@ jobs:
         export MONARCH_VERSION=$(date +'%Y.%m.%d')
         export CUDA_LIB_DIR=/usr/lib64
 
-        python setup.py bdist_wheel
+        MONARCH_FEATURES=full python setup.py bdist_wheel -v
 
         # hacky until the right distribution wheel can be made...
         find dist -name "*linux_x86_64.whl" -type f -exec bash -c 'mv "$1" "${1/linux_x86_64.whl/manylinux2014_x86_64.whl}"' _ {} \;
diff --git a/.gitignore b/.gitignore
index d87c0a063..42b420182 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,7 +9,7 @@ python/monarch.egg-info/*
 *.egg
 build/*
 dist/*
-monarch.egg-info/*
+torchmonarch.egg-info/*
 
 .ipynb_checkpoints
 .monarch
diff --git a/README.md b/README.md
index 772625fee..c08a6f396 100644
--- a/README.md
+++ b/README.md
@@ -98,7 +98,7 @@ sudo dnf install clang-devel libnccl-devel
 conda install -c conda-forge clangdev nccl
 conda update -n monarchenv --all -c conda-forge -y
 
-# If you are building with RDMA support, build monarch with `USE_TENSOR_ENGINE=1 pip install --no-build-isolation .` and dnf install the following packages
+# If you are building with RDMA support, build monarch with `MONARCH_FEATURES=rdma pip install --no-build-isolation .` and dnf install the following packages
 sudo dnf install -y libibverbs rdma-core libmlx5 libibverbs-devel rdma-core-devel
 
 # Install build dependencies
@@ -147,12 +147,12 @@ pip install -r build-requirements.txt
 # Install test dependencies
 pip install -r python/tests/requirements.txt
 
-# Build and install Monarch (with tensor engine support)
+# Build and install Monarch (with all features)
 pip install --no-build-isolation .
 
 # or
 # Build and install Monarch (without tensor engine support)
-USE_TENSOR_ENGINE=0 pip install --no-build-isolation .
+MONARCH_FEATURES=core pip install --no-build-isolation .
 
 # or setup for development
 pip install --no-build-isolation -e .
@@ -185,10 +185,10 @@ pip install -r build-requirements.txt
 # Install test dependencies
 pip install -r python/tests/requirements.txt
 
-# Build and install Monarch
-USE_TENSOR_ENGINE=0 pip install --no-build-isolation .
+# Build and install Monarch (core only, no RDMA or tensor engine)
+MONARCH_FEATURES=core pip install --no-build-isolation .
 # or setup for development
-USE_TENSOR_ENGINE=0 pip install --no-build-isolation -e .
+MONARCH_FEATURES=core pip install --no-build-isolation -e .
 
 # Verify installation
 pip list | grep monarch
diff --git a/monarch_extension/Cargo.toml b/monarch_extension/Cargo.toml
index a0390cfd0..580300220 100644
--- a/monarch_extension/Cargo.toml
+++ b/monarch_extension/Cargo.toml
@@ -40,5 +40,8 @@ torch-sys-cuda = { version = "0.0.0", path = "../torch-sys-cuda", optional = tru
 tracing = { version = "0.1.41", features = ["attributes", "valuable"] }
 
 [features]
-default = ["tensor_engine"]
-tensor_engine = ["dep:monarch_messages", "dep:monarch_rdma_extension", "dep:monarch_tensor_worker", "dep:nccl-sys", "dep:rdmaxcel-sys", "dep:torch-sys", "dep:torch-sys-cuda"]
+core = []
+default = ["full"]
+full = ["rdma", "tensor_engine"]
+rdma = ["dep:monarch_rdma_extension", "dep:rdmaxcel-sys"]
+tensor_engine = ["dep:monarch_messages", "dep:monarch_tensor_worker", "dep:nccl-sys", "dep:torch-sys", "dep:torch-sys-cuda"]
diff --git a/monarch_extension/src/lib.rs b/monarch_extension/src/lib.rs
index f98f2db31..e91d3271b 100644
--- a/monarch_extension/src/lib.rs
+++ b/monarch_extension/src/lib.rs
@@ -122,6 +122,10 @@ pub fn mod_init(module: &Bound<'_, PyModule>) -> PyResult<()> {
             module,
             "monarch_extension.mesh_controller",
         )?)?;
+    }
+
+    #[cfg(feature = "rdma")]
+    {
         monarch_rdma_extension::register_python_bindings(&get_or_add_new_module(module, "rdma")?)?;
     }
     simulation_tools::register_python_bindings(&get_or_add_new_module(
diff --git a/python/monarch/_src/actor/proc_mesh.py b/python/monarch/_src/actor/proc_mesh.py
index e3cc151ac..7c1bf69b2 100644
--- a/python/monarch/_src/actor/proc_mesh.py
+++ b/python/monarch/_src/actor/proc_mesh.py
@@ -478,7 +478,8 @@ def _spawn_nonblocking_on(
     def _device_mesh(self) -> "DeviceMesh":
         if not _has_tensor_engine():
             raise RuntimeError(
-                "DeviceMesh is not available because tensor_engine was not compiled (USE_TENSOR_ENGINE=0)"
+                "DeviceMesh is not available because tensor_engine was not compiled.\n"
+                "Build with: MONARCH_FEATURES=tensor_engine pip install ."
             )
 
         # type: ignore[21]
diff --git a/python/monarch/_src/actor/v1/proc_mesh.py b/python/monarch/_src/actor/v1/proc_mesh.py
index 9e3b65eb8..ddc256dd5 100644
--- a/python/monarch/_src/actor/v1/proc_mesh.py
+++ b/python/monarch/_src/actor/v1/proc_mesh.py
@@ -332,7 +332,8 @@ def _device_mesh(self) -> "DeviceMesh":
 
         if not _has_tensor_engine():
             raise RuntimeError(
-                "DeviceMesh is not available because tensor_engine was not compiled (USE_TENSOR_ENGINE=0)"
+                "DeviceMesh is not available because tensor_engine was not compiled.\n"
+                "Build with: MONARCH_FEATURES=tensor_engine pip install ."
             )
 
         # type: ignore[21]
diff --git a/scripts/build_monarch_for_docs.sh b/scripts/build_monarch_for_docs.sh
index 6e749cd83..769514564 100755
--- a/scripts/build_monarch_for_docs.sh
+++ b/scripts/build_monarch_for_docs.sh
@@ -15,7 +15,7 @@ echo "========================================="
 export CI=true
 # BUILD MONARCH COMPLETELY - This is critical for API documentation
 echo "Building Monarch with Rust bindings..."
-python -m pip install -e . --no-build-isolation
+python -m pip install -e . --no-build-isolation -v
 
 # Verify Monarch installation and imports
 echo "Verifying Monarch installation..."
diff --git a/setup.py b/setup.py
index 1f1ec13d3..2f6b6b9f2 100644
--- a/setup.py
+++ b/setup.py
@@ -25,61 +25,92 @@
 )
 
 USE_CUDA = CUDA_HOME is not None
-USE_TENSOR_ENGINE = os.environ.get("USE_TENSOR_ENGINE", "1") == "1"
 
-monarch_cpp_src = ["python/monarch/common/init.cpp"]
 
-if USE_CUDA:
-    monarch_cpp_src.append("python/monarch/common/mock_cuda.cpp")
+# Feature detection for building torchmonarch-* variants.
+def get_rust_features():
+    """
+    Determine which Rust features to build.
 
-common_C = CppExtension(
-    "monarch.common._C",
-    monarch_cpp_src,
-    extra_compile_args=["-g", "-O3"],
-    libraries=["dl"],
-    include_dirs=[
-        os.path.dirname(os.path.abspath(__file__)),
-        sysconfig.get_config_var("INCLUDEDIR"),
-    ],
-)
+    Environment variable:
+    - MONARCH_FEATURES: comma-separated list of "core", "tensor_engine", "rdma", "full"
+    - Default: "full" (all features)
 
+    Returns:
+        list: features to enable
+    """
+    features_str = os.environ.get("MONARCH_FEATURES", "").strip()
 
-controller_C = CppExtension(
-    "monarch.gradient._gradient_generator",
-    ["python/monarch/gradient/_gradient_generator.cpp"],
-    extra_compile_args=["-g", "-O3"],
-    include_dirs=[
-        os.path.dirname(os.path.abspath(__file__)),
-        sysconfig.get_config_var("INCLUDEDIR"),
-    ],
-)
+    if features_str:
+        return [f.strip() for f in features_str.split(",") if f.strip()]
+    else:
+        # Use the full build by default.
+        return ["full"]
 
-ENABLE_MSG_LOGGING = (
-    "--cfg=enable_hyperactor_message_logging"
-    if os.environ.get("ENABLE_MESSAGE_LOGGING")
-    else ""
-)
 
-ENABLE_TRACING_UNSTABLE = "--cfg=tracing_unstable"
+# Get features for this build
+RUST_FEATURES = get_rust_features()
+
+
+def has_feature(feature):
+    """
+    Check if a feature is enabled for this build.
+
+    Args:
+        feature: Feature name to check (e.g., "rdma", "tensor_engine", "core")
+
+    Returns:
+        bool: True if the name is in RUST_FEATURES, or if "full" is (for any name)
+    """
+    return feature in RUST_FEATURES or "full" in RUST_FEATURES
 
-os.environ.update(
-    {
+
+# Print build configuration
+package_version = os.environ.get("MONARCH_VERSION", "0.0.1")
+package_name = os.environ.get("MONARCH_PACKAGE_NAME", "torchmonarch")
+print(f"Building {package_name} v{package_version} with features: {RUST_FEATURES}")
+
+
+def setup_build_environment():
+    """
+    Configure environment variables for Rust and C++ builds.
+
+    Sets up compiler flags, PyTorch library paths, and feature-specific configuration.
+    """
+    enable_msg_logging = (
+        "--cfg=enable_hyperactor_message_logging"
+        if os.environ.get("ENABLE_MESSAGE_LOGGING")
+        else ""
+    )
+    enable_tracing_unstable = "--cfg=tracing_unstable"
+
+    # RDMA requires PyTorch CUDA libraries (torch_cuda, c10_cuda) for GPUDirect support,
+    # so TORCH_SYS_USE_PYTORCH_APIS is set to "1" only when the rdma feature is enabled.
+    use_pytorch_apis = "1" if has_feature("rdma") else "0"
+
+    env_updates = {
         "CXXFLAGS": f"-D_GLIBCXX_USE_CXX11_ABI={int(torch._C._GLIBCXX_USE_CXX11_ABI)}",
         "RUSTFLAGS": " ".join(
-            ["-Zthreads=16", ENABLE_MSG_LOGGING, ENABLE_TRACING_UNSTABLE]
+            ["-Zthreads=16", enable_msg_logging, enable_tracing_unstable]
         ),
         "LIBTORCH_LIB": TORCH_LIB_PATH,
         "LIBTORCH_INCLUDE": ":".join(torch_include_paths()),
         "_GLIBCXX_USE_CXX11_ABI": str(int(torch._C._GLIBCXX_USE_CXX11_ABI)),
-        "TORCH_SYS_USE_PYTORCH_APIS": "0",
+        "TORCH_SYS_USE_PYTORCH_APIS": use_pytorch_apis,
     }
-)
-if USE_CUDA:
-    os.environ.update(
-        {
-            "CUDA_HOME": CUDA_HOME,
-        }
-    )
+
+    if USE_CUDA:
+        env_updates["CUDA_HOME"] = CUDA_HOME
+
+    print("Setting environment variables:")
+    for k, v in env_updates.items():
+        print(f"  {k}={v}")
+
+    os.environ.update(env_updates)
+
+
+# Set up build environment
+setup_build_environment()
 
 
 class Clean(Command):
@@ -181,19 +212,60 @@ def run(self):
     )
 
 # Main extension (always built)
-rust_extensions.append(
-    RustExtension(
-        "monarch._rust_bindings",
-        binding=Binding.PyO3,
-        path="monarch_extension/Cargo.toml",
-        debug=False,
-        features=["tensor_engine"] if USE_TENSOR_ENGINE else [],
-        args=[] if USE_TENSOR_ENGINE else ["--no-default-features"],
+rust_ext = RustExtension(
+    "monarch._rust_bindings",
+    binding=Binding.PyO3,
+    path="monarch_extension/Cargo.toml",
+    debug=False,
+    features=RUST_FEATURES,
+    args=["--no-default-features"],
+)
+
+print(f"   Rust extension features: {RUST_FEATURES}")
+print(f"   Rust extension args: {rust_ext.args}")
+
+rust_extensions.append(rust_ext)
+
+# Build C++ extensions conditionally based on features
+cpp_ext_modules = []
+
+# monarch.common._C is always needed
+monarch_cpp_src = ["python/monarch/common/init.cpp"]
+if USE_CUDA:
+    monarch_cpp_src.append("python/monarch/common/mock_cuda.cpp")
+
+cpp_ext_modules.append(
+    CppExtension(
+        "monarch.common._C",
+        monarch_cpp_src,
+        extra_compile_args=["-g", "-O3"],
+        libraries=["dl"],
+        include_dirs=[
+            os.path.dirname(os.path.abspath(__file__)),
+            sysconfig.get_config_var("INCLUDEDIR"),
+        ],
     )
 )
+print("   Building common._C C++ extension")
+
+# Only build gradient_generator if tensor_engine is enabled
+if has_feature("tensor_engine"):
+    cpp_ext_modules.append(
+        CppExtension(
+            "monarch.gradient._gradient_generator",
+            ["python/monarch/gradient/_gradient_generator.cpp"],
+            extra_compile_args=["-g", "-O3"],
+            include_dirs=[
+                os.path.dirname(os.path.abspath(__file__)),
+                sysconfig.get_config_var("INCLUDEDIR"),
+            ],
+        )
+    )
+    print("   Building gradient_generator C++ extension")
+else:
+    print("   Skipping gradient_generator C++ extension (tensor_engine not enabled)")
 
-package_name = os.environ.get("MONARCH_PACKAGE_NAME", "monarch")
-package_version = os.environ.get("MONARCH_VERSION", "0.0.1")
+print(f"   C++ extensions: {[ext.name for ext in cpp_ext_modules]}")
 
 setup(
     name=package_name,
@@ -217,10 +289,7 @@ def run(self):
     description="Monarch: Single controller library",
     long_description=readme,
     long_description_content_type="text/markdown",
-    ext_modules=[
-        controller_C,
-        common_C,
-    ],
+    ext_modules=cpp_ext_modules,
     entry_points={
         "console_scripts": [
             "monarch=monarch.tools.cli:main",