NVIDIA-NeMo · hemildesai · Jan 16, 2026 · Jan 16, 2026 · Jan 18, 2026
diff --git a/nemo_run/config.py b/nemo_run/config.py
@@ -468,13 +468,33 @@ def get_name(self):
             return os.path.basename(self.path)
 
     def to_command(
-        self, with_entrypoint: bool = False, filename: Optional[str] = None, is_local: bool = False
+        self,
+        with_entrypoint: bool = False,
+        filename: Optional[str] = None,
+        is_local: bool = False,
+        substitute_rundir_path: Optional[str] = None,
     ) -> list[str]:
+        """Convert the script to a command.
+
+        Args:
+            with_entrypoint: If True, prepend the entrypoint to the command.
+            filename: If provided, write the inline script to this file.
+            is_local: If True, use the local filename in the command.
+            substitute_rundir_path: If provided, substitute /{RUNDIR_NAME} paths
+                with this path in the inline script content. Used for non-container
+                mode where container paths need to be replaced with actual cluster paths.
+        """
         if self.inline:
             if filename:
                 os.makedirs(os.path.dirname(filename), exist_ok=True)
+                inline_content = self.inline
+                # Substitute /{RUNDIR_NAME} paths if specified (non-container mode)
+                if substitute_rundir_path is not None:
+                    inline_content = inline_content.replace(
+                        f"/{RUNDIR_NAME}", substitute_rundir_path
+                    )
                 with open(filename, "w") as f:
-                    f.write("#!/usr/bin/bash\n" + self.inline)
+                    f.write("#!/usr/bin/bash\n" + inline_content)
 
                 if is_local:
                     cmd = [filename]

diff --git a/nemo_run/core/execution/slurm.py b/nemo_run/core/execution/slurm.py
@@ -936,7 +936,18 @@ def get_container_flags(
             container_image: Optional[str],
             container_env: Optional[list[str]] = None,
         ) -> list[str]:
-            _container_flags = ["--container-image", container_image] if container_image else []
+            """Get srun flags for container or non-container mode.
+
+            For non-container mode, returns --chdir flag to set working directory.
+            For container mode, returns container-related flags (image, mounts, workdir, env).
+            """
+            if container_image is None:
+                # Non-container mode: use --chdir to set working directory
+                workdir = os.path.join(src_job_dir, "code")
+                return ["--chdir", workdir]
+
+            # Container mode: set up container mounts and workdir
+            _container_flags = ["--container-image", container_image]
 
             new_mounts = copy.deepcopy(base_mounts)
             for i, mount in enumerate(new_mounts):
@@ -1077,6 +1088,18 @@ def get_container_flags(
             vars_to_fill["fault_tol_job_results_file"] = self.launcher.job_results_file
 
         sbatch_script = fill_template("slurm.sh.j2", vars_to_fill)
+
+        # For non-container mode, substitute /{RUNDIR_NAME} paths with actual job directory
+        # Check both top-level container_image and resource_group container images
+        has_container = self.executor.container_image is not None
+        if self.executor.run_as_group and self.executor.resource_group:
+            has_container = has_container or any(
+                rg.container_image is not None for rg in self.executor.resource_group
+            )
+        if not has_container:
+            actual_job_dir = os.path.join(slurm_job_dir, job_directory_name)
+            sbatch_script = sbatch_script.replace(f"/{RUNDIR_NAME}", actual_job_dir)
+
         return sbatch_script
 
     def __repr__(self) -> str:

diff --git a/nemo_run/run/torchx_backend/packaging.py b/nemo_run/run/torchx_backend/packaging.py
@@ -15,6 +15,7 @@
 
 import logging
 import os
+from pathlib import Path
 from typing import Iterator, Optional, Type, Union
 
 import fiddle as fdl
@@ -120,9 +121,23 @@ def _get_details_from_script(fn_or_script: Script, serialize_configs: bool):
                 log.warning(f"Failed saving yaml configs due to: {e}")
 
         args = fn_or_script.args
+
+        # For SlurmExecutor without container, substitute /{RUNDIR_NAME} paths
+        # with actual cluster paths in inline scripts
+        substitute_rundir_path = None
+        if (
+            isinstance(executor, SlurmExecutor)
+            and executor.container_image is None
+            and executor.tunnel is not None
+        ):
+            substitute_rundir_path = os.path.join(
+                executor.tunnel.job_dir, Path(executor.job_dir).name
+            )
+
         role_args = fn_or_script.to_command(
             filename=os.path.join(executor.job_dir, SCRIPTS_DIR, f"{name}.sh"),
             is_local=True if isinstance(executor, LocalExecutor) else False,
+            substitute_rundir_path=substitute_rundir_path,
         )
         m = fn_or_script.path if fn_or_script.m else None
         no_python = fn_or_script.entrypoint != "python"

diff --git a/test/core/execution/artifacts/dummy_slurm.sh b/test/core/execution/artifacts/dummy_slurm.sh
@@ -33,7 +33,7 @@ export ENV_VAR=value
 
 # Command 1
 
-srun --output /root/sample_job/log-account-account.sample_job_%j_${SLURM_RESTART_COUNT:-0}.out --container-mounts /root/sample_job:/nemo_run --container-workdir /nemo_run/code --wait=60 --kill-on-bad-exit=1 cmd3 cmd4
+srun --output /root/sample_job/log-account-account.sample_job_%j_${SLURM_RESTART_COUNT:-0}.out --container-image test_image --container-mounts /root/sample_job:/nemo_run --container-workdir /nemo_run/code --wait=60 --kill-on-bad-exit=1 cmd3 cmd4
 
 exitcode=$?
 

diff --git a/test/core/execution/artifacts/ft_slurm.sh b/test/core/execution/artifacts/ft_slurm.sh
@@ -62,7 +62,7 @@ echo "$SLURM_JOB_ID ${SLURM_RESTART_COUNT:-0} X" >> "$JOB_RESULTS_FILE"
 
 # Command 1
 
-srun --output /root/sample_job/log-account-account.sample_job_%j_${SLURM_RESTART_COUNT:-0}.out --container-mounts /root/sample_job:/nemo_run --container-workdir /nemo_run/code --wait=60 --kill-on-bad-exit=1 ft_launcher --ft-param-workload_check_interval 10 --ft-param-rank_heartbeat_timeout 10 --rdzv-backend c10d --rdzv-endpoint localhost:0 --rdzv-id 7680 --nnodes 1 --nproc-per-node 1 --node-rank 0 --tee 3 --no-python test_ft.sh
+srun --output /root/sample_job/log-account-account.sample_job_%j_${SLURM_RESTART_COUNT:-0}.out --container-image test_image --container-mounts /root/sample_job:/nemo_run --container-workdir /nemo_run/code --wait=60 --kill-on-bad-exit=1 ft_launcher --ft-param-workload_check_interval 10 --ft-param-rank_heartbeat_timeout 10 --rdzv-backend c10d --rdzv-endpoint localhost:0 --rdzv-id 7680 --nnodes 1 --nproc-per-node 1 --node-rank 0 --tee 3 --no-python test_ft.sh
 
 exitcode=$?
 

diff --git a/test/core/execution/test_slurm.py b/test/core/execution/test_slurm.py
@@ -18,8 +18,10 @@
 
 import pytest
 
+from nemo_run.config import RUNDIR_NAME
 from nemo_run.core.execution.launcher import SlurmTemplate, Torchrun
 from nemo_run.core.execution.slurm import (
+    SlurmBatchRequest,
     SlurmExecutor,
     SlurmJobDetails,
     SlurmTunnelCallback,
@@ -403,3 +405,148 @@ def test_merge_mismatch(self):
                 [SlurmExecutor(account="account1"), SlurmExecutor(account="account2")],
                 num_tasks=3,
             )
+
+
+class TestSlurmBatchRequestNonContainerMode:
+    """Tests for non-container mode support (container_image=None)."""
+
+    @pytest.fixture
+    def executor_with_container(self):
+        """Create an executor with container image."""
+        executor = SlurmExecutor(
+            account="test_account",
+            partition="gpu",
+            nodes=2,
+            ntasks_per_node=8,
+            container_image="nvcr.io/nvidia/pytorch:24.01-py3",
+            container_mounts=["/data:/data"],
+        )
+        executor.job_name = "test-job"
+        executor.experiment_dir = "/local/experiments"
+        executor.job_dir = "/local/experiments/test-job"
+        executor.experiment_id = "exp-123"
+
+        # Mock tunnel
+        tunnel = MagicMock(spec=LocalTunnel)
+        tunnel.job_dir = "/remote/experiments/exp-123"
+        executor.tunnel = tunnel
+
+        return executor
+
+    @pytest.fixture
+    def executor_without_container(self):
+        """Create an executor without container image (non-container mode)."""
+        executor = SlurmExecutor(
+            account="test_account",
+            partition="gpu",
+            nodes=2,
+            ntasks_per_node=8,
+            container_image=None,  # Non-container mode
+        )
+        executor.job_name = "test-job"
+        executor.experiment_dir = "/local/experiments"
+        executor.job_dir = "/local/experiments/test-job"
+        executor.experiment_id = "exp-123"
+
+        # Mock tunnel
+        tunnel = MagicMock(spec=LocalTunnel)
+        tunnel.job_dir = "/remote/experiments/exp-123"
+        executor.tunnel = tunnel
+
+        return executor
+
+    def test_materialize_with_container_uses_container_flags(self, executor_with_container):
+        """Test that materialize uses container flags when container_image is set."""
+        request = SlurmBatchRequest(
+            launch_cmd=["sbatch", "--parsable"],
+            jobs=["test-job"],
+            command_groups=[["python train.py"]],
+            executor=executor_with_container,
+            max_retries=0,
+            extra_env={},
+        )
+
+        script = request.materialize()
+
+        # Should contain container flags
+        assert "--container-image" in script
+        assert "--container-mounts" in script
+        assert "--container-workdir" in script
+        # Should NOT contain --chdir (used for non-container mode)
+        assert "--chdir" not in script
+        # Should contain /nemo_run paths (not substituted)
+        assert f"/{RUNDIR_NAME}" in script
+
+    def test_materialize_without_container_uses_chdir(self, executor_without_container):
+        """Test that materialize uses --chdir when container_image is None."""
+        request = SlurmBatchRequest(
+            launch_cmd=["sbatch", "--parsable"],
+            jobs=["test-job"],
+            command_groups=[["python train.py"]],
+            executor=executor_without_container,
+            max_retries=0,
+            extra_env={},
+        )
+
+        script = request.materialize()
+
+        # Should contain --chdir flag for working directory
+        assert "--chdir" in script
+        # Should NOT contain container flags
+        assert "--container-image" not in script
+        assert "--container-mounts" not in script
+        assert "--container-workdir" not in script
+
+    def test_materialize_without_container_substitutes_rundir_paths(
+        self, executor_without_container
+    ):
+        """Test that /{RUNDIR_NAME} paths are substituted with actual paths in non-container mode."""
+        request = SlurmBatchRequest(
+            launch_cmd=["sbatch", "--parsable"],
+            jobs=["test-job"],
+            command_groups=[["python train.py"]],
+            executor=executor_without_container,
+            max_retries=0,
+            extra_env={},
+        )
+
+        script = request.materialize()
+
+        # Should NOT contain /nemo_run paths (should be substituted)
+        assert f"/{RUNDIR_NAME}/code" not in script
+        # Should contain the actual job directory path
+        actual_job_dir = "/remote/experiments/exp-123/test-job"
+        assert f"{actual_job_dir}/code" in script
+
+    def test_materialize_with_container_preserves_rundir_paths(self, executor_with_container):
+        """Test that /{RUNDIR_NAME} paths are NOT substituted when using container."""
+        request = SlurmBatchRequest(
+            launch_cmd=["sbatch", "--parsable"],
+            jobs=["test-job"],
+            command_groups=[["python train.py"]],
+            executor=executor_with_container,
+            max_retries=0,
+            extra_env={},
+        )
+
+        script = request.materialize()
+
+        # Should contain /nemo_run paths (not substituted for container mode)
+        assert f"/{RUNDIR_NAME}" in script
+
+    def test_non_container_mode_chdir_points_to_code_directory(self, executor_without_container):
+        """Test that --chdir in non-container mode points to the code directory."""
+        request = SlurmBatchRequest(
+            launch_cmd=["sbatch", "--parsable"],
+            jobs=["test-job"],
+            command_groups=[["python train.py"]],
+            executor=executor_without_container,
+            max_retries=0,
+            extra_env={},
+        )
+
+        script = request.materialize()
+
+        # The --chdir should point to {job_dir}/code
+        expected_chdir = "--chdir /remote/experiments/exp-123/test-job/code"
+        assert expected_chdir in script
diff --git a/test/core/execution/test_slurm_templates.py b/test/core/execution/test_slurm_templates.py
@@ -54,6 +54,7 @@ def dummy_slurm_request_with_artifact(
             account="account",
             job_dir="/root/sample_job",
             tunnel=LocalTunnel(job_dir="/root"),
+            container_image="test_image",
         )
         slurm_config.job_name = "sample_job"
         max_retries = 3
@@ -79,6 +80,7 @@ def ft_slurm_request_with_artifact(
             account="account",
             job_dir="/root/sample_job",
             tunnel=LocalTunnel(job_dir="/root/"),
+            container_image="test_image",
         )
         slurm_config.job_name = "sample_job"
         slurm_config.launcher = FaultTolerance(