Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 8 additions & 3 deletions ddev/src/ddev/cli/env/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,10 +71,15 @@ def agent(app: Application, *, intg_name: str, environment: str, args: tuple[str
finally:
env_data.config_file.unlink()
else:
temp_config_file = env_data.config_file.parent / f'{env_data.config_file.name}.bak.example'
env_data.config_file.replace(temp_config_file)
# Read-modify-restore in place. The previous implementation renamed
# the original config away before writing the override, which left a
# window where the conf.d directory contained no config for this
# integration; if Agent autodiscovery rescanned during that window it
# deregistered the check and the immediately-following `agent check`
# returned "no valid check found".
original_config = env_data.read_config()
try:
env_data.write_config(config)
agent.invoke(full_args)
finally:
temp_config_file.replace(env_data.config_file)
env_data.write_config(original_config)
17 changes: 17 additions & 0 deletions ddev/src/ddev/e2e/agent/docker.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,8 @@ def start(self, *, agent_build: str | None, local_packages: dict[Path, str], env
if local_packages or start_commands or post_install_commands:
self.restart()

self.wait_until_ready()

def _initialize(self, command, local_packages, start_commands, post_install_commands):
process = self._captured_process(command)
if process.returncode:
Expand Down Expand Up @@ -373,6 +375,21 @@ def run_command(self, args: list[str]) -> None:
def enter_shell(self) -> None:
    """Run an interactive shell through this agent's command wrapper.

    ``cmd`` is used when ``self._is_windows_container`` is truthy and
    ``bash`` otherwise. The command is run with ``check=True``.
    """
    if self._is_windows_container:
        shell = 'cmd'
    else:
        shell = 'bash'

    command = self._format_command([shell])
    self._run_command(command, check=True)

def wait_until_ready(self, *, timeout: float = 60.0) -> None:
    """Poll ``agent status`` until the cmd-server is responding.

    Without this gate, callers race the Agent's startup: ``docker run``
    returns before the in-container Agent finishes initializing the
    check loader, so an immediate ``agent check <name>`` can return
    "no valid check found" with exit 255.

    Args:
        timeout: Maximum total time, in seconds, to keep polling before
            giving up.

    Raises:
        RuntimeError: If ``agent status`` is still exiting non-zero once
            the retry budget is exhausted.
    """
    cmd = self._format_command(['agent', 'status', '--json'])
    # ``attempts=None`` lifts stamina's default cap on the number of attempts.
    # Left at the default, the loop could stop after a fixed number of probes,
    # well short of ``timeout``, and a larger caller-supplied ``timeout``
    # would have no effect.
    for attempt in stamina.retry_context(
        on=RuntimeError,
        attempts=None,
        timeout=timeout,
        wait_initial=0.5,
        wait_max=2.0,
    ):
        with attempt:
            proc = self._captured_process(cmd)
            if proc.returncode != 0:
                raise RuntimeError(f'Agent not ready (rc={proc.returncode})')

@stamina.retry(on=RuntimeError, attempts=3)
def __pull_image(self, agent_build):
process = self._run_command(['docker', 'pull', agent_build])
Expand Down
8 changes: 8 additions & 0 deletions ddev/src/ddev/e2e/agent/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,3 +77,11 @@ def invoke(self, args: list[str]) -> None: ...

@abstractmethod
def enter_shell(self) -> None: ...

def wait_until_ready(self, *, timeout: float = 60.0) -> None:
    """Wait for the Agent's command server to start responding.

    This default implementation does nothing and ignores ``timeout``, which
    lets backends adopt a readiness check one at a time. Concrete backends
    should override it with a meaningful probe.
    """
10 changes: 9 additions & 1 deletion ddev/src/ddev/e2e/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,13 +42,21 @@ def write_config(self, config: dict[str, Any] | None) -> None:
if config is None:
return

import os

import yaml

if 'instances' not in config:
config = {'instances': [config]}

self.config_file.parent.ensure_dir_exists()
self.config_file.write_text(yaml.safe_dump(config, default_flow_style=False))
# Write via tmp + os.replace so the file is never transiently absent.
# Agent autodiscovery watches this directory; if it observes the path
# missing it deregisters the integration's check, causing later
# `agent check <name>` invocations to fail with "no valid check found".
tmp = self.config_file.parent / f'.{self.config_file.name}.swap'
tmp.write_text(yaml.safe_dump(config, default_flow_style=False))
os.replace(tmp, self.config_file)

def read_metadata(self) -> dict[str, Any]:
import json
Expand Down
34 changes: 34 additions & 0 deletions ddev/tests/e2e/agent/test_docker.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,12 @@ def free_port(mocker):


class TestStart:
@pytest.fixture(autouse=True)
def _skip_readiness(self, mocker):
    """Stub out ``DockerAgent.wait_until_ready`` for every test in this class.

    The existing ``TestStart`` cases assert exact ``subprocess.run`` call
    lists; the readiness probe is exercised in ``TestWaitUntilReady``.
    """
    probe_method = 'wait_until_ready'
    mocker.patch.object(DockerAgent, probe_method, return_value=None)

@pytest.mark.parametrize(
'agent_build, agent_image, use_jmx',
[
Expand Down Expand Up @@ -1256,3 +1262,31 @@ def test_windows_container(self, app, get_integration, docker_path, mocker):
check=True,
),
]


class TestWaitUntilReady:
    """Exercise ``DockerAgent.wait_until_ready`` against a patched ``subprocess.run``."""

    def test_ready_first_try(self, app, get_integration, docker_path, mocker):
        """A zero exit code on the first probe results in exactly one ``agent status`` call."""
        integration = 'postgres'
        environment = 'py3.12'

        completed = mocker.MagicMock(returncode=0, stdout=b'{}')
        patched_run = mocker.patch('subprocess.run', return_value=completed)

        docker_agent = DockerAgent(app, get_integration(integration), environment, {}, Path('config.yaml'))
        docker_agent.wait_until_ready()

        expected_probe = mocker.call(
            [docker_path, 'exec', f'dd_{integration}_{environment}', 'agent', 'status', '--json'],
            shell=False,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
        )
        assert patched_run.call_args_list == [expected_probe]

    def test_timeout_raises(self, app, get_integration, mocker):
        """A probe that keeps exiting non-zero surfaces ``RuntimeError`` once the timeout is spent."""
        failing = mocker.MagicMock(returncode=1, stdout=b'')
        mocker.patch('subprocess.run', return_value=failing)

        docker_agent = DockerAgent(app, get_integration('postgres'), 'py3.12', {}, Path('config.yaml'))

        with pytest.raises(RuntimeError, match='Agent not ready'):
            docker_agent.wait_until_ready(timeout=0.05)
Loading