Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog/68326.fixed.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fixed the `salt.state` orchestrate state silently reporting only `Run failed on minions: <minion>` when a targeted minion returned `False`, no return at all, or a list of error strings. The orchestrate comment now includes the per-minion failure detail (the minion's actual return value or "did not return a state result") so operators can diagnose `salt-run state.orchestrate` failures without re-running with extra logging.
76 changes: 76 additions & 0 deletions salt/states/saltmod.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,48 @@ def run_thread():
return outputs


def _format_failure_detail(minion, m_ret, m_no_return):
"""
Build a one-line human-readable detail string explaining a
minion-level failure that did *not* produce a state-result dict.

The orchestrate state historically reported these failures as a bare
``Run failed on minions: <minion>`` line with an opaque ``False`` (or
list of error strings) buried in ``changes``. That left operators
with no clue what went wrong (issue #68326). This helper produces
the per-minion line we append to that comment.

The contract:

* ``m_no_return=True`` means the minion's response had no ``ret``
key at all (the master flagged it ``failed: True``, or otherwise
lost the return). We say so explicitly.
* A list/tuple ``m_ret`` is treated as a sequence of error strings
(state.sls returns this shape for pillar errors, queue conflicts,
and similar pre-compilation failures) and joined.
* Any other non-dict ``m_ret`` (``False`` is the case that triggered
the report) is reproduced literally so the operator can see what
came back.
* A dict ``m_ret`` reaching this helper means
``state.check_result`` rejected the high-level shape; we omit a
detail line and let the existing ``changes`` payload speak for
itself rather than dumping the full dict into the comment.

Returns an empty string when there is no useful one-line summary to
add; callers should treat empty as "nothing to append".
"""
if m_no_return:
return f"Minion {minion} did not return a state result."
if isinstance(m_ret, (list, tuple)):
parts = [str(item) for item in m_ret if item]
if not parts:
return f"Minion {minion} returned an empty error list."
return "Minion {} returned errors: {}".format(minion, "; ".join(parts))
if isinstance(m_ret, dict):
return ""
return f"Minion {minion} returned {m_ret!r} instead of a state result."


def state(
name,
tgt,
Expand Down Expand Up @@ -364,6 +406,12 @@ def state(

changes = {}
fail = set()
# Per-minion human-readable detail collected for failures whose minion
# return value isn't a state-result dict (e.g. False, a list of error
# strings, or no return at all). Surfaced in the final comment so users
# see *why* their orchestrate run failed instead of only seeing the
# opaque minion-id list (issue #68326).
fail_detail = {}
no_change = set()

if fail_minions is None:
Expand All @@ -386,24 +434,41 @@ def state(
log.warning("Output from salt state not highstate")

m_ret = False
# Track whether the minion had no usable return at all, separate
# from "the return was present but indicated failure" so we can
# explain the difference in the comment.
m_no_return = False

if "return" in mdata and "ret" not in mdata:
mdata["ret"] = mdata.pop("return")

m_state = True
# Capture whatever the minion actually returned (if anything) for
# use in failure-detail formatting. The legacy code only consumes
# ``mdata["ret"]`` on the non-failed path; for `failed: True`
# responses with a useful ``ret`` payload we still want to show
# the user what came back rather than reporting a bare ``False``.
m_raw_ret = mdata.get("ret", mdata.get("return", None))
if mdata.get("failed", False):
m_state = False
m_no_return = "ret" not in mdata and "return" not in mdata
else:
try:
m_ret = mdata["ret"]
except KeyError:
m_state = False
m_no_return = True
if m_state:
m_state = __utils__["state.check_result"](m_ret, recurse=True)

if not m_state:
if minion not in fail_minions:
fail.add(minion)
fail_detail[minion] = _format_failure_detail(
minion,
m_raw_ret if m_raw_ret is not None else m_ret,
m_no_return,
)
changes[minion] = m_ret
continue
try:
Expand All @@ -423,6 +488,17 @@ def state(
if len(fail) > allow_fail:
state_ret["result"] = False
state_ret["comment"] = "Run failed on minions: {}".format(", ".join(fail))
# Append per-minion detail for any failures whose minion return
# was not a state-result dict — without this, the orchestrate
# output is just an opaque ``False`` in ``changes`` and gives the
# operator no clue what went wrong (issue #68326).
details = [
fail_detail[minion]
for minion in sorted(fail)
if minion in fail_detail and fail_detail[minion]
]
if details:
state_ret["comment"] += "\n" + "\n".join(details)
else:
state_ret["comment"] = "States ran successfully."
if changes:
Expand Down
112 changes: 112 additions & 0 deletions tests/pytests/unit/states/saltmod/test_state.py
Original file line number Diff line number Diff line change
Expand Up @@ -740,6 +740,118 @@ def test_state_failed_and_expected_minions():
assert ret == expected


def test_state_minion_ret_false_includes_detail_in_comment():
"""
When a targeted minion's state return value is the scalar ``False``
(for example because the minion timed out at the master level or
because state.sls on the minion failed before any state ran), the
``salt.state`` orchestrate state must surface that detail in its
comment instead of leaving the user with only ``Run failed on
minions: <minion>`` and an opaque ``False`` in ``changes``.

Regression test for #68326.
"""
name = "state"
tgt = "minion1"

cmd_ret = {
"10.0.0.1": {
"jid": "20170406104341210934",
"retcode": 1,
"ret": False,
"out": "highstate",
},
}

with patch.dict(saltmod.__opts__, {"test": False}):
mock = MagicMock(return_value=cmd_ret)
with patch.dict(saltmod.__salt__, {"saltutil.cmd": mock}):
ret = saltmod.state(name, tgt, highstate=True)

assert ret["result"] is False
assert ret["changes"] == {"out": "highstate", "ret": {"10.0.0.1": False}}
# The bare "Run failed on minions: ..." comment from before the fix
# is exactly what the issue complains about. The comment must now
# include the minion identifier *and* useful detail about what came
# back so that the user can begin to diagnose the failure.
assert "10.0.0.1" in ret["comment"]
assert ret["comment"] != "Run failed on minions: 10.0.0.1"
lowered = ret["comment"].lower()
assert "false" in lowered or "no return" in lowered or "did not return" in lowered


def test_state_minion_failed_flag_includes_detail_in_comment():
"""
When ``saltutil.cmd`` flags a minion's response with ``failed: True``
but no useful ``ret`` payload was attached, the orchestrate state
must say so in its comment instead of returning a bare
"Run failed on minions: <minion>" line.

Regression test for #68326.
"""
name = "state"
tgt = "minion1"

cmd_ret = {
"10.0.0.1": {
"jid": "20170406104341210934",
"retcode": 1,
"failed": True,
"out": "highstate",
},
}

with patch.dict(saltmod.__opts__, {"test": False}):
mock = MagicMock(return_value=cmd_ret)
with patch.dict(saltmod.__salt__, {"saltutil.cmd": mock}):
ret = saltmod.state(name, tgt, highstate=True)

assert ret["result"] is False
assert "10.0.0.1" in ret["comment"]
assert ret["comment"] != "Run failed on minions: 10.0.0.1"
assert (
"did not return" in ret["comment"].lower()
or "no return" in ret["comment"].lower()
)


def test_state_minion_ret_error_list_includes_detail_in_comment():
"""
When a minion's state return value is a list of error strings (for
example pillar render errors, or a state.sls queue conflict), the
orchestrate state must include those error messages in its comment
so the user can see why the run failed.

Regression test for #68326.
"""
name = "state"
tgt = "minion1"

error_msg = (
'The function "state.highstate" is running as PID 1234 and was '
"started at 2025, Sep 12 22:23:04.123456 with jid 20250912142302454180"
)
cmd_ret = {
"10.0.0.1": {
"jid": "20170406104341210934",
"retcode": 1,
"ret": [error_msg],
"out": "highstate",
},
}

with patch.dict(saltmod.__opts__, {"test": False}):
mock = MagicMock(return_value=cmd_ret)
with patch.dict(saltmod.__salt__, {"saltutil.cmd": mock}):
ret = saltmod.state(name, tgt, highstate=True)

assert ret["result"] is False
assert "10.0.0.1" in ret["comment"]
# The error message text must appear in the orchestrate comment,
# not just be hidden inside `changes`.
assert error_msg in ret["comment"]


def test_state_allow_fail():

name = "state"
Expand Down