Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/reference/workflows.md
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,7 @@ Steps can reference inputs and previous step outputs using `{{ expression }}` sy
| `steps.specify.output.file` | Output from a previous step |
| `item` | Current item in a fan-out iteration |

Available filters: `default`, `join`, `contains`, `map`.
Available filters: `default`, `join`, `contains`, `map`, `from_json`.

Example:

Expand Down
39 changes: 37 additions & 2 deletions src/specify_cli/workflows/expressions.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
"""Sandboxed expression evaluator for workflow templates.

Provides a safe Jinja2 subset for evaluating expressions in workflow YAML.
No file I/O, no imports, no arbitrary code execution.
Templates cannot perform file I/O, import modules, or run arbitrary code —
the evaluator only walks the namespace and applies a fixed set of filters.
"""

from __future__ import annotations

import json
import re
from typing import Any

Expand Down Expand Up @@ -57,6 +59,23 @@ def _filter_contains(value: Any, substring: str) -> bool:
return False


def _filter_from_json(value: Any) -> Any:
"""Parse a JSON string into a typed value (list/dict/scalar).

Raises ``ValueError`` on non-string input or invalid JSON — a parse
failure here means the pipeline wiring is wrong, and silently
passing the unparsed value through would hide it.
"""
if not isinstance(value, str):
raise ValueError(
f"from_json: expected a JSON string, got {type(value).__name__}"
)
try:
return json.loads(value)
except json.JSONDecodeError as exc:
raise ValueError(f"from_json: invalid JSON: {exc}") from exc


# -- Expression resolution ------------------------------------------------

_EXPR_PATTERN = re.compile(r"\{\{(.+?)\}\}")
Expand Down Expand Up @@ -122,7 +141,7 @@ def _evaluate_simple_expression(expr: str, namespace: dict[str, Any]) -> Any:
- Comparisons: ``==``, ``!=``, ``>``, ``<``, ``>=``, ``<=``
- Boolean operators: ``and``, ``or``, ``not``
- ``in``, ``not in``
- Pipe filters: ``| default('...')``, ``| join(', ')``, ``| contains('...')``, ``| map('...')``
- Pipe filters: ``| default('...')``, ``| join(', ')``, ``| contains('...')``, ``| from_json``, ``| map('...')``
- String and numeric literals
"""
Comment thread
mnriem marked this conversation as resolved.
expr = expr.strip()
Expand All @@ -140,6 +159,22 @@ def _evaluate_simple_expression(expr: str, namespace: dict[str, Any]) -> Any:
value = _evaluate_simple_expression(parts[0].strip(), namespace)
filter_expr = parts[1].strip()

# `from_json` is strict: it takes no arguments and tolerates no
# trailing tokens. Match on the leading filter name and require the
# whole filter to be exactly `from_json`, so every mis-wired form
# (`from_json()`, `from_json('x')`, `from_json)`, `from_json extra`)
# fails loudly instead of silently falling through to the
# unknown-filter path and returning the unparsed value. (filter_expr
# is already stripped above.)
leading = re.match(r"\w+", filter_expr)
if leading and leading.group(0) == "from_json":
if filter_expr != "from_json":
raise ValueError(
"from_json: expected '| from_json' with no arguments or "
f"trailing tokens, got '| {filter_expr}'"
)
return _filter_from_json(value)

# Parse filter name and argument
filter_match = re.match(r"(\w+)\((.+)\)", filter_expr)
if filter_match:
Expand Down
53 changes: 53 additions & 0 deletions tests/test_workflows.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,59 @@ def test_filter_contains(self):
ctx = StepContext(inputs={"text": "hello world"})
assert evaluate_expression("{{ inputs.text | contains('world') }}", ctx) is True

def test_filter_from_json_parses_object(self):
    """A JSON object string in a step's stdout is parsed into a dict."""
    from specify_cli.workflows.expressions import evaluate_expression
    from specify_cli.workflows.base import StepContext

    emit_output = {"stdout": '{"items": [1, 2, 3]}'}
    ctx = StepContext(steps={"emit": {"output": emit_output}})

    template = "{{ steps.emit.output.stdout | from_json }}"
    parsed = evaluate_expression(template, ctx)

    assert parsed == {"items": [1, 2, 3]}

def test_filter_from_json_invalid_json_raises(self):
    """Text that is not JSON makes `from_json` raise a ValueError."""
    import pytest
    from specify_cli.workflows.expressions import evaluate_expression
    from specify_cli.workflows.base import StepContext

    emit_output = {"stdout": "not json"}
    ctx = StepContext(steps={"emit": {"output": emit_output}})
    template = "{{ steps.emit.output.stdout | from_json }}"

    with pytest.raises(ValueError, match="from_json: invalid JSON"):
        evaluate_expression(template, ctx)

def test_filter_from_json_non_string_raises(self):
    """Piping a non-string value (an int here) to `from_json` raises."""
    import pytest
    from specify_cli.workflows.expressions import evaluate_expression
    from specify_cli.workflows.base import StepContext

    emit_output = {"exit_code": 0}
    ctx = StepContext(steps={"emit": {"output": emit_output}})
    template = "{{ steps.emit.output.exit_code | from_json }}"

    with pytest.raises(ValueError, match="expected a JSON string"):
        evaluate_expression(template, ctx)

def test_filter_from_json_rejects_malformed_forms(self):
    """Only the bare `| from_json` spelling is accepted.

    The filter takes no arguments and allows nothing after its name.
    Each variant below (empty parentheses, a stray argument, a dangling
    parenthesis, trailing tokens) must raise instead of slipping through
    to the unknown-filter path and yielding the unparsed value.
    """
    import pytest
    from specify_cli.workflows.expressions import evaluate_expression
    from specify_cli.workflows.base import StepContext

    ctx = StepContext(steps={"emit": {"output": {"stdout": '{"a": 1}'}}})

    head = "{{ steps.emit.output.stdout | "
    tail = " }}"
    malformed_templates = [
        head + form + tail
        for form in (
            "from_json()",
            "from_json('x')",
            "from_json ()",
            "from_json ('x')",
            "from_json)",
            "from_json extra",
            "from_json 'x'",
        )
    ]

    for template in malformed_templates:
        with pytest.raises(ValueError, match="from_json: expected"):
            evaluate_expression(template, ctx)

def test_condition_evaluation(self):
from specify_cli.workflows.expressions import evaluate_condition
from specify_cli.workflows.base import StepContext
Expand Down
1 change: 1 addition & 0 deletions workflows/ARCHITECTURE.md
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ Workflow definitions use Jinja2-like `{{ expression }}` syntax for dynamic value
| Filter: `join` | `{{ list \| join(', ') }}` | Join list elements |
| Filter: `contains` | `{{ text \| contains('sub') }}` | Substring/membership check |
| Filter: `map` | `{{ list \| map('attr') }}` | Extract attribute from each item |
| Filter: `from_json` | `{{ steps.emit.output.stdout \| from_json }}` | Parse a JSON string into a typed value (raises on invalid JSON) |

**Single expressions** (`{{ expr }}` only) return typed values. **Mixed templates** (`"text {{ expr }} more"`) return interpolated strings.

Expand Down
2 changes: 1 addition & 1 deletion workflows/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,7 @@ condition: "{{ steps.run-tests.output.exit_code != 0 }}"
message: "{{ status | default('pending') }}"
```

Supported filters: `default`, `join`, `contains`, `map`.
Supported filters: `default`, `join`, `contains`, `map`, `from_json`.

### Runtime Context

Expand Down