Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
a0363c0
Add crawler
samwaseda Feb 23, 2026
f9488a2
Add docstring
samwaseda Feb 23, 2026
c32f720
Remove function_id
samwaseda Feb 23, 2026
2bf31ea
Add tests
samwaseda Feb 23, 2026
9776b8a
black
samwaseda Feb 23, 2026
8f00057
Add more tests
samwaseda Feb 23, 2026
bcf2973
Remove the part with closure
samwaseda Feb 23, 2026
756207c
Update environemnt
samwaseda Feb 23, 2026
fbdb91b
[dependabot skip] Update env file
pyiron-runner Feb 23, 2026
1990a4d
ruff
samwaseda Feb 23, 2026
53162c0
Merge branch 'main' into crawler
liamhuber Feb 24, 2026
09cd047
Use google scoping style
liamhuber Feb 24, 2026
38aa637
Use object_scope
liamhuber Feb 24, 2026
ddccbdf
Add hints
liamhuber Feb 24, 2026
5ca15c3
Do it in a single recursive function
liamhuber Feb 24, 2026
cdf03d5
Add tests
liamhuber Feb 24, 2026
ac96a5e
Use google scoping style
liamhuber Feb 24, 2026
ad8f016
Merge branch 'crawler' into crawler-edits
samwaseda Feb 24, 2026
b1110ee
Get rid of what was before
samwaseda Feb 24, 2026
21eeb49
Merge pull request #156 from pyiron/crawler-edits
samwaseda Feb 24, 2026
b5e02da
Apparently I had failed to push the changes...
samwaseda Feb 24, 2026
1e8d56e
Merge branch 'crawler' of github.com:pyiron/flowrep into crawler
samwaseda Feb 24, 2026
f6ba6b6
[ruff] remove unused math
samwaseda Feb 24, 2026
dc369fd
Make key-function pairs and not key-list[function]
samwaseda Feb 24, 2026
b4cd1c3
black
samwaseda Feb 24, 2026
eac82eb
ruff and mypy
samwaseda Feb 24, 2026
44a361a
Merge branch 'main' into crawler
liamhuber Feb 24, 2026
90c5538
Remove unused test functions
liamhuber Feb 24, 2026
befd245
Update docstring
liamhuber Feb 24, 2026
06ec56d
Rename variable
liamhuber Feb 24, 2026
850ab4e
Update flowrep/crawler.py
samwaseda Feb 24, 2026
fd5ec56
Merge remote-tracking branch 'origin/crawler' into crawler
liamhuber Feb 24, 2026
ef89712
Transform continue guard to failure
liamhuber Feb 24, 2026
b880f18
Extend tests
liamhuber Feb 24, 2026
4aafbd3
Move and rename
liamhuber Feb 24, 2026
ad7af9c
Add comment
liamhuber Feb 24, 2026
d81e55e
Add test
liamhuber Feb 24, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
111 changes: 111 additions & 0 deletions flowrep/models/parsers/dependency_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
import ast
import types
from collections.abc import Callable

from pyiron_snippets import versions

from flowrep.models.parsers import object_scope, parser_helpers

# Maps the version identity of each discovered callable to the callable itself.
CallDependencies = dict[versions.VersionInfo, Callable]


def get_call_dependencies(
    func: types.FunctionType,
    version_scraping: versions.VersionScrapingMap | None = None,
    _call_dependencies: CallDependencies | None = None,
    _visited: set[str] | None = None,
) -> CallDependencies:
    """
    Recursively collect all callable dependencies of *func* via AST introspection.

    Each dependency is keyed by its :class:`~pyiron_snippets.versions.VersionInfo`
    and maps to the callable with that identity. The search is depth-first: for
    every resolved callee that is a :class:`~types.FunctionType` (i.e. has
    inspectable source), the function recurses into the callee's own scope.

    Args:
        func: The function whose call-graph to analyse.
        version_scraping (VersionScrapingMap | None): Since some modules may store
            their version in other ways, this provides an optional map between module
            names and callables to leverage for extracting that module's version.
        _call_dependencies: Accumulator for recursive calls — do not pass manually.
        _visited: Fully-qualified names already traversed — do not pass manually.

    Returns:
        A mapping from :class:`VersionInfo` to the callable found under that
        identity across the entire (sub-)tree. When an accumulator was supplied,
        that same object is filled in and returned.

    Raises:
        TypeError: If a call target resolves to an object that is not callable.
        Exception: Whatever ``parser_helpers.get_ast_function_node`` raises for
            *func* or for any function reached during the recursion is not caught
            here and propagates to the caller.
    """
    # Test against None explicitly: an *empty* accumulator handed in by the caller
    # is falsy, and `x or {}` would silently swap it for a fresh container, so the
    # caller's object would never be filled.
    call_dependencies: CallDependencies = (
        {} if _call_dependencies is None else _call_dependencies
    )
    visited: set[str] = set() if _visited is None else _visited

    # Cycle guard: each fully-qualified name is crawled at most once
    func_fqn = versions.VersionInfo.of(func).fully_qualified_name
    if func_fqn in visited:
        return call_dependencies
    visited.add(func_fqn)

    scope = object_scope.get_scope(func)
    # NOTE(review): PR review feedback reports that this raises
    # SourceCodeUnavailableError (said to be a ValueError subclass) when the source
    # of `func` cannot be retrieved, e.g. for lambdas or compiled code — confirm
    # against parser_helpers. It is not caught here, so it reaches the caller.
    tree = parser_helpers.get_ast_function_node(func)
    collector = CallCollector()
    collector.visit(tree)

    for call in collector.calls:
        try:
            caller = object_scope.resolve_symbol_to_object(call, scope)
        except (ValueError, TypeError):
            # The call target could not be resolved to an object; leave it out
            continue

        if not callable(caller):  # pragma: no cover
            # Under remotely normal circumstances, this should be unreachable
            raise TypeError(
                f"Caller {caller} is not callable, yet was generated from the list of "
                f"ast.Call calls, in particular {call}. We're expecting these to "
                f"actually connect to callables. Please raise a GitHub issue if you "
                f"think this is not a mistake."
            )

        info = versions.VersionInfo.of(caller, version_scraping=version_scraping)
        # In principle, we open ourselves to overwriting an existing dependency here,
        # but it would need to somehow have exactly the same version info (including
        # qualname) yet be a different object.
        # This ought not happen by accident, and in case it somehow does happen on
        # purpose (it probably shouldn't), we just silently keep the more recent one.
        call_dependencies[info] = caller

        # Depth-first search on dependencies — only possible when we have source
        if isinstance(caller, types.FunctionType):
            get_call_dependencies(caller, version_scraping, call_dependencies, visited)

    return call_dependencies
Comment on lines +12 to +79
Copy link

Copilot AI Feb 24, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The PR description shows an example using crawler.analyze_function_dependencies(more_op), but this function doesn't exist in the implementation. The actual function name is get_call_dependencies. Either the PR description should be updated to use the correct function name, or if analyze_function_dependencies is intended to be a public API wrapper, it should be implemented.

Copilot uses AI. Check for mistakes.


def split_by_version_availability(
    call_dependencies: CallDependencies,
) -> tuple[CallDependencies, CallDependencies]:
    """
    Split *call_dependencies* into entries with and without a version.

    An entry counts as unversioned only when its key's ``version`` is ``None``;
    any other value places it in the versioned group.

    Args:
        call_dependencies: The dependency map to split.

    Returns:
        A ``(has_version, no_version)`` pair of :data:`CallDependencies` dicts.
    """
    versioned: CallDependencies = {}
    unversioned: CallDependencies = {}
    for key, target in call_dependencies.items():
        # Choose the destination first, then do a single insertion
        bucket = unversioned if key.version is None else versioned
        bucket[key] = target

    return versioned, unversioned


class CallCollector(ast.NodeVisitor):
    """
    AST visitor that records the callee expression of every call it visits.

    After :meth:`visit` has walked a tree, ``calls`` holds the ``func`` node of each
    :class:`ast.Call`, with an enclosing call recorded before the calls nested in it.
    """

    def __init__(self):
        # Callee expressions (the ``func`` of each ast.Call), in visiting order
        self.calls: list[ast.expr] = []

    def visit_Call(self, node: ast.Call) -> None:
        """Record the callee of *node*, then continue into its children."""
        callee = node.func
        self.calls.append(callee)
        # Keep descending so calls nested in the callee or the arguments are seen
        self.generic_visit(node)
254 changes: 254 additions & 0 deletions tests/unit/models/parsers/test_dependency_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,254 @@
import math
import unittest

from pyiron_snippets import versions

from flowrep.models.parsers import dependency_parser

# ---------------------------------------------------------------------------
# Helper functions defined at module level so they have inspectable source,
# a proper __module__, and a stable __qualname__.
# ---------------------------------------------------------------------------


def _leaf():
    """Fixture: takes no arguments, makes no calls, and returns a constant."""
    return 42


def _single_call():
    """Fixture: makes exactly one direct call, to :func:`_leaf`."""
    return _leaf()


def _diamond_a():
    """Fixture: one arm of the diamond; calls :func:`_leaf`."""
    return _leaf()


def _diamond_b():
    """Fixture: the other arm of the diamond; also calls :func:`_leaf`."""
    return _leaf()


def _diamond_root():
    """Fixture: calls both diamond arms, each of which calls :func:`_leaf`."""
    _diamond_a()
    _diamond_b()


# Mutual recursion to exercise cycle detection.
def _cycle_a():
    """Fixture: first half of a mutually recursive pair; calls :func:`_cycle_b`."""
    return _cycle_b()  # noqa: F821 — defined below


def _cycle_b():
    """Fixture: second half of the mutually recursive pair; calls :func:`_cycle_a`."""
    return _cycle_a()


def _no_calls():
    """Fixture: the body is plain arithmetic and contains no call expression."""
    x = 1 + 2
    return x


def _calls_len():
    """Fixture: calls the builtin :func:`len`."""
    return len([1, 2, 3])


def _nested_call():
    """Fixture: calls :func:`_single_call`, which in turn calls :func:`_leaf`."""
    return _single_call()


def _multi_call():
    """Fixture: calls :func:`_leaf` twice."""
    a = _leaf()
    b = _leaf()
    return a + b


def _attribute_access(x):
    """Fixture: calls a function reached via attribute access (``math.sqrt``)."""
    return math.sqrt(x)


def _nested_expression(x, y, z):
    """
    Fixture: one call nested inside the argument list of another.

    NOTE(review): the arguments passed here do not match the zero-argument
    signatures of :func:`_single_call` and :func:`_leaf`; presumably this function
    is only ever parsed by the crawler and never executed — confirm before calling.
    """
    return _single_call(_leaf(x, y), z)


def _unresolvable_subscript():
    """Fixture: the call target is a subscript expression, not a name or attribute."""
    d = {}
    return d["key"]()


def _calls_non_callable():
    """Fixture: binds and returns a local; despite the name, it contains no call."""
    x = 42
    return x


def _fqn(func) -> str:
    """Return the fully qualified name that ``VersionInfo`` reports for *func*."""
    info = versions.VersionInfo.of(func)
    return info.fully_qualified_name


def _fqns(deps: dependency_parser.CallDependencies) -> set[str]:
    """Collect the fully qualified name of every key in *deps* into a set."""
    names: set[str] = set()
    for key in deps:
        names.add(key.fully_qualified_name)
    return names


class TestGetCallDependencies(unittest.TestCase):
    """Tests for :func:`dependency_parser.get_call_dependencies`."""

    # --- basic behaviour ---

    def test_no_calls_returns_empty(self):
        deps = dependency_parser.get_call_dependencies(_no_calls)
        self.assertEqual(deps, {})

    def test_single_direct_call(self):
        deps = dependency_parser.get_call_dependencies(_single_call)
        self.assertIn(_fqn(_leaf), _fqns(deps))

    def test_transitive_dependencies(self):
        deps = dependency_parser.get_call_dependencies(_nested_call)
        fqns = _fqns(deps)
        # Should find both _single_call and _leaf
        self.assertIn(_fqn(_single_call), fqns)
        self.assertIn(_fqn(_leaf), fqns)

    def test_diamond_dependency_no_duplicate_keys(self):
        """
        _diamond_root -> _diamond_a -> _leaf AND _diamond_root -> _diamond_b -> _leaf.
        _leaf's VersionInfo should appear exactly once as a key.
        """
        deps = dependency_parser.get_call_dependencies(_diamond_root)
        matching = [info for info in deps if info.fully_qualified_name == _fqn(_leaf)]
        self.assertEqual(len(matching), 1)

    def test_duplicate_call_deduplicated_by_version_info(self):
        """Calling the same function twice yields a single key, not two."""
        deps = dependency_parser.get_call_dependencies(_multi_call)
        matching = [info for info in deps if info.fully_qualified_name == _fqn(_leaf)]
        self.assertEqual(len(matching), 1)

    # --- cycle safety ---

    def test_cycle_does_not_recurse_infinitely(self):
        # Should terminate without RecursionError
        deps = dependency_parser.get_call_dependencies(_cycle_a)
        self.assertIn(_fqn(_cycle_b), _fqns(deps))

    # --- builtins / non-FunctionType callables ---

    def test_builtin_callable_included(self):
        deps = dependency_parser.get_call_dependencies(_calls_len)
        self.assertIn(_fqn(len), _fqns(deps))

    def test_returns_dict_type(self):
        deps = dependency_parser.get_call_dependencies(_leaf)
        self.assertIsInstance(deps, dict)

    # --- attribute access (module.func) ---

    def test_attribute_access_dependency(self):
        """Functions called via attribute access (e.g. math.sqrt) are tracked."""
        deps = dependency_parser.get_call_dependencies(_attribute_access)
        self.assertIn(_fqn(math.sqrt), _fqns(deps))

    # --- nested expressions ---

    def test_nested_expression_collects_all_calls(self):
        """All calls in a nested expression like f(g(x), y) are collected."""
        deps = dependency_parser.get_call_dependencies(_nested_expression)
        fqns = _fqns(deps)
        self.assertIn(_fqn(_single_call), fqns)
        self.assertIn(_fqn(_leaf), fqns)

    # --- unresolvable targets (the `continue` branch) and call-free functions ---

    def test_unresolvable_call_target_is_skipped(self):
        """Calls that resolve_symbol_to_object cannot handle are silently skipped."""
        # _unresolvable_subscript contains d["key"]() which is an ast.Subscript,
        # triggering a TypeError in resolve_symbol_to_object
        deps = dependency_parser.get_call_dependencies(_unresolvable_subscript)
        # Should not raise; that call is the only one in the body, so skipping it
        # leaves nothing to report
        self.assertEqual(deps, {})

    def test_non_callable_resolved_symbol_is_skipped(self):
        """
        A function whose body contains no call is crawlable and has no dependencies.

        The test name is historical: a call target that resolves to a non-callable
        object is no longer skipped, it makes get_call_dependencies raise TypeError.
        That branch is not exercised here, because _calls_non_callable has no call
        in its AST at all.
        """
        deps = dependency_parser.get_call_dependencies(_calls_non_callable)
        self.assertEqual(deps, {})


Comment on lines +173 to +174
Copy link

Copilot AI Feb 24, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is no test coverage for the case where get_call_dependencies is called with a function that doesn't have inspectable source code (e.g., a lambda, dynamically defined function, or compiled code). Since get_ast_function_node will raise a SourceCodeUnavailableError in such cases, adding a test to verify the expected behavior (whether it should raise an exception or handle it gracefully) would be valuable.

Suggested change
def test_non_inspectable_function_raises(self):
# Lambdas typically do not have inspectable source in the same way as
# module-level functions. get_ast_function_node is expected to raise
# SourceCodeUnavailableError for such callables, and this test
# verifies that this behavior is surfaced by get_call_dependencies.
lambda_func = lambda x: x + 1
with self.assertRaises(crawler.SourceCodeUnavailableError):
crawler.get_call_dependencies(lambda_func)

Copilot uses AI. Check for mistakes.
class TestSplitByVersionAvailability(unittest.TestCase):
    """Tests for :func:`dependency_parser.split_by_version_availability`."""

    @staticmethod
    def _make_info(
        module: str, qualname: str, version: str | None = None
    ) -> versions.VersionInfo:
        # Keyword construction keeps the three fields explicit at the call site
        fields = {"module": module, "qualname": qualname, "version": version}
        return versions.VersionInfo(**fields)

    def test_empty_input(self):
        versioned, unversioned = dependency_parser.split_by_version_availability({})
        self.assertEqual(versioned, {})
        self.assertEqual(unversioned, {})

    def test_all_versioned(self):
        first = self._make_info("pkg", "a", "1.0")
        second = self._make_info("pkg", "b", "2.0")
        deps: dependency_parser.CallDependencies = {first: _leaf, second: _leaf}

        versioned, unversioned = dependency_parser.split_by_version_availability(deps)
        self.assertEqual(len(versioned), 2)
        self.assertEqual(len(unversioned), 0)

    def test_all_unversioned(self):
        first = self._make_info("local", "a")
        second = self._make_info("local", "b")
        deps: dependency_parser.CallDependencies = {first: _leaf, second: _leaf}

        versioned, unversioned = dependency_parser.split_by_version_availability(deps)
        self.assertEqual(len(versioned), 0)
        self.assertEqual(len(unversioned), 2)

    def test_mixed(self):
        with_version = self._make_info("pkg", "x", "3.1")
        without_version = self._make_info("local", "y")
        deps: dependency_parser.CallDependencies = {
            with_version: _leaf,
            without_version: _single_call,
        }

        versioned, unversioned = dependency_parser.split_by_version_availability(deps)
        self.assertIn(with_version, versioned)
        self.assertIn(without_version, unversioned)
        self.assertNotIn(with_version, unversioned)
        self.assertNotIn(without_version, versioned)

    def test_partition_is_exhaustive_and_disjoint(self):
        """Each input key lands in exactly one of the two output dicts."""
        specs = [
            ("pkg", "a", "1.0"),
            ("local", "b", None),
            ("pkg", "c", "0.1"),
            ("local", "d", None),
        ]
        deps: dependency_parser.CallDependencies = {
            self._make_info(*spec): _leaf for spec in specs
        }

        versioned, unversioned = dependency_parser.split_by_version_availability(deps)
        self.assertEqual(set(versioned).union(unversioned), set(deps))
        self.assertTrue(set(versioned).isdisjoint(unversioned))

    def test_version_none_vs_empty_string(self):
        """``None`` is the only unversioned marker; ``""`` still counts as a version."""
        missing = self._make_info("local", "f", None)
        blank = self._make_info("local", "g", "")
        deps: dependency_parser.CallDependencies = {missing: _leaf, blank: _leaf}

        versioned, unversioned = dependency_parser.split_by_version_availability(deps)
        self.assertIn(missing, unversioned)
        self.assertIn(blank, versioned)


# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()
Loading