From a0363c07f008fc2af47ebf76a66987e0d7be01b0 Mon Sep 17 00:00:00 2001 From: Sam Waseda Date: Mon, 23 Feb 2026 21:52:55 +0100 Subject: [PATCH 01/30] Add crawler --- flowrep/crawler.py | 99 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 flowrep/crawler.py diff --git a/flowrep/crawler.py b/flowrep/crawler.py new file mode 100644 index 00000000..db41c350 --- /dev/null +++ b/flowrep/crawler.py @@ -0,0 +1,99 @@ +import ast +import inspect +import sys +import types +from typing import Any + +from pyiron_snippets import versions + + +def function_id(func) -> versions.VersionInfo: + return versions.VersionInfo.of(func) + + +class CallCollector(ast.NodeVisitor): + def __init__(self): + self.calls = [] + + def visit_Call(self, node): + self.calls.append(node.func) + self.generic_visit(node) + + +def build_global_namespace(func) -> dict[str, object]: + namespace = dict(func.__globals__) + + if func.__closure__: + freevars = func.__code__.co_freevars + for var, cell in zip(freevars, func.__closure__): + namespace[var] = cell.cell_contents + + return namespace + + +def resolve_ast_node(node: ast.AST, namespace: dict[str, object]) -> Any: + if isinstance(node, ast.Name): + return namespace.get(node.id) + + if isinstance(node, ast.Attribute): + base = resolve_ast_node(node.value, namespace) + if base is None: + return None + return getattr(base, node.attr, None) + + return None + + +def extract_called_functions(func: types.FunctionType) -> set[types.FunctionType]: + try: + source = inspect.getsource(func) + except (OSError, TypeError): + return set() + + tree = ast.parse(source) + collector = CallCollector() + collector.visit(tree) + + namespace = build_global_namespace(func) + resolved = set() + + for call_node in collector.calls: + obj = resolve_ast_node(call_node, namespace) + if callable(obj): + resolved.add(obj) + + return resolved + + +def analyze_function_dependencies(root_func: types.FunctionType) -> tuple[ + 
set[types.FunctionType], # local functions + set[FunctionID], # non-local functions +]: + """ + Recursively analyze function dependencies. + + Returns: + local_functions: set of locally-defined functions + external_functions: set of (module, qualname) + """ + visited: set[FunctionID] = set() + local_functions: set[types.FunctionType] = set() + external_functions: set[FunctionID] = set() + + def walk(func): + fid = function_id(func) + if fid.fully_qualified_name in visited: + return + visited.add(fid.fully_qualified_name) + + for called in extract_called_functions(func): + cid = function_id(called) + + if cid.version is None: + local_functions.add(called) + walk(called) + else: + external_functions.add(cid) + + walk(root_func) + return local_functions, external_functions From f9488a2f772241466b9f3676b37083b90d05d74a Mon Sep 17 00:00:00 2001 From: Sam Waseda Date: Mon, 23 Feb 2026 21:56:00 +0100 Subject: [PATCH 02/30] Add docstring --- flowrep/crawler.py | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/flowrep/crawler.py b/flowrep/crawler.py index db41c350..5617fb72 100644 --- a/flowrep/crawler.py +++ b/flowrep/crawler.py @@ -32,6 +32,16 @@ def build_global_namespace(func) -> dict[str, object]: def resolve_ast_node(node: ast.AST, namespace: dict[str, object]) -> Any: + """ + Resolve an AST node to its corresponding object in the given namespace. + + Args: + node (ast.AST): The AST node to resolve. + namespace (dict[str, object]): The namespace to use for resolution. + + Returns: + Any: The resolved object, or None if it cannot be resolved. 
+ """ if isinstance(node, ast.Name): return namespace.get(node.id) @@ -45,10 +55,16 @@ def resolve_ast_node(node: ast.AST, namespace: dict[str, object]) -> Any: def extract_called_functions(func: types.FunctionType) -> set[types.FunctionType]: - try: - source = inspect.getsource(func) - except (OSError, TypeError): - return set() + """ + Extract all functions called by the given function. + + Args: + func (types.FunctionType): The function to analyze. + + Returns: + Set[types.FunctionType]: A set of functions that are called by the given function. + """ + source = inspect.getsource(func) tree = ast.parse(source) collector = CallCollector() @@ -72,9 +88,13 @@ def analyze_function_dependencies(root_func: types.FunctionType) -> tuple[ """ Recursively analyze function dependencies. + Args: + root_func (types.FunctionType): The root function to analyze. + Returns: - local_functions: set of locally-defined functions - external_functions: set of (module, qualname) + Tuple[Set[types.FunctionType], Set[FunctionID]]: A tuple containing: + - A set of local functions (defined in the same codebase). + - A set of external function IDs (from other modules or libraries). 
""" visited: set[FunctionID] = set() local_functions: set[types.FunctionType] = set() From c32f72082a8e9f0ad278c20c159a240490b88079 Mon Sep 17 00:00:00 2001 From: Sam Waseda Date: Mon, 23 Feb 2026 22:03:51 +0100 Subject: [PATCH 03/30] Remove function_id --- flowrep/crawler.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/flowrep/crawler.py b/flowrep/crawler.py index 5617fb72..e985409a 100644 --- a/flowrep/crawler.py +++ b/flowrep/crawler.py @@ -4,11 +4,7 @@ import types from typing import Any -from pyiron_snippets import versions - - -def function_id(func) -> versions.VersionInfo: - return versions.VersionInfo.of(func) +from pyiron_snippets.versions import VersionInfo class CallCollector(ast.NodeVisitor): @@ -101,13 +97,13 @@ def analyze_function_dependencies(root_func: types.FunctionType) -> tuple[ external_functions: set[FunctionID] = set() def walk(func): - fid = function_id(func) + fid = VersionInfo.of(func) if fid.fully_qualified_name in visited: return visited.add(fid.fully_qualified_name) for called in extract_called_functions(func): - cid = function_id(called) + cid = VersionInfo.of(called) if cid.version is None: local_functions.add(called) From 2bf31ead9892984b5130e66d7d6fb48f960e8acb Mon Sep 17 00:00:00 2001 From: Sam Waseda Date: Mon, 23 Feb 2026 22:18:31 +0100 Subject: [PATCH 04/30] Add tests --- flowrep/crawler.py | 4 ++-- tests/unit/test_crawler.py | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 2 deletions(-) create mode 100644 tests/unit/test_crawler.py diff --git a/flowrep/crawler.py b/flowrep/crawler.py index e985409a..8c16d9f0 100644 --- a/flowrep/crawler.py +++ b/flowrep/crawler.py @@ -16,7 +16,7 @@ def visit_Call(self, node): self.generic_visit(node) -def build_global_namespace(func) -> dict[str, object]: +def _build_global_namespace(func) -> dict[str, object]: namespace = dict(func.__globals__) if func.__closure__: @@ -66,7 +66,7 @@ def extract_called_functions(func: 
types.FunctionType) -> set[types.FunctionType collector = CallCollector() collector.visit(tree) - namespace = build_global_namespace(func) + namespace = _build_global_namespace(func) resolved = set() for call_node in collector.calls: diff --git a/tests/unit/test_crawler.py b/tests/unit/test_crawler.py new file mode 100644 index 00000000..968dc091 --- /dev/null +++ b/tests/unit/test_crawler.py @@ -0,0 +1,36 @@ +import math +import unittest + +from flowrep import crawler + + +def add(x, y): + return x + y + + +def op(a, b): + c = add(a, b) + d = math.sqrt(c) + return d + +def more_op(a, b): + c = op(a, b) + return c + + +class TestCrawler(unittest.TestCase): + def test_analyze_function_dependencies(self): + loc, ext = crawler.analyze_function_dependencies(op) + self.assertEqual(loc, {add}) + self.assertEqual(len(ext), 1) + f = ext.pop() + self.assertEqual(f.fully_qualified_name, "math.sqrt") + loc, ext = crawler.analyze_function_dependencies(more_op) + self.assertEqual(loc, {op, add}) + self.assertEqual(len(ext), 1) + g = ext.pop() + self.assertEqual(g.fully_qualified_name, "math.sqrt") + + +if __name__ == "__main__": + unittest.main() From 9776b8af1b6268fb2b74b61bf6ea7f68a9720a3c Mon Sep 17 00:00:00 2001 From: Sam Waseda Date: Mon, 23 Feb 2026 22:18:57 +0100 Subject: [PATCH 05/30] black --- tests/unit/test_crawler.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/unit/test_crawler.py b/tests/unit/test_crawler.py index 968dc091..c7d38676 100644 --- a/tests/unit/test_crawler.py +++ b/tests/unit/test_crawler.py @@ -13,6 +13,7 @@ def op(a, b): d = math.sqrt(c) return d + def more_op(a, b): c = op(a, b) return c From 8f000577eb4930656d06ae95d0fed6d4a50781f2 Mon Sep 17 00:00:00 2001 From: Sam Waseda Date: Mon, 23 Feb 2026 22:25:33 +0100 Subject: [PATCH 06/30] Add more tests --- flowrep/crawler.py | 6 +++--- tests/unit/test_crawler.py | 5 +++++ 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/flowrep/crawler.py b/flowrep/crawler.py index 
8c16d9f0..53024de6 100644 --- a/flowrep/crawler.py +++ b/flowrep/crawler.py @@ -27,7 +27,7 @@ def _build_global_namespace(func) -> dict[str, object]: return namespace -def resolve_ast_node(node: ast.AST, namespace: dict[str, object]) -> Any: +def _resolve_ast_node(node: ast.AST, namespace: dict[str, object]) -> Any: """ Resolve an AST node to its corresponding object in the given namespace. @@ -42,7 +42,7 @@ def resolve_ast_node(node: ast.AST, namespace: dict[str, object]) -> Any: return namespace.get(node.id) if isinstance(node, ast.Attribute): - base = resolve_ast_node(node.value, namespace) + base = _resolve_ast_node(node.value, namespace) if base is None: return None return getattr(base, node.attr, None) @@ -70,7 +70,7 @@ def extract_called_functions(func: types.FunctionType) -> set[types.FunctionType resolved = set() for call_node in collector.calls: - obj = resolve_ast_node(call_node, namespace) + obj = _resolve_ast_node(call_node, namespace) if callable(obj): resolved.add(obj) diff --git a/tests/unit/test_crawler.py b/tests/unit/test_crawler.py index c7d38676..4ce307fc 100644 --- a/tests/unit/test_crawler.py +++ b/tests/unit/test_crawler.py @@ -32,6 +32,11 @@ def test_analyze_function_dependencies(self): g = ext.pop() self.assertEqual(g.fully_qualified_name, "math.sqrt") + def test_extract_called_functions(self): + called = crawler.extract_called_functions(op) + self.assertEqual(called, {add, math.sqrt}) + called = crawler.extract_called_functions(more_op) + self.assertEqual(called, {op}) if __name__ == "__main__": unittest.main() From bcf297399be8c9cf9890b881d5ac07d74e229913 Mon Sep 17 00:00:00 2001 From: Sam Waseda Date: Mon, 23 Feb 2026 22:32:19 +0100 Subject: [PATCH 07/30] Remove the part with closure --- flowrep/crawler.py | 9 +-------- tests/unit/test_crawler.py | 1 + 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/flowrep/crawler.py b/flowrep/crawler.py index 53024de6..f9a542f2 100644 --- a/flowrep/crawler.py +++ b/flowrep/crawler.py @@ 
-17,14 +17,7 @@ def visit_Call(self, node): def _build_global_namespace(func) -> dict[str, object]: - namespace = dict(func.__globals__) - - if func.__closure__: - freevars = func.__code__.co_freevars - for var, cell in zip(freevars, func.__closure__): - namespace[var] = cell.cell_contents - - return namespace + return dict(func.__globals__) def _resolve_ast_node(node: ast.AST, namespace: dict[str, object]) -> Any: diff --git a/tests/unit/test_crawler.py b/tests/unit/test_crawler.py index 4ce307fc..909d280e 100644 --- a/tests/unit/test_crawler.py +++ b/tests/unit/test_crawler.py @@ -38,5 +38,6 @@ def test_extract_called_functions(self): called = crawler.extract_called_functions(more_op) self.assertEqual(called, {op}) + if __name__ == "__main__": unittest.main() From 756207cc3f48fe685021c5cdfd5696a889164a99 Mon Sep 17 00:00:00 2001 From: Sam Waseda Date: Mon, 23 Feb 2026 22:48:07 +0100 Subject: [PATCH 08/30] Update environemnt --- .ci_support/environment.yml | 1 + .ci_support/lower-bounds.yml | 1 + pyproject.toml | 1 + 3 files changed, 3 insertions(+) diff --git a/.ci_support/environment.yml b/.ci_support/environment.yml index 93fa383d..fb87806f 100644 --- a/.ci_support/environment.yml +++ b/.ci_support/environment.yml @@ -6,3 +6,4 @@ dependencies: - python >=3.11, <3.14 - networkx =3.6.1 - pydantic =2.12.5 +- pyiron_snippets =1.1.0 diff --git a/.ci_support/lower-bounds.yml b/.ci_support/lower-bounds.yml index 4e011b52..092d5001 100644 --- a/.ci_support/lower-bounds.yml +++ b/.ci_support/lower-bounds.yml @@ -6,3 +6,4 @@ dependencies: - python =3.11 - networkx =3.4.2 - pydantic =2.12.0 + - pyiron_snippets =1.1.0 diff --git a/pyproject.toml b/pyproject.toml index df326002..3a03e60a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,6 +24,7 @@ classifiers = [ dependencies = [ "networkx==3.6.1", "pydantic==2.12.5", + "pyiron_snippets==1.1.0", ] dynamic = [ "version",] authors = [ From fbdb91b5183751fb710fb5e8ff103eedb340dcf1 Mon Sep 17 00:00:00 2001 From: 
pyiron-runner Date: Mon, 23 Feb 2026 21:54:01 +0000 Subject: [PATCH 09/30] [dependabot skip] Update env file --- .binder/environment.yml | 1 + docs/environment.yml | 1 + 2 files changed, 2 insertions(+) diff --git a/.binder/environment.yml b/.binder/environment.yml index 93fa383d..fb87806f 100644 --- a/.binder/environment.yml +++ b/.binder/environment.yml @@ -6,3 +6,4 @@ dependencies: - python >=3.11, <3.14 - networkx =3.6.1 - pydantic =2.12.5 +- pyiron_snippets =1.1.0 diff --git a/docs/environment.yml b/docs/environment.yml index bb80e250..49a2a1a3 100644 --- a/docs/environment.yml +++ b/docs/environment.yml @@ -12,3 +12,4 @@ dependencies: - python >=3.11, <3.14 - networkx =3.6.1 - pydantic =2.12.5 +- pyiron_snippets =1.1.0 From 1990a4d565fd0bc38f9265e9a0cd5d69b93ff568 Mon Sep 17 00:00:00 2001 From: Sam Waseda Date: Mon, 23 Feb 2026 22:56:25 +0100 Subject: [PATCH 10/30] ruff --- flowrep/crawler.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/flowrep/crawler.py b/flowrep/crawler.py index f9a542f2..50ea2a21 100644 --- a/flowrep/crawler.py +++ b/flowrep/crawler.py @@ -1,6 +1,5 @@ import ast import inspect -import sys import types from typing import Any @@ -72,7 +71,7 @@ def extract_called_functions(func: types.FunctionType) -> set[types.FunctionType def analyze_function_dependencies(root_func: types.FunctionType) -> tuple[ set[types.FunctionType], # local functions - set[FunctionID], # non-local functions + set[VersionInfo], # non-local functions ]: """ Recursively analyze function dependencies. @@ -81,13 +80,13 @@ def analyze_function_dependencies(root_func: types.FunctionType) -> tuple[ root_func (types.FunctionType): The root function to analyze. Returns: - Tuple[Set[types.FunctionType], Set[FunctionID]]: A tuple containing: + Tuple[Set[types.FunctionType], Set[VersionInfo]]: A tuple containing: - A set of local functions (defined in the same codebase). - A set of external function IDs (from other modules or libraries). 
""" - visited: set[FunctionID] = set() + visited: set[VersionInfo] = set() local_functions: set[types.FunctionType] = set() - external_functions: set[FunctionID] = set() + external_functions: set[VersionInfo] = set() def walk(func): fid = VersionInfo.of(func) From 09cd047dbe021892936e9f1570e927bff7bc4e35 Mon Sep 17 00:00:00 2001 From: liamhuber Date: Mon, 23 Feb 2026 17:25:57 -0800 Subject: [PATCH 11/30] Use google scoping style Signed-off-by: liamhuber --- flowrep/crawler.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/flowrep/crawler.py b/flowrep/crawler.py index 50ea2a21..05fcc3a6 100644 --- a/flowrep/crawler.py +++ b/flowrep/crawler.py @@ -3,7 +3,7 @@ import types from typing import Any -from pyiron_snippets.versions import VersionInfo +from pyiron_snippets import versions class CallCollector(ast.NodeVisitor): @@ -71,7 +71,7 @@ def extract_called_functions(func: types.FunctionType) -> set[types.FunctionType def analyze_function_dependencies(root_func: types.FunctionType) -> tuple[ set[types.FunctionType], # local functions - set[VersionInfo], # non-local functions + set[versions.VersionInfo], # non-local functions ]: """ Recursively analyze function dependencies. @@ -84,18 +84,18 @@ def analyze_function_dependencies(root_func: types.FunctionType) -> tuple[ - A set of local functions (defined in the same codebase). - A set of external function IDs (from other modules or libraries). 
""" - visited: set[VersionInfo] = set() + visited: set[versions.VersionInfo] = set() local_functions: set[types.FunctionType] = set() - external_functions: set[VersionInfo] = set() + external_functions: set[versions.VersionInfo] = set() def walk(func): - fid = VersionInfo.of(func) + fid = versions.VersionInfo.of(func) if fid.fully_qualified_name in visited: return visited.add(fid.fully_qualified_name) for called in extract_called_functions(func): - cid = VersionInfo.of(called) + cid = versions.VersionInfo.of(called) if cid.version is None: local_functions.add(called) From 38aa6376686eb61c19b15475f0c7c7fa43778745 Mon Sep 17 00:00:00 2001 From: liamhuber Date: Mon, 23 Feb 2026 19:48:47 -0800 Subject: [PATCH 12/30] Use object_scope Signed-off-by: liamhuber --- flowrep/crawler.py | 34 ++++------------------------------ 1 file changed, 4 insertions(+), 30 deletions(-) diff --git a/flowrep/crawler.py b/flowrep/crawler.py index 05fcc3a6..64c8214f 100644 --- a/flowrep/crawler.py +++ b/flowrep/crawler.py @@ -1,10 +1,11 @@ import ast import inspect import types -from typing import Any from pyiron_snippets import versions +from flowrep.models.parsers import object_scope + class CallCollector(ast.NodeVisitor): def __init__(self): @@ -15,33 +16,6 @@ def visit_Call(self, node): self.generic_visit(node) -def _build_global_namespace(func) -> dict[str, object]: - return dict(func.__globals__) - - -def _resolve_ast_node(node: ast.AST, namespace: dict[str, object]) -> Any: - """ - Resolve an AST node to its corresponding object in the given namespace. - - Args: - node (ast.AST): The AST node to resolve. - namespace (dict[str, object]): The namespace to use for resolution. - - Returns: - Any: The resolved object, or None if it cannot be resolved. 
- """ - if isinstance(node, ast.Name): - return namespace.get(node.id) - - if isinstance(node, ast.Attribute): - base = _resolve_ast_node(node.value, namespace) - if base is None: - return None - return getattr(base, node.attr, None) - - return None - - def extract_called_functions(func: types.FunctionType) -> set[types.FunctionType]: """ Extract all functions called by the given function. @@ -58,11 +32,11 @@ def extract_called_functions(func: types.FunctionType) -> set[types.FunctionType collector = CallCollector() collector.visit(tree) - namespace = _build_global_namespace(func) + namespace = object_scope.get_scope(func) resolved = set() for call_node in collector.calls: - obj = _resolve_ast_node(call_node, namespace) + obj = object_scope.resolve_symbol_to_object(call_node, namespace) if callable(obj): resolved.add(obj) From ddccbdf685965ecbd9f501109952736f49cae93b Mon Sep 17 00:00:00 2001 From: liamhuber Date: Mon, 23 Feb 2026 19:59:53 -0800 Subject: [PATCH 13/30] Add hints Signed-off-by: liamhuber --- flowrep/crawler.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/flowrep/crawler.py b/flowrep/crawler.py index 64c8214f..302d6b1a 100644 --- a/flowrep/crawler.py +++ b/flowrep/crawler.py @@ -9,9 +9,9 @@ class CallCollector(ast.NodeVisitor): def __init__(self): - self.calls = [] + self.calls: list[ast.expr] = [] - def visit_Call(self, node): + def visit_Call(self, node: ast.Call) -> None: self.calls.append(node.func) self.generic_visit(node) From 5ca15c3e1af54b86323ec79f19fa0b1540f499c3 Mon Sep 17 00:00:00 2001 From: liamhuber Date: Mon, 23 Feb 2026 20:09:10 -0800 Subject: [PATCH 14/30] Do it in a single recursive function And return a map between the version info and the usages. 
Signed-off-by: liamhuber --- flowrep/crawler.py | 87 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 86 insertions(+), 1 deletion(-) diff --git a/flowrep/crawler.py b/flowrep/crawler.py index 302d6b1a..6c1cbcfc 100644 --- a/flowrep/crawler.py +++ b/flowrep/crawler.py @@ -1,10 +1,95 @@ import ast import inspect import types +from collections.abc import Callable from pyiron_snippets import versions -from flowrep.models.parsers import object_scope +from flowrep.models.parsers import object_scope, parser_helpers + +CallDependencies = dict[versions.VersionInfo, list[Callable]] + + +def get_call_dependencies( + func: types.FunctionType, + version_scraping: versions.VersionScrapingMap | None = None, + _call_dependencies: CallDependencies | None = None, + _visited: set[str] | None = None, +) -> CallDependencies: + """ + Recursively collect all callable dependencies of *func* via AST introspection. + + Each dependency is keyed by its :class:`~pyiron_snippets.versions.VersionInfo` + and maps to the list of concrete callables sharing that identity. The search + is depth-first: for every resolved callee that is a + :class:`~types.FunctionType` (i.e. has inspectable source), the function + recurses into the callee's own scope. + + Args: + func: The function whose call-graph to analyse. + version_scraping (VersionScrapingMap | None): Since some modules may store + their version in other ways, this provides an optional map between module + names and callables to leverage for extracting that module's version. + _call_dependencies: Accumulator for recursive calls — do not pass manually. + _visited: Fully-qualified names already traversed — do not pass manually. + + Returns: + A mapping from :class:`VersionInfo` to the callables found under that + identity across the entire (sub-)tree. 
+ """ + call_dependencies: CallDependencies = _call_dependencies or {} + visited: set[str] = _visited or set() + + func_fqn = versions.VersionInfo.of(func).fully_qualified_name + if func_fqn in visited: + return call_dependencies + visited.add(func_fqn) + + scope = object_scope.get_scope(func) + tree = parser_helpers.get_ast_function_node(func) + collector = CallCollector() + collector.visit(tree) + + for call in collector.calls: + try: + caller = object_scope.resolve_symbol_to_object(call, scope) + except (ValueError, TypeError): + continue + + if not callable(caller): + continue + + info = versions.VersionInfo.of(caller, version_scraping=version_scraping) + call_dependencies.setdefault(info, []).append(caller) + + # Depth-first search on dependencies — only possible when we have source + if isinstance(caller, types.FunctionType): + get_call_dependencies(caller, version_scraping, call_dependencies, visited) + + return call_dependencies + + +def split_by_version_availability( + call_dependencies: CallDependencies, +) -> tuple[CallDependencies, CallDependencies]: + """ + Partition *call_dependencies* by whether a version string is available. + + Args: + call_dependencies: The dependency map to partition. + + Returns: + A ``(has_version, no_version)`` tuple of :data:`CallDependencies` dicts. 
+ """ + has_version: CallDependencies = {} + no_version: CallDependencies = {} + for info, dependents in call_dependencies.items(): + if info.version is None: + no_version[info] = dependents + else: + has_version[info] = dependents + + return has_version, no_version class CallCollector(ast.NodeVisitor): From cdf03d52c3b26444d8889c845c240c03a843b17f Mon Sep 17 00:00:00 2001 From: liamhuber Date: Mon, 23 Feb 2026 20:09:17 -0800 Subject: [PATCH 15/30] Add tests Signed-off-by: liamhuber --- tests/unit/test_crawler.py | 215 +++++++++++++++++++++++++++++++++++++ 1 file changed, 215 insertions(+) diff --git a/tests/unit/test_crawler.py b/tests/unit/test_crawler.py index 909d280e..4c72369b 100644 --- a/tests/unit/test_crawler.py +++ b/tests/unit/test_crawler.py @@ -1,6 +1,8 @@ import math import unittest +from pyiron_snippets import versions + from flowrep import crawler @@ -19,6 +21,77 @@ def more_op(a, b): return c +# --------------------------------------------------------------------------- +# Helper functions defined at module level so they have inspectable source, +# a proper __module__, and a stable __qualname__. +# --------------------------------------------------------------------------- + + +def _leaf(): + return 42 + + +def _single_call(): + return _leaf() + + +def _diamond_a(): + return _leaf() + + +def _diamond_b(): + return _leaf() + + +def _diamond_root(): + _diamond_a() + _diamond_b() + + +def _mutual_b(): + return _leaf() + + +def _mutual_a(): + return _mutual_b() + + +# Mutual recursion to exercise cycle detection. 
+def _cycle_a(): + return _cycle_b() # noqa: F821 — defined below + + +def _cycle_b(): + return _cycle_a() + + +def _no_calls(): + x = 1 + 2 + return x + + +def _calls_len(): + return len([1, 2, 3]) + + +def _nested_call(): + return _single_call() + + +def _multi_call(): + a = _leaf() + b = _leaf() + return a + b + + +def _fqn(func) -> str: + return versions.VersionInfo.of(func).fully_qualified_name + + +def _fqns(deps: crawler.CallDependencies) -> set[str]: + return {info.fully_qualified_name for info in deps} + + class TestCrawler(unittest.TestCase): def test_analyze_function_dependencies(self): loc, ext = crawler.analyze_function_dependencies(op) @@ -39,5 +112,147 @@ def test_extract_called_functions(self): self.assertEqual(called, {op}) +class TestGetCallDependencies(unittest.TestCase): + """Tests for :func:`crawler.get_call_dependencies`.""" + + # --- basic behaviour --- + + def test_no_calls_returns_empty(self): + deps = crawler.get_call_dependencies(_no_calls) + self.assertEqual(deps, {}) + + def test_single_direct_call(self): + deps = crawler.get_call_dependencies(_single_call) + self.assertIn(_fqn(_leaf), _fqns(deps)) + + def test_transitive_dependencies(self): + deps = crawler.get_call_dependencies(_nested_call) + fqns = _fqns(deps) + # Should find both _single_call and _leaf + self.assertIn(_fqn(_single_call), fqns) + self.assertIn(_fqn(_leaf), fqns) + + def test_diamond_dependency_no_duplicate_keys(self): + """ + _diamond_root -> _diamond_a -> _leaf AND _diamond_root -> _diamond_b -> _leaf. + _leaf's VersionInfo should appear exactly once as a key. 
+ """ + deps = crawler.get_call_dependencies(_diamond_root) + matching = [info for info in deps if info.fully_qualified_name == _fqn(_leaf)] + self.assertEqual(len(matching), 1) + + # --- cycle safety --- + + def test_cycle_does_not_recurse_infinitely(self): + # Should terminate without RecursionError + deps = crawler.get_call_dependencies(_cycle_a) + self.assertIn(_fqn(_cycle_b), _fqns(deps)) + + # --- builtins / non-FunctionType callables --- + + def test_builtin_callable_included(self): + deps = crawler.get_call_dependencies(_calls_len) + self.assertIn(_fqn(len), _fqns(deps)) + + # --- accumulator semantics --- + + def test_same_function_called_twice_appears_multiple_times_in_list(self): + deps = crawler.get_call_dependencies(_multi_call) + matching = [info for info in deps if info.fully_qualified_name == _fqn(_leaf)] + self.assertEqual(len(matching), 1, "single key expected") + # The list value should have two entries (one per call-site) + self.assertEqual(len(deps[matching[0]]), 2) + + def test_returns_dict_type(self): + deps = crawler.get_call_dependencies(_leaf) + self.assertIsInstance(deps, dict) + + +class TestSplitByVersionAvailability(unittest.TestCase): + """Tests for :func:`crawler.split_by_version_availability`.""" + + @staticmethod + def _make_info( + module: str, qualname: str, version: str | None = None + ) -> versions.VersionInfo: + return versions.VersionInfo( + module=module, + qualname=qualname, + version=version, + ) + + def test_empty_input(self): + has, no = crawler.split_by_version_availability({}) + self.assertEqual(has, {}) + self.assertEqual(no, {}) + + def test_all_versioned(self): + info_a = self._make_info("pkg", "a", "1.0") + info_b = self._make_info("pkg", "b", "2.0") + deps: crawler.CallDependencies = {info_a: [_leaf], info_b: [_leaf]} + + has, no = crawler.split_by_version_availability(deps) + self.assertEqual(len(has), 2) + self.assertEqual(len(no), 0) + + def test_all_unversioned(self): + info_a = self._make_info("local", "a") + 
info_b = self._make_info("local", "b") + deps: crawler.CallDependencies = {info_a: [_leaf], info_b: [_leaf]} + + has, no = crawler.split_by_version_availability(deps) + self.assertEqual(len(has), 0) + self.assertEqual(len(no), 2) + + def test_mixed(self): + versioned = self._make_info("pkg", "x", "3.1") + unversioned = self._make_info("local", "y") + deps: crawler.CallDependencies = { + versioned: [_leaf], + unversioned: [_single_call], + } + + has, no = crawler.split_by_version_availability(deps) + self.assertIn(versioned, has) + self.assertIn(unversioned, no) + self.assertNotIn(versioned, no) + self.assertNotIn(unversioned, has) + + def test_preserves_callable_lists(self): + info = self._make_info("pkg", "z", "1.0") + callables = [_leaf, _single_call, _no_calls] + deps: crawler.CallDependencies = {info: callables} + + has, _ = crawler.split_by_version_availability(deps) + self.assertIs(has[info], callables) + + def test_partition_is_exhaustive_and_disjoint(self): + """Every key in the input appears in exactly one partition.""" + infos = [ + self._make_info("pkg", "a", "1.0"), + self._make_info("local", "b"), + self._make_info("pkg", "c", "0.1"), + self._make_info("local", "d"), + ] + deps: crawler.CallDependencies = {info: [_leaf] for info in infos} + + has, no = crawler.split_by_version_availability(deps) + self.assertEqual(set(has) | set(no), set(deps)) + self.assertTrue(set(has).isdisjoint(set(no))) + + def test_version_none_vs_empty_string(self): + """Only ``None`` counts as unversioned; an empty string is still 'versioned'.""" + none_version = self._make_info("local", "f", None) + empty_version = self._make_info("local", "g", "") + deps: crawler.CallDependencies = { + none_version: [_leaf], + empty_version: [_leaf], + } + + has, no = crawler.split_by_version_availability(deps) + self.assertIn(none_version, no) + self.assertIn(empty_version, has) + + if __name__ == "__main__": unittest.main() From ac96a5ea3c693451cc310c479eb742096b3a47fe Mon Sep 17 00:00:00 
2001 From: liamhuber Date: Mon, 23 Feb 2026 17:25:57 -0800 Subject: [PATCH 16/30] Use google scoping style Signed-off-by: liamhuber --- flowrep/crawler.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/flowrep/crawler.py b/flowrep/crawler.py index 50ea2a21..05fcc3a6 100644 --- a/flowrep/crawler.py +++ b/flowrep/crawler.py @@ -3,7 +3,7 @@ import types from typing import Any -from pyiron_snippets.versions import VersionInfo +from pyiron_snippets import versions class CallCollector(ast.NodeVisitor): @@ -71,7 +71,7 @@ def extract_called_functions(func: types.FunctionType) -> set[types.FunctionType def analyze_function_dependencies(root_func: types.FunctionType) -> tuple[ set[types.FunctionType], # local functions - set[VersionInfo], # non-local functions + set[versions.VersionInfo], # non-local functions ]: """ Recursively analyze function dependencies. @@ -84,18 +84,18 @@ def analyze_function_dependencies(root_func: types.FunctionType) -> tuple[ - A set of local functions (defined in the same codebase). - A set of external function IDs (from other modules or libraries). 
""" - visited: set[VersionInfo] = set() + visited: set[versions.VersionInfo] = set() local_functions: set[types.FunctionType] = set() - external_functions: set[VersionInfo] = set() + external_functions: set[versions.VersionInfo] = set() def walk(func): - fid = VersionInfo.of(func) + fid = versions.VersionInfo.of(func) if fid.fully_qualified_name in visited: return visited.add(fid.fully_qualified_name) for called in extract_called_functions(func): - cid = VersionInfo.of(called) + cid = versions.VersionInfo.of(called) if cid.version is None: local_functions.add(called) From b1110ee4766cbf618017e70c793508e4a69c82b6 Mon Sep 17 00:00:00 2001 From: Sam Waseda Date: Tue, 24 Feb 2026 07:18:59 +0100 Subject: [PATCH 17/30] Get rid of what was before --- flowrep/crawler.py | 65 -------------------------------------- tests/unit/test_crawler.py | 36 --------------------- 2 files changed, 101 deletions(-) diff --git a/flowrep/crawler.py b/flowrep/crawler.py index 6c1cbcfc..4c10dbdc 100644 --- a/flowrep/crawler.py +++ b/flowrep/crawler.py @@ -99,68 +99,3 @@ def __init__(self): def visit_Call(self, node: ast.Call) -> None: self.calls.append(node.func) self.generic_visit(node) - - -def extract_called_functions(func: types.FunctionType) -> set[types.FunctionType]: - """ - Extract all functions called by the given function. - - Args: - func (types.FunctionType): The function to analyze. - - Returns: - Set[types.FunctionType]: A set of functions that are called by the given function. 
- """ - source = inspect.getsource(func) - - tree = ast.parse(source) - collector = CallCollector() - collector.visit(tree) - - namespace = object_scope.get_scope(func) - resolved = set() - - for call_node in collector.calls: - obj = object_scope.resolve_symbol_to_object(call_node, namespace) - if callable(obj): - resolved.add(obj) - - return resolved - - -def analyze_function_dependencies(root_func: types.FunctionType) -> tuple[ - set[types.FunctionType], # local functions - set[versions.VersionInfo], # non-local functions -]: - """ - Recursively analyze function dependencies. - - Args: - root_func (types.FunctionType): The root function to analyze. - - Returns: - Tuple[Set[types.FunctionType], Set[VersionInfo]]: A tuple containing: - - A set of local functions (defined in the same codebase). - - A set of external function IDs (from other modules or libraries). - """ - visited: set[versions.VersionInfo] = set() - local_functions: set[types.FunctionType] = set() - external_functions: set[versions.VersionInfo] = set() - - def walk(func): - fid = versions.VersionInfo.of(func) - if fid.fully_qualified_name in visited: - return - visited.add(fid.fully_qualified_name) - - for called in extract_called_functions(func): - cid = versions.VersionInfo.of(called) - - if cid.version is None: - local_functions.add(called) - walk(called) - else: - external_functions.add(cid) - - walk(root_func) - return local_functions, external_functions diff --git a/tests/unit/test_crawler.py b/tests/unit/test_crawler.py index 4c72369b..fdc64dc6 100644 --- a/tests/unit/test_crawler.py +++ b/tests/unit/test_crawler.py @@ -5,22 +5,6 @@ from flowrep import crawler - -def add(x, y): - return x + y - - -def op(a, b): - c = add(a, b) - d = math.sqrt(c) - return d - - -def more_op(a, b): - c = op(a, b) - return c - - # --------------------------------------------------------------------------- # Helper functions defined at module level so they have inspectable source, # a proper __module__, and a 
stable __qualname__. @@ -92,26 +76,6 @@ def _fqns(deps: crawler.CallDependencies) -> set[str]: return {info.fully_qualified_name for info in deps} -class TestCrawler(unittest.TestCase): - def test_analyze_function_dependencies(self): - loc, ext = crawler.analyze_function_dependencies(op) - self.assertEqual(loc, {add}) - self.assertEqual(len(ext), 1) - f = ext.pop() - self.assertEqual(f.fully_qualified_name, "math.sqrt") - loc, ext = crawler.analyze_function_dependencies(more_op) - self.assertEqual(loc, {op, add}) - self.assertEqual(len(ext), 1) - g = ext.pop() - self.assertEqual(g.fully_qualified_name, "math.sqrt") - - def test_extract_called_functions(self): - called = crawler.extract_called_functions(op) - self.assertEqual(called, {add, math.sqrt}) - called = crawler.extract_called_functions(more_op) - self.assertEqual(called, {op}) - - class TestGetCallDependencies(unittest.TestCase): """Tests for :func:`crawler.get_call_dependencies`.""" From f6ba6b660f5ab6f79abe6c0d1e88644dfb7441d0 Mon Sep 17 00:00:00 2001 From: Sam Waseda Date: Tue, 24 Feb 2026 07:25:08 +0100 Subject: [PATCH 18/30] [ruff] remove unused math --- tests/unit/test_crawler.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/unit/test_crawler.py b/tests/unit/test_crawler.py index fdc64dc6..c543ae90 100644 --- a/tests/unit/test_crawler.py +++ b/tests/unit/test_crawler.py @@ -1,4 +1,3 @@ -import math import unittest from pyiron_snippets import versions From dc369fd5588e7fecfe7588baecc9c7cd73838cad Mon Sep 17 00:00:00 2001 From: Sam Waseda Date: Tue, 24 Feb 2026 07:36:12 +0100 Subject: [PATCH 19/30] Make key-function pairs and not key-list[function] --- flowrep/crawler.py | 2 +- tests/unit/test_crawler.py | 30 ++++++------------------------ 2 files changed, 7 insertions(+), 25 deletions(-) diff --git a/flowrep/crawler.py b/flowrep/crawler.py index 4c10dbdc..96296d9e 100644 --- a/flowrep/crawler.py +++ b/flowrep/crawler.py @@ -60,7 +60,7 @@ def get_call_dependencies( continue info = 
versions.VersionInfo.of(caller, version_scraping=version_scraping) - call_dependencies.setdefault(info, []).append(caller) + call_dependencies[info] = caller # Depth-first search on dependencies — only possible when we have source if isinstance(caller, types.FunctionType): diff --git a/tests/unit/test_crawler.py b/tests/unit/test_crawler.py index c543ae90..25a7a067 100644 --- a/tests/unit/test_crawler.py +++ b/tests/unit/test_crawler.py @@ -117,15 +117,6 @@ def test_builtin_callable_included(self): deps = crawler.get_call_dependencies(_calls_len) self.assertIn(_fqn(len), _fqns(deps)) - # --- accumulator semantics --- - - def test_same_function_called_twice_appears_multiple_times_in_list(self): - deps = crawler.get_call_dependencies(_multi_call) - matching = [info for info in deps if info.fully_qualified_name == _fqn(_leaf)] - self.assertEqual(len(matching), 1, "single key expected") - # The list value should have two entries (one per call-site) - self.assertEqual(len(deps[matching[0]]), 2) - def test_returns_dict_type(self): deps = crawler.get_call_dependencies(_leaf) self.assertIsInstance(deps, dict) @@ -152,7 +143,7 @@ def test_empty_input(self): def test_all_versioned(self): info_a = self._make_info("pkg", "a", "1.0") info_b = self._make_info("pkg", "b", "2.0") - deps: crawler.CallDependencies = {info_a: [_leaf], info_b: [_leaf]} + deps: crawler.CallDependencies = {info_a: _leaf, info_b: _leaf} has, no = crawler.split_by_version_availability(deps) self.assertEqual(len(has), 2) @@ -161,7 +152,7 @@ def test_all_versioned(self): def test_all_unversioned(self): info_a = self._make_info("local", "a") info_b = self._make_info("local", "b") - deps: crawler.CallDependencies = {info_a: [_leaf], info_b: [_leaf]} + deps: crawler.CallDependencies = {info_a: _leaf, info_b: _leaf} has, no = crawler.split_by_version_availability(deps) self.assertEqual(len(has), 0) @@ -171,8 +162,7 @@ def test_mixed(self): versioned = self._make_info("pkg", "x", "3.1") unversioned = 
self._make_info("local", "y") deps: crawler.CallDependencies = { - versioned: [_leaf], - unversioned: [_single_call], + versioned: _leaf, unversioned: _single_call, } has, no = crawler.split_by_version_availability(deps) @@ -181,14 +171,6 @@ def test_mixed(self): self.assertNotIn(versioned, no) self.assertNotIn(unversioned, has) - def test_preserves_callable_lists(self): - info = self._make_info("pkg", "z", "1.0") - callables = [_leaf, _single_call, _no_calls] - deps: crawler.CallDependencies = {info: callables} - - has, _ = crawler.split_by_version_availability(deps) - self.assertIs(has[info], callables) - def test_partition_is_exhaustive_and_disjoint(self): """Every key in the input appears in exactly one partition.""" infos = [ @@ -197,7 +179,7 @@ def test_partition_is_exhaustive_and_disjoint(self): self._make_info("pkg", "c", "0.1"), self._make_info("local", "d"), ] - deps: crawler.CallDependencies = {info: [_leaf] for info in infos} + deps: crawler.CallDependencies = {info: _leaf for info in infos} has, no = crawler.split_by_version_availability(deps) self.assertEqual(set(has) | set(no), set(deps)) @@ -208,8 +190,8 @@ def test_version_none_vs_empty_string(self): none_version = self._make_info("local", "f", None) empty_version = self._make_info("local", "g", "") deps: crawler.CallDependencies = { - none_version: [_leaf], - empty_version: [_leaf], + none_version: _leaf, + empty_version: _leaf, } has, no = crawler.split_by_version_availability(deps) From b4cd1c3d36059dccbe619e12c989849873bf4ac9 Mon Sep 17 00:00:00 2001 From: Sam Waseda Date: Tue, 24 Feb 2026 07:37:12 +0100 Subject: [PATCH 20/30] black --- tests/unit/test_crawler.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/unit/test_crawler.py b/tests/unit/test_crawler.py index 25a7a067..cf7606fa 100644 --- a/tests/unit/test_crawler.py +++ b/tests/unit/test_crawler.py @@ -162,7 +162,8 @@ def test_mixed(self): versioned = self._make_info("pkg", "x", "3.1") unversioned = 
self._make_info("local", "y") deps: crawler.CallDependencies = { - versioned: _leaf, unversioned: _single_call, + versioned: _leaf, + unversioned: _single_call, } has, no = crawler.split_by_version_availability(deps) From eac82eb9b64127b9383ee350d3e70e257e3c0bf0 Mon Sep 17 00:00:00 2001 From: Sam Waseda Date: Tue, 24 Feb 2026 07:41:57 +0100 Subject: [PATCH 21/30] ruff and mypy --- flowrep/crawler.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/flowrep/crawler.py b/flowrep/crawler.py index 96296d9e..aa721376 100644 --- a/flowrep/crawler.py +++ b/flowrep/crawler.py @@ -1,5 +1,4 @@ import ast -import inspect import types from collections.abc import Callable @@ -7,7 +6,7 @@ from flowrep.models.parsers import object_scope, parser_helpers -CallDependencies = dict[versions.VersionInfo, list[Callable]] +CallDependencies = dict[versions.VersionInfo, Callable] def get_call_dependencies( From 90c5538ac738753b62bc2927e030cdeeb7aabf6f Mon Sep 17 00:00:00 2001 From: liamhuber Date: Tue, 24 Feb 2026 07:31:44 -0800 Subject: [PATCH 22/30] Remove unused test functions Signed-off-by: liamhuber --- tests/unit/test_crawler.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/tests/unit/test_crawler.py b/tests/unit/test_crawler.py index cf7606fa..699f7cc5 100644 --- a/tests/unit/test_crawler.py +++ b/tests/unit/test_crawler.py @@ -31,14 +31,6 @@ def _diamond_root(): _diamond_b() -def _mutual_b(): - return _leaf() - - -def _mutual_a(): - return _mutual_b() - - # Mutual recursion to exercise cycle detection. 
def _cycle_a(): return _cycle_b() # noqa: F821 — defined below From befd245a8298e792db01476820c5d23f11bbfe16 Mon Sep 17 00:00:00 2001 From: liamhuber Date: Tue, 24 Feb 2026 07:32:45 -0800 Subject: [PATCH 23/30] Update docstring Signed-off-by: liamhuber --- flowrep/crawler.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/flowrep/crawler.py b/flowrep/crawler.py index aa721376..6a0bb2f0 100644 --- a/flowrep/crawler.py +++ b/flowrep/crawler.py @@ -19,10 +19,9 @@ def get_call_dependencies( Recursively collect all callable dependencies of *func* via AST introspection. Each dependency is keyed by its :class:`~pyiron_snippets.versions.VersionInfo` - and maps to the list of concrete callables sharing that identity. The search - is depth-first: for every resolved callee that is a - :class:`~types.FunctionType` (i.e. has inspectable source), the function - recurses into the callee's own scope. + and maps to the callable instance with that identity. The search is depth-first: + for every resolved callee that is a :class:`~types.FunctionType` (i.e. has + inspectable source), the function recurses into the callee's own scope. Args: func: The function whose call-graph to analyse. 
From 06ec56dcdc152a47cc949a5c4b64f294ef6687f5 Mon Sep 17 00:00:00 2001 From: liamhuber Date: Tue, 24 Feb 2026 07:37:06 -0800 Subject: [PATCH 24/30] Rename variable Signed-off-by: liamhuber --- flowrep/crawler.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/flowrep/crawler.py b/flowrep/crawler.py index 6a0bb2f0..1fae7a32 100644 --- a/flowrep/crawler.py +++ b/flowrep/crawler.py @@ -81,11 +81,11 @@ def split_by_version_availability( """ has_version: CallDependencies = {} no_version: CallDependencies = {} - for info, dependents in call_dependencies.items(): + for info, dependency in call_dependencies.items(): if info.version is None: - no_version[info] = dependents + no_version[info] = dependency else: - has_version[info] = dependents + has_version[info] = dependency return has_version, no_version From 850ab4ef311654d8eab8f3627858312ee105bee4 Mon Sep 17 00:00:00 2001 From: Sam Dareska <37879103+samwaseda@users.noreply.github.com> Date: Tue, 24 Feb 2026 16:37:14 +0100 Subject: [PATCH 25/30] Update flowrep/crawler.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- flowrep/crawler.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/flowrep/crawler.py b/flowrep/crawler.py index aa721376..ac7507ba 100644 --- a/flowrep/crawler.py +++ b/flowrep/crawler.py @@ -82,11 +82,11 @@ def split_by_version_availability( """ has_version: CallDependencies = {} no_version: CallDependencies = {} - for info, dependents in call_dependencies.items(): + for info, dependent in call_dependencies.items(): if info.version is None: - no_version[info] = dependents + no_version[info] = dependent else: - has_version[info] = dependents + has_version[info] = dependent return has_version, no_version From ef89712aff374d1ca6889527c97d87c1e8dcb4a3 Mon Sep 17 00:00:00 2001 From: liamhuber Date: Tue, 24 Feb 2026 09:22:40 -0800 Subject: [PATCH 26/30] Transform continue guard to failure This is only reachable if something is identified by 
ast as a `Call`, but _isn't_ callable. This should only happen in contrived situations. Let's just fail hard and ask for clarity. Signed-off-by: liamhuber --- flowrep/crawler.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/flowrep/crawler.py b/flowrep/crawler.py index 1fae7a32..84172fb2 100644 --- a/flowrep/crawler.py +++ b/flowrep/crawler.py @@ -54,8 +54,14 @@ def get_call_dependencies( except (ValueError, TypeError): continue - if not callable(caller): - continue + if not callable(caller): # pragma: no cover + # Under remotely normal circumstances, this should be unreachable + raise TypeError( + f"Caller {caller} is not callable, yet was generated from the list of " + f"ast.Call calls, in particular {call}. We're expecting these to " + f"actually connect to callables. Please raise a GitHub issue if you " + f"think this is not a mistake." + ) info = versions.VersionInfo.of(caller, version_scraping=version_scraping) call_dependencies[info] = caller From b880f18437aff3f3bd975009a94f532fc5e727cd Mon Sep 17 00:00:00 2001 From: liamhuber Date: Tue, 24 Feb 2026 09:23:46 -0800 Subject: [PATCH 27/30] Extend tests Signed-off-by: liamhuber --- tests/unit/test_crawler.py | 52 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/tests/unit/test_crawler.py b/tests/unit/test_crawler.py index 699f7cc5..f1c14116 100644 --- a/tests/unit/test_crawler.py +++ b/tests/unit/test_crawler.py @@ -1,3 +1,4 @@ +import math import unittest from pyiron_snippets import versions @@ -59,6 +60,24 @@ def _multi_call(): return a + b +def _attribute_access(x): + return math.sqrt(x) + + +def _nested_expression(x, y, z): + return _single_call(_leaf(x, y), z) + + +def _unresolvable_subscript(): + d = {} + return d["key"]() + + +def _calls_non_callable(): + x = 42 + return x + + def _fqn(func) -> str: return versions.VersionInfo.of(func).fully_qualified_name @@ -113,6 +132,39 @@ def test_returns_dict_type(self): deps = 
crawler.get_call_dependencies(_leaf) self.assertIsInstance(deps, dict) + # --- attribute access (module.func) --- + + def test_attribute_access_dependency(self): + """Functions called via attribute access (e.g. math.sqrt) are tracked.""" + deps = crawler.get_call_dependencies(_attribute_access) + self.assertIn(_fqn(math.sqrt), _fqns(deps)) + + # --- nested expressions --- + + def test_nested_expression_collects_all_calls(self): + """All calls in a nested expression like f(g(x), y) are collected.""" + deps = crawler.get_call_dependencies(_nested_expression) + fqns = _fqns(deps) + self.assertIn(_fqn(_single_call), fqns) + self.assertIn(_fqn(_leaf), fqns) + + # --- unresolvable / non-callable targets (coverage for `continue` branches) --- + + def test_unresolvable_call_target_is_skipped(self): + """Calls that resolve_symbol_to_object cannot handle are silently skipped.""" + # _unresolvable_subscript contains d["key"]() which is an ast.Subscript, + # triggering a TypeError in resolve_symbol_to_object + deps = crawler.get_call_dependencies(_unresolvable_subscript) + # Should not raise; the unresolvable call is simply absent + self.assertIsInstance(deps, dict) + + def test_non_callable_resolved_symbol_is_skipped(self): + """Symbols that resolve to non-callable objects are silently skipped.""" + # _calls_non_callable doesn't actually have a call in its AST that resolves + # to a non-callable, but we can verify the function itself is crawlable + deps = crawler.get_call_dependencies(_calls_non_callable) + self.assertIsInstance(deps, dict) + class TestSplitByVersionAvailability(unittest.TestCase): """Tests for :func:`crawler.split_by_version_availability`.""" From 4aafbd36612a0723b4a1307770372e91bdfba3a2 Mon Sep 17 00:00:00 2001 From: liamhuber Date: Tue, 24 Feb 2026 09:25:54 -0800 Subject: [PATCH 28/30] Move and rename Signed-off-by: liamhuber --- .../parsers/dependency_parser.py} | 0 .../parsers/test_dependency_parser.py} | 52 +++++++++---------- 2 files changed, 26 
insertions(+), 26 deletions(-) rename flowrep/{crawler.py => models/parsers/dependency_parser.py} (100%) rename tests/unit/{test_crawler.py => models/parsers/test_dependency_parser.py} (76%) diff --git a/flowrep/crawler.py b/flowrep/models/parsers/dependency_parser.py similarity index 100% rename from flowrep/crawler.py rename to flowrep/models/parsers/dependency_parser.py diff --git a/tests/unit/test_crawler.py b/tests/unit/models/parsers/test_dependency_parser.py similarity index 76% rename from tests/unit/test_crawler.py rename to tests/unit/models/parsers/test_dependency_parser.py index f1c14116..ad8f278a 100644 --- a/tests/unit/test_crawler.py +++ b/tests/unit/models/parsers/test_dependency_parser.py @@ -3,7 +3,7 @@ from pyiron_snippets import versions -from flowrep import crawler +from flowrep.models.parsers import dependency_parser # --------------------------------------------------------------------------- # Helper functions defined at module level so they have inspectable source, @@ -82,25 +82,25 @@ def _fqn(func) -> str: return versions.VersionInfo.of(func).fully_qualified_name -def _fqns(deps: crawler.CallDependencies) -> set[str]: +def _fqns(deps: dependency_parser.CallDependencies) -> set[str]: return {info.fully_qualified_name for info in deps} class TestGetCallDependencies(unittest.TestCase): - """Tests for :func:`crawler.get_call_dependencies`.""" + """Tests for :func:`dependency_parser.get_call_dependencies`.""" # --- basic behaviour --- def test_no_calls_returns_empty(self): - deps = crawler.get_call_dependencies(_no_calls) + deps = dependency_parser.get_call_dependencies(_no_calls) self.assertEqual(deps, {}) def test_single_direct_call(self): - deps = crawler.get_call_dependencies(_single_call) + deps = dependency_parser.get_call_dependencies(_single_call) self.assertIn(_fqn(_leaf), _fqns(deps)) def test_transitive_dependencies(self): - deps = crawler.get_call_dependencies(_nested_call) + deps = 
dependency_parser.get_call_dependencies(_nested_call) fqns = _fqns(deps) # Should find both _single_call and _leaf self.assertIn(_fqn(_single_call), fqns) @@ -111,7 +111,7 @@ def test_diamond_dependency_no_duplicate_keys(self): _diamond_root -> _diamond_a -> _leaf AND _diamond_root -> _diamond_b -> _leaf. _leaf's VersionInfo should appear exactly once as a key. """ - deps = crawler.get_call_dependencies(_diamond_root) + deps = dependency_parser.get_call_dependencies(_diamond_root) matching = [info for info in deps if info.fully_qualified_name == _fqn(_leaf)] self.assertEqual(len(matching), 1) @@ -119,31 +119,31 @@ def test_diamond_dependency_no_duplicate_keys(self): def test_cycle_does_not_recurse_infinitely(self): # Should terminate without RecursionError - deps = crawler.get_call_dependencies(_cycle_a) + deps = dependency_parser.get_call_dependencies(_cycle_a) self.assertIn(_fqn(_cycle_b), _fqns(deps)) # --- builtins / non-FunctionType callables --- def test_builtin_callable_included(self): - deps = crawler.get_call_dependencies(_calls_len) + deps = dependency_parser.get_call_dependencies(_calls_len) self.assertIn(_fqn(len), _fqns(deps)) def test_returns_dict_type(self): - deps = crawler.get_call_dependencies(_leaf) + deps = dependency_parser.get_call_dependencies(_leaf) self.assertIsInstance(deps, dict) # --- attribute access (module.func) --- def test_attribute_access_dependency(self): """Functions called via attribute access (e.g. 
math.sqrt) are tracked.""" - deps = crawler.get_call_dependencies(_attribute_access) + deps = dependency_parser.get_call_dependencies(_attribute_access) self.assertIn(_fqn(math.sqrt), _fqns(deps)) # --- nested expressions --- def test_nested_expression_collects_all_calls(self): """All calls in a nested expression like f(g(x), y) are collected.""" - deps = crawler.get_call_dependencies(_nested_expression) + deps = dependency_parser.get_call_dependencies(_nested_expression) fqns = _fqns(deps) self.assertIn(_fqn(_single_call), fqns) self.assertIn(_fqn(_leaf), fqns) @@ -154,7 +154,7 @@ def test_unresolvable_call_target_is_skipped(self): """Calls that resolve_symbol_to_object cannot handle are silently skipped.""" # _unresolvable_subscript contains d["key"]() which is an ast.Subscript, # triggering a TypeError in resolve_symbol_to_object - deps = crawler.get_call_dependencies(_unresolvable_subscript) + deps = dependency_parser.get_call_dependencies(_unresolvable_subscript) # Should not raise; the unresolvable call is simply absent self.assertIsInstance(deps, dict) @@ -162,12 +162,12 @@ def test_non_callable_resolved_symbol_is_skipped(self): """Symbols that resolve to non-callable objects are silently skipped.""" # _calls_non_callable doesn't actually have a call in its AST that resolves # to a non-callable, but we can verify the function itself is crawlable - deps = crawler.get_call_dependencies(_calls_non_callable) + deps = dependency_parser.get_call_dependencies(_calls_non_callable) self.assertIsInstance(deps, dict) class TestSplitByVersionAvailability(unittest.TestCase): - """Tests for :func:`crawler.split_by_version_availability`.""" + """Tests for :func:`dependency_parser.split_by_version_availability`.""" @staticmethod def _make_info( @@ -180,37 +180,37 @@ def _make_info( ) def test_empty_input(self): - has, no = crawler.split_by_version_availability({}) + has, no = dependency_parser.split_by_version_availability({}) self.assertEqual(has, {}) self.assertEqual(no, 
{}) def test_all_versioned(self): info_a = self._make_info("pkg", "a", "1.0") info_b = self._make_info("pkg", "b", "2.0") - deps: crawler.CallDependencies = {info_a: _leaf, info_b: _leaf} + deps: dependency_parser.CallDependencies = {info_a: _leaf, info_b: _leaf} - has, no = crawler.split_by_version_availability(deps) + has, no = dependency_parser.split_by_version_availability(deps) self.assertEqual(len(has), 2) self.assertEqual(len(no), 0) def test_all_unversioned(self): info_a = self._make_info("local", "a") info_b = self._make_info("local", "b") - deps: crawler.CallDependencies = {info_a: _leaf, info_b: _leaf} + deps: dependency_parser.CallDependencies = {info_a: _leaf, info_b: _leaf} - has, no = crawler.split_by_version_availability(deps) + has, no = dependency_parser.split_by_version_availability(deps) self.assertEqual(len(has), 0) self.assertEqual(len(no), 2) def test_mixed(self): versioned = self._make_info("pkg", "x", "3.1") unversioned = self._make_info("local", "y") - deps: crawler.CallDependencies = { + deps: dependency_parser.CallDependencies = { versioned: _leaf, unversioned: _single_call, } - has, no = crawler.split_by_version_availability(deps) + has, no = dependency_parser.split_by_version_availability(deps) self.assertIn(versioned, has) self.assertIn(unversioned, no) self.assertNotIn(versioned, no) @@ -224,9 +224,9 @@ def test_partition_is_exhaustive_and_disjoint(self): self._make_info("pkg", "c", "0.1"), self._make_info("local", "d"), ] - deps: crawler.CallDependencies = {info: _leaf for info in infos} + deps: dependency_parser.CallDependencies = {info: _leaf for info in infos} - has, no = crawler.split_by_version_availability(deps) + has, no = dependency_parser.split_by_version_availability(deps) self.assertEqual(set(has) | set(no), set(deps)) self.assertTrue(set(has).isdisjoint(set(no))) @@ -234,12 +234,12 @@ def test_version_none_vs_empty_string(self): """Only ``None`` counts as unversioned; an empty string is still 'versioned'.""" none_version 
= self._make_info("local", "f", None) empty_version = self._make_info("local", "g", "") - deps: crawler.CallDependencies = { + deps: dependency_parser.CallDependencies = { none_version: _leaf, empty_version: _leaf, } - has, no = crawler.split_by_version_availability(deps) + has, no = dependency_parser.split_by_version_availability(deps) self.assertIn(none_version, no) self.assertIn(empty_version, has) From ad7af9cd819bb1970fc08c57ca01f8efa990cf52 Mon Sep 17 00:00:00 2001 From: liamhuber Date: Tue, 24 Feb 2026 09:29:10 -0800 Subject: [PATCH 29/30] Add comment Signed-off-by: liamhuber --- flowrep/models/parsers/dependency_parser.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/flowrep/models/parsers/dependency_parser.py b/flowrep/models/parsers/dependency_parser.py index 84172fb2..189b28e4 100644 --- a/flowrep/models/parsers/dependency_parser.py +++ b/flowrep/models/parsers/dependency_parser.py @@ -64,6 +64,12 @@ def get_call_dependencies( ) info = versions.VersionInfo.of(caller, version_scraping=version_scraping) + # In principle, we open ourselves to overwriting an existing dependency here, + # but it would need to somehow have exactly the same version info (including + # qualname) yet be a different object. + # This ought not happen by accident, and in case it somehow does happen on + # purpose (it probably shouldn't), we just silently keep the more recent one. 
+ call_dependencies[info] = caller # Depth-first search on dependencies — only possible when we have source From d81e55e13ead3e2b53aa2f275be5abee1638d7f4 Mon Sep 17 00:00:00 2001 From: liamhuber Date: Tue, 24 Feb 2026 09:31:08 -0800 Subject: [PATCH 30/30] Add test Signed-off-by: liamhuber --- tests/unit/models/parsers/test_dependency_parser.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/unit/models/parsers/test_dependency_parser.py b/tests/unit/models/parsers/test_dependency_parser.py index ad8f278a..d4b69cf9 100644 --- a/tests/unit/models/parsers/test_dependency_parser.py +++ b/tests/unit/models/parsers/test_dependency_parser.py @@ -115,6 +115,12 @@ def test_diamond_dependency_no_duplicate_keys(self): matching = [info for info in deps if info.fully_qualified_name == _fqn(_leaf)] self.assertEqual(len(matching), 1) + def test_duplicate_call_deduplicated_by_version_info(self): + """Calling the same function twice yields a single key, not two.""" + deps = dependency_parser.get_call_dependencies(_multi_call) + matching = [info for info in deps if info.fully_qualified_name == _fqn(_leaf)] + self.assertEqual(len(matching), 1) + # --- cycle safety --- def test_cycle_does_not_recurse_infinitely(self):