Skip to content

Commit 57a57e1

Browse files
authored
Merge pull request #218 from GREENRAT-K405/feat/test-malformed-graph
add tests for graphml with strict xml checks
2 parents 7ab05be + a42d3ff commit 57a57e1

2 files changed

Lines changed: 170 additions & 3 deletions

File tree

concore_cli/commands/validate.py

Lines changed: 37 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from rich.panel import Panel
44
from rich.table import Table
55
import re
6+
import xml.etree.ElementTree as ET
67

78
def validate_workflow(workflow_file, console):
89
workflow_path = Path(workflow_file)
@@ -22,15 +23,34 @@ def validate_workflow(workflow_file, console):
2223
errors.append("File is empty")
2324
return show_results(console, errors, warnings, info)
2425

26+
# strict XML syntax check
27+
try:
28+
ET.fromstring(content)
29+
except ET.ParseError as e:
30+
errors.append(f"Invalid XML: {str(e)}")
31+
return show_results(console, errors, warnings, info)
32+
2533
try:
2634
soup = BeautifulSoup(content, 'xml')
2735
except Exception as e:
2836
errors.append(f"Invalid XML: {str(e)}")
2937
return show_results(console, errors, warnings, info)
3038

31-
if not soup.find('graphml'):
39+
root = soup.find('graphml')
40+
if not root:
3241
errors.append("Not a valid GraphML file - missing <graphml> root element")
3342
return show_results(console, errors, warnings, info)
43+
44+
# check the graph attributes
45+
graph = soup.find('graph')
46+
if not graph:
47+
errors.append("Missing <graph> element")
48+
else:
49+
edgedefault = graph.get('edgedefault')
50+
if not edgedefault:
51+
errors.append("Graph missing required 'edgedefault' attribute")
52+
elif edgedefault not in ['directed', 'undirected']:
53+
errors.append(f"Invalid edgedefault value '{edgedefault}' (must be 'directed' or 'undirected')")
3454

3555
nodes = soup.find_all('node')
3656
edges = soup.find_all('edge')
@@ -47,8 +67,19 @@ def validate_workflow(workflow_file, console):
4767

4868
node_labels = []
4969
for node in nodes:
70+
#check the node id
71+
node_id = node.get('id')
72+
if not node_id:
73+
errors.append("Node missing required 'id' attribute")
74+
#skip further checks for this node to avoid noise
75+
continue
76+
5077
try:
78+
#robust find: try with namespace prefix first, then without
5179
label_tag = node.find('y:NodeLabel')
80+
if not label_tag:
81+
label_tag = node.find('NodeLabel')
82+
5283
if label_tag and label_tag.text:
5384
label = label_tag.text.strip()
5485
node_labels.append(label)
@@ -60,13 +91,13 @@ def validate_workflow(workflow_file, console):
6091
if len(parts) != 2:
6192
warnings.append(f"Node '{label}' has invalid format")
6293
else:
63-
node_id, filename = parts
94+
nodeId_part, filename = parts
6495
if not filename:
6596
errors.append(f"Node '{label}' has no filename")
6697
elif not any(filename.endswith(ext) for ext in ['.py', '.cpp', '.m', '.v', '.java']):
6798
warnings.append(f"Node '{label}' has unusual file extension")
6899
else:
69-
warnings.append(f"Node {node.get('id', 'unknown')} has no label")
100+
warnings.append(f"Node {node_id} has no label")
70101
except Exception as e:
71102
warnings.append(f"Error parsing node: {str(e)}")
72103

@@ -91,6 +122,9 @@ def validate_workflow(workflow_file, console):
91122
for edge in edges:
92123
try:
93124
label_tag = edge.find('y:EdgeLabel')
125+
if not label_tag:
126+
label_tag = edge.find('EdgeLabel')
127+
94128
if label_tag and label_tag.text:
95129
if edge_label_regex.match(label_tag.text.strip()):
96130
zmq_edges += 1

tests/test_graph.py

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
import unittest
2+
import tempfile
3+
import shutil
4+
from pathlib import Path
5+
from click.testing import CliRunner
6+
from concore_cli.cli import cli
7+
8+
class TestGraphValidation(unittest.TestCase):
    """CLI-level tests for the ``validate`` command on GraphML input.

    Each test writes a small (often deliberately broken) document into a
    per-test temporary directory, runs ``validate`` on it through click's
    ``CliRunner`` and checks the text the command printed.
    """

    def setUp(self):
        """Create the CLI runner and a scratch directory for this test."""
        self.runner = CliRunner()
        self.temp_dir = tempfile.mkdtemp()

    def tearDown(self):
        """Delete the scratch directory together with any files in it."""
        if Path(self.temp_dir).exists():
            shutil.rmtree(self.temp_dir)

    def create_graph_file(self, filename, content):
        """Write ``content`` to ``filename`` inside the scratch directory.

        Returns:
            The path of the written file, as a string.
        """
        filepath = Path(self.temp_dir) / filename
        # Explicit encoding: the bytes on disk must not depend on the
        # platform's default locale encoding.
        filepath.write_text(content, encoding='utf-8')
        return str(filepath)

    def _validate(self, filename, content):
        """Write ``content`` to ``filename`` and run ``validate`` on it.

        Returns:
            The result object produced by ``CliRunner.invoke``.
        """
        filepath = self.create_graph_file(filename, content)
        return self.runner.invoke(cli, ['validate', filepath])

    def test_validate_corrupted_xml(self):
        """Unclosed tags are reported as invalid XML."""
        content = '<graphml><node id="n0">'

        result = self._validate('corrupted.graphml', content)

        self.assertIn('Validation failed', result.output)
        self.assertIn('Invalid XML', result.output)

    def test_validate_empty_file(self):
        """A zero-length file is reported as empty."""
        result = self._validate('empty.graphml', '')

        self.assertIn('Validation failed', result.output)
        self.assertIn('File is empty', result.output)

    def test_validate_missing_node_id(self):
        """A <node> without an ``id`` attribute is reported."""
        content = '''
        <graphml xmlns:y="http://www.yworks.com/xml/graphml">
            <graph id="G" edgedefault="directed">
                <node>
                    <data key="d0"><y:NodeLabel>n0:script.py</y:NodeLabel></data>
                </node>
            </graph>
        </graphml>
        '''

        result = self._validate('missing_id.graphml', content)

        self.assertIn('Validation failed', result.output)
        self.assertIn("Node missing required 'id' attribute", result.output)

    def test_validate_missing_edgedefault(self):
        """A <graph> without ``edgedefault`` is reported."""
        content = '''
        <graphml xmlns:y="http://www.yworks.com/xml/graphml">
            <graph id="G">
                <node id="n0">
                    <data key="d0"><y:NodeLabel>n0:script.py</y:NodeLabel></data>
                </node>
            </graph>
        </graphml>
        '''

        result = self._validate('missing_default.graphml', content)

        self.assertIn('Validation failed', result.output)
        self.assertIn("Graph missing required 'edgedefault'", result.output)

    def test_validate_missing_root_element(self):
        """Well-formed XML whose root is not <graphml> is rejected."""
        content = '<?xml version="1.0"?><other_root></other_root>'

        result = self._validate('not_graphml.xml', content)

        self.assertIn('Validation failed', result.output)
        self.assertIn('missing <graphml> root element', result.output)

    def test_validate_broken_edges(self):
        """An edge whose target node is not declared is reported."""
        # The edge points at "n1", but only "n0" exists in the graph.
        content = '''
        <graphml xmlns:y="http://www.yworks.com/xml/graphml">
            <graph id="G" edgedefault="directed">
                <node id="n0">
                    <data key="d0"><y:NodeLabel>n0:script.py</y:NodeLabel></data>
                </node>
                <edge source="n0" target="n1"/>
            </graph>
        </graphml>
        '''

        result = self._validate('bad_edge.graphml', content)

        self.assertIn('Validation failed', result.output)
        self.assertIn('Edge references non-existent target node', result.output)

    def test_validate_node_missing_filename(self):
        """A node label with nothing after the colon is reported."""
        content = '''
        <graphml xmlns:y="http://www.yworks.com/xml/graphml">
            <graph id="G" edgedefault="directed">
                <node id="n0">
                    <data key="d0"><y:NodeLabel>n0:</y:NodeLabel></data>
                </node>
            </graph>
        </graphml>
        '''

        result = self._validate('bad_node.graphml', content)

        self.assertIn('Validation failed', result.output)
        self.assertIn('has no filename', result.output)

    def test_validate_valid_graph(self):
        """A minimal well-formed graph passes validation."""
        content = '''
        <graphml xmlns:y="http://www.yworks.com/xml/graphml">
            <graph id="G" edgedefault="directed">
                <node id="n0">
                    <data key="d0"><y:NodeLabel>n0:script.py</y:NodeLabel></data>
                </node>
            </graph>
        </graphml>
        '''

        result = self._validate('valid.graphml', content)

        self.assertIn('Validation passed', result.output)
        self.assertIn('Workflow is valid', result.output)
131+
132+
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()

0 commit comments

Comments
 (0)