-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathanalyze_depth.py
More file actions
executable file
·165 lines (133 loc) · 5.77 KB
/
analyze_depth.py
File metadata and controls
executable file
·165 lines (133 loc) · 5.77 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
#!/usr/bin/env python3
"""
Analyze the depth of nodes in SearchExport.json
Find the longest path/deepest node
"""
import json
import sys
def analyze_item_depth(item):
    """Return the number of structural levels contributed by one item.

    The total is the sum of:
      * 1 for the item itself,
      * 1 when the item has a truthy ``stewardOrg``,
      * the element count of the longest classification that contains at
        least one named element,
      * the number of ``dataElementConcept`` concepts,
      * the number of ``property`` concepts.
    """
    # The item is always a level; a steward organization adds one more.
    levels = 2 if item.get('stewardOrg') else 1

    # Only the longest qualifying classification chain counts.
    longest_chain = 0
    for cls in item.get('classification') or []:
        elements = cls.get('elements')
        if not elements:
            continue
        # A chain qualifies as soon as any element carries a name, but it
        # is then measured by its full element count (named or not).
        named = [elem for elem in elements if elem.get('name')]
        if named:
            longest_chain = max(longest_chain, len(elements))
    levels += longest_chain

    # Data element concepts and property concepts share the same layout.
    for section in ('dataElementConcept', 'property'):
        holder = item.get(section)
        if holder and holder.get('concepts'):
            levels += len(holder['concepts'])

    return levels
def find_deepest_path(item):
    """Build the labelled list of levels for one item.

    The path always starts with an ``Element`` entry (the first
    designation, or ``'Unnamed Element'``) and a ``Steward`` entry (the
    steward name, or ``'Unknown Steward'``). It is followed by one
    ``Classification`` entry per named classification element, one
    ``Concept`` entry per data element concept and one ``Property`` entry
    per property concept.

    Keys that are missing, null or empty are all treated the same way, so
    an item with ``"designations": []`` or ``"stewardOrg": null`` falls
    back to the placeholder names instead of raising.

    Returns:
        A ``(depth, path)`` tuple where ``path`` is a list of
        ``(level_type, name)`` tuples and ``depth`` is ``len(path)``.
    """
    # dict.get's default only applies when the key is absent; the `or`
    # fallbacks also cover keys that are present but null or empty.
    designations = item.get('designations') or [{}]
    first_designation = designations[0] or {}
    path = [('Element', first_designation.get('designation', 'Unnamed Element'))]

    steward_org = item.get('stewardOrg') or {}
    path.append(('Steward', steward_org.get('name', 'Unknown Steward')))

    # NOTE(review): named elements from *every* classification are appended
    # in order, so several classifications lengthen the path together
    # rather than competing for the longest branch - confirm this is the
    # intended meaning of "deepest".
    for cls in item.get('classification') or []:
        for elem in cls.get('elements') or []:
            if elem.get('name'):
                path.append(('Classification', elem['name']))

    concept_holder = item.get('dataElementConcept') or {}
    for concept in concept_holder.get('concepts') or []:
        path.append(('Concept', concept.get('name', 'Unknown')))

    property_holder = item.get('property') or {}
    for prop in property_holder.get('concepts') or []:
        path.append(('Property', prop.get('name', 'Unknown')))

    return len(path), path
def main():
    """Load ``SearchExport.json`` and print a depth report to stdout.

    The report contains the maximum path depth (as computed by
    ``find_deepest_path``), a histogram of depths, the first item found at
    the maximum depth, and counts of how many items carry classification,
    data element concept and property information.

    Exits with status 1 if the file cannot be read or parsed. An export
    with no items prints a short notice and returns, because every
    percentage in the report divides by the item count.
    """
    print("Loading SearchExport.json...")
    try:
        with open('SearchExport.json', 'r', encoding='utf-8') as f:
            data = json.load(f)
    except Exception as e:
        # Top-level boundary of the script: report any load failure and stop.
        print(f"Error loading file: {e}")
        sys.exit(1)

    total = len(data)
    print(f"Loaded {total} items\n")
    if not total:
        # Nothing to analyze; also avoids dividing by zero below.
        print("No items to analyze.")
        return

    max_depth = 0
    deepest_items = []
    depth_distribution = {}
    print("Analyzing depths...")
    for i, item in enumerate(data):
        depth, path = find_deepest_path(item)
        if depth > max_depth:
            # New record: discard everything collected at the old maximum.
            max_depth = depth
            deepest_items = [(i, item, path)]
        elif depth == max_depth:
            deepest_items.append((i, item, path))
        depth_distribution[depth] = depth_distribution.get(depth, 0) + 1
        if (i + 1) % 1000 == 0:
            # '\r' keeps the progress counter on a single console line.
            print(f" Processed {i + 1}/{total} items...", end='\r')

    print(f"\n\n{'='*60}")
    print("DEPTH ANALYSIS RESULTS")
    print('='*60)
    print(f"\nMaximum depth found: {max_depth} levels")
    print(f"Number of items at maximum depth: {len(deepest_items)}")

    print(f"\n{'='*60}")
    print("DEPTH DISTRIBUTION")
    print('='*60)
    for depth in sorted(depth_distribution):
        count = depth_distribution[depth]
        percentage = (count / total) * 100
        # One bar character per two percentage points.
        bar = '█' * int(percentage / 2)
        print(f"Depth {depth:2d}: {count:6d} items ({percentage:5.1f}%) {bar}")

    print(f"\n{'='*60}")
    print("DEEPEST PATH(S) - First Example")
    print('='*60)
    if deepest_items:
        idx, item, path = deepest_items[0]
        # `or` fallbacks cover a null or empty designations list, which
        # dict.get's default alone does not.
        first_designation = (item.get('designations') or [{}])[0] or {}
        print(f"\nItem index: {idx}")
        print(f"Designation: {first_designation.get('designation', 'N/A')}")
        print(f"\nPath ({len(path)} levels):")
        for level, (path_type, path_name) in enumerate(path, 1):
            indent = " " * (level - 1)
            print(f"{indent}{level}. [{path_type}] {path_name}")
        if len(deepest_items) > 1:
            print(f"\n... and {len(deepest_items) - 1} more item(s) at this depth")

    print(f"\n{'='*60}")
    print("STRUCTURE BREAKDOWN")
    print('='*60)
    # Analyze structure components; null sections count as absent.
    has_classification = sum(1 for item in data if item.get('classification'))
    has_concepts = sum(
        1 for item in data
        if (item.get('dataElementConcept') or {}).get('concepts')
    )
    has_properties = sum(
        1 for item in data
        if (item.get('property') or {}).get('concepts')
    )
    print(f"\nItems with classification: {has_classification} ({has_classification/total*100:.1f}%)")
    print(f"Items with data element concepts: {has_concepts} ({has_concepts/total*100:.1f}%)")
    print(f"Items with properties: {has_properties} ({has_properties/total*100:.1f}%)")

    # Count classification levels: one tally per classification entry that
    # has elements, keyed by how many of those elements are named.
    classification_levels = {}
    for item in data:
        for cls in item.get('classification') or []:
            elements = cls.get('elements')
            if elements:
                level_count = sum(1 for e in elements if e.get('name'))
                classification_levels[level_count] = classification_levels.get(level_count, 0) + 1
    if classification_levels:
        print("\nClassification element counts:")
        for count in sorted(classification_levels):
            print(f" {count} element(s): {classification_levels[count]} items")
# Run the analysis only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()