-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcomfyui_workflow_models.py
More file actions
401 lines (321 loc) · 15 KB
/
comfyui_workflow_models.py
File metadata and controls
401 lines (321 loc) · 15 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
import json
import os
import csv
from pathlib import Path
from collections import defaultdict
# Optional dependency: openpyxl enables .xlsx output.  When it is missing the
# script still works — write_excel() checks EXCEL_AVAILABLE and falls back to
# plain CSV files instead.
try:
    import openpyxl
    from openpyxl.styles import Font, PatternFill, Alignment
    from openpyxl.utils import get_column_letter
    EXCEL_AVAILABLE = True  # checked by write_excel() before using openpyxl
except ImportError:
    EXCEL_AVAILABLE = False
    print("Warning: openpyxl not installed. Will create CSV files instead.")
    print("To create Excel files, install openpyxl: apt install python3-openpyxl or pip install openpyxl")
def get_model_categories(models_dir):
    """Return the sorted names of every immediate subdirectory of *models_dir*.

    Each subfolder (e.g. ``checkpoints``, ``loras``) is treated as one model
    category; plain files directly under *models_dir* are ignored.
    """
    root = Path(models_dir)
    names = [entry.name for entry in root.iterdir() if entry.is_dir()]
    names.sort()
    return names
def scan_model_files(models_dir):
    """Scan the models directory and catalog all model files by category.

    Each immediate subdirectory of *models_dir* is a category; files are
    collected recursively below each category folder.

    Args:
        models_dir: path (str or Path) to the ComfyUI models directory.

    Returns:
        defaultdict(list): maps category name -> list of dicts with keys
        'filename', 'rel_path' (POSIX-style path relative to the category
        folder), 'full_path', and 'size_mb' (rounded to 2 decimals; 0 when
        the file size cannot be read).
    """
    models_dir = Path(models_dir)
    models = defaultdict(list)
    print(f"Scanning models directory: {models_dir}")
    # Fix: derive the category list from the directory itself instead of
    # relying on the MODEL_CATEGORIES global, which is only defined once
    # main() has run (calling this function standalone raised NameError).
    # main() sets MODEL_CATEGORIES to exactly this sorted subfolder list, so
    # behavior on the normal call path is unchanged.
    categories = sorted(d.name for d in models_dir.iterdir() if d.is_dir())
    for category in categories:
        category_path = models_dir / category
        if category_path.exists() and category_path.is_dir():
            # Recursively find all files in this category
            for file_path in category_path.rglob('*'):
                if file_path.is_file():
                    # Get file size in MB; unreadable files are recorded as 0
                    try:
                        size_bytes = file_path.stat().st_size
                        size_mb = round(size_bytes / (1024 * 1024), 2)
                    except Exception:
                        size_mb = 0
                    # Store filename, relative path, and size
                    rel_path = file_path.relative_to(category_path)
                    models[category].append({
                        'filename': file_path.name,
                        'rel_path': str(rel_path).replace('\\', '/'),
                        'full_path': str(file_path),
                        'size_mb': size_mb
                    })
            if models[category]:
                print(f" {category}: {len(models[category])} file(s)")
    return models
def find_model_references(workflow_data, models):
    """Recursively search for model file references in workflow JSON.

    Walks the (possibly deeply nested) workflow structure and compares every
    string value against the cataloged models.  Returns a dict mapping
    category name -> sorted list of matched relative paths.
    """
    matches = defaultdict(set)
    # Node-input keys that commonly carry a model filename.
    model_keys = {'ckpt_name', 'model_name', 'lora_name', 'vae_name',
                  'checkpoint', 'model', 'config_name', 'filename',
                  'unet_name', 'clip_name', 'control_net_name',
                  'upscale_model', 'sam_model_name', 'control_net',
                  'embedding_name', 'hypernetwork_name',
                  'diffusion_model', 'text_encoder', 'clip_vision',
                  'weight_dtype', 'mmproj'}

    def record(value):
        # Compare *value* against every cataloged model; all strategies feed
        # the same set, so combining them with `or` is equivalent to the
        # first-match-wins chain.
        if not value or not isinstance(value, str):
            return
        normalized = value.replace('\\', '/')
        for category, entries in models.items():
            for entry in entries:
                fname = entry['filename']
                rel = entry['rel_path']
                hit = (
                    # 1. Exact filename (optionally path-prefixed)
                    value == fname
                    or value.endswith('/' + fname)
                    or value.endswith('\\' + fname)
                    # 2. Relative path match
                    or normalized == rel
                    or normalized.endswith('/' + rel)
                    # 3. Filename contained anywhere in the value
                    or fname in value
                    # 4. Basename of the value equals the filename
                    or os.path.basename(normalized) == fname
                )
                if hit:
                    matches[category].add(rel)

    def walk(node):
        if isinstance(node, dict):
            for key, value in node.items():
                if key in model_keys and isinstance(value, str):
                    record(value)
                walk(value)  # recurse into nested structures
        elif isinstance(node, list):
            for item in node:
                walk(item)
        elif isinstance(node, str):
            # Any bare string may also name a model
            record(node)

    walk(workflow_data)
    # Sets -> sorted lists for stable, joinable output
    return {k: sorted(list(v)) for k, v in matches.items()}
def scan_workflows(workflow_dir, models):
    """Scan all JSON workflows and match them to models.

    Returns a list of row dicts: 'Workflow' (relative POSIX path) plus one
    comma-joined column per category in MODEL_CATEGORIES (empty string when
    no model of that category is referenced).  Invalid or unreadable files
    are reported and skipped.
    """
    workflow_dir = Path(workflow_dir)
    results = []
    if not workflow_dir.exists():
        print(f"Error: Workflow directory does not exist: {workflow_dir}")
        return results
    # Find all JSON files, recursively
    json_files = list(workflow_dir.rglob('*.json'))
    print(f"\nFound {len(json_files)} workflow file(s)")
    for json_file in json_files:
        try:
            with open(json_file, 'r', encoding='utf-8') as handle:
                workflow_data = json.load(handle)
            relative_path = json_file.relative_to(workflow_dir)
            matches = find_model_references(workflow_data, models)
            row = {'Workflow': str(relative_path).replace('\\', '/')}
            # One column per known category; empty string when unmatched.
            # NOTE(review): reads the MODEL_CATEGORIES global set by main().
            for category in MODEL_CATEGORIES:
                row[category] = ','.join(matches[category]) if matches.get(category) else ''
            results.append(row)
            print(f" Processed: {relative_path.name}")
        except json.JSONDecodeError as e:
            print(f" Error: Invalid JSON in {json_file}: {e}")
        except Exception as e:
            print(f" Error processing {json_file}: {e}")
    return results
def write_excel(workflow_results, model_usage_data, output_file='workflow_models.xlsx'):
    """Write results to Excel file with two tabs.

    Args:
        workflow_results: list of row dicts ('Workflow' plus one key per
            category in MODEL_CATEGORIES) as produced by scan_workflows().
        model_usage_data: list of dicts with keys 'Model', 'Folder',
            'Size_MB', 'Workflow_Count' from create_model_usage_report().
        output_file: path of the .xlsx file to create.

    When openpyxl is not installed (EXCEL_AVAILABLE is False) this falls
    back to writing the two CSV files instead.
    NOTE(review): reads the MODEL_CATEGORIES global set by main().
    """
    if not EXCEL_AVAILABLE:
        print("\nFalling back to CSV files...")
        write_csv_fallback(workflow_results, 'workflow_models.csv')
        write_model_usage_csv(model_usage_data, 'model_usage.csv')
        return
    # Create a new workbook
    wb = openpyxl.Workbook()
    wb.remove(wb.active)  # Remove the default sheet properly
    # Create first sheet: Workflow Models
    ws1 = wb.create_sheet('Workflow Models')
    # Headers for first sheet: workflow path plus one column per category
    headers1 = ['Workflow'] + MODEL_CATEGORIES
    ws1.append(headers1)
    # Style the header row (blue fill, white bold, centered)
    header_fill = PatternFill(start_color='366092', end_color='366092', fill_type='solid')
    header_font = Font(bold=True, color='FFFFFF')
    for col_num, _ in enumerate(headers1, 1):
        cell = ws1.cell(row=1, column=col_num)
        cell.fill = header_fill
        cell.font = header_font
        cell.alignment = Alignment(horizontal='center', vertical='center')
    # Add workflow data
    for result in workflow_results:
        row_data = [result['Workflow']]
        for category in MODEL_CATEGORIES:
            # Ensure empty cells are truly empty strings, not None
            value = result.get(category, '')
            row_data.append(value if value else '')
        ws1.append(row_data)
    # Auto-adjust column widths for first sheet
    for col_num, col_name in enumerate(headers1, 1):
        column_letter = get_column_letter(col_num)
        if col_num == 1:  # Workflow column
            ws1.column_dimensions[column_letter].width = 50
        else:
            ws1.column_dimensions[column_letter].width = 30
    # Freeze the header row
    ws1.freeze_panes = 'A2'
    # Create second sheet: Model Usage
    ws2 = wb.create_sheet('Model Usage')
    # Headers for second sheet
    headers2 = ['Model', 'Folder', 'Size_MB', 'Workflow_Count']
    ws2.append(headers2)
    # Style the header row (same fill/font objects as sheet 1)
    for col_num, _ in enumerate(headers2, 1):
        cell = ws2.cell(row=1, column=col_num)
        cell.fill = header_fill
        cell.font = header_font
        cell.alignment = Alignment(horizontal='center', vertical='center')
    # Add model usage data, one row per model
    for row_data in model_usage_data:
        ws2.append([row_data['Model'], row_data['Folder'], row_data['Size_MB'], row_data['Workflow_Count']])
    # Auto-adjust column widths for second sheet
    ws2.column_dimensions['A'].width = 50  # Model
    ws2.column_dimensions['B'].width = 25  # Folder
    ws2.column_dimensions['C'].width = 15  # Size_MB
    ws2.column_dimensions['D'].width = 15  # Workflow_Count
    # Freeze the header row
    ws2.freeze_panes = 'A2'
    # Highlight unused models (count = 0) in light red
    unused_fill = PatternFill(start_color='FFE6E6', end_color='FFE6E6', fill_type='solid')
    for row in range(2, ws2.max_row + 1):
        if ws2.cell(row=row, column=4).value == 0:
            for col in range(1, 5):
                ws2.cell(row=row, column=col).fill = unused_fill
    # Save the workbook
    wb.save(output_file)
    print(f"\n{'='*60}")
    print(f"Excel file created: {output_file}")
    print(f" - Tab 1: Workflow Models ({len(workflow_results)} workflows)")
    print(f" - Tab 2: Model Usage ({len(model_usage_data)} models)")
def write_csv_fallback(results, output_file='workflow_models.csv'):
    """Fallback CSV writer for workflow models.

    Writes one row per workflow with a column per model category.
    NOTE(review): reads the MODEL_CATEGORIES global set by main().
    """
    if not results:
        print("\nNo workflows found!")
        return
    columns = ['Workflow'] + MODEL_CATEGORIES
    with open(output_file, 'w', newline='', encoding='utf-8') as handle:
        writer = csv.DictWriter(handle, fieldnames=columns, extrasaction='ignore')
        writer.writeheader()
        for row in results:
            writer.writerow(row)
    print(f"Workflow results written to: {output_file}")
def write_model_usage_csv(model_usage_data, output_file='model_usage.csv'):
    """Fallback CSV writer for model usage.

    Writes one row per model with its folder, size, and workflow count.
    """
    if not model_usage_data:
        print("\nNo model usage data to write!")
        return
    columns = ['Model', 'Folder', 'Size_MB', 'Workflow_Count']
    with open(output_file, 'w', newline='', encoding='utf-8') as handle:
        writer = csv.DictWriter(handle, fieldnames=columns, extrasaction='ignore')
        writer.writeheader()
        for row in model_usage_data:
            writer.writerow(row)
    print(f"Model usage written to: {output_file}")
def create_model_usage_report(results, models):
    """Create model usage data, including unused models.

    Counts how many workflow rows reference each cataloged model, then
    returns used models (sorted by descending count, folder, name) followed
    by unused models (count 0, sorted by folder and name).
    NOTE(review): reads the MODEL_CATEGORIES global set by main().
    """
    if not results:
        print("\nNo results to create model usage report!")
        return []
    # (rel_path, category) -> size in MB, for every model on disk
    size_of = {}
    for category, entries in models.items():
        for entry in entries:
            size_of[(entry['rel_path'], category)] = entry['size_mb']
    # (rel_path, category) -> number of workflows referencing the model
    counts = defaultdict(int)
    for row in results:
        for category in MODEL_CATEGORIES:
            cell = row.get(category, '')
            if not cell:
                continue
            # Cells hold comma-joined model paths
            for name in cell.split(','):
                name = name.strip()
                if name:
                    counts[(name, category)] += 1
    # Rows for models referenced by at least one workflow
    used = [{'Model': name,
             'Folder': category,
             'Size_MB': size_of.get((name, category), 0),
             'Workflow_Count': n}
            for (name, category), n in counts.items()]
    used.sort(key=lambda r: (-r['Workflow_Count'], r['Folder'], r['Model']))
    # Rows for cataloged models no workflow references
    unused = [{'Model': entry['rel_path'],
               'Folder': category,
               'Size_MB': entry['size_mb'],
               'Workflow_Count': 0}
              for category, entries in models.items()
              for entry in entries
              if (entry['rel_path'], category) not in counts]
    unused.sort(key=lambda r: (r['Folder'], r['Model']))
    combined = used + unused
    used_mb = sum(r['Size_MB'] for r in used)
    unused_mb = sum(r['Size_MB'] for r in unused)
    print(f"\nModel usage statistics:")
    print(f" Total models used: {len(used)} ({used_mb:,.2f} MB)")
    print(f" Total models unused: {len(unused)} ({unused_mb:,.2f} MB)")
    print(f" Total models in library: {len(combined)} ({used_mb + unused_mb:,.2f} MB)")
    # Show top 10 most used models
    if used:
        print(f"\nTop 10 most used models:")
        for i, r in enumerate(used[:10], 1):
            print(f" {i}. {r['Model']} ({r['Folder']}): {r['Workflow_Count']} workflow(s), {r['Size_MB']:,.2f} MB")
    return combined
def main():
    """Entry point: scan models and workflows, then write the reports."""
    # Hard-coded deployment paths for this ComfyUI install.
    models_dir = '/home/jmarc/Docker/comfyui/storage-models/models/'
    workflow_dir = '/home/jmarc/Docker/comfyui/storage-user/workflows/'
    # Category list is shared by several functions via this module global.
    global MODEL_CATEGORIES
    MODEL_CATEGORIES = get_model_categories(models_dir)
    print("="*60)
    print("ComfyUI Workflow Model Matcher")
    print("="*60)
    # Scan models directory
    models = scan_model_files(models_dir)
    if not models:
        print("\nError: No models found in the models directory!")
        return
    print(f"\nTotal model categories found: {len(models)}")
    # Scan workflows
    print("\n" + "="*60)
    print("Scanning workflows...")
    print("="*60)
    results = scan_workflows(workflow_dir, models)
    results.sort(key=lambda row: row['Workflow'])
    print(f"\nTotal workflows processed: {len(results)}")
    # Build the usage report and emit both output formats
    model_usage_data = create_model_usage_report(results, models)
    write_excel(results, model_usage_data)
    print("\nCreating CSV files...")
    write_csv_fallback(results, 'workflow_models.csv')
    write_model_usage_csv(model_usage_data, 'model_usage.csv')


if __name__ == '__main__':
    main()