-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcomfyui_workflow_models.py
More file actions
401 lines (321 loc) · 15 KB
/
comfyui_workflow_models.py
File metadata and controls
401 lines (321 loc) · 15 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
import json
import os
import csv
from pathlib import Path
from collections import defaultdict
# Optional dependency: openpyxl enables .xlsx output.  When it is missing the
# script still works — write_excel() checks EXCEL_AVAILABLE and falls back to
# plain CSV files instead.
try:
    import openpyxl
    from openpyxl.styles import Font, PatternFill, Alignment
    from openpyxl.utils import get_column_letter
    EXCEL_AVAILABLE = True  # checked by write_excel() before using openpyxl
except ImportError:
    EXCEL_AVAILABLE = False
    print("Warning: openpyxl not installed. Will create CSV files instead.")
    print("To create Excel files, install openpyxl: apt install python3-openpyxl or pip install openpyxl")
def get_model_categories(models_dir):
    """Return the sorted names of every immediate subdirectory of *models_dir*.

    Each subfolder (e.g. ``checkpoints``, ``loras``) is treated as one model
    category; plain files directly under *models_dir* are ignored.
    """
    root = Path(models_dir)
    names = [entry.name for entry in root.iterdir() if entry.is_dir()]
    names.sort()
    return names
def scan_model_files(models_dir):
    """Scan the models directory and catalog all model files by category.

    Each immediate subdirectory of *models_dir* is a category; files are
    collected recursively below each category folder.

    Args:
        models_dir: path (str or Path) to the ComfyUI models directory.

    Returns:
        defaultdict(list): maps category name -> list of dicts with keys
        'filename', 'rel_path' (POSIX-style path relative to the category
        folder), 'full_path', and 'size_mb' (rounded to 2 decimals; 0 when
        the file size cannot be read).
    """
    models_dir = Path(models_dir)
    models = defaultdict(list)
    print(f"Scanning models directory: {models_dir}")
    # Fix: derive the category list from the directory itself instead of
    # relying on the MODEL_CATEGORIES global, which is only defined once
    # main() has run (calling this function standalone raised NameError).
    # main() sets MODEL_CATEGORIES to exactly this sorted subfolder list, so
    # behavior on the normal call path is unchanged.
    categories = sorted(d.name for d in models_dir.iterdir() if d.is_dir())
    for category in categories:
        category_path = models_dir / category
        if category_path.exists() and category_path.is_dir():
            # Recursively find all files in this category
            for file_path in category_path.rglob('*'):
                if file_path.is_file():
                    # Get file size in MB; unreadable files are recorded as 0
                    try:
                        size_bytes = file_path.stat().st_size
                        size_mb = round(size_bytes / (1024 * 1024), 2)
                    except Exception:
                        size_mb = 0
                    # Store filename, relative path, and size
                    rel_path = file_path.relative_to(category_path)
                    models[category].append({
                        'filename': file_path.name,
                        'rel_path': str(rel_path).replace('\\', '/'),
                        'full_path': str(file_path),
                        'size_mb': size_mb
                    })
            if models[category]:
                print(f" {category}: {len(models[category])} file(s)")
    return models
def find_model_references(workflow_data, models):
    """Recursively search for model file references in workflow JSON.

    Walks the (possibly deeply nested) workflow structure and compares every
    string value against the cataloged models.  Returns a dict mapping
    category name -> sorted list of matched relative paths.
    """
    matches = defaultdict(set)
    # Node-input keys that commonly carry a model filename.
    model_keys = {'ckpt_name', 'model_name', 'lora_name', 'vae_name',
                  'checkpoint', 'model', 'config_name', 'filename',
                  'unet_name', 'clip_name', 'control_net_name',
                  'upscale_model', 'sam_model_name', 'control_net',
                  'embedding_name', 'hypernetwork_name',
                  'diffusion_model', 'text_encoder', 'clip_vision',
                  'weight_dtype', 'mmproj'}

    def record(value):
        # Compare *value* against every cataloged model; all strategies feed
        # the same set, so combining them with `or` is equivalent to the
        # first-match-wins chain.
        if not value or not isinstance(value, str):
            return
        normalized = value.replace('\\', '/')
        for category, entries in models.items():
            for entry in entries:
                fname = entry['filename']
                rel = entry['rel_path']
                hit = (
                    # 1. Exact filename (optionally path-prefixed)
                    value == fname
                    or value.endswith('/' + fname)
                    or value.endswith('\\' + fname)
                    # 2. Relative path match
                    or normalized == rel
                    or normalized.endswith('/' + rel)
                    # 3. Filename contained anywhere in the value
                    or fname in value
                    # 4. Basename of the value equals the filename
                    or os.path.basename(normalized) == fname
                )
                if hit:
                    matches[category].add(rel)

    def walk(node):
        if isinstance(node, dict):
            for key, value in node.items():
                if key in model_keys and isinstance(value, str):
                    record(value)
                walk(value)  # recurse into nested structures
        elif isinstance(node, list):
            for item in node:
                walk(item)
        elif isinstance(node, str):
            # Any bare string may also name a model
            record(node)

    walk(workflow_data)
    # Sets -> sorted lists for stable, joinable output
    return {k: sorted(list(v)) for k, v in matches.items()}
def scan_workflows(workflow_dir, models):
    """Scan all JSON workflows and match them to models.

    Returns a list of row dicts: 'Workflow' (relative POSIX path) plus one
    comma-joined column per category in MODEL_CATEGORIES (empty string when
    no model of that category is referenced).  Invalid or unreadable files
    are reported and skipped.
    """
    workflow_dir = Path(workflow_dir)
    results = []
    if not workflow_dir.exists():
        print(f"Error: Workflow directory does not exist: {workflow_dir}")
        return results
    # Find all JSON files, recursively
    json_files = list(workflow_dir.rglob('*.json'))
    print(f"\nFound {len(json_files)} workflow file(s)")
    for json_file in json_files:
        try:
            with open(json_file, 'r', encoding='utf-8') as handle:
                workflow_data = json.load(handle)
            relative_path = json_file.relative_to(workflow_dir)
            matches = find_model_references(workflow_data, models)
            row = {'Workflow': str(relative_path).replace('\\', '/')}
            # One column per known category; empty string when unmatched.
            # NOTE(review): reads the MODEL_CATEGORIES global set by main().
            for category in MODEL_CATEGORIES:
                row[category] = ','.join(matches[category]) if matches.get(category) else ''
            results.append(row)
            print(f" Processed: {relative_path.name}")
        except json.JSONDecodeError as e:
            print(f" Error: Invalid JSON in {json_file}: {e}")
        except Exception as e:
            print(f" Error processing {json_file}: {e}")
    return results
def write_excel(workflow_results, model_usage_data, output_file='workflow_models.xlsx'):
    """Write results to Excel file with two tabs.

    Args:
        workflow_results: list of row dicts ('Workflow' plus one key per
            category in MODEL_CATEGORIES) as produced by scan_workflows().
        model_usage_data: list of dicts with keys 'Model', 'Folder',
            'Size_MB', 'Workflow_Count' from create_model_usage_report().
        output_file: path of the .xlsx file to create.

    When openpyxl is not installed (EXCEL_AVAILABLE is False) this falls
    back to writing the two CSV files instead.
    NOTE(review): reads the MODEL_CATEGORIES global set by main().
    """
    if not EXCEL_AVAILABLE:
        print("\nFalling back to CSV files...")
        write_csv_fallback(workflow_results, 'workflow_models.csv')
        write_model_usage_csv(model_usage_data, 'model_usage.csv')
        return
    # Create a new workbook
    wb = openpyxl.Workbook()
    wb.remove(wb.active)  # Remove the default sheet properly
    # Create first sheet: Workflow Models
    ws1 = wb.create_sheet('Workflow Models')
    # Headers for first sheet: workflow path plus one column per category
    headers1 = ['Workflow'] + MODEL_CATEGORIES
    ws1.append(headers1)
    # Style the header row (blue fill, white bold, centered)
    header_fill = PatternFill(start_color='366092', end_color='366092', fill_type='solid')
    header_font = Font(bold=True, color='FFFFFF')
    for col_num, _ in enumerate(headers1, 1):
        cell = ws1.cell(row=1, column=col_num)
        cell.fill = header_fill
        cell.font = header_font
        cell.alignment = Alignment(horizontal='center', vertical='center')
    # Add workflow data
    for result in workflow_results:
        row_data = [result['Workflow']]
        for category in MODEL_CATEGORIES:
            # Ensure empty cells are truly empty strings, not None
            value = result.get(category, '')
            row_data.append(value if value else '')
        ws1.append(row_data)
    # Auto-adjust column widths for first sheet
    for col_num, col_name in enumerate(headers1, 1):
        column_letter = get_column_letter(col_num)
        if col_num == 1:  # Workflow column
            ws1.column_dimensions[column_letter].width = 50
        else:
            ws1.column_dimensions[column_letter].width = 30
    # Freeze the header row
    ws1.freeze_panes = 'A2'
    # Create second sheet: Model Usage
    ws2 = wb.create_sheet('Model Usage')
    # Headers for second sheet
    headers2 = ['Model', 'Folder', 'Size_MB', 'Workflow_Count']
    ws2.append(headers2)
    # Style the header row (same fill/font objects as sheet 1)
    for col_num, _ in enumerate(headers2, 1):
        cell = ws2.cell(row=1, column=col_num)
        cell.fill = header_fill
        cell.font = header_font
        cell.alignment = Alignment(horizontal='center', vertical='center')
    # Add model usage data, one row per model
    for row_data in model_usage_data:
        ws2.append([row_data['Model'], row_data['Folder'], row_data['Size_MB'], row_data['Workflow_Count']])
    # Auto-adjust column widths for second sheet
    ws2.column_dimensions['A'].width = 50  # Model
    ws2.column_dimensions['B'].width = 25  # Folder
    ws2.column_dimensions['C'].width = 15  # Size_MB
    ws2.column_dimensions['D'].width = 15  # Workflow_Count
    # Freeze the header row
    ws2.freeze_panes = 'A2'
    # Highlight unused models (count = 0) in light red
    unused_fill = PatternFill(start_color='FFE6E6', end_color='FFE6E6', fill_type='solid')
    for row in range(2, ws2.max_row + 1):
        if ws2.cell(row=row, column=4).value == 0:
            for col in range(1, 5):
                ws2.cell(row=row, column=col).fill = unused_fill
    # Save the workbook
    wb.save(output_file)
    print(f"\n{'='*60}")
    print(f"Excel file created: {output_file}")
    print(f" - Tab 1: Workflow Models ({len(workflow_results)} workflows)")
    print(f" - Tab 2: Model Usage ({len(model_usage_data)} models)")
def write_csv_fallback(results, output_file='workflow_models.csv'):
    """Fallback CSV writer for workflow models.

    Writes one row per workflow with a column per model category.
    NOTE(review): reads the MODEL_CATEGORIES global set by main().
    """
    if not results:
        print("\nNo workflows found!")
        return
    columns = ['Workflow'] + MODEL_CATEGORIES
    with open(output_file, 'w', newline='', encoding='utf-8') as handle:
        writer = csv.DictWriter(handle, fieldnames=columns, extrasaction='ignore')
        writer.writeheader()
        for row in results:
            writer.writerow(row)
    print(f"Workflow results written to: {output_file}")
def write_model_usage_csv(model_usage_data, output_file='model_usage.csv'):
    """Fallback CSV writer for model usage.

    Writes one row per model with its folder, size, and workflow count.
    """
    if not model_usage_data:
        print("\nNo model usage data to write!")
        return
    columns = ['Model', 'Folder', 'Size_MB', 'Workflow_Count']
    with open(output_file, 'w', newline='', encoding='utf-8') as handle:
        writer = csv.DictWriter(handle, fieldnames=columns, extrasaction='ignore')
        writer.writeheader()
        for row in model_usage_data:
            writer.writerow(row)
    print(f"Model usage written to: {output_file}")
def create_model_usage_report(results, models):
    """Create model usage data, including unused models.

    Counts how many workflow rows reference each cataloged model, then
    returns used models (sorted by descending count, folder, name) followed
    by unused models (count 0, sorted by folder and name).
    NOTE(review): reads the MODEL_CATEGORIES global set by main().
    """
    if not results:
        print("\nNo results to create model usage report!")
        return []
    # (rel_path, category) -> size in MB, for every model on disk
    size_of = {}
    for category, entries in models.items():
        for entry in entries:
            size_of[(entry['rel_path'], category)] = entry['size_mb']
    # (rel_path, category) -> number of workflows referencing the model
    counts = defaultdict(int)
    for row in results:
        for category in MODEL_CATEGORIES:
            cell = row.get(category, '')
            if not cell:
                continue
            # Cells hold comma-joined model paths
            for name in cell.split(','):
                name = name.strip()
                if name:
                    counts[(name, category)] += 1
    # Rows for models referenced by at least one workflow
    used = [{'Model': name,
             'Folder': category,
             'Size_MB': size_of.get((name, category), 0),
             'Workflow_Count': n}
            for (name, category), n in counts.items()]
    used.sort(key=lambda r: (-r['Workflow_Count'], r['Folder'], r['Model']))
    # Rows for cataloged models no workflow references
    unused = [{'Model': entry['rel_path'],
               'Folder': category,
               'Size_MB': entry['size_mb'],
               'Workflow_Count': 0}
              for category, entries in models.items()
              for entry in entries
              if (entry['rel_path'], category) not in counts]
    unused.sort(key=lambda r: (r['Folder'], r['Model']))
    combined = used + unused
    used_mb = sum(r['Size_MB'] for r in used)
    unused_mb = sum(r['Size_MB'] for r in unused)
    print(f"\nModel usage statistics:")
    print(f" Total models used: {len(used)} ({used_mb:,.2f} MB)")
    print(f" Total models unused: {len(unused)} ({unused_mb:,.2f} MB)")
    print(f" Total models in library: {len(combined)} ({used_mb + unused_mb:,.2f} MB)")
    # Show top 10 most used models
    if used:
        print(f"\nTop 10 most used models:")
        for i, r in enumerate(used[:10], 1):
            print(f" {i}. {r['Model']} ({r['Folder']}): {r['Workflow_Count']} workflow(s), {r['Size_MB']:,.2f} MB")
    return combined
def main():
    """Entry point: scan models and workflows, then write the reports."""
    # Hard-coded deployment paths for this ComfyUI install.
    models_dir = '/home/jmarc/Docker/comfyui/storage-models/models/'
    workflow_dir = '/home/jmarc/Docker/comfyui/storage-user/workflows/'
    # Category list is shared by several functions via this module global.
    global MODEL_CATEGORIES
    MODEL_CATEGORIES = get_model_categories(models_dir)
    print("="*60)
    print("ComfyUI Workflow Model Matcher")
    print("="*60)
    # Scan models directory
    models = scan_model_files(models_dir)
    if not models:
        print("\nError: No models found in the models directory!")
        return
    print(f"\nTotal model categories found: {len(models)}")
    # Scan workflows
    print("\n" + "="*60)
    print("Scanning workflows...")
    print("="*60)
    results = scan_workflows(workflow_dir, models)
    results.sort(key=lambda row: row['Workflow'])
    print(f"\nTotal workflows processed: {len(results)}")
    # Build the usage report and emit both output formats
    model_usage_data = create_model_usage_report(results, models)
    write_excel(results, model_usage_data)
    print("\nCreating CSV files...")
    write_csv_fallback(results, 'workflow_models.csv')
    write_model_usage_csv(model_usage_data, 'model_usage.csv')


if __name__ == '__main__':
    main()