-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathvideo_engine.py
More file actions
2154 lines (1765 loc) · 83.6 KB
/
video_engine.py
File metadata and controls
2154 lines (1765 loc) · 83.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
"""
Simple Video Slideshow Creator
Creates a basic video slideshow from images without effects.
Just loads images chronologically and displays them full (no cropping).
"""
import hashlib
import os
import subprocess
import shutil
from datetime import datetime
from pathlib import Path
import tempfile
import json
import random
import requests
import zipfile
import io
import cv2
from PIL import Image, ExifTags
from moviepy.editor import (
AudioFileClip,
ImageClip,
TextClip,
VideoFileClip,
ColorClip,
concatenate_videoclips,
concatenate_audioclips,
CompositeAudioClip,
)
from moviepy.audio.AudioClip import AudioClip
from tqdm import tqdm
import numpy as np
import librosa
from typing import Optional
# Register HEIF/HEIC support so PIL can open iPhone photos (.heic/.heif).
# Runs at import time; failure is non-fatal and only disables HEIC input.
try:
    from pillow_heif import register_heif_opener
    register_heif_opener()
    print("✅ HEIC/HEIF support enabled")
except ImportError:
    # Optional dependency: the rest of the pipeline still works without it.
    print("⚠️ pillow-heif not installed, HEIC/HEIF files won't be supported")
except Exception as e:
    # Any other registration failure (e.g. binary incompatibility) is
    # reported but does not stop the script.
    print(f"⚠️ Could not enable HEIC/HEIF support: {e}")
def install_google_font(font_name):
    """
    Download a font from Google Fonts and save it under assets/fonts.

    The font variant (Regular, Bold, Italic, Light, BoldItalic) is detected
    from the download URL so the saved filename matches the actual style.

    Args:
        font_name: Name of the font family (e.g. "Heebo", "Roboto")

    Returns:
        Path to the installed .ttf file, or None if the download failed.

    Example:
        font_path = install_google_font("Heebo")
        if font_path:
            TITLE_FONT_PATH = font_path
    """
    # Save directory for downloaded fonts.
    save_dir = os.path.join("assets", "fonts")
    os.makedirs(save_dir, exist_ok=True)
    # Check if the font already exists (avoid unnecessary downloads).
    # Any variant counts (Regular, Bold, Italic, ...): the first match wins.
    existing_fonts = [f for f in os.listdir(save_dir) if f.startswith(f"{font_name}-") and f.endswith('.ttf')]
    if existing_fonts:
        font_path = os.path.join(save_dir, existing_fonts[0])
        print(f"✓ Font already installed: {font_path}")
        return font_path
    print(f"📥 Downloading font: {font_name} from Google Fonts...")
    # Ask the Google Fonts CSS API for the family, then scrape the TTF URL
    # out of the returned stylesheet.
    try:
        api_url = f"https://fonts.googleapis.com/css?family={font_name.replace(' ', '+')}"
        # Browser-like User-Agent: the API may serve a different stylesheet
        # format otherwise.
        response = requests.get(api_url, timeout=30, headers={'User-Agent': 'Mozilla/5.0'})
        if response.status_code == 200:
            # Parse the CSS to find TTF URL(s).
            import re
            ttf_urls = re.findall(r'url\((https://[^)]+\.ttf)\)', response.text)
            if ttf_urls:
                # Download the first TTF file listed.
                ttf_url = ttf_urls[0]
                ttf_response = requests.get(ttf_url, timeout=30)
                if ttf_response.status_code == 200:
                    # Derive the local filename from the URL so the variant
                    # in the name matches the actual font style.
                    # Example: https://fonts.gstatic.com/...FontName-Bold.ttf
                    font_filename = os.path.basename(ttf_url.split('?')[0])  # Remove query params
                    # If the URL's filename doesn't contain the family name,
                    # fall back to our own "<Family>-<Variant>.ttf" scheme.
                    # Normalize both strings for comparison (no spaces, lowercase).
                    normalized_font_name = font_name.replace(' ', '').lower()
                    normalized_filename = font_filename.replace(' ', '').lower()
                    if normalized_font_name not in normalized_filename:
                        # Detect the variant from the URL; compound variants
                        # (BoldItalic) must be checked before the simple ones.
                        ttf_url_lower = ttf_url.lower()
                        if 'bolditalic' in ttf_url_lower or 'italicbold' in ttf_url_lower:
                            variant = 'BoldItalic'
                        elif 'bold' in ttf_url_lower:
                            variant = 'Bold'
                        elif 'italic' in ttf_url_lower:
                            variant = 'Italic'
                        elif 'light' in ttf_url_lower:
                            variant = 'Light'
                        else:
                            variant = 'Regular'
                        font_filename = f"{font_name}-{variant}.ttf"
                    font_path = os.path.join(save_dir, font_filename)
                    with open(font_path, 'wb') as f:
                        f.write(ttf_response.content)
                    print(f"✅ Font installed: {font_path}")
                    return font_path
        # Every non-success path above (bad status, no TTF URL in the CSS,
        # failed TTF download) falls through to this message.
        print(f"⚠️ Could not download {font_name} automatically")
        print(f"💡 You can manually download it from https://fonts.google.com/ and place it in {save_dir}")
        return None
    except Exception as e:
        # Network errors, timeouts, filesystem errors while saving, etc.
        print(f"❌ Failed to download font: {e}")
        print(f"💡 You can manually download {font_name} from https://fonts.google.com/ and place it in {save_dir}")
        return None
def resolve_font_path(font_name: str) -> Optional[str]:
    """Resolve a font name or path, downloading from Google Fonts if missing.

    Resolution order: absolute path -> bundled assets/fonts -> platform
    font directories -> Google Fonts download. Returns None when nothing
    can be found.
    """
    if not font_name:
        return None

    # 1) Absolute path supplied directly.
    if os.path.isabs(font_name) and os.path.exists(font_name):
        return font_name

    # 2) Fonts bundled with the project.
    bundled = os.path.join("assets", "fonts", font_name)
    if os.path.exists(bundled):
        return bundled

    # 3) Platform-specific system/user font directories.
    if os.name == "nt":
        search_dirs = [r"C:\Windows\Fonts"]
    else:
        user_home = Path.home()
        search_dirs = [
            "/usr/share/fonts",
            "/usr/local/share/fonts",
            str(user_home / ".local" / "share" / "fonts"),
            "/System/Library/Fonts",
            "/Library/Fonts",
            str(user_home / "Library" / "Fonts"),
        ]
    for directory in search_dirs:
        candidate = os.path.join(directory, font_name)
        if os.path.exists(candidate):
            return candidate

    # 4) Last resort: fetch from Google Fonts (extension stripped from name).
    fetched = install_google_font(os.path.splitext(font_name)[0])
    if fetched and os.path.exists(fetched):
        return fetched
    return None
def read_image_safe(path, max_width=None):
    """
    Read an image as a BGR numpy array, tolerating non-ASCII (e.g. Hebrew)
    characters in the path that make cv2.imread fail on Windows.

    HEIC/HEIF files are decoded through PIL (cv2 cannot read them); every
    other format is decoded with cv2.imdecode from bytes read via Python's
    own open(), which handles Unicode paths correctly.

    OPTIMIZATION: images wider than max_width are downscaled immediately on
    read to reduce memory usage and speed up face detection and rendering.

    Args:
        path: Image file path.
        max_width: Maximum width in pixels (uses MAX_IMAGE_WIDTH from the
            module config when None).

    Returns:
        BGR uint8 numpy array, or None when the file cannot be decoded.
    """
    if max_width is None:
        max_width = MAX_IMAGE_WIDTH
    try:
        # HEIC/HEIF branch: cv2 does not support these containers.
        path_lower = str(path).lower()
        if path_lower.endswith(('.heic', '.heif', '.heics', '.heifs')):
            # Use PIL for HEIC/HEIF files.
            img_pil = Image.open(path)
            # Normalize to RGB so the cv2 conversion below is well-defined.
            if img_pil.mode != 'RGB':
                img_pil = img_pil.convert('RGB')
            # Downscale immediately if the image is too large.
            if img_pil.width > max_width:
                scale_factor = max_width / img_pil.width
                new_width = max_width
                new_height = int(img_pil.height * scale_factor)
                img_pil = img_pil.resize((new_width, new_height), Image.Resampling.LANCZOS)
            # PIL (RGB) -> OpenCV (BGR).
            img = cv2.cvtColor(np.array(img_pil), cv2.COLOR_RGB2BGR)
            return img
        # All other formats: read the raw bytes ourselves (Unicode-safe),
        # then let cv2 decode them -- faster than going through PIL.
        with open(path, "rb") as f:
            file_bytes = bytearray(f.read())
        numpy_array = np.asarray(file_bytes, dtype=np.uint8)
        # Decode the bytes to an OpenCV image (None if the data is corrupt).
        img = cv2.imdecode(numpy_array, cv2.IMREAD_COLOR)
        # Downscale immediately if the image is too large; this reduces
        # memory usage and speeds up face detection.
        if img is not None and img.shape[1] > max_width:
            scale_factor = max_width / img.shape[1]
            new_width = max_width
            new_height = int(img.shape[0] * scale_factor)
            img = cv2.resize(img, (new_width, new_height), interpolation=cv2.INTER_AREA)
        return img
    except Exception as e:
        # Best effort: report the problem and signal failure with None.
        print(f"Error reading file {path}: {e}")
        return None
# ==================== CONFIGURATION ====================
# ===== PATHS =====
# Use repo-relative defaults with optional environment overrides for portability (works on CI/GitHub too)
PROJECT_ROOT = Path(__file__).resolve().parent
IMAGE_FOLDER_PATH = os.getenv("PYMONTAGE_IMAGE_FOLDER", str(PROJECT_ROOT / "input" / "photos"))
AUDIO_FILE_PATH = os.getenv("PYMONTAGE_AUDIO_FILE", str(PROJECT_ROOT / "input" / "audio.mp3"))
OUTPUT_FILE_PATH = os.getenv("PYMONTAGE_OUTPUT_FILE", str(PROJECT_ROOT / "output" / "slideshow.mp4"))
# ===== TEXT CONTENT =====
INTRO_TEXT = "Our Family Memories"
OUTRO_TEXT = "See you in happy times!"
# ===== VIDEO RESOLUTION =====
TARGET_WIDTH = 1920
TARGET_HEIGHT = 1080
# ===== TIMING SETTINGS =====
TRANSITION_DURATION = 0.5  # Crossfade duration in seconds
FIXED_INTRO_TIME = 7.5  # Reserved time for intro+pause at start (seconds)
INTRO_CARD_DURATION = 3.0  # Duration of intro title card (seconds)
OUTRO_CARD_DURATION = 3.0  # Duration of outro title card (seconds)
OPENING_PAUSE_DURATION = 2.0  # Black pause before intro (seconds)
CLOSING_PAUSE_DURATION = 2.0  # Black pause after outro (seconds)
# ===== LAYOUT MODE =====
# USE_GRID_2X2: True = 2x2 grids (4 images per slide), False = single images
USE_GRID_2X2 = True
# ===== DISPLAY TYPE WEIGHTS =====
# Different screen layouts get different durations (relative weights)
GRID_WEIGHT = 2.0  # 2x2 Grid (4 horizontal images) - gets most screen time
TRIPLE_WEIGHT = 1.75  # 1x3 Grid (3 vertical images) - gets more than collage
COLLAGE_WEIGHT = 1.5  # 1x2 Collage (2 images) - gets medium screen time
SINGLE_WEIGHT = 1.0  # 1x1 Single (1 image) - gets least screen time
# ===== RENDERING SETTINGS =====
VIDEO_FPS = 24  # Frames per second for output video
VIDEO_BITRATE = '4000k'  # Video bitrate (higher = better quality, larger file)
VIDEO_CODEC_PREFERENCE = 'auto'  # 'auto', 'h264_nvenc' (NVIDIA GPU), 'libx264' (CPU)
VIDEO_QUALITY = 32  # CRF/CQ value (lower = better quality, 18-32 recommended)
# ===== IMAGE PROCESSING =====
MAX_IMAGE_WIDTH = 2400  # Maximum width for loaded images (pixels, reduces memory)
# ===== TEXT STYLING =====
# NOTE(review): Windows-only default font paths; resolve_font_path() and
# install_google_font() provide cross-platform fallbacks -- confirm they
# are used wherever these constants feed font loading.
TITLE_FONT_PATH = r"C:\Windows\Fonts\trebucbd.ttf"  # Font for titles
TITLE_FONT_SIZE = 100  # Font size for intro/outro titles
DATE_FONT_PATH = r"C:\Windows\Fonts\trebucbd.ttf"  # Font for date overlays
DATE_FONT_SIZE = 70  # Font size for date overlays
# ===== CACHE SETTINGS =====
USE_CACHE = False  # Enable caching to skip re-rendering if nothing changed
# NOTE: Image duration is calculated automatically!
# The script divides (audio_length - FIXED_INTRO_TIME) by total weighted slides
# This ensures all images fit perfectly to the audio length.
# Image file extensions accepted by the loader
SUPPORTED_EXTENSIONS = (
    '.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tif', '.tiff', '.webp', '.heic', '.heif'
)
# Video file extensions accepted by the loader
SUPPORTED_VIDEO_EXTENSIONS = (
    '.mp4', '.avi', '.mov', '.mkv', '.flv', '.wmv', '.webm', '.m4v'
)
# =======================================================
def get_best_video_codec():
    """Detect the best available video encoder for the output render.

    Prefers NVIDIA GPU encoders (h264_nvenc uses less VRAM than hevc_nvenc)
    and falls back to the CPU encoder libx264 when ffmpeg or the GPU codecs
    are unavailable.

    Returns:
        Codec name string suitable for moviepy's ``codec=`` argument.
    """
    # Locate an ffmpeg binary: PATH first, then the one bundled with imageio.
    ffmpeg_path = shutil.which('ffmpeg')
    if not ffmpeg_path:
        try:
            import imageio_ffmpeg
            ffmpeg_path = imageio_ffmpeg.get_ffmpeg_exe()
        except Exception:
            # FIX: was a bare `except:` (would also swallow KeyboardInterrupt).
            # imageio_ffmpeg missing/broken is handled by the check below.
            pass
    if not ffmpeg_path:
        print("⚠ FFmpeg not found in PATH, using CPU (libx264)")
        return 'libx264'
    try:
        result = subprocess.run([ffmpeg_path, '-codecs'], capture_output=True, text=True, timeout=5)
        output = result.stdout.lower()
        # Check for NVIDIA codecs - h264_nvenc uses less VRAM than hevc
        if 'h264_nvenc' in output:
            print("✓ Found NVIDIA H.264 GPU codec (h264_nvenc) - lower VRAM usage")
            return 'h264_nvenc'
        if 'hevc_nvenc' in output:
            print("✓ Found NVIDIA HEVC GPU codec (hevc_nvenc)")
            return 'hevc_nvenc'
        print("⚠ No NVIDIA GPU codecs found, using CPU (libx264)")
        return 'libx264'
    except Exception as e:
        # subprocess timeout or unexpected ffmpeg failure: safe CPU fallback.
        print(f"⚠ Could not check GPU codecs: {e}, using CPU (libx264)")
        return 'libx264'
def load_and_sort_images(folder_path):
    """Load image and video files from folder, sorted chronologically by EXIF/file date and grouped by date.

    Scans `folder_path` plus an optional `VIDEOS` subfolder, skips exact
    duplicates (by MD5) and corrupted files, sorts everything by capture
    time and groups by calendar day.

    Returns:
        List of (group, date) tuples where group is a chronologically sorted
        list of (filepath, datetime, media_type) triples and media_type is
        'image' or 'video'.

    Raises:
        FileNotFoundError: folder_path does not exist.
        ValueError: no usable media found.
    """
    def md5_for_file(path):
        """Compute the MD5 of a file in 1 MiB chunks (duplicate detection)."""
        h = hashlib.md5()
        with open(path, 'rb') as f:
            for chunk in iter(lambda: f.read(1 << 20), b''):
                h.update(chunk)
        return h.hexdigest()

    def parse_exif_datetime(path):
        """Extract DateTimeOriginal/DateTime from EXIF, or None if absent."""
        try:
            img = Image.open(path)
            exif = img._getexif() or {}
            tag_map = {ExifTags.TAGS.get(k): v for k, v in exif.items() if k in ExifTags.TAGS}
            dt_str = tag_map.get('DateTimeOriginal') or tag_map.get('DateTime')
            if dt_str:
                # EXIF datetime format is "YYYY:MM:DD HH:MM:SS".
                return datetime.strptime(dt_str, '%Y:%m:%d %H:%M:%S')
        except Exception:
            return None
        return None

    media_files = []
    seen_hashes = set()
    if not os.path.exists(folder_path):
        raise FileNotFoundError(f"Image folder not found: {folder_path}")
    # Scan main folder + VIDEOS subfolder
    folders_to_scan = [folder_path]
    videos_subfolder = os.path.join(folder_path, 'VIDEOS')
    if os.path.exists(videos_subfolder):
        folders_to_scan.append(videos_subfolder)
    for scan_folder in folders_to_scan:
        for filename in sorted(os.listdir(scan_folder)):
            filepath = os.path.join(scan_folder, filename)
            file_ext = filename.lower()
            # Check if it's an image
            if file_ext.endswith(SUPPORTED_EXTENSIONS):
                try:
                    # Duplicate check by content hash.
                    file_hash = md5_for_file(filepath)
                    if file_hash in seen_hashes:
                        # NOTE(review): message lost the offending filename --
                        # consider including it in the f-string.
                        print(f" Skipping duplicate: (unknown)")
                        continue
                    seen_hashes.add(file_hash)
                    # Capture time: EXIF first, file mtime as fallback.
                    dt = parse_exif_datetime(filepath)
                    if dt is None:
                        dt = datetime.fromtimestamp(os.path.getmtime(filepath))
                    # Validate the image (with HEIC support fallback).
                    try:
                        # Suppress PIL warnings
                        import warnings
                        warnings.filterwarnings('ignore', category=UserWarning)
                        img = Image.open(filepath)
                        img.verify()
                        # Actually load it to catch corrupted files; PIL
                        # requires reopening after verify().
                        img = Image.open(filepath)  # Need to reopen after verify
                        img.load()
                    except Exception as verify_error:
                        # If PIL verification fails, try HEIC conversion.
                        if file_ext.endswith(('.heic', '.heif')):
                            if load_image_with_heic_support(filepath) is None:
                                raise ValueError("Could not convert HEIC")
                        else:
                            # For other formats, try one more time with OpenCV.
                            test_img = cv2.imread(filepath)
                            if test_img is None:
                                raise ValueError(f"Corrupted or truncated image: {verify_error}")
                    media_files.append((filepath, dt, 'image'))
                except Exception as e:
                    # NOTE(review): filename and error are not shown -- consider
                    # adding them to the message.
                    print(f" ⚠ Skipping corrupted/invalid image (unknown)")
            # Check if it's a video
            elif file_ext.endswith(SUPPORTED_VIDEO_EXTENSIONS):
                try:
                    # Videos only get the file modification date (no EXIF).
                    dt = datetime.fromtimestamp(os.path.getmtime(filepath))
                    # Try to verify it's a valid video
                    try:
                        clip = VideoFileClip(filepath)
                        # Check if it has a video stream with real dimensions.
                        if clip.w > 0 and clip.h > 0:
                            clip.close()
                            media_files.append((filepath, dt, 'video'))
                        else:
                            print(f" ⚠ Skipping video with no valid stream: (unknown)")
                    except Exception as e:
                        print(f" ⚠ Skipping invalid video (unknown)")
                except Exception as e:
                    print(f" ⚠ Skipping video (unknown): {e}")
    if not media_files:
        raise ValueError(f"No valid images or videos found in {folder_path}")
    # Sort chronologically
    media_files.sort(key=lambda x: x[1])
    # Group media by date (YYYY-MM-DD)
    from collections import defaultdict
    grouped = defaultdict(list)
    for filepath, dt, media_type in media_files:
        date_key = dt.date()
        grouped[date_key].append((filepath, dt, media_type))
    # Sort each group chronologically by time (not just date)
    for date_key in grouped:
        grouped[date_key].sort(key=lambda x: x[1])
    # Convert to list of groups, sorted by date
    media_groups = [group for date_key in sorted(grouped.keys()) for group in [grouped[date_key]]]
    # Add date information to each group
    groups_with_dates = []
    for group in media_groups:
        date_key = group[0][1].date()  # Get date from first item
        groups_with_dates.append((group, date_key))
    # Summary statistics for the log.
    total_media = len(media_files)
    total_groups = len(media_groups)
    image_count = sum(1 for _, _, mtype in media_files if mtype == 'image')
    video_count = sum(1 for _, _, mtype in media_files if mtype == 'video')
    # A "collage group": more than one item on the same day, all images.
    collage_count = sum(1 for group in media_groups if len(group) > 1 and all(m[2] == 'image' for m in group))
    print(f"✓ Loaded {total_media} media files in {total_groups} date groups")
    print(f" ({image_count} images, {video_count} videos, {collage_count} collage groups)")
    return groups_with_dates
def convert_heic_to_jpg(heic_path):
    """Convert a HEIC image to JPG via ffmpeg and return it as a BGR array.

    Fallback path used when pillow-heif is unavailable.

    Args:
        heic_path: Path to the .heic/.heif file.

    Returns:
        BGR uint8 numpy array, or None on any failure (no ffmpeg,
        conversion error, unreadable output).
    """
    temp_jpg = None
    try:
        # FIX: mkstemp + os.close instead of NamedTemporaryFile(delete=False)
        # -- the old code discarded the file object, leaking an open handle;
        # on Windows that open handle can prevent ffmpeg from writing.
        fd, temp_jpg = tempfile.mkstemp(suffix='.jpg')
        os.close(fd)
        ffmpeg_path = shutil.which('ffmpeg')
        if not ffmpeg_path:
            try:
                import imageio_ffmpeg
                ffmpeg_path = imageio_ffmpeg.get_ffmpeg_exe()
            except Exception:
                return None
        if not ffmpeg_path:
            return None
        # Convert HEIC to JPG; '-y' lets ffmpeg overwrite the empty temp file.
        # Note: ffmpeg might struggle with non-ASCII (Hebrew) paths; pure
        # python readers are safer, this is only a fallback.
        result = subprocess.run(
            [ffmpeg_path, '-i', heic_path, '-y', temp_jpg],
            capture_output=True,
            timeout=10
        )
        if result.returncode == 0 and os.path.exists(temp_jpg):
            # Read the converted JPG with the Unicode-safe reader.
            return read_image_safe(temp_jpg)
        return None
    except Exception:
        # Timeout, I/O error, etc. -- caller treats None as "conversion failed".
        return None
    finally:
        # Single cleanup point for the temp file on every exit path.
        if temp_jpg is not None:
            try:
                os.remove(temp_jpg)
            except OSError:
                pass
def load_image_with_heic_support(filepath):
    """Load an image as a BGR numpy array, with an ffmpeg fallback for HEIC.

    PIL is tried first (it handles non-ASCII paths and, when pillow-heif is
    registered, HEIC files too). If PIL fails and the file is HEIC/HEIF,
    conversion through ffmpeg is attempted. Returns None on failure.
    """
    import warnings

    # Silence PIL's truncated-image warnings during the load attempt.
    warnings.filterwarnings('ignore', category=UserWarning)
    try:
        pil_img = Image.open(filepath)
        pil_img.load()
        return cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)
    except Exception:
        pass
    # PIL failed: for HEIC/HEIF try converting through ffmpeg instead.
    if filepath.lower().endswith(('.heic', '.heif')):
        return convert_heic_to_jpg(filepath)
    return None
def resize_video_to_fit(video_path, target_width, target_height):
    """
    Load a video and resize it to fit the target resolution, preserving the
    original playback speed (no time remapping, to avoid artifacts).

    The clip's own audio track is stripped so it cannot clash with the
    slideshow's background music. If the scaled clip is smaller than the
    target in either dimension it is centered over a black canvas
    (letterbox/pillarbox).

    Returns:
        A moviepy clip sized for the target, or None if the video is
        corrupted/unreadable or reports invalid dimensions.
    """
    try:
        video_clip = VideoFileClip(video_path)
        # Remove audio to avoid conflicts with background music
        if video_clip.audio is not None:
            video_clip = video_clip.without_audio()
        # Reject clips that report no usable dimensions.
        if video_clip.w is None or video_clip.h is None or video_clip.w <= 0 or video_clip.h <= 0:
            print(f" ⚠ Video has invalid dimensions: {video_clip.w}x{video_clip.h}")
            video_clip.close()
            return None
        # Scale by whichever dimension is limiting, so the clip fits inside
        # the target while keeping its aspect ratio.
        if video_clip.w / video_clip.h >= target_width / target_height:
            # Width is limiting
            video_clip = video_clip.resize(width=target_width)
        else:
            # Height is limiting
            video_clip = video_clip.resize(height=target_height)
        # Add black padding if needed (letterbox)
        if video_clip.w < target_width or video_clip.h < target_height:
            try:
                from moviepy.video.compositing.CompositeVideoClip import CompositeVideoClip
                pad_h = max(0, target_height - video_clip.h)
                pad_w = max(0, target_width - video_clip.w)
                y_pad = pad_h // 2
                x_pad = pad_w // 2
                canvas = ColorClip(size=(target_width, target_height), color=(0, 0, 0))
                canvas = canvas.set_duration(video_clip.duration)
                # Composite the clip centered over the black canvas
                # (canvas first = bottom layer).
                video_clip = CompositeVideoClip([canvas, video_clip.set_position((x_pad, y_pad))])
            except Exception as composite_error:
                print(f" ⚠ Could not apply letterbox to video: {composite_error}")
                # Best effort: return the unpadded clip if compositing fails.
                return video_clip
        return video_clip
    except Exception as e:
        print(f" ⚠ Error processing video {video_path}: {e}")
        return None
def resize_image_to_fit(image_path, target_width, target_height):
    """
    Load an image and scale it to fit inside target_width x target_height.

    Aspect ratio is preserved; the scaled image is centered on a black
    canvas (letterbox/pillarbox). HEIC is supported via the fallback
    loader. Returns an RGB uint8 numpy frame, or None when the file is
    corrupted/unreadable.
    """
    try:
        import warnings

        # Suppress PIL warnings about truncated/corrupted images.
        warnings.filterwarnings('ignore', category=UserWarning)

        # Unicode-safe read first, then the HEIC-capable fallback loader.
        bgr = read_image_safe(str(image_path))
        if bgr is None:
            bgr = load_image_with_heic_support(str(image_path))
        if bgr is None:
            return None

        rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
        src_h, src_w = rgb.shape[:2]

        # Fit entirely inside the target box (smaller of the two scales).
        scale = min(target_width / src_w, target_height / src_h)
        fit_w = int(src_w * scale)
        fit_h = int(src_h * scale)
        scaled = cv2.resize(rgb, (fit_w, fit_h), interpolation=cv2.INTER_AREA)

        # Center on a black canvas, clamping the slice to stay in bounds.
        canvas = np.zeros((target_height, target_width, 3), dtype=np.uint8)
        top = max(0, (target_height - fit_h) // 2)
        left = max(0, (target_width - fit_w) // 2)
        bottom = min(target_height, top + fit_h)
        right = min(target_width, left + fit_w)
        canvas[top:bottom, left:right] = scaled[0:bottom - top, 0:right - left]
        return canvas
    except Exception:
        # Corrupted image: caller treats None as "skip this file".
        return None
def create_month_year_overlay_frame(date_obj, target_size):
    """Render a "Month Year" caption as an RGBA numpy array.

    The caption sits bottom-center on a fully transparent canvas, with a
    semi-transparent black box behind white text, ready to be alpha-blended
    onto a frame by apply_month_overlay().
    """
    from PIL import ImageDraw, ImageFont

    canvas_w, canvas_h = target_size[0], target_size[1]
    overlay = Image.new('RGBA', (canvas_w, canvas_h), color=(0, 0, 0, 0))
    draw = ImageDraw.Draw(overlay)

    # Font fallback chain: configured date font -> Trebuchet MS -> default.
    try:
        font = ImageFont.truetype(DATE_FONT_PATH, DATE_FONT_SIZE)
    except Exception:
        try:
            font = ImageFont.truetype("C:\\Windows\\Fonts\\trebuc.ttf", DATE_FONT_SIZE)
        except Exception:
            font = ImageFont.load_default()

    # English month names, e.g. "January 2025".
    months_en = {
        1: 'January', 2: 'February', 3: 'March', 4: 'April',
        5: 'May', 6: 'June', 7: 'July', 8: 'August',
        9: 'September', 10: 'October', 11: 'November', 12: 'December'
    }
    month_name = months_en.get(date_obj.month, f"Month {date_obj.month}")
    year = date_obj.year
    text = f"{month_name} {year}"

    # Measure the text, then position it bottom-center, 40 px above the edge.
    left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
    text_width = right - left
    text_height = bottom - top
    x = (canvas_w - text_width) // 2
    y = canvas_h - text_height - 40

    # Semi-transparent black box behind the text for readability.
    bg_padding = 20
    draw.rectangle(
        [(x - bg_padding, y - bg_padding), (x + text_width + bg_padding, y + text_height + bg_padding)],
        fill=(0, 0, 0, 150)
    )
    draw.text((x, y), text, fill=(255, 255, 255, 255), font=font)

    # RGBA numpy array for later alpha blending.
    return np.array(overlay)
def apply_month_overlay(frame, overlay_rgba):
    """Alpha-blend an RGBA overlay (the month/year banner) onto a frame.

    Args:
        frame: HxWx3 uint8 image.
        overlay_rgba: HxWx4 uint8 overlay with the same spatial size.

    Returns:
        HxWx3 uint8 blended frame.
    """
    # Normalize alpha to 0..1 and broadcast it over the color channels so the
    # whole blend is one vectorized expression (replaces the old per-channel
    # Python loop; also drops the unused h/w locals).
    alpha = (overlay_rgba[:, :, 3].astype(np.float32) / 255.0)[:, :, None]
    overlay_rgb = overlay_rgba[:, :, :3].astype(np.float32)
    blended = frame.astype(np.float32) * (1.0 - alpha) + overlay_rgb * alpha
    return blended.astype(np.uint8)
def create_title_card(text, duration, target_size):
    """Build an ImageClip showing `text` centered in white on black.

    No fade effects are attached (memory efficient); the caller composes
    transitions separately.
    """
    from PIL import ImageDraw, ImageFont

    width, height = target_size[0], target_size[1]
    card = Image.new('RGB', (width, height), color=(0, 0, 0))
    draw = ImageDraw.Draw(card)

    # Font fallback chain: configured title font -> Trebuchet MS -> default.
    try:
        font = ImageFont.truetype(TITLE_FONT_PATH, TITLE_FONT_SIZE)
    except Exception:
        try:
            font = ImageFont.truetype("C:\\Windows\\Fonts\\trebuc.ttf", TITLE_FONT_SIZE)
        except Exception:
            font = ImageFont.load_default()

    # Measure the text and center it on the card.
    left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
    x = (width - (right - left)) // 2
    y = (height - (bottom - top)) // 2
    draw.text((x, y), text, fill=(255, 255, 255), font=font)

    # Static clip for the requested duration.
    return ImageClip(np.array(card)).set_duration(duration)
def create_pause_clip(duration, target_size, fade_in=True, fade_out=True):
    """Return a black ImageClip of `duration` seconds with optional 1s fades."""
    width, height = target_size[0], target_size[1]
    # Solid black frame at the target resolution.
    clip = ImageClip(np.zeros((height, width, 3), dtype=np.uint8)).set_duration(duration)
    if fade_in:
        clip = clip.crossfadein(1.0)   # 1 second fade in from black
    if fade_out:
        clip = clip.crossfadeout(1.0)  # 1 second fade out to black
    return clip
def calculate_input_hash(image_folder, audio_file, settings):
    """Calculate an MD5 fingerprint of all inputs to detect changes.

    Covers every supported image in `image_folder` (path, size, content),
    the audio file (path, size, content) and the `settings` dict, so any
    change invalidates the render cache.

    Returns:
        Hex digest string.
    """
    def _mix_in_file_content(hasher, path):
        """Fold the MD5 of the file's content into `hasher` (best effort)."""
        try:
            file_hasher = hashlib.md5()
            with open(path, 'rb') as f:
                while chunk := f.read(8192):
                    file_hasher.update(chunk)
            hasher.update(file_hasher.hexdigest().encode())
        except OSError:
            # FIX: was a bare `except: pass`. An unreadable file skips its
            # content but keeps path/size in the hash (best-effort cache key).
            pass

    hasher = hashlib.md5()
    # Hash all image files, sorted by name for a stable result.
    image_files = []
    for filename in sorted(os.listdir(image_folder)):
        if filename.lower().endswith(SUPPORTED_EXTENSIONS):
            image_files.append(os.path.join(image_folder, filename))
    for img_path in sorted(image_files):
        # Path + size + full content hash per image.
        hasher.update(img_path.encode())
        hasher.update(str(os.path.getsize(img_path)).encode())
        _mix_in_file_content(hasher, img_path)
    # Hash the audio file (path, size, content).
    if os.path.exists(audio_file):
        hasher.update(audio_file.encode())
        hasher.update(str(os.path.getsize(audio_file)).encode())
        _mix_in_file_content(hasher, audio_file)
    # Hash the settings so parameter changes also invalidate the cache.
    hasher.update(json.dumps(settings, sort_keys=True).encode())
    return hasher.hexdigest()
def check_cache(output_file, current_hash):
    """Return True when the rendered video exists and its cache hash matches.

    Args:
        output_file: Path of the rendered video.
        current_hash: Hash of the current inputs (see calculate_input_hash).

    Returns:
        bool: True only if both the video and its '.cache' metadata exist
        and the stored hash equals `current_hash`.
    """
    cache_file = output_file + '.cache'
    # Both the video and its cache metadata must exist.
    if not os.path.exists(output_file):
        return False
    if not os.path.exists(cache_file):
        return False
    try:
        with open(cache_file, 'r') as f:
            cache_data = json.load(f)
    except (OSError, ValueError):
        # FIX: was a bare `except: pass`. Unreadable or corrupt cache
        # metadata (json.JSONDecodeError subclasses ValueError) -> stale.
        return False
    return cache_data.get('hash') == current_hash
def save_cache(output_file, input_hash):
    """Write cache metadata (input hash + timestamp) next to the output video."""
    metadata = {
        'hash': input_hash,
        'timestamp': datetime.now().isoformat(),
        'output_file': output_file,
    }
    # Sidecar file: "<output>.cache", consumed by check_cache().
    with open(output_file + '.cache', 'w') as cache_fh:
        json.dump(metadata, cache_fh, indent=2)
def analyze_audio_tempo(audio_path):
    """Detect the tempo (BPM) and beat timestamps of an audio file.

    Returns:
        (tempo, beat_times) on success, (None, None) on any failure.
    """
    try:
        print(" Analyzing audio tempo...")
        samples, sample_rate = librosa.load(audio_path)
        # Beat tracking gives the tempo plus the frame index of each beat.
        tempo, beat_frames = librosa.beat.beat_track(y=samples, sr=sample_rate)
        # Convert beat frames to timestamps in seconds.
        beat_times = librosa.frames_to_time(beat_frames, sr=sample_rate)
        print(f" ✓ Detected tempo: {tempo:.1f} BPM")
        print(f" ✓ Found {len(beat_times)} beats")
        return tempo, beat_times
    except Exception as e:
        print(f" ⚠ Could not analyze tempo: {e}")
        return None, None
def create_collage(media_group, target_size):
    """
    Create a collage frame from the images in a media group.

    - 2 images: 1x2 grid (side by side, full height)
    - 3-4 images: 2x2 grid, filled row-major (a missing 4th cell stays black)

    media_group is a list of (filepath, datetime, media_type) tuples; only
    images are used, videos in the group are ignored. Any other image count
    yields an all-black frame (unchanged legacy behavior).

    Returns:
        target_size-shaped RGB uint8 numpy frame; cells whose image is
        corrupted are left black.
    """
    def _place_cell(canvas, image_path, x0, y0, cell_w, cell_h):
        """Render one image into the canvas cell at (x0, y0); skip on failure."""
        frame = resize_image_to_fit(image_path, cell_w, cell_h)
        if frame is None:
            return  # corrupted image -> leave the cell black
        frame_h, frame_w = frame.shape[:2]
        # Crop or pad to the exact cell size before placement.
        if frame_h >= cell_h and frame_w >= cell_w:
            cell = frame[0:cell_h, 0:cell_w]
        else:
            cell = np.zeros((cell_h, cell_w, 3), dtype=np.uint8)
            cell[0:frame_h, 0:frame_w] = frame[0:frame_h, 0:frame_w]
        canvas[y0:y0 + cell_h, x0:x0 + cell_w] = cell

    # Get only images from the group.
    images = [(path, dt) for path, dt, mtype in media_group if mtype == 'image']
    num_images = len(images)
    canvas = np.zeros((target_size[1], target_size[0], 3), dtype=np.uint8)

    if num_images == 2:
        # 1x2 grid - two images side by side (full height).
        cell_w = target_size[0] // 2
        cell_h = target_size[1]
        for idx, (image_path, _) in enumerate(images):
            _place_cell(canvas, image_path, idx * cell_w, 0, cell_w, cell_h)
    elif num_images in (3, 4):
        # FIX: the 2x2 grid for 3-4 images was documented but never
        # implemented -- the function silently returned a black frame.
        cell_w = target_size[0] // 2
        cell_h = target_size[1] // 2
        for idx, (image_path, _) in enumerate(images[:4]):
            row, col = divmod(idx, 2)
            _place_cell(canvas, image_path, col * cell_w, row * cell_h, cell_w, cell_h)
    return canvas
def is_image_vertical(image_path):
    """Return True when the image is portrait-oriented (height > width).

    EXIF orientation tags 6 and 8 (90/270 degree rotations) are honored,
    since such files store width/height swapped on disk. Returns False on
    any read error.
    """
    try:
        from PIL import Image, ExifTags
        img = Image.open(image_path)
        width, height = img.width, img.height
        # Consult the EXIF Orientation tag to recover the true orientation.
        try:
            exif_data = img._getexif()
            if exif_data:
                for tag_id, tag_value in exif_data.items():
                    if ExifTags.TAGS.get(tag_id) == 'Orientation':
                        # Orientations 6 and 8 mean the stored dimensions
                        # are swapped relative to how the photo is viewed.
                        if tag_value in [6, 8]:
                            width, height = height, width
        except Exception:
            pass
        return height > width
    except Exception as e:
        print(f" ⚠ Could not check orientation for {image_path}: {e}")
        return False
def apply_random_transition(clip, transition_duration, transition_type=None):
    """Apply a lightweight crossfade-in to `clip`.

    Only crossfadein() is used (memory-efficient, smooth rendering);
    `transition_type` is accepted for interface compatibility but ignored.
    If the transition cannot be applied, the clip is returned unchanged.
    """
    try:
        faded = clip.crossfadein(transition_duration)
    except Exception:
        # Last resort: hand back the untouched clip.
        return clip
    return faded
def detect_faces_smart(image_path):
"""
Smart multi-pass face detection using OpenCV Haar Cascade.
Avoids both false positives (100+ faces) and false negatives (missing real faces).
Strategy:
1. Try balanced parameters (most common case)
2. If 0-2 faces found: try looser params (might be missing people)
3. If 30+ faces found: too many false positives, use stricter
4. Return the result that makes most sense
Returns:
Dictionary with 'center', 'bbox', 'face_count' if faces found, None otherwise
"""
try:
# Load image with OpenCV
img = read_image_safe(image_path)
if img is None:
return None
# Convert to grayscale for face detection
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# Load Haar Cascade classifier (built into OpenCV)
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
# Try THREE different parameter sets to find best balance
# Balanced/Medium (most likely correct)
faces_balanced = face_cascade.detectMultiScale(
gray,
scaleFactor=1.1,
minNeighbors=4, # Medium sensitivity
minSize=(40, 40), # Medium face size
flags=cv2.CASCADE_SCALE_IMAGE