NetfLips/util.py at main · Prometheus-AI-3team/NetfLips · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import os
import soundfile as sf
import cv2
import ffmpeg

def process_units(units, reduce=False):
    """Optionally collapse runs of consecutive duplicate units.

    Args:
        units: Indexable sequence of units.
        reduce: When false, ``units`` is returned unchanged (same object).
            When true, a new list is built that keeps only the first unit
            of every run of equal neighbours.

    Returns:
        ``units`` itself, or a new list without consecutive repeats.
    """
    if reduce:
        collapsed = []
        for idx, unit in enumerate(units):
            # Keep the first element, and any element that differs from
            # the one right before it.
            if idx == 0 or unit != units[idx - 1]:
                collapsed.append(unit)
        return collapsed

    return units

def save_unit(unit, unit_path):
    """Write a unit string to ``unit_path``, creating parent directories.

    Args:
        unit: Text to write (passed directly to ``file.write``).
        unit_path: Destination file path. Any existing file is overwritten.
    """
    parent_dir = os.path.dirname(unit_path)
    # dirname is "" for a bare filename, and os.makedirs("") raises
    # FileNotFoundError, so only create the directory when there is one.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    with open(unit_path, "w") as f:
        f.write(unit)

def save_audio(audio, audio_path, sampling_rate=16000):
    """Write audio samples to ``audio_path`` using ``soundfile``.

    Args:
        audio: Sample data forwarded unchanged to ``sf.write``.
        audio_path: Destination file path; parent directories are created.
        sampling_rate: Sample rate in Hz written to the file.
    """
    parent_dir = os.path.dirname(audio_path)
    # dirname is "" for a bare filename, and os.makedirs("") raises
    # FileNotFoundError, so only create the directory when there is one.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    sf.write(
        audio_path,
        audio,
        sampling_rate,
    )

def extract_audio_from_video(video_path, save_audio_path, sampling_rate=16000):
    """Extract the audio track of a video into a mono 16-bit PCM file.

    Runs ffmpeg (via the ``ffmpeg`` Python bindings) and overwrites
    ``save_audio_path`` if it already exists.

    Args:
        video_path: Path of the input video.
        save_audio_path: Destination audio path; parent directories are
            created.
        sampling_rate: Output sample rate in Hz (ffmpeg ``-ar``).
    """
    parent_dir = os.path.dirname(save_audio_path)
    # dirname is "" for a bare filename, and os.makedirs("") raises
    # FileNotFoundError, so only create the directory when there is one.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    (
        ffmpeg.input(video_path)
        .output(
            save_audio_path,
            acodec="pcm_s16le",  # signed 16-bit little-endian PCM
            ac=1,  # single (mono) channel
            ar=sampling_rate,
            loglevel="panic",  # silence ffmpeg's console output
        )
        .run(overwrite_output=True)
    )

def _paste_patch(frame, patch, box):
    """Resize ``patch`` to ``box`` and paste it into ``frame`` in place."""
    # Negative coordinates are clamped to the top/left frame edge.
    x1, y1, x2, y2 = [max(int(v), 0) for v in box]
    if x2 - x1 <= 0 or y2 - y1 <= 0:
        return

    # Slicing clips at the bottom/right frame edge, so the writable region
    # can be smaller than the requested box (or empty when the box lies
    # completely outside the frame).
    region_h, region_w = frame[y1:y2, x1:x2].shape[:2]
    if region_h == 0 or region_w == 0:
        return

    patch = cv2.resize(patch, (x2 - x1, y2 - y1))
    if patch.shape[:2] != (region_h, region_w):
        # Box sticks out of the frame: shrink the patch to the clipped region.
        patch = cv2.resize(patch, (region_w, region_h))
    frame[y1:y2, x1:x2] = patch


def save_video(audio, video, full_video, bbox, save_video_path, sampling_rate=16000, fps=25, vcodec="libx264"):
    """Paste per-frame patches into the full frames and mux them with audio.

    The audio and the composited frames are first written to temporary
    ``.temp.wav`` / ``.temp.avi`` files next to ``save_video_path``, then
    combined by ffmpeg into ``save_video_path`` (overwriting it). The
    temporary files are removed afterwards, even when a step fails.

    Args:
        audio: Audio samples, forwarded to ``save_audio``.
        video: Iterable of per-frame patches to paste (resized with
            ``cv2.resize``).
        full_video: Full frames; ``shape[1]`` and ``shape[2]`` are used as
            frame height and width. Frames are written to in place
            (presumably a NumPy array, so the caller's data is modified).
        bbox: Per-frame ``(x1, y1, x2, y2)`` boxes, or ``None`` to write the
            full frame unchanged.
        save_video_path: Destination path of the final video.
        sampling_rate: Sample rate in Hz of ``audio``.
        fps: Frame rate of the written video.
        vcodec: Video codec ffmpeg uses for the final file.
    """
    parent_dir = os.path.dirname(save_video_path)
    # dirname is "" for a bare filename, and os.makedirs("") would raise.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    path_stem = os.path.splitext(save_video_path)[0]
    temp_audio_path = path_stem + ".temp.wav"
    temp_video_path = path_stem + ".temp.avi"

    try:
        save_audio(audio, temp_audio_path, sampling_rate)

        frame_h, frame_w = full_video.shape[1], full_video.shape[2]
        out = cv2.VideoWriter(temp_video_path, cv2.VideoWriter_fourcc(*'DIVX'), fps, (frame_w, frame_h))
        try:
            # zip stops at the shortest of the three sequences.
            for patch, frame, box in zip(video, full_video, bbox):
                # A missing box means: keep the original frame as is.
                if box is not None:
                    _paste_patch(frame, patch, box)
                out.write(frame)
        finally:
            # Always flush/close the intermediate file, even on error.
            out.release()

        ffmpeg.output(
            ffmpeg.input(temp_video_path),
            ffmpeg.input(temp_audio_path),
            save_video_path,
            vcodec=vcodec,
            acodec="aac",
            loglevel="panic",
        ).run(overwrite_output=True)
    finally:
        # Do not leave intermediates behind when a step above raised.
        for temp_path in (temp_audio_path, temp_video_path):
            if os.path.exists(temp_path):
                os.remove(temp_path)