-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathutil.py
More file actions
79 lines (66 loc) · 2.33 KB
/
util.py
File metadata and controls
79 lines (66 loc) · 2.33 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import os
import soundfile as sf
import cv2
import ffmpeg
def process_units(units, reduce=False):
    """Optionally collapse runs of consecutive equal units.

    Args:
        units: Indexable sequence of units.
        reduce: When false (the default) ``units`` is returned as-is (the
            same object). When true, a new list is built in which each run
            of adjacent equal items is kept only once.

    Returns:
        ``units`` itself if ``reduce`` is false, otherwise a list with
        adjacent duplicates removed.
    """
    if reduce:
        collapsed = []
        for idx, unit in enumerate(units):
            # Keep the first item and any item that differs from the one
            # immediately before it.
            if idx == 0 or unit != units[idx - 1]:
                collapsed.append(unit)
        return collapsed
    return units
def save_unit(unit, unit_path):
    """Write ``unit`` to a text file, creating the parent directory if needed.

    Args:
        unit: String to write; it is passed directly to ``file.write``.
        unit_path: Destination file path. The file is opened in ``"w"`` mode,
            so an existing file is overwritten.
    """
    parent_dir = os.path.dirname(unit_path)
    # os.path.dirname() returns "" for a bare filename and os.makedirs("")
    # raises FileNotFoundError, so only create a directory when there is one.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    with open(unit_path, "w") as f:
        f.write(unit)
def save_audio(audio, audio_path, sampling_rate=16000):
    """Write ``audio`` to ``audio_path`` with ``soundfile``.

    Args:
        audio: Sample data handed unchanged to ``soundfile.write``.
        audio_path: Destination file path; its parent directory is created
            when the path has a directory component.
        sampling_rate: Sample rate passed to ``soundfile.write``
            (default 16000).
    """
    parent_dir = os.path.dirname(audio_path)
    # os.path.dirname() returns "" for a bare filename and os.makedirs("")
    # raises FileNotFoundError, so only create a directory when there is one.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    sf.write(audio_path, audio, sampling_rate)
def extract_audio_from_video(video_path, save_audio_path, sampling_rate=16000):
    """Extract the audio track of a video into an audio file via ffmpeg.

    The output is requested as single-channel (``ac=1``) 16-bit PCM
    (``acodec="pcm_s16le"``) at ``sampling_rate`` Hz. ffmpeg's own logging is
    set to ``panic`` and an existing output file is overwritten.

    Args:
        video_path: Path of the input video.
        save_audio_path: Destination audio path; its parent directory is
            created when the path has a directory component.
        sampling_rate: Output sample rate passed to ffmpeg as ``ar``
            (default 16000).
    """
    parent_dir = os.path.dirname(save_audio_path)
    # os.path.dirname() returns "" for a bare filename and os.makedirs("")
    # raises FileNotFoundError, so only create a directory when there is one.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    stream = ffmpeg.input(video_path).output(
        save_audio_path,
        acodec="pcm_s16le",
        ac=1,
        ar=sampling_rate,
        loglevel="panic",
    )
    stream.run(overwrite_output=True)
def _paste_patch(frame, patch, box):
    """Resize ``patch`` to ``box`` and write it into ``frame`` in place.

    ``box`` is ``(x1, y1, x2, y2)``; each coordinate is truncated to ``int``
    and negative values are clamped to 0. Nothing is pasted when the box is
    degenerate or lies entirely outside the frame.
    """
    x1, y1, x2, y2 = [max(int(v), 0) for v in box]
    if x2 - x1 <= 0 or y2 - y1 <= 0:
        return

    # Slicing clips to the frame bounds, so this region can be smaller than
    # the nominal box (or empty when the box starts past the frame edge).
    region_h, region_w = frame[y1:y2, x1:x2].shape[:2]
    if region_h == 0 or region_w == 0:
        # cv2.resize cannot target a zero-sized output; leave the frame as is.
        return

    patch = cv2.resize(patch, (x2 - x1, y2 - y1))
    if patch.shape[:2] != (region_h, region_w):
        # The box sticks out of the frame: shrink the patch to the part of
        # the box that actually overlaps the frame.
        patch = cv2.resize(patch, (region_w, region_h))
    frame[y1:y2, x1:x2] = patch


def save_video(audio, video, full_video, bbox, save_video_path, sampling_rate=16000, fps=25, vcodec="libx264"):
    """Paste per-frame patches into the full frames and mux them with audio.

    Each patch in ``video`` is resized to its bounding box and written into
    the matching frame of ``full_video``; the frames are encoded to a
    temporary DIVX ``.temp.avi`` file, ``audio`` is written to a temporary
    ``.temp.wav`` file, and ffmpeg combines both into ``save_video_path``.
    The temporary files are removed afterwards, also when a step fails.

    Note that frames of ``full_video`` are modified in place.

    Args:
        audio: Audio samples, forwarded to ``save_audio``.
        video: Iterable of image patches, one per frame.
        full_video: Array of full frames; the output height is read from
            ``full_video.shape[1]`` and the width from ``full_video.shape[2]``.
        bbox: Iterable with one entry per frame: either ``None`` (the frame
            is written unchanged) or four values ``(x1, y1, x2, y2)``.
        save_video_path: Destination path of the muxed video; an existing
            file is overwritten.
        sampling_rate: Sample rate used for the temporary audio file.
        fps: Frame rate of the temporary video file.
        vcodec: Video codec passed to ffmpeg for the final file.
    """
    parent_dir = os.path.dirname(save_video_path)
    # os.path.dirname() returns "" for a bare filename and os.makedirs("")
    # raises FileNotFoundError, so only create a directory when there is one.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    path_stem = os.path.splitext(save_video_path)[0]
    temp_audio_path = path_stem + ".temp.wav"
    temp_video_path = path_stem + ".temp.avi"

    try:
        save_audio(audio, temp_audio_path, sampling_rate)

        frame_h, frame_w = full_video.shape[1], full_video.shape[2]
        writer = cv2.VideoWriter(
            temp_video_path,
            cv2.VideoWriter_fourcc(*'DIVX'),
            fps,
            (frame_w, frame_h),
        )
        try:
            # zip() stops at the shortest of the three sequences.
            for patch, frame, box in zip(video, full_video, bbox):
                if box is not None:
                    _paste_patch(frame, patch, box)
                # Every frame is written, pasted or not, so the frame count
                # stays in step with the audio.
                writer.write(frame)
        finally:
            # Always flush and close the container, even if a frame failed.
            writer.release()

        ffmpeg.output(
            ffmpeg.input(temp_video_path),
            ffmpeg.input(temp_audio_path),
            save_video_path,
            vcodec=vcodec,  # honor the caller's codec instead of a fixed one
            acodec="aac",
            loglevel="panic",
        ).run(overwrite_output=True)
    finally:
        # Do not leave intermediates behind when any step above raised.
        for temp_path in (temp_audio_path, temp_video_path):
            if os.path.exists(temp_path):
                os.remove(temp_path)