-
Notifications
You must be signed in to change notification settings - Fork 7
Expand file tree
/
Copy pathutil.py
More file actions
117 lines (88 loc) · 4.04 KB
/
util.py
File metadata and controls
117 lines (88 loc) · 4.04 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
from PIL import Image
from diffsynth.utils.data import save_video
from copy import deepcopy
import numpy as np
from PIL import Image, ImageDraw, ImageFont
def rgb_to_latent_shot_groups_list(shot_groups_list):
    """Map RGB-frame shot ranges to contiguous latent-frame ranges.

    Each shot of ``n`` RGB frames occupies ``1 + (n - 1) // 4`` latent frames
    (the video VAE compresses time by 4x, keeping one extra leading frame).
    Latent ranges are laid out back-to-back starting at 0.

    Args:
        shot_groups_list: list of ``[start_frame, end_frame]`` pairs in RGB
            frame indices (end exclusive).

    Returns:
        List of ``[latent_start, latent_end]`` pairs, one per input shot.

    Note: the original implementation looked up the previous shot's latent end
    via ``list[shot_index - 1][-1]``; for the first shot with a non-zero start
    that index wrapped around to the last group — a bug. We instead track the
    previous latent end explicitly, which is identical for the normal case of
    shots laid out contiguously from frame 0.
    """
    latent_groups = []
    prev_latent_end = 0
    for shot_start_frame, shot_end_frame in shot_groups_list:
        shot_frame_num = shot_end_frame - shot_start_frame
        # First shot (or any shot starting at frame 0) begins at latent 0;
        # otherwise continue right after the previous shot's latent range.
        latent_start = 0 if shot_start_frame == 0 else prev_latent_end
        latent_end = latent_start + 1 + max(0, shot_frame_num - 1) // 4
        latent_groups.append([latent_start, latent_end])
        prev_latent_end = latent_end
    return latent_groups
def pad_shot_groups_to_4n_plus_1(shot_groups_list):
    """Pad every shot to a length of the form 4n + 1 frames.

    Shots are re-laid-out back-to-back starting from the first shot's start,
    each one stretched to the smallest length ``>=`` its original length that
    satisfies ``(length - 1) % 4 == 0``.

    Args:
        shot_groups_list: non-empty list of ``[start, end]`` frame pairs.

    Returns:
        Tuple of (padded ``[start, end]`` pairs, original lengths per shot).
    """
    padded_groups = []
    original_lengths = []
    cursor = shot_groups_list[0][0]
    for start, end in shot_groups_list:
        length = end - start
        original_lengths.append(length)
        # Distance past the last 4n+1 boundary; 0 means already well-formed.
        overshoot = (length - 1) % 4
        padded_length = length if overshoot == 0 else length + (4 - overshoot)
        padded_groups.append([cursor, cursor + padded_length])
        cursor += padded_length
    return padded_groups, original_lengths
def get_user_wanted_frames(video, padded_shot_groups, save_shot_num_list):
    """Drop padding frames, keeping only the originally-requested frames.

    For each padded shot, keep the first ``save_shot_num_list[i]`` frames
    starting at its padded start offset, then stitch the kept segments back
    together along the frame axis.

    Args:
        video: array-like of frames, indexed along axis 0.
        padded_shot_groups: ``[start, end]`` pairs in the padded layout.
        save_shot_num_list: number of frames to keep per shot.

    Returns:
        np.ndarray of the concatenated kept frames.
    """
    kept_segments = [
        video[start: start + keep]
        for (start, _end), keep in zip(padded_shot_groups, save_shot_num_list)
    ]
    return np.concatenate(kept_segments, axis=0)
class TextImageCreator:
    """Renders word-wrapped caption text onto a black image.

    Produces fixed-size RGB images (as NumPy arrays) containing white text,
    wrapping character-by-character so CJK text without spaces also wraps.
    """

    def get_text_width(self, text, font):
        """Return the rendered pixel width of *text* for *font*.

        Tries the Pillow APIs from newest to oldest:
        ``font.getlength`` (Pillow >= 8), then ``font.getsize`` (removed in
        Pillow 10), then a crude ``len(text) * font.size`` estimate.

        Fix: the original fallbacks used a bare ``except:``, which also
        swallowed KeyboardInterrupt/SystemExit; narrowed to ``Exception``
        while keeping the same best-effort fallback chain.
        """
        try:
            return font.getlength(text)
        except AttributeError:
            try:
                return font.getsize(text)[0]
            except Exception:
                # Last resort: rough estimate assuming square glyph cells.
                return len(text) * font.size

    def create_text_image(self, text, width, height):
        """Render *text* onto a black ``width`` x ``height`` RGB image.

        Text is wrapped per character to fit within ``width - 20`` pixels
        (10 px margins), honoring explicit newlines, and drawn top-down in
        white with a fixed 19 px line spacing.

        Returns:
            np.ndarray of shape (height, width, 3), dtype uint8.
        """
        image = Image.new('RGB', (width, height), color='black')
        draw = ImageDraw.Draw(image)
        try:
            font = ImageFont.truetype("comic.ttf", 19)
        except OSError:
            # truetype raises OSError when the font file can't be found/read;
            # fall back to Pillow's built-in bitmap font.
            font = ImageFont.load_default()
        lines = []
        paragraphs = text.split('\n')
        for paragraph in paragraphs:
            if not paragraph.strip():
                # Preserve blank lines from explicit newlines in the input.
                lines.append('')
                continue
            current_line = ''
            for char in paragraph:
                test_line = current_line + char
                if self.get_text_width(test_line, font) > width - 20:
                    lines.append(current_line)
                    current_line = char
                else:
                    current_line = test_line
            if current_line:
                lines.append(current_line)
        y = 5
        line_spacing = 19
        for line in lines:
            draw.text((10, y), line, font=font, fill='white')
            y += line_spacing
        return np.array(image)
def save_video_with_caption(num_shots, shot_groups, now_multishot_video_caption_list, user_wanted_frames, save_path, target_width):
    """Stack a per-shot caption strip under the video frames and save to disk.

    For each shot, a 250 px tall caption image is rendered (with the shot
    label injected before "Now:") and repeated for every frame in that shot.
    The caption strip is concatenated below the video frames along the
    height axis and the result is written with ``save_video``.

    Args:
        num_shots: number of shots to caption.
        shot_groups: ``[start, end]`` frame pairs per shot.
        now_multishot_video_caption_list: caption string per shot.
        user_wanted_frames: video frames, shape (frames, H, W, C).
        save_path: output video path.
        target_width: pixel width of the caption strip (matches the video).
    """
    creator = TextImageCreator()
    caption_frames = []
    for shot_idx in range(num_shots):
        caption_text = now_multishot_video_caption_list[shot_idx].replace(
            "Now:", f"\n\n[Shot{num_shots}_{shot_idx+1}]-Now:"
        )
        strip = creator.create_text_image(caption_text, target_width, 250)
        first_frame, last_frame = shot_groups[shot_idx]
        # One identical caption frame per video frame of this shot.
        caption_frames.extend([strip] * (last_frame - first_frame))
    caption_video = np.stack(caption_frames)
    composite = np.concatenate((user_wanted_frames, caption_video), axis=1)
    save_video(composite, save_path, fps=15, quality=5)