inference.py
import argparse
import os
import pickle

import imageio
import numpy as np
import torch

from core.dataclass import Pointmap
from core.inference.wan import generate_video
from core.tokenizer.wan import WanTokenizer

# the Wan VAE tokenizer is used to decode latents back to pixel-space pointmap frames
model_path = 'pretrained/Wan2.1-I2V-14B-480P-Diffusers/vae/'
tokenizer = WanTokenizer(model_path=model_path)

def save_pointmap(latents, save_path, image_path=None, mode='xyz'):
    latents = latents[None]
    # apply special denormalization to the latent pointmap half only
    encoded_pm_mean = -0.13
    encoded_pm_std = 1.70
    latents[:, :, :, :, latents.shape[4]//2:] = latents[:, :, :, :, latents.shape[4]//2:] * encoded_pm_std + encoded_pm_mean
    # decode latents back to pixel-space frames
    pointmap = tokenizer.decode(latents)

    # write an .mp4 preview next to the pickle
    if '.mp4' in save_path:
        mp4_save_path = save_path
    else:
        _, ext = os.path.splitext(save_path)
        mp4_save_path = save_path.replace(ext, ".mp4")
    imageio.mimwrite(mp4_save_path, (pointmap * 255).clip(0, 255).astype(np.uint8), fps=24)

    pm = Pointmap()
    if mode == 'xyzrgb':
        # frames are a side-by-side [rgb | xyz] layout, so split along the width
        W = pointmap.shape[2] // 2
        rgb = pointmap[..., :W, :]
        pointmap = pointmap[..., W:, :]
    pm.init_dummy(pointmap.shape[0], pointmap.shape[1], pointmap.shape[2])
    pointmap = pointmap.reshape(*pm.pcd.shape)
    pm.pcd = pointmap
    if mode == 'xyzrgb':
        pm.rgb = rgb.clip(min=0, max=1)
        pm.colors = pm.rgb.reshape(*pm.colors.shape)
    elif image_path is not None:
        # in xyz-only mode, reuse the conditioning image as per-point colors
        rgb = imageio.imread(image_path) / 255.
        pm.rgb = np.stack([rgb for _ in range(pm.rgb.shape[0])], 0)
        pm.colors = pm.rgb.reshape(*pm.colors.shape)
    with open(save_path, 'wb') as f:
        pickle.dump(pm, f)

def main(args):
    # read one prompt per line and one image path per line; the lists must align
    prompt_list = []
    with open(args.prompt, 'r') as f:
        for line in f.readlines():
            prompt_list.append(line.strip())
    image_list = []
    with open(args.image, 'r') as f:
        for line in f.readlines():
            image_list.append(line.strip())
    assert len(prompt_list) == len(image_list)

    os.makedirs(args.out, exist_ok=True)
    for i in range(len(prompt_list)):
        prompt, image_path = prompt_list[i], image_list[i]
        # append the pointmap style trigger phrase to every prompt
        suffix = 'POINTMAP_STYLE.'
        prompt = prompt + ' ' + suffix
        output_path = os.path.join(args.out, f'{i:05d}.mp4')
        if args.idx == -1 or i == args.idx:
            latent = generate_video(
                prompt=prompt,
                image_or_video_path=image_path,
                model_path='pretrained/Wan2.1-I2V-14B-480P-Diffusers',
                sft_path=args.sft_path,
                lora_path=args.lora_path,
                lora_rank=args.lora_rank,
                output_path=output_path,
                num_frames=49,
                width=720 * 2,
                height=480,
                generate_type=args.type,
                num_inference_steps=50,
                guidance_scale=5.0,
                fps=24,
                num_videos_per_prompt=1,
                dtype=torch.bfloat16,
                seed=42,
                mode=args.mode,
            )
            # decode the returned latent and save the reconstructed pointmap next to the video
            save_pointmap(latent, output_path.replace('.mp4', '.pkl'), image_path, args.mode)

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Generate a video from a text prompt using Wan")
    parser.add_argument("--prompt", type=str, required=True, help="text file with one prompt per line")
    parser.add_argument("--image", type=str, required=True, help="text file with one image path per line")
    parser.add_argument("--idx", type=int, default=-1, help="generate only this index; -1 processes all")
    parser.add_argument("--sft_path", type=str, default=None, help="path of the SFT weights to be used")
    parser.add_argument("--out", type=str, default="results/output", help="directory to save the generated videos")
    parser.add_argument("--mode", type=str, default="xyzrgb", help="'xyz' or 'xyzrgb'")
    parser.add_argument("--type", type=str, default="condpm-i2dpm", help="'i2dpm' or 'condpm-i2dpm'")
    parser.add_argument("--lora_path", type=str, default=None, help="path of the LoRA weights to be used")
    parser.add_argument("--lora_rank", type=int, default=64, help="rank of the LoRA weights to be used")
    args = parser.parse_args()
    main(args)
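
# Example usage (a sketch; prompts.txt and images.txt are hypothetical files with
# one prompt / one image path per line, aligned line-for-line):
#   python inference.py --prompt prompts.txt --image images.txt \
#       --out results/output --mode xyzrgb --type condpm-i2dpm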