-
Notifications
You must be signed in to change notification settings - Fork 108
Expand file tree
/
Copy pathinference.sh
More file actions
24 lines (23 loc) · 1.37 KB
/
inference.sh
File metadata and controls
24 lines (23 loc) · 1.37 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
# Single-GPU inference launcher: runs inference.py once with the checkpoint,
# reference image, driving audio, prompt and sampling flags hard-coded below.
#
# Usage: run from the directory that contains inference.py and
# deepspeed_config/ (both are referenced by relative path).
#
# NOTE(review): every path below starts with a literal "path/" -- presumably a
# placeholder for the directory that holds the StableAvatar checkout; replace
# it before running (TODO confirm against the project README).
#
# Environment:
#   TOKENIZERS_PARALLELISM  exported as "false" for the python process
#                           (presumably read by the tokenizer library that
#                           inference.py loads -- confirm).
#   MODEL_NAME              exported; also passed as
#                           --pretrained_model_name_or_path.
#   CUDA_VISIBLE_DEVICES    set to 0 for the python command only (prefix
#                           assignment, not exported to the rest of the shell).
#
# All documentation lives up here because a comment cannot be placed between
# the backslash-continued argument lines without ending the command.
export TOKENIZERS_PARALLELISM=false
export MODEL_NAME="path/StableAvatar/checkpoints/Wan2.1-Fun-V1.1-1.3B-InP"

# "$MODEL_NAME" is quoted so that a checkpoint path containing spaces or glob
# characters is still handed to inference.py as a single argument.
CUDA_VISIBLE_DEVICES=0 python inference.py \
  --config_path="deepspeed_config/wan2.1/wan_civitai.yaml" \
  --pretrained_model_name_or_path="$MODEL_NAME" \
  --transformer_path="path/StableAvatar/checkpoints/StableAvatar-1.3B/transformer3d-square.pt" \
  --pretrained_wav2vec_path="path/StableAvatar/checkpoints/wav2vec2-base-960h" \
  --validation_reference_path="path/StableAvatar/examples/case-1/reference.png" \
  --validation_driven_audio_path="path/StableAvatar/examples/case-1/audio.wav" \
  --output_dir="path/StableAvatar/output_infer" \
  --validation_prompts="A middle-aged woman with short light brown hair, wearing pearl earrings and a blue blazer, is speaking passionately in front of a blurred background resembling a government building. Her mouth is open mid-phrase, her expression is engaged and energetic, and the lighting is bright and even, suggesting a television interview or live broadcast. The scene gives the impression she is singing with conviction and purpose." \
  --seed=42 \
  --ulysses_degree=1 \
  --ring_degree=1 \
  --motion_frame=25 \
  --sample_steps=50 \
  --width=512 \
  --height=512 \
  --overlap_window_length=5 \
  --clip_sample_n_frames=81 \
  --GPU_memory_mode="model_full_load" \
  --sample_text_guide_scale=3.0 \
  --sample_audio_guide_scale=5.0