-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathapp.py
More file actions
155 lines (130 loc) · 5.91 KB
/
app.py
File metadata and controls
155 lines (130 loc) · 5.91 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
import streamlit as st
import os
import subprocess
import uuid
import whisper
import google.generativeai as genai
# Page chrome: the same title is used for the browser tab and the on-page
# heading, so it is named once and reused.
_APP_TITLE = "π¬ AptPath Reel Transcriber"
st.set_page_config(page_title=_APP_TITLE, layout="centered")
st.title(_APP_TITLE)
st.caption("Upload your video and get the transcript like a boss π")

# Hand the Gemini client its API key, read from Streamlit's secrets store.
genai.configure(api_key=st.secrets["GEMINI_API_KEY"])

# Upload widget, limited to the listed video container types.
_UPLOAD_TYPES = ["mp4", "mov", "avi", "mkv"]
video_file = st.file_uploader("π€ Upload your video", type=_UPLOAD_TYPES)
# Gemini: Key moments extraction
def extract_key_moments(transcript_text):
    """Ask Gemini for the 3-5 most engaging moments in a transcript.

    The transcript is embedded verbatim at the end of a fixed instruction
    prompt and sent to the "gemini-1.5-flash" model.

    Args:
        transcript_text: Transcript to analyze; interpolated into the prompt.

    Returns:
        The ``text`` attribute of the model's response.
    """
    # Fixed instructions; the transcript and a trailing newline follow.
    instructions = (
        "\n"
        "You are analyzing a video transcript. Identify the top 3-5 most "
        "engaging or insightful moments with their timestamps.\n"
        "Output format:\n"
        "1. [start_time] - [end_time]: [summary of event]\n"
        "2. ...\n"
        "Transcript:\n"
    )
    prompt = f"{instructions}{transcript_text}\n"
    gemini = genai.GenerativeModel("gemini-1.5-flash")
    return gemini.generate_content(prompt).text
# Timestamp formatter for ASS format
def format_ass_timestamp(seconds):
    """Format a time offset as an ASS subtitle timestamp (``H:MM:SS.cc``).

    Args:
        seconds: Offset from the start of the media, in seconds. Any value
            accepted by ``float()`` works. Negative values are clamped to
            zero.

    Returns:
        A string such as ``"0:01:02.50"``: hours (at least one digit),
        two-digit minutes and seconds, and two-digit centiseconds.
    """
    # Convert to whole centiseconds first. Taking the fractional part of the
    # float and truncating it loses a unit on values like 2.3
    # (0.2999... * 100 -> 29); rounding the scaled value does not, and a
    # round-up to 100 centiseconds carries into the seconds field.
    total_centis = max(0, round(float(seconds) * 100))
    total_secs, centis = divmod(total_centis, 100)
    hrs, rem = divmod(total_secs, 3600)
    mins, secs = divmod(rem, 60)
    return f"{hrs:01}:{mins:02}:{secs:02}.{centis:02}"
# File extensions accepted for processing (compared case-insensitively).
_SUPPORTED_EXTENSIONS = (".mp4", ".mov", ".avi", ".mkv")

# Static preamble of every generated .ass file: script info, one "Default"
# style, and the column layout of the [Events] section.
# NOTE(review): PlayResX/PlayResY are 1920x1080 while the rendered reel is
# 1080x1920 — confirm the subtitle scaling is what is intended.
_ASS_HEADER = """[Script Info]
Title: Whisper Subtitles
ScriptType: v4.00+
Collisions: Normal
PlayResX: 1920
PlayResY: 1080
Timer: 100.0000
[V4+ Styles]
Format: Name, Fontname, Fontsize, PrimaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
Style: Default,Arial,48,&H00FFFFFF,&H00000000,&H00000000,-1,0,0,0,100,100,0,0,1,2,0,2,30,30,30,1
[Events]
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
"""

# Main pipeline, run whenever a file is present in the uploader:
# save -> validate -> extract audio -> transcribe -> key moments -> render.
# NOTE(review): the non-ASCII prefixes in the UI strings below look like
# mis-encoded emoji; they are kept exactly as found — confirm and restore.
if video_file:
    for folder in ("uploads", "audio", "transcripts", "reels"):
        os.makedirs(folder, exist_ok=True)

    # Use only the final path component of the client-supplied name, and
    # reject unsupported formats before anything is written to disk.
    upload_name = os.path.basename(video_file.name)
    if not upload_name.lower().endswith(_SUPPORTED_EXTENSIONS):
        st.error("β Unsupported file format.")
        st.stop()

    unique_id = str(uuid.uuid4())[:8]
    video_path = os.path.join("uploads", f"{unique_id}_{upload_name}")
    with open(video_path, "wb") as f:
        f.write(video_file.read())

    with st.spinner("π Validating video file..."):
        # Decode the whole file to a null sink; "-v error" limits stderr to
        # error messages.
        validation_cmd = ["ffmpeg", "-v", "error", "-i", video_path, "-f", "null", "-"]
        validation = subprocess.run(
            validation_cmd,
            stderr=subprocess.PIPE,
            text=True,
            errors="replace",
        )
        # A non-zero exit status is a failure even if nothing was printed.
        if validation.returncode != 0 or validation.stderr:
            st.error("β Video appears to be corrupted or unreadable.")
            st.text(validation.stderr)
            st.stop()
        st.success(f"β Video uploaded and validated: {video_file.name}")

    # Step 1: Extract audio
    audio_path = os.path.join("audio", f"{unique_id}.wav")
    ffmpeg_cmd_audio = ["ffmpeg", "-i", video_path, "-q:a", "0", "-map", "a", audio_path, "-y"]
    with st.spinner("π§ Extracting audio from video..."):
        try:
            subprocess.run(ffmpeg_cmd_audio, check=True)
            st.success("π΅ Audio extracted successfully!")
        except subprocess.CalledProcessError:
            st.error("β Audio extraction failed.")
            st.stop()

    # Step 2: Transcribe using Whisper + generate .ass subtitles
    with st.spinner("π§ Transcribing audio using Whisper..."):
        try:
            model = whisper.load_model("base")
            transcription = model.transcribe(audio_path)
            segments = transcription["segments"]

            # Plain-text transcript, one "[start - end] text" line per segment.
            transcript_lines = []
            for seg in segments:
                text = seg["text"].strip()
                transcript_lines.append(
                    f"[{seg['start']:.2f} - {seg['end']:.2f}] {text}\n"
                )
            timestamped_transcript = "".join(transcript_lines)

            transcript_path = os.path.join("transcripts", f"{unique_id}.txt")
            with open(transcript_path, "w", encoding="utf-8") as f:
                f.write(timestamped_transcript)

            # Subtitle file: fixed header followed by one Dialogue event per
            # segment. Newlines inside a segment are flattened to spaces
            # because each event must occupy a single line.
            ass_path = os.path.join("transcripts", f"{unique_id}.ass")
            with open(ass_path, "w", encoding="utf-8") as f:
                f.write(_ASS_HEADER)
                for seg in segments:
                    start = format_ass_timestamp(seg["start"])
                    end = format_ass_timestamp(seg["end"])
                    text = seg["text"].replace("\n", " ").strip()
                    f.write(f"Dialogue: 0,{start},{end},Default,,0,0,0,,{text}\n")

            st.success("π Transcription complete with subtitles!")
            st.text_area("π Transcript with Timestamps", timestamped_transcript, height=400)
        except Exception as e:
            st.error(f"β Whisper transcription failed: {str(e)}")
            st.stop()

    # Step 3: Analyze key moments. A failure here is reported but does not
    # stop the run; the reel is still rendered.
    with st.spinner("π Analyzing transcript for key reel moments..."):
        try:
            key_moments = extract_key_moments(timestamped_transcript)
            st.subheader("π Top Reel-Worthy Moments")
            st.text_area("π― Important Segments", key_moments, height=300)
        except Exception as e:
            st.error(f"β Gemini analysis failed: {str(e)}")

    # Step 4: Convert to 1080x1920 and burn subtitles
    reel_path = os.path.join("reels", f"reel_{unique_id}.mp4")
    # Forward slashes so a Windows path survives FFmpeg filter-argument parsing.
    ass_path_clean = ass_path.replace("\\", "/")
    # Fit the frame inside 1080x1920 before padding. Scaling only the width
    # ("scale=1080:-2") can yield a height above 1920 for tall inputs, and
    # pad cannot produce an output smaller than its input.
    video_filter = (
        "scale=1080:1920:force_original_aspect_ratio=decrease,"
        "pad=1080:1920:(ow-iw)/2:(oh-ih)/2,"
        f"ass='{ass_path_clean}'"
    )
    ffmpeg_cmd_reel = [
        "ffmpeg",
        "-i", video_path,
        "-vf", video_filter,
        "-y",
        reel_path,
    ]
    with st.spinner("π± Converting to Reel format with subtitles..."):
        try:
            # Capture stderr so FFmpeg's own diagnostics can be shown on failure.
            subprocess.run(
                ffmpeg_cmd_reel,
                check=True,
                stderr=subprocess.PIPE,
                text=True,
                errors="replace",
            )
            st.success("π₯ Reel with subtitles ready to rock!")
            st.video(reel_path)
        except subprocess.CalledProcessError as e:
            st.error("β Failed to convert video to reel format with subtitles.")
            st.text(e.stderr or str(e))