Skip to content

Commit e9e4f7b

Browse files
Ankit/add videodb editor (#54)
* fix: Fit Enum * Add reframe, smart vertical reframe and download functionality for video objects * Add support for audio transcription and remove the unneeded segmenter, length and force parameters from video get_transcript_text * build: update version --------- Co-authored-by: ashish-spext <ashish@spext.co>
1 parent 7dcabe1 commit e9e4f7b

5 files changed

Lines changed: 182 additions & 8 deletions

File tree

videodb/__about__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33

44

5-
__version__ = "0.2.17"
5+
__version__ = "0.3.0"
66
__title__ = "videodb"
77
__author__ = "videodb"
88
__email__ = "contact@videodb.io"

videodb/_constants.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,11 @@ class Workflows:
3535
add_subtitles = "add_subtitles"
3636

3737

38+
class ReframeMode:
39+
simple = "simple"
40+
smart = "smart"
41+
42+
3843
class SemanticSearchDefaultValues:
3944
result_threshold = 5
4045
score_threshold = 0.2
@@ -91,6 +96,7 @@ class ApiPath:
9196
meeting = "meeting"
9297
record = "record"
9398
editor = "editor"
99+
reframe = "reframe"
94100

95101

96102
class Status:

videodb/audio.py

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
1+
from typing import Dict, List, Union
12
from videodb._constants import (
23
ApiPath,
4+
Segmenter,
35
)
46

57

@@ -10,6 +12,8 @@ class Audio:
1012
:ivar str collection_id: ID of the collection this audio belongs to
1113
:ivar str name: Name of the audio file
1214
:ivar float length: Duration of the audio in seconds
15+
:ivar list transcript: Timestamped transcript segments
16+
:ivar str transcript_text: Full transcript text
1317
"""
1418

1519
def __init__(
@@ -20,6 +24,8 @@ def __init__(
2024
self.collection_id = collection_id
2125
self.name = kwargs.get("name", None)
2226
self.length = kwargs.get("length", None)
27+
self.transcript = kwargs.get("transcript", None)
28+
self.transcript_text = kwargs.get("transcript_text", None)
2329

2430
def __repr__(self) -> str:
2531
return (
@@ -43,6 +49,97 @@ def generate_url(self) -> str:
4349
)
4450
return url_data.get("signed_url", None)
4551

52+
def _fetch_transcript(
53+
self,
54+
start: int = None,
55+
end: int = None,
56+
segmenter: str = Segmenter.word,
57+
length: int = 1,
58+
force: bool = None,
59+
) -> None:
60+
if self.transcript and not force and not start and not end:
61+
return
62+
transcript_data = self._connection.get(
63+
path=f"{ApiPath.audio}/{self.id}/{ApiPath.transcription}",
64+
params={
65+
"start": start,
66+
"end": end,
67+
"segmenter": segmenter,
68+
"length": length,
69+
"force": "true" if force else "false",
70+
},
71+
show_progress=True,
72+
)
73+
self.transcript = transcript_data.get("word_timestamps", [])
74+
self.transcript_text = transcript_data.get("text", "")
75+
76+
def get_transcript(
77+
self,
78+
start: int = None,
79+
end: int = None,
80+
segmenter: Segmenter = Segmenter.word,
81+
length: int = 1,
82+
force: bool = None,
83+
) -> List[Dict[str, Union[float, str]]]:
84+
"""Get timestamped transcript segments for the audio.
85+
86+
:param int start: Start time in seconds
87+
:param int end: End time in seconds
88+
:param Segmenter segmenter: Segmentation type (:class:`Segmenter.word`,
89+
:class:`Segmenter.sentence`, :class:`Segmenter.time`)
90+
:param int length: Length of segments when using time segmenter
91+
:param bool force: Force fetch new transcript
92+
:return: List of dicts with keys: start (float), end (float), text (str)
93+
:rtype: List[Dict[str, Union[float, str]]]
94+
"""
95+
self._fetch_transcript(
96+
start=start, end=end, segmenter=segmenter, length=length, force=force
97+
)
98+
return self.transcript
99+
100+
def get_transcript_text(
101+
self,
102+
start: int = None,
103+
end: int = None,
104+
) -> str:
105+
"""Get plain text transcript for the audio.
106+
107+
:param int start: Start time in seconds to get transcript from
108+
:param int end: End time in seconds to get transcript until
109+
:param bool force: Force fetch new transcript
110+
:return: Full transcript text as string
111+
:rtype: str
112+
"""
113+
self._fetch_transcript(start=start, end=end)
114+
return self.transcript_text
115+
116+
def generate_transcript(
117+
self,
118+
force: bool = None,
119+
language_code: str = None,
120+
) -> dict:
121+
"""Generate transcript for the audio.
122+
123+
:param bool force: Force generate new transcript
124+
:param str language_code: Language code of the spoken audio. If not provided, language is automatically detected.
125+
:return: Success dict if transcript generated or already exists
126+
:rtype: dict
127+
"""
128+
transcript_data = self._connection.post(
129+
path=f"{ApiPath.audio}/{self.id}/{ApiPath.transcription}",
130+
data={
131+
"force": True if force else False,
132+
"language_code": language_code,
133+
},
134+
)
135+
transcript = transcript_data.get("word_timestamps", [])
136+
if transcript:
137+
return {
138+
"success": True,
139+
"message": "Transcript generated successfully",
140+
}
141+
return transcript_data
142+
46143
def delete(self) -> None:
47144
"""Delete the audio.
48145

videodb/editor.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,15 +16,16 @@ class AssetType(str, Enum):
1616

1717
class Fit(str, Enum):
1818
"""Set how the asset should be scaled to fit the viewport using one of the following options:
19-
crop (default) - scale the asset to fill the viewport while maintaining the aspect ratio. The asset will be cropped if it exceeds the bounds of the viewport.
2019
20+
crop (default) - scale the asset to fill the viewport while maintaining the aspect ratio. The asset will be cropped if it exceeds the bounds of the viewport.
2121
cover - stretch the asset to fill the viewport without maintaining the aspect ratio.
2222
contain - fit the entire asset within the viewport while maintaining the original aspect ratio.
2323
none - preserves the original asset dimensions and does not apply any scaling."""
2424

2525
crop = "crop"
2626
cover = "cover"
2727
contain = "contain"
28+
none = None
2829

2930

3031
class Position(str, Enum):

videodb/video.py

Lines changed: 76 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from videodb._constants import (
44
ApiPath,
55
IndexType,
6+
ReframeMode,
67
SceneExtractionType,
78
SearchType,
89
SemanticSearchDefaultValues,
@@ -239,9 +240,6 @@ def get_transcript_text(
239240
self,
240241
start: int = None,
241242
end: int = None,
242-
segmenter: str = Segmenter.word,
243-
length: int = 1,
244-
force: bool = None,
245243
) -> str:
246244
"""Get plain text transcript for the video.
247245
@@ -251,9 +249,7 @@ def get_transcript_text(
251249
:return: Full transcript text as string
252250
:rtype: str
253251
"""
254-
self._fetch_transcript(
255-
start=start, end=end, segmenter=segmenter, length=length, force=force
256-
)
252+
self._fetch_transcript(start=start, end=end)
257253
return self.transcript_text
258254

259255
def generate_transcript(
@@ -689,3 +685,77 @@ def get_meeting(self):
689685
**meeting_data,
690686
)
691687
return None
688+
689+
def reframe(
690+
self,
691+
start: Optional[float] = None,
692+
end: Optional[float] = None,
693+
target: Union[str, Dict[str, int]] = "vertical",
694+
mode: str = ReframeMode.smart,
695+
callback_url: Optional[str] = None,
696+
) -> Optional["Video"]:
697+
"""Reframe video to a new aspect ratio with optional object tracking.
698+
699+
:param float start: Start time in seconds (optional)
700+
:param float end: End time in seconds (optional)
701+
:param Union[str, dict] target: Target format - preset string (e.g., "vertical", "square", "landscape") or {"width": int, "height": int}
702+
:param str mode: Reframing mode - "simple" or "smart" (default: "smart")
703+
:param str callback_url: URL to receive callback when processing completes (optional)
704+
:raises InvalidRequestError: If the reframe request fails
705+
:return: :class:`Video <Video>` object if no callback_url, None otherwise
706+
:rtype: Optional[:class:`videodb.video.Video`]
707+
"""
708+
reframe_data = self._connection.post(
709+
path=f"{ApiPath.video}/{self.id}/{ApiPath.reframe}",
710+
data={
711+
"start": start,
712+
"end": end,
713+
"target": target,
714+
"mode": mode,
715+
"callback_url": callback_url,
716+
},
717+
)
718+
719+
if callback_url:
720+
return None
721+
722+
if reframe_data:
723+
return Video(self._connection, **reframe_data)
724+
725+
def smart_vertical_reframe(
726+
self,
727+
start: Optional[float] = None,
728+
end: Optional[float] = None,
729+
callback_url: Optional[str] = None,
730+
) -> Optional["Video"]:
731+
"""Convenience method for object-aware vertical reframing.
732+
733+
Equivalent to calling reframe(target="vertical", mode="smart").
734+
735+
:param float start: Start time in seconds (optional)
736+
:param float end: End time in seconds (optional)
737+
:param str callback_url: URL to receive callback when processing completes (optional)
738+
:return: :class:`Video <Video>` object if no callback_url, None otherwise
739+
:rtype: Optional[:class:`videodb.video.Video`]
740+
"""
741+
return self.reframe(
742+
start=start,
743+
end=end,
744+
target="vertical",
745+
mode=ReframeMode.smart,
746+
callback_url=callback_url,
747+
)
748+
749+
def download(self, name: Optional[str] = None) -> dict:
750+
"""Download the video from its stream URL.
751+
752+
:param str name: Name for the downloaded file (optional, defaults to video name)
753+
:raises InvalidRequestError: If the download request fails
754+
:return: Download response data
755+
:rtype: dict
756+
"""
757+
if not self.stream_url:
758+
raise ValueError("Video does not have a stream_url")
759+
760+
download_name = name or self.name or f"video_{self.id}"
761+
return self._connection.download(self.stream_url, download_name)

0 commit comments

Comments
 (0)