From b28928e6f51886a58ddc2ae99acea9b8cd0ac615 Mon Sep 17 00:00:00 2001 From: ashish-spext Date: Thu, 17 Jul 2025 18:14:40 +0530 Subject: [PATCH 1/4] Add audio param support in connect rtstream --- videodb/collection.py | 6 +- videodb/meeting.py | 4 +- videodb/timeline_v2.py | 267 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 275 insertions(+), 2 deletions(-) create mode 100644 videodb/timeline_v2.py diff --git a/videodb/collection.py b/videodb/collection.py index 994cda6..a947266 100644 --- a/videodb/collection.py +++ b/videodb/collection.py @@ -167,7 +167,7 @@ def delete_image(self, image_id: str) -> None: ) def connect_rtstream( - self, url: str, name: str, sample_rate: int = None + self, url: str, name: str, sample_rate: int = None, audio: bool = False ) -> RTStream: """Connect to an rtstream. @@ -182,6 +182,7 @@ def connect_rtstream( "collection_id": self.id, "url": url, "name": name, + "audio": audio, "sample_rate": sample_rate, }, ) @@ -519,6 +520,7 @@ def record_meeting( bot_name: str = None, bot_image_url: str = None, meeting_title: str = None, + realtime_stream: bool = False, callback_url: str = None, callback_data: Optional[dict] = None, time_zone: str = "UTC", @@ -529,6 +531,7 @@ def record_meeting( :param str bot_name: Name of the recorder bot :param str bot_image_url: URL of the recorder bot image :param str meeting_title: Name of the meeting + :param bool realtime_stream: Whether to stream the meeting in realtime :param str callback_url: URL to receive callback once recording is done :param dict callback_data: Data to be sent in the callback (optional) :param str time_zone: Time zone for the meeting (default ``UTC``) @@ -545,6 +548,7 @@ def record_meeting( "bot_name": bot_name, "bot_image_url": bot_image_url, "meeting_title": meeting_title, + "realtime_stream": realtime_stream, "callback_url": callback_url, "callback_data": callback_data, "time_zone": time_zone, diff --git a/videodb/meeting.py b/videodb/meeting.py index 827f5c6..f7da814 
100644 --- a/videodb/meeting.py +++ b/videodb/meeting.py @@ -45,6 +45,8 @@ def _update_attributes(self, data: dict) -> None: self.time_zone = data.get("time_zone") self.video_id = data.get("video_id") self.speaker_timeline = data.get("speaker_timeline") + self.realtime_stream = data.get("realtime_stream") + self.realtime_stream_url = data.get("realtime_stream_url") def refresh(self) -> "Meeting": """Refresh meeting data from the server. @@ -106,4 +108,4 @@ def wait_for_status( return True time.sleep(interval) - return False + return False \ No newline at end of file diff --git a/videodb/timeline_v2.py b/videodb/timeline_v2.py new file mode 100644 index 0000000..8658cc6 --- /dev/null +++ b/videodb/timeline_v2.py @@ -0,0 +1,267 @@ +from typing import List, Optional, Union +from enum import Enum + + +class AssetType(str, Enum): + video = "video" + image = "image" + audio = "audio" + + +class Fit(str, Enum): + crop = "crop" + cover = "cover" + contain = "contain" + none = "none" + + +class Position(str, Enum): + top = "top" + bottom = "bottom" + left = "left" + right = "right" + center = "center" + top_left = "top-left" + top_right = "top-right" + bottom_left = "bottom-left" + bottom_right = "bottom-right" + + +class Filter(str, Enum): + """A filter effect to apply to the Clip.""" + + blur = "blur" + boost = "boost" + contrast = "contrast" + darken = "darken" + greyscale = "greyscale" + lighten = "lighten" + muted = "muted" + negative = "negative" + + +class Offset: + def __init__(self, x: float = 0, y: float = 0): + self.x = x + self.y = y + + def to_json(self): + return { + "x": self.x, + "y": self.y, + } + + +class Crop: + def __init__(self, top: int = 0, right: int = 0, bottom: int = 0, left: int = 0): + self.top = top + self.right = right + self.bottom = bottom + self.left = left + + def to_json(self): + return { + "top": self.top, + "right": self.right, + "bottom": self.bottom, + "left": self.left, + } + + +class Transition: + def __init__(self, in_: str = None, 
out: str = None): + self.in_ = in_ + self.out = out + + def to_json(self): + return { + "in": self.in_, + "out": self.out, + } + + +class BaseAsset: + """The type of asset to display for the duration of the Clip.""" + + type: AssetType + + +class VideoAsset(BaseAsset): + """The VideoAsset is used to create video sequences from video files. The src must be a publicly accessible URL to a video resource""" + + type = AssetType.video + + def __init__( + self, + id: str, + trim: int = 0, + volume: float = 1, + crop: Optional[Crop] = None, + ): + if trim < 0: + raise ValueError("trim must be non-negative") + if not (0 <= volume <= 2): + raise ValueError("volume must be between 0 and 2") + + self.id = id + self.trim = trim + self.volume = volume + self.crop = crop if crop is not None else Crop() + + def to_json(self): + return { + "type": self.type, + "id": self.id, + "trim": self.trim, + "volume": self.volume, + "crop": self.crop.to_json(), + } + + +class ImageAsset(BaseAsset): + """The ImageAsset is used to create video from images to compose an image. The src must be a publicly accessible URL to an image resource such as a jpg or png file.""" + + type = AssetType.image + + def __init__(self, id: str, trim: int = 0, crop: Optional[Crop] = None): + if trim < 0: + raise ValueError("trim must be non-negative") + + self.id = id + self.trim = trim + self.crop = crop if crop is not None else Crop() + + def to_json(self): + return { + "type": self.type, + "id": self.id, + "crop": self.crop.to_json(), + } + + +class AudioAsset(BaseAsset): + """The AudioAsset is used to create audio sequences from audio files. 
The src must be a publicly accessible URL to an audio resource""" + + type = AssetType.audio + + def __init__(self, id: str, trim: int = 0, volume: float = 1): + self.id = id + self.trim = trim + self.volume = volume + + def to_json(self): + return { + "type": self.type, + "id": self.id, + "trim": self.trim, + "volume": self.volume, + } + + +AnyAsset = Union[VideoAsset, ImageAsset, AudioAsset] + + +class Clip: + """A clip is a container for a specific type of asset, i.e. a title, image, video, audio or html. You use a Clip to define when an asset will display on the timeline, how long it will play for and transitions, filters and effects to apply to it.""" + + def __init__( + self, + asset: AnyAsset, + start: Union[float, int], + length: Union[float, int], + transition: Optional[Transition] = None, + effect: Optional[str] = None, + filter: Optional[Filter] = None, + scale: float = 1, + opacity: float = 1, + fit: Optional[Fit] = Fit.crop, + position: Position = Position.center, + offset: Optional[Offset] = None, + ): + if start < 0: + raise ValueError("start must be non-negative") + if length <= 0: + raise ValueError("length must be positive") + if not (0 <= scale <= 10): + raise ValueError("scale must be between 0 and 10") + if not (0 <= opacity <= 1): + raise ValueError("opacity must be between 0 and 1") + + self.asset = asset + self.start = start + self.length = length + self.transition = transition + self.effect = effect + self.filter = filter + self.scale = scale + self.opacity = opacity + self.fit = fit + self.position = position + self.offset = offset if offset is not None else Offset() + + def to_json(self): + json = { + "asset": self.asset.to_json(), + "start": self.start, + "length": self.length, + "effect": self.effect, + "scale": self.scale, + "opacity": self.opacity, + "fit": self.fit, + "position": self.position, + "offset": self.offset.to_json(), + } + + if self.transition: + json["transition"] = self.transition.to_json() + if self.filter: + 
json["filter"] = self.filter.value + + return json + + +class Track: + def __init__(self, clips: List[Clip] = []): + self.clips = clips + + def add_clip(self, clip: Clip): + self.clips.append(clip) + + def to_json(self): + return { + "clips": [clip.to_json() for clip in self.clips], + } + + +class TimelineV2: + def __init__(self, connection): + self.connection = connection + self.background: str = "#000000" + self.resolution: str = "1280x720" + self.tracks: List[Track] = [] + self.stream_url = None + self.player_url = None + + def add_track(self, track: Track): + self.tracks.append(track) + + def add_clip(self, track_index: int, clip: Clip): + self.tracks[track_index].clips.append(clip) + + def to_json(self): + return { + "timeline": { + "background": self.background, + "resolution": self.resolution, + "tracks": [track.to_json() for track in self.tracks], + } + } + + def generate_stream(self): + stream_data = self.connection.post( + path="timeline_v2", + data=self.to_json(), + ) + self.stream_url = stream_data.get("stream_url") + self.player_url = stream_data.get("player_url") + return stream_data.get("stream_url", None) From ac1fa600be4cf6ab94e1d80300e2638bdca689c4 Mon Sep 17 00:00:00 2001 From: ashish-spext Date: Thu, 17 Jul 2025 18:15:21 +0530 Subject: [PATCH 2/4] Remove timeline v2 to avoid conflict --- videodb/timeline_v2.py | 267 ----------------------------------------- 1 file changed, 267 deletions(-) delete mode 100644 videodb/timeline_v2.py diff --git a/videodb/timeline_v2.py b/videodb/timeline_v2.py deleted file mode 100644 index 8658cc6..0000000 --- a/videodb/timeline_v2.py +++ /dev/null @@ -1,267 +0,0 @@ -from typing import List, Optional, Union -from enum import Enum - - -class AssetType(str, Enum): - video = "video" - image = "image" - audio = "audio" - - -class Fit(str, Enum): - crop = "crop" - cover = "cover" - contain = "contain" - none = "none" - - -class Position(str, Enum): - top = "top" - bottom = "bottom" - left = "left" - right = "right" - 
center = "center" - top_left = "top-left" - top_right = "top-right" - bottom_left = "bottom-left" - bottom_right = "bottom-right" - - -class Filter(str, Enum): - """A filter effect to apply to the Clip.""" - - blur = "blur" - boost = "boost" - contrast = "contrast" - darken = "darken" - greyscale = "greyscale" - lighten = "lighten" - muted = "muted" - negative = "negative" - - -class Offset: - def __init__(self, x: float = 0, y: float = 0): - self.x = x - self.y = y - - def to_json(self): - return { - "x": self.x, - "y": self.y, - } - - -class Crop: - def __init__(self, top: int = 0, right: int = 0, bottom: int = 0, left: int = 0): - self.top = top - self.right = right - self.bottom = bottom - self.left = left - - def to_json(self): - return { - "top": self.top, - "right": self.right, - "bottom": self.bottom, - "left": self.left, - } - - -class Transition: - def __init__(self, in_: str = None, out: str = None): - self.in_ = in_ - self.out = out - - def to_json(self): - return { - "in": self.in_, - "out": self.out, - } - - -class BaseAsset: - """The type of asset to display for the duration of the Clip.""" - - type: AssetType - - -class VideoAsset(BaseAsset): - """The VideoAsset is used to create video sequences from video files. The src must be a publicly accessible URL to a video resource""" - - type = AssetType.video - - def __init__( - self, - id: str, - trim: int = 0, - volume: float = 1, - crop: Optional[Crop] = None, - ): - if trim < 0: - raise ValueError("trim must be non-negative") - if not (0 <= volume <= 2): - raise ValueError("volume must be between 0 and 2") - - self.id = id - self.trim = trim - self.volume = volume - self.crop = crop if crop is not None else Crop() - - def to_json(self): - return { - "type": self.type, - "id": self.id, - "trim": self.trim, - "volume": self.volume, - "crop": self.crop.to_json(), - } - - -class ImageAsset(BaseAsset): - """The ImageAsset is used to create video from images to compose an image. 
The src must be a publicly accessible URL to an image resource such as a jpg or png file.""" - - type = AssetType.image - - def __init__(self, id: str, trim: int = 0, crop: Optional[Crop] = None): - if trim < 0: - raise ValueError("trim must be non-negative") - - self.id = id - self.trim = trim - self.crop = crop if crop is not None else Crop() - - def to_json(self): - return { - "type": self.type, - "id": self.id, - "crop": self.crop.to_json(), - } - - -class AudioAsset(BaseAsset): - """The AudioAsset is used to create audio sequences from audio files. The src must be a publicly accessible URL to an audio resource""" - - type = AssetType.audio - - def __init__(self, id: str, trim: int = 0, volume: float = 1): - self.id = id - self.trim = trim - self.volume = volume - - def to_json(self): - return { - "type": self.type, - "id": self.id, - "trim": self.trim, - "volume": self.volume, - } - - -AnyAsset = Union[VideoAsset, ImageAsset, AudioAsset] - - -class Clip: - """A clip is a container for a specific type of asset, i.e. a title, image, video, audio or html. 
You use a Clip to define when an asset will display on the timeline, how long it will play for and transitions, filters and effects to apply to it.""" - - def __init__( - self, - asset: AnyAsset, - start: Union[float, int], - length: Union[float, int], - transition: Optional[Transition] = None, - effect: Optional[str] = None, - filter: Optional[Filter] = None, - scale: float = 1, - opacity: float = 1, - fit: Optional[Fit] = Fit.crop, - position: Position = Position.center, - offset: Optional[Offset] = None, - ): - if start < 0: - raise ValueError("start must be non-negative") - if length <= 0: - raise ValueError("length must be positive") - if not (0 <= scale <= 10): - raise ValueError("scale must be between 0 and 10") - if not (0 <= opacity <= 1): - raise ValueError("opacity must be between 0 and 1") - - self.asset = asset - self.start = start - self.length = length - self.transition = transition - self.effect = effect - self.filter = filter - self.scale = scale - self.opacity = opacity - self.fit = fit - self.position = position - self.offset = offset if offset is not None else Offset() - - def to_json(self): - json = { - "asset": self.asset.to_json(), - "start": self.start, - "length": self.length, - "effect": self.effect, - "scale": self.scale, - "opacity": self.opacity, - "fit": self.fit, - "position": self.position, - "offset": self.offset.to_json(), - } - - if self.transition: - json["transition"] = self.transition.to_json() - if self.filter: - json["filter"] = self.filter.value - - return json - - -class Track: - def __init__(self, clips: List[Clip] = []): - self.clips = clips - - def add_clip(self, clip: Clip): - self.clips.append(clip) - - def to_json(self): - return { - "clips": [clip.to_json() for clip in self.clips], - } - - -class TimelineV2: - def __init__(self, connection): - self.connection = connection - self.background: str = "#000000" - self.resolution: str = "1280x720" - self.tracks: List[Track] = [] - self.stream_url = None - self.player_url = 
None - - def add_track(self, track: Track): - self.tracks.append(track) - - def add_clip(self, track_index: int, clip: Clip): - self.tracks[track_index].clips.append(clip) - - def to_json(self): - return { - "timeline": { - "background": self.background, - "resolution": self.resolution, - "tracks": [track.to_json() for track in self.tracks], - } - } - - def generate_stream(self): - stream_data = self.connection.post( - path="timeline_v2", - data=self.to_json(), - ) - self.stream_url = stream_data.get("stream_url") - self.player_url = stream_data.get("player_url") - return stream_data.get("stream_url", None) From 35a02405066452a107f7f351d39ca2b82fe389ef Mon Sep 17 00:00:00 2001 From: ashish-spext Date: Thu, 17 Jul 2025 20:16:24 +0530 Subject: [PATCH 3/4] Add get transcript for rtstream --- videodb/rtstream.py | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/videodb/rtstream.py b/videodb/rtstream.py index 4be4a8c..58178e6 100644 --- a/videodb/rtstream.py +++ b/videodb/rtstream.py @@ -299,3 +299,41 @@ def get_scene_index(self, index_id: str) -> RTStreamSceneIndex: name=index_data.get("name"), status=index_data.get("status"), ) + + def get_transcript( + self, + page=1, + page_size=100, + start=None, + end=None, + since=None, + engine=None, + ): + """Get transcription data from the rtstream. 
+ + :param int page: Page number (default: 1) + :param int page_size: Items per page (default: 100, max: 1000) + :param float start: Start timestamp filter (optional) + :param float end: End timestamp filter (optional) + :param float since: For polling - only get transcriptions after this timestamp (optional) + :param str engine: Transcription engine (default: "AAIS") + :return: Transcription data with segments and metadata + :rtype: dict + """ + params = { + "engine": engine, + "page": page, + "page_size": page_size, + } + if start is not None: + params["start"] = start + if end is not None: + params["end"] = end + if since is not None: + params["since"] = since + + transcription_data = self._connection.get( + f"{ApiPath.rtstream}/{self.id}/{ApiPath.transcription}", + params=params, + ) + return transcription_data From 2e7a564c705330b935929303ae4974176150840f Mon Sep 17 00:00:00 2001 From: ashish-spext Date: Mon, 21 Jul 2025 17:54:39 +0530 Subject: [PATCH 4/4] Add joined status for meeting bot --- videodb/_constants.py | 1 + 1 file changed, 1 insertion(+) diff --git a/videodb/_constants.py b/videodb/_constants.py index 4591892..7c5c1e7 100644 --- a/videodb/_constants.py +++ b/videodb/_constants.py @@ -94,6 +94,7 @@ class Status: class MeetingStatus: initializing = "initializing" processing = "processing" + joined = "joined" done = "done"