Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ Look at the `examples/` directory to get a sense of all the different features a

## Typechecking, linting and formatting

The CI validates this but to do checks locally see the following example commmands:
The CI validates this but to do checks locally see the following example commands:

### Typechecking

Expand All @@ -75,6 +75,7 @@ uv pip install pip && uv run mypy --install-types --non-interactive \
-p livekit.plugins.gladia \
-p livekit.plugins.google \
-p livekit.plugins.groq \
-p livekit.plugins.hathora \
-p livekit.plugins.hume \
-p livekit.plugins.minimal \
-p livekit.plugins.neuphonic \
Expand All @@ -101,7 +102,7 @@ uv pip install pip && uv run mypy --install-types --non-interactive \
uv run ruff check --output-format=github .
```

### Formating
### Formatting

```bash
uv run ruff format .
Expand Down
15 changes: 15 additions & 0 deletions livekit-plugins/livekit-plugins-hathora/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Hathora plugin for LiveKit Agents

Support for [Hathora](https://models.hathora.dev/)'s voice AI services in LiveKit Agents.

More information is available in the docs for the [STT](https://docs.livekit.io/agents/integrations/stt/hathora/) and [TTS](https://docs.livekit.io/agents/integrations/tts/hathora/) integrations.

## Installation

```bash
pip install livekit-plugins-hathora
```

## Pre-requisites

You'll need an API key from Hathora, which you can retrieve at https://models.hathora.dev/tokens. Provide it to the plugin by setting the `HATHORA_API_KEY` environment variable.
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Copyright 2023 LiveKit, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Hathora plugin for LiveKit Agents

See https://docs.livekit.io/agents/integrations/tts/hathora/ and https://docs.livekit.io/agents/integrations/stt/hathora/ for more information.
"""

from .stt import STT
from .tts import TTS, ChunkedStream
from .utils import ConfigOption
from .version import __version__

__all__ = ["STT", "TTS", "ConfigOption", "ChunkedStream", "__version__"]

from livekit.agents import Plugin

from .log import logger


class HathoraPlugin(Plugin):
    """Plugin descriptor for the Hathora integration."""

    def __init__(self) -> None:
        # Identify the plugin by this module's name, version and package, and
        # hand over the package-level logger.
        super().__init__(
            __name__,
            __version__,
            __package__,
            logger,
        )


Plugin.register_plugin(HathoraPlugin())

# Hide every module-level name that is not listed in __all__ from the pdoc
# output by mapping it to False in __pdoc__.
_module = dir()
NOT_IN_ALL = [name for name in _module if name not in __all__]

__pdoc__ = dict.fromkeys(NOT_IN_ALL, False)
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from .version import __version__

# Name of the HTTP request header that carries the API credential.
API_AUTH_HEADER = "Authorization"
# User-Agent value for outgoing requests; embeds the plugin version.
USER_AGENT = f"LiveKit Agents Hathora Plugin/{__version__}"
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import logging

# Package-level logger for the Hathora plugin.
logger = logging.getLogger("livekit.plugins.hathora")
Empty file.
150 changes: 150 additions & 0 deletions livekit-plugins/livekit-plugins-hathora/livekit/plugins/hathora/stt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
# Copyright 2023 LiveKit, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations

import base64
import os
from typing import Any

import aiohttp

from livekit import rtc
from livekit.agents import (
APIConnectOptions,
APIStatusError,
stt,
utils,
)
from livekit.agents.types import NOT_GIVEN, NotGivenOr

from .constants import (
API_AUTH_HEADER,
USER_AGENT,
)
from .utils import ConfigOption


class STT(stt.STT):
    """Speech-to-text client for the models hosted by Hathora.

    Recognition is request/response only (no streaming, no interim results):
    every call uploads the complete audio buffer as base64-encoded WAV and
    yields a single final transcript.

    [Documentation](https://models.hathora.dev)
    """

    def __init__(
        self,
        *,
        model: str,
        language: str | None = None,
        model_config: list[ConfigOption] | None = None,
        api_key: str | None = None,
        base_url: str = "https://api.models.hathora.dev/inference/v1/stt",
    ) -> None:
        """Initialize the Hathora STT service.

        Args:
            model: Model to use; find available models
                [here](https://models.hathora.dev).
            language: Default language code (if supported by model). A
                language passed to an individual recognition call takes
                precedence over this default.
            model_config: Some models support additional config, refer to
                [docs](https://models.hathora.dev) for each model to see
                what is supported.
            api_key: API key for authentication with the Hathora service;
                provision one [here](https://models.hathora.dev/tokens).
                Falls back to the `HATHORA_API_KEY` environment variable.
            base_url: Base API URL for the Hathora STT service.
        """
        super().__init__(
            capabilities=stt.STTCapabilities(
                streaming=False,
                interim_results=False,
            )
        )

        self._model = model
        self._language = language
        self._model_config = model_config
        self._api_key = api_key or os.environ.get("HATHORA_API_KEY")
        self._base_url = base_url

    @property
    def model(self) -> str:
        """Get the model name/identifier for this STT instance.

        Returns:
            The model name.
        """
        return self._model

    @property
    def provider(self) -> str:
        """Get the provider name/identifier for this STT instance.

        Returns:
            "Hathora"
        """
        return "Hathora"

    async def _recognize_impl(
        self,
        buffer: utils.AudioBuffer,
        *,
        language: NotGivenOr[str] = NOT_GIVEN,
        conn_options: APIConnectOptions,
    ) -> stt.SpeechEvent:
        """Transcribe an audio buffer with a single HTTP request.

        Args:
            buffer: Audio frames to transcribe; they are merged and sent as
                one base64-encoded WAV payload.
            language: Per-call language override. When given it takes
                precedence over the language configured on the instance.
            conn_options: Accepted to match the base-class signature; not
                consulted by this method.
                NOTE(review): confirm whether its timeout should be applied
                to the HTTP request.

        Returns:
            A FINAL_TRANSCRIPT event with one alternative holding the text.

        Raises:
            APIStatusError: If the service answers with an HTTP error status,
                or if the response carries no non-empty transcript text.
        """
        # The per-call argument wins over the constructor default, so an
        # explicit override is never silently dropped.
        effective_language = language if language is not NOT_GIVEN else self._language

        payload: dict[str, Any] = {
            "model": self._model,
        }

        if effective_language is not None:
            payload["language"] = effective_language

        if self._model_config is not None:
            payload["model_config"] = [
                {"name": option.name, "value": option.value} for option in self._model_config
            ]

        wav_bytes = rtc.combine_audio_frames(buffer).to_wav_bytes()
        payload["audio"] = base64.b64encode(wav_bytes).decode("utf-8")

        headers: dict[str, str] = {"User-Agent": USER_AGENT}
        # Only attach credentials when a key is configured; otherwise the
        # literal string "Bearer None" would be sent.
        if self._api_key:
            headers[API_AUTH_HEADER] = f"Bearer {self._api_key}"

        async with aiohttp.ClientSession() as session:
            async with session.post(
                self._base_url,
                headers=headers,
                json=payload,
            ) as resp:
                if resp.status >= 400:
                    # Report the real HTTP status instead of trying to parse
                    # an error body as a transcript.
                    error_body = await resp.text()
                    raise APIStatusError(
                        f"Hathora STT request failed: {error_body}",
                        status_code=resp.status,
                    )
                response = await resp.json()

        raw_text = response.get("text") if isinstance(response, dict) else None
        # Guard against a null / non-string "text" field before stripping.
        text = raw_text.strip() if isinstance(raw_text, str) else ""
        if text:
            returned_language = response.get("language", None)
            return stt.SpeechEvent(
                type=stt.SpeechEventType.FINAL_TRANSCRIPT,
                alternatives=[
                    stt.SpeechData(
                        # Prefer what the service reports, then the language
                        # actually requested, then English.
                        language=returned_language or effective_language or "en",
                        text=text,
                    )
                ],
            )

        raise APIStatusError("No text found in the response", status_code=400)
Loading