Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 17 additions & 16 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -150,19 +150,20 @@ CREATE TABLE IF NOT EXISTS users (

-- History of transcription requests made by users
CREATE TABLE IF NOT EXISTS transcription_history (
id INTEGER PRIMARY KEY AUTO_INCREMENT,
telegram_id BIGINT NOT NULL REFERENCES users(telegram_id),
status VARCHAR(32) NOT NULL,
audio_s3_path TEXT NOT NULL,
duration_seconds INTEGER,
price_rub DECIMAL(10,2),
result_s3_path TEXT,
result_json TEXT,
operation_id VARCHAR(128),
message_id INTEGER,
chat_id BIGINT,
started_at TIMESTAMP,
finished_at TIMESTAMP
id INTEGER PRIMARY KEY AUTO_INCREMENT,
telegram_id BIGINT NOT NULL REFERENCES users(telegram_id),
status VARCHAR(32) NOT NULL,
audio_s3_path TEXT NOT NULL,
duration_seconds INTEGER,
price_rub DECIMAL(10,2),
result_s3_path TEXT,
result_json TEXT,
llm_tokens_by_model JSON,
operation_id VARCHAR(128),
message_id INTEGER,
chat_id BIGINT,
started_at TIMESTAMP,
finished_at TIMESTAMP
);

-- Index to speed up lookups by user
Expand Down Expand Up @@ -263,9 +264,9 @@ In that case install libmysqlclient-dev: `sudo apt install libmysqlclient-dev` o

## References

- [Yandex Cloud SpeechKit docs][2]
- [Telegram Bot API][3]
- [Yandex Cloud SpeechKit docs][2]
- [Telegram Bot API][3]

[1]: https://t.me/ClearTranscriptBot
[2]: https://cloud.yandex.com/docs/speechkit/
[3]: https://core.telegram.org/bots/api
[3]: https://core.telegram.org/bots/api
4 changes: 4 additions & 0 deletions database/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
DateTime,
ForeignKey,
Integer,
JSON,
Numeric,
String,
Text,
Expand Down Expand Up @@ -59,6 +60,9 @@ class TranscriptionHistory(Base):
# Raw recognition result returned by SpeechKit
result_json = Column(Text, nullable=True)

# Token counts for transcribed text by model
llm_tokens_by_model = Column(JSON, nullable=True)

# Duration of the audio in seconds
duration_seconds = Column(Integer, nullable=True)

Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ boto3
pytz
requests
httpx
tiktoken

# Monitoring / error reporting
sentry_sdk
21 changes: 17 additions & 4 deletions schedulers/transcription.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from utils.speechkit import fetch_transcription_result, parse_text, format_duration
from utils.tg import safe_edit_message_text
from utils.s3 import upload_file
from utils.tokens import tokens_by_model


EDIT_INTERVAL_SEC = 5 # do not edit more often than once every 5 seconds
Expand Down Expand Up @@ -85,8 +86,10 @@ async def check_running_tasks(context: ContextTypes.DEFAULT_TYPE) -> None:
)
continue

text = parse_text(result)
if not text.strip():
text = parse_text(result).strip()
token_counts = tokens_by_model(text)

if not text:
text = "(речь в записи отсутствует или слишком неразборчива для распознавания)"

source_stem = Path(task.audio_s3_path).stem
Expand Down Expand Up @@ -117,13 +120,23 @@ async def check_running_tasks(context: ContextTypes.DEFAULT_TYPE) -> None:

try:
await context.bot.send_document(chat_id=task.telegram_id, document=path.open("rb"))
update_transcription(task.id, status="completed", result_s3_path=s3_uri)
update_transcription(
task.id,
status="completed",
result_s3_path=s3_uri,
llm_tokens_by_model=token_counts,
)
except Exception as e:
logging.error(f"Failed to send result for task {task.id}: {e}")
if os.getenv("ENABLE_SENTRY") == "1":
sentry_sdk.capture_exception(e)

update_transcription(task.id, status="failed", result_s3_path=s3_uri)
update_transcription(
task.id,
status="failed",
result_s3_path=s3_uri,
llm_tokens_by_model=token_counts,
)

await safe_edit_message_text(
context.bot,
Expand Down
33 changes: 33 additions & 0 deletions utils/tokens.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
"""Utilities for counting LLM tokens."""
import tiktoken

from typing import Optional


# Model names for which tokens_by_model() reports a token count.
# The first entry is also the default *model* argument of count_tokens().
LLM_TOKEN_MODELS = [
"gpt-5.2",
"gpt-5.1",
"gpt-5-mini",
"gpt-5-nano",
]

# Name of a tiktoken encoding.
# NOTE(review): presumably intended as the fallback encoding for model names
# that tiktoken cannot map to an encoding — confirm.
DEFAULT_ENCODING = "o200k_base"


def count_tokens(text: str, model: str = LLM_TOKEN_MODELS[0]) -> Optional[int]:
    """Count tokens in *text* using the tiktoken encoding for *model*.

    Args:
        text: Text to tokenize.
        model: Model name passed to ``tiktoken.encoding_for_model``.

    Returns:
        The number of tokens in *text*, or ``None`` when neither the
        encoding for *model* nor the ``DEFAULT_ENCODING`` fallback can be
        resolved.
    """
    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        # tiktoken has no mapping for this model name; use the module-level
        # default encoding rather than silently reporting no count at all.
        try:
            encoding = tiktoken.get_encoding(DEFAULT_ENCODING)
        except (KeyError, ValueError):
            return None
    # Transcribed text is arbitrary user content: treat strings that look
    # like special tokens (e.g. "<|endoftext|>") as ordinary text instead of
    # letting encode() raise ValueError.
    return len(encoding.encode(text, disallowed_special=()))


def tokens_by_model(text: str) -> dict[str, Optional[int]]:
    """Map every model in ``LLM_TOKEN_MODELS`` to its token count for *text*.

    Text that is empty or whitespace-only yields ``0`` for every model
    without invoking the tokenizer; otherwise each value is whatever
    ``count_tokens`` returns for that model.
    """
    is_blank = not text.strip()
    counts: dict[str, Optional[int]] = {}
    for model_name in LLM_TOKEN_MODELS:
        if is_blank:
            counts[model_name] = 0
        else:
            counts[model_name] = count_tokens(text, model=model_name)
    return counts