Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/node-server-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ jobs:

- name: Run tests
working-directory: "node-server"
run: npm run ci-test
run: npm run test:ci

- name: Run linter
working-directory: "node-server"
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/whisper-service-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,9 @@ jobs:
working-directory: "whisper-service"
run: pytest --cov=.

- name: Run tests
- name: Run linter
working-directory: "whisper-service"
run: pylint --disable=import-error $(git ls-files '*.py')
run: pylint $(git ls-files '*.py')

build-cpu-container-whisper-service:
needs: test-lint-whisper-service
Expand Down
8 changes: 0 additions & 8 deletions node-server/.editorconfig

This file was deleted.

7 changes: 3 additions & 4 deletions node-server/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 7 additions & 9 deletions node-server/package.json
Original file line number Diff line number Diff line change
@@ -1,25 +1,23 @@
{
"name": "scribear-backend",
"name": "scribear-node-server",
"version": "0.0.0",
"main": "build/index.js",
"main": "build/src/index.js",
"type": "module",
"scripts": {
"lint": "eslint",
"lint:fix": "eslint --fix",
"clean": "gts clean",
"fix": "eslint --fix",
"dev": "tsc-watch --compiler ts-patch/compiler/tsc.js --onSuccess \"node build/src/index.js\" | pino-pretty",
"pretest": "npm run lint",
"test": "vitest --ui --coverage",
"ci-test": "vitest run",
"test:dev": "vitest --ui --coverage",
"test:ci": "vitest run",
"build": "tspc",
"prestart": "npm run build",
"start": "node ./build/src/index.js"
},
"author": "bwu1324",
"license": "MIT",
"author": "scribear",
"description": "",
"engines": {
"node": "^20.0.0"
"node": ">=20.0.0"
},
"devDependencies": {
"@eslint/compat": "1.2.6",
Expand Down
2 changes: 1 addition & 1 deletion node-server/src/index.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import loadConfig from './shared/config/load_config.js';
import createServer from './server/start_server.js';
import createServer from './server/create_server.js';
import createLogger from './shared/logger/logger.js';

async function init() {
Expand Down
2 changes: 2 additions & 0 deletions whisper-service/.dockerignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
device_config.json

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
Expand Down
2 changes: 2 additions & 0 deletions whisper-service/.gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
device_config.json

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
Expand Down
26 changes: 14 additions & 12 deletions whisper-service/create_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,21 +10,24 @@
from typing import Annotated, Callable
from fastapi import FastAPI, WebSocket, WebSocketDisconnect, Query
from model_bases.transcription_model_base import TranscriptionModelBase
from model_factory import model_factory
from load_config import AppConfig, load_config
from load_config import AppConfig
from init_device_config import DeviceConfig


def create_server(
config: AppConfig,
model_factory_func: Callable[[str, WebSocket], TranscriptionModelBase]
device_config: DeviceConfig,
model_factory_func: Callable[[DeviceConfig,
str, WebSocket], TranscriptionModelBase]
) -> FastAPI:
'''
Instantiates FastAPI webserver.

Parameters:
config (Config) : Application configuration object
model_factory_func (function): Function that takes in a modelKey and a WebSocket and
returns the corresponding model implementation
config (AppConfig) : Application configuration object
device_config (DeviceConfig): Application device configuration object
model_factory_func (function) : Function that takes in a DeviceConfig, a modelKey and a WebSocket and
returns the corresponding model implementation

Returns:
FastAPI webserver
Expand Down Expand Up @@ -56,7 +59,11 @@ async def whisper(
return

# Instantiate and set up the requested model
transcription_model = model_factory_func(model_key, websocket)
transcription_model = model_factory_func(
device_config,
model_key,
websocket
)
transcription_model.load_model()

# Send any audio chunks to transcription model
Expand All @@ -69,8 +76,3 @@ async def whisper(
return

return fastapi_app


if __name__ == 'create_server':
app_config = load_config()
app = create_server(app_config, model_factory)
21 changes: 16 additions & 5 deletions whisper-service/create_server_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
from fastapi.testclient import TestClient
from load_config import AppConfig
from model_bases.transcription_model_base import TranscriptionModelBase

from create_server import create_server


Expand Down Expand Up @@ -38,7 +37,11 @@ class Fake(TranscriptionModelBase):
'''

def __init__(self):
super().__init__(None)
super().__init__(None, {})

@staticmethod
def validate_config(config):
return config

def load_model(self):
return None
Expand All @@ -51,19 +54,27 @@ async def queue_audio_chunk(self, audio_chunk):

return mock.Mock(wraps=Fake())


@pytest.fixture(scope='function')
def test_client(fake_config, fake_transcription_model):
'''
Create a FastAPI test client for each test
'''
def fake_factory(model_key: str, ws: WebSocket):
if isinstance(ws, WebSocket) and model_key == 'test-model':
fake_device_config = {
'test-model': {}
}

def fake_factory(device_config, model_key: str, ws: WebSocket):
if (isinstance(ws, WebSocket) and
model_key == 'test-model' and
fake_device_config != device_config
):
return fake_transcription_model

raise NotImplementedError(
'Invalid model key or invalid websocket argument.'
)
app = create_server(fake_config, fake_factory)
app = create_server(fake_config, {}, fake_factory)
return TestClient(app)


Expand Down
22 changes: 22 additions & 0 deletions whisper-service/device_config.template.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"mock_transcription_duration": {
"display_name": "Sanity Test",
"description": "Returns how many seconds of audio was received by whisper service.",
"implementation_id": "mock_transcription_duration",
"implementation_configuration": {},
"available_features": {}
},
"faster-whisper:cpu-tiny-en": {
"display_name": "Tiny Faster Whisper",
"description": "Faster Whisper implementation of Open AI Whisper tiny.en model.",
"implementation_id": "faster_whisper",
"implementation_configuration": {
"model": "tiny.en",
"device": "cpu",
"local_agree_dim": 2,
"min_new_samples": 48000,
"max_segment_samples": 480000
},
"available_features": {}
}
}
18 changes: 14 additions & 4 deletions whisper-service/index.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,28 @@
'''
Entry point for whisper-service application.
'''
import sys
import uvicorn
from load_config import load_config
from create_server import create_server
from model_factory import model_factory
from init_device_config import init_device_config

# Built at import time (not only under __main__) so that uvicorn's reloader,
# which re-imports this module by name, can find the application object.
config = load_config()
device_config = init_device_config('device_config.json')
APP = create_server(config, device_config, model_factory)

if __name__ == '__main__':
    # Passing --dev as the first argument enables auto reload and colored logs
    dev_mode = len(sys.argv) > 1 and sys.argv[1] == '--dev'

    # uvicorn only supports reload when the application is given as an import
    # string, and that string must name the module level attribute defined
    # above (APP); otherwise the application object is passed directly.
    app_target = 'index:APP' if dev_mode else APP

    uvicorn.run(
        app_target,
        log_level=config.LOG_LEVEL,
        port=config.PORT,
        host=config.HOST,
        use_colors=dev_mode,
        reload=dev_mode
    )
117 changes: 117 additions & 0 deletions whisper-service/init_device_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
'''
Function to load then initialize whisper service according to device config

Functions:
init_device_config

Types:
AvailableFeaturesConfig
ModelConfig
DeviceConfig
'''
import json
import logging
from typing import Any, TypedDict
from model_implementations.import_model_implementation import \
ModelImplementationId, import_model_implementation
from utils.config_dict_contains import \
config_dict_contains_dict, config_dict_contains_one_of, config_dict_contains_str


class AvailableFeaturesConfig(TypedDict):
    '''
    Type hint for available features configuration dict.

    No keys are declared yet, so this currently describes an empty dict.
    '''


class ModelConfig(TypedDict):
    '''
    Type hint for model configuration dict.

    Describes the entry stored under a single model key in the device config.
    '''
    # Name of the model (init_model requires a string of length >= 1)
    display_name: str
    # Description of the model (init_model requires a string of length >= 1)
    description: str
    # Identifies which model implementation is imported for this model
    implementation_id: ModelImplementationId
    # Passed as-is to the implementation when it is instantiated
    implementation_configuration: dict
    # Features configuration for this model
    available_features: AvailableFeaturesConfig


# Type hint for loaded device configuration dict:
# maps each model key to the ModelConfig returned by init_model
type DeviceConfig = dict[str, ModelConfig]


def init_model(device_config: dict[str, Any], key: str) -> ModelConfig:
    '''
    Validates and initializes the model stored under key in device_config.

    Every property required by ModelConfig is checked with the
    config_dict_contains_* helpers (presumably these raise when a check
    fails - confirm against utils.config_dict_contains).
    The implementation configuration itself is not checked here; it is handed
    to the implementation when it is instantiated.
    The model is then initialized by calling load_model() followed by
    unload_model().

    Parameters:
    device_config (dict): Loaded device_config dict
    key (str)           : model_key to initialize

    Return:
    Validated ModelConfig dict
    '''
    log = logging.getLogger('uvicorn.error')

    # The entry for this model key must itself be a dict
    config_dict_contains_dict(device_config, key)
    entry = device_config[key]

    # Required properties, checked in the order ModelConfig declares them
    for text_field in ('display_name', 'description'):
        config_dict_contains_str(entry, text_field, min_length=1)
    config_dict_contains_one_of(
        entry, 'implementation_id', list(ModelImplementationId)
    )
    for dict_field in ('implementation_configuration', 'available_features'):
        config_dict_contains_dict(entry, dict_field)

    impl_id: ModelImplementationId = entry['implementation_id']
    impl_config = entry['implementation_configuration']
    log.info(
        'Initializing implementation: %s for model_key: %s', impl_id, key
    )

    # Load and immediately unload the model once so that a broken
    # configuration surfaces here instead of on the first request.
    # NOTE(review): the first constructor argument is an empty dict here,
    # while create_server passes a websocket in that position - confirm the
    # implementations tolerate this placeholder.
    model_class = import_model_implementation(impl_id)
    instance = model_class({}, impl_config)
    instance.load_model()
    instance.unload_model()
    log.info(
        'Successfully initialized implementation: %s for model_key: %s', impl_id, key
    )

    validated: ModelConfig = {
        'display_name': entry['display_name'],
        'description': entry['description'],
        'implementation_id': impl_id,
        'implementation_configuration': impl_config,
        'available_features': entry['available_features']
    }
    return validated


def init_device_config(device_config_path: str) -> DeviceConfig:
    '''
    Loads device config file from provided path then initializes configured models.

    Parameters:
    device_config_path (str): Path to device config file (JSON, read as UTF-8)

    Returns:
    DeviceConfig dict mapping every model key found in the file to the
    ModelConfig returned by init_model for that key

    Raises:
    ValueError: If the top level JSON value in the file is not an object
    '''
    logger = logging.getLogger('uvicorn.error')

    logger.info('Loading device config from: %s', device_config_path)
    with open(device_config_path, 'r', encoding='utf-8') as file:
        loaded_config = json.load(file)

    # Valid JSON may still be a list, string, number, ...; only an object
    # can map model keys to model configurations
    if not isinstance(loaded_config, dict):
        raise ValueError('Device config must be an object')

    # Each model is validated and initialized in the order it appears in the file
    device_config: DeviceConfig = {
        key: init_model(loaded_config, key) for key in loaded_config
    }

    return device_config
Loading
Loading