# whisper-cpp.py — queue worker from the andchir/api2app-workers repository (GitHub, branch "main").
import json
import os
import sys
import subprocess
import ffmpeg

sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from utils.queue_manager import polling_queue, upload_queue_files, send_queue_error, send_queue_result_dict
from utils.upload_file import delete_old_files

# Project root: the parent of the directory that contains this file.
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

# Locations of the whisper-cli binary and of the model file passed to it.
# The defaults are the original hard-coded paths; set the WHISPER_CLI_PATH /
# WHISPER_MODEL_PATH environment variables to run the worker on another machine.
# An empty variable is treated as unset.
WHISPER_CLI_PATH = (
    os.environ.get('WHISPER_CLI_PATH')
    or '/home/andrew/PycharmProjects/whisper.cpp/build/bin/whisper-cli'
)
WHISPER_MODEL_PATH = (
    os.environ.get('WHISPER_MODEL_PATH')
    or '/home/andrew/PycharmProjects/whisper.cpp/models/ggml-medium.bin'
)

# Maximum audio length, in seconds (40 minutes). Passed to ffmpeg as the
# output duration limit, so longer inputs are truncated rather than rejected.
MAX_DURATION = 40 * 60


# Output formats accepted from the task options; anything else falls back to 'txt'.
_OUTPUT_TYPES = ('txt', 'csv', 'srt', 'json', 'json-full')
# Generic message reported to the queue for every failure after the input file was found.
_PROCESSING_ERROR = 'Processing error. Please try again later.'


def _read_options(queue_item):
    """Return ``(language, output_type)`` from the task's optional ``data`` payload.

    Missing, malformed or unsupported values fall back to ``'en'`` and ``'txt'``.
    """
    options = queue_item['data'] if 'data' in queue_item else None
    if not isinstance(options, dict):
        # A missing or non-dict payload (e.g. None) means "no options".
        options = {}

    language = options.get('language')
    if not isinstance(language, str) or not language.strip():
        language = 'en'

    output_type = options.get('output_type')
    if output_type not in _OUTPUT_TYPES:
        output_type = 'txt'

    return language, output_type


def _strip_json_metadata(json_text):
    """Re-serialize a JSON transcript without its ``systeminfo``, ``model`` and ``params`` keys."""
    transcript = json.loads(json_text)
    for key in ('systeminfo', 'model', 'params'):
        transcript.pop(key, None)
    return json.dumps(transcript)


def processing(queue_item):
    """Transcribe the audio (or video) file attached to a queue task with whisper-cli.

    Steps: download the task files, convert the media to a WAV file with
    ffmpeg, run whisper-cli on it, read the generated output file and send
    its content back to the queue. Every failure is reported to the queue
    with ``send_queue_error`` instead of being raised.

    Args:
        queue_item: Task mapping. ``queue_item['uuid']`` identifies the task;
            the optional ``queue_item['data']`` dict may contain ``language``
            (string, default ``'en'``) and ``output_type`` (one of ``txt``,
            ``csv``, ``srt``, ``json``, ``json-full``; default ``'txt'``).

    Returns:
        None in all cases; the outcome is delivered through the queue helpers.
    """
    task_uuid = queue_item['uuid']
    print()
    print('New task:', task_uuid)
    upload_dir_path = os.path.join(ROOT_DIR, 'uploads', 'whisper')

    # Download all files to a local folder; only the audio and video paths are used here.
    _, audio_file_path, video_file_path, _ = upload_queue_files(queue_item, upload_dir_path)
    # Fall back to the video file when no audio file was provided.
    audio_file_path = audio_file_path or video_file_path

    if not audio_file_path or not os.path.isfile(audio_file_path):
        print('Send error message - File not found.')
        send_queue_error(task_uuid, 'File not found.')
        return None

    language, output_type = _read_options(queue_item)

    # Converting audio file to WAV format: 16 kHz, mono, 16-bit PCM,
    # truncated to MAX_DURATION seconds.
    audio_wav_path = os.path.join(upload_dir_path, task_uuid + '.wav')
    try:
        (
            ffmpeg
            .input(audio_file_path)
            .output(audio_wav_path, ar=16000, ac=1, acodec='pcm_s16le', t=MAX_DURATION)
            # overwrite_output: a leftover WAV for the same uuid must not make
            # ffmpeg stop and ask for confirmation.
            .run(capture_stdout=True, capture_stderr=True, overwrite_output=True)
        )
    except ffmpeg.Error as err:
        # Unreadable or unsupported media: report it instead of leaving the task unanswered.
        details = err.stderr.decode('utf-8', errors='replace') if err.stderr else str(err)
        print('Audio conversion error:', details.strip())
        print()
        send_queue_error(task_uuid, _PROCESSING_ERROR)
        return None

    output_file_path = os.path.join(upload_dir_path, task_uuid)
    print()
    print('Processing...')
    # Creating a transcription using whisper-cli
    command = [
        WHISPER_CLI_PATH,
        '--model', WHISPER_MODEL_PATH,
        '--threads', '4',
        '--processors', '1',
        f'--output-{output_type}',
        '--language', language,
        '--duration', '0',
        '--file', audio_wav_path,
        '--output-file', output_file_path
    ]
    # Decode explicitly and leniently: the captured output is only searched for
    # a marker, so an undecodable byte must not abort the task.
    result = subprocess.run(command, capture_output=True, encoding='utf-8', errors='replace')

    # Success is detected by the marker the CLI prints when it writes its output
    # file. Only the captured streams are searched, not the argument list.
    if 'saving output to' not in f'{result.stdout}{result.stderr}':
        print('Processing error.')
        print()
        send_queue_error(task_uuid, _PROCESSING_ERROR)
        return None

    # Both JSON variants are written with a plain '.json' extension.
    if output_type in ('json', 'json-full'):
        output_file_path += '.json'
    else:
        output_file_path += '.' + output_type
    if not os.path.isfile(output_file_path):
        print('Output file not found. Send error message - Processing error.')
        print()
        send_queue_error(task_uuid, _PROCESSING_ERROR)
        return None

    # Read as UTF-8 regardless of the process locale; replace undecodable bytes
    # rather than failing the whole task.
    with open(output_file_path, 'r', encoding='utf-8', errors='replace') as file:
        result_str = file.read().strip()

    print('Sending the result...')
    if output_type in ('json', 'json-full'):
        try:
            result_str = _strip_json_metadata(result_str)
        except (ValueError, AttributeError) as err:
            # Invalid JSON (ValueError) or a non-object top level (AttributeError on .pop).
            print('Invalid JSON output:', err)
            print()
            send_queue_error(task_uuid, _PROCESSING_ERROR)
            return None

    send_queue_result_dict(task_uuid, {'result': result_str})
    print()
    print('Completed.')

    # Delete old files
    deleted_input = delete_old_files(upload_dir_path, max_hours=2)
    print('Deleted old files: ', deleted_input)
    print()
    return None


if __name__ == '__main__':
    # Script entry point: start polling and hand every received task to
    # `processing`. The first argument looks like the identifier of the queue
    # this worker serves — confirm against utils.queue_manager.polling_queue.
    queue_id = '3ab88d39-e0ad-45f3-8c71-7558ded5c101'
    polling_queue(queue_id, processing)