2023-03-25 12:25:47 +01:00
|
|
|
import json
|
|
|
|
import logging
|
|
|
|
|
|
|
|
import requests
|
|
|
|
from pywhispercpp.model import Model
|
|
|
|
|
|
|
|
from jarvis.utils import languages_utils
|
|
|
|
|
|
|
|
|
|
|
|
def load_model():
    """
    Load the whisper-cpp 'base' model and store it in the module-level ``model`` global.

    The root logger level is captured before the model is built and restored
    afterwards, even when the construction fails (presumably because building
    the pywhispercpp model can change the root logger level - TODO confirm).

    :return: None
    """
    global model

    root_logger = logging.getLogger()
    previous_level = root_logger.level
    try:
        model = Model('base', n_threads=8, suppress_non_speech_tokens=True, log_level=logging.ERROR)
    finally:
        # Always restore the level: a failed load must not leave logging altered.
        root_logger.setLevel(previous_level)
|
|
|
|
|
|
|
|
|
|
|
|
def get_model():
    """
    Return the model stored in the module-level ``model`` global.

    :return: the loaded model, or None when no model has been loaded yet
    """
    # The global only exists once load_model() has run; a plain ``return model``
    # would raise NameError before that, so look it up defensively.
    return globals().get('model')
|
|
|
|
|
|
|
|
|
|
|
|
def whisper_cpp_stt(audio_file):
    """
    Transcribe audio file using whisper-cpp, no additional server/service needed, runs on CPU.

    If no model has been loaded yet, an error is logged and the model is loaded
    on demand through load_model().

    :param audio_file: audio file handed to the model's ``transcribe`` method
    :return: text of all segments, each followed by a single space
    """
    # ``model`` is only defined after load_model() has run, so test for it
    # without triggering a NameError on the very first call.
    if globals().get('model') is None:
        logging.error("Model is not loaded")
        load_model()

    segments = model.transcribe(audio_file, speed_up=False, translate=False)

    # combines all segments in one string (trailing space after each segment kept)
    return ''.join(segment.text + ' ' for segment in segments)
|
|
|
|
|
|
|
|
|
|
|
|
def whisper_asr_stt(audio_file):
    """
    Transcribe audio file using whisper-asr (docker), a server is needed, runs on GPU.
    See : https://github.com/ahmetoner/whisper-asr-webservice

    :param audio_file: path of the audio file to upload (opened in binary mode)
    :return: text, taken from the 'text' field of the JSON response
    """
    # No explicit Content-Type: requests builds the multipart header (with its
    # boundary) itself when ``files`` is passed.
    headers = {
        'accept': 'application/json',
    }

    params = {
        'task': 'transcribe',
        # TODO: add to config
        'language': languages_utils.get_language(),
        'output': 'json',
    }

    # The context manager closes the audio file once the upload is finished,
    # including when the request raises.
    with open(audio_file, 'rb') as audio_stream:
        files = {
            'audio_file': audio_stream,
        }

        # TODO: add to config
        response = requests.post('https://whisper.yourdomain.xyz/asr', params=params, headers=headers, files=files)

    return json.loads(response.text)['text']
|