import json
import logging

import requests
from pywhispercpp.model import Model

from jarvis.utils import languages_utils

# Lazily-loaded whisper.cpp model; populated by load_model().
# Must be initialized here, otherwise `model is None` checks raise NameError
# when whisper_cpp_stt() is called before load_model().
model = None


def load_model():
    """Load the whisper.cpp 'base' model into the module-level ``model`` global.

    pywhispercpp is constructed with log_level=ERROR to silence its output;
    the root logger's previous level is saved and restored afterwards.
    """
    global model
    log_level = logging.getLogger().level
    model = Model('base', n_threads=8, suppress_non_speech_tokens=True, log_level=logging.ERROR)
    logging.getLogger().setLevel(log_level)


def get_model():
    """Return the currently loaded whisper.cpp model, or None if not loaded."""
    return model


def whisper_cpp_stt(audio_file):
    """
    Transcribe audio file using whisper-cpp, no additional server/service needed, runs on CPU.

    Lazily loads the model on first use if it has not been loaded yet.

    :param audio_file: path to the audio file to transcribe
    :return: text
    """
    if model is None:
        logging.error("Model is not loaded")
        load_model()
    segments = model.transcribe(audio_file, speed_up=False, translate=False)
    # Combine all segments into one string; each segment keeps its trailing space.
    return ''.join(segment.text + ' ' for segment in segments)


def whisper_asr_stt(audio_file):
    """
    Transcribe audio file using whisper-asr (docker), a server is needed, runs on GPU.
    See : https://github.com/ahmetoner/whisper-asr-webservice

    :param audio_file: path to the audio file to transcribe
    :return: text
    """
    headers = {
        'accept': 'application/json',
        # 'Content-Type': 'multipart/form-data',
    }
    params = {
        'task': 'transcribe',
        # TODO: add to config
        'language': languages_utils.get_language(),
        'output': 'json',
    }
    # Context manager guarantees the upload handle is closed (the original
    # open() call leaked the file descriptor).
    with open(audio_file, 'rb') as audio:
        files = {
            'audio_file': audio,
        }
        # TODO: add to config
        response = requests.post('https://whisper.yourdomain.xyz/asr',
                                 params=params, headers=headers, files=files)
    return response.json()['text']