# jarvis-server-v2/jarvis/utils/faster_whisper_utils.py
import logging
from faster_whisper import WhisperModel
# Shared WhisperModel instance; None until load_model() has run.
model = None


def load_model():
    """Load the faster-whisper 'small' model into the module-level ``model``.

    Runs on CPU with int8 quantization and 8 threads. The root logger level
    is snapshotted and restored around construction, because loading the
    model can reconfigure logging as a side effect — TODO confirm this is
    still needed with current faster-whisper versions.
    """
    global model
    # Snapshot the current root-logger level before loading.
    log_level = logging.getLogger().level
    model = WhisperModel("small", device="cpu", cpu_threads=8, compute_type="int8")
    # Restore whatever level was active before the model was constructed.
    logging.getLogger().setLevel(log_level)
def get_model():
    """Return the shared module-level WhisperModel instance.

    Assumes :func:`load_model` has already been called; otherwise the
    global is unbound and this raises NameError.
    """
    return model
def faster_whisper_stt(audio_file):
    """Transcribe an audio file using faster-whisper on CPU.

    No additional server/service is needed; the model is lazily loaded on
    first use via :func:`load_model`.

    :param audio_file: path or file-like object accepted by
        ``WhisperModel.transcribe`` — presumably a WAV/audio path; verify
        against callers.
    :return: transcribed text, with a single space appended after each
        segment (matches the historical output format).
    """
    # Guard with globals().get so a never-bound global doesn't raise
    # NameError — lazily load the model instead.
    if globals().get("model") is None:
        logging.error("Model is not loaded")
        load_model()
    segments, info = model.transcribe(audio_file, beam_size=5)
    # Lazy %-style args so formatting is skipped when the level filters it out.
    logging.info(
        "Detected language '%s' with probability %f",
        info.language,
        info.language_probability,
    )
    # Join all segments into one string; each segment keeps a trailing space,
    # byte-identical to the previous concatenation loop's output.
    return "".join(segment.text + " " for segment in segments)