# Local speech-to-text helpers built on faster-whisper (CPU, no server needed).
import logging
from faster_whisper import WhisperModel
def load_model(model_size='small', device="cpu", cpu_threads=8, compute_type="int8"):
    """Load a faster-whisper model into the module-level global ``model``.

    The defaults reproduce the original behavior (the 'small' model on CPU,
    8 threads, int8 quantization); all settings are now overridable.

    :param model_size: whisper model size/name passed to WhisperModel
    :param device: compute device, e.g. "cpu"
    :param cpu_threads: number of CPU threads for inference
    :param compute_type: quantization/compute type, e.g. "int8"
    :return: None (sets the global ``model`` as a side effect)
    """
    global model
    # WhisperModel construction can alter the root logger's level, so save
    # it first and restore it afterwards to leave logging untouched.
    log_level = logging.getLogger().level
    model = WhisperModel(model_size, device=device, cpu_threads=cpu_threads, compute_type=compute_type)
    logging.getLogger().setLevel(log_level)
def get_model():
    """Return the module-level WhisperModel, loading it on first access.

    The original raised ``NameError`` if ``load_model()`` had never been
    called (there is no module-level ``model = None``); this lazily loads
    the model instead, so callers always receive a usable instance.

    :return: the shared WhisperModel instance
    """
    # globals().get avoids a NameError when the global was never defined.
    if globals().get("model") is None:
        load_model()
    return model
def faster_whisper_stt(audio_file):
    """
    Transcribe audio file using faster_whisper, no additional server/service needed, runs on CPU.

    :param audio_file: path (or file-like object) accepted by WhisperModel.transcribe
    :return: text — all segment texts concatenated, each followed by a space
    """
    # The original `if model is None` raised NameError when the global was
    # never defined; globals().get handles both the missing and None cases.
    if globals().get("model") is None:
        logging.error("Model is not loaded")
        load_model()

    segments, info = model.transcribe(audio_file, beam_size=5)
    print("Detected language '%s' with probability %f" % (info.language, info.language_probability))

    # Combine all segments into one string; join replaces the quadratic
    # `+=` loop and produces byte-identical output (trailing space kept).
    text = ''.join(segment.text + ' ' for segment in segments)

    return text