replaced whispercpp by faster-whisper
This commit is contained in:
parent
78d706cfcc
commit
74afc2f9f0
@ -9,7 +9,7 @@ from flask import Flask, request
|
||||
from flask_socketio import SocketIO, emit, join_room, leave_room, \
|
||||
rooms
|
||||
|
||||
from jarvis.utils import chat_utils, whisper_utils, chatgpt_utils
|
||||
from jarvis.utils import chat_utils, whisper_utils, chatgpt_utils, faster_whisper_utils
|
||||
|
||||
# Set this variable to "threading", "eventlet" or "gevent" to test the
|
||||
# different async modes, or leave it set to None for the application to choose
|
||||
@ -86,7 +86,9 @@ def get_text_from_audio():
|
||||
audio_temp_file = tempfile.NamedTemporaryFile(prefix='jarvis-audio_', suffix='_client')
|
||||
audio_temp_file.write(request.data)
|
||||
|
||||
text = whisper_utils.whisper_cpp_stt(audio_temp_file.name)
|
||||
# text = whisper_utils.whisper_cpp_stt(audio_temp_file.name)
|
||||
text = faster_whisper_utils.faster_whisper_stt(audio_temp_file.name)
|
||||
|
||||
logging.info("STT result for " + request.remote_addr + " : " + text)
|
||||
|
||||
return {"data": text}
|
||||
|
37
jarvis/utils/faster_whisper_utils.py
Normal file
37
jarvis/utils/faster_whisper_utils.py
Normal file
@ -0,0 +1,37 @@
|
||||
import logging
|
||||
|
||||
from faster_whisper import WhisperModel
|
||||
|
||||
|
||||
def load_model():
    """
    Load the faster-whisper model into the module-level ``model`` global.

    Builds the 'small' model for CPU inference (8 threads, int8 compute type).
    The root logger's level is read before the model is built and written back
    afterwards -- presumably because building the model can alter it
    (TODO confirm).
    """
    global model

    root_logger = logging.getLogger()
    previous_level = root_logger.level

    model = WhisperModel('small', device="cpu", cpu_threads=8, compute_type="int8")

    # Put the root logger back to the level it had before the model was built.
    root_logger.setLevel(previous_level)
|
||||
|
||||
|
||||
def get_model():
    """
    Return the currently loaded faster-whisper model.

    :return: the object stored in the module-level ``model`` global, or None
             when no model has been loaded yet
    """
    # ``model`` only exists as a module global once it has been assigned, so
    # look it up defensively instead of raising NameError before the first load.
    return globals().get("model")
|
||||
|
||||
|
||||
def faster_whisper_stt(audio_file):
    """
    Transcribe an audio file with the module-level faster-whisper model.

    If no model has been loaded yet, an error is logged and the model is
    loaded on the spot before transcribing.

    :param audio_file: audio input handed straight to ``model.transcribe``
    :return: text, every segment's text followed by a single space
    """
    # ``model`` is only created by load_model(); a bare ``model is None`` test
    # would raise NameError on a cold start instead of triggering the load.
    if globals().get("model") is None:
        logging.error("Model is not loaded")
        load_model()

    segments, info = model.transcribe(audio_file, beam_size=5)
    logging.info("Detected language '%s' with probability %f",
                 info.language, info.language_probability)

    # combines all segments in one string
    return ''.join(segment.text + ' ' for segment in segments)
|
11
start.py
11
start.py
@ -3,9 +3,7 @@ import logging
|
||||
import lingua_franca
|
||||
|
||||
import jarvis.api
|
||||
from jarvis.skills.cocktails import CocktailSkill
|
||||
from jarvis.skills.intent_services import intent_manager
|
||||
from jarvis.utils import whisper_utils
|
||||
from jarvis.utils import faster_whisper_utils
|
||||
|
||||
if __name__ == '__main__':
|
||||
logging.getLogger().setLevel(logging.INFO)
|
||||
@ -14,13 +12,14 @@ if __name__ == '__main__':
|
||||
lingua_franca.load_language(lang="fr")
|
||||
|
||||
# Register each skills
|
||||
CocktailSkill().register()
|
||||
# CocktailSkill().register()
|
||||
|
||||
# Load the skills
|
||||
intent_manager.load_all_skills()
|
||||
# intent_manager.load_all_skills()
|
||||
|
||||
# Load the STT (whisper) model
|
||||
whisper_utils.load_model()
|
||||
# whisper_utils.load_model()
|
||||
faster_whisper_utils.load_model()
|
||||
|
||||
# Start the api endpoint
|
||||
jarvis.api.start_api()
|
||||
|
Loading…
Reference in New Issue
Block a user