diff --git a/Dockerfile b/Dockerfile
index 72d2996..5c83acd 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -2,9 +2,8 @@ FROM python:3.9-alpine
 
 ENV ENV_STATUS=${NODE_ENV:-production}
 
-# needed in older versions of jarvis-server
-# RUN apk update && apk upgrade
-# RUN apk add --no-cache --upgrade grep
+RUN apk update && apk upgrade
+RUN apk add --no-cache --upgrade ffmpeg
 
 WORKDIR /jarvis
 
diff --git a/jarvis/api.py b/jarvis/api.py
index b683e72..5336f92 100644
--- a/jarvis/api.py
+++ b/jarvis/api.py
@@ -3,9 +3,10 @@ import tempfile
 from threading import Lock
 
 import requests
-from flask import Flask, request
+from flask import Flask, request, jsonify
 from flask_socketio import SocketIO, emit, join_room, leave_room, \
     rooms
+from pywhispercpp.model import Model
 
 from jarvis.skills.intent_services import intent_manager
 
@@ -19,6 +20,7 @@ app.config['SECRET_KEY'] = 'secret!'
 socketio = SocketIO(app, async_mode=async_mode)
 thread = None
 thread_lock = Lock()
+model = Model('small')
 
 
 @app.route('/')
@@ -74,7 +76,9 @@ def get_text_from_audio():
 
     audio_temp_file = tempfile.NamedTemporaryFile(prefix='jarvis-audio_', suffix='_client')
     audio_temp_file.write(request.data)
-    text = whisper_stt(audio_temp_file.name)
+    # flush buffered bytes: the transcriber is handed the file *name*, not this handle
+    audio_temp_file.flush()
+    text = whisper_cpp_stt(audio_temp_file.name)
     print(text)
 
     return {"data": text, "uuid": "null"}
@@ -115,5 +119,15 @@ def whisper_stt(audio_file):
     return json.loads(response.text)['text']
 
 
+def whisper_cpp_stt(audio_file):
+    """Transcribe ``audio_file`` with the module-level whisper.cpp ``model``.
+
+    :param audio_file: path of the audio file to transcribe
+    :return: the text of all transcribed segments joined by single spaces
+    """
+    segments = model.transcribe(audio_file, speed_up=False, translate=False)
+    return ' '.join(segment.text for segment in segments)
+
+
 def start_server():
     socketio.run(app, host='0.0.0.0', port=6000, allow_unsafe_werkzeug=True)
diff --git a/requirements.txt b/requirements.txt
index 32efb67..d9bf864 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,4 +2,5 @@ requests~=2.28.1
 Flask~=2.2.2
 adapt-parser==1.0.0
 lingua-franca~=0.4.3
-Flask-SocketIO==5.3.2
\ No newline at end of file
+Flask-SocketIO==5.3.2
+pywhispercpp
\ No newline at end of file