added whisper cpp

2023-03-14 21:42:14 +01:00 · 2023-03-14 21:42:14 +01:00 · 83e2a3cdf2
commit 83e2a3cdf2
parent 20219c8fad
3 changed files with 20 additions and 6 deletions
--- a/5
+++ b/5
@ -2,9 +2,8 @@ FROM python:3.9-alpine

 ENV ENV_STATUS=${NODE_ENV:-production}

-# needed in older versions of jarvis-server
-# RUN apk update && apk upgrade
-# RUN apk add --no-cache --upgrade grep
+RUN apk update && apk upgrade
+RUN apk add --no-cache --upgrade ffmpeg

 WORKDIR /jarvis

--- a/jarvis/api.py
+++ b/jarvis/api.py
@ -3,9 +3,10 @@ import tempfile
 from threading import Lock

 import requests
-from flask import Flask, request
+from flask import Flask, request, jsonify
 from flask_socketio import SocketIO, emit, join_room, leave_room, \
    rooms
+from pywhispercpp.model import Model

 from jarvis.skills.intent_services import intent_manager

@ -19,6 +20,7 @@ app.config['SECRET_KEY'] = 'secret!'
 socketio = SocketIO(app, async_mode=async_mode)
 thread = None
 thread_lock = Lock()
+model = Model('small')


@app.route('/')
@ -74,7 +76,8 @@ def get_text_from_audio():
    audio_temp_file = tempfile.NamedTemporaryFile(prefix='jarvis-audio_', suffix='_client')
    audio_temp_file.write(request.data)

-    text = whisper_stt(audio_temp_file.name)
+    # text = whisper_stt(audio_temp_file.name)
+    text = whisper_cpp_stt(audio_temp_file.name)
    print(text)

    return {"data": text, "uuid": "null"}
@ -115,5 +118,16 @@ def whisper_stt(audio_file):
    return json.loads(response.text)['text']


+def whisper_cpp_stt(audio_file):
+    """Transcribe ``audio_file`` with the module-level whisper.cpp ``model``.
+
+    Returns the text of all segments, each one followed by a single space.
+    """
+    transcribed = model.transcribe(audio_file, speed_up=False, translate=False)
+
+    # Join in one pass; the space after every segment (incl. the last) is kept.
+    return ''.join(piece.text + ' ' for piece in transcribed)
+
+
 def start_server():
    socketio.run(app, host='0.0.0.0', port=6000, allow_unsafe_werkzeug=True)
--- a/requirements.txt
+++ b/requirements.txt
@ -3,3 +3,4 @@ Flask~=2.2.2
 adapt-parser==1.0.0
 lingua-franca~=0.4.3
 Flask-SocketIO==5.3.2
+pywhispercpp