added gevent for websockets server and reworked main structure

2023-03-25 12:25:47 +01:00 · 2023-03-25 12:25:47 +01:00 · ee2eca484f
commit ee2eca484f
parent 2b356c5dea
5 changed files with 123 additions and 66 deletions
--- a/jarvis/api.py
+++ b/jarvis/api.py
@ -1,16 +1,15 @@
 import json
+import logging
 import sys
 import tempfile
 from threading import Lock

 import openai
-import requests
 from flask import Flask, request
 from flask_socketio import SocketIO, emit, join_room, leave_room, \
    rooms
-from pywhispercpp.model import Model

-from jarvis.utils.chatgpt_utils import chatgpt_recognise
+from jarvis.utils import chat_utils, whisper_utils

 # Set this variable to "threading", "eventlet" or "gevent" to test the
 # different async modes, or leave it set to None for the application to choose
@ -22,7 +21,7 @@ app.config['SECRET_KEY'] = 'secret!'
 socketio = SocketIO(app, async_mode=async_mode)
 thread = None
 thread_lock = Lock()
-model = Model('base', n_threads=16, suppress_non_speech_tokens=True)
+
 openai.api_key = sys.argv[1]


@ -34,25 +33,34 @@ def index():
@socketio.event
 def process_message(message):
+    """
+    Handle a 'process_message' socket event and send the reply to the sender's room.
+
+    :param message: JSON string; its 'uuid' entry is used as the room id and its 'data' entry as the user text
+    """
    message = json.loads(message)
-    print("New PROCESS request from room " + message['uuid'])
+    logging.info("New PROCESS request from room " + message['uuid'])
+    logging.info("Message : " + message['data'])

-    print("Message : " + message['data'])
-    # TODO: maybe implement grammar check ?
+    # TODO: maybe implement grammar check and correction ?

    # intent_manager.recognise(message['data'], message['uuid'])
-    send_jarvis_message_to_room("I don't know how to respond to that...", message['uuid'])

-    response = chatgpt_recognise(message['data'])
-    if 'comment' in response:
-        send_user_message_to_room(response['comment'], message['uuid'])
-    else:
-        send_jarvis_message_to_room("I don't know how to respond to that...", message['uuid'])
+    # Nothing is sent back when the client posted an empty message.
+    if message['data'] == "":
+        return
+
+    # response = chatgpt_recognise(message['data'])
+    # Hard-coded reply used while the call above is commented out.
+    response = {'action': 'answer',
+                'answer': "Hello! As an AI, I don't have emotions, but I'm always here to help you with your smart home needs. How can I assist you today?"}
+
+    # Each known action stores its reply text under a different key of the response.
+    reply_key_by_action = {
+        'clarify': 'question',
+        'command': 'comment',
+        'query': 'device_description',
+        'answer': 'answer',
+    }
+    reply_key = reply_key_by_action.get(response['action'])
+    if reply_key is None:
+        # Unknown action: fall back to the generic reply.
+        reply = "I don't know how to respond to that..."
+    else:
+        reply = response[reply_key]
+
+    chat_utils.send_jarvis_message_to_room(reply, message['uuid'])


@socketio.event
 def join(message):
+    # 'message' is a JSON string; its 'uuid' entry is the name of the room to join.
    message = json.loads(message)
-    print("New client joined room " + message['uuid'])
+    logging.info("New client joined room " + message['uuid'])
    join_room(message['uuid'])


@ -68,27 +76,18 @@ def connect():
    emit('my_response', {'data': 'Connected', 'count': 0})


-def send_user_message_to_room(text, room_id):
-    socketio.emit('message_from_user', {'data': text, "uuid": room_id}, to=room_id)
-
-
-def send_jarvis_message_to_room(text, room_id):
-    socketio.emit('message_from_jarvis', {'data': text, "uuid": room_id}, to=room_id)
-
-
 # .WAV (i.e.) FILE REQUEST
@app.route("/get_text_from_audio", methods=['POST'])
 def get_text_from_audio():
-    print("[" + request.remote_addr + "] - New STT request")
+    """
+    Transcribe the audio sent as the raw body of the POST request.
+
+    The body is written to a temporary file whose path is handed to whisper-cpp.
+
+    :return: dict with the transcription under the 'data' key
+    """
+    logging.info("New STT request from " + request.remote_addr)

-    audio_temp_file = tempfile.NamedTemporaryFile(prefix='jarvis-audio_', suffix='_client')
-    audio_temp_file.write(request.data)
-
-    # text = whisper_stt(audio_temp_file.name)
-    text = whisper_cpp_stt(audio_temp_file.name)
-    print(text)
+    # The context manager closes (and thereby deletes) the temporary file once transcription is done.
+    with tempfile.NamedTemporaryFile(prefix='jarvis-audio_', suffix='_client') as audio_temp_file:
+        audio_temp_file.write(request.data)
+        # Flush so the whole payload is on disk before the file is read again through its name.
+        audio_temp_file.flush()
+        text = whisper_utils.whisper_cpp_stt(audio_temp_file.name)
+
+    logging.info("STT result for " + request.remote_addr + " : " + text)

-    return {"data": text, "uuid": "null"}
+    return {"data": text}


 """
@ -103,39 +102,6 @@ def process_text():
    return {"transcription": text, "answer": answer}"""


-# send request to whisper-asr server (docker)
-def whisper_stt(audio_file):
-    headers = {
-        'accept': 'application/json',
-        # 'Content-Type': 'multipart/form-data',
-    }
-
-    params = {
-        'task': 'transcribe',
-        # TODO: add to config
-        'language': 'fr',
-        'output': 'json',
-    }
-
-    files = {
-        'audio_file': open(audio_file, 'rb'),
-    }
-
-    # TODO: add to config
-    response = requests.post('https://whisper.broillet.ch/asr', params=params, headers=headers, files=files)
-    return json.loads(response.text)['text']
-
-
-def whisper_cpp_stt(audio_file):
-    segments = model.transcribe(audio_file, speed_up=False, translate=False)
-
-    # combines all segments in one string
-    text = ''
-    for segment in segments:
-        text += segment.text + ' '
-
-    return text
-
-
-def start_server():
-    socketio.run(app, host='0.0.0.0', port=6000, allow_unsafe_werkzeug=True)
+def start_api():
+    """
+    Start the Flask-SocketIO server for the API, bound to 0.0.0.0 on port 6000.
+    """
+    logging.info("Starting Jarvis Server API...")
+    socketio.run(app, host='0.0.0.0', port=6000)
--- a/jarvis/start.py
+++ b/jarvis/start.py
@ -1,10 +1,14 @@
-import api
+import logging
+
 import lingua_franca

+import api
 from jarvis.skills.cocktails import CocktailSkill
 from jarvis.skills.intent_services import intent_manager
+from jarvis.utils import whisper_utils

 if __name__ == '__main__':
+    logging.getLogger().setLevel(logging.DEBUG)

    # Load lingua franca in the memory
    lingua_franca.load_language(lang="fr")
@ -15,5 +19,8 @@ if __name__ == '__main__':
    # Load the skills
    intent_manager.load_all_skills()

+    # Load the STT (whisper) model
+    whisper_utils.load_model()
+
    # Start the api endpoint
-    api.start_server()
+    api.start_api()
--- a/jarvis/utils/chat_utils.py
+++ b/jarvis/utils/chat_utils.py
@ -0,0 +1,13 @@
+import logging
+
+from jarvis.api import socketio
+
+
+def send_user_message_to_room(text, room_id):
+    """
+    Emit a 'message_from_user' event carrying the text to every client of a room.
+
+    :param text: message content, sent under the 'data' key
+    :param room_id: room to send to, also sent back under the 'uuid' key
+    """
+    # Lazy %-style arguments: a non-string text or room id can no longer raise a TypeError
+    # while building the log line, which would have prevented the emit below.
+    logging.debug("Sending message from user to room %s : %s", room_id, text)
+    socketio.emit('message_from_user', {'data': text, "uuid": room_id}, to=room_id)
+
+
+def send_jarvis_message_to_room(text, room_id):
+    """
+    Emit a 'message_from_jarvis' event carrying the text to every client of a room.
+
+    :param text: message content, sent under the 'data' key
+    :param room_id: room to send to, also sent back under the 'uuid' key
+    """
+    # Lazy %-style arguments: a non-string text or room id can no longer raise a TypeError
+    # while building the log line, which would have prevented the emit below.
+    logging.debug("Sending message from jarvis to room %s : %s", room_id, text)
+    socketio.emit('message_from_jarvis', {'data': text, "uuid": room_id}, to=room_id)
--- a/jarvis/utils/whisper_utils.py
+++ b/jarvis/utils/whisper_utils.py
@ -0,0 +1,69 @@
+import json
+import logging
+
+import requests
+from pywhispercpp.model import Model
+
+from jarvis.utils import languages_utils
+
+
+def load_model():
+    """
+    Create the whisper-cpp 'base' model and store it in the module-level ``model`` global.
+
+    The root logger level is saved before the model is created and put back afterwards
+    (presumably because creating the model changes it - TODO confirm). The restore is done
+    in a ``finally`` block so the level is also put back when model creation fails.
+    """
+    global model
+    log_level = logging.getLogger().level
+    try:
+        model = Model('base', n_threads=8, suppress_non_speech_tokens=True, log_level=logging.ERROR)
+    finally:
+        logging.getLogger().setLevel(log_level)
+
+
+def get_model():
+    """
+    Return the whisper-cpp model stored by load_model().
+
+    :return: the loaded model, or None when load_model() has not been called yet
+    """
+    # The ``model`` global only exists once load_model() has run; looking it up through
+    # globals() avoids a NameError before that.
+    return globals().get('model')
+
+
+def whisper_cpp_stt(audio_file):
+    """
+    Transcribe audio file using whisper-cpp, no additional server/service needed, runs on CPU.
+
+    The model is loaded on the fly when load_model() has not been called before.
+
+    :param audio_file: path of the audio file to transcribe
+    :return: text of all segments, each one followed by a space
+    """
+    # The ``model`` global does not exist until load_model() has run, so a plain
+    # ``model is None`` test would raise a NameError instead of triggering the load.
+    stt_model = globals().get('model')
+    if stt_model is None:
+        logging.error("Model is not loaded")
+        load_model()
+        stt_model = globals()['model']
+
+    segments = stt_model.transcribe(audio_file, speed_up=False, translate=False)
+
+    # combines all segments in one string (a trailing space is kept after every segment)
+    return ''.join(segment.text + ' ' for segment in segments)
+
+
+def whisper_asr_stt(audio_file):
+    """
+    Transcribe audio file using whisper-asr (docker), a server is needed, runs on GPU.
+    See : https://github.com/ahmetoner/whisper-asr-webservice
+
+    :param audio_file: path of the audio file to upload
+    :return: text
+    """
+    headers = {
+        'accept': 'application/json',
+        # 'Content-Type': 'multipart/form-data',
+    }
+
+    params = {
+        'task': 'transcribe',
+        # TODO: add to config
+        'language': languages_utils.get_language(),
+        'output': 'json',
+    }
+
+    # Open the audio inside a context manager so the file handle is closed once the
+    # upload has finished, even when the request fails.
+    with open(audio_file, 'rb') as audio:
+        files = {
+            'audio_file': audio,
+        }
+
+        # TODO: add to config
+        response = requests.post('https://whisper.broillet.ch/asr', params=params, headers=headers, files=files)
+
+    return json.loads(response.text)['text']
--- a/requirements.txt
+++ b/requirements.txt
@ -5,4 +5,6 @@ lingua-franca
 Flask-SocketIO
 pywhispercpp
 padatious
-openai
+openai
+gevent
+gevent-websocket