Massive rework of the whole API and sockets handling; the Android app will need rework too, but the server is more dynamic now and more usable for other clients and external apps.

This commit is contained in:
Mathieu Broillet 2023-11-06 20:38:25 +01:00
parent cb24495c95
commit 7ce3df75f7
Signed by: mathieu
GPG Key ID: C0E9E0E95AF03319
18 changed files with 445 additions and 148 deletions

44
basic_test_client.py Normal file
View File

@ -0,0 +1,44 @@
# BASIC CLIENT FOR INTERACTING WITH THE SERVER
# This client is used to test the server and to interact with it
import json
import time

import socketio

HOST = "localhost"
PORT = 6000

# True while we are waiting for the assistant's reply to the last message
waiting = False

if __name__ == '__main__':
    # Create a socket.io client and connect to the server
    sock = socketio.Client()
    sock.connect(f"http://{HOST}:{PORT}")

    # Join the room (rooms are keyed by client uuid)
    sock.emit('join', json.dumps({'uuid': 'clientpc'}))

    # Listen for messages from the server
    @sock.on('message_from_assistant')
    def on_message_from_jarvis(data):
        print("Assistant says: " + data['data'])
        global waiting
        waiting = False

    # Chat with the server
    while True:
        if waiting:
            # Sleep instead of busy-spinning: the original re-checked
            # `waiting` in a tight loop, burning 100% CPU until the
            # assistant's reply arrived.
            time.sleep(0.1)
            continue
        message = input("Enter a message to send to the server: ")
        if message == "exit":
            print("Exiting")
            # Leave the room before terminating
            sock.emit('leave', json.dumps({'uuid': 'clientpc'}))
            exit(0)
        waiting = True
        sock.emit('process_message', json.dumps({'data': message, 'uuid': 'clientpc'}))

12
run.py
View File

@ -1,8 +1,9 @@
import logging
from src import api
from src.audio import audio_utils
from src.database import db_utils
from src.utils import faster_whisper_utils
from src.declarations import TTSEngine, STTEngine
from src.network import api
# import lingua_franca
@ -22,9 +23,8 @@ if __name__ == '__main__':
# Load the skills
# intent_manager.load_all_skills()
# Load the STT (whisper) model
# whisper_utils.load_model()
faster_whisper_utils.load_model()
# Load the audio model(s)
audio_utils.load_models(stt_engine=STTEngine.FASTER_WHISPER, tts_engine=TTSEngine.PIPER)
# Start the api endpoint
# Start the api server
api.start_api(6000)

View File

@ -1,121 +0,0 @@
import json
import logging
import openai
import sys
import tempfile
from threading import Lock
from flask import Flask, request
from flask_socketio import SocketIO, emit, join_room, leave_room, \
rooms
from src.utils import chat_utils, chatgpt_utils, faster_whisper_utils
# Set this variable to "threading", "eventlet" or "gevent" to test the
# different async modes, or leave it set to None for the application to choose
# the best option based on installed packages.
async_mode = None
app = Flask(__name__)
app.config['SECRET_KEY'] = 'secret!'
socketio = SocketIO(app, async_mode=async_mode)
thread = None
thread_lock = Lock()
openai.api_key = sys.argv[1]
@app.route('/')
def index():
    """Root endpoint: return a plain welcome string."""
    return "Welcome to Jarvis Server API !"
@socketio.event
def process_message(message):
    """
    Socket event: process a user's chat message and reply into their room.

    :param message: JSON string with 'uuid' (room id) and 'data' (the text)
    """
    message = json.loads(message)
    logging.info("New PROCESS request from room " + message['uuid'])
    logging.info("Message : " + message['data'])
    # Make sure the sender's room exists before replying into it
    if message['uuid'] not in rooms():
        logging.warning("Room not found, creating it")
        join_room(message['uuid'])
    # TODO: maybe implement grammar check and correction ?
    # intent_manager.recognise(message['data'], message['uuid'])
    # Empty messages are ignored (no ChatGPT call, no reply)
    if message['data'] != "":
        response = chatgpt_utils.chatgpt_recognise(message['data'], message['uuid'])
        # text_response = "Tokens are expensive ya know?"
        chat_utils.send_jarvis_message_to_room(response['response'], message['uuid'])
@socketio.event
def join(message):
    """
    Socket event: join the room named by the client's uuid.

    :param message: JSON string with a 'uuid' key
    """
    message = json.loads(message)
    logging.info("New client joined room " + message['uuid'])
    join_room(message['uuid'])
@socketio.event
def leave(message):
    """
    Socket event: leave the room named by the client's uuid.

    NOTE(review): unlike join/process_message, `message` is not passed
    through json.loads() here — if clients send a JSON string (as the test
    client does), indexing it with ['uuid'] would fail; confirm.
    """
    leave_room(message['uuid'])
@socketio.event
def connect():
    """Socket event: acknowledge a new connection with a 'my_response' emit."""
    global thread
    emit('my_response', {'data': 'Connected', 'count': 0})
@socketio.event
def clear_chat(uuid):
    """
    Clear chat history for a specific room.

    :param uuid: room identifier (used verbatim; the json.loads call is
        commented out, so callers are expected to send the raw id)
    :return:
    """
    # uuid = json.loads(uuid)
    emit('clear_chat', {}, to=uuid)
    chatgpt_utils.clear_chat(uuid)
# .WAV (i.e.) FILE REQUEST
@app.route("/get_text_from_audio", methods=['POST'])
def get_text_from_audio():
    """
    Transcribe audio file using whisper.

    The raw request body is written to a named temporary file and the
    transcriber reads it back by file name.

    :return: dict with the transcription under the "data" key
    """
    logging.info("New STT request from " + request.remote_addr)
    audio_temp_file = tempfile.NamedTemporaryFile(prefix='jarvis-audio_', suffix='_client')
    audio_temp_file.write(request.data)
    # NOTE(review): the buffer is not flushed before the file is re-read by
    # name below — buffered bytes may not be on disk yet; confirm.
    # text = whisper_utils.whisper_cpp_stt(audio_temp_file.name)
    text = faster_whisper_utils.faster_whisper_stt(audio_temp_file.name)
    logging.info("STT result for " + request.remote_addr + " : " + text)
    return {"data": text}
"""
@src.route("/process_text", methods=['POST'])
def process_text():
print("[" + request.remote_addr + "] - New TXT request")
text = request.values['text']
answer = intent_manager.recognise(text, request.headers.get('Client-Ip'), request.headers.get('Client-Port'))
return {"transcription": text, "answer": answer}"""
def start_api(port=6000):
    """
    Start the Flask-SocketIO server on all interfaces.

    :param port: TCP port to listen on (default 6000)
    """
    logging.info("Starting Jarvis Server API on port " + str(port) + "...")
    socketio.run(app, host='0.0.0.0', port=port)

0
src/audio/__init__.py Normal file
View File

58
src/audio/audio_utils.py Normal file
View File

@ -0,0 +1,58 @@
import tempfile
from src.audio.stt import faster_whisper
from src.declarations import STTEngine, TTSEngine
def load_models(stt_engine: STTEngine, tts_engine: TTSEngine):
    """
    Pre-load the selected STT and TTS models into memory.

    :param stt_engine: which speech-to-text backend to initialise
    :param tts_engine: which text-to-speech backend to initialise
    :return: None
    :raises Exception: when either engine is not a known member
    """
    # STT: only faster-whisper actually loads anything today;
    # plain whisper is accepted but a no-op.
    if stt_engine is STTEngine.FASTER_WHISPER:
        faster_whisper.load_model()
    elif stt_engine is not STTEngine.WHISPER:
        raise Exception("Unknown STT engine: " + stt_engine.name)

    # TTS: piper is accepted but loading is not implemented yet.
    if tts_engine is not TTSEngine.PIPER:
        raise Exception("Unknown TTS engine: " + tts_engine.name)
def get_text_from_audio(audio_bytes, stt_engine):
    """
    Transcribe an audio clip to text.

    :param audio_bytes: raw audio file content (e.g. a .wav payload)
    :param stt_engine: STTEngine member selecting the backend
    :return: transcription text (currently None for STTEngine.WHISPER,
             which is not implemented yet)
    :raises Exception: if the engine is unknown
    """
    audio_temp_file = tempfile.NamedTemporaryFile(prefix='jarvis-audio_', suffix='_client')
    audio_temp_file.write(audio_bytes)
    # Flush so the transcriber (which reopens the file by name) sees all the
    # bytes — without this, buffered data may not have reached the disk yet.
    audio_temp_file.flush()

    if stt_engine is STTEngine.FASTER_WHISPER:
        return faster_whisper.speech_to_text(audio_temp_file.name)
        # text = whisper_utils.whisper_cpp_stt(audio_temp_file.name)
    elif stt_engine is STTEngine.WHISPER:
        # TODO: implement whisper
        pass
    else:
        raise Exception("Unknown STT engine: " + stt_engine.name)
def get_speech_from_text(text, tts_engine):
    """
    Synthesise speech for the given text (not implemented yet).

    :param text: sentence to speak
    :param tts_engine: TTSEngine member selecting the backend
    :return: audio file (currently None — TTS is a stub)
    :raises Exception: if the engine is unknown
    """
    # TODO: implement TTS
    if tts_engine is not TTSEngine.PIPER:
        raise Exception("Unknown TTS engine: " + tts_engine.name)

View File

View File

@ -2,8 +2,19 @@ import logging
from faster_whisper import WhisperModel
model = None
def load_model(model_size='small', device="cpu", cpu_threads=8, compute_type="int8"):
"""
Load the whisper model in the memory.
:param model_size: small, medium or large
:param device: cpu or cuda
:param cpu_threads: number of cpu threads
:param compute_type: use int8 (haven't tested others)
:return: None
"""
log_level = logging.getLogger().level
global model
model = WhisperModel(model_size_or_path=model_size, device=device, cpu_threads=cpu_threads,
@ -12,22 +23,26 @@ def load_model(model_size='small', device="cpu", cpu_threads=8, compute_type="in
def get_model():
    """
    Get the whisper model.

    :return: the whisper model (None until load_model() has been called)
    :rtype: WhisperModel
    """
    return model
def faster_whisper_stt(audio_file):
def speech_to_text(audio_file):
"""
Transcribe audio file using faster_whisper, no additional server/service needed, runs on CPU.
:param audio_file:
:param model:
:return: text
:param audio_file: path to audio file
:return: transcription text
"""
if model is None:
if get_model() is None:
logging.error("Model is not loaded")
load_model()
segments, info = model.transcribe(audio_file, beam_size=5, language='fr')
segments, info = get_model().transcribe(audio_file, beam_size=5, language='fr')
print("Detected language '%s' with probability %f" % (info.language, info.language_probability))
# combines all segments in one string

View File

50
src/declarations.py Normal file
View File

@ -0,0 +1,50 @@
from enum import Enum
class Sockets(Enum):
    """
    Socket.io event names exposed by the server API.
    """
    PROCESS_MESSAGE = "process_message"
    JOIN = "join"
    LEAVE = "leave"
    CONNECT = "connect"
    CLEAR_CHAT = "clear_chat"
class Endpoints(Enum):
    """
    HTTP endpoint paths exposed by the server API.
    """
    DEFAULT = "/"
    STATUS = "/status"
    STT = "/stt"
    TTS = "/tts"
class TTSEngine(Enum):
    """
    Supported text-to-speech engines.
    """
    PIPER = "piper"
class STTEngine(Enum):
    """
    Supported speech-to-text engines.
    """
    WHISPER = "whisper"
    FASTER_WHISPER = "faster_whisper"
def get_enum_from_str(enumclass, name):
    """
    Look up an enum member of *enumclass* by its member name.

    :param enumclass: the Enum subclass to search
    :param name: the member name, case-sensitive (e.g. "PIPER")
    :return: the matching enum member
    :raises Exception: if no member with that name exists
    """
    try:
        # Enum classes support direct name lookup; no manual scan needed.
        return enumclass[name]
    except KeyError:
        # Keep the original error contract (plain Exception) for callers.
        raise Exception("Unknown enum " + name) from None

0
src/network/__init__.py Normal file
View File

83
src/network/api.py Normal file
View File

@ -0,0 +1,83 @@
import logging
import os
import openai
from flask import Flask, request
from flask_socketio import SocketIO
from src.network import endpoint_handler, socket_handler
from src.declarations import Endpoints, Sockets
# Set this variable to "threading", "eventlet" or "gevent" to test the
# different async modes, or leave it set to None for the application to choose
# the best option based on installed packages.
async_mode = None
app = Flask(__name__)
app.config['SECRET_KEY'] = 'secret!'
socketio = SocketIO(app, async_mode=async_mode)
# thread = None
# thread_lock = Lock()
openai.api_key = os.getenv("OPENAI_API_KEY")
#
# ENDPOINTS
#
@app.route('/')
def index():
    """GET /: describe the API (delegates to the DEFAULT endpoint handler)."""
    return endpoint_handler.handle_endpoint(Endpoints.DEFAULT, request)
@app.route('/status')
def status():
    """GET /status: simple health check."""
    return endpoint_handler.handle_endpoint(Endpoints.STATUS, request)
@app.route("/stt", methods=['POST'])
def speech_to_text():
return endpoint_handler.handle_endpoint(Endpoints.STT, request)
@app.route("/tts", methods=['POST'])
def text_to_speech():
return endpoint_handler.handle_endpoint(Endpoints.TTS, request)
#
# SOCKETS
#
@socketio.event
def process_message(message):
    """Socket 'process_message': forward a chat message to the socket handler."""
    return socket_handler.handle_socket(Sockets.PROCESS_MESSAGE, message)
@socketio.event
def join(message):
    """Socket 'join': a client joins its room (keyed by uuid)."""
    return socket_handler.handle_socket(Sockets.JOIN, message)
@socketio.event
def leave(message):
    """Socket 'leave': a client leaves its room (keyed by uuid)."""
    return socket_handler.handle_socket(Sockets.LEAVE, message)
@socketio.event
def connect():
    """Socket 'connect': acknowledge a new client connection (no payload)."""
    return socket_handler.handle_socket(Sockets.CONNECT, None)
@socketio.event
def clear_chat(message):
    """Socket 'clear_chat': wipe the chat history of the sender's room."""
    return socket_handler.handle_socket(Sockets.CLEAR_CHAT, message)
def start_api(port=6000):
    """
    Start the Flask-SocketIO server on all interfaces.

    :param port: TCP port to listen on (default 6000)
    """
    logging.info("Starting Jarvis Server API on port " + str(port) + "...")
    socketio.run(app, host='0.0.0.0', port=port)

View File

@ -0,0 +1,77 @@
import logging
from src.audio import audio_utils
from src.declarations import Endpoints, Sockets
def handle_endpoint(endpoint, request):
    """
    Route an incoming HTTP request to the handler matching *endpoint*.

    Unknown endpoints fall back to the default (index) handler.

    :param endpoint: Endpoints enum member
    :param request: the Flask request object, passed through to the handler
    :return: the handler's response dict
    """
    dispatch = {
        Endpoints.DEFAULT: default_endpoint,
        Endpoints.STATUS: status_endpoint,
        Endpoints.STT: speech_to_text,
        Endpoints.TTS: text_to_speech,
    }
    handler = dispatch.get(endpoint, default_endpoint)
    return handler(request)
def default_endpoint(request):
    """
    Describe the API: welcome message plus every known endpoint and socket.

    :param request: Flask request (unused)
    :return: dict with message, endpoint paths, socket event names, version
    """
    return {
        "message": "Welcome to Jarvis Server API !",
        "endpoints": [endpoint.value for endpoint in Endpoints],
        "sockets": [socket.value for socket in Sockets],
        "version": "1.0.0"  # TODO: get version from somewhere
    }
def status_endpoint(request):
    """Liveness probe handler; the request is ignored."""
    response = {"status": "ok"}
    return response
def speech_to_text(request):
    """
    Transcribe an audio payload to text.

    Example of request body (per the original design):
    {
        "data": "base64 encoded audio file",
        "engine": "faster-whisper"
    }

    :param request: Flask request carrying the audio
    :return: dict with the transcription under the "text" key
    """
    logging.info("New STT request from " + request.remote_addr)
    # NOTE(review): `request.engine` is not a standard Flask Request
    # attribute, and audio_utils.get_text_from_audio compares the engine to
    # STTEngine members with `is` — a raw string would never match. Confirm
    # how the engine should be extracted/converted (e.g. get_enum_from_str).
    # NOTE(review): request.data is the raw body, not the base64 JSON field
    # shown above — confirm the wire format with the clients.
    text = audio_utils.get_text_from_audio(request.data, request.engine)
    logging.info("STT result for " + request.remote_addr + " : " + text)
    return {"text": text}
def text_to_speech(request):
    """
    Synthesise speech for the given text (Piper) — currently a stub that
    always returns empty audio.

    Example of request body (per the original design):
    {
        "data": "Hello World !",
        "engine": "piper"
    }

    :param request: Flask request carrying the text
    :return: dict with the audio payload under the "audio" key
    """
    logging.info("New TTS request from " + request.remote_addr)
    # TODO: implement TTS
    return {"audio": ""}

View File

@ -0,0 +1,13 @@
import logging
from flask_socketio import emit
def add_message_from_user(text, room_id):
    """
    Emit a 'message_from_user' socket event carrying *text* to the room.

    :param text: message content
    :param room_id: target room (client uuid)
    """
    logging.debug("Sending message from user to room " + room_id + " : " + text)
    emit('message_from_user', {'data': text, "uuid": room_id}, to=room_id)
def add_message_from_assistant(text, room_id):
    """
    Emit a 'message_from_assistant' socket event carrying *text* to the room.

    :param text: message content
    :param room_id: target room (client uuid)
    """
    logging.debug("Sending message from assistant to room " + room_id + " : " + text)
    emit('message_from_assistant', {'data': text, "uuid": room_id}, to=room_id)

View File

@ -0,0 +1,74 @@
import json
import logging
from flask_socketio import rooms, join_room, leave_room, emit
from src.declarations import Sockets
from src.network import interactions
def handle_socket(socket, message):
    """
    Dispatch an incoming socket.io event to its handler.

    :param socket: Sockets enum member identifying the event
    :param message: JSON-encoded payload (None for CONNECT)
    :return: None
    """
    if socket is Sockets.CONNECT:
        connect()
    elif socket is Sockets.JOIN:
        join(message)
    elif socket is Sockets.LEAVE:
        leave(message)
    elif socket is Sockets.PROCESS_MESSAGE:
        process_message(message)
    elif socket is Sockets.CLEAR_CHAT:
        clear_chat(message)
    else:
        # str() is required here: the original concatenated the enum member
        # directly onto the string, which raises TypeError instead of logging.
        logging.warning("Unknown socket " + str(socket))
def connect():
    """Acknowledge a new connection by emitting a 'connection' event."""
    emit('connection', {'data': 'Connected', 'count': 0})
def join(message):
    """
    Join the room named by the client's uuid.

    :param message: JSON string with a 'uuid' key
    """
    message = json.loads(message)
    logging.info("New client joined room " + message['uuid'])
    join_room(message['uuid'])
def leave(message):
    """
    Leave the room named by the client's uuid.

    :param message: JSON string with a 'uuid' key
    """
    message = json.loads(message)
    logging.info("Client left room " + message['uuid'])
    leave_room(message['uuid'])
def process_message(message):
    """
    Handle a chat message from a client: ensure the sender's room exists,
    then send the (currently hard-coded) assistant reply back to that room.

    :param message: JSON string with 'uuid' (room id) and 'data' (the text)
    """
    message = json.loads(message)
    logging.info("New process request from room " + message['uuid'])
    logging.info("Message : " + message['data'])
    # Create the sender's room on the fly if it doesn't exist yet
    if message['uuid'] not in rooms():
        logging.warning("Room not found, creating it")
        join_room(message['uuid'])
    # TODO: maybe implement grammar check and correction ?
    # intent_manager.recognise(message['data'], message['uuid'])
    # Empty messages are silently ignored
    if message['data'] != "":
        # response = chatgpt_utils.chatgpt_recognise(message['data'], message['uuid'])
        text_response = "Tokens are expensive ya know?"
        print(text_response)
        interactions.add_message_from_assistant(text_response, message['uuid'])
        # chat_utils.send_jarvis_message_to_room(response['response'], message['uuid'])
def clear_chat(message):
    """
    Clear chat history for a specific room.

    :param message: JSON string with a 'uuid' key identifying the room
    :return:
    """
    message = json.loads(message)
    emit('clear_chat', {}, to=message['uuid'])
    # chatgpt_utils.clear_chat(uuid)

View File

@ -5,7 +5,7 @@ import types
from adapt.engine import DomainIntentDeterminationEngine
from padatious import IntentContainer
from src import api
from src import network
adapt_engine = DomainIntentDeterminationEngine()
padatious_intents_container = IntentContainer('intent_cache')
@ -160,7 +160,7 @@ def recognise(sentence, uuid=None):
launch_intent(look_for_matching_intent(sentence))
# TODO: find why not working
api.send_jarvis_message_to_room("Not implemented that yet, please wait.", uuid)
network.send_jarvis_message_to_room("Not implemented that yet, please wait.", uuid)
class SkillRegistering(type):

View File

@ -1,13 +0,0 @@
import logging
from flask_socketio import emit
def send_user_message_to_room(text, room_id):
    """
    Emit a 'message_from_user' socket event carrying *text* to the room.

    :param text: message content
    :param room_id: target room (client uuid)
    """
    logging.debug("Sending message from user to room " + room_id + " : " + text)
    emit('message_from_user', {'data': text, "uuid": room_id}, to=room_id)
def send_jarvis_message_to_room(text, room_id):
    """
    Emit a 'message_from_jarvis' socket event carrying *text* to the room.

    :param text: message content
    :param room_id: target room (client uuid)
    """
    logging.debug("Sending message from jarvis to room " + room_id + " : " + text)
    emit('message_from_jarvis', {'data': text, "uuid": room_id}, to=room_id)

View File

@ -0,0 +1,17 @@
import os
import requests
api_url = os.getenv("HOMEASSISTANT_URL") # TODO: get URL from mobile app
token = os.getenv("HOMEASSISTANT_TOKEN") # TODO: get token from mobile app
# client = Client(api_url, token)
def send_message_to_homeassistant(message, language="en"):
    """
    Forward a text message to Home Assistant's conversation API.

    :param message: sentence to process
    :param language: conversation language code (default "en")
    :return: the requests.Response from Home Assistant (callers that
             ignored the previous None return are unaffected)
    """
    # Make a POST request to the API. A timeout is mandatory: the original
    # call had none, so an unreachable Home Assistant would hang forever.
    return requests.post(api_url + "/api/conversation/process", json={
        "text": message,
        "language": language
    }, headers={"Authorization": "Bearer " + token, "content-type": "application/json"},
        timeout=10)