Massive rework of the whole API and sockets handling; the Android app will need rework too, but the server is more dynamic now and more usable for other clients and external apps.

This commit is contained in:
Mathieu Broillet 2023-11-06 20:38:25 +01:00
parent cb24495c95
commit 7ce3df75f7
Signed by: mathieu
GPG Key ID: C0E9E0E95AF03319
18 changed files with 445 additions and 148 deletions

44
basic_test_client.py Normal file
View File

@ -0,0 +1,44 @@
# BASIC CLIENT FOR INTERACTING WITH THE SERVER
# This client is used to test the server and to interact with it
import json
import time

import socketio

HOST = "localhost"
PORT = 6000

# True while we are waiting for the assistant's reply to the last message
waiting = False

if __name__ == '__main__':
    # Create a socket.io client and connect to the server
    sock = socketio.Client()
    sock.connect(f"http://{HOST}:{PORT}")

    # Join the room (rooms are keyed by client uuid)
    sock.emit('join', json.dumps({'uuid': 'clientpc'}))

    # Listen for messages from the server
    @sock.on('message_from_assistant')
    def on_message_from_jarvis(data):
        print("Assistant says: " + data['data'])
        global waiting
        waiting = False

    # Chat with the server
    while True:
        if waiting:
            # Sleep instead of busy-spinning: the original re-checked
            # `waiting` in a tight loop, burning 100% CPU until the
            # assistant's reply arrived.
            time.sleep(0.1)
            continue
        message = input("Enter a message to send to the server: ")
        if message == "exit":
            print("Exiting")
            # Leave the room before terminating
            sock.emit('leave', json.dumps({'uuid': 'clientpc'}))
            exit(0)
        waiting = True
        sock.emit('process_message', json.dumps({'data': message, 'uuid': 'clientpc'}))

12
run.py
View File

@ -1,8 +1,9 @@
import logging
from src import api
from src.audio import audio_utils
from src.database import db_utils
from src.utils import faster_whisper_utils
from src.declarations import TTSEngine, STTEngine
from src.network import api
# import lingua_franca
@ -22,9 +23,8 @@ if __name__ == '__main__':
# Load the skills
# intent_manager.load_all_skills()
# Load the STT (whisper) model
# whisper_utils.load_model()
faster_whisper_utils.load_model()
# Load the audio model(s)
audio_utils.load_models(stt_engine=STTEngine.FASTER_WHISPER, tts_engine=TTSEngine.PIPER)
# Start the api endpoint
# Start the api server
api.start_api(6000)

View File

@ -1,121 +0,0 @@
import json
import logging
import openai
import sys
import tempfile
from threading import Lock
from flask import Flask, request
from flask_socketio import SocketIO, emit, join_room, leave_room, \
rooms
from src.utils import chat_utils, chatgpt_utils, faster_whisper_utils
# Set this variable to "threading", "eventlet" or "gevent" to test the
# different async modes, or leave it set to None for the application to choose
# the best option based on installed packages.
async_mode = None
app = Flask(__name__)
app.config['SECRET_KEY'] = 'secret!'
socketio = SocketIO(app, async_mode=async_mode)
thread = None
thread_lock = Lock()
openai.api_key = sys.argv[1]
@app.route('/')
def index():
    """Root endpoint: return a plain welcome string."""
    return "Welcome to Jarvis Server API !"
@socketio.event
def process_message(message):
    """
    Socket event: process a user's chat message and reply into their room.

    :param message: JSON string with 'uuid' (room id) and 'data' (the text)
    """
    message = json.loads(message)
    logging.info("New PROCESS request from room " + message['uuid'])
    logging.info("Message : " + message['data'])
    # Make sure the sender's room exists before replying into it
    if message['uuid'] not in rooms():
        logging.warning("Room not found, creating it")
        join_room(message['uuid'])
    # TODO: maybe implement grammar check and correction ?
    # intent_manager.recognise(message['data'], message['uuid'])
    # Empty messages are ignored (no ChatGPT call, no reply)
    if message['data'] != "":
        response = chatgpt_utils.chatgpt_recognise(message['data'], message['uuid'])
        # text_response = "Tokens are expensive ya know?"
        chat_utils.send_jarvis_message_to_room(response['response'], message['uuid'])
@socketio.event
def join(message):
    """
    Socket event: join the room named by the client's uuid.

    :param message: JSON string with a 'uuid' key
    """
    message = json.loads(message)
    logging.info("New client joined room " + message['uuid'])
    join_room(message['uuid'])
@socketio.event
def leave(message):
    """
    Socket event: leave the room named by the client's uuid.

    NOTE(review): unlike join/process_message, `message` is not passed
    through json.loads() here — if clients send a JSON string (as the test
    client does), indexing it with ['uuid'] would fail; confirm.
    """
    leave_room(message['uuid'])
@socketio.event
def connect():
    """Socket event: acknowledge a new connection with a 'my_response' emit."""
    global thread
    emit('my_response', {'data': 'Connected', 'count': 0})
@socketio.event
def clear_chat(uuid):
    """
    Clear chat history for a specific room.

    :param uuid: room identifier (used verbatim; the json.loads call is
        commented out, so callers are expected to send the raw id)
    :return:
    """
    # uuid = json.loads(uuid)
    emit('clear_chat', {}, to=uuid)
    chatgpt_utils.clear_chat(uuid)
# .WAV (i.e.) FILE REQUEST
@app.route("/get_text_from_audio", methods=['POST'])
def get_text_from_audio():
    """
    Transcribe audio file using whisper.

    The raw request body is written to a named temporary file and the
    transcriber reads it back by file name.

    :return: dict with the transcription under the "data" key
    """
    logging.info("New STT request from " + request.remote_addr)
    audio_temp_file = tempfile.NamedTemporaryFile(prefix='jarvis-audio_', suffix='_client')
    audio_temp_file.write(request.data)
    # NOTE(review): the buffer is not flushed before the file is re-read by
    # name below — buffered bytes may not be on disk yet; confirm.
    # text = whisper_utils.whisper_cpp_stt(audio_temp_file.name)
    text = faster_whisper_utils.faster_whisper_stt(audio_temp_file.name)
    logging.info("STT result for " + request.remote_addr + " : " + text)
    return {"data": text}
"""
@src.route("/process_text", methods=['POST'])
def process_text():
print("[" + request.remote_addr + "] - New TXT request")
text = request.values['text']
answer = intent_manager.recognise(text, request.headers.get('Client-Ip'), request.headers.get('Client-Port'))
return {"transcription": text, "answer": answer}"""
def start_api(port=6000):
    """
    Start the Flask-SocketIO server on all interfaces.

    :param port: TCP port to listen on (default 6000)
    """
    logging.info("Starting Jarvis Server API on port " + str(port) + "...")
    socketio.run(app, host='0.0.0.0', port=port)

0
src/audio/__init__.py Normal file
View File

58
src/audio/audio_utils.py Normal file
View File

@ -0,0 +1,58 @@
import tempfile
from src.audio.stt import faster_whisper
from src.declarations import STTEngine, TTSEngine
def load_models(stt_engine: STTEngine, tts_engine: TTSEngine):
    """
    Pre-load the selected STT and TTS models into memory.

    :param stt_engine: which speech-to-text backend to initialise
    :param tts_engine: which text-to-speech backend to initialise
    :return: None
    :raises Exception: when either engine is not a known member
    """
    # STT: only faster-whisper actually loads anything today;
    # plain whisper is accepted but a no-op.
    if stt_engine is STTEngine.FASTER_WHISPER:
        faster_whisper.load_model()
    elif stt_engine is not STTEngine.WHISPER:
        raise Exception("Unknown STT engine: " + stt_engine.name)

    # TTS: piper is accepted but loading is not implemented yet.
    if tts_engine is not TTSEngine.PIPER:
        raise Exception("Unknown TTS engine: " + tts_engine.name)
def get_text_from_audio(audio_bytes, stt_engine):
    """
    Transcribe an audio clip to text.

    :param audio_bytes: raw audio file content (e.g. a .wav payload)
    :param stt_engine: STTEngine member selecting the backend
    :return: transcription text (currently None for STTEngine.WHISPER,
             which is not implemented yet)
    :raises Exception: if the engine is unknown
    """
    audio_temp_file = tempfile.NamedTemporaryFile(prefix='jarvis-audio_', suffix='_client')
    audio_temp_file.write(audio_bytes)
    # Flush so the transcriber (which reopens the file by name) sees all the
    # bytes — without this, buffered data may not have reached the disk yet.
    audio_temp_file.flush()

    if stt_engine is STTEngine.FASTER_WHISPER:
        return faster_whisper.speech_to_text(audio_temp_file.name)
        # text = whisper_utils.whisper_cpp_stt(audio_temp_file.name)
    elif stt_engine is STTEngine.WHISPER:
        # TODO: implement whisper
        pass
    else:
        raise Exception("Unknown STT engine: " + stt_engine.name)
def get_speech_from_text(text, tts_engine):
    """
    Synthesise speech for the given text (not implemented yet).

    :param text: sentence to speak
    :param tts_engine: TTSEngine member selecting the backend
    :return: audio file (currently None — TTS is a stub)
    :raises Exception: if the engine is unknown
    """
    # TODO: implement TTS
    if tts_engine is not TTSEngine.PIPER:
        raise Exception("Unknown TTS engine: " + tts_engine.name)

View File

View File

@ -2,8 +2,19 @@ import logging
from faster_whisper import WhisperModel
model = None
def load_model(model_size='small', device="cpu", cpu_threads=8, compute_type="int8"):
"""
Load the whisper model in the memory.
:param model_size: small, medium or large
:param device: cpu or cuda
:param cpu_threads: number of cpu threads
:param compute_type: use int8 (haven't tested others)
:return: None
"""
log_level = logging.getLogger().level
global model
model = WhisperModel(model_size_or_path=model_size, device=device, cpu_threads=cpu_threads,
@ -12,22 +23,26 @@ def load_model(model_size='small', device="cpu", cpu_threads=8, compute_type="in
def get_model():
    """
    Get the whisper model.

    :return: the whisper model (None until load_model() has been called)
    :rtype: WhisperModel
    """
    return model
def faster_whisper_stt(audio_file):
def speech_to_text(audio_file):
"""
Transcribe audio file using faster_whisper, no additional server/service needed, runs on CPU.
:param audio_file:
:param model:
:return: text
:param audio_file: path to audio file
:return: transcription text
"""
if model is None:
if get_model() is None:
logging.error("Model is not loaded")
load_model()
segments, info = model.transcribe(audio_file, beam_size=5, language='fr')
segments, info = get_model().transcribe(audio_file, beam_size=5, language='fr')
print("Detected language '%s' with probability %f" % (info.language, info.language_probability))
# combines all segments in one string

View File

50
src/declarations.py Normal file
View File

@ -0,0 +1,50 @@
from enum import Enum
class Sockets(Enum):
    """
    Socket.io event names exposed by the server API.
    """
    PROCESS_MESSAGE = "process_message"
    JOIN = "join"
    LEAVE = "leave"
    CONNECT = "connect"
    CLEAR_CHAT = "clear_chat"
class Endpoints(Enum):
    """
    HTTP endpoint paths exposed by the server API.
    """
    DEFAULT = "/"
    STATUS = "/status"
    STT = "/stt"
    TTS = "/tts"
class TTSEngine(Enum):
    """
    Supported text-to-speech engines.
    """
    PIPER = "piper"
class STTEngine(Enum):
    """
    Supported speech-to-text engines.
    """
    WHISPER = "whisper"
    FASTER_WHISPER = "faster_whisper"
def get_enum_from_str(enumclass, name):
    """
    Look up an enum member of *enumclass* by its member name.

    :param enumclass: the Enum subclass to search
    :param name: the member name, case-sensitive (e.g. "PIPER")
    :return: the matching enum member
    :raises Exception: if no member with that name exists
    """
    try:
        # Enum classes support direct name lookup; no manual scan needed.
        return enumclass[name]
    except KeyError:
        # Keep the original error contract (plain Exception) for callers.
        raise Exception("Unknown enum " + name) from None

0
src/network/__init__.py Normal file
View File

83
src/network/api.py Normal file
View File

@ -0,0 +1,83 @@
import logging
import os
import openai
from flask import Flask, request
from flask_socketio import SocketIO
from src.network import endpoint_handler, socket_handler
from src.declarations import Endpoints, Sockets
# Set this variable to "threading", "eventlet" or "gevent" to test the
# different async modes, or leave it set to None for the application to choose
# the best option based on installed packages.
async_mode = None
app = Flask(__name__)
app.config['SECRET_KEY'] = 'secret!'
socketio = SocketIO(app, async_mode=async_mode)
# thread = None
# thread_lock = Lock()
openai.api_key = os.getenv("OPENAI_API_KEY")
#
# ENDPOINTS
#
@app.route('/')
def index():
    """GET /: describe the API (delegates to the DEFAULT endpoint handler)."""
    return endpoint_handler.handle_endpoint(Endpoints.DEFAULT, request)
@app.route('/status')
def status():
    """GET /status: simple health check."""
    return endpoint_handler.handle_endpoint(Endpoints.STATUS, request)
@app.route("/stt", methods=['POST'])
def speech_to_text():
return endpoint_handler.handle_endpoint(Endpoints.STT, request)
@app.route("/tts", methods=['POST'])
def text_to_speech():
return endpoint_handler.handle_endpoint(Endpoints.TTS, request)
#
# SOCKETS
#
@socketio.event
def process_message(message):
    """Socket 'process_message': forward a chat message to the socket handler."""
    return socket_handler.handle_socket(Sockets.PROCESS_MESSAGE, message)
@socketio.event
def join(message):
    """Socket 'join': a client joins its room (keyed by uuid)."""
    return socket_handler.handle_socket(Sockets.JOIN, message)
@socketio.event
def leave(message):
    """Socket 'leave': a client leaves its room (keyed by uuid)."""
    return socket_handler.handle_socket(Sockets.LEAVE, message)
@socketio.event
def connect():
    """Socket 'connect': acknowledge a new client connection (no payload)."""
    return socket_handler.handle_socket(Sockets.CONNECT, None)
@socketio.event
def clear_chat(message):
    """Socket 'clear_chat': wipe the chat history of the sender's room."""
    return socket_handler.handle_socket(Sockets.CLEAR_CHAT, message)
def start_api(port=6000):
    """
    Start the Flask-SocketIO server on all interfaces.

    :param port: TCP port to listen on (default 6000)
    """
    logging.info("Starting Jarvis Server API on port " + str(port) + "...")
    socketio.run(app, host='0.0.0.0', port=port)

View File

@ -0,0 +1,77 @@
import logging
from src.audio import audio_utils
from src.declarations import Endpoints, Sockets
def handle_endpoint(endpoint, request):
    """
    Route an incoming HTTP request to the handler matching *endpoint*.

    Unknown endpoints fall back to the default (index) handler.

    :param endpoint: Endpoints enum member
    :param request: the Flask request object, passed through to the handler
    :return: the handler's response dict
    """
    dispatch = {
        Endpoints.DEFAULT: default_endpoint,
        Endpoints.STATUS: status_endpoint,
        Endpoints.STT: speech_to_text,
        Endpoints.TTS: text_to_speech,
    }
    handler = dispatch.get(endpoint, default_endpoint)
    return handler(request)
def default_endpoint(request):
    """
    Describe the API: welcome message plus every known endpoint and socket.

    :param request: Flask request (unused)
    :return: dict with message, endpoint paths, socket event names, version
    """
    return {
        "message": "Welcome to Jarvis Server API !",
        "endpoints": [endpoint.value for endpoint in Endpoints],
        "sockets": [socket.value for socket in Sockets],
        "version": "1.0.0"  # TODO: get version from somewhere
    }
def status_endpoint(request):
    """Liveness probe handler; the request is ignored."""
    response = {"status": "ok"}
    return response
def speech_to_text(request):
    """
    Transcribe an audio payload to text.

    Example of request body (per the original design):
    {
        "data": "base64 encoded audio file",
        "engine": "faster-whisper"
    }

    :param request: Flask request carrying the audio
    :return: dict with the transcription under the "text" key
    """
    logging.info("New STT request from " + request.remote_addr)
    # NOTE(review): `request.engine` is not a standard Flask Request
    # attribute, and audio_utils.get_text_from_audio compares the engine to
    # STTEngine members with `is` — a raw string would never match. Confirm
    # how the engine should be extracted/converted (e.g. get_enum_from_str).
    # NOTE(review): request.data is the raw body, not the base64 JSON field
    # shown above — confirm the wire format with the clients.
    text = audio_utils.get_text_from_audio(request.data, request.engine)
    logging.info("STT result for " + request.remote_addr + " : " + text)
    return {"text": text}
def text_to_speech(request):
    """
    Synthesise speech for the given text (Piper) — currently a stub that
    always returns empty audio.

    Example of request body (per the original design):
    {
        "data": "Hello World !",
        "engine": "piper"
    }

    :param request: Flask request carrying the text
    :return: dict with the audio payload under the "audio" key
    """
    logging.info("New TTS request from " + request.remote_addr)
    # TODO: implement TTS
    return {"audio": ""}

View File

@ -0,0 +1,13 @@
import logging
from flask_socketio import emit
def add_message_from_user(text, room_id):
    """
    Emit a 'message_from_user' socket event carrying *text* to the room.

    :param text: message content
    :param room_id: target room (client uuid)
    """
    logging.debug("Sending message from user to room " + room_id + " : " + text)
    emit('message_from_user', {'data': text, "uuid": room_id}, to=room_id)
def add_message_from_assistant(text, room_id):
    """
    Emit a 'message_from_assistant' socket event carrying *text* to the room.

    :param text: message content
    :param room_id: target room (client uuid)
    """
    logging.debug("Sending message from assistant to room " + room_id + " : " + text)
    emit('message_from_assistant', {'data': text, "uuid": room_id}, to=room_id)

View File

@ -0,0 +1,74 @@
import json
import logging
from flask_socketio import rooms, join_room, leave_room, emit
from src.declarations import Sockets
from src.network import interactions
def handle_socket(socket, message):
    """
    Dispatch an incoming socket.io event to its handler.

    :param socket: Sockets enum member identifying the event
    :param message: JSON-encoded payload (None for CONNECT)
    :return: None
    """
    if socket is Sockets.CONNECT:
        connect()
    elif socket is Sockets.JOIN:
        join(message)
    elif socket is Sockets.LEAVE:
        leave(message)
    elif socket is Sockets.PROCESS_MESSAGE:
        process_message(message)
    elif socket is Sockets.CLEAR_CHAT:
        clear_chat(message)
    else:
        # str() is required here: the original concatenated the enum member
        # directly onto the string, which raises TypeError instead of logging.
        logging.warning("Unknown socket " + str(socket))
def connect():
    """Acknowledge a new connection by emitting a 'connection' event."""
    emit('connection', {'data': 'Connected', 'count': 0})
def join(message):
    """
    Join the room named by the client's uuid.

    :param message: JSON string with a 'uuid' key
    """
    message = json.loads(message)
    logging.info("New client joined room " + message['uuid'])
    join_room(message['uuid'])
def leave(message):
    """
    Leave the room named by the client's uuid.

    :param message: JSON string with a 'uuid' key
    """
    message = json.loads(message)
    logging.info("Client left room " + message['uuid'])
    leave_room(message['uuid'])
def process_message(message):
    """
    Handle a chat message from a client: ensure the sender's room exists,
    then send the (currently hard-coded) assistant reply back to that room.

    :param message: JSON string with 'uuid' (room id) and 'data' (the text)
    """
    message = json.loads(message)
    logging.info("New process request from room " + message['uuid'])
    logging.info("Message : " + message['data'])
    # Create the sender's room on the fly if it doesn't exist yet
    if message['uuid'] not in rooms():
        logging.warning("Room not found, creating it")
        join_room(message['uuid'])
    # TODO: maybe implement grammar check and correction ?
    # intent_manager.recognise(message['data'], message['uuid'])
    # Empty messages are silently ignored
    if message['data'] != "":
        # response = chatgpt_utils.chatgpt_recognise(message['data'], message['uuid'])
        text_response = "Tokens are expensive ya know?"
        print(text_response)
        interactions.add_message_from_assistant(text_response, message['uuid'])
        # chat_utils.send_jarvis_message_to_room(response['response'], message['uuid'])
def clear_chat(message):
    """
    Clear chat history for a specific room.

    :param message: JSON string with a 'uuid' key identifying the room
    :return:
    """
    message = json.loads(message)
    emit('clear_chat', {}, to=message['uuid'])
    # chatgpt_utils.clear_chat(uuid)

View File

@ -5,7 +5,7 @@ import types
from adapt.engine import DomainIntentDeterminationEngine
from padatious import IntentContainer
from src import api
from src import network
adapt_engine = DomainIntentDeterminationEngine()
padatious_intents_container = IntentContainer('intent_cache')
@ -160,7 +160,7 @@ def recognise(sentence, uuid=None):
launch_intent(look_for_matching_intent(sentence))
# TODO: find why not working
api.send_jarvis_message_to_room("Not implemented that yet, please wait.", uuid)
network.send_jarvis_message_to_room("Not implemented that yet, please wait.", uuid)
class SkillRegistering(type):

View File

@ -1,13 +0,0 @@
import logging
from flask_socketio import emit
def send_user_message_to_room(text, room_id):
    """
    Emit a 'message_from_user' socket event carrying *text* to the room.

    :param text: message content
    :param room_id: target room (client uuid)
    """
    logging.debug("Sending message from user to room " + room_id + " : " + text)
    emit('message_from_user', {'data': text, "uuid": room_id}, to=room_id)
def send_jarvis_message_to_room(text, room_id):
    """
    Emit a 'message_from_jarvis' socket event carrying *text* to the room.

    :param text: message content
    :param room_id: target room (client uuid)
    """
    logging.debug("Sending message from jarvis to room " + room_id + " : " + text)
    emit('message_from_jarvis', {'data': text, "uuid": room_id}, to=room_id)

View File

@ -0,0 +1,17 @@
import os
import requests
api_url = os.getenv("HOMEASSISTANT_URL") # TODO: get URL from mobile app
token = os.getenv("HOMEASSISTANT_TOKEN") # TODO: get token from mobile app
# client = Client(api_url, token)
def send_message_to_homeassistant(message, language="en"):
    """
    Forward a text message to Home Assistant's conversation API.

    :param message: sentence to process
    :param language: conversation language code (default "en")
    :return: the requests.Response from Home Assistant (callers that
             ignored the previous None return are unaffected)
    """
    # Make a POST request to the API. A timeout is mandatory: the original
    # call had none, so an unreachable Home Assistant would hang forever.
    return requests.post(api_url + "/api/conversation/process", json={
        "text": message,
        "language": language
    }, headers={"Authorization": "Bearer " + token, "content-type": "application/json"},
        timeout=10)