Compare commits

..

2 Commits

19 changed files with 446 additions and 149 deletions

44
basic_test_client.py Normal file
View File

@ -0,0 +1,44 @@
# BASIC CLIENT FOR INTERACTING WITH THE SERVER
# This client is used to test the server and to interact with it
import json
import time

import socketio

HOST = "localhost"
PORT = 6000

# Set to True after sending a message; cleared by the server-response callback.
waiting = False

if __name__ == '__main__':
    # Connect to the server over socket.io
    sock = socketio.Client()
    sock.connect(f"http://{HOST}:{PORT}")
    # Join the room
    sock.emit('join', json.dumps({'uuid': 'clientpc'}))

    # Listen for messages from the server
    @sock.on('message_from_assistant')
    def on_message_from_jarvis(data):
        print("Assistant says: " + data['data'])
        global waiting
        waiting = False

    # Chat with the server
    while True:
        if waiting:
            # BUGFIX: the original `while not waiting: ...` inner loop
            # busy-spun at 100% CPU while waiting for the assistant's
            # reply; sleep briefly instead until the callback clears the flag.
            time.sleep(0.05)
            continue
        message = input("Enter a message to send to the server: ")
        # Typing "exit" leaves the room and quits the client
        if message == "exit":
            print("Exiting")
            # Leave the room before terminating
            sock.emit('leave', json.dumps({'uuid': 'clientpc'}))
            exit(0)
        waiting = True
        sock.emit('process_message', json.dumps({'data': message, 'uuid': 'clientpc'}))

12
run.py
View File

@ -1,8 +1,9 @@
import logging
from src import api
from src.audio import audio_utils
from src.database import db_utils
from src.utils import faster_whisper_utils
from src.declarations import TTSEngine, STTEngine
from src.network import api
# import lingua_franca
@ -22,9 +23,8 @@ if __name__ == '__main__':
# Load the skills
# intent_manager.load_all_skills()
# Load the STT (whisper) model
# whisper_utils.load_model()
faster_whisper_utils.load_model()
# Load the audio model(s)
audio_utils.load_models(stt_engine=STTEngine.FASTER_WHISPER, tts_engine=TTSEngine.PIPER)
# Start the api endpoint
# Start the api server
api.start_api(6000)

View File

@ -1,121 +0,0 @@
import json
import logging
import openai
import sys
import tempfile
from threading import Lock
from flask import Flask, request
from flask_socketio import SocketIO, emit, join_room, leave_room, \
rooms
from src.utils import chat_utils, chatgpt_utils, faster_whisper_utils
# Set this variable to "threading", "eventlet" or "gevent" to test the
# different async modes, or leave it set to None for the application to choose
# the best option based on installed packages.
async_mode = None
app = Flask(__name__)
# NOTE(review): hardcoded secret key — fine for dev, must come from config/env in production.
app.config['SECRET_KEY'] = 'secret!'
socketio = SocketIO(app, async_mode=async_mode)
thread = None
thread_lock = Lock()
# OpenAI key is taken from the first CLI argument; crashes with IndexError if missing.
openai.api_key = sys.argv[1]
@app.route('/')
def index():
    # Root endpoint: plain-text liveness banner for manual checks.
    return "Welcome to Jarvis Server API !"
@socketio.event
def process_message(message):
    """
    Handle a chat message from a client: ensure the sender's room exists,
    run ChatGPT recognition on the text, and push the response to the room.

    :param message: JSON string with keys 'uuid' (room id) and 'data' (text)
    """
    message = json.loads(message)
    logging.info("New PROCESS request from room " + message['uuid'])
    logging.info("Message : " + message['data'])
    # Auto-create the room if the client never explicitly joined it.
    if message['uuid'] not in rooms():
        logging.warning("Room not found, creating it")
        join_room(message['uuid'])
    # TODO: maybe implement grammar check and correction ?
    # intent_manager.recognise(message['data'], message['uuid'])
    if message['data'] != "":
        response = chatgpt_utils.chatgpt_recognise(message['data'], message['uuid'])
        # text_response = "Tokens are expensive ya know?"
        chat_utils.send_jarvis_message_to_room(response['response'], message['uuid'])
@socketio.event
def join(message):
    """Subscribe the calling client to the room named by its uuid."""
    message = json.loads(message)
    logging.info("New client joined room " + message['uuid'])
    join_room(message['uuid'])


@socketio.event
def leave(message):
    # NOTE(review): unlike join(), the payload is not json.loads()-decoded
    # here, so indexing the raw JSON string with 'uuid' would raise
    # TypeError — looks like a latent bug in this (deleted) version; confirm.
    leave_room(message['uuid'])
@socketio.event
def connect():
    # `thread` is declared global but never assigned here — leftover from the
    # flask-socketio example this was based on.
    global thread
    emit('my_response', {'data': 'Connected', 'count': 0})


@socketio.event
def clear_chat(uuid):
    """
    Clear chat history for a specific room.
    :param uuid: room id (passed as a plain string, not JSON)
    :return:
    """
    # uuid = json.loads(uuid)
    emit('clear_chat', {}, to=uuid)
    chatgpt_utils.clear_chat(uuid)
# .WAV (i.e.) FILE REQUEST
@app.route("/get_text_from_audio", methods=['POST'])
def get_text_from_audio():
    """
    Transcribe audio file using whisper.

    :return: transcription text
    """
    logging.info("New STT request from " + request.remote_addr)
    audio_temp_file = tempfile.NamedTemporaryFile(prefix='jarvis-audio_', suffix='_client')
    # NOTE(review): the temp file is written but never flush()ed before the
    # transcriber opens it by name — trailing bytes may still sit in the
    # buffer; confirm whether short audio was ever truncated here.
    audio_temp_file.write(request.data)
    # text = whisper_utils.whisper_cpp_stt(audio_temp_file.name)
    text = faster_whisper_utils.faster_whisper_stt(audio_temp_file.name)
    logging.info("STT result for " + request.remote_addr + " : " + text)
    return {"data": text}
"""
@src.route("/process_text", methods=['POST'])
def process_text():
print("[" + request.remote_addr + "] - New TXT request")
text = request.values['text']
answer = intent_manager.recognise(text, request.headers.get('Client-Ip'), request.headers.get('Client-Port'))
return {"transcription": text, "answer": answer}"""
def start_api(port=6000):
logging.info("Starting Jarvis Server API on port " + str(port) + "...")
socketio.run(app, host='0.0.0.0', port=port)

0
src/audio/__init__.py Normal file
View File

58
src/audio/audio_utils.py Normal file
View File

@ -0,0 +1,58 @@
import tempfile
from src.audio.stt import faster_whisper
from src.declarations import STTEngine, TTSEngine
def load_models(stt_engine: STTEngine, tts_engine: TTSEngine):
    """
    Load the STT and TTS models in memory.

    :param stt_engine: which speech-to-text engine to initialise
    :param tts_engine: which text-to-speech engine to initialise
    :return:
    """
    if stt_engine is STTEngine.FASTER_WHISPER:
        faster_whisper.load_model()
    elif stt_engine is not STTEngine.WHISPER:
        # WHISPER is accepted but not loaded yet (stub); anything else is invalid.
        raise Exception("Unknown STT engine: " + stt_engine.name)
    if tts_engine is not TTSEngine.PIPER:
        raise Exception("Unknown TTS engine: " + tts_engine.name)
def get_text_from_audio(audio_bytes, stt_engine):
    """
    Transcribe raw audio bytes with the selected STT engine.

    :param audio_bytes: raw audio file content
    :param stt_engine: STTEngine member selecting the transcription backend
    :return: transcription text (None for the not-yet-implemented WHISPER stub)
    :raises Exception: for unknown engines
    """
    audio_temp_file = tempfile.NamedTemporaryFile(prefix='jarvis-audio_', suffix='_client')
    audio_temp_file.write(audio_bytes)
    # BUGFIX: flush so the transcriber, which re-opens the file by name, sees
    # all the bytes — without it the tail of the audio can remain buffered.
    audio_temp_file.flush()
    if stt_engine is STTEngine.FASTER_WHISPER:
        # text = whisper_utils.whisper_cpp_stt(audio_temp_file.name)
        return faster_whisper.speech_to_text(audio_temp_file.name)
    elif stt_engine is STTEngine.WHISPER:
        # TODO: implement whisper
        pass
    else:
        raise Exception("Unknown STT engine: " + stt_engine.name)
def get_speech_from_text(text, tts_engine):
    """
    Synthesise speech for the given text (not implemented yet).

    :param text: text to speak
    :param tts_engine: TTSEngine member selecting the synthesis backend
    :return: audio file
    """
    # TODO: implement TTS
    if tts_engine is not TTSEngine.PIPER:
        raise Exception("Unknown TTS engine: " + tts_engine.name)

View File

View File

@ -2,8 +2,19 @@ import logging
from faster_whisper import WhisperModel
model = None
def load_model(model_size='small', device="cpu", cpu_threads=8, compute_type="int8"):
"""
Load the whisper model in the memory.
:param model_size: small, medium or large
:param device: cpu or cuda
:param cpu_threads: number of cpu threads
:param compute_type: use int8 (haven't tested others)
:return: None
"""
log_level = logging.getLogger().level
global model
model = WhisperModel(model_size_or_path=model_size, device=device, cpu_threads=cpu_threads,
@ -12,22 +23,26 @@ def load_model(model_size='small', device="cpu", cpu_threads=8, compute_type="in
def get_model():
"""
Get the whisper model.
:return: the whisper model
:rtype: WhisperModel
"""
return model
def faster_whisper_stt(audio_file):
def speech_to_text(audio_file):
"""
Transcribe audio file using faster_whisper, no additional server/service needed, runs on CPU.
:param audio_file:
:param model:
:return: text
:param audio_file: path to audio file
:return: transcription text
"""
if model is None:
if get_model() is None:
logging.error("Model is not loaded")
load_model()
segments, info = model.transcribe(audio_file, beam_size=5, language='fr')
segments, info = get_model().transcribe(audio_file, beam_size=5, language='fr')
print("Detected language '%s' with probability %f" % (info.language, info.language_probability))
# combines all segments in one string

View File

View File

@ -10,7 +10,7 @@ def create_database():
"""Creates the database."""
# Check if the database already exists
if os.path.exists(project_path / "resources" / "jarvis-commands-memory.sqlite"):
if os.path.exists(project_path / "resources" / "db-jarvis-commands-memory.sqlite"):
logging.debug("Database already exists, skipping creation")
return

50
src/declarations.py Normal file
View File

@ -0,0 +1,50 @@
from enum import Enum
class Sockets(Enum):
    """
    Socket.io event names handled by the server.
    """
    PROCESS_MESSAGE = "process_message"
    JOIN = "join"
    LEAVE = "leave"
    CONNECT = "connect"
    CLEAR_CHAT = "clear_chat"


class Endpoints(Enum):
    """
    HTTP endpoint paths exposed by the server.
    """
    DEFAULT = "/"
    STATUS = "/status"
    STT = "/stt"
    TTS = "/tts"


class TTSEngine(Enum):
    """
    Supported text-to-speech engines.
    """
    PIPER = "piper"


class STTEngine(Enum):
    """
    Supported speech-to-text engines.
    """
    WHISPER = "whisper"
    FASTER_WHISPER = "faster_whisper"
def get_enum_from_str(enumclass, name):
    """
    Look up an enum member of *enumclass* by its member name.

    :param enumclass: the Enum subclass to search
    :param name: the member name (e.g. "PIPER")
    :return: the matching enum member
    :raises Exception: if no member of enumclass has that name
    """
    try:
        # Enum classes support direct lookup by member name — no manual scan.
        return enumclass[name]
    except KeyError:
        # Preserve the original error type and message for callers.
        raise Exception("Unknown enum " + name) from None

0
src/network/__init__.py Normal file
View File

83
src/network/api.py Normal file
View File

@ -0,0 +1,83 @@
import logging
import os
import openai
from flask import Flask, request
from flask_socketio import SocketIO
from src.network import endpoint_handler, socket_handler
from src.declarations import Endpoints, Sockets
# Set this variable to "threading", "eventlet" or "gevent" to test the
# different async modes, or leave it set to None for the application to choose
# the best option based on installed packages.
async_mode = None
app = Flask(__name__)
# NOTE(review): hardcoded secret key — fine for dev, should come from config/env in production.
app.config['SECRET_KEY'] = 'secret!'
socketio = SocketIO(app, async_mode=async_mode)
# thread = None
# thread_lock = Lock()
# OpenAI key now comes from the environment instead of argv (change vs. old api.py).
openai.api_key = os.getenv("OPENAI_API_KEY")
#
# ENDPOINTS
#
# Thin Flask routes: each one forwards the request to endpoint_handler,
# keyed by the matching Endpoints enum member.


@app.route('/')
def index():
    return endpoint_handler.handle_endpoint(Endpoints.DEFAULT, request)


@app.route('/status')
def status():
    return endpoint_handler.handle_endpoint(Endpoints.STATUS, request)


@app.route("/stt", methods=['POST'])
def speech_to_text():
    return endpoint_handler.handle_endpoint(Endpoints.STT, request)


@app.route("/tts", methods=['POST'])
def text_to_speech():
    return endpoint_handler.handle_endpoint(Endpoints.TTS, request)
#
# SOCKETS
#
# Thin socket.io event handlers: each one forwards to socket_handler,
# keyed by the matching Sockets enum member.


@socketio.event
def process_message(message):
    return socket_handler.handle_socket(Sockets.PROCESS_MESSAGE, message)


@socketio.event
def join(message):
    return socket_handler.handle_socket(Sockets.JOIN, message)


@socketio.event
def leave(message):
    return socket_handler.handle_socket(Sockets.LEAVE, message)


@socketio.event
def connect():
    # connect carries no payload, hence None.
    return socket_handler.handle_socket(Sockets.CONNECT, None)


@socketio.event
def clear_chat(message):
    return socket_handler.handle_socket(Sockets.CLEAR_CHAT, message)
def start_api(port=6000):
    """
    Start the Flask-SocketIO server, listening on all interfaces.

    :param port: TCP port to bind (default 6000)
    """
    logging.info(f"Starting Jarvis Server API on port {port}...")
    socketio.run(app, host='0.0.0.0', port=port)

View File

@ -0,0 +1,77 @@
import logging
from src.audio import audio_utils
from src.declarations import Endpoints, Sockets
def handle_endpoint(endpoint, request):
    """
    Dispatch an HTTP request to the handler matching *endpoint*.

    Unknown endpoints (and DEFAULT) fall back to the default endpoint.

    :param endpoint: an Endpoints enum member
    :param request: the incoming Flask request
    :return: the handler's response dict
    """
    dispatch = {
        Endpoints.STATUS: status_endpoint,
        Endpoints.STT: speech_to_text,
        Endpoints.TTS: text_to_speech,
    }
    handler = dispatch.get(endpoint, default_endpoint)
    return handler(request)
def default_endpoint(request):
    """
    Describe the API: welcome banner, advertised endpoints, socket events
    and version.

    :param request: the incoming Flask request (unused)
    :return: dict describing the service
    """
    # Comprehensions replace the original manual append loops.
    return {"message": "Welcome to Jarvis Server API !",
            "endpoints": [endpoint.value for endpoint in Endpoints],
            "sockets": [socket.value for socket in Sockets],
            "version": "1.0.0"  # TODO: get version from somewhere
            }
def status_endpoint(request):
    """
    Health-check endpoint: always reports the service as up.

    :param request: the incoming Flask request (unused)
    :return: dict with a single "status" field
    """
    payload = {"status": "ok"}
    return payload
def speech_to_text(request):
    """
    Transcribe audio file using whisper.
    Example of request:
    {
        "data": "base64 encoded audio file",
        "engine": "faster-whisper"
    }
    :return: transcription text
    """
    logging.info("New STT request from " + request.remote_addr)
    # NOTE(review): flask.Request has no `engine` attribute, so this line
    # raises AttributeError at runtime; the engine presumably needs to come
    # from the request JSON/headers per the docstring above — confirm the
    # intended contract. Likewise request.data here is the raw body, not the
    # base64-decoded "data" field the docstring describes.
    text = audio_utils.get_text_from_audio(request.data, request.engine)
    logging.info("STT result for " + request.remote_addr + " : " + text)
    return {"text": text}
def text_to_speech(request):
    """
    Placeholder TTS endpoint: logs the request and returns empty audio.

    Example of request:
    {
        "data": "Hello World !",
        "engine": "piper"
    }
    :param request: the incoming Flask request
    :return: dict with an (currently empty) "audio" field
    """
    logging.info("New TTS request from " + request.remote_addr)
    # TODO: implement TTS
    response = {"audio": ""}
    return response

View File

@ -0,0 +1,13 @@
import logging
from flask_socketio import emit
def add_message_from_user(text, room_id):
    """Broadcast a user-authored chat message to every client in *room_id*."""
    logging.debug(f"Sending message from user to room {room_id} : {text}")
    payload = {'data': text, "uuid": room_id}
    emit('message_from_user', payload, to=room_id)
def add_message_from_assistant(text, room_id):
    """Broadcast an assistant-authored chat message to every client in *room_id*."""
    logging.debug(f"Sending message from assistant to room {room_id} : {text}")
    payload = {'data': text, "uuid": room_id}
    emit('message_from_assistant', payload, to=room_id)

View File

@ -0,0 +1,74 @@
import json
import logging
from flask_socketio import rooms, join_room, leave_room, emit
from src.declarations import Sockets
from src.network import interactions
def handle_socket(socket, message):
    """
    Dispatch an incoming socket.io event to its handler.

    :param socket: a Sockets enum member identifying the event
    :param message: the raw JSON-string payload, or None for CONNECT
    """
    if socket is Sockets.CONNECT:
        connect()
    elif socket is Sockets.JOIN:
        join(message)
    elif socket is Sockets.LEAVE:
        leave(message)
    elif socket is Sockets.PROCESS_MESSAGE:
        process_message(message)
    elif socket is Sockets.CLEAR_CHAT:
        clear_chat(message)
    else:
        # BUGFIX: concatenating a non-str `socket` onto a str raised
        # TypeError; let logging format the value lazily instead.
        logging.warning("Unknown socket %s", socket)
def connect():
    """Acknowledge a new socket connection to the client."""
    payload = {'data': 'Connected', 'count': 0}
    emit('connection', payload)
def join(message):
    """Parse the join payload and subscribe the client to its uuid room."""
    payload = json.loads(message)
    room = payload['uuid']
    logging.info("New client joined room " + room)
    join_room(room)
def leave(message):
    """Parse the leave payload and remove the client from its uuid room."""
    payload = json.loads(message)
    room = payload['uuid']
    logging.info("Client left room " + room)
    leave_room(room)
def process_message(message):
    """
    Process a chat message from a client room.

    Ensures the sender's room exists, then (for now) answers with a canned
    response instead of calling ChatGPT.

    :param message: JSON string with keys 'uuid' (room id) and 'data' (text)
    """
    message = json.loads(message)
    logging.info("New process request from room " + message['uuid'])
    logging.info("Message : " + message['data'])
    # Auto-create the room if the client never explicitly joined it.
    if message['uuid'] not in rooms():
        logging.warning("Room not found, creating it")
        join_room(message['uuid'])
    # TODO: maybe implement grammar check and correction ?
    # intent_manager.recognise(message['data'], message['uuid'])
    if message['data'] != "":
        # response = chatgpt_utils.chatgpt_recognise(message['data'], message['uuid'])
        text_response = "Tokens are expensive ya know?"
        # BUGFIX: leftover debug print() replaced with a proper debug log.
        logging.debug(text_response)
        interactions.add_message_from_assistant(text_response, message['uuid'])
        # chat_utils.send_jarvis_message_to_room(response['response'], message['uuid'])
def clear_chat(message):
    """
    Clear chat history for a specific room.

    :param message: JSON string with key 'uuid' identifying the room
    :return:
    """
    message = json.loads(message)
    emit('clear_chat', {}, to=message['uuid'])
    # chatgpt_utils.clear_chat(uuid)

View File

@ -5,7 +5,7 @@ import types
from adapt.engine import DomainIntentDeterminationEngine
from padatious import IntentContainer
from src import api
from src import network
adapt_engine = DomainIntentDeterminationEngine()
padatious_intents_container = IntentContainer('intent_cache')
@ -160,7 +160,7 @@ def recognise(sentence, uuid=None):
launch_intent(look_for_matching_intent(sentence))
# TODO: find why not working
api.send_jarvis_message_to_room("Not implemented that yet, please wait.", uuid)
network.send_jarvis_message_to_room("Not implemented that yet, please wait.", uuid)
class SkillRegistering(type):

View File

@ -1,13 +0,0 @@
import logging
from flask_socketio import emit
def send_user_message_to_room(text, room_id):
    # Broadcast a user-authored message to all clients in the room.
    logging.debug("Sending message from user to room " + room_id + " : " + text)
    emit('message_from_user', {'data': text, "uuid": room_id}, to=room_id)


def send_jarvis_message_to_room(text, room_id):
    # Broadcast an assistant ("jarvis") message to all clients in the room.
    logging.debug("Sending message from jarvis to room " + room_id + " : " + text)
    emit('message_from_jarvis', {'data': text, "uuid": room_id}, to=room_id)

View File

@ -0,0 +1,17 @@
import os
import requests
# Home Assistant connection settings come from the environment; both may be
# None if unset, which makes send_message_to_homeassistant fail at call time.
api_url = os.getenv("HOMEASSISTANT_URL")  # TODO: get URL from mobile app
token = os.getenv("HOMEASSISTANT_TOKEN")  # TODO: get token from mobile app
# client = Client(api_url, token)
def send_message_to_homeassistant(message, language="en"):
    """
    POST a conversation message to the Home Assistant conversation API.

    :param message: text for Home Assistant to process
    :param language: ISO language code of the message (default "en")
    :return: the requests.Response from Home Assistant
    """
    # Make a POST request to the API.
    # BUGFIX: a timeout is required so an unreachable Home Assistant instance
    # cannot hang the caller forever; the response is now returned so callers
    # can inspect the result (previously it was silently discarded).
    return requests.post(api_url + "/api/conversation/process", json={
        "text": message,
        "language": language
    }, headers={"Authorization": "Bearer " + token, "content-type": "application/json"},
        timeout=10)