Compare commits
No commits in common. "7ce3df75f7188f1da6d762f042d17cb1deeaab12" and "d580673ea37207ef118ad107ad7ab8f691c5bcaf" have entirely different histories.
7ce3df75f7
...
d580673ea3
@@ -1,44 +0,0 @@
# BASIC CLIENT FOR INTERACTING WITH THE SERVER
# This client is used to test the server and to interact with it
import json

import socketio

HOST = "localhost"
PORT = 6000
waiting = False

if __name__ == '__main__':

    # Create a TCP/IP socket
    sock = socketio.Client()
    sock.connect(f"http://{HOST}:{PORT}")

    # Join the room
    sock.emit('join', json.dumps({'uuid': 'clientpc'}))

    # Listen for messages from the server
    @sock.on('message_from_assistant')
    def on_message_from_jarvis(data):
        print("Assistant says: " + data['data'])
        global waiting
        waiting = False


    # Chat with the server
    while True:

        while not waiting:
            message = input("Enter a message to send to the server: ")

            # Exit when CTRL+C is pressed
            if message == "exit":
                print("Exiting")

                # Leave the room
                sock.emit('leave', json.dumps({'uuid': 'clientpc'}))
                exit(0)

            waiting = True

            sock.emit('process_message', json.dumps({'data': message, 'uuid': 'clientpc'}))
12
run.py
@@ -1,9 +1,8 @@
import logging

from src.audio import audio_utils
from src import api
from src.database import db_utils
from src.declarations import TTSEngine, STTEngine
from src.network import api
from src.utils import faster_whisper_utils

# import lingua_franca

@@ -23,8 +22,9 @@ if __name__ == '__main__':
    # Load the skills
    # intent_manager.load_all_skills()

    # Load the audio model(s)
    audio_utils.load_models(stt_engine=STTEngine.FASTER_WHISPER, tts_engine=TTSEngine.PIPER)
    # Load the STT (whisper) model
    # whisper_utils.load_model()
    faster_whisper_utils.load_model()

    # Start the api server
    # Start the api endpoint
    api.start_api(6000)
121
src/api.py
Normal file
@@ -0,0 +1,121 @@
import json
import logging

import openai
import sys
import tempfile
from threading import Lock
from flask import Flask, request
from flask_socketio import SocketIO, emit, join_room, leave_room, \
    rooms

from src.utils import chat_utils, chatgpt_utils, faster_whisper_utils

# Set this variable to "threading", "eventlet" or "gevent" to test the
# different async modes, or leave it set to None for the application to choose
# the best option based on installed packages.
async_mode = None

app = Flask(__name__)
app.config['SECRET_KEY'] = 'secret!'
socketio = SocketIO(app, async_mode=async_mode)
thread = None
thread_lock = Lock()

openai.api_key = sys.argv[1]


@app.route('/')
def index():
    return "Welcome to Jarvis Server API !"


@socketio.event
def process_message(message):
    message = json.loads(message)
    logging.info("New PROCESS request from room " + message['uuid'])
    logging.info("Message : " + message['data'])

    if message['uuid'] not in rooms():
        logging.warning("Room not found, creating it")
        join_room(message['uuid'])

    # TODO: maybe implement grammar check and correction ?

    # intent_manager.recognise(message['data'], message['uuid'])
    if message['data'] != "":
        response = chatgpt_utils.chatgpt_recognise(message['data'], message['uuid'])
        # text_response = "Tokens are expensive ya know?"

        chat_utils.send_jarvis_message_to_room(response['response'], message['uuid'])


@socketio.event
def join(message):
    message = json.loads(message)

    logging.info("New client joined room " + message['uuid'])
    join_room(message['uuid'])


@socketio.event
def leave(message):
    leave_room(message['uuid'])


@socketio.event
def connect():
    global thread
    emit('my_response', {'data': 'Connected', 'count': 0})


@socketio.event
def clear_chat(uuid):
    """
    Clear chat history for a specific room.
    :param uuid: uuid
    :return:
    """
    # uuid = json.loads(uuid)

    emit('clear_chat', {}, to=uuid)
    chatgpt_utils.clear_chat(uuid)


# .WAV (i.e.) FILE REQUEST
@app.route("/get_text_from_audio", methods=['POST'])
def get_text_from_audio():
    """
    Transcribe audio file using whisper.

    :return: transcription text
    """

    logging.info("New STT request from " + request.remote_addr)

    audio_temp_file = tempfile.NamedTemporaryFile(prefix='jarvis-audio_', suffix='_client')
    audio_temp_file.write(request.data)

    # text = whisper_utils.whisper_cpp_stt(audio_temp_file.name)
    text = faster_whisper_utils.faster_whisper_stt(audio_temp_file.name)

    logging.info("STT result for " + request.remote_addr + " : " + text)

    return {"data": text}


"""
@src.route("/process_text", methods=['POST'])
def process_text():
    print("[" + request.remote_addr + "] - New TXT request")

    text = request.values['text']

    answer = intent_manager.recognise(text, request.headers.get('Client-Ip'), request.headers.get('Client-Port'))

    return {"transcription": text, "answer": answer}"""


def start_api(port=6000):
    logging.info("Starting Jarvis Server API on port " + str(port) + "...")
    socketio.run(app, host='0.0.0.0', port=port)
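For context, here is a minimal sketch of how a client could exercise the two entry points added in src/api.py above: the /get_text_from_audio HTTP route (raw audio bytes in the POST body, a {"data": "<text>"} JSON reply) and the process_message Socket.IO event. The host, port, file name and room uuid below are illustrative assumptions, not part of the diff; since openai.api_key is read from sys.argv[1], this also suggests the server is launched as "python run.py <openai-api-key>".

# Illustrative only: assumes the server from this diff is running on localhost:6000
# and that "speech.wav" and the 'demo-room' uuid are placeholders.
import json

import requests
import socketio

BASE_URL = "http://localhost:6000"

# REST: POST raw audio bytes to /get_text_from_audio, expect {"data": "<text>"} back
with open("speech.wav", "rb") as f:
    reply = requests.post(f"{BASE_URL}/get_text_from_audio", data=f.read())
print(reply.json()["data"])

# Socket.IO: join a room, listen for the assistant's reply, then send a message
sio = socketio.Client()


@sio.on('message_from_jarvis')
def on_reply(data):
    # Payload shape comes from chat_utils.send_jarvis_message_to_room
    print("Jarvis:", data['data'])


sio.connect(BASE_URL)
sio.emit('join', json.dumps({'uuid': 'demo-room'}))
sio.emit('process_message', json.dumps({'data': 'Hello Jarvis', 'uuid': 'demo-room'}))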
@@ -1,58 +0,0 @@
import tempfile

from src.audio.stt import faster_whisper
from src.declarations import STTEngine, TTSEngine


def load_models(stt_engine: STTEngine, tts_engine: TTSEngine):
    """
    Load the STT and TTS models in the memory.
    :return:
    """

    if stt_engine is STTEngine.FASTER_WHISPER:
        faster_whisper.load_model()
    elif stt_engine is STTEngine.WHISPER:
        pass
    else:
        raise Exception("Unknown STT engine: " + stt_engine.name)

    if tts_engine is TTSEngine.PIPER:
        pass
    else:
        raise Exception("Unknown TTS engine: " + tts_engine.name)


def get_text_from_audio(audio_bytes, stt_engine):
    """
    Transcribe audio file.

    :param audio_bytes:
    :param stt_engine:
    :return:
    """

    audio_temp_file = tempfile.NamedTemporaryFile(prefix='jarvis-audio_', suffix='_client')
    audio_temp_file.write(audio_bytes)

    if stt_engine is STTEngine.FASTER_WHISPER:
        return faster_whisper.speech_to_text(audio_temp_file.name)
        # text = whisper_utils.whisper_cpp_stt(audio_temp_file.name)
    elif stt_engine is STTEngine.WHISPER:
        # TODO: implement whisper
        pass
    else:
        raise Exception("Unknown STT engine: " + stt_engine.name)


def get_speech_from_text(text, tts_engine):
    """
    Speak text using Piper.
    :return: audio file
    """

    # TODO: implement TTS
    if tts_engine is TTSEngine.PIPER:
        pass
    else:
        raise Exception("Unknown TTS engine: " + tts_engine.name)
@@ -10,7 +10,7 @@ def create_database():
    """Creates the database."""

    # Check if the database already exists
    if os.path.exists(project_path / "resources" / "db-jarvis-commands-memory.sqlite"):
    if os.path.exists(project_path / "resources" / "jarvis-commands-memory.sqlite"):
        logging.debug("Database already exists, skipping creation")
        return

@@ -1,50 +0,0 @@
from enum import Enum


class Sockets(Enum):
    """
    List of sockets
    """
    PROCESS_MESSAGE = "process_message"
    JOIN = "join"
    LEAVE = "leave"
    CONNECT = "connect"
    CLEAR_CHAT = "clear_chat"


class Endpoints(Enum):
    """
    List of endpoints
    """
    DEFAULT = "/"
    STATUS = "/status"
    STT = "/stt"
    TTS = "/tts"


class TTSEngine(Enum):
    """
    List of TTS engines
    """
    PIPER = "piper"


class STTEngine(Enum):
    """
    List of STT engines
    """
    WHISPER = "whisper"
    FASTER_WHISPER = "faster_whisper"


def get_enum_from_str(enumclass, name):
    """
    Get enum from string
    :param enumclass:
    :param name:
    :return:
    """
    for enum in enumclass:
        if enum.name == name:
            return enum
    raise Exception("Unknown enum " + name)
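As an aside, a tiny usage sketch of the get_enum_from_str helper from the declarations module removed in this hunk (illustrative only; the module no longer exists on the other side of the diff): it matches on the member name, not the value.

# Illustrative only: uses the src.declarations module deleted above.
from src.declarations import STTEngine, get_enum_from_str

engine = get_enum_from_str(STTEngine, "FASTER_WHISPER")
assert engine is STTEngine.FASTER_WHISPER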
@@ -1,83 +0,0 @@
import logging
import os

import openai
from flask import Flask, request
from flask_socketio import SocketIO

from src.network import endpoint_handler, socket_handler
from src.declarations import Endpoints, Sockets

# Set this variable to "threading", "eventlet" or "gevent" to test the
# different async modes, or leave it set to None for the application to choose
# the best option based on installed packages.
async_mode = None

app = Flask(__name__)
app.config['SECRET_KEY'] = 'secret!'

socketio = SocketIO(app, async_mode=async_mode)
# thread = None
# thread_lock = Lock()

openai.api_key = os.getenv("OPENAI_API_KEY")


#
# ENDPOINTS
#


@app.route('/')
def index():
    return endpoint_handler.handle_endpoint(Endpoints.DEFAULT, request)


@app.route('/status')
def status():
    return endpoint_handler.handle_endpoint(Endpoints.STATUS, request)


@app.route("/stt", methods=['POST'])
def speech_to_text():
    return endpoint_handler.handle_endpoint(Endpoints.STT, request)


@app.route("/tts", methods=['POST'])
def text_to_speech():
    return endpoint_handler.handle_endpoint(Endpoints.TTS, request)


#
# SOCKETS
#


@socketio.event
def process_message(message):
    return socket_handler.handle_socket(Sockets.PROCESS_MESSAGE, message)


@socketio.event
def join(message):
    return socket_handler.handle_socket(Sockets.JOIN, message)


@socketio.event
def leave(message):
    return socket_handler.handle_socket(Sockets.LEAVE, message)


@socketio.event
def connect():
    return socket_handler.handle_socket(Sockets.CONNECT, None)


@socketio.event
def clear_chat(message):
    return socket_handler.handle_socket(Sockets.CLEAR_CHAT, message)


def start_api(port=6000):
    logging.info("Starting Jarvis Server API on port " + str(port) + "...")
    socketio.run(app, host='0.0.0.0', port=port)
@@ -1,77 +0,0 @@
import logging

from src.audio import audio_utils
from src.declarations import Endpoints, Sockets


def handle_endpoint(endpoint, request):
    if endpoint is Endpoints.DEFAULT:
        return default_endpoint(request)
    elif endpoint is Endpoints.STATUS:
        return status_endpoint(request)
    elif endpoint is Endpoints.STT:
        return speech_to_text(request)
    elif endpoint is Endpoints.TTS:
        return text_to_speech(request)
    else:
        return default_endpoint(request)


def default_endpoint(request):
    list_endpoints = []
    for endpoint in Endpoints:
        list_endpoints.append(endpoint.value)

    list_sockets = []
    for socket in Sockets:
        list_sockets.append(socket.value)

    return {"message": "Welcome to Jarvis Server API !",
            "endpoints": list_endpoints,
            "sockets": list_sockets,
            "version": "1.0.0"  # TODO: get version from somewhere
            }


def status_endpoint(request):
    return {"status": "ok"}


def speech_to_text(request):
    """
    Transcribe audio file using whisper.

    Example of request:
    {
        "data": "base64 encoded audio file",
        "engine": "faster-whisper"
    }

    :return: transcription text
    """

    logging.info("New STT request from " + request.remote_addr)
    text = audio_utils.get_text_from_audio(request.data, request.engine)
    logging.info("STT result for " + request.remote_addr + " : " + text)

    return {"text": text}


def text_to_speech(request):
    """
    Speak text using Piper.

    Example of request:
    {
        "data": "Hello World !",
        "engine": "piper"
    }

    :return: audio data
    """

    logging.info("New TTS request from " + request.remote_addr)

    # TODO: implement TTS

    return {"audio": ""}
@@ -1,13 +0,0 @@
import logging

from flask_socketio import emit


def add_message_from_user(text, room_id):
    logging.debug("Sending message from user to room " + room_id + " : " + text)
    emit('message_from_user', {'data': text, "uuid": room_id}, to=room_id)


def add_message_from_assistant(text, room_id):
    logging.debug("Sending message from assistant to room " + room_id + " : " + text)
    emit('message_from_assistant', {'data': text, "uuid": room_id}, to=room_id)
@@ -1,74 +0,0 @@
import json
import logging

from flask_socketio import rooms, join_room, leave_room, emit

from src.declarations import Sockets
from src.network import interactions


def handle_socket(socket, message):
    if socket is Sockets.CONNECT:
        connect()
    elif socket is Sockets.JOIN:
        join(message)
    elif socket is Sockets.LEAVE:
        leave(message)
    elif socket is Sockets.PROCESS_MESSAGE:
        process_message(message)
    elif socket is Sockets.CLEAR_CHAT:
        clear_chat(message)
    else:
        logging.warning("Unknown socket " + socket)


def connect():
    emit('connection', {'data': 'Connected', 'count': 0})


def join(message):
    message = json.loads(message)

    logging.info("New client joined room " + message['uuid'])
    join_room(message['uuid'])


def leave(message):
    message = json.loads(message)

    logging.info("Client left room " + message['uuid'])
    leave_room(message['uuid'])


def process_message(message):
    message = json.loads(message)

    logging.info("New process request from room " + message['uuid'])
    logging.info("Message : " + message['data'])

    if message['uuid'] not in rooms():
        logging.warning("Room not found, creating it")
        join_room(message['uuid'])

    # TODO: maybe implement grammar check and correction ?

    # intent_manager.recognise(message['data'], message['uuid'])
    if message['data'] != "":
        # response = chatgpt_utils.chatgpt_recognise(message['data'], message['uuid'])
        text_response = "Tokens are expensive ya know?"

        print(text_response)
        interactions.add_message_from_assistant(text_response, message['uuid'])
        # chat_utils.send_jarvis_message_to_room(response['response'], message['uuid'])


def clear_chat(message):
    """
    Clear chat history for a specific room.
    :param uuid: uuid
    :return:
    """
    message = json.loads(message)

    emit('clear_chat', {}, to=message['uuid'])
    # chatgpt_utils.clear_chat(uuid)
Binary file not shown.
@@ -5,7 +5,7 @@ import types
from adapt.engine import DomainIntentDeterminationEngine
from padatious import IntentContainer

from src import network
from src import api

adapt_engine = DomainIntentDeterminationEngine()
padatious_intents_container = IntentContainer('intent_cache')
@@ -160,7 +160,7 @@ def recognise(sentence, uuid=None):
        launch_intent(look_for_matching_intent(sentence))

        # TODO: find why not working
        network.send_jarvis_message_to_room("Not implemented that yet, please wait.", uuid)
        api.send_jarvis_message_to_room("Not implemented that yet, please wait.", uuid)


class SkillRegistering(type):
13
src/utils/chat_utils.py
Normal file
@@ -0,0 +1,13 @@
import logging

from flask_socketio import emit


def send_user_message_to_room(text, room_id):
    logging.debug("Sending message from user to room " + room_id + " : " + text)
    emit('message_from_user', {'data': text, "uuid": room_id}, to=room_id)


def send_jarvis_message_to_room(text, room_id):
    logging.debug("Sending message from jarvis to room " + room_id + " : " + text)
    emit('message_from_jarvis', {'data': text, "uuid": room_id}, to=room_id)
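As an illustration (not part of the diff), a Socket.IO client that wants to receive what send_jarvis_message_to_room emits would register a handler for the 'message_from_jarvis' event; the host, port and room uuid below are assumptions.

# Illustrative listener sketch; assumes the server runs on localhost:6000.
import socketio

sio = socketio.Client()


@sio.on('message_from_jarvis')
def on_jarvis_message(data):
    # Payload emitted above: {'data': text, 'uuid': room_id}
    print("Jarvis (" + data['uuid'] + "): " + data['data'])


sio.connect("http://localhost:6000")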
@@ -2,19 +2,8 @@ import logging
from faster_whisper import WhisperModel

model = None


def load_model(model_size='small', device="cpu", cpu_threads=8, compute_type="int8"):
    """
    Load the whisper model in the memory.

    :param model_size: small, medium or large
    :param device: cpu or cuda
    :param cpu_threads: number of cpu threads
    :param compute_type: use int8 (haven't tested others)
    :return: None
    """
    log_level = logging.getLogger().level
    global model
    model = WhisperModel(model_size_or_path=model_size, device=device, cpu_threads=cpu_threads,
@@ -23,26 +12,22 @@ def load_model(model_size='small', device="cpu", cpu_threads=8, compute_type="in


def get_model():
    """
    Get the whisper model.
    :return: the whisper model
    :rtype: WhisperModel
    """
    return model


def speech_to_text(audio_file):
def faster_whisper_stt(audio_file):
    """
    Transcribe audio file using faster_whisper, no additional server/service needed, runs on CPU.

    :param audio_file: path to audio file
    :return: transcription text
    :param audio_file:
    :param model:
    :return: text
    """
    if get_model() is None:
    if model is None:
        logging.error("Model is not loaded")
        load_model()

    segments, info = get_model().transcribe(audio_file, beam_size=5, language='fr')
    segments, info = model.transcribe(audio_file, beam_size=5, language='fr')
    print("Detected language '%s' with probability %f" % (info.language, info.language_probability))

    # combines all segments in one string
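For reference, a minimal sketch of how the renamed helper would be used on its own. The audio path is a placeholder, faster-whisper must be installed, and, as the hunk above shows, the transcription language is currently hard-coded to 'fr'.

# Illustrative only; "sample.wav" is a placeholder path.
from src.utils import faster_whisper_utils

# Loads the small model on CPU with int8 compute (the defaults above).
faster_whisper_utils.load_model()

# Transcribe a local audio file and print the result.
text = faster_whisper_utils.faster_whisper_stt("sample.wav")
print(text)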
@@ -1,17 +0,0 @@
import os

import requests

api_url = os.getenv("HOMEASSISTANT_URL")  # TODO: get URL from mobile app
token = os.getenv("HOMEASSISTANT_TOKEN")  # TODO: get token from mobile app


# client = Client(api_url, token)


def send_message_to_homeassistant(message, language="en"):
    # Make a POST request to the API
    requests.post(api_url + "/api/conversation/process", json={
        "text": message,
        "language": language
    }, headers={"Authorization": "Bearer " + token, "content-type": "application/json"})