added gevent for websockets server and reworked main structure

This commit is contained in:
Mathieu B 2023-03-25 12:25:47 +01:00
parent 2b356c5dea
commit ee2eca484f
5 changed files with 123 additions and 66 deletions

View File

@ -1,16 +1,15 @@
import json import json
import logging
import sys import sys
import tempfile import tempfile
from threading import Lock from threading import Lock
import openai import openai
import requests
from flask import Flask, request from flask import Flask, request
from flask_socketio import SocketIO, emit, join_room, leave_room, \ from flask_socketio import SocketIO, emit, join_room, leave_room, \
rooms rooms
from pywhispercpp.model import Model
from jarvis.utils.chatgpt_utils import chatgpt_recognise from jarvis.utils import chat_utils, whisper_utils
# Set this variable to "threading", "eventlet" or "gevent" to test the # Set this variable to "threading", "eventlet" or "gevent" to test the
# different async modes, or leave it set to None for the application to choose # different async modes, or leave it set to None for the application to choose
@ -22,7 +21,7 @@ app.config['SECRET_KEY'] = 'secret!'
socketio = SocketIO(app, async_mode=async_mode) socketio = SocketIO(app, async_mode=async_mode)
thread = None thread = None
thread_lock = Lock() thread_lock = Lock()
model = Model('base', n_threads=16, suppress_non_speech_tokens=True)
openai.api_key = sys.argv[1] openai.api_key = sys.argv[1]
@ -34,25 +33,34 @@ def index():
@socketio.event @socketio.event
def process_message(message): def process_message(message):
message = json.loads(message) message = json.loads(message)
print("New PROCESS request from room " + message['uuid']) logging.info("New PROCESS request from room " + message['uuid'])
logging.info("Message : " + message['data'])
print("Message : " + message['data']) # TODO: maybe implement grammar check and correction ?
# TODO: maybe implement grammar check ?
# intent_manager.recognise(message['data'], message['uuid']) # intent_manager.recognise(message['data'], message['uuid'])
send_jarvis_message_to_room("I don't know how to respond to that...", message['uuid'])
response = chatgpt_recognise(message['data']) if message['data'] != "":
if 'comment' in response: # response = chatgpt_recognise(message['data'])
send_user_message_to_room(response['comment'], message['uuid']) response = {'action': 'answer',
else: 'answer': "Hello! As an AI, I don't have emotions, but I'm always here to help you with your smart home needs. How can I assist you today?"}
send_jarvis_message_to_room("I don't know how to respond to that...", message['uuid'])
if response['action'] == 'clarify':
chat_utils.send_jarvis_message_to_room(response['question'], message['uuid'])
elif response['action'] == 'command':
chat_utils.send_jarvis_message_to_room(response['comment'], message['uuid'])
elif response['action'] == 'query':
chat_utils.send_jarvis_message_to_room(response['device_description'], message['uuid'])
elif response['action'] == 'answer':
chat_utils.send_jarvis_message_to_room(response['answer'], message['uuid'])
else:
chat_utils.send_jarvis_message_to_room("I don't know how to respond to that...", message['uuid'])
@socketio.event @socketio.event
def join(message): def join(message):
message = json.loads(message) message = json.loads(message)
print("New client joined room " + message['uuid']) logging.info("New client joined room " + message['uuid'])
join_room(message['uuid']) join_room(message['uuid'])
@ -68,27 +76,18 @@ def connect():
emit('my_response', {'data': 'Connected', 'count': 0}) emit('my_response', {'data': 'Connected', 'count': 0})
def send_user_message_to_room(text, room_id):
socketio.emit('message_from_user', {'data': text, "uuid": room_id}, to=room_id)
def send_jarvis_message_to_room(text, room_id):
socketio.emit('message_from_jarvis', {'data': text, "uuid": room_id}, to=room_id)
# .WAV (i.e.) FILE REQUEST # .WAV (i.e.) FILE REQUEST
@app.route("/get_text_from_audio", methods=['POST']) @app.route("/get_text_from_audio", methods=['POST'])
def get_text_from_audio(): def get_text_from_audio():
print("[" + request.remote_addr + "] - New STT request") logging.info("New STT request from " + request.remote_addr)
audio_temp_file = tempfile.NamedTemporaryFile(prefix='jarvis-audio_', suffix='_client') audio_temp_file = tempfile.NamedTemporaryFile(prefix='jarvis-audio_', suffix='_client')
audio_temp_file.write(request.data) audio_temp_file.write(request.data)
# text = whisper_stt(audio_temp_file.name) text = whisper_utils.whisper_cpp_stt(audio_temp_file.name)
text = whisper_cpp_stt(audio_temp_file.name) logging.info("STT result for " + request.remote_addr + " : " + text)
print(text)
return {"data": text, "uuid": "null"} return {"data": text}
""" """
@ -103,39 +102,6 @@ def process_text():
return {"transcription": text, "answer": answer}""" return {"transcription": text, "answer": answer}"""
# send request to whisper-asr server (docker) def start_api():
def whisper_stt(audio_file): logging.info("Starting Jarvis Server API...")
headers = { socketio.run(app, host='0.0.0.0', port=6000)
'accept': 'application/json',
# 'Content-Type': 'multipart/form-data',
}
params = {
'task': 'transcribe',
# TODO: add to config
'language': 'fr',
'output': 'json',
}
files = {
'audio_file': open(audio_file, 'rb'),
}
# TODO: add to config
response = requests.post('https://whisper.broillet.ch/asr', params=params, headers=headers, files=files)
return json.loads(response.text)['text']
def whisper_cpp_stt(audio_file):
segments = model.transcribe(audio_file, speed_up=False, translate=False)
# combines all segments in one string
text = ''
for segment in segments:
text += segment.text + ' '
return text
def start_server():
socketio.run(app, host='0.0.0.0', port=6000, allow_unsafe_werkzeug=True)

View File

@ -1,10 +1,14 @@
import api import logging
import lingua_franca import lingua_franca
import api
from jarvis.skills.cocktails import CocktailSkill from jarvis.skills.cocktails import CocktailSkill
from jarvis.skills.intent_services import intent_manager from jarvis.skills.intent_services import intent_manager
from jarvis.utils import whisper_utils
if __name__ == '__main__': if __name__ == '__main__':
logging.getLogger().setLevel(logging.DEBUG)
# Load lingua franca in the memory # Load lingua franca in the memory
lingua_franca.load_language(lang="fr") lingua_franca.load_language(lang="fr")
@ -15,5 +19,8 @@ if __name__ == '__main__':
# Load the skills # Load the skills
intent_manager.load_all_skills() intent_manager.load_all_skills()
# Load the STT (whisper) model
whisper_utils.load_model()
# Start the api endpoint # Start the api endpoint
api.start_server() api.start_api()

View File

@ -0,0 +1,13 @@
import logging
from jarvis.api import socketio
def send_user_message_to_room(text, room_id):
    """Emit *text* to every client in *room_id* as a message coming from the user."""
    logging.debug(f"Sending message from user to room {room_id} : {text}")
    payload = {'data': text, "uuid": room_id}
    socketio.emit('message_from_user', payload, to=room_id)
def send_jarvis_message_to_room(text, room_id):
    """Emit *text* to every client in *room_id* as a message coming from Jarvis."""
    logging.debug(f"Sending message from jarvis to room {room_id} : {text}")
    payload = {'data': text, "uuid": room_id}
    socketio.emit('message_from_jarvis', payload, to=room_id)

View File

@ -0,0 +1,69 @@
import json
import logging
import requests
from pywhispercpp.model import Model
from jarvis.utils import languages_utils
def load_model():
    """Load the whisper.cpp 'base' model into the module-global ``model``.

    The model constructor lowers the root logger's verbosity, so the current
    level is saved beforehand and restored afterwards.
    """
    global model
    saved_level = logging.getLogger().level
    model = Model('base', n_threads=8, suppress_non_speech_tokens=True, log_level=logging.ERROR)
    logging.getLogger().setLevel(saved_level)
def get_model():
    """Return the module-global whisper.cpp model (call ``load_model()`` first)."""
    return model
def whisper_cpp_stt(audio_file):
    """
    Transcribe audio file using whisper-cpp, no additional server/service needed, runs on CPU.

    :param audio_file: path to the audio file to transcribe
    :return: transcribed text (one string, segments separated by spaces)
    """
    # BUG FIX: `model` is only bound once load_model() has run, so the original
    # `if model is None` raised NameError instead of detecting the missing model.
    # globals().get() handles both "never loaded" and an explicit None.
    if globals().get('model') is None:
        logging.error("Model is not loaded")
        load_model()

    segments = model.transcribe(audio_file, speed_up=False, translate=False)

    # combine all segments into one string (each followed by a space,
    # matching the original concatenation behavior)
    text = ''
    for segment in segments:
        text += segment.text + ' '

    return text
def whisper_asr_stt(audio_file):
    """
    Transcribe audio file using whisper-asr (docker), a server is needed, runs on GPU.
    See : https://github.com/ahmetoner/whisper-asr-webservice

    :param audio_file: path to the audio file to transcribe
    :return: transcribed text
    """
    headers = {
        'accept': 'application/json',
        # 'Content-Type': 'multipart/form-data',
    }
    params = {
        'task': 'transcribe',
        # TODO: add to config
        'language': languages_utils.get_language(),
        'output': 'json',
    }

    # FIX: the original left the file handle open (resource leak);
    # the `with` block guarantees it is closed once the upload finishes.
    with open(audio_file, 'rb') as audio:
        files = {
            'audio_file': audio,
        }
        # TODO: add to config
        response = requests.post('https://whisper.broillet.ch/asr', params=params, headers=headers, files=files)

    return json.loads(response.text)['text']

View File

@ -6,3 +6,5 @@ Flask-SocketIO
pywhispercpp pywhispercpp
padatious padatious
openai openai
gevent
gevent-websocket