Added gevent for the websockets server and reworked the main structure

This commit is contained in:
Mathieu B 2023-03-25 12:25:47 +01:00
parent 2b356c5dea
commit ee2eca484f
5 changed files with 123 additions and 66 deletions

View File

@@ -1,16 +1,15 @@
 import json
 import logging
 import sys
 import tempfile
 from threading import Lock

 import openai
-import requests
 from flask import Flask, request
 from flask_socketio import SocketIO, emit, join_room, leave_room, \
     rooms
-from pywhispercpp.model import Model
+from jarvis.utils.chatgpt_utils import chatgpt_recognise
+from jarvis.utils import chat_utils, whisper_utils

 # Set this variable to "threading", "eventlet" or "gevent" to test the
 # different async modes, or leave it set to None for the application to choose
@@ -22,7 +21,7 @@ app.config['SECRET_KEY'] = 'secret!'
 socketio = SocketIO(app, async_mode=async_mode)
 thread = None
 thread_lock = Lock()

-model = Model('base', n_threads=16, suppress_non_speech_tokens=True)
 openai.api_key = sys.argv[1]
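A note on the async_mode comment above: with gevent and gevent-websocket now in the requirements, leaving async_mode set to None lets Flask-SocketIO auto-detect the best installed server (eventlet first, then gevent, then the Werkzeug threading fallback). A minimal sketch of forcing the choice explicitly instead of relying on auto-detection:

    from flask import Flask
    from flask_socketio import SocketIO

    app = Flask(__name__)
    app.config['SECRET_KEY'] = 'secret!'

    # force gevent instead of letting SocketIO pick an async mode
    socketio = SocketIO(app, async_mode='gevent')
    print(socketio.async_mode)  # 'gevent' when gevent is installed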
@@ -34,25 +33,34 @@ def index():
 @socketio.event
 def process_message(message):
     message = json.loads(message)
-    print("New PROCESS request from room " + message['uuid'])
+    logging.info("New PROCESS request from room " + message['uuid'])
+    logging.info("Message : " + message['data'])
-    print("Message : " + message['data'])

-    # TODO: maybe implement grammar check ?
+    # TODO: maybe implement grammar check and correction ?
     # intent_manager.recognise(message['data'], message['uuid'])

-    send_jarvis_message_to_room("I don't know how to respond to that...", message['uuid'])
-    response = chatgpt_recognise(message['data'])
-    if 'comment' in response:
-        send_user_message_to_room(response['comment'], message['uuid'])
+    if message['data'] != "":
+        # response = chatgpt_recognise(message['data'])
+        response = {'action': 'answer',
+                    'answer': "Hello! As an AI, I don't have emotions, but I'm always here to help you with your smart home needs. How can I assist you today?"}
+
+        if response['action'] == 'clarify':
+            chat_utils.send_jarvis_message_to_room(response['question'], message['uuid'])
+        elif response['action'] == 'command':
+            chat_utils.send_jarvis_message_to_room(response['comment'], message['uuid'])
+        elif response['action'] == 'query':
+            chat_utils.send_jarvis_message_to_room(response['device_description'], message['uuid'])
+        elif response['action'] == 'answer':
+            chat_utils.send_jarvis_message_to_room(response['answer'], message['uuid'])
+        else:
-            send_jarvis_message_to_room("I don't know how to respond to that...", message['uuid'])
+            chat_utils.send_jarvis_message_to_room("I don't know how to respond to that...", message['uuid'])


 @socketio.event
 def join(message):
     message = json.loads(message)
-    print("New client joined room " + message['uuid'])
+    logging.info("New client joined room " + message['uuid'])
     join_room(message['uuid'])
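The new dispatch in process_message pairs each chatgpt_recognise action with the response key that holds the reply text. A hedged sketch of the same routing as a lookup table; the action/key pairs are inferred from the branches above, and chatgpt_recognise itself is not part of this diff:

    ACTION_REPLY_KEY = {
        'clarify': 'question',
        'command': 'comment',
        'query': 'device_description',
        'answer': 'answer',
    }

    def reply_for(response):
        # fall back to the default message for unknown actions or missing keys
        key = ACTION_REPLY_KEY.get(response.get('action'))
        if key is None or key not in response:
            return "I don't know how to respond to that..."
        return response[key]

A table like this keeps the routing in one place when new actions are added later.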
@@ -68,27 +76,18 @@
     emit('my_response', {'data': 'Connected', 'count': 0})

-def send_user_message_to_room(text, room_id):
-    socketio.emit('message_from_user', {'data': text, "uuid": room_id}, to=room_id)

-def send_jarvis_message_to_room(text, room_id):
-    socketio.emit('message_from_jarvis', {'data': text, "uuid": room_id}, to=room_id)

 # .WAV (e.g.) FILE REQUEST
 @app.route("/get_text_from_audio", methods=['POST'])
 def get_text_from_audio():
-    print("[" + request.remote_addr + "] - New STT request")
+    logging.info("New STT request from " + request.remote_addr)
     audio_temp_file = tempfile.NamedTemporaryFile(prefix='jarvis-audio_', suffix='_client')
     audio_temp_file.write(request.data)

-    # text = whisper_stt(audio_temp_file.name)
-    text = whisper_cpp_stt(audio_temp_file.name)
-    print(text)
+    text = whisper_utils.whisper_cpp_stt(audio_temp_file.name)
+    logging.info("STT result for " + request.remote_addr + " : " + text)

-    return {"data": text, "uuid": "null"}
+    return {"data": text}

 """
@@ -103,39 +102,6 @@ def process_text():
     return {"transcription": text, "answer": answer}"""

-# send request to whisper-asr server (docker)
-def whisper_stt(audio_file):
-    headers = {
-        'accept': 'application/json',
-        # 'Content-Type': 'multipart/form-data',
-    }
-    params = {
-        'task': 'transcribe',
-        # TODO: add to config
-        'language': 'fr',
-        'output': 'json',
-    }
-    files = {
-        'audio_file': open(audio_file, 'rb'),
-    }
-
-    # TODO: add to config
-    response = requests.post('https://whisper.broillet.ch/asr', params=params, headers=headers, files=files)
-    return json.loads(response.text)['text']
-
-
-def whisper_cpp_stt(audio_file):
-    segments = model.transcribe(audio_file, speed_up=False, translate=False)
-
-    # combines all segments in one string
-    text = ''
-    for segment in segments:
-        text += segment.text + ' '
-
-    return text
-
-
-def start_server():
-    socketio.run(app, host='0.0.0.0', port=6000, allow_unsafe_werkzeug=True)
+def start_api():
+    logging.info("Starting Jarvis Server API...")
+    socketio.run(app, host='0.0.0.0', port=6000)
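Dropping allow_unsafe_werkzeug=True fits the commit: that flag only exists to let the Werkzeug development server run outside debug mode, and with gevent installed socketio.run() uses gevent's WSGI server instead. An equivalent explicit setup, following the Flask-SocketIO deployment docs (a sketch that assumes gevent plus gevent-websocket are installed and reuses the app object defined earlier in this file):

    from gevent import pywsgi
    from geventwebsocket.handler import WebSocketHandler

    def start_api_explicit():
        # serve the Flask app over gevent with native WebSocket support
        server = pywsgi.WSGIServer(('0.0.0.0', 6000), app, handler_class=WebSocketHandler)
        server.serve_forever()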

View File

@@ -1,10 +1,14 @@
-import api
+import logging
 import lingua_franca
+import api
 from jarvis.skills.cocktails import CocktailSkill
 from jarvis.skills.intent_services import intent_manager
+from jarvis.utils import whisper_utils

 if __name__ == '__main__':
+    logging.getLogger().setLevel(logging.DEBUG)
+
     # Load lingua franca in the memory
     lingua_franca.load_language(lang="fr")
@@ -15,5 +19,8 @@ if __name__ == '__main__':
     # Load the skills
     intent_manager.load_all_skills()

+    # Load the STT (whisper) model
+    whisper_utils.load_model()
+
     # Start the api endpoint
-    api.start_server()
+    api.start_api()

View File

@@ -0,0 +1,13 @@
+import logging
+
+from jarvis.api import socketio
+
+
+def send_user_message_to_room(text, room_id):
+    logging.debug("Sending message from user to room " + room_id + " : " + text)
+    socketio.emit('message_from_user', {'data': text, "uuid": room_id}, to=room_id)
+
+
+def send_jarvis_message_to_room(text, room_id):
+    logging.debug("Sending message from jarvis to room " + room_id + " : " + text)
+    socketio.emit('message_from_jarvis', {'data': text, "uuid": room_id}, to=room_id)
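A quick usage sketch for the new helpers; the room id is whatever uuid the client sent in its join event, and the value below is hypothetical:

    from jarvis.utils import chat_utils

    # push a reply only to the clients in that room
    chat_utils.send_jarvis_message_to_room("The living-room lights are now off.", "room-uuid-1234")

One caveat worth watching: jarvis.api imports chat_utils while chat_utils imports socketio from jarvis.api, which risks a circular import depending on import order; deferring the socketio import into the function bodies would sidestep it.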

View File

@@ -0,0 +1,69 @@
+import json
+import logging
+
+import requests
+from pywhispercpp.model import Model
+
+from jarvis.utils import languages_utils
+
+# module-level Whisper model instance, set by load_model()
+model = None
+
+
+def load_model():
+    log_level = logging.getLogger().level
+    global model
+    model = Model('base', n_threads=8, suppress_non_speech_tokens=True, log_level=logging.ERROR)
+    # restore the application's log level after the model initialisation
+    logging.getLogger().setLevel(log_level)
+
+
+def get_model():
+    return model
+
+
+def whisper_cpp_stt(audio_file):
+    """
+    Transcribe an audio file using whisper-cpp; no additional server/service needed, runs on the CPU.
+
+    :param audio_file: path to the audio file to transcribe
+    :return: the transcribed text
+    """
+    if model is None:
+        logging.warning("Whisper model is not loaded, loading it now")
+        load_model()
+
+    segments = model.transcribe(audio_file, speed_up=False, translate=False)
+
+    # combine all the segments into one string
+    text = ''
+    for segment in segments:
+        text += segment.text + ' '
+
+    return text
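A hedged usage sketch: load the model once at startup, as main.py now does, then transcribe per request (the temp-file path below is hypothetical). The accumulation loop could also be written with str.join:

    from jarvis.utils import whisper_utils

    whisper_utils.load_model()
    text = whisper_utils.whisper_cpp_stt('/tmp/jarvis-audio_abc123_client')

    # equivalent to the segment loop above:
    # text = ' '.join(segment.text for segment in segments) + ' '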
+
+
+def whisper_asr_stt(audio_file):
+    """
+    Transcribe an audio file using whisper-asr (Docker); a separate server is needed, runs on the GPU.
+    See: https://github.com/ahmetoner/whisper-asr-webservice
+
+    :param audio_file: path to the audio file to transcribe
+    :return: the transcribed text
+    """
+    headers = {
+        'accept': 'application/json',
+        # 'Content-Type': 'multipart/form-data' is set automatically by requests
+    }
+    params = {
+        'task': 'transcribe',
+        # TODO: add to config
+        'language': languages_utils.get_language(),
+        'output': 'json',
+    }
+
+    # TODO: add the server URL to the config
+    # open the file in a context manager so the handle is closed after the request
+    with open(audio_file, 'rb') as f:
+        files = {'audio_file': f}
+        response = requests.post('https://whisper.broillet.ch/asr', params=params, headers=headers, files=files)
+
+    return json.loads(response.text)['text']
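With both transcribers now living side by side, a thin wrapper could select the backend at runtime. A sketch only, since no such switch exists in this commit:

    def stt(audio_file, backend='whisper_cpp'):
        # local CPU transcription by default; remote GPU server as opt-in
        if backend == 'whisper_cpp':
            return whisper_cpp_stt(audio_file)
        return whisper_asr_stt(audio_file)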

View File

@@ -6,3 +6,5 @@ Flask-SocketIO
 pywhispercpp
 padatious
 openai
+gevent
+gevent-websocket