added gevent for websockets server and reworked main structure
This commit is contained in:
parent
2b356c5dea
commit
ee2eca484f
@ -1,16 +1,15 @@
|
|||||||
import json
|
import json
|
||||||
|
import logging
|
||||||
import sys
|
import sys
|
||||||
import tempfile
|
import tempfile
|
||||||
from threading import Lock
|
from threading import Lock
|
||||||
|
|
||||||
import openai
|
import openai
|
||||||
import requests
|
|
||||||
from flask import Flask, request
|
from flask import Flask, request
|
||||||
from flask_socketio import SocketIO, emit, join_room, leave_room, \
|
from flask_socketio import SocketIO, emit, join_room, leave_room, \
|
||||||
rooms
|
rooms
|
||||||
from pywhispercpp.model import Model
|
|
||||||
|
|
||||||
from jarvis.utils.chatgpt_utils import chatgpt_recognise
|
from jarvis.utils import chat_utils, whisper_utils
|
||||||
|
|
||||||
# Set this variable to "threading", "eventlet" or "gevent" to test the
|
# Set this variable to "threading", "eventlet" or "gevent" to test the
|
||||||
# different async modes, or leave it set to None for the application to choose
|
# different async modes, or leave it set to None for the application to choose
|
||||||
@ -22,7 +21,7 @@ app.config['SECRET_KEY'] = 'secret!'
|
|||||||
socketio = SocketIO(app, async_mode=async_mode)
|
socketio = SocketIO(app, async_mode=async_mode)
|
||||||
thread = None
|
thread = None
|
||||||
thread_lock = Lock()
|
thread_lock = Lock()
|
||||||
model = Model('base', n_threads=16, suppress_non_speech_tokens=True)
|
|
||||||
openai.api_key = sys.argv[1]
|
openai.api_key = sys.argv[1]
|
||||||
|
|
||||||
|
|
||||||
@ -34,25 +33,34 @@ def index():
|
|||||||
@socketio.event
|
@socketio.event
|
||||||
def process_message(message):
|
def process_message(message):
|
||||||
message = json.loads(message)
|
message = json.loads(message)
|
||||||
print("New PROCESS request from room " + message['uuid'])
|
logging.info("New PROCESS request from room " + message['uuid'])
|
||||||
|
logging.info("Message : " + message['data'])
|
||||||
|
|
||||||
print("Message : " + message['data'])
|
# TODO: maybe implement grammar check and correction ?
|
||||||
# TODO: maybe implement grammar check ?
|
|
||||||
|
|
||||||
# intent_manager.recognise(message['data'], message['uuid'])
|
# intent_manager.recognise(message['data'], message['uuid'])
|
||||||
send_jarvis_message_to_room("I don't know how to respond to that...", message['uuid'])
|
|
||||||
|
|
||||||
response = chatgpt_recognise(message['data'])
|
if message['data'] != "":
|
||||||
if 'comment' in response:
|
# response = chatgpt_recognise(message['data'])
|
||||||
send_user_message_to_room(response['comment'], message['uuid'])
|
response = {'action': 'answer',
|
||||||
else:
|
'answer': "Hello! As an AI, I don't have emotions, but I'm always here to help you with your smart home needs. How can I assist you today?"}
|
||||||
send_jarvis_message_to_room("I don't know how to respond to that...", message['uuid'])
|
|
||||||
|
if response['action'] == 'clarify':
|
||||||
|
chat_utils.send_jarvis_message_to_room(response['question'], message['uuid'])
|
||||||
|
elif response['action'] == 'command':
|
||||||
|
chat_utils.send_jarvis_message_to_room(response['comment'], message['uuid'])
|
||||||
|
elif response['action'] == 'query':
|
||||||
|
chat_utils.send_jarvis_message_to_room(response['device_description'], message['uuid'])
|
||||||
|
elif response['action'] == 'answer':
|
||||||
|
chat_utils.send_jarvis_message_to_room(response['answer'], message['uuid'])
|
||||||
|
else:
|
||||||
|
chat_utils.send_jarvis_message_to_room("I don't know how to respond to that...", message['uuid'])
|
||||||
|
|
||||||
|
|
||||||
@socketio.event
|
@socketio.event
|
||||||
def join(message):
|
def join(message):
|
||||||
message = json.loads(message)
|
message = json.loads(message)
|
||||||
print("New client joined room " + message['uuid'])
|
logging.info("New client joined room " + message['uuid'])
|
||||||
join_room(message['uuid'])
|
join_room(message['uuid'])
|
||||||
|
|
||||||
|
|
||||||
@ -68,27 +76,18 @@ def connect():
|
|||||||
emit('my_response', {'data': 'Connected', 'count': 0})
|
emit('my_response', {'data': 'Connected', 'count': 0})
|
||||||
|
|
||||||
|
|
||||||
def send_user_message_to_room(text, room_id):
|
|
||||||
socketio.emit('message_from_user', {'data': text, "uuid": room_id}, to=room_id)
|
|
||||||
|
|
||||||
|
|
||||||
def send_jarvis_message_to_room(text, room_id):
|
|
||||||
socketio.emit('message_from_jarvis', {'data': text, "uuid": room_id}, to=room_id)
|
|
||||||
|
|
||||||
|
|
||||||
# .WAV (i.e.) FILE REQUEST
|
# .WAV (i.e.) FILE REQUEST
|
||||||
@app.route("/get_text_from_audio", methods=['POST'])
|
@app.route("/get_text_from_audio", methods=['POST'])
|
||||||
def get_text_from_audio():
|
def get_text_from_audio():
|
||||||
print("[" + request.remote_addr + "] - New STT request")
|
logging.info("New STT request from " + request.remote_addr)
|
||||||
|
|
||||||
audio_temp_file = tempfile.NamedTemporaryFile(prefix='jarvis-audio_', suffix='_client')
|
audio_temp_file = tempfile.NamedTemporaryFile(prefix='jarvis-audio_', suffix='_client')
|
||||||
audio_temp_file.write(request.data)
|
audio_temp_file.write(request.data)
|
||||||
|
|
||||||
# text = whisper_stt(audio_temp_file.name)
|
text = whisper_utils.whisper_cpp_stt(audio_temp_file.name)
|
||||||
text = whisper_cpp_stt(audio_temp_file.name)
|
logging.info("STT result for " + request.remote_addr + " : " + text)
|
||||||
print(text)
|
|
||||||
|
|
||||||
return {"data": text, "uuid": "null"}
|
return {"data": text}
|
||||||
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
@ -103,39 +102,6 @@ def process_text():
|
|||||||
return {"transcription": text, "answer": answer}"""
|
return {"transcription": text, "answer": answer}"""
|
||||||
|
|
||||||
|
|
||||||
# send request to whisper-asr server (docker)
|
def start_api():
|
||||||
def whisper_stt(audio_file):
|
logging.info("Starting Jarvis Server API...")
|
||||||
headers = {
|
socketio.run(app, host='0.0.0.0', port=6000)
|
||||||
'accept': 'application/json',
|
|
||||||
# 'Content-Type': 'multipart/form-data',
|
|
||||||
}
|
|
||||||
|
|
||||||
params = {
|
|
||||||
'task': 'transcribe',
|
|
||||||
# TODO: add to config
|
|
||||||
'language': 'fr',
|
|
||||||
'output': 'json',
|
|
||||||
}
|
|
||||||
|
|
||||||
files = {
|
|
||||||
'audio_file': open(audio_file, 'rb'),
|
|
||||||
}
|
|
||||||
|
|
||||||
# TODO: add to config
|
|
||||||
response = requests.post('https://whisper.broillet.ch/asr', params=params, headers=headers, files=files)
|
|
||||||
return json.loads(response.text)['text']
|
|
||||||
|
|
||||||
|
|
||||||
def whisper_cpp_stt(audio_file):
|
|
||||||
segments = model.transcribe(audio_file, speed_up=False, translate=False)
|
|
||||||
|
|
||||||
# combines all segments in one string
|
|
||||||
text = ''
|
|
||||||
for segment in segments:
|
|
||||||
text += segment.text + ' '
|
|
||||||
|
|
||||||
return text
|
|
||||||
|
|
||||||
|
|
||||||
def start_server():
|
|
||||||
socketio.run(app, host='0.0.0.0', port=6000, allow_unsafe_werkzeug=True)
|
|
||||||
|
@ -1,10 +1,14 @@
|
|||||||
import api
|
import logging
|
||||||
|
|
||||||
import lingua_franca
|
import lingua_franca
|
||||||
|
|
||||||
|
import api
|
||||||
from jarvis.skills.cocktails import CocktailSkill
|
from jarvis.skills.cocktails import CocktailSkill
|
||||||
from jarvis.skills.intent_services import intent_manager
|
from jarvis.skills.intent_services import intent_manager
|
||||||
|
from jarvis.utils import whisper_utils
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
logging.getLogger().setLevel(logging.DEBUG)
|
||||||
|
|
||||||
# Load lingua franca in the memory
|
# Load lingua franca in the memory
|
||||||
lingua_franca.load_language(lang="fr")
|
lingua_franca.load_language(lang="fr")
|
||||||
@ -15,5 +19,8 @@ if __name__ == '__main__':
|
|||||||
# Load the skills
|
# Load the skills
|
||||||
intent_manager.load_all_skills()
|
intent_manager.load_all_skills()
|
||||||
|
|
||||||
|
# Load the STT (whisper) model
|
||||||
|
whisper_utils.load_model()
|
||||||
|
|
||||||
# Start the api endpoint
|
# Start the api endpoint
|
||||||
api.start_server()
|
api.start_api()
|
||||||
|
13
jarvis/utils/chat_utils.py
Normal file
13
jarvis/utils/chat_utils.py
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
import logging
|
||||||
|
|
||||||
|
from jarvis.api import socketio
|
||||||
|
|
||||||
|
|
||||||
|
def send_user_message_to_room(text, room_id):
    """
    Emit a 'message_from_user' event to a room.

    The payload is {'data': text, 'uuid': room_id} and the event is addressed
    to the room named by room_id.

    :param text: message content placed under the 'data' key
    :param room_id: room to address; also sent back under the 'uuid' key
    """
    # Lazy %-style arguments: nothing is formatted when DEBUG is disabled, and a
    # non-str room_id/text no longer raises TypeError while building the log line.
    logging.debug("Sending message from user to room %s : %s", room_id, text)
    socketio.emit('message_from_user', {'data': text, "uuid": room_id}, to=room_id)
|
||||||
|
|
||||||
|
|
||||||
|
def send_jarvis_message_to_room(text, room_id):
    """
    Emit a 'message_from_jarvis' event to a room.

    The payload is {'data': text, 'uuid': room_id} and the event is addressed
    to the room named by room_id.

    :param text: message content placed under the 'data' key
    :param room_id: room to address; also sent back under the 'uuid' key
    """
    # Lazy %-style arguments: nothing is formatted when DEBUG is disabled, and a
    # non-str room_id/text no longer raises TypeError while building the log line.
    logging.debug("Sending message from jarvis to room %s : %s", room_id, text)
    socketio.emit('message_from_jarvis', {'data': text, "uuid": room_id}, to=room_id)
|
69
jarvis/utils/whisper_utils.py
Normal file
69
jarvis/utils/whisper_utils.py
Normal file
@ -0,0 +1,69 @@
|
|||||||
|
import json
|
||||||
|
import logging
|
||||||
|
|
||||||
|
import requests
|
||||||
|
from pywhispercpp.model import Model
|
||||||
|
|
||||||
|
from jarvis.utils import languages_utils
|
||||||
|
|
||||||
|
|
||||||
|
# Shared whisper.cpp model instance. It is defined here so that get_model() and
# the "is None" check in whisper_cpp_stt() work before load_model() has run
# (without this binding they raise NameError).
model = None


def load_model():
    """
    Build the whisper.cpp 'base' model and store it in the module-level ``model``.

    The root logger level is read before the model is built and set back
    afterwards (presumably because building the model changes it - TODO confirm).
    """
    global model
    root_logger = logging.getLogger()
    log_level = root_logger.level
    model = Model('base', n_threads=8, suppress_non_speech_tokens=True, log_level=logging.ERROR)
    root_logger.setLevel(log_level)


def get_model():
    """
    Return the module-level model.

    :return: the object stored by load_model(), or None if it has not been called yet
    """
    return model


def whisper_cpp_stt(audio_file):
    """
    Transcribe audio file using whisper-cpp, no additional server/service needed, runs on CPU.

    If the model has not been loaded yet, an error is logged and the model is
    loaded before transcribing.

    :param audio_file: passed straight to ``model.transcribe`` (the caller in the
        API gives a temporary file path)
    :return: text - every segment's text followed by a single space, concatenated
    """
    if model is None:
        logging.error("Model is not loaded")
        load_model()

    segments = model.transcribe(audio_file, speed_up=False, translate=False)

    # combines all segments in one string (a space is kept after every segment,
    # including the last one, so the returned value is unchanged)
    return ''.join(segment.text + ' ' for segment in segments)
|
||||||
|
|
||||||
|
|
||||||
|
def whisper_asr_stt(audio_file):
    """
    Transcribe audio file using whisper-asr (docker), a server is needed, runs on GPU.
    See : https://github.com/ahmetoner/whisper-asr-webservice

    The file is uploaded as the 'audio_file' multipart field and the 'text'
    entry of the JSON response body is returned.

    :param audio_file: path of the audio file to open and upload
    :return: text
    """
    headers = {
        'accept': 'application/json',
        # 'Content-Type': 'multipart/form-data',
    }

    params = {
        'task': 'transcribe',
        # TODO: add to config
        'language': languages_utils.get_language(),
        'output': 'json',
    }

    # Open the audio inside a context manager so the file handle is closed once
    # the upload is done, even when the request raises.
    with open(audio_file, 'rb') as audio_stream:
        files = {
            'audio_file': audio_stream,
        }

        # TODO: add to config
        response = requests.post('https://whisper.broillet.ch/asr', params=params, headers=headers, files=files)

    return json.loads(response.text)['text']
|
@ -5,4 +5,6 @@ lingua-franca
|
|||||||
Flask-SocketIO
|
Flask-SocketIO
|
||||||
pywhispercpp
|
pywhispercpp
|
||||||
padatious
|
padatious
|
||||||
openai
|
openai
|
||||||
|
gevent
|
||||||
|
gevent-websocket
|
Loading…
Reference in New Issue
Block a user