added gevent for websockets server and reworked main structure
parent 2b356c5dea
commit ee2eca484f
jarvis/api.py
@@ -1,16 +1,15 @@
 import json
 import logging
 import sys
 import tempfile
 from threading import Lock
 
 import openai
-import requests
 from flask import Flask, request
 from flask_socketio import SocketIO, emit, join_room, leave_room, \
     rooms
-from pywhispercpp.model import Model
 
-from jarvis.utils.chatgpt_utils import chatgpt_recognise
+from jarvis.utils import chat_utils, whisper_utils
+
 # Set this variable to "threading", "eventlet" or "gevent" to test the
 # different async modes, or leave it set to None for the application to choose
@@ -22,7 +21,7 @@ app.config['SECRET_KEY'] = 'secret!'
 socketio = SocketIO(app, async_mode=async_mode)
 thread = None
 thread_lock = Lock()
-model = Model('base', n_threads=16, suppress_non_speech_tokens=True)
 
 openai.api_key = sys.argv[1]
 
+
@@ -34,25 +33,34 @@ def index():
 @socketio.event
 def process_message(message):
     message = json.loads(message)
-    print("New PROCESS request from room " + message['uuid'])
+    logging.info("New PROCESS request from room " + message['uuid'])
+    logging.info("Message : " + message['data'])
 
-    print("Message : " + message['data'])
-    # TODO: maybe implement grammar check ?
+    # TODO: maybe implement grammar check and correction ?
 
     # intent_manager.recognise(message['data'], message['uuid'])
-    send_jarvis_message_to_room("I don't know how to respond to that...", message['uuid'])
 
-    response = chatgpt_recognise(message['data'])
-    if 'comment' in response:
-        send_user_message_to_room(response['comment'], message['uuid'])
-    else:
-        send_jarvis_message_to_room("I don't know how to respond to that...", message['uuid'])
+    if message['data'] != "":
+        # response = chatgpt_recognise(message['data'])
+        response = {'action': 'answer',
+                    'answer': "Hello! As an AI, I don't have emotions, but I'm always here to help you with your smart home needs. How can I assist you today?"}
+
+        if response['action'] == 'clarify':
+            chat_utils.send_jarvis_message_to_room(response['question'], message['uuid'])
+        elif response['action'] == 'command':
+            chat_utils.send_jarvis_message_to_room(response['comment'], message['uuid'])
+        elif response['action'] == 'query':
+            chat_utils.send_jarvis_message_to_room(response['device_description'], message['uuid'])
+        elif response['action'] == 'answer':
+            chat_utils.send_jarvis_message_to_room(response['answer'], message['uuid'])
+        else:
+            chat_utils.send_jarvis_message_to_room("I don't know how to respond to that...", message['uuid'])
 
 
 @socketio.event
 def join(message):
     message = json.loads(message)
-    print("New client joined room " + message['uuid'])
+    logging.info("New client joined room " + message['uuid'])
     join_room(message['uuid'])
 
 
@@ -68,27 +76,18 @@ def connect():
     emit('my_response', {'data': 'Connected', 'count': 0})
 
 
-def send_user_message_to_room(text, room_id):
-    socketio.emit('message_from_user', {'data': text, "uuid": room_id}, to=room_id)
-
-
-def send_jarvis_message_to_room(text, room_id):
-    socketio.emit('message_from_jarvis', {'data': text, "uuid": room_id}, to=room_id)
-
-
 # .WAV (i.e.) FILE REQUEST
 @app.route("/get_text_from_audio", methods=['POST'])
 def get_text_from_audio():
-    print("[" + request.remote_addr + "] - New STT request")
+    logging.info("New STT request from " + request.remote_addr)
 
     audio_temp_file = tempfile.NamedTemporaryFile(prefix='jarvis-audio_', suffix='_client')
     audio_temp_file.write(request.data)
 
-    # text = whisper_stt(audio_temp_file.name)
-    text = whisper_cpp_stt(audio_temp_file.name)
-    print(text)
+    text = whisper_utils.whisper_cpp_stt(audio_temp_file.name)
+    logging.info("STT result for " + request.remote_addr + " : " + text)
 
-    return {"data": text, "uuid": "null"}
+    return {"data": text}
 
 
 """
@@ -103,39 +102,6 @@ def process_text():
     return {"transcription": text, "answer": answer}"""
 
 
-# send request to whisper-asr server (docker)
-def whisper_stt(audio_file):
-    headers = {
-        'accept': 'application/json',
-        # 'Content-Type': 'multipart/form-data',
-    }
-
-    params = {
-        'task': 'transcribe',
-        # TODO: add to config
-        'language': 'fr',
-        'output': 'json',
-    }
-
-    files = {
-        'audio_file': open(audio_file, 'rb'),
-    }
-
-    # TODO: add to config
-    response = requests.post('https://whisper.broillet.ch/asr', params=params, headers=headers, files=files)
-    return json.loads(response.text)['text']
-
-
-def whisper_cpp_stt(audio_file):
-    segments = model.transcribe(audio_file, speed_up=False, translate=False)
-
-    # combines all segments in one string
-    text = ''
-    for segment in segments:
-        text += segment.text + ' '
-
-    return text
-
-
-def start_server():
-    socketio.run(app, host='0.0.0.0', port=6000, allow_unsafe_werkzeug=True)
+def start_api():
+    logging.info("Starting Jarvis Server API...")
+    socketio.run(app, host='0.0.0.0', port=6000)
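For reference, the reworked Socket.IO surface can be exercised with a small client. The following is a hypothetical sketch, not part of the commit: it assumes the python-socketio client package and a server on localhost:6000; event names and payload shapes come from the handlers above, and since both handlers call json.loads() on their argument, payloads are sent as JSON strings.

import json
import socketio  # pip install "python-socketio[client]"

sio = socketio.Client()

@sio.on('message_from_jarvis')
def on_jarvis(payload):
    # The server emits {'data': <text>, 'uuid': <room_id>} to the room.
    print("Jarvis:", payload['data'])

sio.connect('http://localhost:6000')
sio.emit('join', json.dumps({'uuid': 'room-1'}))
sio.emit('process_message', json.dumps({'uuid': 'room-1', 'data': 'hello'}))
sio.wait()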
@@ -1,10 +1,14 @@
-import api
 import logging
+
 import lingua_franca
+
+import api
+from jarvis.skills.cocktails import CocktailSkill
 from jarvis.skills.intent_services import intent_manager
+from jarvis.utils import whisper_utils
 
 if __name__ == '__main__':
     logging.getLogger().setLevel(logging.DEBUG)
 
     # Load lingua franca in the memory
     lingua_franca.load_language(lang="fr")
@@ -15,5 +19,8 @@ if __name__ == '__main__':
     # Load the skills
     intent_manager.load_all_skills()
 
+    # Load the STT (whisper) model
+    whisper_utils.load_model()
+
     # Start the api endpoint
-    api.start_server()
+    api.start_api()
jarvis/utils/chat_utils.py (new file, 13 lines)
@@ -0,0 +1,13 @@
+import logging
+
+from jarvis.api import socketio
+
+
+def send_user_message_to_room(text, room_id):
+    logging.debug("Sending message from user to room " + room_id + " : " + text)
+    socketio.emit('message_from_user', {'data': text, "uuid": room_id}, to=room_id)
+
+
+def send_jarvis_message_to_room(text, room_id):
+    logging.debug("Sending message from jarvis to room " + room_id + " : " + text)
+    socketio.emit('message_from_jarvis', {'data': text, "uuid": room_id}, to=room_id)
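One thing to watch (an observation, not part of the commit): jarvis/api.py now imports chat_utils at module top while chat_utils imports socketio back from jarvis.api, so importing jarvis.api first can hit a circular import before socketio is bound. A hedged sketch of one common workaround, deferring the lookup into the function body:

import logging


def send_jarvis_message_to_room(text, room_id):
    # Deferred import: resolved only when the function is called, by which
    # time jarvis.api has finished initialising and socketio exists.
    from jarvis.api import socketio
    logging.debug("Sending message from jarvis to room " + room_id + " : " + text)
    socketio.emit('message_from_jarvis', {'data': text, "uuid": room_id}, to=room_id)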
jarvis/utils/whisper_utils.py (new file, 69 lines)
@@ -0,0 +1,69 @@
+import json
+import logging
+
+import requests
+from pywhispercpp.model import Model
+
+from jarvis.utils import languages_utils
+
+
+def load_model():
+    log_level = logging.getLogger().level
+    global model
+    model = Model('base', n_threads=8, suppress_non_speech_tokens=True, log_level=logging.ERROR)
+    logging.getLogger().setLevel(log_level)
+
+
+def get_model():
+    return model
+
+
+def whisper_cpp_stt(audio_file):
+    """
+    Transcribe audio file using whisper-cpp, no additional server/service needed, runs on CPU.
+
+    :param audio_file:
+    :param model:
+    :return: text
+    """
+    if model is None:
+        logging.error("Model is not loaded")
+        load_model()
+
+    segments = model.transcribe(audio_file, speed_up=False, translate=False)
+
+    # combines all segments in one string
+    text = ''
+    for segment in segments:
+        text += segment.text + ' '
+
+    return text
+
+
+def whisper_asr_stt(audio_file):
+    """
+    Transcribe audio file using whisper-asr (docker), a server is needed, runs on GPU.
+    See : https://github.com/ahmetoner/whisper-asr-webservice
+
+    :param audio_file:
+    :return: text
+    """
+    headers = {
+        'accept': 'application/json',
+        # 'Content-Type': 'multipart/form-data',
+    }
+
+    params = {
+        'task': 'transcribe',
+        # TODO: add to config
+        'language': languages_utils.get_language(),
+        'output': 'json',
+    }
+
+    files = {
+        'audio_file': open(audio_file, 'rb'),
+    }
+
+    # TODO: add to config
+    response = requests.post('https://whisper.broillet.ch/asr', params=params, headers=headers, files=files)
+    return json.loads(response.text)['text']
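A caveat worth noting (an observation, not part of the commit): the module never binds the global at import time, so the `if model is None` guard in whisper_cpp_stt() raises NameError instead of triggering a lazy load whenever load_model() was never called. A minimal defensive sketch, assuming the same load_model() as above:

import logging

model = None  # bind the global up front so the guard below actually works


def whisper_cpp_stt(audio_file):
    if model is None:
        logging.warning("Whisper model not loaded yet, loading it now")
        load_model()

    segments = model.transcribe(audio_file, speed_up=False, translate=False)
    # join all segment texts into a single transcript string
    return ' '.join(segment.text for segment in segments)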
requirements.txt
@@ -6,3 +6,5 @@ Flask-SocketIO
 pywhispercpp
 padatious
 openai
+gevent
+gevent-websocket
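The two new pins line up with the async-mode comment in api.py: with async_mode left as None, Flask-SocketIO picks the best installed async framework (eventlet first, then gevent, then the threading fallback), and gevent-websocket supplies the native WebSocket transport under gevent. A quick sanity-check sketch, assuming gevent and gevent-websocket are installed and eventlet is not:

from flask import Flask
from flask_socketio import SocketIO

app = Flask(__name__)
socketio = SocketIO(app, async_mode=None)  # let Flask-SocketIO choose

# Reports the backend actually selected; expected to print 'gevent'
# once gevent and gevent-websocket are installed (and eventlet is absent).
print(socketio.async_mode)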