rework of the conversation engines and text processing

This commit is contained in:
Mathieu Broillet 2023-11-06 21:38:13 +01:00
parent 7ce3df75f7
commit 23c9f95fce
Signed by: mathieu
GPG Key ID: C0E9E0E95AF03319
13 changed files with 173 additions and 65 deletions

View File

@ -41,4 +41,4 @@ if __name__ == '__main__':
waiting = True waiting = True
sock.emit('process_message', json.dumps({'data': message, 'uuid': 'clientpc'})) sock.emit('process_message', json.dumps({'text': message, 'uuid': 'clientpc', 'engine': 'chatgpt3'}))

2
run.py
View File

@ -2,7 +2,7 @@ import logging
from src.audio import audio_utils from src.audio import audio_utils
from src.database import db_utils from src.database import db_utils
from src.declarations import TTSEngine, STTEngine from src.constants import TTSEngine, STTEngine
from src.network import api from src.network import api
# import lingua_franca # import lingua_franca

View File

@ -1,7 +1,7 @@
import tempfile import tempfile
from src.audio.stt import faster_whisper from src.audio.stt import faster_whisper
from src.declarations import STTEngine, TTSEngine from src.constants import STTEngine, TTSEngine
def load_models(stt_engine: STTEngine, tts_engine: TTSEngine): def load_models(stt_engine: STTEngine, tts_engine: TTSEngine):

View File

@ -37,14 +37,22 @@ class STTEngine(Enum):
FASTER_WHISPER = "faster_whisper" FASTER_WHISPER = "faster_whisper"
class ChatEngine(Enum):
    """
    Available chat engines.

    Each member's value is the lowercase string identifier of the engine.
    """

    CHATGPT3 = "chatgpt3"
def get_enum_from_str(enumclass, name): def get_enum_from_str(enumclass, name):
""" """
Get enum from string Get enum from string
:param enumclass: :param enumclass: Class of enum
:param name: :param name: Name of enum as string
:return: :return:
""" """
for enum in enumclass: for enum in enumclass:
if enum.name == name: if enum.value.lower() == name.lower() or enum.name.lower() == name.lower():
return enum return enum
raise Exception("Unknown enum " + name) raise Exception("Unknown enum " + name)

View File

@ -6,7 +6,7 @@ from flask import Flask, request
from flask_socketio import SocketIO from flask_socketio import SocketIO
from src.network import endpoint_handler, socket_handler from src.network import endpoint_handler, socket_handler
from src.declarations import Endpoints, Sockets from src.constants import Endpoints, Sockets
# Set this variable to "threading", "eventlet" or "gevent" to test the # Set this variable to "threading", "eventlet" or "gevent" to test the
# different async modes, or leave it set to None for the application to choose # different async modes, or leave it set to None for the application to choose

View File

@ -1,7 +1,7 @@
import logging import logging
from src.audio import audio_utils from src.audio import audio_utils
from src.declarations import Endpoints, Sockets from src.constants import Endpoints, Sockets
def handle_endpoint(endpoint, request): def handle_endpoint(endpoint, request):

View File

@ -3,8 +3,8 @@ import logging
from flask_socketio import rooms, join_room, leave_room, emit from flask_socketio import rooms, join_room, leave_room, emit
from src.declarations import Sockets from src.constants import Sockets
from src.network import interactions from src.text.engine import conversation
def handle_socket(socket, message): def handle_socket(socket, message):
@ -44,7 +44,7 @@ def process_message(message):
message = json.loads(message) message = json.loads(message)
logging.info("New process request from room " + message['uuid']) logging.info("New process request from room " + message['uuid'])
logging.info("Message : " + message['data']) logging.info("Message : " + message['text'])
if message['uuid'] not in rooms(): if message['uuid'] not in rooms():
logging.warning("Room not found, creating it") logging.warning("Room not found, creating it")
@ -53,13 +53,14 @@ def process_message(message):
# TODO: maybe implement grammar check and correction ? # TODO: maybe implement grammar check and correction ?
# intent_manager.recognise(message['data'], message['uuid']) # intent_manager.recognise(message['data'], message['uuid'])
if message['data'] != "": if message['text'] != "":
# response = chatgpt_utils.chatgpt_recognise(message['data'], message['uuid']) # response = chatgpt_utils.chatgpt_recognise(message['data'], message['uuid'])
text_response = "Tokens are expensive ya know?"
print(text_response)
interactions.add_message_from_assistant(text_response, message['uuid'])
# chat_utils.send_jarvis_message_to_room(response['response'], message['uuid']) # chat_utils.send_jarvis_message_to_room(response['response'], message['uuid'])
response = conversation.process_text(message['engine'], message['text'], message['uuid'])
# text_response = "Tokens are expensive ya know?"
# print(text_response)
# interactions.add_message_from_assistant(text_response, message['uuid'])
def clear_chat(message): def clear_chat(message):

0
src/text/__init__.py Normal file
View File

View File

View File

View File

@ -5,79 +5,93 @@ import time
import openai import openai
from src.database import db_utils
from src.get_path_file import project_path from src.get_path_file import project_path
chat_messages = {} chat_messages = {}
def setup_messages(uuid): def get_prompt():
prompt = open(project_path / "resources" / "gpt_prompt_v3.txt", "r").read() return open(project_path / "text" / "engine" / "chatgpt3" / "prompt.txt", "r").read()
chat_messages[uuid] = [{"role": "system", "content": prompt}]
def chatgpt_recognise(text, uuid): def start_new_conversation(uuid: str):
# If the chat history is empty, create it chat_messages[uuid] = [{"role": "system", "content": get_prompt()}]
if uuid not in chat_messages:
setup_messages(uuid)
def add_message_to_history(text: str, uuid: str, role: str):
"""
Add a message to the history to a specific room.
:param text: message to add
:param uuid: room id
:param role: user, assistant or system
:return: None
"""
# Add the user message to the chat history # Add the user message to the chat history
chat_messages[uuid].append({"role": "user", "content": text}) chat_messages[uuid].append({"role": role, "content": text})
history = chat_messages.get(uuid)
def get_last_x_messages(uuid: str, x: int):
    """
    Get the last x messages from a specific room.

    :param uuid: room id
    :param x: number of messages to get
    :return: list with at most x message dicts, in the order they were added;
             an empty list when x is zero or negative
    :raises KeyError: if there is no history for the given room id
    """
    # Look the room up first so an unknown uuid still raises KeyError,
    # whatever the value of x.
    history = chat_messages[uuid]

    # history[-0:] is the whole list and a negative x would slice from the
    # front, so non-positive counts are handled explicitly.
    if x <= 0:
        return []

    return history[-x:]
def get_last_x_messages_as_text(uuid: str, x: int):
    """
    Build a context block from the user messages found in the last x messages
    of a specific room.

    Only messages with the "user" role are kept: the block is introduced as
    "last questions", and letting the system prompt through would duplicate
    the prompt when this text is appended to it.

    :param uuid: room id
    :param x: number of trailing messages to inspect
    :return: the header line followed by one user message content per line
    """
    header = "\nFor context, last questions were: \n"
    questions = [
        message["content"] + "\n"
        for message in get_last_x_messages(uuid, x)
        if message["role"] == "user"
    ]
    return header + "".join(questions)
def process_text(text: str, uuid: str):
add_message_to_history(text, uuid, "user")
# Create a copy of the chat history to avoid modifying the original when appending the last messages
# (history, see below)
conversation = chat_messages[uuid]
# If history is > 2 messages, add the previous messages to the prompt for keeping context
# This is a "hack" to cost less tokens as it avoids resending the whole conversation history,
# and instead resends the prompt with the last messages appended to it
if len(chat_messages[uuid]) > 2: if len(chat_messages[uuid]) > 2:
history = [] prompt = conversation[0]
prompt = chat_messages.get(uuid)[0] last_messages = get_last_x_messages_as_text(uuid, 6)
last_messages = "\nFor context, last questions were: \n" prompt['content'] = prompt['content'] + last_messages
conversation[0] = prompt
for message in chat_messages.get(uuid)[-6:]:
if message["role"] == "user":
last_messages += message["content"] + "\n"
prompt['content'] = prompt['content']+last_messages
history.append(prompt)
# Call ChatGPT API # Call ChatGPT API
start_time = time.time() start_time = time.time()
response = openai.ChatCompletion.create( response = openai.ChatCompletion.create(
model="gpt-3.5-turbo", model="gpt-3.5-turbo",
# return first and last three messages from array messages=conversation,
messages=history,
) )
end_time = time.time() logging.info("OpenAI GPT response in " + str(round(time.time() - start_time, ndigits=2)) + " seconds")
logging.info("GPT-3 response in " + str(round(end_time - start_time, ndigits=2)) + " seconds")
# Try to parse the response
try: try:
# Parse the response # Parse the response
query_json = parse_gpt_json(response.choices[0].message.content) query_json = parse_gpt_json(response.choices[0].message.content)
logging.info(query_json)
# Check if the response looks like a "valid" JSON
if 'simplified_sentence' in query_json and 'response' in query_json:
response = query_json['response']
simplified_sentence = query_json['simplified_sentence']
# Add the response to the chat history
chat_messages[uuid].append({"role": "assistant", "content": response})
# Add to local database
db_utils.add_command(text, simplified_sentence, response)
# Return the response
return query_json
elif 'response' in query_json:
response = query_json['response']
# Add the response to the chat history
chat_messages[uuid].append({"role": "assistant", "content": response})
# Return the response
return query_json
except Exception as e: except Exception as e:
# If the response is not a JSON, it's probably a plaintext response # If the response is not a JSON, it's probably a plaintext response
logging.error("Error while parsing ChatGPT response, probably not JSON: " + str(response.choices)) logging.error("Error while parsing ChatGPT response, probably not JSON: " + str(response.choices))
return {"simplified_sentence": "Error", "response": "I am having trouble understanding you. Please try again."} return {"simplified_sentence": "Error", "response": "I am having trouble understanding you. Please try again."}
@ -160,7 +174,7 @@ def parse_gpt_json(input_string):
return None return None
def clear_chat(uuid): def clear_conversation(uuid):
"""Clear the chat history for a given uuid""" """Clear the chat history for a given uuid"""
logging.info("Cleared chat for uuid " + uuid) logging.info("Cleared chat for uuid " + uuid)
setup_messages(uuid) setup_messages(uuid)

View File

@ -0,0 +1,85 @@
import logging
from src.constants import ChatEngine, get_enum_from_str
from src.text.engine.chatgpt3 import chatgpt
# Conversation registry keyed by room uuid. The functions in this module test
# `uuid in conversations` to decide whether a room already has a conversation.
conversations: dict = {}
def start_new_conversation(engine: str, uuid: str):
    """
    Start a new conversation with the given engine and register the room.

    :param engine: See Enum ChatEngine (a member, or its name/value as a string)
    :param uuid: room id
    :return: None
    :raises Exception: if the engine is unknown or not handled here
    """
    if not isinstance(engine, ChatEngine):
        engine = get_enum_from_str(ChatEngine, engine)

    if engine is ChatEngine.CHATGPT3:
        chatgpt.start_new_conversation(uuid)
    else:
        raise Exception("Unknown chat engine")

    # Record the room only once the engine has started the conversation.
    # Without this entry every `uuid not in conversations` check in this
    # module is true, so each processed message would restart the conversation
    # (losing its history) and clear_conversation would always raise.
    conversations[uuid] = engine
def clear_conversation(engine: str, uuid: str):
    """
    Drop the conversation of a room and start a fresh one with the given engine.

    :param engine: See Enum ChatEngine (a member, or its name/value as a string)
    :param uuid: room id
    :return: None
    :raises Exception: if the room has no entry in the conversation registry
    """
    # Resolve the engine first so an invalid engine name is reported before
    # the room lookup, as callers may rely on that order.
    if not isinstance(engine, ChatEngine):
        engine = get_enum_from_str(ChatEngine, engine)

    # Guard clause: there is nothing to clear for an unregistered room.
    if uuid not in conversations:
        raise Exception("Unknown conversation: " + uuid)

    logging.info("Cleared chat for uuid " + uuid)
    del conversations[uuid]
    start_new_conversation(engine, uuid)
def process_text(engine: str, text: str, uuid: str):
    """
    Route a text to the requested chat engine, starting the room's
    conversation first when the room is not registered yet.

    :param engine: See Enum ChatEngine (a member, or its name/value as a string)
    :param text: text to process
    :param uuid: room id
    :return: whatever the selected engine's process_text returns
    :raises Exception: if the engine is unknown or not handled here
    """
    if isinstance(engine, ChatEngine):
        chat_engine = engine
    else:
        chat_engine = get_enum_from_str(ChatEngine, engine)

    if uuid not in conversations:
        start_new_conversation(chat_engine, uuid)

    # Only one engine is wired in; anything else is rejected.
    if chat_engine is not ChatEngine.CHATGPT3:
        raise Exception("Unknown chat engine")

    return chatgpt.process_text(text, uuid)
def add_message_to_history(engine: str, text: str, uuid: str, role: str):
    """
    Append a message to the history kept by the given engine for a room,
    starting the room's conversation first when the room is not registered yet.

    :param engine: See Enum ChatEngine (a member, or its name/value as a string)
    :param text: message content
    :param uuid: room id
    :param role: user, assistant or system
    :return: None
    :raises Exception: if the engine is unknown or not handled here
    """
    if isinstance(engine, ChatEngine):
        chat_engine = engine
    else:
        chat_engine = get_enum_from_str(ChatEngine, engine)

    if uuid not in conversations:
        start_new_conversation(chat_engine, uuid)

    # Only one engine is wired in; anything else is rejected.
    if chat_engine is not ChatEngine.CHATGPT3:
        raise Exception("Unknown chat engine")

    chatgpt.add_message_to_history(text, uuid, role)