improve gpt json parsing and history system
This commit is contained in:
parent
ecf2595273
commit
af4e7bb7f3
Binary file not shown.
@ -1,2 +1,2 @@
|
|||||||
You a sarcastic, introverted smart home assistant, dry humor.
|
You a sarcastic, introverted smart (home assistant) bot, dry humor.
|
||||||
Simplify complex sentence (ex: turn on tv in room), it should not be a question, add short response, return only JSON {"simplified_sentence", "response"}
|
Simplify complex sentence (ex. to : turn on tv in room), add short response, return only JSON {"simplified_sentence", "response"}
|
@ -5,8 +5,8 @@ import time
|
|||||||
|
|
||||||
import openai
|
import openai
|
||||||
|
|
||||||
from src.get_path_file import project_path
|
|
||||||
from src.database import db_utils
|
from src.database import db_utils
|
||||||
|
from src.get_path_file import project_path
|
||||||
|
|
||||||
chat_messages = {}
|
chat_messages = {}
|
||||||
|
|
||||||
@ -24,11 +24,25 @@ def chatgpt_recognise(text, uuid):
|
|||||||
# Add the user message to the chat history
|
# Add the user message to the chat history
|
||||||
chat_messages[uuid].append({"role": "user", "content": text})
|
chat_messages[uuid].append({"role": "user", "content": text})
|
||||||
|
|
||||||
|
history = chat_messages.get(uuid)
|
||||||
|
if len(chat_messages[uuid]) > 2:
|
||||||
|
history = []
|
||||||
|
prompt = chat_messages.get(uuid)[0]
|
||||||
|
last_messages = "\nFor context, last questions were: \n"
|
||||||
|
|
||||||
|
for message in chat_messages.get(uuid)[-6:]:
|
||||||
|
if message["role"] == "user":
|
||||||
|
last_messages += message["content"] + "\n"
|
||||||
|
|
||||||
|
prompt['content'] = prompt['content']+last_messages
|
||||||
|
history.append(prompt)
|
||||||
|
|
||||||
# Call ChatGPT API
|
# Call ChatGPT API
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
response = openai.ChatCompletion.create(
|
response = openai.ChatCompletion.create(
|
||||||
model="gpt-3.5-turbo",
|
model="gpt-3.5-turbo",
|
||||||
messages=chat_messages.get(uuid),
|
# return first and last three messages from array
|
||||||
|
messages=history,
|
||||||
)
|
)
|
||||||
end_time = time.time()
|
end_time = time.time()
|
||||||
logging.info("GPT-3 response in " + str(round(end_time - start_time, ndigits=2)) + " seconds")
|
logging.info("GPT-3 response in " + str(round(end_time - start_time, ndigits=2)) + " seconds")
|
||||||
@ -51,6 +65,15 @@ def chatgpt_recognise(text, uuid):
|
|||||||
# Return the response
|
# Return the response
|
||||||
return query_json
|
return query_json
|
||||||
|
|
||||||
|
elif 'response' in query_json:
|
||||||
|
response = query_json['response']
|
||||||
|
|
||||||
|
# Add the response to the chat history
|
||||||
|
chat_messages[uuid].append({"role": "assistant", "content": response})
|
||||||
|
|
||||||
|
# Return the response
|
||||||
|
return query_json
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
# If the response is not a JSON, it's probably a plaintext response
|
# If the response is not a JSON, it's probably a plaintext response
|
||||||
logging.error("Error while parsing ChatGPT response, probably not JSON: " + str(response.choices))
|
logging.error("Error while parsing ChatGPT response, probably not JSON: " + str(response.choices))
|
||||||
@ -110,16 +133,34 @@ def parse_gpt_json(input_string):
|
|||||||
if not input_string.endswith('\"}'):
|
if not input_string.endswith('\"}'):
|
||||||
input_string = input_string.replace('}', '\"}')
|
input_string = input_string.replace('}', '\"}')
|
||||||
|
|
||||||
|
# If at this point, no JSON seems to be found, we convert the output to JSON
|
||||||
|
if "response" not in input_string and "simplified_sentence" not in input_string:
|
||||||
|
input_string = input_string.replace("{", "").replace("}", "").replace("\"", "")
|
||||||
|
|
||||||
|
# If the input string contains "Simplifié:" or "Réponse:", we split the string
|
||||||
|
if "Simplifié:" in input_string or "Réponse:" in input_string:
|
||||||
|
simplified_sentence = input_string.split("Simplifié: ")[1].split("Réponse: ")[0]
|
||||||
|
response = input_string.split("Réponse:")[1]
|
||||||
|
input_string = '{"simplified_sentence": "' + simplified_sentence + '", "response": "' + response + '"}'
|
||||||
|
elif input_string.count(':') == 2:
|
||||||
|
simplified_sentence = input_string.split(" : ")[1].split(". ")[0]
|
||||||
|
response = input_string.split(" : ")[2]
|
||||||
|
input_string = '{"simplified_sentence": "' + simplified_sentence + '", "response": "' + response + '"}'
|
||||||
|
else:
|
||||||
|
input_string = '{"response": "' + input_string + '"}'
|
||||||
|
|
||||||
parsed_json = json.loads(input_string)
|
parsed_json = json.loads(input_string)
|
||||||
return parsed_json
|
return parsed_json
|
||||||
except json.JSONDecodeError as e:
|
except json.JSONDecodeError as e:
|
||||||
# Handle JSON decoding errors
|
# Handle JSON decoding errors
|
||||||
print("Failed to parse JSON: " + str(e))
|
print("Failed to parse JSON: " + str(e))
|
||||||
print("Input string: " + input_string)
|
print("Input string: " + input_string)
|
||||||
|
|
||||||
|
# Return an error message
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
def clear_chat(uuid):
|
def clear_chat(uuid):
|
||||||
"""Clear the chat history for a given uuid"""
|
"""Clear the chat history for a given uuid"""
|
||||||
logging.info("Cleared chat for uuid " + uuid)
|
logging.info("Cleared chat for uuid " + uuid)
|
||||||
chat_messages[uuid] = []
|
setup_messages(uuid)
|
||||||
|
@ -26,7 +26,7 @@ def faster_whisper_stt(audio_file):
|
|||||||
logging.error("Model is not loaded")
|
logging.error("Model is not loaded")
|
||||||
load_model()
|
load_model()
|
||||||
|
|
||||||
segments, info = model.transcribe(audio_file, beam_size=5)
|
segments, info = model.transcribe(audio_file, beam_size=5, language='fr')
|
||||||
print("Detected language '%s' with probability %f" % (info.language, info.language_probability))
|
print("Detected language '%s' with probability %f" % (info.language, info.language_probability))
|
||||||
|
|
||||||
# combines all segments in one string
|
# combines all segments in one string
|
||||||
|
@ -1,69 +0,0 @@
|
|||||||
import json
|
|
||||||
import logging
|
|
||||||
|
|
||||||
import requests
|
|
||||||
from pywhispercpp.model import Model
|
|
||||||
|
|
||||||
from jarvis.utils import languages_utils
|
|
||||||
|
|
||||||
|
|
||||||
def load_model():
    """Create the whisper-cpp ``Model`` and store it in the module-level ``model``.

    The root logger level is read before the model is built and written back
    afterwards, so constructing the model leaves the logging level unchanged.
    """
    global model
    root_logger = logging.getLogger()
    previous_level = root_logger.level
    model = Model('base', n_threads=8, suppress_non_speech_tokens=True, log_level=logging.ERROR)
    logging.getLogger().setLevel(previous_level)
|
|
||||||
|
|
||||||
|
|
||||||
def get_model():
    """Return the module-level ``model`` object set by ``load_model()``."""
    return model
|
|
||||||
|
|
||||||
|
|
||||||
def whisper_cpp_stt(audio_file):
    """
    Transcribe audio file using whisper-cpp, no additional server/service needed, runs on CPU.

    The module-level ``model`` is loaded on demand when it has not been set yet.

    :param audio_file: audio file handed to ``model.transcribe``
    :return: text of all segments, each followed by a single space
    """
    # ``model`` only exists as a global once load_model() has run; looking it up
    # through globals() avoids a NameError on the very first call.
    if globals().get("model") is None:
        logging.error("Model is not loaded")
        load_model()

    segments = model.transcribe(audio_file, speed_up=False, translate=False)

    # combines all segments in one string (trailing space kept for compatibility)
    return ''.join(segment.text + ' ' for segment in segments)
|
|
||||||
|
|
||||||
|
|
||||||
def whisper_asr_stt(audio_file):
    """
    Transcribe audio file using whisper-asr (docker), a server is needed, runs on GPU.
    See : https://github.com/ahmetoner/whisper-asr-webservice

    :param audio_file: path of the audio file to upload
    :return: text (the ``text`` field of the JSON reply)
    """
    headers = {
        'accept': 'application/json',
        # 'Content-Type': 'multipart/form-data',
        # NOTE(review): presumably left unset so that requests can generate the
        # multipart header (with its boundary) itself - confirm before enabling.
    }

    params = {
        'task': 'transcribe',
        # TODO: add to config
        'language': languages_utils.get_language(),
        'output': 'json',
    }

    # Open the upload inside a context manager so the handle is always closed,
    # even when the request raises.
    with open(audio_file, 'rb') as audio_stream:
        files = {
            'audio_file': audio_stream,
        }

        # TODO: add to config
        response = requests.post('https://whisper.yourdomain.xyz/asr', params=params, headers=headers, files=files)

    return json.loads(response.text)['text']
|
|
99
test_json.py
Normal file
99
test_json.py
Normal file
@ -0,0 +1,99 @@
|
|||||||
|
import json
|
||||||
|
import re
|
||||||
|
|
||||||
|
|
||||||
|
def _plain_text_to_json(text):
    """Turn a reply that contains no JSON keys at all into a result dict.

    Recognised layouts are "Simplifié: ... Réponse: ..." and a sentence with
    exactly two " : " separators. Anything else, including a layout whose
    markers are incomplete, is returned whole under the "response" key.
    """
    if "Simplifié:" in text or "Réponse:" in text:
        after_simplified = text.split("Simplifié: ")
        after_response = text.split("Réponse:")
        # Both markers are required; a lone marker falls through to the
        # plain response below instead of raising IndexError.
        if len(after_simplified) > 1 and len(after_response) > 1:
            return {
                "simplified_sentence": after_simplified[1].split("Réponse: ")[0],
                "response": after_response[1],
            }
    elif text.count(':') == 2:
        parts = text.split(" : ")
        # Two colons do not guarantee two " : " separators (e.g. "10:30").
        if len(parts) > 2:
            return {
                "simplified_sentence": parts[1].split(". ")[0],
                "response": parts[2],
            }

    return {"response": text}


def parse_invalid_json(input_string):
    """Parse a loosely formatted model reply into a dict, repairing common defects.

    The text is normalised step by step (braces, quotes, key spelling, missing
    commas/quotes) and then decoded with ``json.loads``. When neither
    "response" nor "simplified_sentence" appears after normalisation, the reply
    is treated as plain text and converted by ``_plain_text_to_json``.

    :param input_string: raw reply text
    :return: the decoded dict, or None when the repaired text is still not valid JSON
    """
    try:
        # Keep only what sits between the first "{" and the last "}" so that
        # any chatter around the JSON object is dropped.
        if "{" in input_string and "}" in input_string:
            input_string = str(input_string).split("{", 1)[1].rsplit("}", 1)[0]

        # Preprocess the input to fix common issues
        input_string = input_string.replace('\n', '')
        input_string = input_string.replace('\'', '"')
        input_string = input_string.replace('simplified_sentence:', '"simplified_sentence":')
        input_string = input_string.replace('response:', '"response":')
        input_string = input_string.replace(',}', '}')

        # Fix missing commas
        input_string = input_string.replace('}', '},')
        input_string = input_string.replace(',"}', '}')

        # Remove trailing commas
        input_string = input_string.rstrip(',')

        # Wrap the input in curly braces if necessary
        if not input_string.startswith('{'):
            input_string = '{' + input_string
        if not input_string.endswith('}'):
            input_string += '}'

        # A double quote squeezed between two letters is an apostrophe
        # (didn"t -> didn't), not a string delimiter.
        input_string = re.sub(r'(?<=[\wÀ-ÖØ-öø-ÿ])"(?=[\wÀ-ÖØ-öø-ÿ])', "'", input_string)

        # Normalise the key spellings the model sometimes uses
        input_string = input_string.replace('{"Simplified Sentence": ', '{"simplified_sentence": "')
        input_string = input_string.replace('"Simplified Sentence"', '"simplified_sentence"')
        input_string = input_string.replace('"Simplified_sentence"', '"simplified_sentence"')
        input_string = input_string.replace('"Response"', '"response"')

        # The response value is not quoted: close the previous value and open
        # a quoted one.
        if '"response": "' not in input_string:
            if 'response": ' in input_string:
                input_string = input_string.replace('"response": ', '", "response": "')
            else:
                input_string = input_string.replace('"response":', '", "response": "')

        # If missing a closing quote, add it
        if not input_string.endswith('"}'):
            input_string = input_string.replace('}', '"}')

        # If at this point, no JSON seems to be found, we convert the output to JSON
        if "response" not in input_string and "simplified_sentence" not in input_string:
            plain_text = input_string.replace("{", "").replace("}", "").replace('"', "")
            # Build the dict directly: concatenating the text into a JSON
            # literal breaks on backslashes and control characters.
            return _plain_text_to_json(plain_text)

        return json.loads(input_string)
    except json.JSONDecodeError as e:
        # Handle JSON decoding errors
        print("Failed to parse JSON: " + str(e))
        print("Input string: " + input_string)

        # Signal the failure to the caller
        return None
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Sample replies covering the malformed shapes the parser is meant to
    # repair: missing braces, unquoted keys, surrounding chatter, wrong key
    # casing, stray quotes and plain French sentences.
    samples = (
        '"simplified_sentence": "Turn on living room TV",\n"response": "Sure thing, turning on the living room TV."',
        'simplified_sentence: "Turn on living room TV", response: "Sure thing, turning on the living room TV."',
        '{"simplified_sentence": "Turn on living room TV", "response": "Sure thing, turning on the living room TV."}',
        '{simplified_sentence: "Turn on living room TV", response: "Sure thing, turning on the living room TV."}',
        '{simplified_sentence: "Turn on living room TV", "response": "Sure thing, turning on the living room TV."}',
        '{"simplified_sentence": "Turn on living room TV", response: "Sure thing, turning on the living room TV."}',
        '{simplified_sentence: "Turn on living room TV", response: "Sure thing, turning on the living room TV."',
        'simplified_sentence: "Turn on living room TV", response: "Sure thing, turning on the living room TV."}',
        '{"simplified_sentence": "Turn on living room TV", "response": "Sure thing, turning on the living room TV."',
        '"simplified_sentence": "Turn on living room TV", "response": "Sure thing, turning on the living room TV."}',
        '\"Turn on the TV in the living room\" - \"Sure thing, boss.\" \n\n{\"simplified_sentence\": \"Turn on the TV in the living room\", \"response\": \"Sure thing, boss.\"}',
        '{"Simplified Sentence": She managed to turn off the living room couch."Response": Congratulations, I didn"t know a couch could be turned on in the first place.}',
        '\"Simplified_sentence\": \"Turn off the couch in the living room.\",\n\"Response\": \"Sure, let me handle that for you.\"',
        '{"simplified_sentence": "Éteindre la télé du salon.", "response": "D\'accord, j"éteins la télé du salon."}',
        'Simplifié: Age de Robert Downey Jr. durant Iron Man1.Réponse: RObert Downey Jr avait 43 ans lors de las ortie u premier film Iron Man en 2008',
        'Bien sur, voici une phrase simplifiée : Eteins la lumière du salon. Et voici la réponse : D\'accord, j\'éteins la lumière du salon.',
    )

    # Print the parsed result of every sample, one per line.
    for sample in samples:
        print(parse_invalid_json(sample))
|
Loading…
Reference in New Issue
Block a user