added a behemoth of a function to parse chatgpt funky json

This commit is contained in:
Mathieu Broillet 2023-05-31 21:43:40 +02:00
parent b7419a6e58
commit b11a34ca56

View File

@ -1,8 +1,10 @@
import json
import logging
import openai
import re
import time
import openai
from jarvis.db import db_utils
chat_messages = {}
@ -32,7 +34,7 @@ def chatgpt_recognise(text, uuid):
try:
# Parse the response
query_json = json.loads(str(response.choices[0].message.content))
query_json = parse_gpt_json(response.choices[0].message.content)
# Check if the response looks like a "valid" JSON
if 'simplified_sentence' in query_json and 'response' in query_json:
@ -43,7 +45,7 @@ def chatgpt_recognise(text, uuid):
chat_messages[uuid].append({"role": "assistant", "content": response})
# Add to local database
db_utils.add_query(text, simplified_sentence, response)
db_utils.add_command(text, simplified_sentence, response)
# Return the response
return query_json
@ -51,9 +53,68 @@ def chatgpt_recognise(text, uuid):
except Exception as e:
# If the response is not a JSON, it's probably a plaintext response
logging.error("Error while parsing ChatGPT response, probably not JSON: " + str(response.choices))
logging.error(str(e))
# TODO: repeat query if it's not a JSON response
return {"simplified_sentence": "Error", "response": "An error has occured or I don't understand."}
def parse_gpt_json(input_string):
    """
    Parse a ChatGPT reply that is supposed to be JSON but is frequently malformed.

    The reply is first handed to the strict JSON parser, both as-is and reduced to
    its outermost ``{...}`` span. That result is used only when it is an object
    that already contains the ``simplified_sentence`` and ``response`` keys, so
    well-formed replies are never touched by the repair heuristics below (which
    would otherwise corrupt values containing apostrophes or braces).

    If strict parsing does not yield such an object, a series of textual repairs
    is applied (quote normalisation, key quoting/renaming, brace wrapping, missing
    quotes around the response value) and the result is parsed again.

    :param input_string: The string to parse
    :return: The parsed JSON object as a dict, or None if it could not be parsed
    """
    if input_string is None:
        return None
    input_string = str(input_string)

    # Fast path: do not "repair" what is already valid.
    first_brace = input_string.find("{")
    last_brace = input_string.rfind("}")
    candidates = [input_string]
    if -1 < first_brace < last_brace:
        # Reply wrapped in prose, e.g. 'Sure! {...} Anything else?'
        candidates.append(input_string[first_brace:last_brace + 1])
    for candidate in candidates:
        try:
            parsed_json = json.loads(candidate)
        except json.JSONDecodeError:
            continue
        if (isinstance(parsed_json, dict)
                and "simplified_sentence" in parsed_json
                and "response" in parsed_json):
            return parsed_json

    try:
        # Keep only what is between the first "{" and the last "}"
        if "{" in input_string and "}" in input_string:
            input_string = input_string.split("{", 1)[1].rsplit("}", 1)[0]

        # Preprocess the input to fix common issues
        input_string = input_string.replace('\n', '')
        input_string = input_string.replace("'", '"')
        # NOTE(review): a former `replace('\"', '"')` step was removed here because
        # it was a no-op; if un-escaping backslash-quotes was intended, confirm and
        # add it explicitly.
        input_string = input_string.replace('simplified_sentence:', '"simplified_sentence":')
        input_string = input_string.replace('response:', '"response":')
        input_string = input_string.replace(',}', '}')

        # Fix missing commas
        input_string = input_string.replace('}', '},')
        input_string = input_string.replace(',"}', '}')

        # Remove trailing commas
        input_string = input_string.rstrip(',')

        # Wrap the input in curly braces if necessary
        if not input_string.startswith('{'):
            input_string = '{' + input_string
        if not input_string.endswith('}'):
            input_string += '}'

        # A double quote between two letters was an apostrophe before the quote
        # normalisation above (didn"t -> didn't): restore it
        input_string = re.sub(r'(?<=[\wÀ-ÖØ-öø-ÿ])"(?=[\wÀ-ÖØ-öø-ÿ])', "'", input_string)

        # Normalise the key spellings ChatGPT sometimes uses
        input_string = input_string.replace('{"Simplified Sentence": ', '{"simplified_sentence": "')
        input_string = input_string.replace('"Simplified Sentence"', '"simplified_sentence"')
        input_string = input_string.replace('"Simplified_sentence"', '"simplified_sentence"')
        input_string = input_string.replace('"Response"', '"response"')

        # If the response value is not quoted, close the previous value and open a quote
        if '"response": "' not in input_string:
            if 'response": ' in input_string:
                input_string = input_string.replace('"response": ', '", "response": "')
            else:
                input_string = input_string.replace('"response":', '", "response": "')

        # If missing a closing quote, add it
        if not input_string.endswith('"}'):
            input_string = input_string.replace('}', '"}')

        return json.loads(input_string)
    except json.JSONDecodeError as e:
        # The reply could not be repaired: report it the same way the rest of the module does
        logging.error("Failed to parse JSON: %s", e)
        logging.error("Input string: %s", input_string)
        return None
@ -61,60 +122,3 @@ def clear_chat(uuid):
"""Clear the chat history for a given uuid"""
logging.info("Cleared chat for uuid " + uuid)
chat_messages[uuid] = []
# def get_answer_from_response(response):
# if 'action' not in response:
# # Fix for when it responds in plaintext to follow-up questions
# # In that case the response is an OpenAIObject not a JSON
# return response.choices[0].message.content
# else:
# if response['action'] == 'clarify':
# return response['question']
# elif response['action'] == 'command':
# return response['comment']
# elif response['action'] == 'query':
# return response['device_description']
# elif response['action'] == 'answer':
# return response['answer']
# elif response['action'] == 'calculate':
# return response['calculation']
# else:
# return "I don't know how to respond to that..."
# def get_conversation_as_one_message(uuid):
# """
# Prepare the messages to send to OpenAI API
# We don't use OpenAI's approach of passing an array of messages because it doesn't work well with the prompt and ChatGPT
# breaks out of character.
# So instead, we add the history to the system prompt and create a new conversation each time.
# It should not cost more tokens, but I am not 100% sure.
# """
#
# # Load the prompt
# prompt = open("/home/mathieu/PycharmProjects/jarvis-server-v2/jarvis/utils/chatgpt_prompt_2_smaller.txt", "r").read()
#
# # Replace the variables in the prompt
# prompt = prompt.replace("{{timestamp}}", time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
# prompt = prompt.replace("{{location}}", "Lausanne in the canton Vaud of Switzerland")
#
# # Check if the user has already asked a question (to keep context)
# if len(chat_messages[uuid]) > 1:
# history = ""
#
# # Add the last 4 messages from the user to the prompt
# # we don't give the whole history because it adds a lot of tokens.
# history += "\nFor context, last discussion you had with user:\n"
# for message in chat_messages[uuid][-4:]:
# if message['role'] == "user":
# history += "U: " + message['content'] + "\n"
# elif message['role'] == "assistant":
# history += "Y: " + message['content'] + "\n"
#
# # Replace the {{history}} variable in the prompt with the history
# prompt = prompt.replace("{{history}}", history)
# else:
# # If the user hasn't asked a question yet, remove the history part of the prompt
# prompt = prompt.replace("{{history}}", "")
#
# return [{"role": "system", "content": prompt}]