Added a behemoth of a function to parse ChatGPT's funky JSON

2023-05-31 21:43:40 +02:00 · 2023-05-31 21:43:40 +02:00 · b11a34ca56
commit b11a34ca56
parent b7419a6e58
1 changed files with 66 additions and 62 deletions
--- a/jarvis/utils/chatgpt_utils.py
+++ b/jarvis/utils/chatgpt_utils.py
@ -1,8 +1,10 @@
 import json
 import logging
-import openai
+import re
 import time

+import openai
+
 from jarvis.db import db_utils

 chat_messages = {}
@ -32,7 +34,7 @@ def chatgpt_recognise(text, uuid):

    try:
        # Parse the response
-        query_json = json.loads(str(response.choices[0].message.content))
+        query_json = parse_gpt_json(response.choices[0].message.content)

        # Check if the response looks like a "valid" JSON
        if 'simplified_sentence' in query_json and 'response' in query_json:
@ -43,7 +45,7 @@ def chatgpt_recognise(text, uuid):
            chat_messages[uuid].append({"role": "assistant", "content": response})

            # Add to local database
-            db_utils.add_query(text, simplified_sentence, response)
+            db_utils.add_command(text, simplified_sentence, response)

            # Return the response
            return query_json
@ -51,9 +53,68 @@ def chatgpt_recognise(text, uuid):
    except Exception as e:
        # If the response is not a JSON, it's probably a plaintext response
        logging.error("Error while parsing ChatGPT response, probably not JSON: " + str(response.choices))
-        logging.error(str(e))

-        # TODO: repeat query if it's not a JSON response
+        return {"simplified_sentence": "Error", "response": "An error has occured or I don't understand."}
+
+
+def parse_gpt_json(input_string):
+    """
+    Best-effort repair of the malformed JSON ChatGPT tends to return (ugly, order-dependent string fixes).
+
+    :param input_string: The string to parse
+    :return: The parsed JSON value, or None when json.loads raises json.JSONDecodeError (the only error caught)
+    """
+
+    try:
+        if "{" in input_string and "}" in input_string:  # keep only the text between the first "{" and the last "}"
+            input_string = str(input_string).split("{", 1)[1].rsplit("}", 1)[0]
+
+        # Normalise the text: drop newlines, turn single quotes into double quotes, quote bare keys, drop ",}"
+        input_string = input_string.replace('\n', '')
+        input_string = input_string.replace('\'', '\"')
+        input_string = input_string.replace('\"', '"')  # NOTE(review): no-op, '\"' and '"' are the same string
+        input_string = input_string.replace('simplified_sentence:', '\"simplified_sentence\":')
+        input_string = input_string.replace('response:', '\"response\":')
+        input_string = input_string.replace(',}', '}')
+
+        # Put a comma after every remaining "}" (not only nested ones), then collapse any ',"}' into "}"
+        input_string = input_string.replace('}', '},')
+        input_string = input_string.replace(',"}', '}')
+
+        # Strip whatever commas are left at the very end of the string
+        input_string = input_string.rstrip(',')
+
+        # Put the outer curly braces back (they were cut off above when both braces were present)
+        if not input_string.startswith('{'):
+            input_string = '{' + input_string
+        if not input_string.endswith('}'):
+            input_string += '}'
+
+        # A double quote between two word/accented characters is an apostrophe converted above (didn"t -> didn't)
+        input_string = re.sub(r'(?<=[\wÀ-ÖØ-öø-ÿ])"(?=[\wÀ-ÖØ-öø-ÿ])', "'", input_string)
+
+        # Normalise key spelling/capitalisation; the first replacement also adds an opening quote after the key
+        input_string = input_string.replace('{\"Simplified Sentence\": ', '{\"simplified_sentence\": \"')
+        input_string = input_string.replace('"Simplified Sentence"', '\"simplified_sentence\"')
+        input_string = input_string.replace('"Simplified_sentence"', '\"simplified_sentence\"')
+        input_string = input_string.replace('"Response"', '"response"')
+
+        if "\"response\": \"" not in input_string:  # no quote opens the response value: splice in '", "response": "'
+            if "response\": " in input_string:
+                input_string = input_string.replace('"response": ', '\", \"response\": \"')
+            else:
+                input_string = input_string.replace('"response":', '\", \"response\": \"')
+
+        # If the text does not end with '"}', insert a quote before every "}" (not only the last one)
+        if not input_string.endswith('\"}'):
+            input_string = input_string.replace('}', '\"}')
+
+        parsed_json = json.loads(input_string)
+        return parsed_json
+    except json.JSONDecodeError as e:
+        # Repair failed: print the decoder error and the string as it stood after the repairs, then return None
+        print("Failed to parse JSON: " + str(e))
+        print("Input string: " + input_string)
        return None


@ -61,60 +122,3 @@ def clear_chat(uuid):
    """Clear the chat history for a given uuid"""
    logging.info("Cleared chat for uuid " + uuid)
    chat_messages[uuid] = []
-
-# def get_answer_from_response(response):
-#     if 'action' not in response:
-#         # Fix for when it responds in plaintext to follow-up questions
-#         # In that case the response is an OpenAIObject not a JSON
-#         return response.choices[0].message.content
-#     else:
-#         if response['action'] == 'clarify':
-#             return response['question']
-#         elif response['action'] == 'command':
-#             return response['comment']
-#         elif response['action'] == 'query':
-#             return response['device_description']
-#         elif response['action'] == 'answer':
-#             return response['answer']
-#         elif response['action'] == 'calculate':
-#             return response['calculation']
-#         else:
-#             return "I don't know how to respond to that..."
-
-
-# def get_conversation_as_one_message(uuid):
-#     """
-#     Prepare the messages to send to OpenAI API
-#     We don't use OpenAI way of using an array of messages because it doesn't work well with the prompt and chatgpt
-#     breaks out of character.
-#     So instead, we add the history to the system prompt and create a new conversation each time.
-#     It should not cost more tokens, but I am not sure at 100%.
-#     """
-#
-#     # Load the prompt
-#     prompt = open("/home/mathieu/PycharmProjects/jarvis-server-v2/jarvis/utils/chatgpt_prompt_2_smaller.txt", "r").read()
-#
-#     # Replace the variables in the prompt
-#     prompt = prompt.replace("{{timestamp}}", time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
-#     prompt = prompt.replace("{{location}}", "Lausanne in the canton Vaud of Switzerland")
-#
-#     # Check if the user has already asked a question (to keep context)
-#     if len(chat_messages[uuid]) > 1:
-#         history = ""
-#
-#         # Add the last 4 messages from the user to the prompt
-#         # we don't give the whole history because it adds a lot of tokens.
-#         history += "\nFor context, last discussion you had with user:\n"
-#         for message in chat_messages[uuid][-4:]:
-#             if message['role'] == "user":
-#                 history += "U: " + message['content'] + "\n"
-#             elif message['role'] == "assistant":
-#                 history += "Y: " + message['content'] + "\n"
-#
-#         # Replace the {{history}} variable in the prompt with the history
-#         prompt = prompt.replace("{{history}}", history)
-#     else:
-#         # If the user hasn't asked a question yet, remove the history part of the prompt
-#         prompt = prompt.replace("{{history}}", "")
-#
-#     return [{"role": "system", "content": prompt}]