100 lines
5.7 KiB
Python
100 lines
5.7 KiB
Python
import json
|
|
import re
|
|
|
|
|
|
# A double quote squeezed between two word characters (ASCII or Latin-1
# accented letters) is treated as an apostrophe that was turned into a quote,
# e.g. didn"t -> didn't.
_QUOTE_INSIDE_WORD = re.compile(r'(?<=[\wÀ-ÖØ-öø-ÿ])"(?=[\wÀ-ÖØ-öø-ÿ])')

# Key spellings that are rewritten to their canonical lower-case form.
_KEY_ALIASES = {
    "Simplified Sentence": "simplified_sentence",
    "Simplified_sentence": "simplified_sentence",
    "Response": "response",
}


def _load_expected_object(text):
    """Parse ``text`` strictly; return the dict only if it carries a known key.

    Returns None when ``text`` is not valid JSON, is not a JSON object, or has
    neither a "response" nor a "simplified_sentence" key once the key aliases
    have been normalised. Such inputs are left to the repair heuristics.
    """
    try:
        parsed = json.loads(text)
    except json.JSONDecodeError:
        return None
    if not isinstance(parsed, dict):
        return None
    normalised = {_KEY_ALIASES.get(key, key): value for key, value in parsed.items()}
    if "response" in normalised or "simplified_sentence" in normalised:
        return normalised
    return None


def _repair_json_text(text):
    """Apply the ordered string rewrites that turn near-JSON into JSON text.

    The order of the steps matters: later steps rely on the quoting and
    wrapping performed by earlier ones.
    """
    # Keep only what lies between the first "{" and the last "}".
    if "{" in text and "}" in text:
        text = text.split("{", 1)[1].rsplit("}", 1)[0]

    # Preprocess the input to fix common issues.
    text = text.replace('\n', '')
    text = text.replace("'", '"')
    text = text.replace('simplified_sentence:', '"simplified_sentence":')
    text = text.replace('response:', '"response":')
    text = text.replace(',}', '}')

    # Comma handling around closing braces, then drop trailing commas.
    text = text.replace('}', '},')
    text = text.replace(',"}', '}')
    text = text.rstrip(',')

    # Wrap the input in curly braces if necessary.
    if not text.startswith('{'):
        text = '{' + text
    if not text.endswith('}'):
        text += '}'

    # Turn quotes inside words back into apostrophes (didn"t -> didn't).
    text = _QUOTE_INSIDE_WORD.sub("'", text)

    # Normalise key spellings; the first rewrite also opens the value quote.
    text = text.replace('{"Simplified Sentence": ', '{"simplified_sentence": "')
    text = text.replace('"Simplified Sentence"', '"simplified_sentence"')
    text = text.replace('"Simplified_sentence"', '"simplified_sentence"')
    text = text.replace('"Response"', '"response"')

    # An unquoted response value: close the previous value and open this one.
    if '"response": "' not in text:
        if 'response": ' in text:
            text = text.replace('"response": ', '", "response": "')
        else:
            text = text.replace('"response":', '", "response": "')

    # If missing a closing quote, add it.
    # NOTE(review): the nesting of this step was reconstructed from a listing
    # that had lost its indentation; it is applied unconditionally here.
    if not text.endswith('"}'):
        text = text.replace('}', '"}')

    return text


def _payload_from_plain_text(text):
    """Build the result dict from text that contains no recognisable JSON keys.

    Handles "Simplifié: ... Réponse: ..." replies and replies with exactly two
    " : " separators; anything else becomes the "response" value as a whole.
    The dict is built directly instead of concatenating JSON text, so
    backslashes or control characters in ``text`` cannot make parsing fail.
    """
    # Both markers are required; with only one of them the split below would
    # index past the end of the list.
    if "Simplifié: " in text and "Réponse:" in text:
        simplified_sentence = text.split("Simplifié: ")[1].split("Réponse: ")[0]
        response = text.split("Réponse:")[1]
        return {"simplified_sentence": simplified_sentence, "response": response}

    parts = text.split(" : ")
    # Two colons only help if both are written as " : "; otherwise fall through.
    if text.count(':') == 2 and len(parts) == 3:
        return {"simplified_sentence": parts[1].split(". ")[0], "response": parts[2]}

    return {"response": text}


def parse_invalid_json(input_string):
    """Parse a possibly malformed JSON-like reply into a dict.

    Args:
        input_string: Text expected to carry "simplified_sentence" and/or
            "response", as JSON, as near-JSON (missing braces, unquoted keys,
            wrong quotes, capitalised keys), or as plain text. Non-string
            values are converted with ``str()``.

    Returns:
        The parsed dict, or None when ``input_string`` is None or the repaired
        text still is not valid JSON (the decoding error and the repaired
        text are printed in that case).
    """
    if input_string is None:
        return None
    text = input_string if isinstance(input_string, str) else str(input_string)

    # Well-formed input must not go through the rewrites: they corrupt valid
    # JSON (compact separators, apostrophes before a space, ...).
    candidates = [text]
    if "{" in text and "}" in text:
        candidates.append("{" + text.split("{", 1)[1].rsplit("}", 1)[0] + "}")
    for candidate in candidates:
        payload = _load_expected_object(candidate)
        if payload is not None:
            return payload

    repaired = _repair_json_text(text)

    # If at this point no JSON key is found, treat the reply as plain text.
    if "response" not in repaired and "simplified_sentence" not in repaired:
        plain = repaired.replace("{", "").replace("}", "").replace('"', "")
        return _payload_from_plain_text(plain)

    try:
        return json.loads(repaired)
    except json.JSONDecodeError as e:
        # Report the failure and signal it to the caller with None.
        print("Failed to parse JSON: " + str(e))
        print("Input string: " + repaired)
        return None
|
|
|
|
|
|
if __name__ == '__main__':
    # Manual smoke test: run the parser over sample replies and print each
    # result (a dict, or None when the reply could not be parsed).
    inputs = [
        # Key/value pairs without enclosing braces.
        '"simplified_sentence": "Turn on living room TV",\n"response": "Sure thing, turning on the living room TV."',
        'simplified_sentence: "Turn on living room TV", response: "Sure thing, turning on the living room TV."',
        # Well-formed JSON, then variants with one or both keys unquoted.
        '{"simplified_sentence": "Turn on living room TV", "response": "Sure thing, turning on the living room TV."}',
        '{simplified_sentence: "Turn on living room TV", response: "Sure thing, turning on the living room TV."}',
        '{simplified_sentence: "Turn on living room TV", "response": "Sure thing, turning on the living room TV."}',
        '{"simplified_sentence": "Turn on living room TV", response: "Sure thing, turning on the living room TV."}',
        # Only one of the two braces present.
        '{simplified_sentence: "Turn on living room TV", response: "Sure thing, turning on the living room TV."',
        'simplified_sentence: "Turn on living room TV", response: "Sure thing, turning on the living room TV."}',
        '{"simplified_sentence": "Turn on living room TV", "response": "Sure thing, turning on the living room TV."',
        '"simplified_sentence": "Turn on living room TV", "response": "Sure thing, turning on the living room TV."}',
        # Extra text in front of the JSON object.
        '\"Turn on the TV in the living room\" - \"Sure thing, boss.\" \n\n{\"simplified_sentence\": \"Turn on the TV in the living room\", \"response\": \"Sure thing, boss.\"}',
        # Capitalised keys, unquoted values, a quote used as an apostrophe.
        '{"Simplified Sentence": She managed to turn off the living room couch."Response": Congratulations, I didn"t know a couch could be turned on in the first place.}',
        '\"Simplified_sentence\": \"Turn off the couch in the living room.\",\n\"Response\": \"Sure, let me handle that for you.\"',
        '{"simplified_sentence": "Éteindre la télé du salon.", "response": "D\'accord, j"éteins la télé du salon."}',
        # Plain-text replies with no JSON keys at all.
        'Simplifié: Age de Robert Downey Jr. durant Iron Man1.Réponse: RObert Downey Jr avait 43 ans lors de las ortie u premier film Iron Man en 2008',
        'Bien sur, voici une phrase simplifiée : Eteins la lumière du salon. Et voici la réponse : D\'accord, j\'éteins la lumière du salon.',
    ]

    for sample in inputs:
        print(parse_invalid_json(sample))
|