Correct and clean up JSON handling (#4655)

* Correct and clean up JSON handling

* Use ast for message history too

* Lint

* Add comments explaining why we use literal_eval

* Add descriptions to llm_response_format schema

* Parse responses in code blocks

* Be more careful when parsing in code blocks

* Lint
This commit is contained in:
Erik Peterson
2023-06-13 09:54:50 -07:00
committed by GitHub
parent 7bf39cbb72
commit 07d9b584f7
15 changed files with 135 additions and 730 deletions

View File

@@ -1,7 +1,8 @@
"""Utilities for the json_fixes package."""
import ast
import json
import os.path
import re
from typing import Any
from jsonschema import Draft7Validator
@@ -12,37 +13,47 @@ CFG = Config()
LLM_DEFAULT_RESPONSE_FORMAT = "llm_response_format_1"
def extract_char_position(error_message: str) -> int:
"""Extract the character position from the JSONDecodeError message.
def extract_json_from_response(response_content: str) -> dict:
# Sometimes the response includes the JSON in a code block with ```
if response_content.startswith("```") and response_content.endswith("```"):
# Discard the first and last ```, then re-join in case the response naturally included ```
response_content = "```".join(response_content.split("```")[1:-1])
Args:
error_message (str): The error message from the JSONDecodeError
exception.
Returns:
int: The character position.
"""
char_pattern = re.compile(r"\(char (\d+)\)")
if match := char_pattern.search(error_message):
return int(match[1])
else:
raise ValueError("Character position not found in the error message.")
# response content comes from OpenAI as a Python `str(content_dict)`, literal_eval reverses this
try:
return ast.literal_eval(response_content)
except BaseException as e:
logger.error(f"Error parsing JSON response with literal_eval {e}")
# TODO: How to raise an error here without causing the program to exit?
return {}
def validate_json(json_object: object, schema_name: str) -> dict | None:
def llm_response_schema(
schema_name: str = LLM_DEFAULT_RESPONSE_FORMAT,
) -> dict[str, Any]:
filename = os.path.join(os.path.dirname(__file__), f"{schema_name}.json")
with open(filename, "r") as f:
return json.load(f)
def validate_json(
json_object: object, schema_name: str = LLM_DEFAULT_RESPONSE_FORMAT
) -> bool:
"""
:type schema_name: object
:param schema_name: str
:type json_object: object
Returns:
bool: Whether the json_object is valid or not
"""
scheme_file = os.path.join(os.path.dirname(__file__), f"{schema_name}.json")
with open(scheme_file, "r") as f:
schema = json.load(f)
schema = llm_response_schema(schema_name)
validator = Draft7Validator(schema)
if errors := sorted(validator.iter_errors(json_object), key=lambda e: e.path):
logger.error("The JSON object is invalid.")
for error in errors:
logger.error(f"JSON Validation Error: {error}")
if CFG.debug_mode:
logger.error(
json.dumps(json_object, indent=4)
@@ -51,10 +62,11 @@ def validate_json(json_object: object, schema_name: str) -> dict | None:
for error in errors:
logger.error(f"Error: {error.message}")
else:
logger.debug("The JSON object is valid.")
return False
return json_object
logger.debug("The JSON object is valid.")
return True
def validate_json_string(json_string: str, schema_name: str) -> dict | None:
@@ -66,7 +78,9 @@ def validate_json_string(json_string: str, schema_name: str) -> dict | None:
try:
json_loaded = json.loads(json_string)
return validate_json(json_loaded, schema_name)
if not validate_json(json_loaded, schema_name):
return None
return json_loaded
except:
return None