Correct and clean up JSON handling (#4655)

* Correct and clean up JSON handling
* Use ast for message history too
* Lint
* Add comments explaining why we use literal_eval
* Add descriptions to llm_response_format schema
* Parse responses in code blocks
* Be more careful when parsing in code blocks
* Lint
2025-12-24 09:24:27 +01:00 · 2023-06-13 09:54:50 -07:00
parent 7bf39cbb72
commit 07d9b584f7
15 changed files with 135 additions and 730 deletions
--- a/autogpt/json_utils/utilities.py
+++ b/autogpt/json_utils/utilities.py
@@ -1,7 +1,8 @@
 """Utilities for the json_fixes package."""
+import ast
 import json
 import os.path
-import re
+from typing import Any

 from jsonschema import Draft7Validator

@@ -12,37 +13,47 @@ CFG = Config()
 LLM_DEFAULT_RESPONSE_FORMAT = "llm_response_format_1"


-def extract_char_position(error_message: str) -> int:
-    """Extract the character position from the JSONDecodeError message.
+def extract_json_from_response(response_content: str) -> dict:
+    # Sometimes the response includes the JSON in a code block with ```
+    if response_content.startswith("```") and response_content.endswith("```"):
+        # Discard the first and last ```, then re-join in case the response naturally included ```
+        response_content = "```".join(response_content.split("```")[1:-1])

-    Args:
-        error_message (str): The error message from the JSONDecodeError
-          exception.
-
-    Returns:
-        int: The character position.
-    """
-
-    char_pattern = re.compile(r"\(char (\d+)\)")
-    if match := char_pattern.search(error_message):
-        return int(match[1])
-    else:
-        raise ValueError("Character position not found in the error message.")
+    # response content comes from OpenAI as a Python `str(content_dict)`, literal_eval reverses this
+    try:
+        return ast.literal_eval(response_content)
+    except BaseException as e:
+        logger.error(f"Error parsing JSON response with literal_eval {e}")
+        # TODO: How to raise an error here without causing the program to exit?
+        return {}


-def validate_json(json_object: object, schema_name: str) -> dict | None:
+def llm_response_schema(
+    schema_name: str = LLM_DEFAULT_RESPONSE_FORMAT,
+) -> dict[str, Any]:
+    filename = os.path.join(os.path.dirname(__file__), f"{schema_name}.json")
+    with open(filename, "r") as f:
+        return json.load(f)
+
+
+def validate_json(
+    json_object: object, schema_name: str = LLM_DEFAULT_RESPONSE_FORMAT
+) -> bool:
    """
    :type schema_name: object
    :param schema_name: str
    :type json_object: object
+
+    Returns:
+        bool: Whether the json_object is valid or not
    """
-    scheme_file = os.path.join(os.path.dirname(__file__), f"{schema_name}.json")
-    with open(scheme_file, "r") as f:
-        schema = json.load(f)
+    schema = llm_response_schema(schema_name)
    validator = Draft7Validator(schema)

    if errors := sorted(validator.iter_errors(json_object), key=lambda e: e.path):
-        logger.error("The JSON object is invalid.")
+        for error in errors:
+            logger.error(f"JSON Validation Error: {error}")
+
        if CFG.debug_mode:
            logger.error(
                json.dumps(json_object, indent=4)
@@ -51,10 +62,11 @@ def validate_json(json_object: object, schema_name: str) -> dict | None:

            for error in errors:
                logger.error(f"Error: {error.message}")
-    else:
-        logger.debug("The JSON object is valid.")
+        return False

-    return json_object
+    logger.debug("The JSON object is valid.")
+
+    return True


 def validate_json_string(json_string: str, schema_name: str) -> dict | None:
@@ -66,7 +78,9 @@ def validate_json_string(json_string: str, schema_name: str) -> dict | None:

    try:
        json_loaded = json.loads(json_string)
-        return validate_json(json_loaded, schema_name)
+        if not validate_json(json_loaded, schema_name):
+            return None
+        return json_loaded
    except:
        return None