fix(agent/text_processing): Fix extract_information LLM response parsing

OpenAI's newest models return JSON with markdown fences around it, breaking the `json.loads` parser. This commit adds an `extract_list_from_response` function to json_utils/utilities.py and uses this function to replace `json.loads` in `_process_text`.
2025-12-17 14:04:27 +01:00 · 2024-02-13 18:28:17 +01:00
parent 393d6b97e6
commit bb7f5abc6c
2 changed files with 28 additions and 2 deletions
--- a/autogpts/autogpt/autogpt/json_utils/utilities.py
+++ b/autogpts/autogpt/autogpt/json_utils/utilities.py
@@ -29,3 +29,27 @@ def extract_dict_from_response(response_content: str) -> dict[str, Any]:
            f"non-dict value {repr(result)}"
        )
    return result
 def extract_list_from_response(response_content: str) -> list[Any]:
    """Extract and parse a JSON list from an LLM response.

    The response may be bare JSON, JSON wrapped in a ``` code fence, or
    JSON embedded in surrounding prose. Candidates are tried in this order:

    1. The (stripped) content of the first ``` fenced block, if any.
    2. If that content is not valid JSON: the span from the first ``[`` to
       the last ``]`` inside the fenced block. This covers fences with an
       unexpected language tag or with prose around the list.
    3. Without a fence: the span from the first ``[`` to the last ``]`` of
       the whole response, or the whole response if it has no such span.

    Args:
        response_content: Raw text returned by the model.

    Returns:
        The parsed list.

    Raises:
        json.JSONDecodeError: If no candidate is valid JSON. The error of the
            first candidate is raised.
        ValueError: If the parsed JSON value is not a list.
    """
    # Sometimes the response includes the JSON in a code block with ```
    fence = re.search(r"```(?:json|JSON)*([\s\S]*?)```", response_content)
    text = fence.group(1).strip() if fence else response_content

    # Greedy on purpose: spans from the first "[" to the last "]" so that
    # nested lists stay intact.
    bracketed = re.search(r"\[[\s\S]*\]", text)

    if fence:
        candidates = [text]
        # Fallback for fences such as "```python\n[...]```" or fences that
        # contain a sentence before/after the list.
        if bracketed and bracketed.group() != text:
            candidates.append(bracketed.group())
    else:
        # The string may contain JSON.
        candidates = [bracketed.group() if bracketed else text]

    first_error = None
    for candidate in candidates:
        try:
            result = json.loads(candidate)
        except json.JSONDecodeError as e:
            if first_error is None:
                first_error = e
            continue
        break
    else:
        # `candidates` is never empty, so at least one error was recorded.
        raise first_error

    if not isinstance(result, list):
        raise ValueError(
            f"Response '''{candidate}''' evaluated to "
            f"non-list value {repr(result)}"
        )
    return result
--- a/autogpts/autogpt/autogpt/processing/text.py
+++ b/autogpts/autogpt/autogpt/processing/text.py
@@ -1,5 +1,4 @@
 """Text processing functions"""
 import json
 import logging
 import math
 from typing import Iterator, Optional, TypeVar
@@ -13,6 +12,7 @@ from autogpt.core.resource.model_providers import (
    ChatModelProvider,
    ModelTokenizer,
 )
 from autogpt.json_utils.utilities import extract_list_from_response
 logger = logging.getLogger(__name__)
@@ -161,7 +161,9 @@ async def _process_text(
            temperature=0.5,
            max_tokens=max_result_tokens,
            completion_parser=lambda s: (
-                json.loads(s.content) if output_type is not str else None
+                extract_list_from_response(s.content)
                if output_type is not str
                else None
            ),
        )