Pass config everywhere in order to get rid of singleton (#4666)

Signed-off-by: Merwane Hamadi <merwanehamadi@gmail.com>
Author: merwanehamadi
Date: 2023-06-18 19:05:41 -07:00
Committed by: GitHub
Parent: 096d27f342
Commit: a7f805604c
44 changed files with 323 additions and 300 deletions
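
The refactor is mechanical but broad: the module-level CFG = Config() singleton is removed, and every function that read from it now takes an explicit config: Config parameter that the caller threads through. A minimal before/after sketch of the pattern, condensed from the diff below (illustrative, not the full change):

# Before: module-level singleton, resolved implicitly at import time
CFG = Config()

def summarize_text(text, instruction=None, question=None):
    model = CFG.fast_llm_model
    ...

# After: the caller supplies the Config instance explicitly
def summarize_text(text, config, instruction=None, question=None):
    model = config.fast_llm_model
    ...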


@@ -12,8 +12,6 @@ from autogpt.llm.utils import count_string_tokens, create_chat_completion
 from autogpt.logs import logger
 from autogpt.utils import batch
-CFG = Config()
 def _max_chunk_length(model: str, max: Optional[int] = None) -> int:
     model_max_input_tokens = OPEN_AI_MODELS[model].max_tokens - 1
@@ -60,13 +58,18 @@ def chunk_content(
 def summarize_text(
-    text: str, instruction: Optional[str] = None, question: Optional[str] = None
+    text: str,
+    config: Config,
+    instruction: Optional[str] = None,
+    question: Optional[str] = None,
 ) -> tuple[str, None | list[tuple[str, str]]]:
     """Summarize text using the OpenAI API
     Args:
         text (str): The text to summarize
+        config (Config): The config object
         instruction (str): Additional instruction for summarization, e.g. "focus on information related to polar bears", "omit personal information contained in the text"
         question (str): Question to answer in the summary
     Returns:
         str: The summary of the text
@@ -79,7 +82,7 @@ def summarize_text(
     if instruction and question:
         raise ValueError("Parameters 'question' and 'instructions' cannot both be set")
-    model = CFG.fast_llm_model
+    model = config.fast_llm_model
     if question:
         instruction = (
@@ -111,14 +114,18 @@ def summarize_text(
logger.debug(f"Summarizing with {model}:\n{summarization_prompt.dump()}\n")
summary = create_chat_completion(
summarization_prompt, temperature=0, max_tokens=500
summarization_prompt, config, temperature=0, max_tokens=500
)
logger.debug(f"\n{'-'*16} SUMMARY {'-'*17}\n{summary}\n{'-'*42}\n")
return summary.strip(), None
summaries: list[str] = []
chunks = list(split_text(text, for_model=model, max_chunk_length=max_chunk_length))
chunks = list(
split_text(
text, for_model=model, config=config, max_chunk_length=max_chunk_length
)
)
for i, (chunk, chunk_length) in enumerate(chunks):
logger.info(
@@ -138,7 +145,8 @@ def summarize_text(
 def split_text(
     text: str,
-    for_model: str = CFG.fast_llm_model,
+    for_model: str,
+    config: Config,
     with_overlap=True,
     max_chunk_length: Optional[int] = None,
 ):
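
Note that for_model also loses its CFG.fast_llm_model default: with the module-level singleton gone there is nothing left to read the default from, and a default expression like that is evaluated only once, at import time. A small sketch of the pitfall (names are illustrative, not part of the diff):

# A default bound to the singleton is frozen when the module is imported:
def old_split_text(text, for_model=CFG.fast_llm_model):  # evaluated once, at def time
    ...

# Requiring the caller to pass both values keeps them consistent with the live config:
def new_split_text(text, for_model, config):
    ...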
@@ -147,7 +155,9 @@ def split_text(
     Args:
         text (str): The text to split
         for_model (str): The model to chunk for; determines tokenizer and constraints
-        max_length (int, optional): The maximum length of each chunk
+        config (Config): The config object
+        with_overlap (bool, optional): Whether to allow overlap between chunks
+        max_chunk_length (int, optional): The maximum length of a chunk
     Yields:
         str: The next chunk of text
@@ -155,6 +165,7 @@ def split_text(
     Raises:
         ValueError: when a sentence is longer than the maximum length
     """
     max_length = _max_chunk_length(for_model, max_chunk_length)
     # flatten paragraphs to improve performance
@@ -168,7 +179,7 @@ def split_text(
     n_chunks = ceil(text_length / max_length)
     target_chunk_length = ceil(text_length / n_chunks)
-    nlp: spacy.language.Language = spacy.load(CFG.browse_spacy_language_model)
+    nlp: spacy.language.Language = spacy.load(config.browse_spacy_language_model)
     nlp.add_pipe("sentencizer")
     doc = nlp(text)
     sentences = [sentence.text.strip() for sentence in doc.sents]
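
Taken together, a caller of the refactored helpers now threads one Config instance through every call instead of relying on the import-time singleton. A hypothetical caller, assuming the config object is built at startup; variable names and the question string are illustrative:

config = Config()
page_text = "...scraped page contents..."

# summarize_text now takes config as its second argument and returns the summary
# plus optional per-chunk details
summary, _ = summarize_text(page_text, config, question="What is the page about?")

# split_text requires an explicit model name alongside the config and yields
# (chunk, chunk_length) pairs, as the enumerate() loop in summarize_text shows above
for chunk, chunk_length in split_text(
    page_text, for_model=config.fast_llm_model, config=config
):
    print(chunk_length, chunk[:40])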