Pass config everywhere in order to get rid of singleton (#4666)

Signed-off-by: Merwane Hamadi <merwanehamadi@gmail.com>
Author: merwanehamadi
Date: 2023-06-18 19:05:41 -07:00
Committed by: GitHub
Parent: 096d27f342
Commit: a7f805604c
44 changed files with 323 additions and 300 deletions
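
The refactor is mechanical but broad: the module-level CFG = Config() singleton is removed, and every function that read from it now takes an explicit config: Config parameter that the caller threads through. A minimal before/after sketch of the pattern, condensed from the diff below (illustrative, not the full change):

# Before: module-level singleton, resolved implicitly at import time
CFG = Config()

def summarize_text(text, instruction=None, question=None):
    model = CFG.fast_llm_model
    ...

# After: the caller supplies the Config instance explicitly
def summarize_text(text, config, instruction=None, question=None):
    model = config.fast_llm_model
    ...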


@@ -12,8 +12,6 @@ from autogpt.llm.utils import count_string_tokens, create_chat_completion
 from autogpt.logs import logger
 from autogpt.utils import batch
-CFG = Config()
 def _max_chunk_length(model: str, max: Optional[int] = None) -> int:
     model_max_input_tokens = OPEN_AI_MODELS[model].max_tokens - 1
@@ -60,13 +58,18 @@ def chunk_content(
 def summarize_text(
-    text: str, instruction: Optional[str] = None, question: Optional[str] = None
+    text: str,
+    config: Config,
+    instruction: Optional[str] = None,
+    question: Optional[str] = None,
 ) -> tuple[str, None | list[tuple[str, str]]]:
     """Summarize text using the OpenAI API
     Args:
         text (str): The text to summarize
+        config (Config): The config object
         instruction (str): Additional instruction for summarization, e.g. "focus on information related to polar bears", "omit personal information contained in the text"
         question (str): Question to answer in the summary
     Returns:
         str: The summary of the text
@@ -79,7 +82,7 @@ def summarize_text(
     if instruction and question:
         raise ValueError("Parameters 'question' and 'instructions' cannot both be set")
-    model = CFG.fast_llm_model
+    model = config.fast_llm_model
     if question:
         instruction = (
@@ -111,14 +114,18 @@ def summarize_text(
logger.debug(f"Summarizing with {model}:\n{summarization_prompt.dump()}\n")
summary = create_chat_completion(
summarization_prompt, temperature=0, max_tokens=500
summarization_prompt, config, temperature=0, max_tokens=500
)
logger.debug(f"\n{'-'*16} SUMMARY {'-'*17}\n{summary}\n{'-'*42}\n")
return summary.strip(), None
summaries: list[str] = []
chunks = list(split_text(text, for_model=model, max_chunk_length=max_chunk_length))
chunks = list(
split_text(
text, for_model=model, config=config, max_chunk_length=max_chunk_length
)
)
for i, (chunk, chunk_length) in enumerate(chunks):
logger.info(
@@ -138,7 +145,8 @@ def summarize_text(
 def split_text(
     text: str,
-    for_model: str = CFG.fast_llm_model,
+    for_model: str,
+    config: Config,
     with_overlap=True,
     max_chunk_length: Optional[int] = None,
 ):
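
Note that for_model also loses its CFG.fast_llm_model default: with the module-level singleton gone there is nothing left to read the default from, and a default expression like that is evaluated only once, at import time. A small sketch of the pitfall (names are illustrative, not part of the diff):

# A default bound to the singleton is frozen when the module is imported:
def old_split_text(text, for_model=CFG.fast_llm_model):  # evaluated once, at def time
    ...

# Requiring the caller to pass both values keeps them consistent with the live config:
def new_split_text(text, for_model, config):
    ...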
@@ -147,7 +155,9 @@ def split_text(
     Args:
         text (str): The text to split
         for_model (str): The model to chunk for; determines tokenizer and constraints
-        max_length (int, optional): The maximum length of each chunk
+        config (Config): The config object
+        with_overlap (bool, optional): Whether to allow overlap between chunks
+        max_chunk_length (int, optional): The maximum length of a chunk
     Yields:
         str: The next chunk of text
@@ -155,6 +165,7 @@ def split_text(
     Raises:
         ValueError: when a sentence is longer than the maximum length
     """
     max_length = _max_chunk_length(for_model, max_chunk_length)
     # flatten paragraphs to improve performance
@@ -168,7 +179,7 @@ def split_text(
     n_chunks = ceil(text_length / max_length)
     target_chunk_length = ceil(text_length / n_chunks)
-    nlp: spacy.language.Language = spacy.load(CFG.browse_spacy_language_model)
+    nlp: spacy.language.Language = spacy.load(config.browse_spacy_language_model)
     nlp.add_pipe("sentencizer")
     doc = nlp(text)
     sentences = [sentence.text.strip() for sentence in doc.sents]
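
Taken together, a caller of the refactored helpers now threads one Config instance through every call instead of relying on the import-time singleton. A hypothetical caller, assuming the config object is built at startup; variable names and the question string are illustrative:

config = Config()
page_text = "...scraped page contents..."

# summarize_text now takes config as its second argument and returns the summary
# plus optional per-chunk details
summary, _ = summarize_text(page_text, config, question="What is the page about?")

# split_text requires an explicit model name alongside the config and yields
# (chunk, chunk_length) pairs, as the enumerate() loop in summarize_text shows above
for chunk, chunk_length in split_text(
    page_text, for_model=config.fast_llm_model, config=config
):
    print(chunk_length, chunk[:40])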