diff --git a/autogpts/autogpt/autogpt/commands/web_selenium.py b/autogpts/autogpt/autogpt/commands/web_selenium.py
index 57f650fb..5585e2cb 100644
--- a/autogpts/autogpt/autogpt/commands/web_selenium.py
+++ b/autogpts/autogpt/autogpt/commands/web_selenium.py
@@ -30,11 +30,11 @@ from webdriver_manager.chrome import ChromeDriverManager
 from webdriver_manager.firefox import GeckoDriverManager
 from webdriver_manager.microsoft import EdgeChromiumDriverManager as EdgeDriverManager
 
-from autogpt.agents.utils.exceptions import CommandExecutionError
+from autogpt.agents.utils.exceptions import CommandExecutionError, TooMuchOutputError
 from autogpt.command_decorator import command
 from autogpt.core.utils.json_schema import JSONSchema
 from autogpt.processing.html import extract_hyperlinks, format_hyperlinks
-from autogpt.processing.text import summarize_text
+from autogpt.processing.text import extract_information, summarize_text
 from autogpt.url_utils.validators import validate_url
 
 COMMAND_CATEGORY = "web_browse"
@@ -49,7 +49,7 @@ if TYPE_CHECKING:
 logger = logging.getLogger(__name__)
 
 FILE_DIR = Path(__file__).parent.parent
-TOKENS_TO_TRIGGER_SUMMARY = 50
+MAX_RAW_CONTENT_LENGTH = 500
 LINKS_TO_RETURN = 20
 
 
@@ -60,10 +60,8 @@ class BrowsingError(CommandExecutionError):
 @command(
     "read_webpage",
     (
-        "Read a webpage, and extract specific information from it"
-        " if a question is specified."
-        " If you are looking to extract specific information from the webpage,"
-        " you should specify a question."
+        "Read a webpage, and extract specific information from it."
+        " You must specify either topics_of_interest, a question, or get_raw_content."
     ),
     {
         "url": JSONSchema(
@@ -71,6 +69,15 @@ class BrowsingError(CommandExecutionError):
             description="The URL to visit",
             required=True,
         ),
+        "topics_of_interest": JSONSchema(
+            type=JSONSchema.Type.ARRAY,
+            items=JSONSchema(type=JSONSchema.Type.STRING),
+            description=(
+                "A list of topics about which you want to extract information "
+                "from the page."
+            ),
+            required=False,
+        ),
         "question": JSONSchema(
             type=JSONSchema.Type.STRING,
             description=(
@@ -78,10 +85,25 @@ class BrowsingError(CommandExecutionError):
             ),
             required=False,
         ),
+        "get_raw_content": JSONSchema(
+            type=JSONSchema.Type.BOOLEAN,
+            description=(
+                "If true, the unprocessed content of the webpage will be returned. "
+                "This consumes a lot of tokens, so use it with caution."
+            ),
+            required=False,
+        ),
     },
 )
 @validate_url
-async def read_webpage(url: str, agent: Agent, question: str = "") -> str:
+async def read_webpage(
+    url: str,
+    agent: Agent,
+    *,
+    topics_of_interest: list[str] = [],
+    get_raw_content: bool = False,
+    question: str = "",
+) -> str:
     """Browse a website and return the answer and links to the user
 
     Args:
@@ -103,12 +125,19 @@ async def read_webpage(url: str, agent: Agent, question: str = "") -> str:
         summarized = False
         if not text:
             return f"Website did not contain any text.\n\nLinks: {links}"
-        elif (
-            agent.llm_provider.count_tokens(text, agent.llm.name)
-            > TOKENS_TO_TRIGGER_SUMMARY
-        ):
+        elif get_raw_content:
+            if (
+                output_tokens := agent.llm_provider.count_tokens(text, agent.llm.name)
+            ) > MAX_RAW_CONTENT_LENGTH:
+                oversize_factor = round(output_tokens / MAX_RAW_CONTENT_LENGTH, 1)
+                raise TooMuchOutputError(
+                    f"Page content is {oversize_factor}x the allowed length "
+                    "for `get_raw_content=true`"
+                )
+            return text + (f"\n\nLinks: {links}" if links else "")
+        else:
             text = await summarize_memorize_webpage(
-                url, text, question or None, agent, driver
+                url, text, question or None, topics_of_interest, agent, driver
             )
             return_literal_content = bool(question)
             summarized = True
@@ -265,6 +294,7 @@ async def summarize_memorize_webpage(
     url: str,
     text: str,
     question: str | None,
+    topics_of_interest: list[str],
     agent: Agent,
     driver: Optional[WebDriver] = None,
 ) -> str:
@@ -295,10 +325,21 @@ async def summarize_memorize_webpage(
     # )
     # memory.add(new_memory)
 
-    summary, _ = await summarize_text(
-        text,
-        question=question,
-        llm_provider=agent.llm_provider,
-        config=agent.legacy_config,  # FIXME
-    )
-    return summary
+    result = None
+    information = None
+    if topics_of_interest:
+        information = await extract_information(
+            text,
+            topics_of_interest=topics_of_interest,
+            llm_provider=agent.llm_provider,
+            config=agent.legacy_config,
+        )
+        return "\n".join(f"* {i}" for i in information)
+    else:
+        result, _ = await summarize_text(
+            text,
+            question=question,
+            llm_provider=agent.llm_provider,
+            config=agent.legacy_config,
+        )
+        return result
diff --git a/autogpts/autogpt/autogpt/processing/text.py b/autogpts/autogpt/autogpt/processing/text.py
index 5fed8e6b..75c064a1 100644
--- a/autogpts/autogpt/autogpt/processing/text.py
+++ b/autogpts/autogpt/autogpt/processing/text.py
@@ -1,4 +1,5 @@
 """Text processing functions"""
+import json
 import logging
 import math
 from typing import Iterator, Optional, TypeVar
@@ -10,6 +11,7 @@ from autogpt.core.prompting import ChatPrompt
 from autogpt.core.resource.model_providers import (
     ChatMessage,
     ChatModelProvider,
+    ChatModelResponse,
     ModelTokenizer,
 )
 
@@ -57,105 +59,165 @@ async def summarize_text(
     text: str,
     llm_provider: ChatModelProvider,
     config: Config,
-    instruction: Optional[str] = None,
     question: Optional[str] = None,
-) -> tuple[str, None | list[tuple[str, str]]]:
-    """Summarize text using the OpenAI API
-
-    Args:
-        text (str): The text to summarize.
-        llm_provider: LLM provider to use for summarization.
-        config (Config): The global application config, containing the FAST_LLM setting.
-        instruction (str): Additional instruction for summarization, e.g.
-            "focus on information related to polar bears", or
-            "omit personal information contained in the text".
-        question (str): Question to be answered by the summary.
-
-    Returns:
-        str: The summary of the text
-        list[(summary, chunk)]: Text chunks and their summary, if the text was chunked.
-            None otherwise.
-    """
-    if not text:
-        raise ValueError("No text to summarize")
-
-    if instruction and question:
-        raise ValueError("Parameters 'question' and 'instructions' cannot both be set")
-
-    model = config.fast_llm
-
+    instruction: Optional[str] = None,
+) -> tuple[str, list[tuple[str, str]]]:
     if question:
+        if instruction:
+            raise ValueError(
+                "Parameters 'question' and 'instructions' cannot both be set"
+            )
+
         instruction = (
-            'Include any information that can be used to answer the question: "%s". '
-            "Do not directly answer the question itself."
-        ) % question
-
-    summarization_prompt = ChatPrompt(messages=[])
-
-    text_tlength = llm_provider.count_tokens(text, model)
-    logger.info(f"Text length: {text_tlength} tokens")
-
-    # reserve 50 tokens for summary prompt, 500 for the response
-    max_chunk_length = llm_provider.get_token_limit(model) - 550
-    logger.info(f"Max chunk length: {max_chunk_length} tokens")
-
-    if text_tlength < max_chunk_length:
-        # summarization_prompt.add("user", text)
-        summarization_prompt.messages.append(
-            ChatMessage.user(
-                "Write a concise summary of the following text."
-                f"{f' {instruction}' if instruction is not None else ''}:"
-                "\n\n\n"
-                f'LITERAL TEXT: """{text}"""'
-                "\n\n\n"
-                "CONCISE SUMMARY: The text is best summarized as"
-            )
+            f'From the text, answer the question: "{question}". '
+            "If the answer is not in the text, indicate this clearly "
+            "and concisely state why the text is not suitable to answer the question."
+        )
+    elif not instruction:
+        instruction = (
+            "Summarize or describe the text clearly and concisely, "
+            "whichever seems more appropriate."
         )
 
-        summary = (
-            await llm_provider.create_chat_completion(
-                model_prompt=summarization_prompt.messages,
-                model_name=model,
-                temperature=0,
-                max_tokens=500,
-            )
-        ).response.content
-
-        logger.debug(f"\n{'-'*16} SUMMARY {'-'*17}\n{summary}\n{'-'*42}\n")
-        return summary.strip(), None
-
-    summaries: list[str] = []
-    chunks = list(
-        split_text(
-            text,
-            config=config,
-            max_chunk_length=max_chunk_length,
-            tokenizer=llm_provider.get_tokenizer(model),
-        )
-    )
-
-    for i, (chunk, chunk_length) in enumerate(chunks):
-        logger.info(
-            f"Summarizing chunk {i + 1} / {len(chunks)} of length {chunk_length} tokens"
-        )
-        summary, _ = await summarize_text(
-            text=chunk,
-            instruction=instruction,
-            llm_provider=llm_provider,
-            config=config,
-        )
-        summaries.append(summary)
-
-    logger.info(f"Summarized {len(chunks)} chunks")
-
-    summary, _ = await summarize_text(
-        "\n\n".join(summaries),
+    return await _process_text(  # type: ignore
+        text=text,
+        instruction=instruction,
         llm_provider=llm_provider,
         config=config,
     )
-    return summary.strip(), [
-        (summaries[i], chunks[i][0]) for i in range(0, len(chunks))
-    ]
+
+
+async def extract_information(
+    source_text: str,
+    topics_of_interest: list[str],
+    llm_provider: ChatModelProvider,
+    config: Config,
+) -> list[str]:
+    fmt_topics_list = "\n".join(f"* {topic}." for topic in topics_of_interest)
+    instruction = (
+        "Extract relevant pieces of information about the following topics:\n"
+        f"{fmt_topics_list}\n"
+        "Reword pieces of information if needed to make them self-explanatory. "
+        "Be concise.\n\n"
+        "Respond with an `Array` in JSON format AND NOTHING ELSE. "
+        'If the text contains no relevant information, return "[]".'
+    )
+    return await _process_text(  # type: ignore
+        text=source_text,
+        instruction=instruction,
+        output_type=list[str],
+        llm_provider=llm_provider,
+        config=config,
+    )
+
+
+async def _process_text(
+    text: str,
+    instruction: str,
+    llm_provider: ChatModelProvider,
+    config: Config,
+    output_type: type[str | list[str]] = str,
+) -> tuple[str, list[tuple[str, str]]] | list[str]:
+    """Process text using the OpenAI API for summarization or information extraction
+
+    Params:
+        text (str): The text to process.
+        instruction (str): Additional instruction for processing.
+        llm_provider: LLM provider to use.
+        config (Config): The global application config.
+        output_type: `str` for summaries or `list[str]` for piece-wise info extraction.
+
+    Returns:
+        For summarization: tuple[str, None | list[(summary, chunk)]]
+        For piece-wise information extraction: list[str]
+    """
+    if not text.strip():
+        raise ValueError("No content")
+
+    model = config.fast_llm
+
+    text_tlength = llm_provider.count_tokens(text, model)
+    logger.debug(f"Text length: {text_tlength} tokens")
+
+    max_result_tokens = 500
+    max_chunk_length = llm_provider.get_token_limit(model) - max_result_tokens - 50
+    logger.debug(f"Max chunk length: {max_chunk_length} tokens")
+
+    if text_tlength < max_chunk_length:
+        prompt = ChatPrompt(
+            messages=[
+                ChatMessage.system(
+                    "The user is going to give you a text enclosed in triple quotes. "
+                    f"{instruction}"
+                ),
+                ChatMessage.user(f'"""{text}"""'),
+            ]
+        )
+
+        logger.debug(f"PROCESSING:\n{prompt}")
+
+        response = await llm_provider.create_chat_completion(
+            model_prompt=prompt.messages,
+            model_name=model,
+            temperature=0.5,
+            max_tokens=max_result_tokens,
+            completion_parser=lambda s: (
+                json.loads(s.content) if output_type is not str else None
+            ),
+        )
+
+        if output_type == list[str]:
+            logger.debug(f"Raw LLM response: {repr(response.response.content)}")
+            fmt_result_bullet_list = "\n".join(f"* {r}" for r in response.parsed_result)
+            logger.debug(
+                f"\n{'-'*11} EXTRACTION RESULT {'-'*12}\n"
+                f"{fmt_result_bullet_list}\n"
+                f"{'-'*42}\n"
+            )
+            return response.parsed_result
+        else:
+            summary = response.response.content
+            logger.debug(f"\n{'-'*16} SUMMARY {'-'*17}\n{summary}\n{'-'*42}\n")
+            return summary.strip(), [(summary, text)]
+    else:
+        chunks = list(
+            split_text(
+                text,
+                config=config,
+                max_chunk_length=max_chunk_length,
+                tokenizer=llm_provider.get_tokenizer(model),
+            )
+        )
+
+        processed_results = []
+        for i, (chunk, _) in enumerate(chunks):
+            logger.info(f"Processing chunk {i + 1} / {len(chunks)}")
+            chunk_result = await _process_text(
+                text=chunk,
+                instruction=instruction,
+                output_type=output_type,
+                llm_provider=llm_provider,
+                config=config,
+            )
+            processed_results.extend(
+                chunk_result if output_type == list[str] else [chunk_result]
+            )
+
+        if output_type == list[str]:
+            return processed_results
+        else:
+            summary, _ = await _process_text(
+                "\n\n".join([result[0] for result in processed_results]),
+                instruction=(
+                    "The text consists of multiple partial summaries. "
+                    "Combine these partial summaries into one."
+                ),
+                llm_provider=llm_provider,
+                config=config,
+            )
+            return summary.strip(), [
+                (processed_results[i], chunks[i][0]) for i in range(0, len(chunks))
+            ]
 
 
 def split_text(
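Reviewer's note on the new `get_raw_content` guard: with `MAX_RAW_CONTENT_LENGTH = 500` tokens, a 2,600-token page gives `round(2600 / 500, 1) == 5.2` and raises `TooMuchOutputError`. Below is a minimal, runnable sketch of that branch; the `count_tokens` stub and the local exception class are stand-ins for `agent.llm_provider.count_tokens` and `autogpt.agents.utils.exceptions.TooMuchOutputError`, and are not part of the patch.

# Illustrative sketch only; the helpers below are local stand-ins.
MAX_RAW_CONTENT_LENGTH = 500  # tokens, same limit as in the diff above


class TooMuchOutputError(Exception):
    """Local stand-in for the real exception class."""


def count_tokens(text: str) -> int:
    # Crude stand-in for the provider's tokenizer: roughly one token per word.
    return len(text.split())


def raw_content_or_error(text: str, links: list[str]) -> str:
    # Mirrors the `get_raw_content=True` branch of read_webpage.
    if (output_tokens := count_tokens(text)) > MAX_RAW_CONTENT_LENGTH:
        oversize_factor = round(output_tokens / MAX_RAW_CONTENT_LENGTH, 1)
        raise TooMuchOutputError(
            f"Page content is {oversize_factor}x the allowed length "
            "for `get_raw_content=true`"
        )
    return text + (f"\n\nLinks: {links}" if links else "")


print(raw_content_or_error("short page body", ["https://example.com"]))
# raw_content_or_error("word " * 2600, [])  -> raises TooMuchOutputError (5.2x)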
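On the processing side, `_process_text` only JSON-decodes the completion when `output_type` is `list[str]` (the `extract_information` path), and the parsed list is rendered as a bullet list both in `_process_text`'s debug log and in `summarize_memorize_webpage`'s return value. A small runnable illustration of that parse-and-format step, using a made-up completion string in place of a real model response:

import json

# Stand-in for a model completion when output_type is list[str]:
# extract_information asks the model for a JSON array and nothing else.
raw_completion = (
    '["The page documents the read_webpage command.", '
    '"Raw output is capped at 500 tokens."]'
)

parsed_result: list[str] = json.loads(raw_completion)

# Same formatting used for the debug log and for the command's return value.
fmt_result_bullet_list = "\n".join(f"* {r}" for r in parsed_result)
print(fmt_result_bullet_list)
# * The page documents the read_webpage command.
# * Raw output is capped at 500 tokens.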