feat(agent): Component-based Agents (#7054)

This incremental re-architecture unifies Agent code and plugins, so everything is component-based.

## Breaking changes

- Removed command categories and the `DISABLED_COMMAND_CATEGORIES` environment variable. Use the `DISABLED_COMMANDS` environment variable to disable individual commands.
- Changed the `command` decorator; old-style commands are no longer supported. Implement `CommandProvider` on components instead (a minimal sketch follows this list).
- Removed `CommandRegistry`, now all commands are provided by components implementing `CommandProvider`.
- Removed `prompt_config` from `AgentSettings`.
- Removed plugin support: old plugins are no longer loaded or executed.
- Removed `PromptScratchpad`; it was only used by plugins and is no longer needed.
- Changed `ThoughtProcessOutput` from a tuple to a pydantic `BaseModel`.
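
For illustration, a minimal `CommandProvider` component might look like the sketch below. It is modeled on the new `FileManagerComponent` in this diff; `GreetingComponent` and `say_hello` are made-up names used only for the example.

```python
from typing import Iterator

from autogpt.agents.protocols import CommandProvider
from autogpt.command_decorator import command
from autogpt.core.utils.json_schema import JSONSchema
from autogpt.models.command import Command


class GreetingComponent(CommandProvider):
    """Hypothetical component that exposes a single command to the agent."""

    def get_commands(self) -> Iterator[Command]:
        # Commands are yielded as bound methods decorated with @command.
        yield self.say_hello

    @command(
        parameters={
            "name": JSONSchema(
                type=JSONSchema.Type.STRING,
                description="Who to greet",
                required=True,
            )
        },
    )
    def say_hello(self, name: str) -> str:
        """Greet the given person."""
        return f"Hello, {name}!"
```

Note that commands no longer receive an `agent` argument; they operate on their component's own state (compare `command(**arguments)` with the old `command(**arguments, agent=agent)` further down in this diff).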

## Other changes

- Created `AgentComponent`, component protocols, and the logic to execute them.
- `BaseAgent` and `Agent` are now composed of components (a sketch of the composition follows this list).
- Moved some logic from `BaseAgent` to `Agent`.
- Moved agent features and commands to components.
- Removed the check that prevented executing the same operation twice in a row.
- Removed file logging from `FileManagerComponent` (formerly `AgentFileManagerMixin`).
- Updated tests.
- Added docs.
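
As a rough sketch of how the pieces fit together (simplified from the `Agent` class in this diff; `MyAgent` is illustrative, the component set is abbreviated, and the required `execute` override is omitted for brevity):

```python
from autogpt.agents.base import BaseAgent
from autogpt.agents.protocols import (
    CommandProvider,
    DirectiveProvider,
    MessageProvider,
)
from autogpt.commands.system import SystemComponent
from autogpt.commands.web_search import WebSearchComponent


class MyAgent(BaseAgent):
    """Illustrative agent composed of a minimal set of components."""

    def __init__(self, settings, legacy_config):
        super().__init__(settings)
        # Components attached as attributes are collected automatically and
        # ordered (topologically, via `run_after`) right after __init__ returns.
        self.system = SystemComponent(legacy_config, settings.ai_profile)
        self.web_search = WebSearchComponent(legacy_config)

    async def propose_action(self):
        # Each pipeline calls the given protocol method on every enabled
        # component that implements it and concatenates the yielded results.
        resources = await self.run_pipeline(DirectiveProvider.get_resources)
        commands = await self.run_pipeline(CommandProvider.get_commands)
        messages = await self.run_pipeline(MessageProvider.get_messages)
        ...  # build the prompt and query the LLM, as Agent.propose_action does
```

The automatic collection is what the new `AgentMeta` metaclass on `BaseAgent` (shown in the diff below) is for.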

See [Introduction](https://github.com/kcze/AutoGPT/blob/kpczerwinski/open-440-modular-agents/docs/content/AutoGPT/component%20agent/introduction.md) for more information.
Krzysztof Czerwinski
2024-04-22 19:20:01 +02:00
committed by GitHub
parent 6ff02677d2
commit a74548d3cd
71 changed files with 3187 additions and 3172 deletions

View File

@@ -17,8 +17,8 @@ OPENAI_API_KEY=your-openai-api-key
## RESTRICT_TO_WORKSPACE - Restrict file operations to workspace ./data/agents/<agent_id>/workspace (Default: True)
# RESTRICT_TO_WORKSPACE=True
## DISABLED_COMMAND_CATEGORIES - The list of categories of commands that are disabled (Default: None)
# DISABLED_COMMAND_CATEGORIES=
## DISABLED_COMMANDS - The comma separated list of commands that are disabled (Default: None)
# DISABLED_COMMANDS=
## FILE_STORAGE_BACKEND - Choose a storage backend for contents
## Options: local, gcs, s3

View File

@@ -5,12 +5,11 @@ from pathlib import Path
from autogpt.agent_manager.agent_manager import AgentManager
from autogpt.agents.agent import Agent, AgentConfiguration, AgentSettings
from autogpt.agents.prompt_strategies.one_shot import OneShotAgentPromptStrategy
from autogpt.app.main import _configure_openai_provider, run_interaction_loop
from autogpt.commands import COMMAND_CATEGORIES
from autogpt.config import AIProfile, ConfigBuilder
from autogpt.file_storage import FileStorageBackendName, get_storage
from autogpt.logs.config import configure_logging
from autogpt.models.command_registry import CommandRegistry
LOG_DIR = Path(__file__).parent / "logs"
@@ -33,15 +32,15 @@ def bootstrap_agent(task: str, continuous_mode: bool) -> Agent:
config.noninteractive_mode = True
config.memory_backend = "no_memory"
command_registry = CommandRegistry.with_command_modules(COMMAND_CATEGORIES, config)
ai_profile = AIProfile(
ai_name="AutoGPT",
ai_role="a multi-purpose AI assistant.",
ai_goals=[task],
)
agent_prompt_config = Agent.default_settings.prompt_config.copy(deep=True)
agent_prompt_config = OneShotAgentPromptStrategy.default_configuration.copy(
deep=True
)
agent_prompt_config.use_functions_api = config.openai_functions
agent_settings = AgentSettings(
name=Agent.default_settings.name,
@@ -69,7 +68,6 @@ def bootstrap_agent(task: str, continuous_mode: bool) -> Agent:
agent = Agent(
settings=agent_settings,
llm_provider=_configure_openai_provider(config),
command_registry=command_registry,
file_storage=file_storage,
legacy_config=config,
)

View File

@@ -1,12 +1,10 @@
from typing import Optional
from autogpt.agents.agent import Agent, AgentConfiguration, AgentSettings
from autogpt.commands import COMMAND_CATEGORIES
from autogpt.config import AIDirectives, AIProfile, Config
from autogpt.core.resource.model_providers import ChatModelProvider
from autogpt.file_storage.base import FileStorage
from autogpt.logs.config import configure_chat_plugins
from autogpt.models.command_registry import CommandRegistry
from autogpt.plugins import scan_plugins
@@ -70,12 +68,6 @@ def _configure_agent(
app_config.plugins = scan_plugins(app_config)
configure_chat_plugins(app_config)
# Create a CommandRegistry instance and scan default folder
command_registry = CommandRegistry.with_command_modules(
modules=COMMAND_CATEGORIES,
config=app_config,
)
agent_state = state or create_agent_state(
agent_id=agent_id,
task=task,
@@ -89,7 +81,6 @@ def _configure_agent(
return Agent(
settings=agent_state,
llm_provider=llm_provider,
command_registry=command_registry,
file_storage=file_storage,
legacy_config=app_config,
)
@@ -102,9 +93,6 @@ def create_agent_state(
directives: AIDirectives,
app_config: Config,
) -> AgentSettings:
agent_prompt_config = Agent.default_settings.prompt_config.copy(deep=True)
agent_prompt_config.use_functions_api = app_config.openai_functions
return AgentSettings(
agent_id=agent_id,
name=Agent.default_settings.name,
@@ -119,6 +107,5 @@ def create_agent_state(
use_functions_api=app_config.openai_functions,
plugins=app_config.plugins,
),
prompt_config=agent_prompt_config,
history=Agent.default_settings.history.copy(deep=True),
)

View File

@@ -0,0 +1 @@
../../../../docs/content/AutoGPT/component agent/agents.md

View File

@@ -2,21 +2,31 @@ from __future__ import annotations
import inspect
import logging
import time
from datetime import datetime
from typing import TYPE_CHECKING, Optional
import sentry_sdk
from pydantic import Field
from autogpt.agents.prompt_strategies.one_shot import OneShotAgentPromptStrategy
from autogpt.commands.execute_code import CodeExecutorComponent
from autogpt.commands.git_operations import GitOperationsComponent
from autogpt.commands.image_gen import ImageGeneratorComponent
from autogpt.commands.system import SystemComponent
from autogpt.commands.user_interaction import UserInteractionComponent
from autogpt.commands.web_search import WebSearchComponent
from autogpt.commands.web_selenium import WebSeleniumComponent
from autogpt.components.event_history import EventHistoryComponent
from autogpt.core.configuration import Configurable
from autogpt.core.prompting import ChatPrompt
from autogpt.core.resource.model_providers import (
from autogpt.core.resource.model_providers import ChatMessage, ChatModelProvider
from autogpt.core.resource.model_providers.schema import (
AssistantChatMessage,
ChatMessage,
ChatModelProvider,
ChatModelResponse,
)
from autogpt.core.runner.client_lib.logging.helpers import dump_prompt
from autogpt.file_storage.base import FileStorage
from autogpt.llm.providers.openai import get_openai_command_specs
from autogpt.logs.log_cycle import (
CURRENT_CONTEXT_FILE_NAME,
NEXT_ACTION_FILE_NAME,
@@ -25,34 +35,39 @@ from autogpt.logs.log_cycle import (
)
from autogpt.logs.utils import fmt_kwargs
from autogpt.models.action_history import (
Action,
ActionErrorResult,
ActionInterruptedByHuman,
ActionResult,
ActionSuccessResult,
)
from autogpt.models.command import CommandOutput
from autogpt.models.context_item import ContextItem
from .base import BaseAgent, BaseAgentConfiguration, BaseAgentSettings
from .features.agent_file_manager import AgentFileManagerMixin
from .features.context import ContextMixin
from .features.watchdog import WatchdogMixin
from .prompt_strategies.one_shot import (
OneShotAgentPromptConfiguration,
OneShotAgentPromptStrategy,
)
from .utils.exceptions import (
from autogpt.models.command import Command, CommandOutput
from autogpt.utils.exceptions import (
AgentException,
AgentTerminated,
CommandExecutionError,
DuplicateOperationError,
InvalidArgumentError,
UnknownCommandError,
)
from .base import (
BaseAgent,
BaseAgentConfiguration,
BaseAgentSettings,
ThoughtProcessOutput,
)
from .features.agent_file_manager import FileManagerComponent
from .features.context import ContextComponent
from .features.watchdog import WatchdogComponent
from .protocols import (
AfterExecute,
AfterParse,
CommandProvider,
DirectiveProvider,
MessageProvider,
)
if TYPE_CHECKING:
from autogpt.config import Config
from autogpt.models.command_registry import CommandRegistry
logger = logging.getLogger(__name__)
@@ -63,49 +78,55 @@ class AgentConfiguration(BaseAgentConfiguration):
class AgentSettings(BaseAgentSettings):
config: AgentConfiguration = Field(default_factory=AgentConfiguration)
prompt_config: OneShotAgentPromptConfiguration = Field(
default_factory=(
lambda: OneShotAgentPromptStrategy.default_configuration.copy(deep=True)
)
)
class Agent(
ContextMixin,
AgentFileManagerMixin,
WatchdogMixin,
BaseAgent,
Configurable[AgentSettings],
):
"""AutoGPT's primary Agent; uses one-shot prompting."""
class Agent(BaseAgent, Configurable[AgentSettings]):
default_settings: AgentSettings = AgentSettings(
name="Agent",
description=__doc__,
description=__doc__ if __doc__ else "",
)
prompt_strategy: OneShotAgentPromptStrategy
def __init__(
self,
settings: AgentSettings,
llm_provider: ChatModelProvider,
command_registry: CommandRegistry,
file_storage: FileStorage,
legacy_config: Config,
):
prompt_strategy = OneShotAgentPromptStrategy(
configuration=settings.prompt_config,
logger=logger,
super().__init__(settings)
self.llm_provider = llm_provider
self.ai_profile = settings.ai_profile
self.directives = settings.directives
prompt_config = OneShotAgentPromptStrategy.default_configuration.copy(deep=True)
prompt_config.use_functions_api = settings.config.use_functions_api
self.prompt_strategy = OneShotAgentPromptStrategy(prompt_config, logger)
self.commands: list[Command] = []
# Components
self.system = SystemComponent(legacy_config, settings.ai_profile)
self.history = EventHistoryComponent(
settings.history,
self.send_token_limit,
lambda x: self.llm_provider.count_tokens(x, self.llm.name),
legacy_config,
llm_provider,
)
super().__init__(
settings=settings,
llm_provider=llm_provider,
prompt_strategy=prompt_strategy,
command_registry=command_registry,
file_storage=file_storage,
legacy_config=legacy_config,
self.user_interaction = UserInteractionComponent(legacy_config)
self.file_manager = FileManagerComponent(settings, file_storage)
self.code_executor = CodeExecutorComponent(
self.file_manager.workspace,
settings,
legacy_config,
)
self.git_ops = GitOperationsComponent(legacy_config)
self.image_gen = ImageGeneratorComponent(
self.file_manager.workspace, legacy_config
)
self.web_search = WebSearchComponent(legacy_config)
self.web_selenium = WebSeleniumComponent(legacy_config, llm_provider, self.llm)
self.context = ContextComponent(self.file_manager.workspace)
self.watchdog = WatchdogComponent(settings.config, settings.history)
self.created_at = datetime.now().strftime("%Y%m%d_%H%M%S")
"""Timestamp the agent was created; only used for structured debug logging."""
@@ -113,83 +134,111 @@ class Agent(
self.log_cycle_handler = LogCycleHandler()
"""LogCycleHandler for structured debug logging."""
def build_prompt(
self,
*args,
extra_messages: Optional[list[ChatMessage]] = None,
include_os_info: Optional[bool] = None,
**kwargs,
) -> ChatPrompt:
if not extra_messages:
extra_messages = []
self.event_history = settings.history
self.legacy_config = legacy_config
# Clock
extra_messages.append(
ChatMessage.system(f"The current time and date is {time.strftime('%c')}"),
async def propose_action(self) -> ThoughtProcessOutput:
"""Proposes the next action to execute, based on the task and current state.
Returns:
The command name and arguments, if any, and the agent's thoughts.
"""
self.reset_trace()
# Get directives
resources = await self.run_pipeline(DirectiveProvider.get_resources)
constraints = await self.run_pipeline(DirectiveProvider.get_constraints)
best_practices = await self.run_pipeline(DirectiveProvider.get_best_practices)
directives = self.state.directives.copy(deep=True)
directives.resources += resources
directives.constraints += constraints
directives.best_practices += best_practices
# Get commands
self.commands = await self.run_pipeline(CommandProvider.get_commands)
self._remove_disabled_commands()
# Get messages
messages = await self.run_pipeline(MessageProvider.get_messages)
prompt: ChatPrompt = self.prompt_strategy.build_prompt(
messages=messages,
task=self.state.task,
ai_profile=self.state.ai_profile,
ai_directives=directives,
commands=get_openai_command_specs(self.commands),
include_os_info=self.legacy_config.execute_local_commands,
)
if include_os_info is None:
include_os_info = self.legacy_config.execute_local_commands
return super().build_prompt(
*args,
extra_messages=extra_messages,
include_os_info=include_os_info,
**kwargs,
)
def on_before_think(self, *args, **kwargs) -> ChatPrompt:
prompt = super().on_before_think(*args, **kwargs)
self.log_cycle_handler.log_count_within_cycle = 0
self.log_cycle_handler.log_cycle(
self.ai_profile.ai_name,
self.state.ai_profile.ai_name,
self.created_at,
self.config.cycle_count,
prompt.raw(),
CURRENT_CONTEXT_FILE_NAME,
)
return prompt
def parse_and_process_response(
self, llm_response: AssistantChatMessage, *args, **kwargs
) -> Agent.ThoughtProcessOutput:
for plugin in self.config.plugins:
if not plugin.can_handle_post_planning():
continue
llm_response.content = plugin.post_planning(llm_response.content or "")
logger.debug(f"Executing prompt:\n{dump_prompt(prompt)}")
output = await self.complete_and_parse(prompt)
self.config.cycle_count += 1
(
command_name,
arguments,
assistant_reply_dict,
) = self.prompt_strategy.parse_response_content(llm_response)
return output
# Check if command_name and arguments are already in the event_history
if self.event_history.matches_last_command(command_name, arguments):
raise DuplicateOperationError(
f"The command {command_name} with arguments {arguments} "
f"has been just executed."
)
async def complete_and_parse(
self, prompt: ChatPrompt, exception: Optional[Exception] = None
) -> ThoughtProcessOutput:
if exception:
prompt.messages.append(ChatMessage.system(f"Error: {exception}"))
response: ChatModelResponse[
ThoughtProcessOutput
] = await self.llm_provider.create_chat_completion(
prompt.messages,
model_name=self.llm.name,
completion_parser=self.parse_and_validate_response,
functions=(
get_openai_command_specs(self.commands)
if self.config.use_functions_api
else []
),
)
result = response.parsed_result
self.log_cycle_handler.log_cycle(
self.ai_profile.ai_name,
self.state.ai_profile.ai_name,
self.created_at,
self.config.cycle_count,
assistant_reply_dict,
result.thoughts,
NEXT_ACTION_FILE_NAME,
)
if command_name:
self.event_history.register_action(
Action(
name=command_name,
args=arguments,
reasoning=assistant_reply_dict["thoughts"]["reasoning"],
)
await self.run_pipeline(AfterParse.after_parse, result)
return result
def parse_and_validate_response(
self, llm_response: AssistantChatMessage
) -> ThoughtProcessOutput:
parsed_response = self.prompt_strategy.parse_response_content(llm_response)
# Validate command arguments
command_name = parsed_response.command_name
command = self._get_command(command_name)
if arg_errors := command.validate_args(parsed_response.command_args)[1]:
fmt_errors = [
f"{'.'.join(str(p) for p in f.path)}: {f.message}"
if f.path
else f.message
for f in arg_errors
]
raise InvalidArgumentError(
f"The set of arguments supplied for {command_name} is invalid:\n"
+ "\n".join(fmt_errors)
)
return command_name, arguments, assistant_reply_dict
return parsed_response
async def execute(
self,
@@ -202,7 +251,7 @@ class Agent(
if command_name == "human_feedback":
result = ActionInterruptedByHuman(feedback=user_input)
self.log_cycle_handler.log_cycle(
self.ai_profile.ai_name,
self.state.ai_profile.ai_name,
self.created_at,
self.config.cycle_count,
user_input,
@@ -210,31 +259,16 @@ class Agent(
)
else:
for plugin in self.config.plugins:
if not plugin.can_handle_pre_command():
continue
command_name, command_args = plugin.pre_command(
command_name, command_args
)
# Get commands
self.commands = await self.run_pipeline(CommandProvider.get_commands)
self._remove_disabled_commands()
try:
return_value = await execute_command(
return_value = await self._execute_command(
command_name=command_name,
arguments=command_args,
agent=self,
)
# Intercept ContextItem if one is returned by the command
if type(return_value) is tuple and isinstance(
return_value[1], ContextItem
):
context_item = return_value[1]
return_value = return_value[0]
logger.debug(
f"Command {command_name} returned a ContextItem: {context_item}"
)
self.context.add(context_item)
result = ActionSuccessResult(outputs=return_value)
except AgentTerminated:
raise
@@ -252,47 +286,30 @@ class Agent(
"Do not execute this command again with the same arguments."
)
for plugin in self.config.plugins:
if not plugin.can_handle_post_command():
continue
if result.status == "success":
result.outputs = plugin.post_command(command_name, result.outputs)
elif result.status == "error":
result.reason = plugin.post_command(command_name, result.reason)
await self.run_pipeline(AfterExecute.after_execute, result)
# Update action history
self.event_history.register_result(result)
await self.event_history.handle_compression(
self.llm_provider, self.legacy_config
)
logger.debug("\n".join(self.trace))
return result
async def _execute_command(
self,
command_name: str,
arguments: dict[str, str],
) -> CommandOutput:
"""Execute the command and return the result
#############
# Utilities #
#############
Args:
command_name (str): The name of the command to execute
arguments (dict): The arguments for the command
async def execute_command(
command_name: str,
arguments: dict[str, str],
agent: Agent,
) -> CommandOutput:
"""Execute the command and return the result
Args:
command_name (str): The name of the command to execute
arguments (dict): The arguments for the command
agent (Agent): The agent that is executing the command
Returns:
str: The result of the command
"""
# Execute a native command with the same name or alias, if it exists
if command := agent.command_registry.get_command(command_name):
Returns:
str: The result of the command
"""
# Execute a native command with the same name or alias, if it exists
command = self._get_command(command_name)
try:
result = command(**arguments, agent=agent)
result = command(**arguments)
if inspect.isawaitable(result):
return await result
return result
@@ -301,20 +318,31 @@ async def execute_command(
except Exception as e:
raise CommandExecutionError(str(e))
# Handle non-native commands (e.g. from plugins)
if agent._prompt_scratchpad:
for name, command in agent._prompt_scratchpad.commands.items():
if (
command_name == name
or command_name.lower() == command.description.lower()
):
try:
return command.method(**arguments)
except AgentException:
raise
except Exception as e:
raise CommandExecutionError(str(e))
def _get_command(self, command_name: str) -> Command:
for command in reversed(self.commands):
if command_name in command.names:
return command
raise UnknownCommandError(
f"Cannot execute command '{command_name}': unknown command."
)
raise UnknownCommandError(
f"Cannot execute command '{command_name}': unknown command."
)
def _remove_disabled_commands(self) -> None:
self.commands = [
command
for command in self.commands
if not any(
name in self.legacy_config.disabled_commands for name in command.names
)
]
def find_obscured_commands(self) -> list[Command]:
seen_names = set()
obscured_commands = []
for command in reversed(self.commands):
# If all of the command's names have been seen, it's obscured
if seen_names.issuperset(command.names):
obscured_commands.append(command)
else:
seen_names.update(command.names)
return list(reversed(obscured_commands))

View File

@@ -1,24 +1,35 @@
from __future__ import annotations
import copy
import inspect
import logging
from abc import ABC, abstractmethod
from typing import TYPE_CHECKING, Any, Optional
from abc import ABCMeta, abstractmethod
from typing import (
TYPE_CHECKING,
Any,
Callable,
Iterator,
Optional,
ParamSpec,
TypeVar,
overload,
)
from auto_gpt_plugin_template import AutoGPTPluginTemplate
from pydantic import Field, validator
from colorama import Fore
from pydantic import BaseModel, Field, validator
if TYPE_CHECKING:
from autogpt.config import Config
from autogpt.core.prompting.base import PromptStrategy
from autogpt.core.resource.model_providers.schema import (
AssistantChatMessage,
ChatModelInfo,
ChatModelProvider,
ChatModelResponse,
)
from autogpt.models.command_registry import CommandRegistry
from autogpt.agents.utils.prompt_scratchpad import PromptScratchpad
from autogpt.agents import protocols as _protocols
from autogpt.agents.components import (
AgentComponent,
ComponentEndpointError,
EndpointPipelineError,
)
from autogpt.config import ConfigBuilder
from autogpt.config.ai_directives import AIDirectives
from autogpt.config.ai_profile import AIProfile
@@ -28,23 +39,18 @@ from autogpt.core.configuration import (
SystemSettings,
UserConfigurable,
)
from autogpt.core.prompting.schema import (
ChatMessage,
ChatPrompt,
CompletionModelFunction,
)
from autogpt.core.resource.model_providers.openai import (
OPEN_AI_CHAT_MODELS,
OpenAIModelName,
)
from autogpt.core.runner.client_lib.logging.helpers import dump_prompt
from autogpt.file_storage.base import FileStorage
from autogpt.llm.providers.openai import get_openai_command_specs
from autogpt.models.action_history import ActionResult, EpisodicActionHistory
from autogpt.prompts.prompt import DEFAULT_TRIGGERING_PROMPT
logger = logging.getLogger(__name__)
T = TypeVar("T")
P = ParamSpec("P")
CommandName = str
CommandArgs = dict[str, str]
AgentThoughts = dict[str, Any]
@@ -145,47 +151,47 @@ class BaseAgentSettings(SystemSettings):
"""(STATE) The action history of the agent."""
class BaseAgent(Configurable[BaseAgentSettings], ABC):
"""Base class for all AutoGPT agent classes."""
class AgentMeta(ABCMeta):
def __call__(cls, *args, **kwargs):
# Create instance of the class (Agent or BaseAgent)
instance = super().__call__(*args, **kwargs)
# Automatically collect modules after the instance is created
instance._collect_components()
return instance
ThoughtProcessOutput = tuple[CommandName, CommandArgs, AgentThoughts]
class ThoughtProcessOutput(BaseModel):
command_name: str = ""
command_args: dict[str, Any] = Field(default_factory=dict)
thoughts: dict[str, Any] = Field(default_factory=dict)
def to_tuple(self) -> tuple[CommandName, CommandArgs, AgentThoughts]:
return self.command_name, self.command_args, self.thoughts
class BaseAgent(Configurable[BaseAgentSettings], metaclass=AgentMeta):
C = TypeVar("C", bound=AgentComponent)
default_settings = BaseAgentSettings(
name="BaseAgent",
description=__doc__,
description=__doc__ if __doc__ else "",
)
def __init__(
self,
settings: BaseAgentSettings,
llm_provider: ChatModelProvider,
prompt_strategy: PromptStrategy,
command_registry: CommandRegistry,
file_storage: FileStorage,
legacy_config: Config,
):
self.state = settings
self.components: list[AgentComponent] = []
self.config = settings.config
self.ai_profile = settings.ai_profile
self.directives = settings.directives
self.event_history = settings.history
# Execution data for debugging
self._trace: list[str] = []
self.legacy_config = legacy_config
"""LEGACY: Monolithic application configuration."""
logger.debug(f"Created {__class__} '{self.state.ai_profile.ai_name}'")
self.llm_provider = llm_provider
self.prompt_strategy = prompt_strategy
self.command_registry = command_registry
"""The registry containing all commands available to the agent."""
self._prompt_scratchpad: PromptScratchpad | None = None
# Support multi-inheritance and mixins for subclasses
super(BaseAgent, self).__init__()
logger.debug(f"Created {__class__} '{self.ai_profile.ai_name}'")
@property
def trace(self) -> list[str]:
return self._trace
@property
def llm(self) -> ChatModelInfo:
@@ -199,42 +205,9 @@ class BaseAgent(Configurable[BaseAgentSettings], ABC):
def send_token_limit(self) -> int:
return self.config.send_token_limit or self.llm.max_tokens * 3 // 4
@abstractmethod
async def propose_action(self) -> ThoughtProcessOutput:
"""Proposes the next action to execute, based on the task and current state.
Returns:
The command name and arguments, if any, and the agent's thoughts.
"""
# Scratchpad as surrogate PromptGenerator for plugin hooks
self._prompt_scratchpad = PromptScratchpad()
prompt: ChatPrompt = self.build_prompt(scratchpad=self._prompt_scratchpad)
prompt = self.on_before_think(prompt, scratchpad=self._prompt_scratchpad)
logger.debug(f"Executing prompt:\n{dump_prompt(prompt)}")
response = await self.llm_provider.create_chat_completion(
prompt.messages,
functions=get_openai_command_specs(
self.command_registry.list_available_commands(self)
)
+ list(self._prompt_scratchpad.commands.values())
if self.config.use_functions_api
else [],
model_name=self.llm.name,
completion_parser=lambda r: self.parse_and_process_response(
r,
prompt,
scratchpad=self._prompt_scratchpad,
),
)
self.config.cycle_count += 1
return self.on_response(
llm_response=response,
prompt=prompt,
scratchpad=self._prompt_scratchpad,
)
...
@abstractmethod
async def execute(
@@ -243,154 +216,156 @@ class BaseAgent(Configurable[BaseAgentSettings], ABC):
command_args: dict[str, str] = {},
user_input: str = "",
) -> ActionResult:
"""Executes the given command, if any, and returns the agent's response.
Params:
command_name: The name of the command to execute, if any.
command_args: The arguments to pass to the command, if any.
user_input: The user's input, if any.
Returns:
ActionResult: An object representing the result(s) of the command.
"""
...
def build_prompt(
def reset_trace(self):
self._trace = []
@overload
async def run_pipeline(
self, protocol_method: Callable[P, Iterator[T]], *args, retry_limit: int = 3
) -> list[T]:
...
@overload
async def run_pipeline(
self, protocol_method: Callable[P, None], *args, retry_limit: int = 3
) -> list[None]:
...
async def run_pipeline(
self,
scratchpad: PromptScratchpad,
extra_commands: Optional[list[CompletionModelFunction]] = None,
extra_messages: Optional[list[ChatMessage]] = None,
**extras,
) -> ChatPrompt:
"""Constructs a prompt using `self.prompt_strategy`.
protocol_method: Callable[P, Iterator[T] | None],
*args,
retry_limit: int = 3,
) -> list[T] | list[None]:
method_name = protocol_method.__name__
protocol_name = protocol_method.__qualname__.split(".")[0]
protocol_class = getattr(_protocols, protocol_name)
if not issubclass(protocol_class, AgentComponent):
raise TypeError(f"{repr(protocol_method)} is not a protocol method")
Params:
scratchpad: An object for plugins to write additional prompt elements to.
(E.g. commands, constraints, best practices)
extra_commands: Additional commands that the agent has access to.
extra_messages: Additional messages to include in the prompt.
"""
if not extra_commands:
extra_commands = []
if not extra_messages:
extra_messages = []
# Clone parameters to revert on failure
original_args = self._selective_copy(args)
pipeline_attempts = 0
method_result: list[T] = []
self._trace.append(f"⬇️ {Fore.BLUE}{method_name}{Fore.RESET}")
# Apply additions from plugins
for plugin in self.config.plugins:
if not plugin.can_handle_post_prompt():
continue
plugin.post_prompt(scratchpad)
ai_directives = self.directives.copy(deep=True)
ai_directives.resources += scratchpad.resources
ai_directives.constraints += scratchpad.constraints
ai_directives.best_practices += scratchpad.best_practices
extra_commands += list(scratchpad.commands.values())
while pipeline_attempts < retry_limit:
try:
for component in self.components:
# Skip other protocols
if not isinstance(component, protocol_class):
continue
prompt = self.prompt_strategy.build_prompt(
task=self.state.task,
ai_profile=self.ai_profile,
ai_directives=ai_directives,
commands=get_openai_command_specs(
self.command_registry.list_available_commands(self)
)
+ extra_commands,
event_history=self.event_history,
max_prompt_tokens=self.send_token_limit,
count_tokens=lambda x: self.llm_provider.count_tokens(x, self.llm.name),
count_message_tokens=lambda x: self.llm_provider.count_message_tokens(
x, self.llm.name
),
extra_messages=extra_messages,
**extras,
)
# Skip disabled components
if not component.enabled:
self._trace.append(
f" {Fore.LIGHTBLACK_EX}"
f"{component.__class__.__name__}{Fore.RESET}"
)
continue
return prompt
method = getattr(component, method_name, None)
if not callable(method):
continue
def on_before_think(
self,
prompt: ChatPrompt,
scratchpad: PromptScratchpad,
) -> ChatPrompt:
"""Called after constructing the prompt but before executing it.
component_attempts = 0
while component_attempts < retry_limit:
try:
component_args = self._selective_copy(args)
if inspect.iscoroutinefunction(method):
result = await method(*component_args)
else:
result = method(*component_args)
if result is not None:
method_result.extend(result)
args = component_args
self._trace.append(f"{component.__class__.__name__}")
Calls the `on_planning` hook of any enabled and capable plugins, adding their
output to the prompt.
Params:
prompt: The prompt that is about to be executed.
scratchpad: An object for plugins to write additional prompt elements to.
(E.g. commands, constraints, best practices)
Returns:
The prompt to execute
"""
current_tokens_used = self.llm_provider.count_message_tokens(
prompt.messages, self.llm.name
)
plugin_count = len(self.config.plugins)
for i, plugin in enumerate(self.config.plugins):
if not plugin.can_handle_on_planning():
continue
plugin_response = plugin.on_planning(scratchpad, prompt.raw())
if not plugin_response or plugin_response == "":
continue
message_to_add = ChatMessage.system(plugin_response)
tokens_to_add = self.llm_provider.count_message_tokens(
message_to_add, self.llm.name
)
if current_tokens_used + tokens_to_add > self.send_token_limit:
logger.debug(f"Plugin response too long, skipping: {plugin_response}")
logger.debug(f"Plugins remaining at stop: {plugin_count - i}")
except ComponentEndpointError:
self._trace.append(
f"{Fore.YELLOW}{component.__class__.__name__}: "
f"ComponentEndpointError{Fore.RESET}"
)
# Retry the same component on ComponentEndpointError
component_attempts += 1
continue
# Successful component execution
break
# Successful pipeline execution
break
prompt.messages.insert(
-1, message_to_add
) # HACK: assumes cycle instruction to be at the end
current_tokens_used += tokens_to_add
return prompt
except EndpointPipelineError:
self._trace.append(
f"{Fore.LIGHTRED_EX}{component.__class__.__name__}: "
f"EndpointPipelineError{Fore.RESET}"
)
# Restart from the beginning on EndpointPipelineError
# Revert to original parameters
args = self._selective_copy(original_args)
pipeline_attempts += 1
continue # Start the loop over
except Exception as e:
raise e
return method_result
def on_response(
self,
llm_response: ChatModelResponse,
prompt: ChatPrompt,
scratchpad: PromptScratchpad,
) -> ThoughtProcessOutput:
"""Called upon receiving a response from the chat model.
def _collect_components(self):
components = [
getattr(self, attr)
for attr in dir(self)
if isinstance(getattr(self, attr), AgentComponent)
]
Calls `self.parse_and_process_response()`.
if self.components:
# Check if any component was missed (added to Agent but not to components)
for component in components:
if component not in self.components:
logger.warning(
f"Component {component.__class__.__name__} "
"is attached to an agent but not added to components list"
)
# Skip collecting and sorting if ordering is explicit
return
self.components = self._topological_sort(components)
Params:
llm_response: The raw response from the chat model.
prompt: The prompt that was executed.
scratchpad: An object containing additional prompt elements from plugins.
(E.g. commands, constraints, best practices)
def _topological_sort(
self, components: list[AgentComponent]
) -> list[AgentComponent]:
visited = set()
stack = []
Returns:
The parsed command name and command args, if any, and the agent thoughts.
"""
def visit(node: AgentComponent):
if node in visited:
return
visited.add(node)
for neighbor_class in node.__class__.run_after:
# Find the instance of neighbor_class in components
neighbor = next(
(m for m in components if isinstance(m, neighbor_class)), None
)
if neighbor:
visit(neighbor)
stack.append(node)
return llm_response.parsed_result
for component in components:
visit(component)
# TODO: update memory/context
return stack
@abstractmethod
def parse_and_process_response(
self,
llm_response: AssistantChatMessage,
prompt: ChatPrompt,
scratchpad: PromptScratchpad,
) -> ThoughtProcessOutput:
"""Validate, parse & process the LLM's response.
Must be implemented by derivative classes: no base implementation is provided,
since the implementation depends on the role of the derivative Agent.
Params:
llm_response: The raw response from the chat model.
prompt: The prompt that was executed.
scratchpad: An object containing additional prompt elements from plugins.
(E.g. commands, constraints, best practices)
Returns:
The parsed command name and command args, if any, and the agent thoughts.
"""
pass
def _selective_copy(self, args: tuple[Any, ...]) -> tuple[Any, ...]:
copied_args = []
for item in args:
if isinstance(item, list):
# Shallow copy for lists
copied_item = item[:]
elif isinstance(item, dict):
# Shallow copy for dicts
copied_item = item.copy()
elif isinstance(item, BaseModel):
# Deep copy for Pydantic models (deep=True to also copy nested models)
copied_item = item.copy(deep=True)
else:
# Deep copy for other objects
copied_item = copy.deepcopy(item)
copied_args.append(copied_item)
return tuple(copied_args)

View File

@@ -0,0 +1,35 @@
from abc import ABC
from typing import Callable
class AgentComponent(ABC):
run_after: list[type["AgentComponent"]] = []
_enabled: Callable[[], bool] | bool = True
_disabled_reason: str = ""
@property
def enabled(self) -> bool:
if callable(self._enabled):
return self._enabled()
return self._enabled
@property
def disabled_reason(self) -> str:
return self._disabled_reason
class ComponentEndpointError(Exception):
"""Error of a single protocol method on a component."""
def __init__(self, message: str = ""):
self.message = message
super().__init__(message)
class EndpointPipelineError(ComponentEndpointError):
"""Error of an entire pipeline of one endpoint."""
class ComponentSystemError(EndpointPipelineError):
"""Error of a group of pipelines;
multiple different endpoints."""

View File

@@ -1,18 +1,26 @@
from __future__ import annotations
import logging
from typing import Optional
import os
from pathlib import Path
from typing import Iterator, Optional
from autogpt.agents.protocols import CommandProvider, DirectiveProvider
from autogpt.command_decorator import command
from autogpt.core.utils.json_schema import JSONSchema
from autogpt.file_storage.base import FileStorage
from autogpt.models.command import Command
from autogpt.utils.file_operations_utils import decode_textual_file
from ..base import BaseAgent, BaseAgentSettings
from ..base import BaseAgentSettings
logger = logging.getLogger(__name__)
class AgentFileManagerMixin:
"""Mixin that adds file manager (e.g. Agent state)
and workspace manager (e.g. Agent output files) support."""
class FileManagerComponent(DirectiveProvider, CommandProvider):
"""
Adds general file manager (e.g. Agent state),
workspace manager (e.g. Agent output files) support and
commands to perform operations on files and folders.
"""
files: FileStorage
"""Agent-related files, e.g. state, logs.
@@ -25,49 +33,17 @@ class AgentFileManagerMixin:
STATE_FILE = "state.json"
"""The name of the file where the agent's state is stored."""
LOGS_FILE = "file_logger.log"
"""The name of the file where the agent's logs are stored."""
def __init__(self, state: BaseAgentSettings, file_storage: FileStorage):
self.state = state
def __init__(self, **kwargs):
# Initialize other bases first, because we need the config from BaseAgent
super(AgentFileManagerMixin, self).__init__(**kwargs)
if not isinstance(self, BaseAgent):
raise NotImplementedError(
f"{__class__.__name__} can only be applied to BaseAgent derivatives"
)
if "file_storage" not in kwargs:
raise ValueError(
"AgentFileManagerMixin requires a file_storage in the constructor."
)
state: BaseAgentSettings = getattr(self, "state")
if not state.agent_id:
raise ValueError("Agent must have an ID.")
file_storage: FileStorage = kwargs["file_storage"]
self.files = file_storage.clone_with_subroot(f"agents/{state.agent_id}/")
self.workspace = file_storage.clone_with_subroot(
f"agents/{state.agent_id}/workspace"
)
self._file_storage = file_storage
# Read and cache logs
self._file_logs_cache = []
if self.files.exists(self.LOGS_FILE):
self._file_logs_cache = self.files.read_file(self.LOGS_FILE).split("\n")
async def log_file_operation(self, content: str) -> None:
"""Log a file operation to the agent's log file."""
logger.debug(f"Logging operation: {content}")
self._file_logs_cache.append(content)
await self.files.write_file(
self.LOGS_FILE, "\n".join(self._file_logs_cache) + "\n"
)
def get_file_operation_lines(self) -> list[str]:
"""Get the agent's file operation logs as list of strings."""
return self._file_logs_cache
async def save_state(self, save_as: Optional[str] = None) -> None:
"""Save the agent's state to the state file."""
@@ -100,3 +76,87 @@ class AgentFileManagerMixin:
f"agents/{new_id}/workspace"
)
state.agent_id = new_id
def get_resources(self) -> Iterator[str]:
yield "The ability to read and write files."
def get_commands(self) -> Iterator[Command]:
yield self.read_file
yield self.write_to_file
yield self.list_folder
@command(
parameters={
"filename": JSONSchema(
type=JSONSchema.Type.STRING,
description="The path of the file to read",
required=True,
)
},
)
def read_file(self, filename: str | Path) -> str:
"""Read a file and return the contents
Args:
filename (str): The name of the file to read
Returns:
str: The contents of the file
"""
file = self.workspace.open_file(filename, binary=True)
content = decode_textual_file(file, os.path.splitext(filename)[1], logger)
return content
@command(
["write_file", "create_file"],
"Write a file, creating it if necessary. "
"If the file exists, it is overwritten.",
{
"filename": JSONSchema(
type=JSONSchema.Type.STRING,
description="The name of the file to write to",
required=True,
),
"contents": JSONSchema(
type=JSONSchema.Type.STRING,
description="The contents to write to the file",
required=True,
),
},
)
async def write_to_file(self, filename: str | Path, contents: str) -> str:
"""Write contents to a file
Args:
filename (str): The name of the file to write to
contents (str): The contents to write to the file
Returns:
str: A message indicating success or failure
"""
logger.info(f"self: {self}")
if directory := os.path.dirname(filename):
self.workspace.make_dir(directory)
await self.workspace.write_file(filename, contents)
return f"File {filename} has been written successfully."
@command(
parameters={
"folder": JSONSchema(
type=JSONSchema.Type.STRING,
description="The folder to list files in",
required=True,
)
},
)
def list_folder(self, folder: str | Path) -> list[str]:
"""Lists files in a folder recursively
Args:
folder (str): The folder to search in
Returns:
list[str]: A list of files found in the folder
"""
return [str(p) for p in self.workspace.list_files(folder)]

View File

@@ -1,14 +1,15 @@
from __future__ import annotations
from typing import TYPE_CHECKING, Any, Optional
if TYPE_CHECKING:
from autogpt.core.prompting import ChatPrompt
from autogpt.models.context_item import ContextItem
from ..base import BaseAgent
import contextlib
from pathlib import Path
from typing import Iterator, Optional
from autogpt.agents.protocols import CommandProvider, MessageProvider
from autogpt.command_decorator import command
from autogpt.core.resource.model_providers import ChatMessage
from autogpt.core.utils.json_schema import JSONSchema
from autogpt.file_storage.base import FileStorage
from autogpt.models.command import Command
from autogpt.models.context_item import ContextItem, FileContextItem, FolderContextItem
from autogpt.utils.exceptions import InvalidArgumentError
class AgentContext:
@@ -32,51 +33,133 @@ class AgentContext:
def clear(self) -> None:
self.items.clear()
def format_numbered(self) -> str:
return "\n\n".join([f"{i}. {c.fmt()}" for i, c in enumerate(self.items, 1)])
def format_numbered(self, workspace: FileStorage) -> str:
return "\n\n".join(
[f"{i}. {c.fmt(workspace)}" for i, c in enumerate(self.items, 1)]
)
class ContextMixin:
"""Mixin that adds context support to a BaseAgent subclass"""
class ContextComponent(MessageProvider, CommandProvider):
"""Adds ability to keep files and folders open in the context (prompt)."""
context: AgentContext
def __init__(self, **kwargs: Any):
def __init__(self, workspace: FileStorage):
self.context = AgentContext()
self.workspace = workspace
super(ContextMixin, self).__init__(**kwargs)
def build_prompt(
self,
*args: Any,
extra_messages: Optional[list[ChatMessage]] = None,
**kwargs: Any,
) -> ChatPrompt:
if not extra_messages:
extra_messages = []
# Add context section to prompt
def get_messages(self) -> Iterator[ChatMessage]:
if self.context:
extra_messages.insert(
0,
ChatMessage.system(
"## Context\n"
f"{self.context.format_numbered()}\n\n"
"When a context item is no longer needed and you are not done yet, "
"you can hide the item by specifying its number in the list above "
"to `hide_context_item`.",
),
yield ChatMessage.system(
"## Context\n"
f"{self.context.format_numbered(self.workspace)}\n\n"
"When a context item is no longer needed and you are not done yet, "
"you can hide the item by specifying its number in the list above "
"to `hide_context_item`.",
)
return super(ContextMixin, self).build_prompt(
*args,
extra_messages=extra_messages,
**kwargs,
) # type: ignore
def get_commands(self) -> Iterator[Command]:
yield self.open_file
yield self.open_folder
if self.context:
yield self.close_context_item
@command(
parameters={
"file_path": JSONSchema(
type=JSONSchema.Type.STRING,
description="The path of the file to open",
required=True,
)
}
)
async def open_file(self, file_path: Path) -> str:
"""Opens a file for editing or continued viewing;
creates it if it does not exist yet.
Note: If you only need to read or write a file once,
use `write_to_file` instead.
def get_agent_context(agent: BaseAgent) -> AgentContext | None:
if isinstance(agent, ContextMixin):
return agent.context
Args:
file_path (Path): The path of the file to open
return None
Returns:
str: A status message indicating what happened
"""
# Try to make the file path relative
relative_file_path = None
with contextlib.suppress(ValueError):
relative_file_path = file_path.relative_to(self.workspace.root)
created = False
if not self.workspace.exists(file_path):
await self.workspace.write_file(file_path, "")
created = True
file_path = relative_file_path or file_path
file = FileContextItem(path=file_path)
self.context.add(file)
return (
f"File {file_path}{' created,' if created else ''} has been opened"
" and added to the context ✅"
)
@command(
parameters={
"path": JSONSchema(
type=JSONSchema.Type.STRING,
description="The path of the folder to open",
required=True,
)
}
)
def open_folder(self, path: Path) -> str:
"""Open a folder to keep track of its content
Args:
path (Path): The path of the folder to open
Returns:
str: A status message indicating what happened
"""
# Try to make the path relative
relative_path = None
with contextlib.suppress(ValueError):
relative_path = path.relative_to(self.workspace.root)
if not self.workspace.exists(path):
raise FileNotFoundError(
f"open_folder {path} failed: no such file or directory"
)
path = relative_path or path
folder = FolderContextItem(path=path)
self.context.add(folder)
return f"Folder {path} has been opened and added to the context ✅"
@command(
parameters={
"number": JSONSchema(
type=JSONSchema.Type.INTEGER,
description="The 1-based index of the context item to hide",
required=True,
)
}
)
def close_context_item(self, number: int) -> str:
"""Hide an open file, folder or other context item, to save tokens.
Args:
number (int): The 1-based index of the context item to hide
Returns:
str: A status message indicating what happened
"""
if number > len(self.context.items) or number == 0:
raise InvalidArgumentError(f"Index {number} out of range")
self.context.close(number)
return f"Context item {number} hidden ✅"

View File

@@ -1,41 +1,35 @@
from __future__ import annotations
import logging
from contextlib import ExitStack
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from ..base import BaseAgentConfiguration
from autogpt.agents.base import ThoughtProcessOutput
from autogpt.agents.components import ComponentSystemError
from autogpt.agents.features.context import ContextComponent
from autogpt.agents.protocols import AfterParse
from autogpt.models.action_history import EpisodicActionHistory
from ..base import BaseAgent
from ..base import BaseAgentConfiguration
logger = logging.getLogger(__name__)
class WatchdogMixin:
class WatchdogComponent(AfterParse):
"""
Mixin that adds a watchdog feature to an agent class. Whenever the agent starts
Adds a watchdog feature to an agent class. Whenever the agent starts
looping, the watchdog will switch from the FAST_LLM to the SMART_LLM and re-think.
"""
config: BaseAgentConfiguration
event_history: EpisodicActionHistory
run_after = [ContextComponent]
def __init__(self, **kwargs) -> None:
# Initialize other bases first, because we need the event_history from BaseAgent
super(WatchdogMixin, self).__init__(**kwargs)
def __init__(
self, config: BaseAgentConfiguration, event_history: EpisodicActionHistory
):
self.config = config
self.event_history = event_history
self.revert_big_brain = False
if not isinstance(self, BaseAgent):
raise NotImplementedError(
f"{__class__.__name__} can only be applied to BaseAgent derivatives"
)
async def propose_action(self, *args, **kwargs) -> BaseAgent.ThoughtProcessOutput:
command_name, command_args, thoughts = await super(
WatchdogMixin, self
).propose_action(*args, **kwargs)
def after_parse(self, result: ThoughtProcessOutput) -> None:
if self.revert_big_brain:
self.config.big_brain = False
self.revert_big_brain = False
if not self.config.big_brain and self.config.fast_llm != self.config.smart_llm:
previous_command, previous_command_args = None, None
@@ -49,28 +43,18 @@ class WatchdogMixin:
rethink_reason = ""
if not command_name:
if not result.command_name:
rethink_reason = "AI did not specify a command"
elif (
command_name == previous_command
and command_args == previous_command_args
result.command_name == previous_command
and result.command_args == previous_command_args
):
rethink_reason = f"Repititive command detected ({command_name})"
rethink_reason = f"Repititive command detected ({result.command_name})"
if rethink_reason:
logger.info(f"{rethink_reason}, re-thinking with SMART_LLM...")
with ExitStack() as stack:
@stack.callback
def restore_state() -> None:
# Executed after exiting the ExitStack context
self.config.big_brain = False
# Remove partial record of current cycle
self.event_history.rewind()
# Switch to SMART_LLM and re-think
self.big_brain = True
return await self.propose_action(*args, **kwargs)
return command_name, command_args, thoughts
self.event_history.rewind()
self.big_brain = True
self.revert_big_brain = True
# Trigger retry of all pipelines prior to this component
raise ComponentSystemError()

View File

@@ -4,15 +4,10 @@ import json
import platform
import re
from logging import Logger
from typing import TYPE_CHECKING, Callable, Optional
import distro
if TYPE_CHECKING:
from autogpt.agents.agent import Agent
from autogpt.models.action_history import Episode
from autogpt.agents.utils.exceptions import InvalidAgentResponseError
from autogpt.agents.base import ThoughtProcessOutput
from autogpt.config import AIDirectives, AIProfile
from autogpt.core.configuration.schema import SystemConfiguration, UserConfigurable
from autogpt.core.prompting import (
@@ -27,7 +22,8 @@ from autogpt.core.resource.model_providers.schema import (
)
from autogpt.core.utils.json_schema import JSONSchema
from autogpt.core.utils.json_utils import extract_dict_from_json
from autogpt.prompts.utils import format_numbered_list, indent
from autogpt.prompts.utils import format_numbered_list
from autogpt.utils.exceptions import InvalidAgentResponseError
class OneShotAgentPromptConfiguration(SystemConfiguration):
@@ -153,68 +149,39 @@ class OneShotAgentPromptStrategy(PromptStrategy):
def build_prompt(
self,
*,
messages: list[ChatMessage],
task: str,
ai_profile: AIProfile,
ai_directives: AIDirectives,
commands: list[CompletionModelFunction],
event_history: list[Episode],
include_os_info: bool,
max_prompt_tokens: int,
count_tokens: Callable[[str], int],
count_message_tokens: Callable[[ChatMessage | list[ChatMessage]], int],
extra_messages: Optional[list[ChatMessage]] = None,
**extras,
) -> ChatPrompt:
"""Constructs and returns a prompt with the following structure:
1. System prompt
2. Message history of the agent, truncated & prepended with running summary
as needed
3. `cycle_instruction`
"""
if not extra_messages:
extra_messages = []
system_prompt = self.build_system_prompt(
ai_profile=ai_profile,
ai_directives=ai_directives,
commands=commands,
include_os_info=include_os_info,
)
system_prompt_tlength = count_message_tokens(ChatMessage.system(system_prompt))
user_task = f'"""{task}"""'
user_task_tlength = count_message_tokens(ChatMessage.user(user_task))
response_format_instr = self.response_format_instruction(
self.config.use_functions_api
)
extra_messages.append(ChatMessage.system(response_format_instr))
messages.append(ChatMessage.system(response_format_instr))
final_instruction_msg = ChatMessage.user(self.config.choose_action_instruction)
final_instruction_tlength = count_message_tokens(final_instruction_msg)
if event_history:
progress = self.compile_progress(
event_history,
count_tokens=count_tokens,
max_tokens=(
max_prompt_tokens
- system_prompt_tlength
- user_task_tlength
- final_instruction_tlength
- count_message_tokens(extra_messages)
),
)
extra_messages.insert(
0,
ChatMessage.system(f"## Progress\n\n{progress}"),
)
prompt = ChatPrompt(
messages=[
ChatMessage.system(system_prompt),
ChatMessage.user(user_task),
*extra_messages,
*messages,
final_instruction_msg,
],
)
@@ -253,38 +220,6 @@ class OneShotAgentPromptStrategy(PromptStrategy):
# Join non-empty parts together into paragraph format
return "\n\n".join(filter(None, system_prompt_parts)).strip("\n")
def compile_progress(
self,
episode_history: list[Episode],
max_tokens: Optional[int] = None,
count_tokens: Optional[Callable[[str], int]] = None,
) -> str:
if max_tokens and not count_tokens:
raise ValueError("count_tokens is required if max_tokens is set")
steps: list[str] = []
tokens: int = 0
n_episodes = len(episode_history)
for i, episode in enumerate(reversed(episode_history)):
# Use full format for the latest 4 steps, summary or format for older steps
if i < 4 or episode.summary is None:
step_content = indent(episode.format(), 2).strip()
else:
step_content = episode.summary
step = f"* Step {n_episodes - i}: {step_content}"
if max_tokens and count_tokens:
step_tokens = count_tokens(step)
if tokens + step_tokens > max_tokens:
break
tokens += step_tokens
steps.insert(0, step)
return "\n\n".join(steps)
def response_format_instruction(self, use_functions_api: bool) -> str:
response_schema = self.response_schema.copy(deep=True)
if (
@@ -374,7 +309,7 @@ class OneShotAgentPromptStrategy(PromptStrategy):
def parse_response_content(
self,
response: AssistantChatMessage,
) -> Agent.ThoughtProcessOutput:
) -> ThoughtProcessOutput:
if not response.content:
raise InvalidAgentResponseError("Assistant response has no text content")
@@ -403,7 +338,11 @@ class OneShotAgentPromptStrategy(PromptStrategy):
command_name, arguments = extract_command(
assistant_reply_dict, response, self.config.use_functions_api
)
return command_name, arguments, assistant_reply_dict
return ThoughtProcessOutput(
command_name=command_name,
command_args=arguments,
thoughts=assistant_reply_dict,
)
#############

View File

@@ -0,0 +1,51 @@
from abc import abstractmethod
from typing import TYPE_CHECKING, Iterator
from autogpt.agents.components import AgentComponent
if TYPE_CHECKING:
from autogpt.agents.base import ThoughtProcessOutput
from autogpt.core.resource.model_providers.schema import ChatMessage
from autogpt.models.action_history import ActionResult
from autogpt.models.command import Command
class DirectiveProvider(AgentComponent):
def get_constraints(self) -> Iterator[str]:
return iter([])
def get_resources(self) -> Iterator[str]:
return iter([])
def get_best_practices(self) -> Iterator[str]:
return iter([])
class CommandProvider(AgentComponent):
@abstractmethod
def get_commands(self) -> Iterator["Command"]:
...
class MessageProvider(AgentComponent):
@abstractmethod
def get_messages(self) -> Iterator["ChatMessage"]:
...
class AfterParse(AgentComponent):
@abstractmethod
def after_parse(self, result: "ThoughtProcessOutput") -> None:
...
class ExecutionFailure(AgentComponent):
@abstractmethod
def execution_failure(self, error: Exception) -> None:
...
class AfterExecute(AgentComponent):
@abstractmethod
def after_execute(self, result: "ActionResult") -> None:
...

View File

@@ -1,108 +0,0 @@
import logging
from typing import Callable
from pydantic import BaseModel, Field
from autogpt.core.resource.model_providers.schema import CompletionModelFunction
from autogpt.core.utils.json_schema import JSONSchema
logger = logging.getLogger("PromptScratchpad")
class CallableCompletionModelFunction(CompletionModelFunction):
method: Callable
class PromptScratchpad(BaseModel):
commands: dict[str, CallableCompletionModelFunction] = Field(default_factory=dict)
resources: list[str] = Field(default_factory=list)
constraints: list[str] = Field(default_factory=list)
best_practices: list[str] = Field(default_factory=list)
def add_constraint(self, constraint: str) -> None:
"""
Add a constraint to the constraints list.
Params:
constraint (str): The constraint to be added.
"""
if constraint not in self.constraints:
self.constraints.append(constraint)
def add_command(
self,
name: str,
description: str,
params: dict[str, str | dict],
function: Callable,
) -> None:
"""
Registers a command.
*Should only be used by plugins.* Native commands should be added
directly to the CommandRegistry.
Params:
name (str): The name of the command (e.g. `command_name`).
description (str): The description of the command.
params (dict, optional): A dictionary containing argument names and their
types. Defaults to an empty dictionary.
function (callable, optional): A callable function to be called when
the command is executed. Defaults to None.
"""
for p, s in params.items():
invalid = False
if type(s) is str and s not in JSONSchema.Type._value2member_map_:
invalid = True
logger.warning(
f"Cannot add command '{name}':"
f" parameter '{p}' has invalid type '{s}'."
f" Valid types are: {JSONSchema.Type._value2member_map_.keys()}"
)
elif isinstance(s, dict):
try:
JSONSchema.from_dict(s)
except KeyError:
invalid = True
if invalid:
return
command = CallableCompletionModelFunction(
name=name,
description=description,
parameters={
name: JSONSchema(type=JSONSchema.Type._value2member_map_[spec])
if type(spec) is str
else JSONSchema.from_dict(spec)
for name, spec in params.items()
},
method=function,
)
if name in self.commands:
if description == self.commands[name].description:
return
logger.warning(
f"Replacing command {self.commands[name]} with conflicting {command}"
)
self.commands[name] = command
def add_resource(self, resource: str) -> None:
"""
Add a resource to the resources list.
Params:
resource (str): The resource to be added.
"""
if resource not in self.resources:
self.resources.append(resource)
def add_best_practice(self, best_practice: str) -> None:
"""
Add an item to the list of best practices.
Params:
best_practice (str): The best practice item to be added.
"""
if best_practice not in self.best_practices:
self.best_practices.append(best_practice)

View File

@@ -31,10 +31,7 @@ from sentry_sdk import set_user
from autogpt.agent_factory.configurators import configure_agent_with_state
from autogpt.agent_factory.generators import generate_agent_for_task
from autogpt.agent_manager import AgentManager
from autogpt.agents.utils.exceptions import AgentFinished
from autogpt.app.utils import is_port_free
from autogpt.commands.system import finish
from autogpt.commands.user_interaction import ask_user
from autogpt.config import Config
from autogpt.core.resource.model_providers import ChatModelProvider
from autogpt.core.resource.model_providers.openai import OpenAIProvider
@@ -42,6 +39,8 @@ from autogpt.core.resource.model_providers.schema import ModelProviderBudget
from autogpt.file_storage import FileStorage
from autogpt.logs.utils import fmt_kwargs
from autogpt.models.action_history import ActionErrorResult, ActionSuccessResult
from autogpt.utils.exceptions import AgentFinished
from autogpt.utils.utils import DEFAULT_ASK_COMMAND, DEFAULT_FINISH_COMMAND
logger = logging.getLogger(__name__)
@@ -149,7 +148,7 @@ class AgentProtocolServer:
file_storage=self.file_storage,
llm_provider=self._get_task_llm_provider(task),
)
await task_agent.save_state()
await task_agent.file_manager.save_state()
return task
@@ -230,18 +229,20 @@ class AgentProtocolServer:
step = await self.db.create_step(
task_id=task_id,
input=step_request,
is_last=execute_command == finish.__name__ and execute_approved,
is_last=execute_command == DEFAULT_FINISH_COMMAND and execute_approved,
)
agent.llm_provider = self._get_task_llm_provider(task, step.step_id)
# Execute previously proposed action
if execute_command:
assert execute_command_args is not None
agent.workspace.on_write_file = lambda path: self._on_agent_write_file(
    task=task, step=step, relative_path=path
)
agent.file_manager.workspace.on_write_file = (
    lambda path: self._on_agent_write_file(
        task=task, step=step, relative_path=path
    )
)
if execute_command == ask_user.__name__: # HACK
if execute_command == DEFAULT_ASK_COMMAND:
execute_result = ActionSuccessResult(outputs=user_input)
agent.event_history.register_result(execute_result)
elif not execute_command:
@@ -275,7 +276,7 @@ class AgentProtocolServer:
output=execute_command_args["reason"],
additional_output=additional_output,
)
await agent.save_state()
await agent.file_manager.save_state()
return step
else:
assert user_input
@@ -287,7 +288,9 @@ class AgentProtocolServer:
# Propose next action
try:
next_command, next_command_args, raw_output = await agent.propose_action()
next_command, next_command_args, raw_output = (
await agent.propose_action()
).to_tuple()
logger.debug(f"AI output: {raw_output}")
except Exception as e:
step = await self.db.update_step(
@@ -305,13 +308,13 @@ class AgentProtocolServer:
+ ("\n\n" if "\n" in str(execute_result) else " ")
+ f"{execute_result}\n\n"
)
if execute_command_args and execute_command != ask_user.__name__
if execute_command_args and execute_command != DEFAULT_ASK_COMMAND
else ""
)
output += f"{raw_output['thoughts']['speak']}\n\n"
output += (
f"Next Command: {next_command}({fmt_kwargs(next_command_args)})"
if next_command != ask_user.__name__
if next_command != DEFAULT_ASK_COMMAND
else next_command_args["question"]
)
@@ -357,7 +360,7 @@ class AgentProtocolServer:
additional_output=additional_output,
)
await agent.save_state()
await agent.file_manager.save_state()
return step
async def _on_agent_write_file(

View File

@@ -23,12 +23,10 @@ from autogpt.agent_factory.configurators import configure_agent_with_state, crea
from autogpt.agent_factory.profile_generator import generate_agent_profile_for_task
from autogpt.agent_manager import AgentManager
from autogpt.agents import AgentThoughts, CommandArgs, CommandName
from autogpt.agents.utils.exceptions import AgentTerminated, InvalidAgentResponseError
from autogpt.commands.execute_code import (
is_docker_available,
we_are_running_in_a_docker_container,
)
from autogpt.commands.system import finish
from autogpt.config import (
AIDirectives,
AIProfile,
@@ -43,6 +41,8 @@ from autogpt.logs.config import configure_chat_plugins, configure_logging
from autogpt.logs.helpers import print_attribute, speak
from autogpt.models.action_history import ActionInterruptedByHuman
from autogpt.plugins import scan_plugins
from autogpt.utils.exceptions import AgentTerminated, InvalidAgentResponseError
from autogpt.utils.utils import DEFAULT_FINISH_COMMAND
from scripts.install_plugin_deps import install_plugin_dependencies
from .configurator import apply_overrides_to_config
@@ -190,7 +190,7 @@ async def run_auto_gpt(
) <= len(existing_agents):
load_existing_agent = existing_agents[int(load_existing_agent) - 1]
if load_existing_agent not in existing_agents:
if load_existing_agent != "" and load_existing_agent not in existing_agents:
logger.info(
f"Unknown agent '{load_existing_agent}', "
f"creating a new one instead.",
@@ -235,7 +235,8 @@ async def run_auto_gpt(
if (
agent.event_history.current_episode
and agent.event_history.current_episode.action.name == finish.__name__
and agent.event_history.current_episode.action.name
== DEFAULT_FINISH_COMMAND
and not agent.event_history.current_episode.result
):
# Agent was resumed after `finish` -> rewrite result of `finish` action
@@ -327,11 +328,13 @@ async def run_auto_gpt(
llm_provider=llm_provider,
)
if not agent.config.allow_fs_access:
file_manager = agent.file_manager
if file_manager and not agent.config.allow_fs_access:
logger.info(
f"{Fore.YELLOW}"
"NOTE: All files/directories created by this agent can be found "
f"inside its workspace at:{Fore.RESET} {agent.workspace.root}",
f"inside its workspace at:{Fore.RESET} {file_manager.workspace.root}",
extra={"preserve_color": True},
)
@@ -351,7 +354,9 @@ async def run_auto_gpt(
" or enter a different ID to save to:",
)
# TODO: allow many-to-one relations of agents and workspaces
await agent.save_state(save_as_id if not save_as_id.isspace() else None)
await agent.file_manager.save_state(
save_as_id.strip() if not save_as_id.isspace() else None
)
@coroutine
@@ -541,7 +546,7 @@ async def run_interaction_loop(
command_name,
command_args,
assistant_reply_dict,
) = await agent.propose_action()
) = (await agent.propose_action()).to_tuple()
except InvalidAgentResponseError as e:
logger.warning(f"The agent's thoughts could not be parsed: {e}")
consecutive_failures += 1
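Since `ThoughtProcessOutput` is now a pydantic model rather than a tuple, call sites that still want tuple unpacking go through `to_tuple()`, as the hunks above do. A minimal sketch of the new calling convention (the `next_step` helper is illustrative, not part of this commit):

```python
async def next_step(agent):
    # propose_action() now returns a ThoughtProcessOutput pydantic model;
    # to_tuple() restores the old (command_name, command_args, thoughts) shape
    # for callers that still want tuple unpacking.
    command_name, command_args, thoughts = (await agent.propose_action()).to_tuple()
    return command_name, command_args, thoughts
```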

View File

@@ -1,12 +1,5 @@
from __future__ import annotations
import functools
import inspect
from typing import TYPE_CHECKING, Any, Callable, Literal, Optional, ParamSpec, TypeVar
if TYPE_CHECKING:
from autogpt.agents.base import BaseAgent
from autogpt.config import Config
import re
from typing import Callable, Optional, ParamSpec, TypeVar
from autogpt.core.utils.json_schema import JSONSchema
from autogpt.models.command import Command, CommandOutput, CommandParameter
@@ -19,19 +12,35 @@ CO = TypeVar("CO", bound=CommandOutput)
def command(
name: str,
description: str,
parameters: dict[str, JSONSchema],
enabled: Literal[True] | Callable[[Config], bool] = True,
disabled_reason: Optional[str] = None,
aliases: list[str] = [],
available: bool | Callable[[BaseAgent], bool] = True,
) -> Callable[[Callable[P, CO]], Callable[P, CO]]:
names: list[str] = [],
description: Optional[str] = None,
parameters: dict[str, JSONSchema] = {},
) -> Callable[[Callable[P, CommandOutput]], Command]:
"""
The command decorator is used to create Command objects from ordinary functions.
The command decorator is used to make a Command from a function.
Args:
names (list[str]): The names of the command.
If not provided, the function name will be used.
description (str): A brief description of what the command does.
If not provided, the docstring until double line break will be used
(or entire docstring if no double line break is found)
parameters (dict[str, JSONSchema]): The parameters of the function
that the command executes.
"""
def decorator(func: Callable[P, CO]) -> Callable[P, CO]:
def decorator(func: Callable[P, CO]) -> Command:
doc = func.__doc__ or ""
# If names is not provided, use the function name
command_names = names or [func.__name__]
# If description is not provided, use the first part of the docstring
if not (command_description := description):
if not func.__doc__:
raise ValueError("Description is required if function has no docstring")
# Return the part of the docstring before double line break or everything
command_description = re.sub(r"\s+", " ", doc.split("\n\n")[0].strip())
# Parameters
typed_parameters = [
CommandParameter(
name=param_name,
@@ -39,32 +48,15 @@ def command(
)
for param_name, spec in parameters.items()
]
cmd = Command(
name=name,
description=description,
# Wrap func with Command
command = Command(
names=command_names,
description=command_description,
method=func,
parameters=typed_parameters,
enabled=enabled,
disabled_reason=disabled_reason,
aliases=aliases,
available=available,
)
if inspect.iscoroutinefunction(func):
@functools.wraps(func)
async def wrapper(*args: P.args, **kwargs: P.kwargs) -> Any:
return await func(*args, **kwargs)
else:
@functools.wraps(func)
def wrapper(*args: P.args, **kwargs: P.kwargs) -> Any:
return func(*args, **kwargs)
setattr(wrapper, "command", cmd)
setattr(wrapper, AUTO_GPT_COMMAND_IDENTIFIER, True)
return wrapper
return command
return decorator
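To make the new contract concrete, here is a minimal sketch of the decorator applied to a component method; `GreeterComponent` and `greet` are invented for illustration, while the imports and the `yield self.<method>` pattern mirror the components later in this diff.

```python
from typing import Iterator

from autogpt.agents.protocols import CommandProvider
from autogpt.command_decorator import command
from autogpt.core.utils.json_schema import JSONSchema
from autogpt.models.command import Command


class GreeterComponent(CommandProvider):
    """Hypothetical component exposing a single command."""

    def get_commands(self) -> Iterator[Command]:
        # Decorated methods are already Command objects and can be yielded as-is.
        yield self.greet

    @command(
        parameters={
            "name": JSONSchema(
                type=JSONSchema.Type.STRING,
                description="Who to greet",
                required=True,
            )
        }
    )
    def greet(self, name: str) -> str:
        """Greet the given person.

        Because no `names` or `description` were passed to the decorator, the
        command is named after the method and described by the first paragraph
        of this docstring.
        """
        return f"Hello, {name}!"
```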

View File

@@ -0,0 +1 @@
../../../../docs/content/AutoGPT/component agent/components.md

View File

@@ -1,9 +0,0 @@
COMMAND_CATEGORIES = [
"autogpt.commands.execute_code",
"autogpt.commands.file_operations",
"autogpt.commands.user_interaction",
"autogpt.commands.web_search",
"autogpt.commands.web_selenium",
"autogpt.commands.system",
"autogpt.commands.image_gen",
]

View File

@@ -1,82 +0,0 @@
import functools
import logging
import re
from pathlib import Path
from typing import Callable, ParamSpec, TypeVar
from autogpt.agents.agent import Agent
P = ParamSpec("P")
T = TypeVar("T")
logger = logging.getLogger(__name__)
def sanitize_path_arg(
arg_name: str, make_relative: bool = False
) -> Callable[[Callable[P, T]], Callable[P, T]]:
"""Sanitizes the specified path (str | Path) argument, resolving it to a Path"""
def decorator(func: Callable) -> Callable:
# Get position of path parameter, in case it is passed as a positional argument
try:
arg_index = list(func.__annotations__.keys()).index(arg_name)
except ValueError:
raise TypeError(
f"Sanitized parameter '{arg_name}' absent or not annotated"
f" on function '{func.__name__}'"
)
# Get position of agent parameter, in case it is passed as a positional argument
try:
agent_arg_index = list(func.__annotations__.keys()).index("agent")
except ValueError:
raise TypeError(
f"Parameter 'agent' absent or not annotated"
f" on function '{func.__name__}'"
)
@functools.wraps(func)
def wrapper(*args, **kwargs):
logger.debug(f"Sanitizing arg '{arg_name}' on function '{func.__name__}'")
# Get Agent from the called function's arguments
agent = kwargs.get(
"agent", len(args) > agent_arg_index and args[agent_arg_index]
)
if not isinstance(agent, Agent):
raise RuntimeError("Could not get Agent from decorated command's args")
# Sanitize the specified path argument, if one is given
given_path: str | Path | None = kwargs.get(
arg_name, len(args) > arg_index and args[arg_index] or None
)
if given_path:
if type(given_path) is str:
# Fix workspace path from output in docker environment
given_path = re.sub(r"^\/workspace", ".", given_path)
if given_path in {"", "/", "."}:
sanitized_path = agent.workspace.root
else:
sanitized_path = agent.workspace.get_path(given_path)
# Make path relative if possible
if make_relative and sanitized_path.is_relative_to(
agent.workspace.root
):
sanitized_path = sanitized_path.relative_to(agent.workspace.root)
if arg_name in kwargs:
kwargs[arg_name] = sanitized_path
else:
# args is an immutable tuple; must be converted to a list to update
arg_list = list(args)
arg_list[arg_index] = sanitized_path
args = tuple(arg_list)
return func(*args, **kwargs)
return wrapper
return decorator

View File

@@ -1,32 +1,28 @@
"""Commands to execute code"""
import logging
import os
import shlex
import subprocess
from pathlib import Path
from tempfile import NamedTemporaryFile
from typing import Iterator
import docker
from docker.errors import DockerException, ImageNotFound, NotFound
from docker.models.containers import Container as DockerContainer
from autogpt.agents.agent import Agent
from autogpt.agents.utils.exceptions import (
from autogpt.agents.base import BaseAgentSettings
from autogpt.agents.protocols import CommandProvider
from autogpt.command_decorator import command
from autogpt.config import Config
from autogpt.core.utils.json_schema import JSONSchema
from autogpt.file_storage.base import FileStorage
from autogpt.models.command import Command
from autogpt.utils.exceptions import (
CodeExecutionError,
CommandExecutionError,
InvalidArgumentError,
OperationNotAllowedError,
)
from autogpt.command_decorator import command
from autogpt.config import Config
from autogpt.core.utils.json_schema import JSONSchema
from .decorators import sanitize_path_arg
COMMAND_CATEGORY = "execute_code"
COMMAND_CATEGORY_TITLE = "Execute Code"
logger = logging.getLogger(__name__)
@@ -57,331 +53,344 @@ def is_docker_available() -> bool:
return False
@command(
"execute_python_code",
"Executes the given Python code inside a single-use Docker container"
" with access to your workspace folder",
{
"code": JSONSchema(
type=JSONSchema.Type.STRING,
description="The Python code to run",
required=True,
),
},
disabled_reason="To execute python code agent "
"must be running in a Docker container or "
"Docker must be available on the system.",
available=we_are_running_in_a_docker_container() or is_docker_available(),
)
def execute_python_code(code: str, agent: Agent) -> str:
"""
Create and execute a Python file in a Docker container and return the STDOUT of the
executed code.
class CodeExecutorComponent(CommandProvider):
"""Provides commands to execute Python code and shell commands."""
If the code generates any data that needs to be captured, use a print statement.
def __init__(
self, workspace: FileStorage, state: BaseAgentSettings, config: Config
):
self.workspace = workspace
self.state = state
self.legacy_config = config
Args:
code (str): The Python code to run.
agent (Agent): The Agent executing the command.
if not we_are_running_in_a_docker_container() and not is_docker_available():
logger.info(
"Docker is not available or does not support Linux containers. "
"The code execution commands will not be available."
)
Returns:
str: The STDOUT captured from the code when it ran.
"""
if not self.legacy_config.execute_local_commands:
logger.info(
"Local shell commands are disabled. To enable them,"
" set EXECUTE_LOCAL_COMMANDS to 'True' in your config file."
)
tmp_code_file = NamedTemporaryFile(
"w", dir=agent.workspace.root, suffix=".py", encoding="utf-8"
def get_commands(self) -> Iterator[Command]:
if we_are_running_in_a_docker_container() or is_docker_available():
yield self.execute_python_code
yield self.execute_python_file
if self.legacy_config.execute_local_commands:
yield self.execute_shell
yield self.execute_shell_popen
@command(
["execute_python_code"],
"Executes the given Python code inside a single-use Docker container"
" with access to your workspace folder",
{
"code": JSONSchema(
type=JSONSchema.Type.STRING,
description="The Python code to run",
required=True,
),
},
)
tmp_code_file.write(code)
tmp_code_file.flush()
def execute_python_code(self, code: str) -> str:
"""
Create and execute a Python file in a Docker container
and return the STDOUT of the executed code.
try:
return execute_python_file(tmp_code_file.name, agent) # type: ignore
except Exception as e:
raise CommandExecutionError(*e.args)
finally:
tmp_code_file.close()
If the code generates any data that needs to be captured,
use a print statement.
Args:
code (str): The Python code to run.
agent (Agent): The Agent executing the command.
@command(
"execute_python_file",
"Execute an existing Python file inside a single-use Docker container"
" with access to your workspace folder",
{
"filename": JSONSchema(
type=JSONSchema.Type.STRING,
description="The name of the file to execute",
required=True,
),
"args": JSONSchema(
type=JSONSchema.Type.ARRAY,
description="The (command line) arguments to pass to the script",
required=False,
items=JSONSchema(type=JSONSchema.Type.STRING),
),
},
disabled_reason="To execute python code agent "
"must be running in a Docker container or "
"Docker must be available on the system.",
available=we_are_running_in_a_docker_container() or is_docker_available(),
)
@sanitize_path_arg("filename")
def execute_python_file(
filename: Path, agent: Agent, args: list[str] | str = []
) -> str:
"""Execute a Python file in a Docker container and return the output
Returns:
str: The STDOUT captured from the code when it ran.
"""
Args:
filename (Path): The name of the file to execute
args (list, optional): The arguments with which to run the python script
Returns:
str: The output of the file
"""
logger.info(
f"Executing python file '{filename}' "
f"in working directory '{agent.workspace.root}'"
)
if isinstance(args, str):
args = args.split() # Convert space-separated string to a list
if not str(filename).endswith(".py"):
raise InvalidArgumentError("Invalid file type. Only .py files are allowed.")
file_path = filename
if not file_path.is_file():
# Mimic the response that you get from the command line to make it
# intuitively understandable for the LLM
raise FileNotFoundError(
f"python: can't open file '{filename}': [Errno 2] No such file or directory"
tmp_code_file = NamedTemporaryFile(
"w", dir=self.workspace.root, suffix=".py", encoding="utf-8"
)
tmp_code_file.write(code)
tmp_code_file.flush()
if we_are_running_in_a_docker_container():
logger.debug(
"AutoGPT is running in a Docker container; "
f"executing {file_path} directly..."
)
result = subprocess.run(
["python", "-B", str(file_path)] + args,
capture_output=True,
encoding="utf8",
cwd=str(agent.workspace.root),
)
if result.returncode == 0:
return result.stdout
else:
raise CodeExecutionError(result.stderr)
logger.debug("AutoGPT is not running in a Docker container")
try:
assert agent.state.agent_id, "Need Agent ID to attach Docker container"
client = docker.from_env()
# You can replace this with the desired Python image/version
# You can find available Python images on Docker Hub:
# https://hub.docker.com/_/python
image_name = "python:3-alpine"
container_is_fresh = False
container_name = f"{agent.state.agent_id}_sandbox"
try:
container: DockerContainer = client.containers.get(
container_name
) # type: ignore
except NotFound:
try:
client.images.get(image_name)
logger.debug(f"Image '{image_name}' found locally")
except ImageNotFound:
logger.info(
f"Image '{image_name}' not found locally,"
" pulling from Docker Hub..."
)
# Use the low-level API to stream the pull response
low_level_client = docker.APIClient()
for line in low_level_client.pull(image_name, stream=True, decode=True):
# Print the status and progress, if available
status = line.get("status")
progress = line.get("progress")
if status and progress:
logger.info(f"{status}: {progress}")
elif status:
logger.info(status)
return self.execute_python_file(tmp_code_file.name)
except Exception as e:
raise CommandExecutionError(*e.args)
finally:
tmp_code_file.close()
logger.debug(f"Creating new {image_name} container...")
container: DockerContainer = client.containers.run(
image_name,
["sleep", "60"], # Max 60 seconds to prevent permanent hangs
volumes={
str(agent.workspace.root): {
"bind": "/workspace",
"mode": "rw",
}
},
working_dir="/workspace",
@command(
["execute_python_file"],
"Execute an existing Python file inside a single-use Docker container"
" with access to your workspace folder",
{
"filename": JSONSchema(
type=JSONSchema.Type.STRING,
description="The name of the file to execute",
required=True,
),
"args": JSONSchema(
type=JSONSchema.Type.ARRAY,
description="The (command line) arguments to pass to the script",
required=False,
items=JSONSchema(type=JSONSchema.Type.STRING),
),
},
)
def execute_python_file(self, filename: str, args: list[str] | str = []) -> str:
"""Execute a Python file in a Docker container and return the output
Args:
filename (Path): The name of the file to execute
args (list, optional): The arguments with which to run the python script
Returns:
str: The output of the file
"""
logger.info(
f"Executing python file '{filename}' "
f"in working directory '{self.workspace.root}'"
)
if isinstance(args, str):
args = args.split() # Convert space-separated string to a list
if not str(filename).endswith(".py"):
raise InvalidArgumentError("Invalid file type. Only .py files are allowed.")
file_path = Path(filename)
if not file_path.is_file():
# Mimic the response that you get from the command line to make it
# intuitively understandable for the LLM
raise FileNotFoundError(
f"python: can't open file '{filename}': "
f"[Errno 2] No such file or directory"
)
if we_are_running_in_a_docker_container():
logger.debug(
"AutoGPT is running in a Docker container; "
f"executing {file_path} directly..."
)
result = subprocess.run(
["python", "-B", str(file_path)] + args,
capture_output=True,
encoding="utf8",
cwd=str(self.workspace.root),
)
if result.returncode == 0:
return result.stdout
else:
raise CodeExecutionError(result.stderr)
logger.debug("AutoGPT is not running in a Docker container")
try:
assert self.state.agent_id, "Need Agent ID to attach Docker container"
client = docker.from_env()
image_name = "python:3-alpine"
container_is_fresh = False
container_name = f"{self.state.agent_id}_sandbox"
try:
container: DockerContainer = client.containers.get(
container_name
) # type: ignore
except NotFound:
try:
client.images.get(image_name)
logger.debug(f"Image '{image_name}' found locally")
except ImageNotFound:
logger.info(
f"Image '{image_name}' not found locally,"
" pulling from Docker Hub..."
)
# Use the low-level API to stream the pull response
low_level_client = docker.APIClient()
for line in low_level_client.pull(
image_name, stream=True, decode=True
):
# Print the status and progress, if available
status = line.get("status")
progress = line.get("progress")
if status and progress:
logger.info(f"{status}: {progress}")
elif status:
logger.info(status)
logger.debug(f"Creating new {image_name} container...")
container: DockerContainer = client.containers.run(
image_name,
["sleep", "60"], # Max 60 seconds to prevent permanent hangs
volumes={
str(self.workspace.root): {
"bind": "/workspace",
"mode": "rw",
}
},
working_dir="/workspace",
stderr=True,
stdout=True,
detach=True,
name=container_name,
) # type: ignore
container_is_fresh = True
if not container.status == "running":
container.start()
elif not container_is_fresh:
container.restart()
logger.debug(f"Running {file_path} in container {container.name}...")
exec_result = container.exec_run(
[
"python",
"-B",
file_path.relative_to(self.workspace.root).as_posix(),
]
+ args,
stderr=True,
stdout=True,
detach=True,
name=container_name,
) # type: ignore
container_is_fresh = True
)
if not container.status == "running":
container.start()
elif not container_is_fresh:
container.restart()
if exec_result.exit_code != 0:
raise CodeExecutionError(exec_result.output.decode("utf-8"))
logger.debug(f"Running {file_path} in container {container.name}...")
exec_result = container.exec_run(
[
"python",
"-B",
file_path.relative_to(agent.workspace.root).as_posix(),
]
+ args,
stderr=True,
stdout=True,
return exec_result.output.decode("utf-8")
except DockerException as e:
logger.warning(
"Could not run the script in a container. "
"If you haven't already, please install Docker: "
"https://docs.docker.com/get-docker/"
)
raise CommandExecutionError(f"Could not run the script in a container: {e}")
def validate_command(self, command_line: str, config: Config) -> tuple[bool, bool]:
"""Check whether a command is allowed and whether it may be executed in a shell.
If shell command control is enabled, we disallow executing in a shell, because
otherwise the model could circumvent the command filter using shell features.
Args:
command_line (str): The command line to validate
config (Config): The app config including shell command control settings
Returns:
bool: True if the command is allowed, False otherwise
bool: True if the command may be executed in a shell, False otherwise
"""
if not command_line:
return False, False
command_name = shlex.split(command_line)[0]
if config.shell_command_control == ALLOWLIST_CONTROL:
return command_name in config.shell_allowlist, False
elif config.shell_command_control == DENYLIST_CONTROL:
return command_name not in config.shell_denylist, False
else:
return True, True
@command(
["execute_shell"],
"Execute a Shell Command, non-interactive commands only",
{
"command_line": JSONSchema(
type=JSONSchema.Type.STRING,
description="The command line to execute",
required=True,
)
},
)
def execute_shell(self, command_line: str) -> str:
"""Execute a shell command and return the output
Args:
command_line (str): The command line to execute
Returns:
str: The output of the command
"""
allow_execute, allow_shell = self.validate_command(
command_line, self.legacy_config
)
if not allow_execute:
logger.info(f"Command '{command_line}' not allowed")
raise OperationNotAllowedError("This shell command is not allowed.")
current_dir = Path.cwd()
# Change dir into workspace if necessary
if not current_dir.is_relative_to(self.workspace.root):
os.chdir(self.workspace.root)
logger.info(
f"Executing command '{command_line}' in working directory '{os.getcwd()}'"
)
if exec_result.exit_code != 0:
raise CodeExecutionError(exec_result.output.decode("utf-8"))
return exec_result.output.decode("utf-8")
except DockerException as e:
logger.warning(
"Could not run the script in a container. "
"If you haven't already, please install Docker: "
"https://docs.docker.com/get-docker/"
result = subprocess.run(
command_line if allow_shell else shlex.split(command_line),
capture_output=True,
shell=allow_shell,
)
raise CommandExecutionError(f"Could not run the script in a container: {e}")
output = f"STDOUT:\n{result.stdout.decode()}\nSTDERR:\n{result.stderr.decode()}"
# Change back to whatever the prior working dir was
os.chdir(current_dir)
def validate_command(command_line: str, config: Config) -> tuple[bool, bool]:
"""Check whether a command is allowed and whether it may be executed in a shell.
return output
If shell command control is enabled, we disallow executing in a shell, because
otherwise the model could easily circumvent the command filter using shell features.
@command(
["execute_shell_popen"],
"Execute a Shell Command, non-interactive commands only",
{
"command_line": JSONSchema(
type=JSONSchema.Type.STRING,
description="The command line to execute",
required=True,
)
},
)
def execute_shell_popen(self, command_line: str) -> str:
"""Execute a shell command with Popen and returns an english description
of the event and the process id
Args:
command_line (str): The command line to validate
config (Config): The application config including shell command control settings
Args:
command_line (str): The command line to execute
Returns:
bool: True if the command is allowed, False otherwise
bool: True if the command may be executed in a shell, False otherwise
"""
if not command_line:
return False, False
command_name = shlex.split(command_line)[0]
if config.shell_command_control == ALLOWLIST_CONTROL:
return command_name in config.shell_allowlist, False
elif config.shell_command_control == DENYLIST_CONTROL:
return command_name not in config.shell_denylist, False
else:
return True, True
@command(
"execute_shell",
"Execute a Shell Command, non-interactive commands only",
{
"command_line": JSONSchema(
type=JSONSchema.Type.STRING,
description="The command line to execute",
required=True,
Returns:
str: Description of the fact that the process started and its id
"""
allow_execute, allow_shell = self.validate_command(
command_line, self.legacy_config
)
},
enabled=lambda config: config.execute_local_commands,
disabled_reason="You are not allowed to run local shell commands. To execute"
" shell commands, EXECUTE_LOCAL_COMMANDS must be set to 'True' "
"in your config file: .env - do not attempt to bypass the restriction.",
)
def execute_shell(command_line: str, agent: Agent) -> str:
"""Execute a shell command and return the output
if not allow_execute:
logger.info(f"Command '{command_line}' not allowed")
raise OperationNotAllowedError("This shell command is not allowed.")
Args:
command_line (str): The command line to execute
current_dir = Path.cwd()
# Change dir into workspace if necessary
if not current_dir.is_relative_to(self.workspace.root):
os.chdir(self.workspace.root)
Returns:
str: The output of the command
"""
allow_execute, allow_shell = validate_command(command_line, agent.legacy_config)
if not allow_execute:
logger.info(f"Command '{command_line}' not allowed")
raise OperationNotAllowedError("This shell command is not allowed.")
current_dir = Path.cwd()
# Change dir into workspace if necessary
if not current_dir.is_relative_to(agent.workspace.root):
os.chdir(agent.workspace.root)
logger.info(
f"Executing command '{command_line}' in working directory '{os.getcwd()}'"
)
result = subprocess.run(
command_line if allow_shell else shlex.split(command_line),
capture_output=True,
shell=allow_shell,
)
output = f"STDOUT:\n{result.stdout.decode()}\nSTDERR:\n{result.stderr.decode()}"
# Change back to whatever the prior working dir was
os.chdir(current_dir)
return output
@command(
"execute_shell_popen",
"Execute a Shell Command, non-interactive commands only",
{
"command_line": JSONSchema(
type=JSONSchema.Type.STRING,
description="The command line to execute",
required=True,
logger.info(
f"Executing command '{command_line}' in working directory '{os.getcwd()}'"
)
},
lambda config: config.execute_local_commands,
"You are not allowed to run local shell commands. To execute"
" shell commands, EXECUTE_LOCAL_COMMANDS must be set to 'True' "
"in your config. Do not attempt to bypass the restriction.",
)
def execute_shell_popen(command_line: str, agent: Agent) -> str:
"""Execute a shell command with Popen and returns an english description
of the event and the process id
Args:
command_line (str): The command line to execute
do_not_show_output = subprocess.DEVNULL
process = subprocess.Popen(
command_line if allow_shell else shlex.split(command_line),
shell=allow_shell,
stdout=do_not_show_output,
stderr=do_not_show_output,
)
Returns:
str: Description of the fact that the process started and its id
"""
allow_execute, allow_shell = validate_command(command_line, agent.legacy_config)
if not allow_execute:
logger.info(f"Command '{command_line}' not allowed")
raise OperationNotAllowedError("This shell command is not allowed.")
# Change back to whatever the prior working dir was
os.chdir(current_dir)
current_dir = Path.cwd()
# Change dir into workspace if necessary
if not current_dir.is_relative_to(agent.workspace.root):
os.chdir(agent.workspace.root)
logger.info(
f"Executing command '{command_line}' in working directory '{os.getcwd()}'"
)
do_not_show_output = subprocess.DEVNULL
process = subprocess.Popen(
command_line if allow_shell else shlex.split(command_line),
shell=allow_shell,
stdout=do_not_show_output,
stderr=do_not_show_output,
)
# Change back to whatever the prior working dir was
os.chdir(current_dir)
return f"Subprocess started with PID:'{str(process.pid)}'"
return f"Subprocess started with PID:'{str(process.pid)}'"

View File

@@ -1,131 +0,0 @@
"""Commands to perform operations on files"""
from __future__ import annotations
import contextlib
from pathlib import Path
from typing import TYPE_CHECKING
from autogpt.agents.features.context import ContextMixin, get_agent_context
from autogpt.agents.utils.exceptions import (
CommandExecutionError,
DuplicateOperationError,
)
from autogpt.command_decorator import command
from autogpt.core.utils.json_schema import JSONSchema
from autogpt.models.context_item import FileContextItem, FolderContextItem
from .decorators import sanitize_path_arg
COMMAND_CATEGORY = "file_operations"
COMMAND_CATEGORY_TITLE = "File Operations"
if TYPE_CHECKING:
from autogpt.agents import Agent, BaseAgent
def agent_implements_context(agent: BaseAgent) -> bool:
return isinstance(agent, ContextMixin)
@command(
"open_file",
"Opens a file for editing or continued viewing;"
" creates it if it does not exist yet. "
"Note: If you only need to read or write a file once, use `write_to_file` instead.",
{
"file_path": JSONSchema(
type=JSONSchema.Type.STRING,
description="The path of the file to open",
required=True,
)
},
available=agent_implements_context,
)
@sanitize_path_arg("file_path")
def open_file(file_path: Path, agent: Agent) -> tuple[str, FileContextItem]:
"""Open a file and return a context item
Args:
file_path (Path): The path of the file to open
Returns:
str: A status message indicating what happened
FileContextItem: A ContextItem representing the opened file
"""
# Try to make the file path relative
relative_file_path = None
with contextlib.suppress(ValueError):
relative_file_path = file_path.relative_to(agent.workspace.root)
assert (agent_context := get_agent_context(agent)) is not None
created = False
if not file_path.exists():
file_path.touch()
created = True
elif not file_path.is_file():
raise CommandExecutionError(f"{file_path} exists but is not a file")
file_path = relative_file_path or file_path
file = FileContextItem(
file_path_in_workspace=file_path,
workspace_path=agent.workspace.root,
)
if file in agent_context:
raise DuplicateOperationError(f"The file {file_path} is already open")
return (
f"File {file_path}{' created,' if created else ''} has been opened"
" and added to the context ✅",
file,
)
@command(
"open_folder",
"Open a folder to keep track of its content",
{
"path": JSONSchema(
type=JSONSchema.Type.STRING,
description="The path of the folder to open",
required=True,
)
},
available=agent_implements_context,
)
@sanitize_path_arg("path")
def open_folder(path: Path, agent: Agent) -> tuple[str, FolderContextItem]:
"""Open a folder and return a context item
Args:
path (Path): The path of the folder to open
Returns:
str: A status message indicating what happened
FolderContextItem: A ContextItem representing the opened folder
"""
# Try to make the path relative
relative_path = None
with contextlib.suppress(ValueError):
relative_path = path.relative_to(agent.workspace.root)
assert (agent_context := get_agent_context(agent)) is not None
if not path.exists():
raise FileNotFoundError(f"open_folder {path} failed: no such file or directory")
elif not path.is_dir():
raise CommandExecutionError(f"{path} exists but is not a folder")
path = relative_path or path
folder = FolderContextItem(
path_in_workspace=path,
workspace_path=agent.workspace.root,
)
if folder in agent_context:
raise DuplicateOperationError(f"The folder {path} is already open")
return f"Folder {path} has been opened and added to the context ✅", folder

View File

@@ -1,241 +0,0 @@
"""Commands to perform operations on files"""
from __future__ import annotations
import hashlib
import logging
import os
import os.path
from pathlib import Path
from typing import Iterator, Literal
from autogpt.agents.agent import Agent
from autogpt.agents.utils.exceptions import DuplicateOperationError
from autogpt.command_decorator import command
from autogpt.core.utils.json_schema import JSONSchema
from autogpt.memory.vector import MemoryItemFactory, VectorMemory
from .decorators import sanitize_path_arg
from .file_operations_utils import decode_textual_file
COMMAND_CATEGORY = "file_operations"
COMMAND_CATEGORY_TITLE = "File Operations"
from .file_context import open_file, open_folder # NOQA
logger = logging.getLogger(__name__)
Operation = Literal["write", "append", "delete"]
def text_checksum(text: str) -> str:
"""Get the hex checksum for the given text."""
return hashlib.md5(text.encode("utf-8")).hexdigest()
def operations_from_log(
logs: list[str],
) -> Iterator[
tuple[Literal["write", "append"], str, str] | tuple[Literal["delete"], str, None]
]:
"""Parse logs and return a tuple containing the log entries"""
for line in logs:
line = line.replace("File Operation Logger", "").strip()
if not line:
continue
operation, tail = line.split(": ", maxsplit=1)
operation = operation.strip()
if operation in ("write", "append"):
path, checksum = (x.strip() for x in tail.rsplit(" #", maxsplit=1))
yield (operation, path, checksum)
elif operation == "delete":
yield (operation, tail.strip(), None)
def file_operations_state(logs: list[str]) -> dict[str, str]:
"""Iterates over the operations and returns the expected state.
Constructs a dictionary that maps each file path written
or appended to its checksum. Deleted files are
removed from the dictionary.
Returns:
A dictionary mapping file paths to their checksums.
Raises:
FileNotFoundError: If file_manager.file_ops_log_path is not found.
ValueError: If the log file content is not in the expected format.
"""
state = {}
for operation, path, checksum in operations_from_log(logs):
if operation in ("write", "append"):
state[path] = checksum
elif operation == "delete":
del state[path]
return state
@sanitize_path_arg("file_path", make_relative=True)
def is_duplicate_operation(
operation: Operation, file_path: Path, agent: Agent, checksum: str | None = None
) -> bool:
"""Check if the operation has already been performed
Args:
operation: The operation to check for
file_path: The name of the file to check for
agent: The agent
checksum: The checksum of the contents to be written
Returns:
True if the operation has already been performed on the file
"""
state = file_operations_state(agent.get_file_operation_lines())
if operation == "delete" and file_path.as_posix() not in state:
return True
if operation == "write" and state.get(file_path.as_posix()) == checksum:
return True
return False
@sanitize_path_arg("file_path", make_relative=True)
async def log_operation(
operation: Operation,
file_path: str | Path,
agent: Agent,
checksum: str | None = None,
) -> None:
"""Log the file operation to the file_logger.log
Args:
operation: The operation to log
file_path: The name of the file the operation was performed on
checksum: The checksum of the contents to be written
"""
log_entry = (
f"{operation}: "
f"{file_path.as_posix() if isinstance(file_path, Path) else file_path}"
)
if checksum is not None:
log_entry += f" #{checksum}"
logger.debug(f"Logging file operation: {log_entry}")
await agent.log_file_operation(log_entry)
@command(
"read_file",
"Read an existing file",
{
"filename": JSONSchema(
type=JSONSchema.Type.STRING,
description="The path of the file to read",
required=True,
)
},
)
def read_file(filename: str | Path, agent: Agent) -> str:
"""Read a file and return the contents
Args:
filename (Path): The name of the file to read
Returns:
str: The contents of the file
"""
file = agent.workspace.open_file(filename, binary=True)
content = decode_textual_file(file, os.path.splitext(filename)[1], logger)
# # TODO: invalidate/update memory when file is edited
# file_memory = MemoryItem.from_text_file(content, str(filename), agent.config)
# if len(file_memory.chunks) > 1:
# return file_memory.summary
return content
def ingest_file(
filename: str,
memory: VectorMemory,
) -> None:
"""
Ingest a file by reading its content, splitting it into chunks with a specified
maximum length and overlap, and adding the chunks to the memory storage.
Args:
filename: The name of the file to ingest
memory: An object with an add() method to store the chunks in memory
"""
try:
logger.info(f"Ingesting file {filename}")
content = read_file(filename)
# TODO: differentiate between different types of files
file_memory = MemoryItemFactory.from_text_file(content, filename)
logger.debug(f"Created memory: {file_memory.dump(True)}")
memory.add(file_memory)
logger.info(f"Ingested {len(file_memory.e_chunks)} chunks from {filename}")
except Exception as err:
logger.warning(f"Error while ingesting file '{filename}': {err}")
@command(
"write_file",
"Write a file, creating it if necessary. If the file exists, it is overwritten.",
{
"filename": JSONSchema(
type=JSONSchema.Type.STRING,
description="The name of the file to write to",
required=True,
),
"contents": JSONSchema(
type=JSONSchema.Type.STRING,
description="The contents to write to the file",
required=True,
),
},
aliases=["create_file"],
)
async def write_to_file(filename: str | Path, contents: str, agent: Agent) -> str:
"""Write contents to a file
Args:
filename (Path): The name of the file to write to
contents (str): The contents to write to the file
Returns:
str: A message indicating success or failure
"""
checksum = text_checksum(contents)
if is_duplicate_operation("write", Path(filename), agent, checksum):
raise DuplicateOperationError(f"File {filename} has already been updated.")
if directory := os.path.dirname(filename):
agent.workspace.make_dir(directory)
await agent.workspace.write_file(filename, contents)
await log_operation("write", filename, agent, checksum)
return f"File {filename} has been written successfully."
@command(
"list_folder",
"List the items in a folder",
{
"folder": JSONSchema(
type=JSONSchema.Type.STRING,
description="The folder to list files in",
required=True,
)
},
)
def list_folder(folder: str | Path, agent: Agent) -> list[str]:
"""Lists files in a folder recursively
Args:
folder (Path): The folder to search in
Returns:
list[str]: A list of files found in the folder
"""
return [str(p) for p in agent.workspace.list_files(folder)]
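The removed log format is easiest to see with a tiny worked example; the log lines and checksums below are made up, and only `file_operations_state` comes from the deleted module above.

```python
# Made-up entries in the old "operation: path #checksum" log format.
logs = [
    "write: notes.txt #aaa111",
    "append: notes.txt #bbb222",
    "write: plan.md #ccc333",
    "delete: plan.md",
]

state = file_operations_state(logs)
print(state)  # {'notes.txt': 'bbb222'}: the last checksum wins, deleted paths drop out
```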

View File

@@ -1,58 +1,61 @@
"""Commands to perform Git operations"""
from pathlib import Path
from typing import Iterator
from git.repo import Repo
from autogpt.agents.agent import Agent
from autogpt.agents.utils.exceptions import CommandExecutionError
from autogpt.agents.protocols import CommandProvider
from autogpt.command_decorator import command
from autogpt.config.config import Config
from autogpt.core.utils.json_schema import JSONSchema
from autogpt.models.command import Command
from autogpt.url_utils.validators import validate_url
from .decorators import sanitize_path_arg
COMMAND_CATEGORY = "git_operations"
COMMAND_CATEGORY_TITLE = "Git Operations"
from autogpt.utils.exceptions import CommandExecutionError
@command(
"clone_repository",
"Clones a Repository",
{
"url": JSONSchema(
type=JSONSchema.Type.STRING,
description="The URL of the repository to clone",
required=True,
),
"clone_path": JSONSchema(
type=JSONSchema.Type.STRING,
description="The path to clone the repository to",
required=True,
),
},
lambda config: bool(config.github_username and config.github_api_key),
"Configure github_username and github_api_key.",
)
@sanitize_path_arg("clone_path")
@validate_url
def clone_repository(url: str, clone_path: Path, agent: Agent) -> str:
"""Clone a GitHub repository locally.
class GitOperationsComponent(CommandProvider):
"""Provides commands to perform Git operations."""
Args:
url (str): The URL of the repository to clone.
clone_path (Path): The path to clone the repository to.
def __init__(self, config: Config):
self._enabled = bool(config.github_username and config.github_api_key)
self._disabled_reason = "Configure github_username and github_api_key."
self.legacy_config = config
Returns:
str: The result of the clone operation.
"""
split_url = url.split("//")
auth_repo_url = f"//{agent.legacy_config.github_username}:{agent.legacy_config.github_api_key}@".join( # noqa: E501
split_url
def get_commands(self) -> Iterator[Command]:
yield self.clone_repository
@command(
parameters={
"url": JSONSchema(
type=JSONSchema.Type.STRING,
description="The URL of the repository to clone",
required=True,
),
"clone_path": JSONSchema(
type=JSONSchema.Type.STRING,
description="The path to clone the repository to",
required=True,
),
},
)
try:
Repo.clone_from(url=auth_repo_url, to_path=clone_path)
except Exception as e:
raise CommandExecutionError(f"Could not clone repo: {e}")
@validate_url
def clone_repository(self, url: str, clone_path: Path) -> str:
"""Clone a GitHub repository locally.
return f"""Cloned {url} to {clone_path}"""
Args:
url (str): The URL of the repository to clone.
clone_path (Path): The path to clone the repository to.
Returns:
str: The result of the clone operation.
"""
split_url = url.split("//")
auth_repo_url = (
f"//{self.legacy_config.github_username}:"
f"{self.legacy_config.github_api_key}@".join(split_url)
)
try:
Repo.clone_from(url=auth_repo_url, to_path=clone_path)
except Exception as e:
raise CommandExecutionError(f"Could not clone repo: {e}")
return f"""Cloned {url} to {clone_path}"""

View File

@@ -7,206 +7,216 @@ import time
import uuid
from base64 import b64decode
from pathlib import Path
from typing import Iterator
import requests
from openai import OpenAI
from PIL import Image
from autogpt.agents.agent import Agent
from autogpt.agents.protocols import CommandProvider
from autogpt.command_decorator import command
from autogpt.config.config import Config
from autogpt.core.utils.json_schema import JSONSchema
COMMAND_CATEGORY = "text_to_image"
COMMAND_CATEGORY_TITLE = "Text to Image"
from autogpt.file_storage.base import FileStorage
from autogpt.models.command import Command
logger = logging.getLogger(__name__)
@command(
"generate_image",
"Generates an Image",
{
"prompt": JSONSchema(
type=JSONSchema.Type.STRING,
description="The prompt used to generate the image",
required=True,
),
},
lambda config: bool(config.image_provider),
"Requires a image provider to be set.",
)
def generate_image(prompt: str, agent: Agent, size: int = 256) -> str:
"""Generate an image from a prompt.
class ImageGeneratorComponent(CommandProvider):
"""A component that provides commands to generate images from text prompts."""
Args:
prompt (str): The prompt to use
size (int, optional): The size of the image. Defaults to 256.
Not supported by HuggingFace.
def __init__(self, workspace: FileStorage, config: Config):
self._enabled = bool(config.image_provider)
self._disabled_reason = "No image provider set."
self.workspace = workspace
self.legacy_config = config
Returns:
str: The filename of the image
"""
filename = agent.workspace.root / f"{str(uuid.uuid4())}.jpg"
def get_commands(self) -> Iterator[Command]:
yield self.generate_image
# DALL-E
if agent.legacy_config.image_provider == "dalle":
return generate_image_with_dalle(prompt, filename, size, agent)
# HuggingFace
elif agent.legacy_config.image_provider == "huggingface":
return generate_image_with_hf(prompt, filename, agent)
# SD WebUI
elif agent.legacy_config.image_provider == "sdwebui":
return generate_image_with_sd_webui(prompt, filename, agent, size)
return "No Image Provider Set"
@command(
parameters={
"prompt": JSONSchema(
type=JSONSchema.Type.STRING,
description="The prompt used to generate the image",
required=True,
),
"size": JSONSchema(
type=JSONSchema.Type.INTEGER,
description="The size of the image",
required=False,
),
},
)
def generate_image(self, prompt: str, size: int) -> str:
"""Generate an image from a prompt.
Args:
prompt (str): The prompt to use
size (int, optional): The size of the image. Defaults to 256.
Not supported by HuggingFace.
def generate_image_with_hf(prompt: str, output_file: Path, agent: Agent) -> str:
"""Generate an image with HuggingFace's API.
Returns:
str: The filename of the image
"""
filename = self.workspace.root / f"{str(uuid.uuid4())}.jpg"
Args:
prompt (str): The prompt to use
filename (Path): The filename to save the image to
# DALL-E
if self.legacy_config.image_provider == "dalle":
return self.generate_image_with_dalle(prompt, filename, size)
# HuggingFace
elif self.legacy_config.image_provider == "huggingface":
return self.generate_image_with_hf(prompt, filename)
# SD WebUI
elif self.legacy_config.image_provider == "sdwebui":
return self.generate_image_with_sd_webui(prompt, filename, size)
return "No Image Provider Set"
Returns:
str: The filename of the image
"""
API_URL = f"https://api-inference.huggingface.co/models/{agent.legacy_config.huggingface_image_model}" # noqa: E501
if agent.legacy_config.huggingface_api_token is None:
raise ValueError(
"You need to set your Hugging Face API token in the config file."
def generate_image_with_hf(self, prompt: str, output_file: Path) -> str:
"""Generate an image with HuggingFace's API.
Args:
prompt (str): The prompt to use
filename (Path): The filename to save the image to
Returns:
str: The filename of the image
"""
API_URL = f"https://api-inference.huggingface.co/models/{self.legacy_config.huggingface_image_model}" # noqa: E501
if self.legacy_config.huggingface_api_token is None:
raise ValueError(
"You need to set your Hugging Face API token in the config file."
)
headers = {
"Authorization": f"Bearer {self.legacy_config.huggingface_api_token}",
"X-Use-Cache": "false",
}
retry_count = 0
while retry_count < 10:
response = requests.post(
API_URL,
headers=headers,
json={
"inputs": prompt,
},
)
if response.ok:
try:
image = Image.open(io.BytesIO(response.content))
logger.info(f"Image Generated for prompt:{prompt}")
image.save(output_file)
return f"Saved to disk: {output_file}"
except Exception as e:
logger.error(e)
break
else:
try:
error = json.loads(response.text)
if "estimated_time" in error:
delay = error["estimated_time"]
logger.debug(response.text)
logger.info("Retrying in", delay)
time.sleep(delay)
else:
break
except Exception as e:
logger.error(e)
break
retry_count += 1
return "Error creating image."
def generate_image_with_dalle(
self, prompt: str, output_file: Path, size: int
) -> str:
"""Generate an image with DALL-E.
Args:
prompt (str): The prompt to use
filename (Path): The filename to save the image to
size (int): The size of the image
Returns:
str: The filename of the image
"""
# Check for supported image sizes
if size not in [256, 512, 1024]:
closest = min([256, 512, 1024], key=lambda x: abs(x - size))
logger.info(
"DALL-E only supports image sizes of 256x256, 512x512, or 1024x1024. "
f"Setting to {closest}, was {size}."
)
size = closest
response = OpenAI(
api_key=self.legacy_config.openai_credentials.api_key.get_secret_value()
).images.generate(
prompt=prompt,
n=1,
size=f"{size}x{size}",
response_format="b64_json",
)
headers = {
"Authorization": f"Bearer {agent.legacy_config.huggingface_api_token}",
"X-Use-Cache": "false",
}
retry_count = 0
while retry_count < 10:
logger.info(f"Image Generated for prompt:{prompt}")
image_data = b64decode(response.data[0].b64_json)
with open(output_file, mode="wb") as png:
png.write(image_data)
return f"Saved to disk: {output_file}"
def generate_image_with_sd_webui(
self,
prompt: str,
output_file: Path,
size: int = 512,
negative_prompt: str = "",
extra: dict = {},
) -> str:
"""Generate an image with Stable Diffusion webui.
Args:
prompt (str): The prompt to use
filename (str): The filename to save the image to
size (int, optional): The size of the image. Defaults to 512.
negative_prompt (str, optional): The negative prompt to use. Defaults to "".
extra (dict, optional): Extra parameters to pass to the API. Defaults to {}.
Returns:
str: The filename of the image
"""
# Create a session and set the basic auth if needed
s = requests.Session()
if self.legacy_config.sd_webui_auth:
username, password = self.legacy_config.sd_webui_auth.split(":")
s.auth = (username, password or "")
# Generate the images
response = requests.post(
API_URL,
headers=headers,
f"{self.legacy_config.sd_webui_url}/sdapi/v1/txt2img",
json={
"inputs": prompt,
"prompt": prompt,
"negative_prompt": negative_prompt,
"sampler_index": "DDIM",
"steps": 20,
"config_scale": 7.0,
"width": size,
"height": size,
"n_iter": 1,
**extra,
},
)
if response.ok:
try:
image = Image.open(io.BytesIO(response.content))
logger.info(f"Image Generated for prompt:{prompt}")
image.save(output_file)
return f"Saved to disk: {output_file}"
except Exception as e:
logger.error(e)
break
else:
try:
error = json.loads(response.text)
if "estimated_time" in error:
delay = error["estimated_time"]
logger.debug(response.text)
logger.info("Retrying in", delay)
time.sleep(delay)
else:
break
except Exception as e:
logger.error(e)
break
logger.info(f"Image Generated for prompt: '{prompt}'")
retry_count += 1
# Save the image to disk
response = response.json()
b64 = b64decode(response["images"][0].split(",", 1)[0])
image = Image.open(io.BytesIO(b64))
image.save(output_file)
return "Error creating image."
def generate_image_with_dalle(
prompt: str, output_file: Path, size: int, agent: Agent
) -> str:
"""Generate an image with DALL-E.
Args:
prompt (str): The prompt to use
filename (Path): The filename to save the image to
size (int): The size of the image
Returns:
str: The filename of the image
"""
# Check for supported image sizes
if size not in [256, 512, 1024]:
closest = min([256, 512, 1024], key=lambda x: abs(x - size))
logger.info(
"DALL-E only supports image sizes of 256x256, 512x512, or 1024x1024. "
f"Setting to {closest}, was {size}."
)
size = closest
response = OpenAI(
api_key=agent.legacy_config.openai_credentials.api_key.get_secret_value()
).images.generate(
prompt=prompt,
n=1,
size=f"{size}x{size}",
response_format="b64_json",
)
logger.info(f"Image Generated for prompt:{prompt}")
image_data = b64decode(response.data[0].b64_json)
with open(output_file, mode="wb") as png:
png.write(image_data)
return f"Saved to disk: {output_file}"
def generate_image_with_sd_webui(
prompt: str,
output_file: Path,
agent: Agent,
size: int = 512,
negative_prompt: str = "",
extra: dict = {},
) -> str:
"""Generate an image with Stable Diffusion webui.
Args:
prompt (str): The prompt to use
filename (str): The filename to save the image to
size (int, optional): The size of the image. Defaults to 512.
negative_prompt (str, optional): The negative prompt to use. Defaults to "".
extra (dict, optional): Extra parameters to pass to the API. Defaults to {}.
Returns:
str: The filename of the image
"""
# Create a session and set the basic auth if needed
s = requests.Session()
if agent.legacy_config.sd_webui_auth:
username, password = agent.legacy_config.sd_webui_auth.split(":")
s.auth = (username, password or "")
# Generate the images
response = requests.post(
f"{agent.legacy_config.sd_webui_url}/sdapi/v1/txt2img",
json={
"prompt": prompt,
"negative_prompt": negative_prompt,
"sampler_index": "DDIM",
"steps": 20,
"config_scale": 7.0,
"width": size,
"height": size,
"n_iter": 1,
**extra,
},
)
logger.info(f"Image Generated for prompt: '{prompt}'")
# Save the image to disk
response = response.json()
b64 = b64decode(response["images"][0].split(",", 1)[0])
image = Image.open(io.BytesIO(b64))
image.save(output_file)
return f"Saved to disk: {output_file}"
return f"Saved to disk: {output_file}"

View File

@@ -1,69 +1,53 @@
"""Commands to control the internal state of the program"""
from __future__ import annotations
import logging
from typing import TYPE_CHECKING
import time
from typing import Iterator
from autogpt.agents.features.context import get_agent_context
from autogpt.agents.utils.exceptions import AgentFinished, InvalidArgumentError
from autogpt.agents.protocols import CommandProvider, DirectiveProvider, MessageProvider
from autogpt.command_decorator import command
from autogpt.config.ai_profile import AIProfile
from autogpt.config.config import Config
from autogpt.core.resource.model_providers.schema import ChatMessage
from autogpt.core.utils.json_schema import JSONSchema
COMMAND_CATEGORY = "system"
COMMAND_CATEGORY_TITLE = "System"
if TYPE_CHECKING:
from autogpt.agents.agent import Agent
from autogpt.models.command import Command
from autogpt.utils.exceptions import AgentFinished
from autogpt.utils.utils import DEFAULT_FINISH_COMMAND
logger = logging.getLogger(__name__)
@command(
"finish",
"Use this to shut down once you have completed your task,"
" or when there are insurmountable problems that make it impossible"
" for you to finish your task.",
{
"reason": JSONSchema(
type=JSONSchema.Type.STRING,
description="A summary to the user of how the goals were accomplished",
required=True,
)
},
)
def finish(reason: str, agent: Agent) -> None:
"""
A function that takes in a string and exits the program
class SystemComponent(DirectiveProvider, MessageProvider, CommandProvider):
"""Component for system messages and commands."""
Parameters:
reason (str): A summary to the user of how the goals were accomplished.
Returns:
A result string from create chat completion. A list of suggestions to
improve the code.
"""
raise AgentFinished(reason)
def __init__(self, config: Config, profile: AIProfile):
self.legacy_config = config
self.profile = profile
def get_constraints(self) -> Iterator[str]:
if self.profile.api_budget > 0.0:
yield (
f"It takes money to let you run. "
f"Your API budget is ${self.profile.api_budget:.3f}"
)
@command(
"hide_context_item",
"Hide an open file, folder or other context item, to save memory.",
{
"number": JSONSchema(
type=JSONSchema.Type.INTEGER,
description="The 1-based index of the context item to hide",
required=True,
)
},
available=lambda a: bool(get_agent_context(a)),
)
def close_context_item(number: int, agent: Agent) -> str:
assert (context := get_agent_context(agent)) is not None
def get_messages(self) -> Iterator[ChatMessage]:
# Clock
yield ChatMessage.system(f"The current time and date is {time.strftime('%c')}")
if number > len(context.items) or number == 0:
raise InvalidArgumentError(f"Index {number} out of range")
def get_commands(self) -> Iterator[Command]:
yield self.finish
context.close(number)
return f"Context item {number} hidden ✅"
@command(
names=[DEFAULT_FINISH_COMMAND],
parameters={
"reason": JSONSchema(
type=JSONSchema.Type.STRING,
description="A summary to the user of how the goals were accomplished",
required=True,
),
},
)
def finish(self, reason: str):
"""Use this to shut down once you have completed your task,
or when there are insurmountable problems that make it impossible
for you to finish your task."""
raise AgentFinished(reason)
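
This file shows the pattern every command now follows: a component implements CommandProvider, decorates its methods with @command, and yields them from get_commands. For illustration, a minimal custom component built the same way might look like the sketch below (the component and command names are hypothetical, not part of this commit):

from typing import Iterator

from autogpt.agents.protocols import CommandProvider
from autogpt.command_decorator import command
from autogpt.core.utils.json_schema import JSONSchema
from autogpt.models.command import Command


class GreetingComponent(CommandProvider):
    """Hypothetical component providing a single command."""

    def get_commands(self) -> Iterator[Command]:
        yield self.greet

    @command(
        names=["greet"],
        parameters={
            "name": JSONSchema(
                type=JSONSchema.Type.STRING,
                description="Who to greet",
                required=True,
            ),
        },
    )
    def greet(self, name: str) -> str:
        """Greet the given person by name."""
        return f"Hello, {name}!"

Note that the description text from the old decorator now lives in the method docstring, as seen in SystemComponent above.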

View File

@@ -1,10 +0,0 @@
from datetime import datetime
def get_datetime() -> str:
"""Return the current date and time
Returns:
str: The current date and time
"""
return "Current date and time: " + datetime.now().strftime("%Y-%m-%d %H:%M:%S")

View File

@@ -1,32 +1,37 @@
"""Commands to interact with the user"""
from typing import Iterator
from __future__ import annotations
from autogpt.agents.agent import Agent
from autogpt.agents.protocols import CommandProvider
from autogpt.app.utils import clean_input
from autogpt.command_decorator import command
from autogpt.config.config import Config
from autogpt.core.utils.json_schema import JSONSchema
COMMAND_CATEGORY = "user_interaction"
COMMAND_CATEGORY_TITLE = "User Interaction"
from autogpt.models.command import Command
from autogpt.utils.utils import DEFAULT_ASK_COMMAND
@command(
"ask_user",
(
"If you need more details or information regarding the given goals,"
" you can ask the user for input"
),
{
"question": JSONSchema(
type=JSONSchema.Type.STRING,
description="The question or prompt to the user",
required=True,
)
},
enabled=lambda config: not config.noninteractive_mode,
)
async def ask_user(question: str, agent: Agent) -> str:
print(f"\nQ: {question}")
resp = clean_input(agent.legacy_config, "A:")
return f"The user's answer: '{resp}'"
class UserInteractionComponent(CommandProvider):
"""Provides commands to interact with the user."""
def __init__(self, config: Config):
self.config = config
self._enabled = not config.noninteractive_mode
def get_commands(self) -> Iterator[Command]:
yield self.ask_user
@command(
names=[DEFAULT_ASK_COMMAND],
parameters={
"question": JSONSchema(
type=JSONSchema.Type.STRING,
description="The question or prompt to the user",
required=True,
)
},
)
def ask_user(self, question: str) -> str:
"""If you need more details or information regarding the given goals,
you can ask the user for input."""
print(f"\nQ: {question}")
resp = clean_input(self.config, "A:")
return f"The user's answer: '{resp}'"

View File

@@ -1,169 +1,195 @@
"""Commands to search the web with"""
from __future__ import annotations
import json
import logging
import time
from typing import Iterator
from duckduckgo_search import DDGS
from autogpt.agents.agent import Agent
from autogpt.agents.utils.exceptions import ConfigurationError
from autogpt.agents.protocols import CommandProvider, DirectiveProvider
from autogpt.command_decorator import command
from autogpt.config.config import Config
from autogpt.core.utils.json_schema import JSONSchema
COMMAND_CATEGORY = "web_search"
COMMAND_CATEGORY_TITLE = "Web Search"
from autogpt.models.command import Command
from autogpt.utils.exceptions import ConfigurationError
DUCKDUCKGO_MAX_ATTEMPTS = 3
@command(
"web_search",
"Searches the web",
{
"query": JSONSchema(
type=JSONSchema.Type.STRING,
description="The search query",
required=True,
)
},
aliases=["search"],
)
def web_search(query: str, agent: Agent, num_results: int = 8) -> str:
"""Return the results of a Google search
Args:
query (str): The search query.
num_results (int): The number of results to return.
Returns:
str: The results of the search.
"""
search_results = []
attempts = 0
while attempts < DUCKDUCKGO_MAX_ATTEMPTS:
if not query:
return json.dumps(search_results)
search_results = DDGS().text(query, max_results=num_results)
if search_results:
break
time.sleep(1)
attempts += 1
search_results = [
{
"title": r["title"],
"url": r["href"],
**({"exerpt": r["body"]} if r.get("body") else {}),
}
for r in search_results
]
results = (
"## Search results\n"
# "Read these results carefully."
# " Extract the information you need for your task from the list of results"
# " if possible. Otherwise, choose a webpage from the list to read entirely."
# "\n\n"
) + "\n\n".join(
f"### \"{r['title']}\"\n"
f"**URL:** {r['url']} \n"
"**Excerpt:** " + (f'"{exerpt}"' if (exerpt := r.get("exerpt")) else "N/A")
for r in search_results
)
return safe_google_results(results)
logger = logging.getLogger(__name__)
@command(
"google",
"Google Search",
{
"query": JSONSchema(
type=JSONSchema.Type.STRING,
description="The search query",
required=True,
)
},
lambda config: bool(config.google_api_key)
and bool(config.google_custom_search_engine_id),
"Configure google_api_key and custom_search_engine_id.",
aliases=["search"],
)
def google(query: str, agent: Agent, num_results: int = 8) -> str | list[str]:
"""Return the results of a Google search using the official Google API
class WebSearchComponent(DirectiveProvider, CommandProvider):
"""Provides commands to search the web."""
Args:
query (str): The search query.
num_results (int): The number of results to return.
def __init__(self, config: Config):
self.legacy_config = config
Returns:
str: The results of the search.
"""
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
try:
# Get the Google API key and Custom Search Engine ID from the config file
api_key = agent.legacy_config.google_api_key
custom_search_engine_id = agent.legacy_config.google_custom_search_engine_id
# Initialize the Custom Search API service
service = build("customsearch", "v1", developerKey=api_key)
# Send the search query and retrieve the results
result = (
service.cse()
.list(q=query, cx=custom_search_engine_id, num=num_results)
.execute()
)
# Extract the search result items from the response
search_results = result.get("items", [])
# Create a list of only the URLs from the search results
search_results_links = [item["link"] for item in search_results]
except HttpError as e:
# Handle errors in the API call
error_details = json.loads(e.content.decode())
# Check if the error is related to an invalid or missing API key
if error_details.get("error", {}).get(
"code"
) == 403 and "invalid API key" in error_details.get("error", {}).get(
"message", ""
if (
not self.legacy_config.google_api_key
or not self.legacy_config.google_custom_search_engine_id
):
raise ConfigurationError(
"The provided Google API key is invalid or missing."
logger.info(
"Configure google_api_key and custom_search_engine_id "
"to use Google API search."
)
raise
# google_result can be a list or a string depending on the search results
# Return the list of search result URLs
return safe_google_results(search_results_links)
def get_resources(self) -> Iterator[str]:
yield "Internet access for searches and information gathering."
def get_commands(self) -> Iterator[Command]:
yield self.web_search
def safe_google_results(results: str | list) -> str:
"""
Return the results of a Google search in a safe format.
if (
self.legacy_config.google_api_key
and self.legacy_config.google_custom_search_engine_id
):
yield self.google
Args:
results (str | list): The search results.
@command(
["web_search", "search"],
"Searches the web",
{
"query": JSONSchema(
type=JSONSchema.Type.STRING,
description="The search query",
required=True,
),
"num_results": JSONSchema(
type=JSONSchema.Type.INTEGER,
description="The number of results to return",
minimum=1,
maximum=10,
required=False,
),
},
)
def web_search(self, query: str, num_results: int = 8) -> str:
"""Return the results of a Google search
Returns:
str: The results of the search.
"""
if isinstance(results, list):
safe_message = json.dumps(
[result.encode("utf-8", "ignore").decode("utf-8") for result in results]
Args:
query (str): The search query.
num_results (int): The number of results to return.
Returns:
str: The results of the search.
"""
search_results = []
attempts = 0
while attempts < DUCKDUCKGO_MAX_ATTEMPTS:
if not query:
return json.dumps(search_results)
search_results = DDGS().text(query, max_results=num_results)
if search_results:
break
time.sleep(1)
attempts += 1
search_results = [
{
"title": r["title"],
"url": r["href"],
**({"exerpt": r["body"]} if r.get("body") else {}),
}
for r in search_results
]
results = ("## Search results\n") + "\n\n".join(
f"### \"{r['title']}\"\n"
f"**URL:** {r['url']} \n"
"**Excerpt:** " + (f'"{exerpt}"' if (exerpt := r.get("exerpt")) else "N/A")
for r in search_results
)
else:
safe_message = results.encode("utf-8", "ignore").decode("utf-8")
return safe_message
return self.safe_google_results(results)
@command(
["google"],
"Google Search",
{
"query": JSONSchema(
type=JSONSchema.Type.STRING,
description="The search query",
required=True,
),
"num_results": JSONSchema(
type=JSONSchema.Type.INTEGER,
description="The number of results to return",
minimum=1,
maximum=10,
required=False,
),
},
)
def google(self, query: str, num_results: int = 8) -> str | list[str]:
"""Return the results of a Google search using the official Google API
Args:
query (str): The search query.
num_results (int): The number of results to return.
Returns:
str: The results of the search.
"""
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
try:
# Get the Google API key and Custom Search Engine ID from the config file
api_key = self.legacy_config.google_api_key
custom_search_engine_id = self.legacy_config.google_custom_search_engine_id
# Initialize the Custom Search API service
service = build("customsearch", "v1", developerKey=api_key)
# Send the search query and retrieve the results
result = (
service.cse()
.list(q=query, cx=custom_search_engine_id, num=num_results)
.execute()
)
# Extract the search result items from the response
search_results = result.get("items", [])
# Create a list of only the URLs from the search results
search_results_links = [item["link"] for item in search_results]
except HttpError as e:
# Handle errors in the API call
error_details = json.loads(e.content.decode())
# Check if the error is related to an invalid or missing API key
if error_details.get("error", {}).get(
"code"
) == 403 and "invalid API key" in error_details.get("error", {}).get(
"message", ""
):
raise ConfigurationError(
"The provided Google API key is invalid or missing."
)
raise
# google_result can be a list or a string depending on the search results
# Return the list of search result URLs
return self.safe_google_results(search_results_links)
def safe_google_results(self, results: str | list) -> str:
"""
Return the results of a Google search in a safe format.
Args:
results (str | list): The search results.
Returns:
str: The results of the search.
"""
if isinstance(results, list):
safe_message = json.dumps(
[result.encode("utf-8", "ignore").decode("utf-8") for result in results]
)
else:
safe_message = results.encode("utf-8", "ignore").decode("utf-8")
return safe_message
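
Which commands the agent actually sees now depends on the config at runtime rather than on registry-time filtering. A sketch, assuming a Config instance named config with no Google credentials set:

component = WebSearchComponent(config)
available = [cmd.names[0] for cmd in component.get_commands()]
# available == ["web_search"]; "google" is only yielded once google_api_key
# and google_custom_search_engine_id are both configured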

View File

@@ -1,13 +1,9 @@
"""Commands for browsing a website"""
from __future__ import annotations
import asyncio
import logging
import re
from pathlib import Path
from sys import platform
from typing import TYPE_CHECKING, Optional, Type
from typing import Iterator, Type
from urllib.request import urlretrieve
from bs4 import BeautifulSoup
@@ -32,21 +28,19 @@ from webdriver_manager.chrome import ChromeDriverManager
from webdriver_manager.firefox import GeckoDriverManager
from webdriver_manager.microsoft import EdgeChromiumDriverManager as EdgeDriverManager
from autogpt.agents.utils.exceptions import CommandExecutionError, TooMuchOutputError
from autogpt.agents.protocols import CommandProvider, DirectiveProvider
from autogpt.command_decorator import command
from autogpt.config import Config
from autogpt.core.resource.model_providers.schema import (
ChatModelInfo,
ChatModelProvider,
)
from autogpt.core.utils.json_schema import JSONSchema
from autogpt.models.command import Command
from autogpt.processing.html import extract_hyperlinks, format_hyperlinks
from autogpt.processing.text import extract_information, summarize_text
from autogpt.url_utils.validators import validate_url
COMMAND_CATEGORY = "web_browse"
COMMAND_CATEGORY_TITLE = "Web Browsing"
if TYPE_CHECKING:
from autogpt.agents.agent import Agent
from autogpt.config import Config
from autogpt.utils.exceptions import CommandExecutionError, TooMuchOutputError
logger = logging.getLogger(__name__)
@@ -59,321 +53,324 @@ class BrowsingError(CommandExecutionError):
"""An error occurred while trying to browse the page"""
@command(
"read_webpage",
(
"Read a webpage, and extract specific information from it."
" You must specify either topics_of_interest, a question, or get_raw_content."
),
{
"url": JSONSchema(
type=JSONSchema.Type.STRING,
description="The URL to visit",
required=True,
class WebSeleniumComponent(DirectiveProvider, CommandProvider):
"""Provides commands to browse the web using Selenium."""
def __init__(
self,
config: Config,
llm_provider: ChatModelProvider,
model_info: ChatModelInfo,
):
self.legacy_config = config
self.llm_provider = llm_provider
self.model_info = model_info
def get_resources(self) -> Iterator[str]:
yield "Ability to read websites."
def get_commands(self) -> Iterator[Command]:
yield self.read_webpage
@command(
["read_webpage"],
(
"Read a webpage, and extract specific information from it."
" You must specify either topics_of_interest,"
" a question, or get_raw_content."
),
"topics_of_interest": JSONSchema(
type=JSONSchema.Type.ARRAY,
items=JSONSchema(type=JSONSchema.Type.STRING),
description=(
"A list of topics about which you want to extract information "
"from the page."
{
"url": JSONSchema(
type=JSONSchema.Type.STRING,
description="The URL to visit",
required=True,
),
required=False,
),
"question": JSONSchema(
type=JSONSchema.Type.STRING,
description=(
"A question that you want to answer using the content of the webpage."
"topics_of_interest": JSONSchema(
type=JSONSchema.Type.ARRAY,
items=JSONSchema(type=JSONSchema.Type.STRING),
description=(
"A list of topics about which you want to extract information "
"from the page."
),
required=False,
),
required=False,
),
"get_raw_content": JSONSchema(
type=JSONSchema.Type.BOOLEAN,
description=(
"If true, the unprocessed content of the webpage will be returned. "
"This consumes a lot of tokens, so use it with caution."
"question": JSONSchema(
type=JSONSchema.Type.STRING,
description=(
"A question you want to answer using the content of the webpage."
),
required=False,
),
required=False,
),
},
)
@validate_url
async def read_webpage(
url: str,
agent: Agent,
*,
topics_of_interest: list[str] = [],
get_raw_content: bool = False,
question: str = "",
) -> str:
"""Browse a website and return the answer and links to the user
"get_raw_content": JSONSchema(
type=JSONSchema.Type.BOOLEAN,
description=(
"If true, the unprocessed content of the webpage will be returned. "
"This consumes a lot of tokens, so use it with caution."
),
required=False,
),
},
)
@validate_url
async def read_webpage(
self,
url: str,
*,
topics_of_interest: list[str] = [],
get_raw_content: bool = False,
question: str = "",
) -> str:
"""Browse a website and return the answer and links to the user
Args:
url (str): The url of the website to browse
question (str): The question to answer using the content of the webpage
Args:
url (str): The url of the website to browse
question (str): The question to answer using the content of the webpage
Returns:
str: The answer and links to the user and the webdriver
"""
driver = None
try:
driver = await open_page_in_browser(url, agent.legacy_config)
Returns:
str: The answer and links to the user and the webdriver
"""
driver = None
try:
driver = await self.open_page_in_browser(url, self.legacy_config)
text = scrape_text_with_selenium(driver)
links = scrape_links_with_selenium(driver, url)
text = self.scrape_text_with_selenium(driver)
links = self.scrape_links_with_selenium(driver, url)
return_literal_content = True
summarized = False
if not text:
return f"Website did not contain any text.\n\nLinks: {links}"
elif get_raw_content:
if (
output_tokens := agent.llm_provider.count_tokens(text, agent.llm.name)
) > MAX_RAW_CONTENT_LENGTH:
oversize_factor = round(output_tokens / MAX_RAW_CONTENT_LENGTH, 1)
raise TooMuchOutputError(
f"Page content is {oversize_factor}x the allowed length "
"for `get_raw_content=true`"
)
return text + (f"\n\nLinks: {links}" if links else "")
else:
text = await summarize_memorize_webpage(
url, text, question or None, topics_of_interest, agent, driver
)
return_literal_content = bool(question)
summarized = True
# Limit links to LINKS_TO_RETURN
if len(links) > LINKS_TO_RETURN:
links = links[:LINKS_TO_RETURN]
text_fmt = f"'''{text}'''" if "\n" in text else f"'{text}'"
links_fmt = "\n".join(f"- {link}" for link in links)
return (
f"Page content{' (summary)' if summarized else ''}:"
if return_literal_content
else "Answer gathered from webpage:"
) + f" {text_fmt}\n\nLinks:\n{links_fmt}"
except WebDriverException as e:
# These errors are often quite long and include lots of context.
# Just grab the first line.
msg = e.msg.split("\n")[0]
if "net::" in msg:
raise BrowsingError(
"A networking error occurred while trying to load the page: %s"
% re.sub(r"^unknown error: ", "", msg)
)
raise CommandExecutionError(msg)
finally:
if driver:
close_browser(driver)
def scrape_text_with_selenium(driver: WebDriver) -> str:
"""Scrape text from a browser window using selenium
Args:
driver (WebDriver): A driver object representing the browser window to scrape
Returns:
str: the text scraped from the website
"""
# Get the HTML content directly from the browser's DOM
page_source = driver.execute_script("return document.body.outerHTML;")
soup = BeautifulSoup(page_source, "html.parser")
for script in soup(["script", "style"]):
script.extract()
text = soup.get_text()
lines = (line.strip() for line in text.splitlines())
chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
text = "\n".join(chunk for chunk in chunks if chunk)
return text
def scrape_links_with_selenium(driver: WebDriver, base_url: str) -> list[str]:
"""Scrape links from a website using selenium
Args:
driver (WebDriver): A driver object representing the browser window to scrape
base_url (str): The base URL to use for resolving relative links
Returns:
List[str]: The links scraped from the website
"""
page_source = driver.page_source
soup = BeautifulSoup(page_source, "html.parser")
for script in soup(["script", "style"]):
script.extract()
hyperlinks = extract_hyperlinks(soup, base_url)
return format_hyperlinks(hyperlinks)
async def open_page_in_browser(url: str, config: Config) -> WebDriver:
"""Open a browser window and load a web page using Selenium
Params:
url (str): The URL of the page to load
config (Config): The applicable application configuration
Returns:
driver (WebDriver): A driver object representing the browser window to scrape
"""
logging.getLogger("selenium").setLevel(logging.CRITICAL)
options_available: dict[str, Type[BrowserOptions]] = {
"chrome": ChromeOptions,
"edge": EdgeOptions,
"firefox": FirefoxOptions,
"safari": SafariOptions,
}
options: BrowserOptions = options_available[config.selenium_web_browser]()
options.add_argument(f"user-agent={config.user_agent}")
if isinstance(options, FirefoxOptions):
if config.selenium_headless:
options.headless = True
options.add_argument("--disable-gpu")
driver = FirefoxDriver(
service=GeckoDriverService(GeckoDriverManager().install()), options=options
)
elif isinstance(options, EdgeOptions):
driver = EdgeDriver(
service=EdgeDriverService(EdgeDriverManager().install()), options=options
)
elif isinstance(options, SafariOptions):
# Requires a bit more setup on the user's end.
# See https://developer.apple.com/documentation/webkit/testing_with_webdriver_in_safari # noqa: E501
driver = SafariDriver(options=options)
elif isinstance(options, ChromeOptions):
if platform == "linux" or platform == "linux2":
options.add_argument("--disable-dev-shm-usage")
options.add_argument("--remote-debugging-port=9222")
options.add_argument("--no-sandbox")
if config.selenium_headless:
options.add_argument("--headless=new")
options.add_argument("--disable-gpu")
_sideload_chrome_extensions(options, config.app_data_dir / "assets" / "crx")
if (chromium_driver_path := Path("/usr/bin/chromedriver")).exists():
chrome_service = ChromeDriverService(str(chromium_driver_path))
else:
try:
chrome_driver = ChromeDriverManager().install()
except AttributeError as e:
if "'NoneType' object has no attribute 'split'" in str(e):
# https://github.com/SergeyPirogov/webdriver_manager/issues/649
logger.critical(
"Connecting to browser failed: is Chrome or Chromium installed?"
return_literal_content = True
summarized = False
if not text:
return f"Website did not contain any text.\n\nLinks: {links}"
elif get_raw_content:
if (
output_tokens := self.llm_provider.count_tokens(
text, self.model_info.name
)
raise
chrome_service = ChromeDriverService(chrome_driver)
driver = ChromeDriver(service=chrome_service, options=options)
) > MAX_RAW_CONTENT_LENGTH:
oversize_factor = round(output_tokens / MAX_RAW_CONTENT_LENGTH, 1)
raise TooMuchOutputError(
f"Page content is {oversize_factor}x the allowed length "
"for `get_raw_content=true`"
)
return text + (f"\n\nLinks: {links}" if links else "")
else:
text = await self.summarize_webpage(
text, question or None, topics_of_interest
)
return_literal_content = bool(question)
summarized = True
driver.get(url)
# Limit links to LINKS_TO_RETURN
if len(links) > LINKS_TO_RETURN:
links = links[:LINKS_TO_RETURN]
# Wait for page to be ready, sleep 2 seconds, wait again until page ready.
# This allows the cookiewall squasher time to get rid of cookie walls.
WebDriverWait(driver, 10).until(
EC.presence_of_element_located((By.TAG_NAME, "body"))
)
await asyncio.sleep(2)
WebDriverWait(driver, 10).until(
EC.presence_of_element_located((By.TAG_NAME, "body"))
)
text_fmt = f"'''{text}'''" if "\n" in text else f"'{text}'"
links_fmt = "\n".join(f"- {link}" for link in links)
return (
f"Page content{' (summary)' if summarized else ''}:"
if return_literal_content
else "Answer gathered from webpage:"
) + f" {text_fmt}\n\nLinks:\n{links_fmt}"
return driver
except WebDriverException as e:
# These errors are often quite long and include lots of context.
# Just grab the first line.
msg = e.msg.split("\n")[0] if e.msg else str(e)
if "net::" in msg:
raise BrowsingError(
"A networking error occurred while trying to load the page: %s"
% re.sub(r"^unknown error: ", "", msg)
)
raise CommandExecutionError(msg)
finally:
if driver:
driver.close()
def scrape_text_with_selenium(self, driver: WebDriver) -> str:
"""Scrape text from a browser window using selenium
def _sideload_chrome_extensions(options: ChromeOptions, dl_folder: Path) -> None:
crx_download_url_template = "https://clients2.google.com/service/update2/crx?response=redirect&prodversion=49.0&acceptformat=crx3&x=id%3D{crx_id}%26installsource%3Dondemand%26uc" # noqa
cookiewall_squasher_crx_id = "edibdbjcniadpccecjdfdjjppcpchdlm"
adblocker_crx_id = "cjpalhdlnbpafiamejdnhcphjbkeiagm"
Args:
driver (WebDriver): A driver object representing
the browser window to scrape
# Make sure the target folder exists
dl_folder.mkdir(parents=True, exist_ok=True)
Returns:
str: the text scraped from the website
"""
for crx_id in (cookiewall_squasher_crx_id, adblocker_crx_id):
crx_path = dl_folder / f"{crx_id}.crx"
if not crx_path.exists():
logger.debug(f"Downloading CRX {crx_id}...")
crx_download_url = crx_download_url_template.format(crx_id=crx_id)
urlretrieve(crx_download_url, crx_path)
logger.debug(f"Downloaded {crx_path.name}")
options.add_extension(str(crx_path))
# Get the HTML content directly from the browser's DOM
page_source = driver.execute_script("return document.body.outerHTML;")
soup = BeautifulSoup(page_source, "html.parser")
for script in soup(["script", "style"]):
script.extract()
def close_browser(driver: WebDriver) -> None:
"""Close the browser
text = soup.get_text()
lines = (line.strip() for line in text.splitlines())
chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
text = "\n".join(chunk for chunk in chunks if chunk)
return text
Args:
driver (WebDriver): The webdriver to close
def scrape_links_with_selenium(self, driver: WebDriver, base_url: str) -> list[str]:
"""Scrape links from a website using selenium
Returns:
None
"""
driver.quit()
Args:
driver (WebDriver): A driver object representing
the browser window to scrape
base_url (str): The base URL to use for resolving relative links
Returns:
List[str]: The links scraped from the website
"""
page_source = driver.page_source
soup = BeautifulSoup(page_source, "html.parser")
async def summarize_memorize_webpage(
url: str,
text: str,
question: str | None,
topics_of_interest: list[str],
agent: Agent,
driver: Optional[WebDriver] = None,
) -> str:
"""Summarize text using the OpenAI API
for script in soup(["script", "style"]):
script.extract()
Args:
url (str): The url of the text
text (str): The text to summarize
question (str): The question to ask the model
driver (WebDriver): The webdriver to use to scroll the page
hyperlinks = extract_hyperlinks(soup, base_url)
Returns:
str: The summary of the text
"""
if not text:
raise ValueError("No text to summarize")
return format_hyperlinks(hyperlinks)
text_length = len(text)
logger.debug(f"Web page content length: {text_length} characters")
async def open_page_in_browser(self, url: str, config: Config) -> WebDriver:
"""Open a browser window and load a web page using Selenium
# memory = get_memory(agent.legacy_config)
Params:
url (str): The URL of the page to load
config (Config): The applicable application configuration
# new_memory = MemoryItem.from_webpage(
# content=text,
# url=url,
# config=agent.legacy_config,
# question=question,
# )
# memory.add(new_memory)
Returns:
driver (WebDriver): A driver object representing
the browser window to scrape
"""
logging.getLogger("selenium").setLevel(logging.CRITICAL)
result = None
information = None
if topics_of_interest:
information = await extract_information(
text,
topics_of_interest=topics_of_interest,
llm_provider=agent.llm_provider,
config=agent.legacy_config,
options_available: dict[str, Type[BrowserOptions]] = {
"chrome": ChromeOptions,
"edge": EdgeOptions,
"firefox": FirefoxOptions,
"safari": SafariOptions,
}
options: BrowserOptions = options_available[config.selenium_web_browser]()
options.add_argument(f"user-agent={config.user_agent}")
if isinstance(options, FirefoxOptions):
if config.selenium_headless:
options.headless = True
options.add_argument("--disable-gpu")
driver = FirefoxDriver(
service=GeckoDriverService(GeckoDriverManager().install()),
options=options,
)
elif isinstance(options, EdgeOptions):
driver = EdgeDriver(
service=EdgeDriverService(EdgeDriverManager().install()),
options=options,
)
elif isinstance(options, SafariOptions):
# Requires a bit more setup on the user's end.
# See https://developer.apple.com/documentation/webkit/testing_with_webdriver_in_safari # noqa: E501
driver = SafariDriver(options=options)
elif isinstance(options, ChromeOptions):
if platform == "linux" or platform == "linux2":
options.add_argument("--disable-dev-shm-usage")
options.add_argument("--remote-debugging-port=9222")
options.add_argument("--no-sandbox")
if config.selenium_headless:
options.add_argument("--headless=new")
options.add_argument("--disable-gpu")
self._sideload_chrome_extensions(
options, config.app_data_dir / "assets" / "crx"
)
if (chromium_driver_path := Path("/usr/bin/chromedriver")).exists():
chrome_service = ChromeDriverService(str(chromium_driver_path))
else:
try:
chrome_driver = ChromeDriverManager().install()
except AttributeError as e:
if "'NoneType' object has no attribute 'split'" in str(e):
# https://github.com/SergeyPirogov/webdriver_manager/issues/649
logger.critical(
"Connecting to browser failed:"
" is Chrome or Chromium installed?"
)
raise
chrome_service = ChromeDriverService(chrome_driver)
driver = ChromeDriver(service=chrome_service, options=options)
driver.get(url)
# Wait for page to be ready, sleep 2 seconds, wait again until page ready.
# This allows the cookiewall squasher time to get rid of cookie walls.
WebDriverWait(driver, 10).until(
EC.presence_of_element_located((By.TAG_NAME, "body"))
)
return "\n".join(f"* {i}" for i in information)
else:
result, _ = await summarize_text(
text,
question=question,
llm_provider=agent.llm_provider,
config=agent.legacy_config,
await asyncio.sleep(2)
WebDriverWait(driver, 10).until(
EC.presence_of_element_located((By.TAG_NAME, "body"))
)
return result
return driver
def _sideload_chrome_extensions(
self, options: ChromeOptions, dl_folder: Path
) -> None:
crx_download_url_template = "https://clients2.google.com/service/update2/crx?response=redirect&prodversion=49.0&acceptformat=crx3&x=id%3D{crx_id}%26installsource%3Dondemand%26uc" # noqa
cookiewall_squasher_crx_id = "edibdbjcniadpccecjdfdjjppcpchdlm"
adblocker_crx_id = "cjpalhdlnbpafiamejdnhcphjbkeiagm"
# Make sure the target folder exists
dl_folder.mkdir(parents=True, exist_ok=True)
for crx_id in (cookiewall_squasher_crx_id, adblocker_crx_id):
crx_path = dl_folder / f"{crx_id}.crx"
if not crx_path.exists():
logger.debug(f"Downloading CRX {crx_id}...")
crx_download_url = crx_download_url_template.format(crx_id=crx_id)
urlretrieve(crx_download_url, crx_path)
logger.debug(f"Downloaded {crx_path.name}")
options.add_extension(str(crx_path))
async def summarize_webpage(
self,
text: str,
question: str | None,
topics_of_interest: list[str],
) -> str:
"""Summarize text using the OpenAI API
Args:
url (str): The url of the text
text (str): The text to summarize
question (str): The question to ask the model
driver (WebDriver): The webdriver to use to scroll the page
Returns:
str: The summary of the text
"""
if not text:
raise ValueError("No text to summarize")
text_length = len(text)
logger.debug(f"Web page content length: {text_length} characters")
result = None
information = None
if topics_of_interest:
information = await extract_information(
text,
topics_of_interest=topics_of_interest,
llm_provider=self.llm_provider,
config=self.legacy_config,
)
return "\n".join(f"* {i}" for i in information)
else:
result, _ = await summarize_text(
text,
question=question,
llm_provider=self.llm_provider,
config=self.legacy_config,
)
return result
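
A rough usage sketch for the new component, assuming config, an initialized llm_provider, and the ChatModelInfo of the agent's model (model_info) are already available:

import asyncio

web = WebSeleniumComponent(config, llm_provider, model_info)
summary = asyncio.run(
    web.read_webpage(
        url="https://example.com",
        topics_of_interest=["pricing", "contact details"],
    )
)
print(summary)

Because summarization goes through the injected llm_provider, the component no longer needs to reach back into the agent for its LLM.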

View File

@@ -0,0 +1,91 @@
from typing import Callable, Iterator, Optional
from autogpt.agents.base import ThoughtProcessOutput
from autogpt.agents.features.watchdog import WatchdogComponent
from autogpt.agents.protocols import AfterExecute, AfterParse, MessageProvider
from autogpt.config.config import Config
from autogpt.core.resource.model_providers.schema import ChatMessage, ChatModelProvider
from autogpt.models.action_history import (
Action,
ActionResult,
Episode,
EpisodicActionHistory,
)
from autogpt.prompts.utils import indent
class EventHistoryComponent(MessageProvider, AfterParse, AfterExecute):
"""Keeps track of the event history and provides a summary of the steps."""
run_after = [WatchdogComponent]
def __init__(
self,
event_history: EpisodicActionHistory,
max_tokens: int,
count_tokens: Callable[[str], int],
legacy_config: Config,
llm_provider: ChatModelProvider,
) -> None:
self.event_history = event_history
self.max_tokens = max_tokens
self.count_tokens = count_tokens
self.legacy_config = legacy_config
self.llm_provider = llm_provider
def get_messages(self) -> Iterator[ChatMessage]:
yield ChatMessage.system(
self._compile_progress(
self.event_history.episodes,
self.max_tokens,
self.count_tokens,
)
)
def after_parse(self, result: ThoughtProcessOutput) -> None:
if result.command_name:
self.event_history.register_action(
Action(
name=result.command_name,
args=result.command_args,
reasoning=result.thoughts["thoughts"]["reasoning"],
)
)
async def after_execute(self, result: ActionResult) -> None:
self.event_history.register_result(result)
await self.event_history.handle_compression(
self.llm_provider, self.legacy_config
)
def _compile_progress(
self,
episode_history: list[Episode],
max_tokens: Optional[int] = None,
count_tokens: Optional[Callable[[str], int]] = None,
) -> str:
if max_tokens and not count_tokens:
raise ValueError("count_tokens is required if max_tokens is set")
steps: list[str] = []
tokens: int = 0
n_episodes = len(episode_history)
for i, episode in enumerate(reversed(episode_history)):
# Use the full format for the latest 4 steps; older steps use their summary if available
if i < 4 or episode.summary is None:
step_content = indent(episode.format(), 2).strip()
else:
step_content = episode.summary
step = f"* Step {n_episodes - i}: {step_content}"
if max_tokens and count_tokens:
step_tokens = count_tokens(step)
if tokens + step_tokens > max_tokens:
break
tokens += step_tokens
steps.insert(0, step)
return "\n\n".join(steps)

View File

@@ -5,7 +5,7 @@ import yaml
from pydantic import BaseModel, Field
from autogpt.logs.helpers import request_user_double_check
from autogpt.utils import validate_yaml_file
from autogpt.utils.utils import validate_yaml_file
logger = logging.getLogger(__name__)

View File

@@ -118,9 +118,9 @@ class Config(SystemSettings, arbitrary_types_allowed=True):
# Commands #
############
# General
disabled_command_categories: list[str] = UserConfigurable(
disabled_commands: list[str] = UserConfigurable(
default_factory=list,
from_env=lambda: _safe_split(os.getenv("DISABLED_COMMAND_CATEGORIES")),
from_env=lambda: _safe_split(os.getenv("DISABLED_COMMANDS")),
)
# File ops

View File

@@ -396,7 +396,7 @@ class OpenAIProvider(
max_output_tokens: Optional[int] = None,
**kwargs,
) -> ChatModelResponse[_T]:
"""Create a completion using the OpenAI API."""
"""Create a completion using the OpenAI API and parse it."""
openai_messages, completion_kwargs = self._get_chat_completion_args(
model_prompt=model_prompt,

View File

@@ -4,10 +4,8 @@ from agent_protocol import StepHandler, StepResult
from autogpt.agents import Agent
from autogpt.app.main import UserFeedback
from autogpt.commands import COMMAND_CATEGORIES
from autogpt.config import AIProfile, ConfigBuilder
from autogpt.logs.helpers import user_friendly_output
from autogpt.models.command_registry import CommandRegistry
from autogpt.prompts.prompt import DEFAULT_TRIGGERING_PROMPT
@@ -82,7 +80,6 @@ def bootstrap_agent(task, continuous_mode) -> Agent:
config.logging.plain_console_output = True
config.continuous_mode = continuous_mode
config.temperature = 0
command_registry = CommandRegistry.with_command_modules(COMMAND_CATEGORIES, config)
config.memory_backend = "no_memory"
ai_profile = AIProfile(
ai_name="AutoGPT",
@@ -92,7 +89,6 @@ def bootstrap_agent(task, continuous_mode) -> Agent:
# FIXME this won't work - ai_profile and triggering_prompt are not valid arguments,
# lacks file_storage, settings and llm_provider
return Agent(
command_registry=command_registry,
ai_profile=ai_profile,
legacy_config=config,
triggering_prompt=DEFAULT_TRIGGERING_PROMPT,

View File

@@ -1,10 +1,12 @@
from __future__ import annotations
import logging
from typing import Callable, Iterable, TypeVar
from typing import TYPE_CHECKING, Callable, Iterable, TypeVar
if TYPE_CHECKING:
from autogpt.models.command import Command
from autogpt.core.resource.model_providers import CompletionModelFunction
from autogpt.models.command import Command
logger = logging.getLogger(__name__)
@@ -20,7 +22,7 @@ def get_openai_command_specs(
"""
return [
CompletionModelFunction(
name=command.name,
name=command.names[0],
description=command.description,
parameters={param.name: param.spec for param in command.parameters},
)

View File

@@ -9,7 +9,6 @@ from autogpt.processing.text import summarize_text
from autogpt.prompts.utils import format_numbered_list, indent
if TYPE_CHECKING:
from autogpt.agents.base import CommandArgs, CommandName
from autogpt.config.config import Config
from autogpt.core.resource.model_providers import ChatModelProvider
@@ -160,15 +159,6 @@ class EpisodicActionHistory(BaseModel):
self.current_episode.result = result
self.cursor = len(self.episodes)
def matches_last_command(
self, command_name: CommandName, arguments: CommandArgs
) -> bool:
"""Check if the last command matches the given name and arguments."""
if len(self.episodes) > 0:
last_command = self.episodes[-1].action
return last_command.name == command_name and last_command.args == arguments
return False
def rewind(self, number_of_episodes: int = 0) -> None:
"""Resets the history to an earlier state.

View File

@@ -1,11 +1,9 @@
from __future__ import annotations
import inspect
from typing import TYPE_CHECKING, Any, Callable, Literal, Optional
from typing import Any, Callable
if TYPE_CHECKING:
from autogpt.agents.base import BaseAgent
from autogpt.config import Config
from autogpt.core.utils.json_schema import JSONSchema
from .command_parameter import CommandParameter
from .context_item import ContextItem
@@ -25,40 +23,56 @@ class Command:
def __init__(
self,
name: str,
names: list[str],
description: str,
method: Callable[..., CommandOutput],
parameters: list[CommandParameter],
enabled: Literal[True] | Callable[[Config], bool] = True,
disabled_reason: Optional[str] = None,
aliases: list[str] = [],
available: bool | Callable[[BaseAgent], bool] = True,
):
self.name = name
# Check if all parameters are provided
if not self._parameters_match(method, parameters):
raise ValueError(
f"Command {names[0]} has different parameters than provided schema"
)
self.names = names
self.description = description
self.method = method
self.parameters = parameters
self.enabled = enabled
self.disabled_reason = disabled_reason
self.aliases = aliases
self.available = available
@property
def is_async(self) -> bool:
return inspect.iscoroutinefunction(self.method)
def __call__(self, *args, agent: BaseAgent, **kwargs) -> Any:
if callable(self.enabled) and not self.enabled(agent.legacy_config):
if self.disabled_reason:
raise RuntimeError(
f"Command '{self.name}' is disabled: {self.disabled_reason}"
)
raise RuntimeError(f"Command '{self.name}' is disabled")
def validate_args(self, args: dict[str, Any]):
"""
Validates the given arguments against the command's parameter specifications
if not self.available or callable(self.available) and not self.available(agent):
raise RuntimeError(f"Command '{self.name}' is not available")
Returns:
bool: Whether the given set of arguments is valid for this command
list[ValidationError]: Issues with the set of arguments (if any)
"""
params_schema = JSONSchema(
type=JSONSchema.Type.OBJECT,
properties={p.name: p.spec for p in self.parameters},
)
return params_schema.validate_object(args)
return self.method(*args, **kwargs, agent=agent)
def _parameters_match(
self, func: Callable, parameters: list[CommandParameter]
) -> bool:
# Get the function's signature
signature = inspect.signature(func)
# Extract parameter names, ignoring 'self' for methods
func_param_names = [
param.name
for param in signature.parameters.values()
if param.name != "self"
]
names = [param.name for param in parameters]
# Check if sorted lists of names/keys are equal
return sorted(func_param_names) == sorted(names)
def __call__(self, *args, **kwargs) -> Any:
return self.method(*args, **kwargs)
def __str__(self) -> str:
params = [
@@ -67,6 +81,18 @@ class Command:
for param in self.parameters
]
return (
f"{self.name}: {self.description.rstrip('.')}. "
f"{self.names[0]}: {self.description.rstrip('.')}. "
f"Params: ({', '.join(params)})"
)
def __get__(self, instance, owner):
if instance is None:
# Accessed on the class, not an instance
return self
# Bind the method to the instance
return Command(
self.names,
self.description,
self.method.__get__(instance, owner),
self.parameters,
)
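
The new __get__ is what makes `yield self.finish` style registration work: @command replaces the method with a Command class attribute, and accessing it through an instance re-binds the underlying method. A sketch with a hypothetical component:

from autogpt.command_decorator import command
from autogpt.core.utils.json_schema import JSONSchema


class EchoComponent:
    @command(
        names=["echo", "repeat"],
        parameters={
            "text": JSONSchema(
                type=JSONSchema.Type.STRING,
                description="Text to echo back",
                required=True,
            ),
        },
    )
    def echo(self, text: str) -> str:
        """Echo the given text back to the caller."""
        return text


comp = EchoComponent()
cmd = comp.echo                  # a Command bound to `comp` via __get__
assert cmd.names == ["echo", "repeat"]
assert cmd(text="hi") == "hi"    # __call__ forwards to the bound method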

View File

@@ -1,212 +0,0 @@
from __future__ import annotations
import importlib
import inspect
import logging
from dataclasses import dataclass, field
from types import ModuleType
from typing import TYPE_CHECKING, Any, Iterator
if TYPE_CHECKING:
from autogpt.agents.base import BaseAgent
from autogpt.config import Config
from autogpt.command_decorator import AUTO_GPT_COMMAND_IDENTIFIER
from autogpt.models.command import Command
logger = logging.getLogger(__name__)
class CommandRegistry:
"""
The CommandRegistry class is a manager for a collection of Command objects.
It allows the registration, modification, and retrieval of Command objects,
as well as the scanning and loading of command plugins from a specified
directory.
"""
commands: dict[str, Command]
commands_aliases: dict[str, Command]
# Alternative way to structure the registry; currently redundant with self.commands
categories: dict[str, CommandCategory]
@dataclass
class CommandCategory:
name: str
title: str
description: str
commands: list[Command] = field(default_factory=list[Command])
modules: list[ModuleType] = field(default_factory=list[ModuleType])
def __init__(self):
self.commands = {}
self.commands_aliases = {}
self.categories = {}
def __contains__(self, command_name: str):
return command_name in self.commands or command_name in self.commands_aliases
def _import_module(self, module_name: str) -> Any:
return importlib.import_module(module_name)
def _reload_module(self, module: Any) -> Any:
return importlib.reload(module)
def register(self, cmd: Command) -> None:
if cmd.name in self.commands:
logger.warning(
f"Command '{cmd.name}' already registered and will be overwritten!"
)
self.commands[cmd.name] = cmd
if cmd.name in self.commands_aliases:
logger.warning(
f"Command '{cmd.name}' will overwrite alias with the same name of "
f"'{self.commands_aliases[cmd.name]}'!"
)
for alias in cmd.aliases:
self.commands_aliases[alias] = cmd
def unregister(self, command: Command) -> None:
if command.name in self.commands:
del self.commands[command.name]
for alias in command.aliases:
del self.commands_aliases[alias]
else:
raise KeyError(f"Command '{command.name}' not found in registry.")
def reload_commands(self) -> None:
"""Reloads all loaded command plugins."""
for cmd_name in self.commands:
cmd = self.commands[cmd_name]
module = self._import_module(cmd.__module__)
reloaded_module = self._reload_module(module)
if hasattr(reloaded_module, "register"):
reloaded_module.register(self)
def get_command(self, name: str) -> Command | None:
if name in self.commands:
return self.commands[name]
if name in self.commands_aliases:
return self.commands_aliases[name]
def call(self, command_name: str, agent: BaseAgent, **kwargs) -> Any:
if command := self.get_command(command_name):
return command(**kwargs, agent=agent)
raise KeyError(f"Command '{command_name}' not found in registry")
def list_available_commands(self, agent: BaseAgent) -> Iterator[Command]:
"""Iterates over all registered commands and yields those that are available.
Params:
agent (BaseAgent): The agent that the commands will be checked against.
Yields:
Command: The next available command.
"""
for cmd in self.commands.values():
available = cmd.available
if callable(cmd.available):
available = cmd.available(agent)
if available:
yield cmd
# def command_specs(self) -> str:
# """
# Returns a technical declaration of all commands in the registry,
# for use in a prompt.
# """
#
# Declaring functions or commands should be done in a model-specific way to
# achieve optimal results. For this reason, it should NOT be implemented here,
# but in an LLM provider module.
# MUST take command AVAILABILITY into account.
@staticmethod
def with_command_modules(modules: list[str], config: Config) -> CommandRegistry:
new_registry = CommandRegistry()
logger.debug(
"The following command categories are disabled: "
f"{config.disabled_command_categories}"
)
enabled_command_modules = [
x for x in modules if x not in config.disabled_command_categories
]
logger.debug(
f"The following command categories are enabled: {enabled_command_modules}"
)
for command_module in enabled_command_modules:
new_registry.import_command_module(command_module)
# Unregister commands that are incompatible with the current config
for command in [c for c in new_registry.commands.values()]:
if callable(command.enabled) and not command.enabled(config):
new_registry.unregister(command)
logger.debug(
f"Unregistering incompatible command '{command.name}':"
f" \"{command.disabled_reason or 'Disabled by current config.'}\""
)
return new_registry
def import_command_module(self, module_name: str) -> None:
"""
Imports the specified Python module containing command plugins.
This method imports the associated module and registers any functions or
classes that are decorated with the `AUTO_GPT_COMMAND_IDENTIFIER` attribute
as `Command` objects. The registered `Command` objects are then added to the
`commands` dictionary of the `CommandRegistry` object.
Args:
module_name (str): The name of the module to import for command plugins.
"""
module = importlib.import_module(module_name)
category = self.register_module_category(module)
for attr_name in dir(module):
attr = getattr(module, attr_name)
command = None
# Register decorated functions
if getattr(attr, AUTO_GPT_COMMAND_IDENTIFIER, False):
command = attr.command
# Register command classes
elif (
inspect.isclass(attr) and issubclass(attr, Command) and attr != Command
):
command = attr()
if command:
self.register(command)
category.commands.append(command)
def register_module_category(self, module: ModuleType) -> CommandCategory:
if not (category_name := getattr(module, "COMMAND_CATEGORY", None)):
raise ValueError(f"Cannot import invalid command module {module.__name__}")
if category_name not in self.categories:
self.categories[category_name] = CommandRegistry.CommandCategory(
name=category_name,
title=getattr(
module, "COMMAND_CATEGORY_TITLE", category_name.capitalize()
),
description=getattr(module, "__doc__", ""),
)
category = self.categories[category_name]
if module not in category.modules:
category.modules.append(module)
return category

View File

@@ -1,12 +1,12 @@
import logging
import os.path
from abc import ABC, abstractmethod
from pathlib import Path
from typing import Optional
from pydantic import BaseModel, Field
from autogpt.commands.file_operations_utils import decode_textual_file
from autogpt.file_storage.base import FileStorage
from autogpt.utils.file_operations_utils import decode_textual_file
logger = logging.getLogger(__name__)
@@ -24,67 +24,51 @@ class ContextItem(ABC):
"""A string indicating the source location of the context item"""
...
@property
@abstractmethod
def content(self) -> str:
def get_content(self, workspace: FileStorage) -> str:
"""The content represented by the context item"""
...
def fmt(self) -> str:
def fmt(self, workspace: FileStorage) -> str:
return (
f"{self.description} (source: {self.source})\n"
"```\n"
f"{self.content}\n"
f"{self.get_content(workspace)}\n"
"```"
)
class FileContextItem(BaseModel, ContextItem):
file_path_in_workspace: Path
workspace_path: Path
@property
def file_path(self) -> Path:
return self.workspace_path / self.file_path_in_workspace
path: Path
@property
def description(self) -> str:
return f"The current content of the file '{self.file_path_in_workspace}'"
return f"The current content of the file '{self.path}'"
@property
def source(self) -> str:
return str(self.file_path_in_workspace)
return str(self.path)
@property
def content(self) -> str:
# TODO: use workspace.open_file()
with open(self.file_path, "rb") as file:
return decode_textual_file(file, os.path.splitext(file.name)[1], logger)
def get_content(self, workspace: FileStorage) -> str:
with workspace.open_file(self.path, "r", True) as file:
return decode_textual_file(file, self.path.suffix, logger)
class FolderContextItem(BaseModel, ContextItem):
path_in_workspace: Path
workspace_path: Path
@property
def path(self) -> Path:
return self.workspace_path / self.path_in_workspace
def __post_init__(self) -> None:
assert self.path.exists(), "Selected path does not exist"
assert self.path.is_dir(), "Selected path is not a directory"
path: Path
@property
def description(self) -> str:
return f"The contents of the folder '{self.path_in_workspace}' in the workspace"
return f"The contents of the folder '{self.path}' in the workspace"
@property
def source(self) -> str:
return str(self.path_in_workspace)
return str(self.path)
@property
def content(self) -> str:
items = [f"{p.name}{'/' if p.is_dir() else ''}" for p in self.path.iterdir()]
def get_content(self, workspace: FileStorage) -> str:
files = [str(p) for p in workspace.list_files(self.path)]
folders = [f"{str(p)}/" for p in workspace.list_folders(self.path)]
items = folders + files
items.sort()
return "\n".join(items)

View File

@@ -1,6 +1,7 @@
import functools
import re
from typing import Any, Callable, ParamSpec, TypeVar
from inspect import signature
from typing import Callable, ParamSpec, TypeVar
from urllib.parse import urljoin, urlparse
P = ParamSpec("P")
@@ -14,32 +15,29 @@ def validate_url(func: Callable[P, T]) -> Callable[P, T]:
"""
@functools.wraps(func)
def wrapper(url: str, *args, **kwargs) -> Any:
"""Check if the URL is valid and not a local file accessor.
def wrapper(*args, **kwargs):
sig = signature(func)
bound_args = sig.bind(*args, **kwargs)
bound_args.apply_defaults()
Args:
url (str): The URL to check
url = bound_args.arguments.get("url")
if url is None:
raise ValueError("URL is required for this function")
Returns:
the result of the wrapped function
Raises:
ValueError if the url fails any of the validation tests
"""
# Most basic check if the URL is valid:
if not re.match(r"^https?://", url):
raise ValueError("Invalid URL format")
raise ValueError(
"Invalid URL format: URL must start with http:// or https://"
)
if not is_valid_url(url):
raise ValueError("Missing Scheme or Network location")
# Restrict access to local files
if check_local_file_access(url):
raise ValueError("Access to local files is restricted")
# Check URL length
if len(url) > 2000:
raise ValueError("URL is too long")
return func(sanitize_url(url), *args, **kwargs)
bound_args.arguments["url"] = sanitize_url(url)
return func(*bound_args.args, **bound_args.kwargs)
return wrapper
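
Binding the arguments through the function's signature means the wrapper no longer cares whether url arrives positionally, by keyword, or behind a self parameter on a method. A sketch with a hypothetical function:

from autogpt.url_utils.validators import validate_url


@validate_url
def fetch(url: str, timeout: int = 10) -> str:
    return f"fetching {url} (timeout={timeout})"


fetch("https://example.com")      # OK: url is sanitized before the call
fetch(url="https://example.com")  # OK: keyword form works too
fetch("file:///etc/passwd")       # raises ValueError: invalid URL format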

View File

@@ -36,10 +36,6 @@ class UnknownCommandError(AgentException):
hint = "Do not try to use this command again."
class DuplicateOperationError(AgentException):
"""The proposed operation has already been executed"""
class CommandExecutionError(AgentException):
"""An error occurred when trying to execute the command"""

View File

@@ -0,0 +1,31 @@
import inspect
from typing import Optional
import sentry_sdk
def retry(retry_count: int = 3, pass_exception: str = "exception"):
"""Decorator to retry a function multiple times on failure.
Can pass the exception to the function as a keyword argument."""
def decorator(func):
params = inspect.signature(func).parameters
async def wrapper(*args, **kwargs):
exception: Optional[Exception] = None
attempts = 0
while attempts < retry_count:
try:
if pass_exception in params:
kwargs[pass_exception] = exception
return await func(*args, **kwargs)
except Exception as e:
attempts += 1
exception = e
sentry_sdk.capture_exception(e)
if attempts >= retry_count:
raise e
return wrapper
return decorator
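
The wrapper awaits the wrapped callable, so this decorator is intended for coroutines; when the function declares a parameter matching pass_exception, it receives the previous failure (None on the first attempt). A sketch with a hypothetical coroutine (retry is the decorator defined above):

from typing import Optional


@retry(retry_count=3)
async def flaky_call(prompt: str, exception: Optional[Exception] = None) -> str:
    if exception is not None:
        print(f"Retrying after: {exception}")
    # real work that may raise goes here
    return "ok"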

View File

@@ -3,6 +3,9 @@ from pathlib import Path
import yaml
from colorama import Fore
DEFAULT_FINISH_COMMAND = "finish"
DEFAULT_ASK_COMMAND = "ask_user"
def validate_yaml_file(file: str | Path):
try:

View File

@@ -4,8 +4,6 @@ constraints: [
'You are unable to interact with physical objects. If this is absolutely necessary to fulfill a task or objective or to complete a step, you must ask the user to do it for you. If the user refuses this, and there is no other way to achieve your goals, you must terminate to avoid wasting time and energy.'
]
resources: [
'Internet access for searches and information gathering.',
'The ability to read and write files.',
'You are a Large Language Model, trained on millions of pages of text, including a lot of factual knowledge. Make use of this factual knowledge to avoid unnecessary gathering of information.'
]
best_practices: [

View File

@@ -19,7 +19,6 @@ from autogpt.file_storage.local import (
LocalFileStorage,
)
from autogpt.logs.config import configure_logging
from autogpt.models.command_registry import CommandRegistry
pytest_plugins = [
"tests.integration.agent_factory",
@@ -116,11 +115,6 @@ def agent(
ai_goals=[],
)
command_registry = CommandRegistry()
agent_prompt_config = Agent.default_settings.prompt_config.copy(deep=True)
agent_prompt_config.use_functions_api = config.openai_functions
agent_settings = AgentSettings(
name=Agent.default_settings.name,
description=Agent.default_settings.description,
@@ -133,14 +127,12 @@ def agent(
use_functions_api=config.openai_functions,
plugins=config.plugins,
),
prompt_config=agent_prompt_config,
history=Agent.default_settings.history.copy(deep=True),
)
agent = Agent(
settings=agent_settings,
llm_provider=llm_provider,
command_registry=command_registry,
file_storage=storage,
legacy_config=config,
)

View File

@@ -1,9 +1,10 @@
import pytest
from autogpt.agents.agent import Agent, AgentConfiguration, AgentSettings
from autogpt.agents.prompt_strategies.one_shot import OneShotAgentPromptStrategy
from autogpt.config import AIProfile, Config
from autogpt.file_storage import FileStorageBackendName, get_storage
from autogpt.memory.vector import get_memory
from autogpt.models.command_registry import CommandRegistry
@pytest.fixture
@@ -20,8 +21,6 @@ def memory_json_file(config: Config):
@pytest.fixture
def dummy_agent(config: Config, llm_provider, memory_json_file):
command_registry = CommandRegistry()
ai_profile = AIProfile(
ai_name="Dummy Agent",
ai_role="Dummy Role",
@@ -30,7 +29,9 @@ def dummy_agent(config: Config, llm_provider, memory_json_file):
],
)
agent_prompt_config = Agent.default_settings.prompt_config.copy(deep=True)
agent_prompt_config = OneShotAgentPromptStrategy.default_configuration.copy(
deep=True
)
agent_prompt_config.use_functions_api = config.openai_functions
agent_settings = AgentSettings(
name=Agent.default_settings.name,
@@ -46,10 +47,17 @@ def dummy_agent(config: Config, llm_provider, memory_json_file):
history=Agent.default_settings.history.copy(deep=True),
)
local = config.file_storage_backend == FileStorageBackendName.LOCAL
restrict_to_root = not local or config.restrict_to_workspace
file_storage = get_storage(
config.file_storage_backend, root_path="data", restrict_to_root=restrict_to_root
)
file_storage.initialize()
agent = Agent(
settings=agent_settings,
llm_provider=llm_provider,
command_registry=command_registry,
file_storage=file_storage,
legacy_config=config,
)

View File

@@ -5,12 +5,19 @@ from pathlib import Path
import pytest
import autogpt.commands.execute_code as sut # system under testing
from autogpt.agents.agent import Agent
from autogpt.agents.utils.exceptions import (
InvalidArgumentError,
OperationNotAllowedError,
from autogpt.commands.execute_code import (
ALLOWLIST_CONTROL,
CodeExecutorComponent,
is_docker_available,
we_are_running_in_a_docker_container,
)
from autogpt.utils.exceptions import InvalidArgumentError, OperationNotAllowedError
@pytest.fixture
def code_executor_component(agent: Agent):
return agent.code_executor
@pytest.fixture
@@ -20,7 +27,9 @@ def random_code(random_string) -> str:
@pytest.fixture
def python_test_file(agent: Agent, random_code: str):
temp_file = tempfile.NamedTemporaryFile(dir=agent.workspace.root, suffix=".py")
temp_file = tempfile.NamedTemporaryFile(
dir=agent.file_manager.workspace.root, suffix=".py"
)
temp_file.write(str.encode(random_code))
temp_file.flush()
@@ -30,7 +39,9 @@ def python_test_file(agent: Agent, random_code: str):
@pytest.fixture
def python_test_args_file(agent: Agent):
temp_file = tempfile.NamedTemporaryFile(dir=agent.workspace.root, suffix=".py")
temp_file = tempfile.NamedTemporaryFile(
dir=agent.file_manager.workspace.root, suffix=".py"
)
temp_file.write(str.encode("import sys\nprint(sys.argv[1], sys.argv[2])"))
temp_file.flush()
@@ -43,85 +54,114 @@ def random_string():
return "".join(random.choice(string.ascii_lowercase) for _ in range(10))
def test_execute_python_file(python_test_file: Path, random_string: str, agent: Agent):
if not (sut.is_docker_available() or sut.we_are_running_in_a_docker_container()):
def test_execute_python_file(
code_executor_component: CodeExecutorComponent,
python_test_file: Path,
random_string: str,
agent: Agent,
):
if not (is_docker_available() or we_are_running_in_a_docker_container()):
pytest.skip("Docker is not available")
result: str = sut.execute_python_file(python_test_file, agent=agent)
result: str = code_executor_component.execute_python_file(python_test_file)
assert result.replace("\r", "") == f"Hello {random_string}!\n"
def test_execute_python_file_args(
python_test_args_file: Path, random_string: str, agent: Agent
code_executor_component: CodeExecutorComponent,
python_test_args_file: Path,
random_string: str,
agent: Agent,
):
if not (sut.is_docker_available() or sut.we_are_running_in_a_docker_container()):
if not (is_docker_available() or we_are_running_in_a_docker_container()):
pytest.skip("Docker is not available")
random_args = [random_string] * 2
random_args_string = " ".join(random_args)
result = sut.execute_python_file(
python_test_args_file, args=random_args, agent=agent
result = code_executor_component.execute_python_file(
python_test_args_file, args=random_args
)
assert result == f"{random_args_string}\n"
def test_execute_python_code(random_code: str, random_string: str, agent: Agent):
if not (sut.is_docker_available() or sut.we_are_running_in_a_docker_container()):
def test_execute_python_code(
code_executor_component: CodeExecutorComponent,
random_code: str,
random_string: str,
agent: Agent,
):
if not (is_docker_available() or we_are_running_in_a_docker_container()):
pytest.skip("Docker is not available")
result: str = sut.execute_python_code(random_code, agent=agent)
result: str = code_executor_component.execute_python_code(random_code)
assert result.replace("\r", "") == f"Hello {random_string}!\n"
def test_execute_python_file_invalid(agent: Agent):
def test_execute_python_file_invalid(
code_executor_component: CodeExecutorComponent, agent: Agent
):
with pytest.raises(InvalidArgumentError):
sut.execute_python_file(Path("not_python.txt"), agent)
code_executor_component.execute_python_file(Path("not_python.txt"))
def test_execute_python_file_not_found(agent: Agent):
def test_execute_python_file_not_found(
code_executor_component: CodeExecutorComponent, agent: Agent
):
with pytest.raises(
FileNotFoundError,
match=r"python: can't open file '([a-zA-Z]:)?[/\\\-\w]*notexist.py': "
r"\[Errno 2\] No such file or directory",
):
sut.execute_python_file(Path("notexist.py"), agent)
code_executor_component.execute_python_file(Path("notexist.py"))
def test_execute_shell(random_string: str, agent: Agent):
result = sut.execute_shell(f"echo 'Hello {random_string}!'", agent)
def test_execute_shell(
code_executor_component: CodeExecutorComponent, random_string: str, agent: Agent
):
result = code_executor_component.execute_shell(f"echo 'Hello {random_string}!'")
assert f"Hello {random_string}!" in result
def test_execute_shell_local_commands_not_allowed(random_string: str, agent: Agent):
result = sut.execute_shell(f"echo 'Hello {random_string}!'", agent)
def test_execute_shell_local_commands_not_allowed(
code_executor_component: CodeExecutorComponent, random_string: str, agent: Agent
):
result = code_executor_component.execute_shell(f"echo 'Hello {random_string}!'")
assert f"Hello {random_string}!" in result
def test_execute_shell_denylist_should_deny(agent: Agent, random_string: str):
def test_execute_shell_denylist_should_deny(
code_executor_component: CodeExecutorComponent, agent: Agent, random_string: str
):
agent.legacy_config.shell_denylist = ["echo"]
with pytest.raises(OperationNotAllowedError, match="not allowed"):
sut.execute_shell(f"echo 'Hello {random_string}!'", agent)
code_executor_component.execute_shell(f"echo 'Hello {random_string}!'")
def test_execute_shell_denylist_should_allow(agent: Agent, random_string: str):
def test_execute_shell_denylist_should_allow(
code_executor_component: CodeExecutorComponent, agent: Agent, random_string: str
):
agent.legacy_config.shell_denylist = ["cat"]
result = sut.execute_shell(f"echo 'Hello {random_string}!'", agent)
result = code_executor_component.execute_shell(f"echo 'Hello {random_string}!'")
assert "Hello" in result and random_string in result
def test_execute_shell_allowlist_should_deny(agent: Agent, random_string: str):
agent.legacy_config.shell_command_control = sut.ALLOWLIST_CONTROL
def test_execute_shell_allowlist_should_deny(
code_executor_component: CodeExecutorComponent, agent: Agent, random_string: str
):
agent.legacy_config.shell_command_control = ALLOWLIST_CONTROL
agent.legacy_config.shell_allowlist = ["cat"]
with pytest.raises(OperationNotAllowedError, match="not allowed"):
sut.execute_shell(f"echo 'Hello {random_string}!'", agent)
code_executor_component.execute_shell(f"echo 'Hello {random_string}!'")
def test_execute_shell_allowlist_should_allow(agent: Agent, random_string: str):
agent.legacy_config.shell_command_control = sut.ALLOWLIST_CONTROL
def test_execute_shell_allowlist_should_allow(
code_executor_component: CodeExecutorComponent, agent: Agent, random_string: str
):
agent.legacy_config.shell_command_control = ALLOWLIST_CONTROL
agent.legacy_config.shell_allowlist = ["echo"]
result = sut.execute_shell(f"echo 'Hello {random_string}!'", agent)
result = code_executor_component.execute_shell(f"echo 'Hello {random_string}!'")
assert "Hello" in result and random_string in result

View File

@@ -7,7 +7,12 @@ import pytest
from PIL import Image
from autogpt.agents.agent import Agent
from autogpt.commands.image_gen import generate_image, generate_image_with_sd_webui
from autogpt.commands.image_gen import ImageGeneratorComponent
@pytest.fixture
def image_gen_component(agent: Agent):
return agent.image_gen
@pytest.fixture(params=[256, 512, 1024])
@@ -18,9 +23,16 @@ def image_size(request):
@pytest.mark.requires_openai_api_key
@pytest.mark.vcr
def test_dalle(agent: Agent, storage, image_size, cached_openai_client):
def test_dalle(
image_gen_component: ImageGeneratorComponent,
agent: Agent,
storage,
image_size,
cached_openai_client,
):
"""Test DALL-E image generation."""
generate_and_validate(
image_gen_component,
agent,
storage,
image_provider="dalle",
@@ -37,9 +49,16 @@ def test_dalle(agent: Agent, storage, image_size, cached_openai_client):
"image_model",
["CompVis/stable-diffusion-v1-4", "stabilityai/stable-diffusion-2-1"],
)
def test_huggingface(agent: Agent, storage, image_size, image_model):
def test_huggingface(
image_gen_component: ImageGeneratorComponent,
agent: Agent,
storage,
image_size,
image_model,
):
"""Test HuggingFace image generation."""
generate_and_validate(
image_gen_component,
agent,
storage,
image_provider="huggingface",
@@ -49,9 +68,12 @@ def test_huggingface(agent: Agent, storage, image_size, image_model):
@pytest.mark.xfail(reason="SD WebUI call does not work.")
def test_sd_webui(agent: Agent, storage, image_size):
def test_sd_webui(
image_gen_component: ImageGeneratorComponent, agent: Agent, storage, image_size
):
"""Test SD WebUI image generation."""
generate_and_validate(
image_gen_component,
agent,
storage,
image_provider="sd_webui",
@@ -60,11 +82,12 @@ def test_sd_webui(agent: Agent, storage, image_size):
@pytest.mark.xfail(reason="SD WebUI call does not work.")
def test_sd_webui_negative_prompt(agent: Agent, storage, image_size):
def test_sd_webui_negative_prompt(
image_gen_component: ImageGeneratorComponent, storage, image_size
):
gen_image = functools.partial(
generate_image_with_sd_webui,
image_gen_component.generate_image_with_sd_webui,
prompt="astronaut riding a horse",
agent=agent,
size=image_size,
extra={"seed": 123},
)
@@ -90,6 +113,7 @@ def lst(txt):
def generate_and_validate(
image_gen_component: ImageGeneratorComponent,
agent: Agent,
storage,
image_size,
@@ -103,7 +127,7 @@ def generate_and_validate(
agent.legacy_config.huggingface_image_model = hugging_face_image_model
prompt = "astronaut riding a horse"
image_path = lst(generate_image(prompt, agent, image_size, **kwargs))
image_path = lst(image_gen_component.generate_image(prompt, image_size, **kwargs))
assert image_path.exists()
with Image.open(image_path) as img:
assert img.size == (image_size, image_size)
@@ -125,7 +149,13 @@ def generate_and_validate(
)
@pytest.mark.parametrize("delay", [10, 0])
def test_huggingface_fail_request_with_delay(
agent: Agent, storage, image_size, image_model, return_text, delay
image_gen_component: ImageGeneratorComponent,
agent: Agent,
storage,
image_size,
image_model,
return_text,
delay,
):
return_text = return_text.replace("[model]", image_model).replace(
"[delay]", str(delay)
@@ -150,7 +180,7 @@ def test_huggingface_fail_request_with_delay(
with patch("time.sleep") as mock_sleep:
# Verify request fails.
result = generate_image(prompt, agent, image_size)
result = image_gen_component.generate_image(prompt, image_size)
assert result == "Error creating image."
# Verify retry was called with delay if delay is in return_text
@@ -160,7 +190,9 @@ def test_huggingface_fail_request_with_delay(
mock_sleep.assert_not_called()
def test_huggingface_fail_request_no_delay(mocker, agent: Agent):
def test_huggingface_fail_request_no_delay(
mocker, image_gen_component: ImageGeneratorComponent, agent: Agent
):
agent.legacy_config.huggingface_api_token = "1"
# Mock requests.post
@@ -177,7 +209,7 @@ def test_huggingface_fail_request_no_delay(mocker, agent: Agent):
agent.legacy_config.image_provider = "huggingface"
agent.legacy_config.huggingface_image_model = "CompVis/stable-diffusion-v1-4"
result = generate_image("astronaut riding a horse", agent, 512)
result = image_gen_component.generate_image("astronaut riding a horse", 512)
assert result == "Error creating image."
@@ -185,7 +217,9 @@ def test_huggingface_fail_request_no_delay(mocker, agent: Agent):
mock_sleep.assert_not_called()
def test_huggingface_fail_request_bad_json(mocker, agent: Agent):
def test_huggingface_fail_request_bad_json(
mocker, image_gen_component: ImageGeneratorComponent, agent: Agent
):
agent.legacy_config.huggingface_api_token = "1"
# Mock requests.post
@@ -200,7 +234,7 @@ def test_huggingface_fail_request_bad_json(mocker, agent: Agent):
agent.legacy_config.image_provider = "huggingface"
agent.legacy_config.huggingface_image_model = "CompVis/stable-diffusion-v1-4"
result = generate_image("astronaut riding a horse", agent, 512)
result = image_gen_component.generate_image("astronaut riding a horse", 512)
assert result == "Error creating image."
@@ -208,7 +242,9 @@ def test_huggingface_fail_request_bad_json(mocker, agent: Agent):
mock_sleep.assert_not_called()
def test_huggingface_fail_request_bad_image(mocker, agent: Agent):
def test_huggingface_fail_request_bad_image(
mocker, image_gen_component: ImageGeneratorComponent, agent: Agent
):
agent.legacy_config.huggingface_api_token = "1"
# Mock requests.post
@@ -218,12 +254,14 @@ def test_huggingface_fail_request_bad_image(mocker, agent: Agent):
agent.legacy_config.image_provider = "huggingface"
agent.legacy_config.huggingface_image_model = "CompVis/stable-diffusion-v1-4"
result = generate_image("astronaut riding a horse", agent, 512)
result = image_gen_component.generate_image("astronaut riding a horse", 512)
assert result == "Error creating image."
def test_huggingface_fail_missing_api_token(mocker, agent: Agent):
def test_huggingface_fail_missing_api_token(
mocker, image_gen_component: ImageGeneratorComponent, agent: Agent
):
agent.legacy_config.image_provider = "huggingface"
agent.legacy_config.huggingface_image_model = "CompVis/stable-diffusion-v1-4"
@@ -232,4 +270,4 @@ def test_huggingface_fail_missing_api_token(mocker, agent: Agent):
# Verify request raises an error.
with pytest.raises(ValueError):
generate_image("astronaut riding a horse", agent, 512)
image_gen_component.generate_image("astronaut riding a horse", 512)

View File

@@ -1,18 +1,25 @@
import pytest
from autogpt.agents.agent import Agent
from autogpt.commands.web_selenium import BrowsingError, read_webpage
from autogpt.commands.web_selenium import BrowsingError, WebSeleniumComponent
@pytest.fixture
def web_selenium_component(agent: Agent):
return agent.web_selenium
@pytest.mark.vcr
@pytest.mark.requires_openai_api_key
@pytest.mark.asyncio
async def test_browse_website_nonexistent_url(agent: Agent, cached_openai_client: None):
async def test_browse_website_nonexistent_url(
web_selenium_component: WebSeleniumComponent, cached_openai_client: None
):
url = "https://auto-gpt-thinks-this-website-does-not-exist.com"
question = "How to execute a barrel roll"
with pytest.raises(BrowsingError, match="NAME_NOT_RESOLVED") as raised:
await read_webpage(url=url, question=question, agent=agent)
await web_selenium_component.read_webpage(url=url, question=question)
# Sanity check that the response is not too long
assert len(raised.exconly()) < 200

View File

@@ -1,29 +0,0 @@
from autogpt.command_decorator import command
from autogpt.core.utils.json_schema import JSONSchema
COMMAND_CATEGORY = "mock"
@command(
"function_based_cmd",
"Function-based test command",
{
"arg1": JSONSchema(
type=JSONSchema.Type.INTEGER,
description="arg 1",
required=True,
),
"arg2": JSONSchema(
type=JSONSchema.Type.STRING,
description="arg 2",
required=True,
),
},
)
def function_based_cmd(arg1: int, arg2: str) -> str:
"""A function-based test command.
Returns:
str: the two arguments separated by a dash.
"""
return f"{arg1} - {arg2}"

View File

@@ -1,239 +0,0 @@
from __future__ import annotations
import os
import shutil
import sys
from pathlib import Path
from typing import TYPE_CHECKING
import pytest
if TYPE_CHECKING:
from autogpt.agents import Agent, BaseAgent
from autogpt.core.utils.json_schema import JSONSchema
from autogpt.models.command import Command, CommandParameter
from autogpt.models.command_registry import CommandRegistry
PARAMETERS = [
CommandParameter(
"arg1",
spec=JSONSchema(
type=JSONSchema.Type.INTEGER,
description="Argument 1",
required=True,
),
),
CommandParameter(
"arg2",
spec=JSONSchema(
type=JSONSchema.Type.STRING,
description="Argument 2",
required=False,
),
),
]
def example_command_method(arg1: int, arg2: str, agent: BaseAgent) -> str:
"""Example function for testing the Command class."""
# This function is static because it is not used by any other test cases.
return f"{arg1} - {arg2}"
def test_command_creation():
"""Test that a Command object can be created with the correct attributes."""
cmd = Command(
name="example",
description="Example command",
method=example_command_method,
parameters=PARAMETERS,
)
assert cmd.name == "example"
assert cmd.description == "Example command"
assert cmd.method == example_command_method
assert (
str(cmd)
== "example: Example command. Params: (arg1: integer, arg2: Optional[string])"
)
@pytest.fixture
def example_command():
yield Command(
name="example",
description="Example command",
method=example_command_method,
parameters=PARAMETERS,
)
def test_command_call(example_command: Command, agent: Agent):
"""Test that Command(*args) calls and returns the result of method(*args)."""
result = example_command(arg1=1, arg2="test", agent=agent)
assert result == "1 - test"
def test_command_call_with_invalid_arguments(example_command: Command, agent: Agent):
"""Test that calling a Command object with invalid arguments raises a TypeError."""
with pytest.raises(TypeError):
example_command(arg1="invalid", does_not_exist="test", agent=agent)
def test_register_command(example_command: Command):
"""Test that a command can be registered to the registry."""
registry = CommandRegistry()
registry.register(example_command)
assert registry.get_command(example_command.name) == example_command
assert len(registry.commands) == 1
def test_unregister_command(example_command: Command):
"""Test that a command can be unregistered from the registry."""
registry = CommandRegistry()
registry.register(example_command)
registry.unregister(example_command)
assert len(registry.commands) == 0
assert example_command.name not in registry
@pytest.fixture
def example_command_with_aliases(example_command: Command):
example_command.aliases = ["example_alias", "example_alias_2"]
return example_command
def test_register_command_aliases(example_command_with_aliases: Command):
"""Test that a command can be registered to the registry."""
registry = CommandRegistry()
command = example_command_with_aliases
registry.register(command)
assert command.name in registry
assert registry.get_command(command.name) == command
for alias in command.aliases:
assert registry.get_command(alias) == command
assert len(registry.commands) == 1
def test_unregister_command_aliases(example_command_with_aliases: Command):
"""Test that a command can be unregistered from the registry."""
registry = CommandRegistry()
command = example_command_with_aliases
registry.register(command)
registry.unregister(command)
assert len(registry.commands) == 0
assert command.name not in registry
for alias in command.aliases:
assert alias not in registry
def test_command_in_registry(example_command_with_aliases: Command):
"""Test that `command_name in registry` works."""
registry = CommandRegistry()
command = example_command_with_aliases
assert command.name not in registry
assert "nonexistent_command" not in registry
registry.register(command)
assert command.name in registry
assert "nonexistent_command" not in registry
for alias in command.aliases:
assert alias in registry
def test_get_command(example_command: Command):
"""Test that a command can be retrieved from the registry."""
registry = CommandRegistry()
registry.register(example_command)
retrieved_cmd = registry.get_command(example_command.name)
assert retrieved_cmd == example_command
def test_get_nonexistent_command():
"""Test that attempting to get a nonexistent command raises a KeyError."""
registry = CommandRegistry()
assert registry.get_command("nonexistent_command") is None
assert "nonexistent_command" not in registry
def test_call_command(agent: Agent):
"""Test that a command can be called through the registry."""
registry = CommandRegistry()
cmd = Command(
name="example",
description="Example command",
method=example_command_method,
parameters=PARAMETERS,
)
registry.register(cmd)
result = registry.call("example", arg1=1, arg2="test", agent=agent)
assert result == "1 - test"
def test_call_nonexistent_command(agent: Agent):
"""Test that attempting to call a nonexistent command raises a KeyError."""
registry = CommandRegistry()
with pytest.raises(KeyError):
registry.call("nonexistent_command", arg1=1, arg2="test", agent=agent)
def test_import_mock_commands_module():
"""Test that the registry can import a module with mock command plugins."""
registry = CommandRegistry()
mock_commands_module = "tests.mocks.mock_commands"
registry.import_command_module(mock_commands_module)
assert "function_based_cmd" in registry
assert registry.commands["function_based_cmd"].name == "function_based_cmd"
assert (
registry.commands["function_based_cmd"].description
== "Function-based test command"
)
def test_import_temp_command_file_module(tmp_path: Path):
"""
Test that the registry can import a command plugins module from a temp file.
Args:
tmp_path (pathlib.Path): Path to a temporary directory.
"""
registry = CommandRegistry()
# Create a temp command file
src = Path(os.getcwd()) / "tests/mocks/mock_commands.py"
temp_commands_file = tmp_path / "mock_commands.py"
shutil.copyfile(src, temp_commands_file)
# Add the temp directory to sys.path to make the module importable
sys.path.append(str(tmp_path))
temp_commands_module = "mock_commands"
registry.import_command_module(temp_commands_module)
# Remove the temp directory from sys.path
sys.path.remove(str(tmp_path))
assert "function_based_cmd" in registry
assert registry.commands["function_based_cmd"].name == "function_based_cmd"
assert (
registry.commands["function_based_cmd"].description
== "Function-based test command"
)

View File

@@ -148,7 +148,8 @@ def test_azure_config(config_with_azure: Config) -> None:
@pytest.mark.asyncio
async def test_create_config_gpt4only(config: Config) -> None:
with mock.patch(
"autogpt.core.resource.model_providers.openai.OpenAIProvider.get_available_models"
"autogpt.core.resource.model_providers.openai."
"OpenAIProvider.get_available_models"
) as mock_get_models:
mock_get_models.return_value = [
ChatModelInfo(
@@ -168,7 +169,8 @@ async def test_create_config_gpt4only(config: Config) -> None:
@pytest.mark.asyncio
async def test_create_config_gpt3only(config: Config) -> None:
with mock.patch(
"autogpt.core.resource.model_providers.openai.OpenAIProvider.get_available_models"
"autogpt.core.resource.model_providers.openai."
"OpenAIProvider.get_available_models"
) as mock_get_models:
mock_get_models.return_value = [
ChatModelInfo(

View File

@@ -1,13 +1,11 @@
import os
import re
from pathlib import Path
import pytest
from pytest_mock import MockerFixture
import autogpt.commands.file_operations as file_ops
import autogpt.agents.features.agent_file_manager as file_ops
from autogpt.agents.agent import Agent
from autogpt.agents.utils.exceptions import DuplicateOperationError
from autogpt.config import Config
from autogpt.file_storage import FileStorage
from autogpt.memory.vector.memory_item import MemoryItem
@@ -38,6 +36,11 @@ def mock_MemoryItem_from_text(
)
@pytest.fixture
def file_manager_component(agent: Agent):
return agent.file_manager
@pytest.fixture()
def test_file_name():
return Path("test_file.txt")
@@ -58,197 +61,76 @@ def test_nested_file(storage: FileStorage):
return storage.get_path("nested/test_file.txt")
def test_file_operations_log():
all_logs = (
"File Operation Logger\n"
"write: path/to/file1.txt #checksum1\n"
"write: path/to/file2.txt #checksum2\n"
"write: path/to/file3.txt #checksum3\n"
"append: path/to/file2.txt #checksum4\n"
"delete: path/to/file3.txt\n"
)
logs = all_logs.split("\n")
expected = [
("write", "path/to/file1.txt", "checksum1"),
("write", "path/to/file2.txt", "checksum2"),
("write", "path/to/file3.txt", "checksum3"),
("append", "path/to/file2.txt", "checksum4"),
("delete", "path/to/file3.txt", None),
]
assert list(file_ops.operations_from_log(logs)) == expected
def test_is_duplicate_operation(agent: Agent, mocker: MockerFixture):
# Prepare a fake state dictionary for the function to use
state = {
"path/to/file1.txt": "checksum1",
"path/to/file2.txt": "checksum2",
}
mocker.patch.object(file_ops, "file_operations_state", lambda _: state)
# Test cases with write operations
assert (
file_ops.is_duplicate_operation(
"write", Path("path/to/file1.txt"), agent, "checksum1"
)
is True
)
assert (
file_ops.is_duplicate_operation(
"write", Path("path/to/file1.txt"), agent, "checksum2"
)
is False
)
assert (
file_ops.is_duplicate_operation(
"write", Path("path/to/file3.txt"), agent, "checksum3"
)
is False
)
# Test cases with append operations
assert (
file_ops.is_duplicate_operation(
"append", Path("path/to/file1.txt"), agent, "checksum1"
)
is False
)
# Test cases with delete operations
assert (
file_ops.is_duplicate_operation("delete", Path("path/to/file1.txt"), agent)
is False
)
assert (
file_ops.is_duplicate_operation("delete", Path("path/to/file3.txt"), agent)
is True
)
# Test logging a file operation
@pytest.mark.asyncio
async def test_log_operation(agent: Agent):
await file_ops.log_operation("log_test", Path("path/to/test"), agent=agent)
log_entry = agent.get_file_operation_lines()[-1]
assert "log_test: path/to/test" in log_entry
def test_text_checksum(file_content: str):
checksum = file_ops.text_checksum(file_content)
different_checksum = file_ops.text_checksum("other content")
assert re.match(r"^[a-fA-F0-9]+$", checksum) is not None
assert checksum != different_checksum
@pytest.mark.asyncio
async def test_log_operation_with_checksum(agent: Agent):
await file_ops.log_operation(
"log_test", Path("path/to/test"), agent=agent, checksum="ABCDEF"
)
log_entry = agent.get_file_operation_lines()[-1]
assert "log_test: path/to/test #ABCDEF" in log_entry
@pytest.mark.asyncio
async def test_read_file(
mock_MemoryItem_from_text,
test_file_path: Path,
file_content,
file_manager_component,
agent: Agent,
):
await agent.workspace.write_file(test_file_path.name, file_content)
await file_ops.log_operation(
"write", Path(test_file_path.name), agent, file_ops.text_checksum(file_content)
)
content = file_ops.read_file(test_file_path.name, agent=agent)
await agent.file_manager.workspace.write_file(test_file_path.name, file_content)
content = file_manager_component.read_file(test_file_path.name)
assert content.replace("\r", "") == file_content
def test_read_file_not_found(agent: Agent):
def test_read_file_not_found(file_manager_component):
filename = "does_not_exist.txt"
with pytest.raises(FileNotFoundError):
file_ops.read_file(filename, agent=agent)
file_manager_component.read_file(filename)
@pytest.mark.asyncio
async def test_write_to_file_relative_path(test_file_name: Path, agent: Agent):
async def test_write_to_file_relative_path(
test_file_name: Path, file_manager_component, agent: Agent
):
new_content = "This is new content.\n"
await file_ops.write_to_file(test_file_name, new_content, agent=agent)
with open(agent.workspace.get_path(test_file_name), "r", encoding="utf-8") as f:
await file_manager_component.write_to_file(test_file_name, new_content)
with open(
agent.file_manager.workspace.get_path(test_file_name), "r", encoding="utf-8"
) as f:
content = f.read()
assert content == new_content
@pytest.mark.asyncio
async def test_write_to_file_absolute_path(test_file_path: Path, agent: Agent):
async def test_write_to_file_absolute_path(
test_file_path: Path, file_manager_component
):
new_content = "This is new content.\n"
await file_ops.write_to_file(test_file_path, new_content, agent=agent)
await file_manager_component.write_to_file(test_file_path, new_content)
with open(test_file_path, "r", encoding="utf-8") as f:
content = f.read()
assert content == new_content
@pytest.mark.asyncio
async def test_write_file_logs_checksum(test_file_name: Path, agent: Agent):
new_content = "This is new content.\n"
new_checksum = file_ops.text_checksum(new_content)
await file_ops.write_to_file(test_file_name, new_content, agent=agent)
log_entry = agent.get_file_operation_lines()[-1]
assert log_entry == f"write: {test_file_name} #{new_checksum}"
@pytest.mark.asyncio
async def test_write_file_fails_if_content_exists(test_file_name: Path, agent: Agent):
new_content = "This is new content.\n"
await file_ops.log_operation(
"write",
test_file_name,
agent=agent,
checksum=file_ops.text_checksum(new_content),
)
with pytest.raises(DuplicateOperationError):
await file_ops.write_to_file(test_file_name, new_content, agent=agent)
@pytest.mark.asyncio
async def test_write_file_succeeds_if_content_different(
test_file_path: Path, file_content: str, agent: Agent
):
await agent.workspace.write_file(test_file_path.name, file_content)
await file_ops.log_operation(
"write", Path(test_file_path.name), agent, file_ops.text_checksum(file_content)
)
new_content = "This is different content.\n"
await file_ops.write_to_file(test_file_path.name, new_content, agent=agent)
@pytest.mark.asyncio
async def test_list_files(agent: Agent):
async def test_list_files(file_manager_component, agent: Agent):
# Create files A and B
file_a_name = "file_a.txt"
file_b_name = "file_b.txt"
test_directory = Path("test_directory")
await agent.workspace.write_file(file_a_name, "This is file A.")
await agent.workspace.write_file(file_b_name, "This is file B.")
await agent.file_manager.workspace.write_file(file_a_name, "This is file A.")
await agent.file_manager.workspace.write_file(file_b_name, "This is file B.")
# Create a subdirectory and place a copy of file_a in it
agent.workspace.make_dir(test_directory)
await agent.workspace.write_file(
agent.file_manager.workspace.make_dir(test_directory)
await agent.file_manager.workspace.write_file(
test_directory / file_a_name, "This is file A in the subdirectory."
)
files = file_ops.list_folder(".", agent=agent)
files = file_manager_component.list_folder(".")
assert file_a_name in files
assert file_b_name in files
assert os.path.join(test_directory, file_a_name) in files
# Clean up
agent.workspace.delete_file(file_a_name)
agent.workspace.delete_file(file_b_name)
agent.workspace.delete_file(test_directory / file_a_name)
agent.workspace.delete_dir(test_directory)
agent.file_manager.workspace.delete_file(file_a_name)
agent.file_manager.workspace.delete_file(file_b_name)
agent.file_manager.workspace.delete_file(test_directory / file_a_name)
agent.file_manager.workspace.delete_dir(test_directory)
# Case 2: Search for a file that does not exist and make sure we don't throw
non_existent_file = "non_existent_file.txt"
files = file_ops.list_folder("", agent=agent)
files = file_manager_component.list_folder("")
assert non_existent_file not in files

View File

@@ -3,9 +3,9 @@ from git.exc import GitCommandError
from git.repo.base import Repo
from autogpt.agents.agent import Agent
from autogpt.agents.utils.exceptions import CommandExecutionError
from autogpt.commands.git_operations import clone_repository
from autogpt.commands.git_operations import GitOperationsComponent
from autogpt.file_storage.base import FileStorage
from autogpt.utils.exceptions import CommandExecutionError
@pytest.fixture
@@ -13,7 +13,17 @@ def mock_clone_from(mocker):
return mocker.patch.object(Repo, "clone_from")
def test_clone_auto_gpt_repository(storage: FileStorage, mock_clone_from, agent: Agent):
@pytest.fixture
def git_ops_component(agent: Agent):
return agent.git_ops
def test_clone_auto_gpt_repository(
git_ops_component: GitOperationsComponent,
storage: FileStorage,
mock_clone_from,
agent: Agent,
):
mock_clone_from.return_value = None
repo = "github.com/Significant-Gravitas/Auto-GPT.git"
@@ -23,7 +33,7 @@ def test_clone_auto_gpt_repository(storage: FileStorage, mock_clone_from, agent:
expected_output = f"Cloned {url} to {clone_path}"
clone_result = clone_repository(url=url, clone_path=clone_path, agent=agent)
clone_result = git_ops_component.clone_repository(url, clone_path)
assert clone_result == expected_output
mock_clone_from.assert_called_once_with(
@@ -32,7 +42,12 @@ def test_clone_auto_gpt_repository(storage: FileStorage, mock_clone_from, agent:
)
def test_clone_repository_error(storage: FileStorage, mock_clone_from, agent: Agent):
def test_clone_repository_error(
git_ops_component: GitOperationsComponent,
storage: FileStorage,
mock_clone_from,
agent: Agent,
):
url = "https://github.com/this-repository/does-not-exist.git"
clone_path = storage.get_path("does-not-exist")
@@ -41,4 +56,4 @@ def test_clone_repository_error(storage: FileStorage, mock_clone_from, agent: Ag
)
with pytest.raises(CommandExecutionError):
clone_repository(url=url, clone_path=clone_path, agent=agent)
git_ops_component.clone_repository(url, clone_path)

View File

@@ -10,10 +10,7 @@ import pytest
import yaml
from bs4 import BeautifulSoup
from autogpt.commands.file_operations_utils import (
decode_textual_file,
is_file_binary_fn,
)
from autogpt.utils.file_operations_utils import decode_textual_file, is_file_binary_fn
logger = logging.getLogger(__name__)

View File

@@ -15,7 +15,7 @@ from autogpt.app.utils import (
set_env_config_value,
)
from autogpt.core.utils.json_utils import extract_dict_from_json
from autogpt.utils import validate_yaml_file
from autogpt.utils.utils import validate_yaml_file
from tests.utils import skip_in_ci

View File

@@ -4,23 +4,32 @@ import pytest
from googleapiclient.errors import HttpError
from autogpt.agents.agent import Agent
from autogpt.agents.utils.exceptions import ConfigurationError
from autogpt.commands.web_search import google, safe_google_results, web_search
from autogpt.commands.web_search import WebSearchComponent
from autogpt.utils.exceptions import ConfigurationError
@pytest.fixture
def web_search_component(agent: Agent):
return agent.web_search
@pytest.mark.parametrize(
"query, expected_output",
[("test", "test"), (["test1", "test2"], '["test1", "test2"]')],
)
def test_safe_google_results(query, expected_output):
result = safe_google_results(query)
@pytest.fixture
def test_safe_google_results(
query, expected_output, web_search_component: WebSearchComponent
):
result = web_search_component.safe_google_results(query)
assert isinstance(result, str)
assert result == expected_output
def test_safe_google_results_invalid_input():
@pytest.fixture
def test_safe_google_results_invalid_input(web_search_component: WebSearchComponent):
with pytest.raises(AttributeError):
safe_google_results(123)
web_search_component.safe_google_results(123) # type: ignore
@pytest.mark.parametrize(
@@ -37,13 +46,18 @@ def test_safe_google_results_invalid_input():
],
)
def test_google_search(
query, num_results, expected_output_parts, return_value, mocker, agent: Agent
query,
num_results,
expected_output_parts,
return_value,
mocker,
web_search_component: WebSearchComponent,
):
mock_ddg = mocker.Mock()
mock_ddg.return_value = return_value
mocker.patch("autogpt.commands.web_search.DDGS.text", mock_ddg)
actual_output = web_search(query, agent=agent, num_results=num_results)
actual_output = web_search_component.web_search(query, num_results=num_results)
for o in expected_output_parts:
assert o in actual_output
@@ -82,11 +96,11 @@ def test_google_official_search(
expected_output,
search_results,
mock_googleapiclient,
agent: Agent,
web_search_component: WebSearchComponent,
):
mock_googleapiclient.return_value = search_results
actual_output = google(query, agent=agent, num_results=num_results)
assert actual_output == safe_google_results(expected_output)
actual_output = web_search_component.google(query, num_results=num_results)
assert actual_output == web_search_component.safe_google_results(expected_output)
@pytest.mark.parametrize(
@@ -115,7 +129,7 @@ def test_google_official_search_errors(
mock_googleapiclient,
http_code,
error_msg,
agent: Agent,
web_search_component: WebSearchComponent,
):
class resp:
def __init__(self, _status, _reason):
@@ -133,4 +147,4 @@ def test_google_official_search_errors(
mock_googleapiclient.side_effect = error
with pytest.raises(expected_error_type):
google(query, agent=agent, num_results=num_results)
web_search_component.google(query, num_results=num_results)

View File

@@ -0,0 +1,8 @@
### Other stuff
Debugging may be easier because we can inspect the exact components that were called and where the pipeline failed (current WIP pipeline):
![](../imgs/modular-pipeline.png)
This also makes it possible to re-run a failed component, pipeline, or function and recover.
If you need to get hold of a component from an arbitrary place, the agent provides a generic, type-safe `get_component(type[T]) -> T | None`, as in the sketch below.
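For example (assuming a `FileManagerComponent` is registered on the agent; the component type here is just an illustration):
```py
# Type-safe lookup: returns the component instance, or None if it isn't registered.
file_manager = agent.get_component(FileManagerComponent)
if file_manager is not None:
    ...  # use the component
```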

View File

@@ -0,0 +1,37 @@
# 🤖 Agents
An agent is composed of [🧩 Components](./components.md) and is responsible for executing pipelines and some additional logic. The base class for all agents is `BaseAgent`; it has the necessary logic to collect components and execute protocols.
## Important methods
`BaseAgent` provides two abstract methods needed for any agent to work properly:
1. `propose_action`: This method is responsible for proposing an action based on the current state of the agent; it returns `ThoughtProcessOutput`.
2. `execute`: This method is responsible for executing the proposed action; it returns `ActionResult`.
## AutoGPT Agent
`Agent` is the main agent provided by AutoGPT. It's a subclass of `BaseAgent`. It has all the [Built-in Components](./built-in-components.md). `Agent` implements the essential abstract methods from `BaseAgent`: `propose_action` and `execute`.
## Building your own Agent
The easiest way to build your own agent is to extend the `Agent` class and add additional components. By doing this you can reuse the existing components and the default logic for executing [⚙️ Protocols](./protocols.md).
```py
class MyComponent(AgentComponent):
pass
class MyAgent(Agent):
def __init__(
self,
settings: AgentSettings,
llm_provider: ChatModelProvider,
file_storage: FileStorage,
legacy_config: Config,
):
# Call the parent constructor to bring in the default components
super().__init__(settings, llm_provider, file_storage, legacy_config)
# Add your custom component
self.my_component = MyComponent()
```
For more customization, you can override `propose_action` and `execute`, or even subclass `BaseAgent` directly; see the sketch below. This way you can have full control over the agent's components and behavior. Have a look at the [implementation of Agent](./../../../../autogpts/autogpt/autogpt/agents/agent.py) for more details.
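For illustration only, here is a rough sketch of overriding both methods. The exact signatures (arguments, async-ness) are assumptions; check `BaseAgent` for the real ones.
```py
class MyCustomAgent(Agent):
    async def propose_action(self) -> ThoughtProcessOutput:
        # Add custom pre/post-processing around the default proposal logic.
        # Signature assumed for illustration; see BaseAgent for the real one.
        return await super().propose_action()

    async def execute(self, proposal: ThoughtProcessOutput) -> ActionResult:
        # Inspect or modify the proposed action before executing it.
        # Signature assumed for illustration; see BaseAgent for the real one.
        return await super().execute(proposal)
```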

View File

@@ -0,0 +1,115 @@
# Built-in Components
This page lists all natively provided [🧩 Components](./components.md) and the [⚙️ Protocols](./protocols.md) they implement. They are used by the AutoGPT agent.
## `SystemComponent`
Essential component to allow an agent to finish.
**DirectiveProvider**
- Constraints about API budget
**MessageProvider**
- Current time and date
- Remaining API budget and warnings if budget is low
**CommandProvider**
- `finish` used when task is completed
## `UserInteractionComponent`
Adds the ability to interact with the user in the CLI.
**CommandProvider**
- `ask_user` used to ask user for input
## `FileManagerComponent`
Adds the ability to read and write persistent files to local storage, Google Cloud Storage, or Amazon S3.
Necessary for saving and loading the agent's state (preserving the session).
**DirectiveProvider**
- Resource information that it's possible to read and write files
**CommandProvider**
- `read_file` used to read file
- `write_file` used to write file
- `list_folder` lists all files in a folder
## `CodeExecutorComponent`
Lets the agent execute non-interactive shell commands and Python code. Python execution works only if Docker is available.
**CommandProvider**
- `execute_shell` execute shell command
- `execute_shell_popen` execute shell command with popen
- `execute_python_code` execute Python code
- `execute_python_file` execute Python file
## `EventHistoryComponent`
Keeps track of the agent's actions and their outcomes. Provides a summary of them to the prompt.
**MessageProvider**
- Agent's progress summary
**AfterParse**
- Register agent's action
**ExecutionFailure**
- Rewinds the agent's action, so it isn't saved
**AfterExecute**
- Saves the agent's action result in the history
## `GitOperationsComponent`
**CommandProvider**
- `clone_repository` used to clone a git repository
## `ImageGeneratorComponent`
Adds the ability to generate images using various providers; see [Image Generation configuration](./../configuration/imagegen.md) to learn more.
**CommandProvider**
- `generate_image` used to generate an image given a prompt
## `WebSearchComponent`
Allows the agent to search the web.
**DirectiveProvider**
- Resource information that it's possible to search the web
**CommandProvider**
- `search_web` used to search the web using DuckDuckGo
- `google` used to search the web using Google, requires API key
## `WebSeleniumComponent`
Allows the agent to read websites using Selenium.
**DirectiveProvider**
- Resource information that it's possible to read websites
**CommandProvider**
- `read_website` used to read a specific url and look for specific topics or answer a question
## `ContextComponent`
Adds the ability to keep up-to-date file and folder content in the prompt.
**MessageProvider**
- Content of elements in the context
**CommandProvider**
- `open_file` used to open a file into context
- `open_folder` used to open a folder into context
- `close_context_item` remove an item from the context
## `WatchdogComponent`
Watches whether the agent is looping and switches to smart mode if necessary.
**AfterParse**
- Investigates what happened and switches to smart mode if necessary

View File

@@ -0,0 +1,102 @@
# 🛠️ Commands
Commands are a way for the agent to do anything; e.g. interact with the user or APIs and use tools. They are provided by components that implement the `CommandProvider` [⚙️ Protocol](./protocols.md). Commands are functions that can be called by the agent; they can have parameters and return values that will be seen by the agent.
```py
class CommandProvider(Protocol):
def get_commands(self) -> Iterator[Command]:
...
```
## `command` decorator
The easiest and recommended way to provide a command is to use the `command` decorator on a component method and then just yield it in `get_commands` as part of your provider. Each command needs a name, a description, and a parameter schema (`JSONSchema`). By default, the method name is used as the command name and the first part of the docstring (before the first double newline) as the description; the schema can be provided in the decorator.
### Example usage of `command` decorator
```py
# Assuming this is inside some component class
@command(
parameters={
"a": JSONSchema(
type=JSONSchema.Type.INTEGER,
description="The first number",
required=True,
),
"b": JSONSchema(
type=JSONSchema.Type.INTEGER,
description="The second number",
required=True,
)})
def multiply(self, a: int, b: int) -> str:
"""
Multiplies two numbers.
Args:
a: First number
b: Second number
Returns:
Result of multiplication
"""
return str(a * b)
```
The agent will be able to call this command, named `multiply`, with two arguments and will receive the result. The command description will be: `Multiplies two numbers.`
We can provide `names` and `description` in the decorator; the above command is equivalent to:
```py
@command(
names=["multiply"],
description="Multiplies two numbers.",
parameters={
"a": JSONSchema(
type=JSONSchema.Type.INTEGER,
description="The first number",
required=True,
),
"b": JSONSchema(
type=JSONSchema.Type.INTEGER,
description="The second number",
required=True,
)})
def multiply_command(self, a: int, b: int) -> str:
return str(a * b)
```
To provide the `multiply` command to the agent, we need to yield it in `get_commands`:
```py
def get_commands(self) -> Iterator[Command]:
yield self.multiply
```
## Creating `Command` directly
If you don't want to use the decorator, you can create a `Command` object directly.
```py
def multiply(self, a: int, b: int) -> str:
return str(a * b)
def get_commands(self) -> Iterator[Command]:
yield Command(
names=["multiply"],
description="Multiplies two numbers.",
method=self.multiply,
parameters=[
CommandParameter(name="a", spec=JSONSchema(
type=JSONSchema.Type.INTEGER,
description="The first number",
required=True,
)),
CommandParameter(name="b", spec=JSONSchema(
type=JSONSchema.Type.INTEGER,
description="The second number",
required=True,
)),
],
)
```

View File

@@ -0,0 +1,128 @@
# 🧩 Components
Components are the building blocks of [🤖 Agents](./agents.md). They are classes inheriting `AgentComponent` or implementing one or more [⚙️ Protocols](./protocols.md) that give the agent additional abilities or processing.
Components can be used to implement various functionalities like providing messages to the prompt, executing code, or interacting with external services.
They can be enabled or disabled, ordered, and can rely on each other.
Components assigned in the agent's `__init__` via `self` are automatically detected upon the agent's instantiation.
For example, inside `__init__`: `self.my_component = MyComponent()`.
You can use any valid Python variable name; what matters for the component to be detected is its type (`AgentComponent` or any protocol inheriting from it).
Visit [Built-in Components](./built-in-components.md) to see what components are available out of the box.
```py
from autogpt.agents import Agent
from autogpt.agents.components import AgentComponent
class HelloComponent(AgentComponent):
pass
class SomeComponent(AgentComponent):
def __init__(self, hello_component: HelloComponent):
self.hello_component = hello_component
class MyAgent(Agent):
def __init__(self):
# These components will be automatically discovered and used
self.hello_component = HelloComponent()
# We pass HelloComponent to SomeComponent
self.some_component = SomeComponent(self.hello_component)
```
## Ordering components
The execution order of components is important because later ones may depend on the results of earlier ones.
### Implicit order
Components can be ordered implicitly by the agent; each component can set the `run_after` list to specify which components should run before it. This is useful when components rely on each other or need to be executed in a specific order. Otherwise, components are ordered alphabetically.
```py
# This component will run after HelloComponent
class CalculatorComponent(AgentComponent):
run_after = [HelloComponent]
```
### Explicit order
Sometimes it may be easier to order components explicitly by setting the `self.components` list in the agent's `__init__` method. This way you can also ensure there are no circular dependencies; `run_after` is then ignored.
!!! warning
Be sure to include all components: by setting the `self.components` list, you're overriding the default behavior of discovering components automatically. Since this is usually not intended, the agent will inform you in the terminal if some components were skipped.
```py
class MyAgent(Agent):
def __init__(self):
self.hello_component = HelloComponent()
self.calculator_component = CalculatorComponent(self.hello_component)
# Explicitly set components list
self.components = [self.hello_component, self.calculator_component]
```
## Disabling components
You can control which components are enabled by setting their `_enabled` attribute.
Either provide a `bool` value or a `Callable[[], bool]`, which will be checked each time
the component is about to be executed. This way you can dynamically enable or disable
components based on some conditions.
You can also provide a reason for disabling the component by setting `_disabled_reason`.
The reason will be visible in the debug information.
```py
class DisabledComponent(MessageProvider):
def __init__(self):
# Disable this component
self._enabled = False
self._disabled_reason = "This component is disabled because of reasons."
# Or disable based on some condition, either statically...:
self._enabled = self.some_property is not None
# ... or dynamically:
self._enabled = lambda: self.some_property is not None
# This method will never be called
def get_messages(self) -> Iterator[ChatMessage]:
yield ChatMessage.user("This message won't be seen!")
def some_condition(self) -> bool:
return False
```
If you don't want the component at all, you can just remove it from the agent's `__init__` method. If you want to remove components inherited from the parent class, you can set the relevant attribute to `None`:
!!! Warning
Be careful when removing components that are required by other components. This may lead to errors and unexpected behavior.
```py
class MyAgent(Agent):
def __init__(self):
super().__init__(...)
# Disable WatchdogComponent that is in the parent class
self.watchdog = None
```
## Exceptions
Custom errors are provided which can be used to control the execution flow in case something goes wrong. All of these errors can be raised in protocol methods and will be caught by the agent.
By default, the agent will retry three times and then re-raise the exception if it's still not resolved. All passed arguments are automatically handled and the values are reverted when needed.
All errors accept an optional `str` message. The following errors are listed in order of increasing broadness:
1. `ComponentEndpointError`: A single endpoint method failed to execute. Agent will retry the execution of this endpoint on the component.
2. `EndpointPipelineError`: A pipeline failed to execute. Agent will retry the execution of the endpoint for all components.
3. `ComponentSystemError`: Multiple pipelines failed.
**Example**
```py
from autogpt.agents.components import ComponentEndpointError
from autogpt.agents.protocols import MessageProvider
# Example of raising an error
class MyComponent(MessageProvider):
def get_messages(self) -> Iterator[ChatMessage]:
# This will cause the component to always fail
# and retry 3 times before re-raising the exception
raise ComponentEndpointError("Endpoint error!")
```

View File

@@ -0,0 +1,235 @@
# Creating Components
## The minimal component
Components can be used to implement various functionalities like providing messages to the prompt, executing code, or interacting with external services.
A *component* is a class that inherits from `AgentComponent` OR implements one or more *protocols*. Every *protocol* inherits from `AgentComponent`, so your class automatically becomes a *component* once you inherit any *protocol*.
```py
class MyComponent(AgentComponent):
pass
```
This is already a valid component, but it doesn't do anything yet. To add some functionality to it, you need to implement one or more *protocols*.
Let's create a simple component that adds a "Hello World!" message to the agent's prompt. To do this, we need to implement the `MessageProvider` *protocol* in our component. `MessageProvider` is an interface with a `get_messages` method:
```py
# No longer need to inherit AgentComponent, because MessageProvider already does it
class HelloComponent(MessageProvider):
def get_messages(self) -> Iterator[ChatMessage]:
yield ChatMessage.user("Hello World!")
```
Now we can add our component to an existing agent or create a new Agent class and add it there:
```py
class MyAgent(Agent):
self.hello_component = HelloComponent()
```
`get_messages` will be called by the agent each time it needs to build a new prompt, and the yielded messages will be added accordingly.
## Passing data to and between components
Since components are regular classes, you can pass data (including other components) to them via the `__init__` method.
For example, we can pass a config object and then retrieve an API key from it when needed:
```py
class ConfigurableComponent(MessageProvider):
def __init__(self, config: Config):
self.config = config
def get_messages(self) -> Iterator[ChatMessage]:
if self.config.openai_credentials.api_key:
yield ChatMessage.system("API key found!")
else:
yield ChatMessage.system("API key not found!")
```
!!! note
Component-specific configuration handling isn't implemented yet.
## Providing commands
To extend what an agent can do, you need to provide commands using the `CommandProvider` protocol. For example, to allow the agent to multiply two numbers, you can create a component like this:
```py
class MultiplicatorComponent(CommandProvider):
def get_commands(self) -> Iterator[Command]:
# Yield the command so the agent can use it
yield self.multiply
@command(
parameters={
"a": JSONSchema(
type=JSONSchema.Type.INTEGER,
description="The first number",
required=True,
),
"b": JSONSchema(
type=JSONSchema.Type.INTEGER,
description="The second number",
required=True,
)})
def multiply(self, a: int, b: int) -> str:
"""
Multiplies two numbers.
Args:
a: First number
b: Second number
Returns:
Result of multiplication
"""
return str(a * b)
```
To learn more about commands see [🛠️ Commands](./commands.md).
## Prompt structure
After the components have provided all the necessary data, the agent needs to build the final prompt that will be sent to an LLM.
Currently, `PromptStrategy` (*not* a protocol) is responsible for building the final prompt.
If you want to change the way the prompt is built, you need to create a new `PromptStrategy` class and then call the relevant methods in your agent class.
You can have a look at the default strategy used by the AutoGPT Agent: [OneShotAgentPromptStrategy](../../../../autogpts/autogpt/autogpt/agents/prompt_strategies/one_shot.py), and how it's used in the [Agent](../../../../autogpts/autogpt/autogpt/agents/agent.py) (search for `self.prompt_strategy`).
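As a purely illustrative skeleton (the signatures below are assumptions, not the actual `PromptStrategy` API), a custom strategy conceptually does two things: build the outgoing prompt and parse the LLM response.
```py
class MyPromptStrategy:
    # Illustrative only; see OneShotAgentPromptStrategy for the real interface.
    def build_prompt(
        self, messages: list[ChatMessage], commands: list[Command]
    ) -> list[ChatMessage]:
        # Combine component-provided messages and available commands
        # into the final message list sent to the LLM.
        ...

    def parse_response_content(self, response: str) -> ThoughtProcessOutput:
        # Turn the raw LLM response into a structured ThoughtProcessOutput.
        ...
```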
## Example `UserInteractionComponent`
Let's create a slightly simplified version of the component that is used by the built-in agent.
It gives the agent the ability to ask the user for input in the terminal.
1. Create a class for the component that inherits from `CommandProvider`.
```py
class MyUserInteractionComponent(CommandProvider):
"""Provides commands to interact with the user."""
pass
```
2. Implement a command method that will ask the user for input and return it.
```py
def ask_user(self, question: str) -> str:
"""If you need more details or information regarding the given goals,
you can ask the user for input."""
print(f"\nQ: {question}")
resp = input("A:")
return f"The user's answer: '{resp}'"
```
3. The command needs to be decorated with `@command`.
```py
@command(
parameters={
"question": JSONSchema(
type=JSONSchema.Type.STRING,
description="The question or prompt to the user",
required=True,
)
},
)
def ask_user(self, question: str) -> str:
"""If you need more details or information regarding the given goals,
you can ask the user for input."""
print(f"\nQ: {question}")
resp = input("A:")
return f"The user's answer: '{resp}'"
```
4. We need to implement `CommandProvider`'s `get_commands` method to yield the command.
```py
def get_commands(self) -> Iterator[Command]:
yield self.ask_user
```
5. Since the agent isn't always running in the terminal or in interactive mode, we need to disable this component by setting `self._enabled` when it's not possible to ask for user input.
```py
def __init__(self, config: Config):
self.config = config
self._enabled = not config.noninteractive_mode
```
The final component should look like this:
```py
# 1.
class MyUserInteractionComponent(CommandProvider):
"""Provides commands to interact with the user."""
# We pass config to check if we're in noninteractive mode
def __init__(self, config: Config):
self.config = config
# 5.
self._enabled = not config.noninteractive_mode
# 4.
def get_commands(self) -> Iterator[Command]:
# Yielding the command so the agent can use it
# This won't be yielded if the component is disabled
yield self.ask_user
# 3.
@command(
# We need to provide a schema for ALL the command parameters
parameters={
"question": JSONSchema(
type=JSONSchema.Type.STRING,
description="The question or prompt to the user",
required=True,
)
},
)
# 2.
# Command name will be its method name and description will be its docstring
def ask_user(self, question: str) -> str:
"""If you need more details or information regarding the given goals,
you can ask the user for input."""
print(f"\nQ: {question}")
resp = input("A:")
return f"The user's answer: '{resp}'"
```
Now, if we want to use our user interaction *instead of* the default one, we need to somehow remove the default one (if our agent inherits from `Agent`, the default one is inherited) and add our own. We can simply override `user_interaction` in the `__init__` method:
```py
class MyAgent(Agent):
def __init__(
self,
settings: AgentSettings,
llm_provider: ChatModelProvider,
file_storage: FileStorage,
legacy_config: Config,
):
# Call the parent constructor to bring in the default components
super().__init__(settings, llm_provider, file_storage, legacy_config)
# Disable the default user interaction component by overriding it
self.user_interaction = MyUserInteractionComponent()
```
Alternatively, we can disable the default component by setting it to `None`:
```py
class MyAgent(Agent):
def __init__(
self,
settings: AgentSettings,
llm_provider: ChatModelProvider,
file_storage: FileStorage,
legacy_config: Config,
):
# Call the parent constructor to bring in the default components
super().__init__(settings, llm_provider, file_storage, legacy_config)
# Disable the default user interaction component
self.user_interaction = None
# Add our own component
self.my_user_interaction = MyUserInteractionComponent(legacy_config)
```
## Learn more
The best way to see more examples is to look at the built-in components in the [autogpt/components](../../../../autogpts/autogpt/autogpt/components/) and [autogpt/commands](../../../../autogpts/autogpt/autogpt/commands/) directories.
Guide on how to extend the built-in agent and build your own: [🤖 Agents](./agents.md)
The order of some components matters; see [🧩 Components](./components.md) to learn more about components and how they can be customized.
To see built-in protocols with accompanying examples visit [⚙️ Protocols](./protocols.md).

View File

@@ -0,0 +1,17 @@
# Component Agents
This guide explains the component-based architecture of AutoGPT agents. It's a new way of building agents that is more flexible and easier to extend. Components replace some of the agent's logic and plugins with a more modular and composable system.
An agent is composed of *components*, and each *component* implements a range of *protocols* (interfaces), each providing a specific piece of functionality, e.g. additional commands or messages. Each *protocol* is handled in a specific order, defined by the agent. This allows for a clear separation of concerns and a more modular design.
This system is simple, flexible, requires basically no configuration, and doesn't hide any data; anything can still be passed or accessed directly from or between components.
### Definitions & Guides
See [Creating Components](./creating-components.md) to get started! Or you can explore the following topics in detail:
- [🧩 Component](./components.md): a class that implements one or more *protocols*. It can be added to an agent to provide additional functionality. See what's already provided in [Built-in Components](./built-in-components.md).
- [⚙️ Protocol](./protocols.md): an interface that defines a set of methods that a component must implement. Protocols are used to group related functionality.
- [🛠️ Command](./commands.md): enable *agent* to interact with user and tools.
- [🤖 Agent](./agents.md): a class that is composed of components. It's responsible for executing pipelines and managing the components.
- **Pipeline**: a sequence of method calls on components. Pipelines are used to execute a series of actions in a specific order. As of now there's no formal class for a pipeline; it's just a sequence of method calls on components, as in the conceptual sketch below. There are two default pipelines implemented in the default agent: `propose_action` and `execute`. See [🤖 Agent](./agents.md) to learn more.
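As a conceptual sketch only (this is not the actual `BaseAgent` code), a pipeline is essentially a loop over the components that implement a given protocol:
```py
def collect_messages(components: list[AgentComponent]) -> list[ChatMessage]:
    # "MessageProvider pipeline": call get_messages on every component
    # that implements the protocol, in the agent's component order.
    messages: list[ChatMessage] = []
    for component in components:
        if isinstance(component, MessageProvider):
            messages.extend(component.get_messages())
    return messages
```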

View File

@@ -0,0 +1,166 @@
# ⚙️ Protocols
Protocols are *interfaces* implemented by [Components](./components.md) that are used to group related functionality. Each protocol needs to be handled explicitly by the agent at some point during execution. We provide a comprehensive list of built-in protocols that are already handled in the built-in `Agent`, so when you inherit from the base agent, all built-in protocols will work!
**Protocols are listed in the order of the default execution.**
## Order-independent protocols
Components implementing exclusively order-independent protocols can be added in any order, including in between components implementing order-dependent protocols.
### `DirectiveProvider`
Yields constraints, resources and best practices for the agent. This has no direct impact on other protocols; it is purely informational and will be passed to the LLM when the prompt is built.
```py
class DirectiveProvider(AgentComponent):
def get_constraints(self) -> Iterator[str]:
return iter([])
def get_resources(self) -> Iterator[str]:
return iter([])
def get_best_practices(self) -> Iterator[str]:
return iter([])
```
**Example** A web-search component can provide resource information. Keep in mind that this alone doesn't allow the agent to access the internet; to do that, a relevant `Command` needs to be provided.
```py
class WebSearchComponent(DirectiveProvider):
def get_resources(self) -> Iterator[str]:
yield "Internet access for searches and information gathering."
# We can skip "get_constraints" and "get_best_practices" if they aren't needed
```
### `CommandProvider`
Provides commands that can be executed by the agent.
```py
class CommandProvider(AgentComponent):
def get_commands(self) -> Iterator[Command]:
...
```
The easiest way to provide a command is to use the `command` decorator on a component method and then yield the method. Each command needs a name, a description and a parameter schema using `JSONSchema`. By default, the method name is used as the command name and the first part of the docstring (before `Args:` or `Returns:`) as the description; the schema can be provided in the decorator.
**Example** Calculator component that can perform multiplication. The agent is able to call this command if it's relevant to the current task and will see the returned result.
```py
from typing import Iterator
from autogpt.agents.protocols import CommandProvider
from autogpt.core.utils.json_schema import JSONSchema
from autogpt.models.command import Command
from autogpt.utils.command_decorator import command
class CalculatorComponent(CommandProvider):
    def get_commands(self) -> Iterator[Command]:
yield self.multiply
@command(parameters={
"a": JSONSchema(
type=JSONSchema.Type.INTEGER,
description="The first number",
required=True,
),
"b": JSONSchema(
type=JSONSchema.Type.INTEGER,
description="The second number",
required=True,
)})
def multiply(self, a: int, b: int) -> str:
"""
Multiplies two numbers.
Args:
a: First number
b: Second number
Returns:
Result of multiplication
"""
return str(a * b)
```
The agent will be able to call this command, named `multiply`, with two arguments and will receive the result. The command description will be `Multiplies two numbers.`
To learn more about commands see [🛠️ Commands](./commands.md).
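Commands and directives can also come from the same component, since a component may implement several protocols. For instance, the `WebSearchComponent` from the `DirectiveProvider` example above could additionally implement `CommandProvider` to actually give the agent internet access. The sketch below is illustrative only; `search_the_web` and its body are hypothetical, and it assumes the same imports as the calculator example above:
```py
class WebSearchComponent(DirectiveProvider, CommandProvider):
    def get_resources(self) -> Iterator[str]:
        yield "Internet access for searches and information gathering."
    def get_commands(self) -> Iterator[Command]:
        yield self.search_the_web
    @command(parameters={
        "query": JSONSchema(
            type=JSONSchema.Type.STRING,
            description="The search query",
            required=True,
        )})
    def search_the_web(self, query: str) -> str:
        """Searches the web for the given query."""
        # Placeholder result; a real component would call a search API here.
        return f"Results for: {query}"
```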
## Order-dependent protocols
The order of components implementing order-dependent protocols is important.
Some components may depend on the results of components before them.
### `MessageProvider`
Yields messages that will be added to the agent's prompt. You can use either `ChatMessage.user()`, which will be interpreted as a user-sent message, or `ChatMessage.system()`, which carries more weight as a system message.
```py
class MessageProvider(AgentComponent):
def get_messages(self) -> Iterator[ChatMessage]:
...
```
**Example** Component that provides a message to the agent's prompt.
```py
class HelloComponent(MessageProvider):
def get_messages(self) -> Iterator[ChatMessage]:
yield ChatMessage.user("Hello World!")
```
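A system message can be yielded the same way; for example (an illustrative sketch, not a built-in component):
```py
class RulesComponent(MessageProvider):
    def get_messages(self) -> Iterator[ChatMessage]:
        yield ChatMessage.system("Always respond in a formal tone.")
```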
### `AfterParse`
Protocol called after the response is parsed.
```py
class AfterParse(AgentComponent):
def after_parse(self, response: ThoughtProcessOutput) -> None:
...
```
**Example** Component that logs the response after it's parsed.
```py
class LoggerComponent(AfterParse):
def after_parse(self, response: ThoughtProcessOutput) -> None:
logger.info(f"Response: {response}")
```
### `ExecutionFailure`
Protocol called when the execution of the command fails.
```py
class ExecutionFailure(AgentComponent):
@abstractmethod
def execution_failure(self, error: Exception) -> None:
...
```
**Example** Component that logs the error when the command fails.
```py
class LoggerComponent(ExecutionFailure):
def execution_failure(self, error: Exception) -> None:
logger.error(f"Command execution failed: {error}")
```
### `AfterExecute`
Protocol called after the command is successfully executed by the agent.
```py
class AfterExecute(AgentComponent):
def after_execute(self, result: ActionResult) -> None:
...
```
**Example** Component that logs the result after the command is executed.
```py
class LoggerComponent(AfterExecute):
def after_execute(self, result: ActionResult) -> None:
logger.info(f"Result: {result}")
```
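Since a single component can implement several protocols, the three logging examples above could just as well be combined into one component:
```py
class LoggerComponent(AfterParse, ExecutionFailure, AfterExecute):
    def after_parse(self, response: ThoughtProcessOutput) -> None:
        logger.info(f"Response: {response}")
    def execution_failure(self, error: Exception) -> None:
        logger.error(f"Command execution failed: {error}")
    def after_execute(self, result: ActionResult) -> None:
        logger.info(f"Result: {result}")
```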

View File

@@ -11,7 +11,7 @@ Configuration is controlled through the `Config` object. You can set configurati
- `BROWSE_CHUNK_MAX_LENGTH`: When browsing website, define the length of chunks to summarize. Default: 3000
- `BROWSE_SPACY_LANGUAGE_MODEL`: [spaCy language model](https://spacy.io/usage/models) to use when creating chunks. Default: en_core_web_sm
- `CHAT_MESSAGES_ENABLED`: Enable chat messages. Optional
- `DISABLED_COMMAND_CATEGORIES`: Command categories to disable. Command categories are Python module names, e.g. autogpt.commands.execute_code. See the directory `autogpt/commands` in the source for all command modules. Default: None
- `DISABLED_COMMANDS`: Commands to disable. Use comma-separated command names. See the list of commands from built-in components [here](../component%20agent/components.md). Default: None
- `ELEVENLABS_API_KEY`: ElevenLabs API Key. Optional.
- `ELEVENLABS_VOICE_ID`: ElevenLabs Voice ID. Optional.
- `EMBEDDING_MODEL`: LLM Model to use for embedding tasks. Default: `text-embedding-3-small`

View File

@@ -1,20 +0,0 @@
## Plugins
⚠️💀 **WARNING** 💀⚠️: Review the code of any plugin you use thoroughly, as plugins can execute any Python code, potentially leading to malicious activities, such as stealing your API keys.
To configure plugins, you can create or edit the `plugins_config.yaml` file in the root directory of AutoGPT. This file allows you to enable or disable plugins as desired. For specific configuration instructions, please refer to the documentation provided for each plugin. The file should be formatted in YAML. Here is an example for your reference:
```yaml
plugin_a:
config:
api_key: my-api-key
enabled: false
plugin_b:
config: {}
enabled: true
```
See our [Plugins Repo](https://github.com/Significant-Gravitas/Auto-GPT-Plugins) for more info on how to install all the amazing plugins the community has built!
Alternatively, developers can use the [AutoGPT Plugin Template](https://github.com/Significant-Gravitas/Auto-GPT-Plugin-Template) as a starting point for creating your own plugins.

View File

@@ -214,16 +214,18 @@ To print out debug logs:
./autogpt.sh --debug
```
## Disabling Command Categories
## Disabling Commands
If you want to selectively disable some command groups, you can use the
`DISABLED_COMMAND_CATEGORIES` config in your `.env`. You can find the list of available
categories [here][command categories].
The best way to disable commands is to disable or remove the [component][components] that provides them.
However, if you want to selectively disable some commands, you can use the `DISABLED_COMMANDS` config in your `.env`.
Put the names of the commands you want to disable, separated by commas.
You can find the list of commands in built-in components [here][commands].
For example, to disable coding related features, set it to the value below:
For example, to disable Python coding features, set it to the value below:
```ini
DISABLED_COMMAND_CATEGORIES=autogpt.commands.execute_code
DISABLED_COMMANDS=execute_python_code,execute_python_file
```
[command categories]: https://github.com/Significant-Gravitas/AutoGPT/blob/master/autogpts/autogpt/autogpt/commands/__init__.py
[components]: ./component%20agent/components.md
[commands]: ./component%20agent/built-in-components.md

Binary file not shown.
