refactor(agent): Tweak model_providers.schema for easier use

- Set default values for `ProviderBudget` / `ModelProviderBudget` fields
- Remove redundant field redefinitions on `ModelProviderBudget` class
- Change `ModelProviderUsage.update_usage(..)` and `ModelProviderBudget.update_usage_and_cost(..)` signatures to take token counts directly, for easier use
- Change `ModelProviderBudget.usage` from a single `ModelProviderUsage` to a `defaultdict[str, ModelProviderUsage]` for per-model usage tracking (see the usage sketch below)
- Fix `ChatModelInfo`/`EmbeddingModelInfo` `service` attribute: renamed `llm_service` to `service` to match the base class, and typed it as a `Literal` with a matching default value.
  This makes it unnecessary to specify the `service` field when creating a `ChatModelInfo` or `EmbeddingModelInfo` object.
- Use `defaultdict(ModelProviderBudget)` for task budget tracking in agent_protocol_server.py
Reinier van der Leer committed on 2024-04-22 18:29:43 +02:00
parent 7bb7c30842, commit 4db4ca08b2
5 changed files with 29 additions and 62 deletions
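Taken together, the reworked schema takes much less ceremony to use. A rough sketch of the new API, based on the diffs below (the model choice and token counts are illustrative only):

```python
from autogpt.core.resource.model_providers.openai import (
    OPEN_AI_CHAT_MODELS,
    OpenAIModelName,
)
from autogpt.core.resource.model_providers.schema import ModelProviderBudget

# All budget fields now have defaults: infinite budget, zero cost, empty usage.
budget = ModelProviderBudget()

# update_usage_and_cost() now takes model info and raw token counts directly,
# instead of a fully constructed ChatModelResponse:
cost = budget.update_usage_and_cost(
    model_info=OPEN_AI_CHAT_MODELS[OpenAIModelName.GPT3_v4],
    input_tokens_used=1500,
    output_tokens_used=420,
)

# Usage is now tracked per model name:
print(budget.usage[OpenAIModelName.GPT3_v4].prompt_tokens)  # 1500
```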

agent_protocol_server.py

@@ -1,6 +1,7 @@
 import logging
 import os
 import pathlib
+from collections import defaultdict
 from io import BytesIO
 from uuid import uuid4
@@ -60,7 +61,7 @@ class AgentProtocolServer:
         self.file_storage = file_storage
         self.llm_provider = llm_provider
         self.agent_manager = AgentManager(file_storage)
-        self._task_budgets = {}
+        self._task_budgets = defaultdict(ModelProviderBudget)

     async def start(self, port: int = 8000, router: APIRouter = base_router):
         """Start the agent server."""
@@ -461,9 +462,7 @@ class AgentProtocolServer:
         """
         Configures the LLM provider with headers to link outgoing requests to the task.
         """
-        task_llm_budget = self._task_budgets.get(
-            task.task_id, self.llm_provider.default_settings.budget.copy(deep=True)
-        )
+        task_llm_budget = self._task_budgets[task.task_id]
         task_llm_provider_config = self.llm_provider._configuration.copy(deep=True)
         _extra_request_headers = task_llm_provider_config.extra_request_headers
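Since `_task_budgets` is now a `defaultdict`, the first access for an unknown task id transparently creates and stores a fresh `ModelProviderBudget`, which is exactly what the removed `.get(..., budget.copy(deep=True))` pattern emulated. A minimal sketch of the semantics, using a hypothetical stand-in class:

```python
from collections import defaultdict


class Budget:  # hypothetical stand-in for ModelProviderBudget
    def __init__(self) -> None:
        self.total_cost = 0.0


task_budgets: defaultdict[str, Budget] = defaultdict(Budget)

task_budgets["task-123"].total_cost += 0.5  # first access creates the entry
assert task_budgets["task-123"].total_cost == 0.5  # later accesses reuse it
```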

autogpt/core/resource/model_providers/openai.py

@@ -1,6 +1,5 @@
 import enum
 import logging
-import math
 import os
 from pathlib import Path
 from typing import Any, Callable, Coroutine, Iterator, Optional, ParamSpec, TypeVar
@@ -37,9 +36,7 @@ from autogpt.core.resource.model_providers.schema import (
     ModelProviderConfiguration,
     ModelProviderCredentials,
     ModelProviderName,
-    ModelProviderService,
     ModelProviderSettings,
-    ModelProviderUsage,
     ModelTokenizer,
 )
 from autogpt.core.utils.json_schema import JSONSchema
@@ -49,7 +46,6 @@ _T = TypeVar("_T")
 _P = ParamSpec("_P")

 OpenAIEmbeddingParser = Callable[[Embedding], Embedding]
-OpenAIChatParser = Callable[[str], dict]


 class OpenAIModelName(str, enum.Enum):
@@ -87,7 +83,6 @@ OPEN_AI_EMBEDDING_MODELS = {
     for info in [
         EmbeddingModelInfo(
             name=OpenAIModelName.EMBEDDING_v2,
-            service=ModelProviderService.EMBEDDING,
             provider_name=ModelProviderName.OPENAI,
             prompt_token_cost=0.0001 / 1000,
             max_tokens=8191,
@@ -95,7 +90,6 @@ OPEN_AI_EMBEDDING_MODELS = {
         ),
         EmbeddingModelInfo(
             name=OpenAIModelName.EMBEDDING_v3_S,
-            service=ModelProviderService.EMBEDDING,
             provider_name=ModelProviderName.OPENAI,
             prompt_token_cost=0.00002 / 1000,
             max_tokens=8191,
@@ -103,7 +97,6 @@ OPEN_AI_EMBEDDING_MODELS = {
         ),
         EmbeddingModelInfo(
             name=OpenAIModelName.EMBEDDING_v3_L,
-            service=ModelProviderService.EMBEDDING,
             provider_name=ModelProviderName.OPENAI,
             prompt_token_cost=0.00013 / 1000,
             max_tokens=8191,
@@ -118,7 +111,6 @@ OPEN_AI_CHAT_MODELS = {
     for info in [
         ChatModelInfo(
             name=OpenAIModelName.GPT3_v1,
-            service=ModelProviderService.CHAT,
             provider_name=ModelProviderName.OPENAI,
             prompt_token_cost=0.0015 / 1000,
             completion_token_cost=0.002 / 1000,
@@ -127,7 +119,6 @@ OPEN_AI_CHAT_MODELS = {
         ),
         ChatModelInfo(
             name=OpenAIModelName.GPT3_v2_16k,
-            service=ModelProviderService.CHAT,
             provider_name=ModelProviderName.OPENAI,
             prompt_token_cost=0.003 / 1000,
             completion_token_cost=0.004 / 1000,
@@ -136,7 +127,6 @@ OPEN_AI_CHAT_MODELS = {
         ),
         ChatModelInfo(
             name=OpenAIModelName.GPT3_v3,
-            service=ModelProviderService.CHAT,
             provider_name=ModelProviderName.OPENAI,
             prompt_token_cost=0.001 / 1000,
             completion_token_cost=0.002 / 1000,
@@ -145,7 +135,6 @@ OPEN_AI_CHAT_MODELS = {
         ),
         ChatModelInfo(
             name=OpenAIModelName.GPT3_v4,
-            service=ModelProviderService.CHAT,
             provider_name=ModelProviderName.OPENAI,
             prompt_token_cost=0.0005 / 1000,
             completion_token_cost=0.0015 / 1000,
@@ -154,7 +143,6 @@ OPEN_AI_CHAT_MODELS = {
         ),
         ChatModelInfo(
             name=OpenAIModelName.GPT4_v1,
-            service=ModelProviderService.CHAT,
             provider_name=ModelProviderName.OPENAI,
             prompt_token_cost=0.03 / 1000,
             completion_token_cost=0.06 / 1000,
@@ -163,7 +151,6 @@ OPEN_AI_CHAT_MODELS = {
         ),
         ChatModelInfo(
             name=OpenAIModelName.GPT4_v1_32k,
-            service=ModelProviderService.CHAT,
             provider_name=ModelProviderName.OPENAI,
             prompt_token_cost=0.06 / 1000,
             completion_token_cost=0.12 / 1000,
@@ -172,7 +159,6 @@ OPEN_AI_CHAT_MODELS = {
         ),
         ChatModelInfo(
             name=OpenAIModelName.GPT4_TURBO,
-            service=ModelProviderService.CHAT,
             provider_name=ModelProviderName.OPENAI,
             prompt_token_cost=0.01 / 1000,
             completion_token_cost=0.03 / 1000,
@@ -305,21 +291,12 @@ class OpenAIProvider(
             retries_per_request=10,
         ),
         credentials=None,
-        budget=ModelProviderBudget(
-            total_budget=math.inf,
-            total_cost=0.0,
-            remaining_budget=math.inf,
-            usage=ModelProviderUsage(
-                prompt_tokens=0,
-                completion_tokens=0,
-                total_tokens=0,
-            ),
-        ),
+        budget=ModelProviderBudget(),
     )

-    _budget: ModelProviderBudget
     _configuration: OpenAIConfiguration
     _credentials: OpenAICredentials
+    _budget: ModelProviderBudget

     def __init__(
         self,
@@ -648,12 +625,9 @@
             prompt_tokens_used = completion_tokens_used = 0

         cost = self._budget.update_usage_and_cost(
-            ChatModelResponse(
-                response=AssistantChatMessage(content=None),
-                model_info=OPEN_AI_CHAT_MODELS[model],
-                prompt_tokens_used=prompt_tokens_used,
-                completion_tokens_used=completion_tokens_used,
-            )
+            model_info=OPEN_AI_CHAT_MODELS[model],
+            input_tokens_used=prompt_tokens_used,
+            output_tokens_used=completion_tokens_used,
         )
         self._logger.debug(
             f"Completion usage: {prompt_tokens_used} input, "

autogpt/core/resource/model_providers/schema.py

@@ -1,6 +1,7 @@
 import abc
 import enum
 import math
+from collections import defaultdict
 from typing import (
     Any,
     Callable,
@@ -90,7 +91,7 @@ class AssistantToolCallDict(TypedDict):
 class AssistantChatMessage(ChatMessage):
-    role: Literal["assistant"] = "assistant"
+    role: Literal[ChatMessage.Role.ASSISTANT] = ChatMessage.Role.ASSISTANT
     content: Optional[str]
     tool_calls: Optional[list[AssistantToolCall]] = None
@@ -187,39 +188,34 @@ class ModelProviderUsage(ProviderUsage):
     completion_tokens: int = 0
     prompt_tokens: int = 0
-    total_tokens: int = 0

     def update_usage(
         self,
-        model_response: ModelResponse,
+        input_tokens_used: int,
+        output_tokens_used: int = 0,
     ) -> None:
-        self.completion_tokens += model_response.completion_tokens_used
-        self.prompt_tokens += model_response.prompt_tokens_used
-        self.total_tokens += (
-            model_response.completion_tokens_used + model_response.prompt_tokens_used
-        )
+        self.prompt_tokens += input_tokens_used
+        self.completion_tokens += output_tokens_used


 class ModelProviderBudget(ProviderBudget):
-    total_budget: float = UserConfigurable()
-    total_cost: float
-    remaining_budget: float
-    usage: ModelProviderUsage
+    usage: defaultdict[str, ModelProviderUsage] = defaultdict(ModelProviderUsage)

     def update_usage_and_cost(
         self,
-        model_response: ModelResponse,
+        model_info: ModelInfo,
+        input_tokens_used: int,
+        output_tokens_used: int = 0,
     ) -> float:
         """Update the usage and cost of the provider.

         Returns:
             float: The (calculated) cost of the given model response.
         """
-        model_info = model_response.model_info
-        self.usage.update_usage(model_response)
+        self.usage[model_info.name].update_usage(input_tokens_used, output_tokens_used)
         incurred_cost = (
-            model_response.completion_tokens_used * model_info.completion_token_cost
-            + model_response.prompt_tokens_used * model_info.prompt_token_cost
+            output_tokens_used * model_info.completion_token_cost
+            + input_tokens_used * model_info.prompt_token_cost
         )
         self.total_cost += incurred_cost
         self.remaining_budget -= incurred_cost
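For example, at GPT-4 Turbo's rates from `openai.py` above (`prompt_token_cost=0.01 / 1000`, `completion_token_cost=0.03 / 1000`), a call using 1,000 input and 500 output tokens incurs `1000 * 0.00001 + 500 * 0.00003 = $0.025`, which is added to `total_cost` and deducted from `remaining_budget`.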
@@ -230,7 +226,7 @@ class ModelProviderSettings(ProviderSettings):
     resource_type: ResourceType = ResourceType.MODEL
     configuration: ModelProviderConfiguration
     credentials: ModelProviderCredentials
-    budget: ModelProviderBudget
+    budget: Optional[ModelProviderBudget] = None


 class ModelProvider(abc.ABC):
@@ -238,8 +234,8 @@ class ModelProvider(abc.ABC):
     default_settings: ClassVar[ModelProviderSettings]

-    _budget: Optional[ModelProviderBudget]
     _configuration: ModelProviderConfiguration
+    _budget: Optional[ModelProviderBudget] = None

     @abc.abstractmethod
     def count_tokens(self, text: str, model_name: str) -> int:
@@ -284,7 +280,7 @@ class ModelTokenizer(Protocol):
 class EmbeddingModelInfo(ModelInfo):
     """Struct for embedding model information."""

-    llm_service = ModelProviderService.EMBEDDING
+    service: Literal[ModelProviderService.EMBEDDING] = ModelProviderService.EMBEDDING
     max_tokens: int
     embedding_dimensions: int
@@ -322,7 +318,7 @@ class EmbeddingModelProvider(ModelProvider):
 class ChatModelInfo(ModelInfo):
     """Struct for language model information."""

-    llm_service = ModelProviderService.CHAT
+    service: Literal[ModelProviderService.CHAT] = ModelProviderService.CHAT
     max_tokens: int
     has_function_call_api: bool = False
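Declaring `service` as a `Literal` with a matching default lets pydantic fill the field in automatically and reject any conflicting value, which is why the explicit `service=...` arguments could be dropped from `openai.py` above and from the tests below.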

autogpt/core/resource/schema.py

@@ -1,5 +1,6 @@
 import abc
 import enum
+import math

 from pydantic import BaseModel, SecretBytes, SecretField, SecretStr
@@ -25,9 +26,9 @@ class ProviderUsage(SystemConfiguration, abc.ABC):
 class ProviderBudget(SystemConfiguration):
-    total_budget: float = UserConfigurable()
-    total_cost: float
-    remaining_budget: float
+    total_budget: float = UserConfigurable(math.inf)
+    total_cost: float = 0
+    remaining_budget: float = math.inf
     usage: ProviderUsage

     @abc.abstractmethod
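With these defaults on the base class, subclasses such as `ModelProviderBudget` no longer need to redefine `total_budget`, `total_cost`, and `remaining_budget` just to supply default values, and `UserConfigurable(math.inf)` keeps the unlimited default budget overridable from user configuration.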

test_config.py

@@ -18,7 +18,6 @@ from autogpt.core.resource.model_providers.openai import OpenAIModelName
 from autogpt.core.resource.model_providers.schema import (
     ChatModelInfo,
     ModelProviderName,
-    ModelProviderService,
 )
@@ -153,7 +152,6 @@ async def test_create_config_gpt4only(config: Config) -> None:
     ) as mock_get_models:
         mock_get_models.return_value = [
             ChatModelInfo(
-                service=ModelProviderService.CHAT,
                 name=GPT_4_MODEL,
                 provider_name=ModelProviderName.OPENAI,
                 max_tokens=4096,
@@ -174,7 +172,6 @@ async def test_create_config_gpt3only(config: Config) -> None:
     ) as mock_get_models:
         mock_get_models.return_value = [
             ChatModelInfo(
-                service=ModelProviderService.CHAT,
                 name=GPT_3_MODEL,
                 provider_name=ModelProviderName.OPENAI,
                 max_tokens=4096,