removed tracing and monitoring

This commit is contained in:
SwiftyOS
2023-08-31 11:46:24 +02:00
parent 7a933cacb7
commit f0f290b0ae
8 changed files with 336 additions and 1394 deletions

View File

@@ -5,8 +5,6 @@ from dotenv import load_dotenv
load_dotenv()
import autogpt.sdk.forge_log
ENABLE_TRACING = os.environ.get("ENABLE_TRACING", "false").lower() == "true"
autogpt.sdk.forge_log.setup_logger()

View File

@@ -6,7 +6,6 @@ from fastapi import APIRouter, FastAPI, Response, UploadFile
from fastapi.responses import FileResponse
from hypercorn.asyncio import serve
from hypercorn.config import Config
from prometheus_fastapi_instrumentator import Instrumentator
from .db import AgentDB
from .errors import NotFoundError
@@ -14,7 +13,6 @@ from .forge_log import ForgeLogger
from .middlewares import AgentMiddleware
from .routes.agent_protocol import base_router
from .schema import *
from .tracing import setup_tracing
from .workspace import Workspace
LOG = ForgeLogger(__name__)
@@ -37,17 +35,8 @@ class Agent:
            version="v0.4",
        )

        # Add Prometheus metrics to the agent
        # https://github.com/trallnag/prometheus-fastapi-instrumentator
        instrumentator = Instrumentator().instrument(app)

        @app.on_event("startup")
        async def _startup():
            instrumentator.expose(app)

        app.include_router(router)
        app.add_middleware(AgentMiddleware, agent=self)
        setup_tracing(app)
        config.loglevel = "ERROR"
        config.bind = [f"0.0.0.0:{port}"]
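
For reference, the Prometheus wiring removed above follows the instrumentator's instrument-then-expose pattern: register default request metrics at app construction time, then publish the /metrics endpoint on startup. A minimal standalone sketch, with the app name and metadata assumed rather than taken from this commit:

from fastapi import FastAPI
from prometheus_fastapi_instrumentator import Instrumentator

# Hypothetical app; the real agent builds its FastAPI instance inside the Agent class.
app = FastAPI(title="Auto-GPT Forge", version="v0.4")

# Attach default request/latency metrics to every route.
instrumentator = Instrumentator().instrument(app)


@app.on_event("startup")
async def _startup():
    # Expose GET /metrics for the Prometheus service defined in docker-compose.
    instrumentator.expose(app)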

View File

@@ -5,7 +5,6 @@ import logging.handlers
import os
import queue
ENABLE_TRACING = os.environ.get("ENABLE_TRACING", "false").lower() == "true"
JSON_LOGGING = os.environ.get("JSON_LOGGING", "false").lower() == "true"
CHAT = 29

View File

@@ -31,7 +31,6 @@ from fastapi.responses import FileResponse
from autogpt.sdk.errors import *
from autogpt.sdk.forge_log import ForgeLogger
from autogpt.sdk.schema import *
from autogpt.sdk.tracing import tracing
base_router = APIRouter()
@@ -71,7 +70,6 @@ async def check_server_status():
@base_router.post("/agent/tasks", tags=["agent"], response_model=Task)
@tracing("Creating new task", is_create_task=True)
async def create_agent_task(request: Request, task_request: TaskRequestBody) -> Task:
    """
    Creates a new task using the provided TaskRequestBody and returns a Task.
@@ -182,7 +180,6 @@ async def list_agent_tasks(
@base_router.get("/agent/tasks/{task_id}", tags=["agent"], response_model=Task)
@tracing("Getting task details")
async def get_agent_task(request: Request, task_id: str) -> Task:
    """
    Gets the details of a task by ID.
@@ -326,7 +323,6 @@ async def list_agent_task_steps(
@base_router.post("/agent/tasks/{task_id}/steps", tags=["agent"], response_model=Step)
@tracing("Creating and executing Step")
async def execute_agent_task_step(
    request: Request, task_id: str, step: StepRequestBody
) -> Step:
@@ -396,7 +392,6 @@ async def execute_agent_task_step(
@base_router.get(
    "/agent/tasks/{task_id}/steps/{step_id}", tags=["agent"], response_model=Step
)
@tracing("Getting Step Details")
async def get_agent_task_step(request: Request, task_id: str, step_id: str) -> Step:
    """
    Retrieves the details of a specific step for a given task.
@@ -445,7 +440,6 @@ async def get_agent_task_step(request: Request, task_id: str, step_id: str) -> S
    tags=["agent"],
    response_model=TaskArtifactsListResponse,
)
@tracing("Listing Task Artifacts")
async def list_agent_task_artifacts(
    request: Request,
    task_id: str,
@@ -506,7 +500,6 @@ async def list_agent_task_artifacts(
@base_router.post(
    "/agent/tasks/{task_id}/artifacts", tags=["agent"], response_model=Artifact
)
@tracing("Uploading task artifact")
async def upload_agent_task_artifacts(
    request: Request, task_id: str, file: UploadFile, relative_path: str
) -> Artifact:
@@ -564,7 +557,6 @@ async def upload_agent_task_artifacts(
@base_router.get(
    "/agent/tasks/{task_id}/artifacts/{artifact_id}", tags=["agent"], response_model=str
)
@tracing("Downloading task artifact")
async def download_agent_task_artifact(
    request: Request, task_id: str, artifact_id: str
) -> FileResponse:

View File

@@ -1,144 +0,0 @@
import os
from functools import wraps

from dotenv import load_dotenv

from autogpt.sdk.forge_log import ForgeLogger

load_dotenv()
ENABLE_TRACING = os.environ.get("ENABLE_TRACING", "false").lower() == "true"

LOG = ForgeLogger(__name__)


def setup_tracing(app):
    LOG.info(f"Tracing status: {ENABLE_TRACING}")
    if ENABLE_TRACING:
        from opentelemetry import trace
        from opentelemetry.exporter.jaeger.thrift import JaegerExporter
        from opentelemetry.sdk.resources import SERVICE_NAME, Resource
        from opentelemetry.sdk.trace import TracerProvider
        from opentelemetry.sdk.trace.export import BatchSpanProcessor

        resource = Resource(attributes={SERVICE_NAME: "Auto-GPT-Forge"})

        # Configure the tracer provider to export traces to Jaeger
        jaeger_exporter = JaegerExporter(
            agent_host_name="jaeger",
            agent_port=6831,
        )
        provider = TracerProvider(resource=resource)
        processor = BatchSpanProcessor(jaeger_exporter)
        provider.add_span_processor(processor)
        trace.set_tracer_provider(provider)

        # Instrument FastAPI app
        # FastAPIInstrumentor.instrument_app(app)
        LOG.info("Tracing Setup")


if ENABLE_TRACING:
    from functools import wraps

    from opentelemetry import trace
    from opentelemetry.trace import NonRecordingSpan
    from pydantic import BaseModel

    from autogpt.sdk.schema import Task

    tasks_context_db = {}

    class TaskIDTraceContext:
        """Custom context manager to override default tracing behavior."""

        def __init__(self, task_id: str, span_name: str):
            self.task_id = task_id
            self.span_name = span_name
            self.span = None

        def __enter__(self):
            # Get the default tracer
            tracer = trace.get_tracer(__name__)

            # Check if the task_id has been traced before
            if self.task_id in tasks_context_db:
                # Get the span context from the previous task
                span_context = tasks_context_db[self.task_id].get("span_context")
                LOG.info(
                    f"Task ID: {self.task_id} Span Context trace_id: {span_context.trace_id} span_id: {span_context.span_id}"
                )
                assert span_context, "No Span context for existing task_id"
                # Create a new span linked to the previous span context
                ctx = trace.set_span_in_context(NonRecordingSpan(span_context))
                self.span = tracer.start_span(self.span_name, context=ctx)
            else:
                # If it's a new task_id, start a new span
                self.span = tracer.start_span(self.span_name)
                # Set this span in context and store it for future use
                tasks_context_db[self.task_id] = {
                    "span_context": self.span.get_span_context()
                }
            return self.span

        def __exit__(self, type, value, traceback):
            if self.span:
                self.span.end()
                self.span = None

    def tracing(operation_name: str, is_create_task: bool = False):
        def decorator(func):
            @wraps(func)
            async def wrapper(*args, **kwargs):
                function_ran = False
                task_id = "none"
                step_id = "none"
                if is_create_task:
                    result = await func(*args, **kwargs)
                    if isinstance(result, Task):
                        task_id = result.task_id
                    function_ran = True
                else:
                    task_id = kwargs.get("task_id", "none")
                    step_id = kwargs.get("step_id", "none")

                LOG.info(f"Starting Trace for task_id: {task_id}")
                with TaskIDTraceContext(task_id, operation_name) as span:
                    span.set_attribute("task_id", task_id)
                    span.set_attribute("step_id", step_id)

                    # Add request event with all kwargs
                    kwargs_copy = {k: v for k, v in kwargs.items() if k != "request"}
                    for key, value in kwargs_copy.items():
                        if isinstance(value, BaseModel):
                            kwargs_copy[key] = value.json()
                    span.add_event(name="request", attributes=kwargs_copy)

                    if not function_ran:
                        result = await func(*args, **kwargs)

                    # Convert result to json before adding response event
                    if isinstance(result, BaseModel):
                        result_json = result.json()
                        span.add_event("response", {"response": result_json})
                    return result

            return wrapper

        return decorator

else:

    def tracing(operation_name: str, is_create_task: bool = False):
        """
        Stub function that does nothing so we can have a global enable tracing switch
        """

        def decorator(func):
            @wraps(func)
            async def wrapper(*args, **kwargs):
                return await func(*args, **kwargs)

            return wrapper

        return decorator
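
Because Python applies stacked decorators bottom-up, tracing() wraps the handler first and the router then registers the traced wrapper, which is why it sits below the route decorator in the diffs above. A minimal usage sketch mirroring those routes; the handler body is elided and the import assumes the deleted module path autogpt.sdk.tracing:

from fastapi import APIRouter, Request

from autogpt.sdk.schema import Step, StepRequestBody
from autogpt.sdk.tracing import tracing

router = APIRouter()


@router.post("/agent/tasks/{task_id}/steps", response_model=Step)
@tracing("Creating and executing Step")
async def execute_step(request: Request, task_id: str, step: StepRequestBody) -> Step:
    # The wrapper starts a span named "Creating and executing Step", links it to any
    # earlier span recorded for this task_id, attaches the request body as an event,
    # awaits the handler, and records the response before returning it.
    ...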

View File

@@ -1,25 +0,0 @@
version: '3'

services:
  agent:
    build:
      context: .
      dockerfile: Dockerfile
    ports:
      - 8000:8000

  prometheus:
    image: prom/prometheus
    volumes:
      - ./prometheus.yml:/etc/prometheus/prometheus.yml
    ports:
      - 9090:9090

  grafana:
    image: grafana/grafana
    ports:
      - 3000:3000

  jaeger:
    image: jaegertracing/all-in-one
    ports:
      - 16686:16686
      - 14268:14268

poetry.lock (generated, 1529 lines changed)

File diff suppressed because it is too large

View File

@@ -10,10 +10,8 @@ readme = "README.md"
python = "^3.10"
python-dotenv = "^1.0.0"
openai = "^0.27.8"
helicone = "^1.0.6"
tenacity = "^8.2.2"
sqlalchemy = "^2.0.19"
google-cloud-storage = "^2.10.0"
aiohttp = "^3.8.5"
colorlog = "^6.7.0"
@@ -41,14 +39,6 @@ pandas = "^2.0.3"
dash-bootstrap-components = "^1.4.2"
[tool.poetry.group.monitoring.dependencies]
prometheus-fastapi-instrumentator = "^6.1.0"
opentelemetry-api = "^1.19.0"
opentelemetry-sdk = "^1.19.0"
opentelemetry-exporter-otlp = "^1.19.0"
opentelemetry-instrumentation-fastapi = "^0.40b0"
opentelemetry-exporter-jaeger = "^1.19.0"
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"