mirror of
https://github.com/aljazceru/Auto-GPT.git
synced 2026-01-09 01:04:25 +01:00
Add "Simple web server" challenge (#74)
Co-authored-by: Silen Naihin <silen.naihin@gmail.com>
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
import json
|
||||
from typing import Union
|
||||
|
||||
|
||||
class RegressionManager:
|
||||
@@ -15,7 +16,9 @@ class RegressionManager:
|
||||
f.read().strip()
|
||||
) # read the content and remove any leading/trailing whitespace
|
||||
if file_content: # if file is not empty, load the json
|
||||
self.tests = json.loads(file_content)
|
||||
data = json.loads(file_content)
|
||||
self.tests = {k: data[k] for k in sorted(data)}
|
||||
data = self.replace_backslash(data)
|
||||
else: # if file is empty, assign an empty dictionary
|
||||
self.tests = {}
|
||||
except FileNotFoundError:
|
||||
@@ -36,3 +39,13 @@ class RegressionManager:
|
||||
if test_name in self.tests:
|
||||
del self.tests[test_name]
|
||||
self.save()
|
||||
|
||||
def replace_backslash(self, value: str) -> Union[str, list[str], dict]:
    """Return ``value`` with backslash path separators turned into ``/``.

    Strings are converted directly. Lists and dicts are rebuilt with every
    element / value converted recursively (dict keys are left untouched).
    Anything else is handed back unchanged.

    Args:
        value: A string, or a (possibly nested) list/dict structure. Despite
            the ``str`` annotation, the isinstance checks below accept any
            type.

    Returns:
        A converted copy for str, list and dict input; the original object
        for every other type.
    """
    if isinstance(value, str):
        # A doubled backslash collapses to one "/" (the historical
        # behaviour). The second pass then converts the single backslashes
        # that a decoded JSON path such as "agbenchmark\challenges" actually
        # contains, which the pair-only replacement never matched.
        return value.replace("\\\\", "/").replace("\\", "/")
    if isinstance(value, list):
        return [self.replace_backslash(item) for item in value]
    if isinstance(value, dict):
        return {key: self.replace_backslash(item) for key, item in value.items()}
    return value
|
||||
|
||||
27
agbenchmark/challenges/code/d3/custom_python/api_tests.py
Normal file
27
agbenchmark/challenges/code/d3/custom_python/api_tests.py
Normal file
@@ -0,0 +1,27 @@
|
||||
import os
|
||||
from typing import Any, Dict
|
||||
from unittest.mock import Mock, patch
|
||||
|
||||
import requests
|
||||
|
||||
|
||||
def make_assertion() -> None:
    """Run the health-endpoint check, optionally against a mocked response.

    When the ``MOCK_TEST`` environment variable equals ``"true"``
    (case-insensitive), ``requests.get`` is patched to return a canned
    response with status 200 and JSON body ``{"status": "OK"}`` before the
    check runs. Otherwise the check runs unpatched. The value returned by
    the check is discarded.
    """
    mocking_enabled = os.environ.get("MOCK_TEST", "False").lower() == "true"

    if not mocking_enabled:
        make_request_and_assert()
        return

    # Canned stand-in for the HTTP response, spec'd on requests.Response.
    fake_response = Mock(requests.Response)
    fake_response.status_code = 200
    fake_response.json.return_value = {"status": "OK"}

    with patch("requests.get", return_value=fake_response):
        make_request_and_assert()
|
||||
|
||||
|
||||
def make_request_and_assert(timeout: float = 10.0) -> Dict[str, Any]:
    """GET the local health endpoint and return its decoded JSON body.

    Args:
        timeout: Seconds to wait for the server before giving up. Passed
            straight through to ``requests.get``.

    Returns:
        The response body parsed by ``response.json()``.

    Raises:
        AssertionError: If the response status code is not 200.
    """
    # Without an explicit timeout, requests waits indefinitely on a server
    # that accepts the connection but never answers, which would stall the
    # whole benchmark run.
    response = requests.get("http://localhost:8079/health", timeout=timeout)
    if response.status_code != 200:
        raise AssertionError(
            f"Expected status code 200, but got {response.status_code}"
        )

    return response.json()
|
||||
18
agbenchmark/challenges/code/d3/data.json
Normal file
18
agbenchmark/challenges/code/d3/data.json
Normal file
@@ -0,0 +1,18 @@
|
||||
{
|
||||
"name": "TestCreateSimpleWebServer",
|
||||
"category": ["code"],
|
||||
"task": "Build a basic web server that responds to a GET localhost:8079/health with a 200 OK. Deploy this web server locally at the port 8079. ",
|
||||
"dependencies": ["TestDebugSimpleTypoWithGuidance"],
|
||||
"ground": {
|
||||
"answer": "GET localhost:8079/health responds with a 200 OK",
|
||||
"should_contain": [],
|
||||
"should_not_contain": [],
|
||||
"files": [],
|
||||
"type": "custom_python"
|
||||
},
|
||||
"info": {
|
||||
"difficulty": "medium",
|
||||
"description": "Tests ability for the agent to build a simple web server locally",
|
||||
"side_effects": []
|
||||
}
|
||||
}
|
||||
@@ -2,6 +2,8 @@ import glob
|
||||
import importlib
|
||||
import json
|
||||
import os
|
||||
import pkgutil
|
||||
import sys
|
||||
import types
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict
|
||||
@@ -47,6 +49,19 @@ def generate_tests() -> None:
|
||||
class_name = data.get("name", "")
|
||||
|
||||
challenge_location = get_test_path(json_file)
|
||||
if data["ground"]["type"] == "custom_python":
|
||||
custom_python_location = (
|
||||
f"{CURRENT_DIRECTORY}/../{challenge_location}/custom_python"
|
||||
)
|
||||
sys.path.append(str(custom_python_location))
|
||||
|
||||
for (module_loader, name, ispkg) in pkgutil.iter_modules(
|
||||
[str(custom_python_location)]
|
||||
):
|
||||
module = importlib.import_module(name)
|
||||
|
||||
if hasattr(module, "make_assertion"):
|
||||
make_assertion = getattr(module, "make_assertion")
|
||||
|
||||
# Define test class dynamically
|
||||
challenge_class = types.new_class(class_name, (Challenge,))
|
||||
@@ -58,7 +73,15 @@ def generate_tests() -> None:
|
||||
self.setup_challenge(config)
|
||||
|
||||
scores = self.get_scores(config)
|
||||
assert 1 in scores
|
||||
|
||||
# Check if make_assertion is defined and use it
|
||||
if "make_assertion" in locals():
|
||||
try:
|
||||
make_assertion()
|
||||
except AssertionError as error:
|
||||
print(error) # Or handle this in another way
|
||||
else:
|
||||
assert 1 in scores
|
||||
|
||||
# Parametrize the method here
|
||||
test_method = pytest.mark.parametrize(
|
||||
|
||||
31
poetry.lock
generated
31
poetry.lock
generated
@@ -1,4 +1,4 @@
|
||||
# This file is automatically @generated by Poetry and should not be changed by hand.
|
||||
# This file is automatically @generated by Poetry 1.4.2 and should not be changed by hand.
|
||||
|
||||
[[package]]
|
||||
name = "aiohttp"
|
||||
@@ -961,6 +961,33 @@ notebook = ["ipywidgets (>=6)"]
|
||||
slack = ["slack-sdk"]
|
||||
telegram = ["requests"]
|
||||
|
||||
[[package]]
|
||||
name = "types-requests"
|
||||
version = "2.31.0.1"
|
||||
description = "Typing stubs for requests"
|
||||
category = "main"
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
files = [
|
||||
{file = "types-requests-2.31.0.1.tar.gz", hash = "sha256:3de667cffa123ce698591de0ad7db034a5317457a596eb0b4944e5a9d9e8d1ac"},
|
||||
{file = "types_requests-2.31.0.1-py3-none-any.whl", hash = "sha256:afb06ef8f25ba83d59a1d424bd7a5a939082f94b94e90ab5e6116bd2559deaa3"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
types-urllib3 = "*"
|
||||
|
||||
[[package]]
|
||||
name = "types-urllib3"
|
||||
version = "1.26.25.13"
|
||||
description = "Typing stubs for urllib3"
|
||||
category = "main"
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
files = [
|
||||
{file = "types-urllib3-1.26.25.13.tar.gz", hash = "sha256:3300538c9dc11dad32eae4827ac313f5d986b8b21494801f1bf97a1ac6c03ae5"},
|
||||
{file = "types_urllib3-1.26.25.13-py3-none-any.whl", hash = "sha256:5dbd1d2bef14efee43f5318b5d36d805a489f6600252bb53626d4bfafd95e27c"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "typing-extensions"
|
||||
version = "4.7.1"
|
||||
@@ -1082,4 +1109,4 @@ multidict = ">=4.0"
|
||||
[metadata]
|
||||
lock-version = "2.0"
|
||||
python-versions = "^3.10"
|
||||
content-hash = "44b5789494e73f3cb8bcb9d25daa62143e59352a246fd7724fdb3ad58c2560ae"
|
||||
content-hash = "81b84bbe08d4a09fb6a4f99c7fb018e0c0fcd879fa368c388b0af20c7c9a3f31"
|
||||
|
||||
@@ -16,6 +16,7 @@ pydantic = "^1.10.9"
|
||||
pytest-depends = "^1.0.1"
|
||||
python-dotenv = "^0.21.0"
|
||||
click = "^8.1.3"
|
||||
types-requests = "^2.31.0.1"
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
flake8 = "^3.9.2"
|
||||
|
||||
@@ -1,69 +1,90 @@
|
||||
{
|
||||
"TestWriteFile": {
|
||||
"difficulty": "basic",
|
||||
"dependencies": [],
|
||||
"test": "agbenchmark\\challenges\\interface\\write_file"
|
||||
},
|
||||
"TestReadFile": {
|
||||
"difficulty": "basic",
|
||||
"dependencies": [
|
||||
"TestWriteFile"
|
||||
],
|
||||
"test": "agbenchmark\\challenges\\interface\\read_file"
|
||||
},
|
||||
"TestBasicMemory": {
|
||||
"difficulty": "basic",
|
||||
"dependencies": [
|
||||
"TestReadFile",
|
||||
"TestWriteFile"
|
||||
],
|
||||
"test": "agbenchmark\\challenges\\memory\\m1"
|
||||
"test": "agbenchmark/challenges/memory/m1"
|
||||
},
|
||||
"TestBasicRetrieval": {
|
||||
"difficulty": "basic",
|
||||
"dependencies": [
|
||||
"TestWriteFile",
|
||||
"TestSearch"
|
||||
],
|
||||
"test": "agbenchmark/challenges/retrieval/r1"
|
||||
},
|
||||
"TestCreateSimpleWebServer": {
|
||||
"difficulty": "basic",
|
||||
"dependencies": [],
|
||||
"test": "agbenchmark/challenges/code/d3"
|
||||
},
|
||||
"TestDebugSimpleTypoWithGuidance": {
|
||||
"difficulty": "basic",
|
||||
"dependencies": [
|
||||
"TestReadFile",
|
||||
"TestWriteFile"
|
||||
],
|
||||
"test": "agbenchmark\\challenges\\retrieval\\r1"
|
||||
"test": "agbenchmark/challenges/code/d1"
|
||||
},
|
||||
"TestDebugSimpleTypoWithoutGuidance": {
|
||||
"difficulty": "medium",
|
||||
"dependencies": [
|
||||
"TestDebugSimpleTypoWithGuidance"
|
||||
],
|
||||
"test": "agbenchmark/challenges/code/d2"
|
||||
},
|
||||
"TestReadFile": {
|
||||
"difficulty": "basic",
|
||||
"dependencies": [
|
||||
"TestWriteFile"
|
||||
],
|
||||
"test": "agbenchmark/challenges/interface/read_file"
|
||||
},
|
||||
"TestRememberMultipleIds": {
|
||||
"difficulty": "basic",
|
||||
"dependencies": [
|
||||
"TestBasicMemory"
|
||||
],
|
||||
"test": "agbenchmark\\challenges\\memory\\m2"
|
||||
},
|
||||
"TestRetrieval2": {
|
||||
"difficulty": "basic",
|
||||
"dependencies": [
|
||||
"TestBasicRetrieval"
|
||||
],
|
||||
"test": "agbenchmark\\challenges\\retrieval\\r2"
|
||||
"test": "agbenchmark/challenges/memory/m2"
|
||||
},
|
||||
"TestRememberMultipleIdsWithNoise": {
|
||||
"difficulty": "medium",
|
||||
"dependencies": [
|
||||
"TestRememberMultipleIds"
|
||||
],
|
||||
"test": "agbenchmark\\challenges\\memory\\m3"
|
||||
},
|
||||
"TestRetrieval3": {
|
||||
"difficulty": "basic",
|
||||
"dependencies": [
|
||||
"TestRetrieval2"
|
||||
],
|
||||
"test": "agbenchmark\\challenges\\retrieval\\r3"
|
||||
"test": "agbenchmark/challenges/memory/m3"
|
||||
},
|
||||
"TestRememberMultiplePhrasesWithNoise": {
|
||||
"difficulty": "medium",
|
||||
"dependencies": [
|
||||
"TestRememberMultipleIdsWithNoise"
|
||||
],
|
||||
"test": "agbenchmark\\challenges\\memory\\m4"
|
||||
"test": "agbenchmark/challenges/memory/m4"
|
||||
},
|
||||
"TestRetrieval2": {
|
||||
"difficulty": "basic",
|
||||
"dependencies": [
|
||||
"TestBasicRetrieval"
|
||||
],
|
||||
"test": "agbenchmark/challenges/retrieval/r2"
|
||||
},
|
||||
"TestRetrieval3": {
|
||||
"difficulty": "basic",
|
||||
"dependencies": [
|
||||
"TestRetrieval2"
|
||||
],
|
||||
"test": "agbenchmark/challenges/retrieval/r3"
|
||||
},
|
||||
"TestSearch": {
|
||||
"difficulty": "basic",
|
||||
"dependencies": [],
|
||||
"test": "agbenchmark\\challenges\\interface\\search"
|
||||
"test": "agbenchmark/challenges/interface/search"
|
||||
},
|
||||
"TestWriteFile": {
|
||||
"difficulty": "basic",
|
||||
"dependencies": [],
|
||||
"test": "agbenchmark/challenges/interface/write_file"
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user