diff --git a/.github/workflows/benchmark-ci.yml b/.github/workflows/benchmark-ci.yml
index f58e7fa3..f7cdeac5 100644
--- a/.github/workflows/benchmark-ci.yml
+++ b/.github/workflows/benchmark-ci.yml
@@ -240,6 +240,8 @@ jobs:
             poetry run uvicorn server:app --reload &
             sleep 5
             export AGENT_NAME=mini-agi
+            echo "poetry run agbenchmark start --mock --api_mode --host=http://localhost:8000"
+            poetry run agbenchmark start --mock --api_mode --host=http://localhost:8000
           else
             echo "${prefix}agbenchmark start"
             ${prefix}agbenchmark start || echo "This command will always return a non zero exit code unless all the challenges are solved."
diff --git a/benchmark/agbenchmark/generate_test.py b/benchmark/agbenchmark/generate_test.py
index a26b1987..b4d6b201 100644
--- a/benchmark/agbenchmark/generate_test.py
+++ b/benchmark/agbenchmark/generate_test.py
@@ -231,10 +231,13 @@ def generate_tests() -> None:  # sourcery skip: invert-any-all
 
     # for suites to know if the file has already been used to generate the tests
     # Dynamic class creation
+
     while json_files:
         json_file = (
             json_files.popleft()
         )  # Take and remove the first element from json_files
+        if challenge_should_be_ignored(json_file):
+            continue
         data = ChallengeData.get_json_from_path(json_file)
         suite_config = SuiteConfig.suite_data_if_suite(Path(json_file))
 
@@ -293,4 +296,21 @@ def generate_tests() -> None:  # sourcery skip: invert-any-all
             print(f"Generated test for {data['name']}.")
 
 
+def challenge_should_be_ignored(json_file: str) -> bool:
+    """Tell whether a challenge data file must be skipped during test generation.
+
+    Challenges stored under ``challenges/deprecated`` or ``challenges/library``
+    are not turned into tests.
+
+    Args:
+        json_file: Path of the challenge JSON file, as a string.
+
+    Returns:
+        True if the path points inside one of the ignored directories.
+    """
+    # Normalise Windows separators so the check is not tied to "/" paths.
+    normalized = json_file.replace("\\", "/")
+    return "challenges/deprecated" in normalized or "challenges/library" in normalized
+
+
 generate_tests()
diff --git a/benchmark/agbenchmark/start_benchmark.py b/benchmark/agbenchmark/start_benchmark.py
index ae52cc75..a94ae234 100644
--- a/benchmark/agbenchmark/start_benchmark.py
+++ b/benchmark/agbenchmark/start_benchmark.py
@@ -95,7 +95,8 @@ def run_benchmark(
     test: Optional[str] = None,
     suite: Optional[str] = None,
     cutoff: Optional[int] = None,
-    server: bool = False,
+    api_mode: bool = False,
+    host: Optional[str] = None,
 ) -> int:
     """Start the benchmark tests. If a category flag is provided, run the categories with that mark."""
     # Check if configuration file exists and is not empty
@@ -132,7 +133,12 @@ def run_benchmark(
             config = json.load(f)
     else:
         config = {}
-
+    host = host or config.get("host")
+    api_mode = api_mode or config.get("api_mode")
+    if host:
+        config["host"] = host
+    if api_mode:
+        config["api_mode"] = api_mode
     print("benchmark run path", CONFIG_PATH, HOME_DIRECTORY)
     if not config.get("workspace"):
         config["workspace"] = click.prompt(
@@ -141,7 +147,7 @@ def run_benchmark(
             show_default=True,
         )
 
-    if config.get("api_mode") and not config.get("host"):
+    if api_mode and not host:
         config["host"] = click.prompt(
             "Please enter the Agent API host address",
             default="http://localhost:8000",
@@ -195,7 +201,10 @@ def run_benchmark(
         elif explore:
             print("Only attempt challenges that have never been beaten")
             pytest_args.append("--explore")
-
+    if config.get("host"):  # config also holds a host entered at the prompt
+        pytest_args.append(f"--host={config['host']}")
+    if api_mode:
+        pytest_args.append("--api_mode")
     if mock:
         pytest_args.append("--mock")
 
@@ -215,6 +224,8 @@ def run_benchmark(
         print(f"Setting cuttoff override to {cutoff} seconds.")
 
     pytest_args.extend((str(CURRENT_DIRECTORY), "--cache-clear"))
+    pytest_args.append("--disable-warnings")
+
     return pytest.main(pytest_args)
 
 
@@ -249,6 +260,8 @@ def cli() -> None:
 )
 @click.option("--nc", is_flag=True, help="Run without cutoff")
 @click.option("--cutoff", help="Set or override tests cutoff (seconds)")
+@click.option("--api_mode", is_flag=True, help="API mode")  # flag: takes no value
+@click.option("--host", help="Define API host")
 def start(
     maintain: bool,
     improve: bool,
@@ -262,6 +275,8 @@ def start(
     suite: Optional[str] = None,
     cutoff: Optional[int] = None,
     backend: Optional[bool] = False,
+    api_mode: bool = False,
+    host: Optional[str] = None,
 ) -> Any:
     # Redirect stdout if backend is True
     original_stdout = sys.stdout  # Save the original standard output
@@ -282,6 +297,8 @@ def start(
                 test=test,
                 suite=suite,
                 cutoff=cutoff,
+                api_mode=api_mode,
+                host=host,
             )
 
         sys.stdout = original_stdout
@@ -404,4 +421,4 @@ def get_regression_data() -> Any:
 
 
 # if __name__ == "__main__":
-#     start()
+# start()
diff --git a/benchmark/poetry.lock b/benchmark/poetry.lock
index 129e111f..7d943095 100644
--- a/benchmark/poetry.lock
+++ b/benchmark/poetry.lock
@@ -152,6 +152,27 @@ files = [
 [package.dependencies]
 frozenlist = ">=1.1.0"
 
+[[package]]
+name = "anyio"
+version = "3.7.1"
+description = "High level compatibility layer for multiple asynchronous event loop implementations"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "anyio-3.7.1-py3-none-any.whl", hash = "sha256:91dee416e570e92c64041bd18b900d1d6fa78dff7048769ce5ac5ddad004fbb5"},
+    {file = "anyio-3.7.1.tar.gz", hash = "sha256:44a3c9aba0f5defa43261a8b3efb97891f2bd7d804e0e1f56419befa1adfc780"},
+]
+
+[package.dependencies]
+exceptiongroup = {version = "*", markers = "python_version < \"3.11\""}
+idna = ">=2.8"
+sniffio = ">=1.1"
+
+[package.extras]
+doc = ["Sphinx", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx-rtd-theme (>=1.2.2)", "sphinxcontrib-jquery"]
+test = ["anyio[trio]", "coverage[toml] (>=4.5)", "hypothesis (>=4.0)", "mock (>=4)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "uvloop (>=0.17)"]
+trio = ["trio (<0.22)"]
+
 [[package]]
 name = "appnope"
 version = "0.1.3"
@@ -617,6 +638,26 @@ files = [
 [package.extras]
 tests = ["asttokens", "littleutils", "pytest", "rich"]
 
+[[package]]
+name = "fastapi"
+version = "0.103.1"
+description = "FastAPI framework, high performance, easy to learn, fast to code, ready for production"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "fastapi-0.103.1-py3-none-any.whl", hash = "sha256:5e5f17e826dbd9e9b5a5145976c5cd90bcaa61f2bf9a69aca423f2bcebe44d83"},
+    {file = "fastapi-0.103.1.tar.gz", hash = "sha256:345844e6a82062f06a096684196aaf96c1198b25c06b72c1311b882aa2d8a35d"},
+]
+
+[package.dependencies]
+anyio = ">=3.7.1,<4.0.0"
+pydantic = ">=1.7.4,<1.8 || >1.8,<1.8.1 || >1.8.1,<2.0.0 || >2.0.0,<2.0.1 || >2.0.1,<2.1.0 || >2.1.0,<3.0.0"
+starlette = ">=0.27.0,<0.28.0"
+typing-extensions = ">=4.5.0"
+
+[package.extras]
+all = ["email-validator (>=2.0.0)", "httpx (>=0.23.0)", "itsdangerous (>=1.1.0)", "jinja2 (>=2.11.2)", "orjson (>=3.2.1)", "pydantic-extra-types (>=2.0.0)", "pydantic-settings (>=2.0.0)", "python-multipart (>=0.0.5)", "pyyaml (>=5.3.1)", "ujson (>=4.0.1,!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0)", "uvicorn[standard] (>=0.12.0)"]
+
 [[package]]
 name = "filelock"
 version = "3.12.3"
@@ -2070,6 +2111,20 @@ files = [
 [package.extras]
 cli = ["click (>=5.0)"]
 
+[[package]]
+name = "python-multipart"
+version = "0.0.6"
+description = "A streaming multipart parser for Python"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "python_multipart-0.0.6-py3-none-any.whl", hash = "sha256:ee698bab5ef148b0a760751c261902cd096e57e10558e11aca17646b74ee1c18"},
+    {file = "python_multipart-0.0.6.tar.gz", hash = "sha256:e9925a80bb668529f1b67c7fdb0a5dacdd7cbfc6fb0bff3ea443fe22bdd62132"},
+]
+
+[package.extras]
+dev = ["atomicwrites (==1.2.1)", "attrs (==19.2.0)", "coverage (==6.5.0)", "hatch", "invoke (==1.7.3)", "more-itertools (==4.3.0)", "pbr (==4.3.0)", "pluggy (==1.0.0)", "py (==1.11.0)", "pytest (==7.2.0)", "pytest-cov (==4.0.0)", "pytest-timeout (==2.1.0)", "pyyaml (==5.1)"]
+
 [[package]]
 name = "pytz"
 version = "2023.3.post1"
@@ -2295,6 +2350,23 @@ pure-eval = "*"
 [package.extras]
 tests = ["cython", "littleutils", "pygments", "pytest", "typeguard"]
 
+[[package]]
+name = "starlette"
+version = "0.27.0"
+description = "The little ASGI library that shines."
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "starlette-0.27.0-py3-none-any.whl", hash = "sha256:918416370e846586541235ccd38a474c08b80443ed31c578a418e2209b3eef91"},
+    {file = "starlette-0.27.0.tar.gz", hash = "sha256:6a6b0d042acb8d469a01eba54e9cda6cbd24ac602c4cd016723117d6a7e73b75"},
+]
+
+[package.dependencies]
+anyio = ">=3.4.0,<5"
+
+[package.extras]
+full = ["httpx (>=0.22.0)", "itsdangerous", "jinja2", "python-multipart", "pyyaml"]
+
 [[package]]
 name = "tomli"
 version = "2.0.1"
@@ -2597,4 +2669,4 @@ multidict = ">=4.0"
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.10"
-content-hash = "c97e1a4310f7d362f157f164b01393eb21fd182e197384c7867cfe002ea46506"
+content-hash = "e86dcefdd1198516ad76fafb4877fd46f5b8623a7be52e069c3ac39509ba7c4e"
diff --git a/benchmark/pyproject.toml b/benchmark/pyproject.toml
index 0bc50af0..88740f4b 100644
--- a/benchmark/pyproject.toml
+++ b/benchmark/pyproject.toml
@@ -29,6 +29,8 @@ selenium = "^4.11.2"
 agent-protocol-client = "^0.2.2"
 pytest-asyncio = "^0.21.1"
 uvicorn = "^0.23.2"
+fastapi = "^0.103.1"
+python-multipart = "^0.0.6"
 
 
 [tool.poetry.group.dev.dependencies]
diff --git a/benchmark/server.py b/benchmark/server.py
index 5537a6ef..e7f639eb 100644
--- a/benchmark/server.py
+++ b/benchmark/server.py
@@ -17,10 +17,6 @@ logger = logging.getLogger(__name__)
 app = FastAPI()
 artifacts: List[Dict[str, Any]] = []
 
-with open("agent/gpt-engineer/agbenchmark/config.json", "r") as file:
-    config = json.load(file)
-    logger.info("Loaded configuration")
-
 
 class Task(BaseModel):
     input: str
@@ -34,7 +30,11 @@ async def upload_file(
         "Uploading file for task_id: %s with relative path: %s", task_id, relative_path
     )
     absolute_directory_path = Path(__file__).parent.absolute()
-    save_path = absolute_directory_path / "agent/gpt-engineer" / config["workspace"]
+    save_path = (
+        absolute_directory_path
+        / "agent/gpt-engineer"
+        / "projects/my-new-project/workspace"
+    )
 
     random_string = str(randint(0, 100000))
     while random_string in artifacts: