diff --git a/.github/workflows/autogpt.yml b/.github/workflows/autogpt.yml
new file mode 100644
index 00000000..2b192511
--- /dev/null
+++ b/.github/workflows/autogpt.yml
@@ -0,0 +1,62 @@
+name: Auto-GPT Regression Test
+
+on:
+  workflow_dispatch:
+
+jobs:
+  regression-tests:
+    permissions:
+      pull-requests: write
+      contents: write
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+    strategy:
+      matrix:
+        python-version: ["3.10"]
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+          ref: ${{ github.event.pull_request.head.ref }}
+          repository: ${{ github.event.pull_request.head.repo.full_name }}
+          submodules: true
+
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v2
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - id: get_date
+        name: Get date
+        run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT
+
+      - name: Install Poetry
+        run: |
+          curl -sSL https://install.python-poetry.org | python -
+
+      - name: Set up Poetry cache
+        uses: actions/cache@v2
+        with:
+          path: |
+            ~/.cache/pypoetry
+            .venv
+          key: ${{ runner.os }}-poetry-${{ hashFiles('**/pyproject.toml') }}-${{ hashFiles('**/poetry.lock') }}-${{ steps.get_date.outputs.date }}
+
+      - name: Set up venv and install Python dependencies
+        run: |
+          python -m venv venv
+          source venv/bin/activate
+          poetry install
+
+      - name: Build project
+        run: |
+          source venv/bin/activate
+          poetry build
+          cd agent/Auto-GPT
+          pip install -r requirements.txt
+          pip install ../../dist/agbenchmark-0.1.0-py3-none-any.whl
+          agbenchmark start --reg
+        env:
+          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
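The cache key in the workflow above combines the runner OS, hashes of pyproject.toml and poetry.lock, and a daily date stamp from the get_date step, so the Poetry cache is invalidated whenever dependencies change and rolls over at most once per day. A rough Python sketch of that scheme (illustrative only; GitHub's hashFiles has its own implementation, and the file names are assumed to sit in the repo root):

```python
# Illustrative sketch of the workflow's cache-key scheme, not GitHub's actual
# implementation: the key changes when pyproject.toml or poetry.lock changes,
# and rolls over daily via the date stamp.
import datetime
import hashlib
from pathlib import Path


def cache_key(os_name: str = "Linux") -> str:
    def digest(name: str) -> str:
        # Assumes the file exists in the current directory.
        return hashlib.sha256(Path(name).read_bytes()).hexdigest()[:8]

    date = datetime.date.today().isoformat()  # e.g. "2023-07-01"
    return f"{os_name}-poetry-{digest('pyproject.toml')}-{digest('poetry.lock')}-{date}"
```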
diff --git a/.gitignore b/.gitignore
index 68bc17f9..c41065ca 100644
--- a/.gitignore
+++ b/.gitignore
@@ -157,4 +157,6 @@ cython_debug/
 # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
 # and can be added to the global gitignore or merged into this file.  For a more nuclear
 # option (not recommended) you can uncomment the following to ignore the entire idea folder.
-#.idea/
+.idea/
+.DS_Store
+```
diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 00000000..b2dc714c
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,4 @@
+[submodule "Auto-GPT"]
+	path = agent/Auto-GPT
+	url = https://github.com/Significant-Gravitas/Auto-GPT.git
+	branch = benchmark-integration
diff --git a/agbenchmark/agent_interface.py b/agbenchmark/agent_interface.py
index 2ff2acf3..0961dc0f 100644
--- a/agbenchmark/agent_interface.py
+++ b/agbenchmark/agent_interface.py
@@ -1,9 +1,10 @@
-import os
 import importlib
-import time
-from agbenchmark.mocks.MockManager import MockManager
-from multiprocessing import Process, Pipe
+from agbenchmark.mocks.MockManager import MockManager
+import os
+import sys
+import subprocess
+import time
 from dotenv import load_dotenv

 load_dotenv()
@@ -26,45 +27,44 @@ def run_agent(task, mock_func, config):
         timeout = config["cutoff"]
         print(f"Running Python function '{config['func_path']}' with timeout {timeout}")

-        parent_conn, child_conn = Pipe()
+        # Get the current working directory
+        cwd = os.getcwd()
+
+        # Add current directory to Python's import path
+        sys.path.append(cwd)
+
-        # Import the specific agent dynamically
         module_name = config["func_path"].replace("/", ".").rstrip(".py")
         module = importlib.import_module(module_name)
-        run_specific_agent = getattr(module, "run_specific_agent")

-        process = Process(target=run_specific_agent, args=(task, child_conn))
-        process.start()
+
+        command = [sys.executable, "benchmarks.py", str(task)]
+        process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True, cwd=cwd)
+
         start_time = time.time()
+        timeout = config["cutoff"]

         while True:
-            if (
-                parent_conn.poll()
-            ):  # Check if there's a new message from the child process
-                response, cycle_count = parent_conn.recv()
-                print(f"Cycle {cycle_count}: {response}")
+            output = process.stdout.readline()
+            print(output.strip())

-                if cycle_count >= config["cutoff"]:
-                    print(
-                        f"Cycle count has reached the limit of {config['cutoff']}. Terminating."
-                    )
-                    child_conn.send("terminate")
-                    break
-
-            if time.time() - start_time > timeout:
-                print(
-                    "The Python function has exceeded the time limit and was terminated."
-                )
-                child_conn.send(
-                    "terminate"
-                )  # Send a termination signal to the child process
-                break
-
-            if not process.is_alive():
+            # Check if process has ended
+            if process.poll() is not None:
                 print("The Python function has finished running.")
                 break

-        process.join()
+            # Check if process has exceeded timeout
+            if time.time() - start_time > timeout:
+                print("The Python function has exceeded the time limit and was terminated.")
+                process.terminate()
+                break
+
+            # Optional: sleep for a while
+            time.sleep(0.1)
+
+        # Wait for process to terminate, then get return code
+        process.wait()
+

 ENVIRONMENT = os.getenv("ENVIRONMENT") or "production"
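The rewritten run_agent no longer forks a multiprocessing Process and exchanges messages over a Pipe; it launches `python benchmarks.py <task>` as a subprocess in the agent's working directory and streams its stdout until the process exits or the cutoff elapses. A minimal sketch of the benchmarks.py contract this implies (the function name and body are assumptions; each agent ships its own implementation):

```python
# benchmarks.py -- minimal sketch of the entry point the subprocess runner
# above expects: the task arrives as the first CLI argument, and anything
# written to stdout is streamed back by run_agent. run_specific_agent is a
# hypothetical name carried over from the old interface; real agents differ.
import sys


def run_specific_agent(task: str) -> None:
    # Hypothetical: hand the task to the agent and print progress as it runs.
    print(f"Running agent on task: {task}")


if __name__ == "__main__":
    run_specific_agent(sys.argv[1])
```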
diff --git a/agbenchmark/config.json b/agbenchmark/config.json
index d9b42ca4..e1c5f154 100644
--- a/agbenchmark/config.json
+++ b/agbenchmark/config.json
@@ -1,5 +1,5 @@
 {
-  "workspace": "C:\\Users\\silen\\miniagi",
-  "func_path": "agent/benchmarks.py",
+  "workspace": "autogpt/workspace/auto_gpt_workspace",
+  "func_path": "benchmarks.py",
   "cutoff": 60
 }
diff --git a/agbenchmark/conftest.py b/agbenchmark/conftest.py
index 0f1fc7bb..4284d1eb 100644
--- a/agbenchmark/conftest.py
+++ b/agbenchmark/conftest.py
@@ -1,15 +1,18 @@
 import json
 import os
+from pathlib import Path
+
 import pytest
 import shutil

 from agbenchmark.tests.regression.RegressionManager import RegressionManager
+from agbenchmark.start_benchmark import CONFIG_PATH, REGRESSION_TESTS_PATH


 @pytest.fixture(scope="module")
 def config(request):
-    config_file = os.path.abspath("agbenchmark/config.json")
-    print(f"Config file: {config_file}")
-    with open(config_file, "r") as f:
+
+    print(f"Config file: {CONFIG_PATH}")
+    with open(CONFIG_PATH, "r") as f:
         config = json.load(f)

     if request.config.getoption("--mock"):
@@ -36,10 +39,7 @@ def workspace(config):
 def pytest_addoption(parser):
     parser.addoption("--mock", action="store_true", default=False)

-
-regression_json = "agbenchmark/tests/regression/regression_tests.json"
-
-regression_manager = RegressionManager(regression_json)
+regression_manager = RegressionManager(REGRESSION_TESTS_PATH)


 # this is to get the challenge_data from every test
@@ -53,13 +53,16 @@ def pytest_runtest_makereport(item, call):
         challenge_data = item.funcargs.get("challenge_data", None)
         difficulty = challenge_data.info.difficulty if challenge_data else "unknown"
         dependencies = challenge_data.dependencies if challenge_data else []
-
+        parts = item.nodeid.split("::")[0].split("/")
+        agbenchmark_index = parts.index("agbenchmark")
+        file_path = "/".join(parts[agbenchmark_index:])
         test_details = {
             "difficulty": difficulty,
             "dependencies": dependencies,
-            "test": item.nodeid,
+            "test": file_path,
         }
+        print("pytest_runtest_makereport", test_details)

         if call.excinfo is None:
             regression_manager.add_test(item.nodeid.split("::")[1], test_details)
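The nodeid trimming above exists because pytest may now be launched from outside the repo root (for example from agent/Auto-GPT in CI), in which case the nodeid carries leading path components; slicing from the "agbenchmark" segment keeps the stored regression paths repo-relative. A worked example (the leading "../.." in the nodeid is hypothetical):

```python
# Worked example of the nodeid-to-path conversion in pytest_runtest_makereport:
# whatever precedes "agbenchmark" is discarded, so regression_tests.json always
# stores repo-relative file paths regardless of where pytest was invoked.
nodeid = "../../agbenchmark/tests/basic_abilities/write_file/write_file_test.py::TestWriteFile::test_method"
parts = nodeid.split("::")[0].split("/")
file_path = "/".join(parts[parts.index("agbenchmark"):])
assert file_path == "agbenchmark/tests/basic_abilities/write_file/write_file_test.py"
```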
diff --git a/agbenchmark/start_benchmark.py b/agbenchmark/start_benchmark.py
index fe395cd2..28b038e9 100644
--- a/agbenchmark/start_benchmark.py
+++ b/agbenchmark/start_benchmark.py
@@ -7,6 +7,13 @@ from dotenv import load_dotenv, set_key

 load_dotenv()

+CURRENT_DIRECTORY = Path(__file__).resolve().parent
+
+new_path = CURRENT_DIRECTORY / "config.json"
+
+CONFIG_PATH = str(new_path.resolve())
+
+REGRESSION_TESTS_PATH = str(Path(os.getcwd()) / "regression_tests.json")

 @click.group()
 def cli():
@@ -15,16 +22,12 @@ def cli():

 @cli.command()
 @click.option("--category", default=None, help="Specific category to run")
-@click.option("--noreg", is_flag=True, help="Skip regression tests")
+@click.option("--reg", is_flag=True, help="Runs only regression tests")
 @click.option("--mock", is_flag=True, help="Run with mock")
-def start(category, noreg, mock):
+def start(category, reg, mock):
     """Start the benchmark tests. If a category flag is provided, run the categories with that mark."""
-    config_file = "agbenchmark/config.json"
-
-    config_dir = os.path.abspath(config_file)
-
     # Check if configuration file exists and is not empty
-    if not os.path.exists(config_dir) or os.stat(config_dir).st_size == 0:
+    if not os.path.exists(CONFIG_PATH) or os.stat(CONFIG_PATH).st_size == 0:
         config = {}

         config["workspace"] = click.prompt(
@@ -42,11 +45,11 @@ def start(category, noreg, mock):
             default="60",
         )

-        with open(config_dir, "w") as f:
+        with open(CONFIG_PATH, "w") as f:
             json.dump(config, f)
     else:
         # If the configuration file exists and is not empty, load it
-        with open(config_dir, "r") as f:
+        with open(CONFIG_PATH, "r") as f:
             config = json.load(f)

     set_key(".env", "MOCK_TEST", "True" if mock else "False")
@@ -58,11 +61,9 @@ def start(category, noreg, mock):
     if not os.path.exists(workspace_path):
         os.makedirs(workspace_path, exist_ok=True)

-    regression_path = os.path.abspath(
-        "agbenchmark/tests/regression/regression_tests.json"
-    )
-    if not os.path.exists(regression_path):
-        with open(regression_path, "a"):
+
+    if not os.path.exists(REGRESSION_TESTS_PATH):
+        with open(REGRESSION_TESTS_PATH, "a"):
             pass

     print("Current configuration:")
@@ -70,31 +71,40 @@ def start(category, noreg, mock):
         print(f"{key}: {value}")

     print("Starting benchmark tests...", category)
-    pytest_args = ["agbenchmark", "-vs"]
+    tests_to_run = []
+    pytest_args = ["-vs"]
     if category:
         pytest_args.extend(
             ["-m", category]
-        )  # run categorys that are of a specific marker
-        if noreg:
-            pytest_args.extend(
-                ["-k", "not regression"]
-            )  # run categorys that are of a specific marker but don't include regression categorys
-        print(f"Running {'non-regression' + category if noreg else category} categorys")
+        )
     else:
-        if noreg:
-            print("Running all non-regression categorys")
-            pytest_args.extend(
-                ["-k", "not regression"]
-            )  # run categorys that are not regression categorys
+        if reg:
+            print("Running all regression tests")
+            tests_to_run = get_regression_tests()
         else:
-            print("Running all categorys")  # run all categorys
+            print("Running all categories")

     if mock:
         pytest_args.append("--mock")

     # Run pytest with the constructed arguments
+    if not tests_to_run:
+        tests_to_run = [str(CURRENT_DIRECTORY)]
+    pytest_args.extend(tests_to_run)
     pytest.main(pytest_args)


+def get_regression_tests():
+    if not Path(REGRESSION_TESTS_PATH).exists():
+        with open(REGRESSION_TESTS_PATH, 'w') as file:
+            json.dump({}, file)
+
+    with open(REGRESSION_TESTS_PATH, 'r') as file:
+        data = json.load(file)
+
+    regression_tests = [str(CURRENT_DIRECTORY / ".." / value['test']) for key, value in data.items()]
+
+    return regression_tests
+

 if __name__ == "__main__":
     start()
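With --reg, get_regression_tests turns each stored repo-relative path into a pytest target anchored at the package's parent directory: CURRENT_DIRECTORY is the agbenchmark/ package itself, so "/.." climbs back to the repo root where the stored paths begin. A sketch of that path arithmetic, using a hypothetical install location:

```python
# Path arithmetic behind get_regression_tests(), with a hypothetical layout.
from pathlib import Path

CURRENT_DIRECTORY = Path("/repo/agbenchmark")  # hypothetical package location
entry = {"test": "agbenchmark/tests/basic_abilities/write_file/write_file_test.py"}

# Path concatenation keeps the ".." literally; pytest resolves it at collection.
target = str(CURRENT_DIRECTORY / ".." / entry["test"])
print(target)
# -> /repo/agbenchmark/../agbenchmark/tests/basic_abilities/write_file/write_file_test.py
```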
diff --git a/agbenchmark/tests/basic_abilities/write_file/write_file_test.py b/agbenchmark/tests/basic_abilities/write_file/write_file_test.py
index 306375dd..8d3eb540 100644
--- a/agbenchmark/tests/basic_abilities/write_file/write_file_test.py
+++ b/agbenchmark/tests/basic_abilities/write_file/write_file_test.py
@@ -1,3 +1,5 @@
+from pathlib import Path
+
 import pytest
 from agbenchmark.tests.basic_abilities.BasicChallenge import BasicChallenge
 import os
@@ -9,10 +11,11 @@ class TestWriteFile(BasicChallenge):
     def get_file_path(self) -> str:  # all tests must implement this method
         return os.path.join(os.path.dirname(__file__), "w_file_data.json")

-    @pytest.mark.depends(on=[], name="basic_write_file")
     def test_method(self, config):
         self.setup_challenge(config)
-        files_contents = self.open_files(config["workspace"], self.data.ground.files)
+
+        workspace = Path(os.getcwd()) / config['workspace']
+        files_contents = self.open_files(workspace, self.data.ground.files)

         scores = []
         for file_content in files_contents:
diff --git a/agbenchmark/tests/regression/regression_tests.json b/agbenchmark/tests/regression/regression_tests.json
deleted file mode 100644
index 9e26dfee..00000000
--- a/agbenchmark/tests/regression/regression_tests.json
+++ /dev/null
@@ -1 +0,0 @@
-{}
\ No newline at end of file
diff --git a/agent/Auto-GPT b/agent/Auto-GPT
new file mode 160000
index 00000000..c29ec925
--- /dev/null
+++ b/agent/Auto-GPT
@@ -0,0 +1 @@
+Subproject commit c29ec925fd9e24f219ef0f2884b08908cd66239b
diff --git a/agent/mini-agi b/agent/mini-agi
deleted file mode 160000
index d2add8f1..00000000
--- a/agent/mini-agi
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit d2add8f18caf96934a2d193583720cfc9b89451b
diff --git a/regression_tests.json b/regression_tests.json
new file mode 100644
index 00000000..e3633a2a
--- /dev/null
+++ b/regression_tests.json
@@ -0,0 +1,7 @@
+{
+    "TestWriteFile": {
+        "difficulty": "basic",
+        "dependencies": [],
+        "test": "agbenchmark/tests/basic_abilities/write_file/write_file_test.py"
+    }
+}
\ No newline at end of file
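The new top-level regression_tests.json above is the file the RegressionManager writes when a test passes. For reference, a minimal RegressionManager consistent with how conftest.py constructs and calls it (the real class lives in agbenchmark/tests/regression/RegressionManager.py and may differ in detail; this is a sketch, not the actual implementation):

```python
# Sketch of a RegressionManager compatible with the conftest.py calls above:
# constructed with the JSON path, add_test persists entries like the
# TestWriteFile record shown in regression_tests.json.
import json


class RegressionManager:
    def __init__(self, filename: str) -> None:
        self.filename = filename
        try:
            with open(filename) as f:
                self.tests = json.load(f)
        except (FileNotFoundError, json.JSONDecodeError):
            self.tests = {}  # a missing or empty file starts a fresh registry

    def add_test(self, test_name: str, test_details: dict) -> None:
        self.tests[test_name] = test_details
        self.save()

    def save(self) -> None:
        with open(self.filename, "w") as f:
            json.dump(self.tests, f, indent=4)
```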