From 79ba85a22ef138ae4b51cb0189c205c8b2b44d01 Mon Sep 17 00:00:00 2001 From: merwanehamadi Date: Sat, 3 Jun 2023 07:48:32 -0700 Subject: [PATCH 01/97] Cache Python Packages in the CI pipeline (#4488) --- .github/workflows/ci.yml | 18 ++++++++++++++++++ .../integration/challenges/current_score.json | 2 +- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5219b982..8651931c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -32,6 +32,15 @@ jobs: with: python-version: ${{ env.min-python-version }} + - name: Set Date + run: echo "DATE=$(date +'%Y-%m-%d')" >> $GITHUB_ENV + + - name: Cache Python packages + uses: actions/cache@v3 + with: + path: ~/.cache/pip + key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}-${{ env.DATE }} + - name: Install dependencies run: | python -m pip install --upgrade pip @@ -112,6 +121,15 @@ jobs: with: python-version: ${{ matrix.python-version }} + - name: Set Date + run: echo "DATE=$(date +'%Y-%m-%d')" >> $GITHUB_ENV + + - name: Cache Python packages + uses: actions/cache@v3 + with: + path: ~/.cache/pip + key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}-${{ env.DATE }} + - name: Install dependencies run: | python -m pip install --upgrade pip diff --git a/tests/integration/challenges/current_score.json b/tests/integration/challenges/current_score.json index 72661399..8412e908 100644 --- a/tests/integration/challenges/current_score.json +++ b/tests/integration/challenges/current_score.json @@ -45,4 +45,4 @@ "max_level_beaten": 1 } } -} \ No newline at end of file +} From 55a8e242b005b1ae651089c8b36d9e7aba69a04d Mon Sep 17 00:00:00 2001 From: Auto-GPT-Bot Date: Sat, 3 Jun 2023 14:51:53 +0000 Subject: [PATCH 02/97] Update current score --- tests/integration/challenges/current_score.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/challenges/current_score.json b/tests/integration/challenges/current_score.json index 8412e908..72661399 100644 --- a/tests/integration/challenges/current_score.json +++ b/tests/integration/challenges/current_score.json @@ -45,4 +45,4 @@ "max_level_beaten": 1 } } -} +} \ No newline at end of file From 378126822f57380d013e2a749c04438f64c7a536 Mon Sep 17 00:00:00 2001 From: Auto-GPT-Bot Date: Sat, 3 Jun 2023 14:51:55 +0000 Subject: [PATCH 03/97] Update submodule reference --- tests/Auto-GPT-test-cassettes | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/Auto-GPT-test-cassettes b/tests/Auto-GPT-test-cassettes index be280df4..28497a8b 160000 --- a/tests/Auto-GPT-test-cassettes +++ b/tests/Auto-GPT-test-cassettes @@ -1 +1 @@ -Subproject commit be280df43d6a23b8074d9cba10d18ed8724a54c9 +Subproject commit 28497a8b4ef0e62375dd5024794426cb401a3779 From 59d31b021d80513d01e2c9a24d523dade671a8d6 Mon Sep 17 00:00:00 2001 From: merwanehamadi Date: Sun, 4 Jun 2023 09:20:13 -0700 Subject: [PATCH 04/97] Skip flaky challenges (#4573) --- tests/integration/challenges/current_score.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/integration/challenges/current_score.json b/tests/integration/challenges/current_score.json index 72661399..6a133f3e 100644 --- a/tests/integration/challenges/current_score.json +++ b/tests/integration/challenges/current_score.json @@ -12,7 +12,7 @@ "debug_code": { "debug_code_challenge_a": { "max_level": 1, - "max_level_beaten": 1 + "max_level_beaten": null } }, "information_retrieval": { @@ -22,7 +22,7 @@ }, 
"information_retrieval_challenge_b": { "max_level": 1, - "max_level_beaten": 1 + "max_level_beaten": null } }, "kubernetes": { @@ -45,4 +45,4 @@ "max_level_beaten": 1 } } -} \ No newline at end of file +} From 74e8a886e60333f3668e9cb8f1a2400025fc82a9 Mon Sep 17 00:00:00 2001 From: Benny van der Lans <49377421+bfalans@users.noreply.github.com> Date: Sun, 4 Jun 2023 19:37:35 +0200 Subject: [PATCH 05/97] Add `replace_in_file` command (#4565) Resubmission of #3643 --------- Co-authored-by: Reinier van der Leer --- autogpt/commands/file_operations.py | 63 +++++++++++++++++++++++++++++ tests/unit/test_file_operations.py | 47 +++++++++++++++++++++ 2 files changed, 110 insertions(+) diff --git a/autogpt/commands/file_operations.py b/autogpt/commands/file_operations.py index 824db50c..cb5fb36c 100644 --- a/autogpt/commands/file_operations.py +++ b/autogpt/commands/file_operations.py @@ -4,6 +4,7 @@ from __future__ import annotations import hashlib import os import os.path +import re from typing import TYPE_CHECKING, Generator, Literal import requests @@ -224,6 +225,68 @@ def write_to_file(filename: str, text: str, config: Config) -> str: return f"Error: {err}" +@command( + "replace_in_file", + "Replace text or code in a file", + '"filename": "", ' + '"old_text": "", "new_text": "", ' + '"occurrence_index": ""', +) +def replace_in_file( + filename: str, old_text: str, new_text: str, config: Config, occurrence_index=None +): + """Update a file by replacing one or all occurrences of old_text with new_text using Python's built-in string + manipulation and regular expression modules for cross-platform file editing similar to sed and awk. + + Args: + filename (str): The name of the file + old_text (str): String to be replaced. \n will be stripped from the end. + new_text (str): New string. \n will be stripped from the end. + occurrence_index (int): Optional index of the occurrence to replace. If None, all occurrences will be replaced. + + Returns: + str: A message indicating whether the file was updated successfully or if there were no matches found for old_text + in the file. + + Raises: + Exception: If there was an error updating the file. + """ + try: + with open(filename, "r", encoding="utf-8") as f: + content = f.read() + + old_text = old_text.rstrip("\n") + new_text = new_text.rstrip("\n") + + if occurrence_index is None: + new_content = content.replace(old_text, new_text) + else: + matches = list(re.finditer(re.escape(old_text), content)) + if not matches: + return f"No matches found for {old_text} in {filename}" + + if int(occurrence_index) >= len(matches): + return f"Occurrence index {occurrence_index} is out of range for {old_text} in {filename}" + + match = matches[int(occurrence_index)] + start, end = match.start(), match.end() + new_content = content[:start] + new_text + content[end:] + + if content == new_content: + return f"No matches found for {old_text} in {filename}" + + with open(filename, "w", encoding="utf-8") as f: + f.write(new_content) + + with open(filename, "r", encoding="utf-8") as f: + checksum = text_checksum(f.read()) + log_operation("update", filename, config, checksum=checksum) + + return f"File {filename} updated successfully." 
+ except Exception as e: + return "Error: " + str(e) + + @command( "append_to_file", "Append to file", '"filename": "", "text": ""' ) diff --git a/tests/unit/test_file_operations.py b/tests/unit/test_file_operations.py index 35c77a15..1d0219eb 100644 --- a/tests/unit/test_file_operations.py +++ b/tests/unit/test_file_operations.py @@ -268,6 +268,53 @@ def test_write_file_succeeds_if_content_different( assert result == "File written to successfully." +# Update file testing +def test_replace_in_file_all_occurrences(test_file, test_file_path, config): + old_content = "This is a test file.\n we test file here\na test is needed" + expected_content = ( + "This is a update file.\n we update file here\na update is needed" + ) + test_file.write(old_content) + test_file.close() + file_ops.replace_in_file(test_file_path, "test", "update", config) + with open(test_file_path) as f: + new_content = f.read() + print(new_content) + print(expected_content) + assert new_content == expected_content + + +def test_replace_in_file_one_occurrence(test_file, test_file_path, config): + old_content = "This is a test file.\n we test file here\na test is needed" + expected_content = "This is a test file.\n we update file here\na test is needed" + test_file.write(old_content) + test_file.close() + file_ops.replace_in_file( + test_file_path, "test", "update", config, occurrence_index=1 + ) + with open(test_file_path) as f: + new_content = f.read() + + assert new_content == expected_content + + +def test_replace_in_file_multiline_old_text(test_file, test_file_path, config): + old_content = "This is a multi_line\ntest for testing\nhow well this function\nworks when the input\nis multi-lined" + expected_content = "This is a multi_line\nfile. succeeded test\nis multi-lined" + test_file.write(old_content) + test_file.close() + file_ops.replace_in_file( + test_file_path, + "\ntest for testing\nhow well this function\nworks when the input\n", + "\nfile. 
succeeded test\n", + config, + ) + with open(test_file_path) as f: + new_content = f.read() + + assert new_content == expected_content + + def test_append_to_file(test_nested_file: Path, config): append_text = "This is appended text.\n" file_ops.write_to_file(test_nested_file, append_text, config) From 60ac0c4da15930d5e40af87fba6248ec37a951ee Mon Sep 17 00:00:00 2001 From: Auto-GPT-Bot Date: Mon, 5 Jun 2023 14:39:45 +0000 Subject: [PATCH 06/97] Update challenge scores --- tests/integration/challenges/current_score.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/challenges/current_score.json b/tests/integration/challenges/current_score.json index 6a133f3e..bd91afcb 100644 --- a/tests/integration/challenges/current_score.json +++ b/tests/integration/challenges/current_score.json @@ -45,4 +45,4 @@ "max_level_beaten": 1 } } -} +} \ No newline at end of file From ee6b97ef5e7e19031584ec88390bfa644703c1af Mon Sep 17 00:00:00 2001 From: merwanehamadi Date: Tue, 6 Jun 2023 07:27:08 -0700 Subject: [PATCH 07/97] Fix Python CI "update cassettes" step (#4591) * Fix updated cassettes step * Clarifications * Use github.ref_name instead of github.ref * Fix duplicate runs on `master` --------- Co-authored-by: Reinier van der Leer --- .github/workflows/ci.yml | 109 +++++++----------- .../basic_abilities/test_write_file.py | 3 +- .../test_information_retrieval_challenge_a.py | 3 +- .../test_information_retrieval_challenge_b.py | 3 +- .../test_kubernetes_template_challenge_a.py | 2 + .../memory/test_memory_challenge_a.py | 3 +- .../memory/test_memory_challenge_b.py | 3 +- .../memory/test_memory_challenge_c.py | 3 +- 8 files changed, 55 insertions(+), 74 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index df9148c4..8d44f2a4 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -18,7 +18,7 @@ concurrency: jobs: lint: # eliminate duplicate runs on master - if: github.event_name == 'push' || github.ref_name != 'master' || (github.event.pull_request.head.repo.fork == (github.event_name == 'pull_request_target')) + if: github.event_name == 'push' || github.base_ref != 'master' || (github.event.pull_request.head.repo.fork == (github.event_name == 'pull_request_target')) runs-on: ubuntu-latest env: @@ -37,14 +37,15 @@ jobs: with: python-version: ${{ env.min-python-version }} - - name: Set Date - run: echo "DATE=$(date +'%Y-%m-%d')" >> $GITHUB_ENV + - id: get_date + name: Get date + run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT - - name: Cache Python packages + - name: Set up Python dependency cache uses: actions/cache@v3 with: path: ~/.cache/pip - key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}-${{ env.DATE }} + key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}-${{ steps.get_date.outputs.date }} - name: Install dependencies run: | @@ -73,7 +74,7 @@ jobs: test: # eliminate duplicate runs on master - if: github.event_name == 'push' || github.ref_name != 'master' || (github.event.pull_request.head.repo.fork == (github.event_name == 'pull_request_target')) + if: github.event_name == 'push' || github.base_ref != 'master' || (github.event.pull_request.head.repo.fork == (github.event_name == 'pull_request_target')) permissions: # Gives the action the necessary permissions for publishing new @@ -90,7 +91,7 @@ jobs: python-version: ["3.10"] steps: - - name: Check out repository + - name: Checkout repository uses: actions/checkout@v3 with: fetch-depth: 0 @@ -98,8 +99,12 @@ jobs: repository: ${{ 
github.event.pull_request.head.repo.full_name }} submodules: true - - id: checkout_cassettes - name: Check out cassettes + - name: Configure git user Auto-GPT-Bot + run: | + git config --global user.name "Auto-GPT-Bot" + git config --global user.email "github-bot@agpt.co" + + - name: Checkout cassettes if: ${{ startsWith(github.event_name, 'pull_request') }} run: | cassette_branch="${{ github.event.pull_request.user.login }}-${{ github.event.pull_request.head.ref }}" @@ -111,21 +116,14 @@ jobs: git checkout $cassette_branch - if git merge --no-commit --no-ff ${{ github.event.pull_request.base.ref }}; then - echo "Using cassettes from mirror branch, synced to upstream branch '${{ github.event.pull_request.base.ref }}'" - else - echo "Could not merge upstream changes to cassettes. Using cassettes from ${{ github.event.pull_request.base.ref }}." - git merge --abort - git checkout ${{ github.event.pull_request.base.ref }} - - # Delete branch to prevent conflict when re-creating it - git branch -D $cassette_branch - fi - echo "cassette_branch=$(git branch --show-current)" >> $GITHUB_OUTPUT + # Pick non-conflicting cassette updates from the base branch + git merge --no-commit --strategy-option=ours origin/${{ github.event.pull_request.base.ref }} + echo "Using cassettes from mirror branch '$cassette_branch'," \ + "synced to upstream branch '${{ github.event.pull_request.base.ref }}'." else - echo "Branch '$cassette_branch' does not exist in cassette submodule."\ - "Using cassettes from ${{ github.event.pull_request.base.ref }}." - echo "cassette_branch=${{ github.event.pull_request.base.ref }}" >> $GITHUB_OUTPUT + git checkout -b $cassette_branch + echo "Branch '$cassette_branch' does not exist in cassette submodule." \ + "Using cassettes from '${{ github.event.pull_request.base.ref }}'." fi - name: Set up Python ${{ matrix.python-version }} @@ -133,21 +131,22 @@ jobs: with: python-version: ${{ matrix.python-version }} - - name: Set Date - run: echo "DATE=$(date +'%Y-%m-%d')" >> $GITHUB_ENV + - id: get_date + name: Get date + run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT - - name: Cache Python packages + - name: Set up Python dependency cache uses: actions/cache@v3 with: path: ~/.cache/pip - key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}-${{ env.DATE }} + key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}-${{ steps.get_date.outputs.date }} - - name: Install dependencies + - name: Install Python dependencies run: | python -m pip install --upgrade pip pip install -r requirements.txt - - name: Run pytest tests with coverage + - name: Run pytest with coverage run: | pytest -n auto --cov=autogpt --cov-report term-missing --cov-branch --cov-report xml --cov-report term python tests/integration/challenges/utils/build_current_score.py @@ -162,10 +161,9 @@ jobs: - id: setup_git_auth name: Set up git token authentication + # Cassettes may be pushed even when tests fail + if: success() || failure() run: | - git config --global user.name "Auto-GPT-Bot" - git config --global user.email "github-bot@agpt.co" - config_key="http.${{ github.server_url }}/.extraheader" base64_pat=$(echo -n "pat:${{ secrets.PAT_REVIEW }}" | base64 -w0) @@ -186,58 +184,39 @@ jobs: if ! git diff --quiet $score_file; then git add $score_file git commit -m "Update challenge scores" - git push origin HEAD:${{ github.ref }} + git push origin HEAD:${{ github.ref_name }} else echo "The challenge scores didn't change." 
fi - id: push_cassettes name: Push updated cassettes + # For pull requests, push updated cassettes even when tests fail + if: github.event_name == 'push' || success() || failure() run: | if [ "${{ startsWith(github.event_name, 'pull_request') }}" = "true" ]; then is_pull_request=true cassette_branch="${{ github.event.pull_request.user.login }}-${{ github.event.pull_request.head.ref }}" - cassette_source_branch="${{ steps.checkout_cassettes.outputs.cassette_branch }}" - base_branch="${{ github.event.pull_request.base.ref }}" else - current_branch=$(echo ${{ github.ref }} | sed -e "s/refs\/heads\///g") - cassette_branch=$current_branch + cassette_branch="${{ github.ref_name }}" fi cd tests/Auto-GPT-test-cassettes - git fetch origin $cassette_source_branch:$cassette_source_branch - # Commit & push changes to cassettes if any - if ! git diff --quiet $cassette_source_branch --; then - if [ "$cassette_branch" != "$cassette_source_branch" ]; then - git checkout -b $cassette_branch - fi + if ! git diff --quiet; then git add . git commit -m "Auto-update cassettes" - - if [ $is_pull_request ]; then - git push --force origin HEAD:$cassette_branch - else - git push origin HEAD:$cassette_branch - fi - - cd ../.. - if [ $is_pull_request ]; then - git fetch origin $base_branch - cassette_diff=$(git diff origin/$base_branch) - else + git push origin HEAD:$cassette_branch + if [ ! $is_pull_request ]; then + cd ../.. git add tests/Auto-GPT-test-cassettes git commit -m "Update cassette submodule" - git push origin HEAD:$current_branch + git push origin HEAD:$cassette_branch fi - else - echo "No cassette changes to commit" - fi - - if [ -n "$cassette_diff" ]; then echo "updated=true" >> $GITHUB_OUTPUT else echo "updated=false" >> $GITHUB_OUTPUT + echo "No cassette changes to commit" fi - name: Post Set up git token auth @@ -246,7 +225,7 @@ jobs: git config --unset-all '${{ steps.setup_git_auth.outputs.config_key }}' git submodule foreach git config --unset-all '${{ steps.setup_git_auth.outputs.config_key }}' - - name: Apply or remove behaviour change label and comment on PR + - name: Apply "behaviour change" label and comment on PR if: ${{ startsWith(github.event_name, 'pull_request') }} run: | PR_NUMBER=${{ github.event.pull_request.number }} @@ -263,10 +242,4 @@ jobs: echo $TOKEN | gh auth login --with-token gh api repos/$REPO/issues/$PR_NUMBER/comments -X POST -F body="You changed AutoGPT's behaviour. The cassettes have been updated and will be merged to the submodule when this Pull Request gets merged." - else - echo "Removing label..." 
- curl -X DELETE \ - -H "Authorization: Bearer $TOKEN" \ - -H "Accept: application/vnd.github.v3+json" \ - https://api.github.com/repos/$REPO/issues/$PR_NUMBER/labels/behaviour%20change fi diff --git a/tests/integration/challenges/basic_abilities/test_write_file.py b/tests/integration/challenges/basic_abilities/test_write_file.py index cbbad514..393dbfd0 100644 --- a/tests/integration/challenges/basic_abilities/test_write_file.py +++ b/tests/integration/challenges/basic_abilities/test_write_file.py @@ -1,4 +1,5 @@ import pytest +from pytest_mock import MockerFixture from autogpt.agent import Agent from autogpt.commands.file_operations import read_file @@ -17,7 +18,7 @@ CYCLE_COUNT = 3 @challenge def test_write_file( writer_agent: Agent, - patched_api_requestor: None, + patched_api_requestor: MockerFixture, monkeypatch: pytest.MonkeyPatch, config: Config, level_to_run: int, diff --git a/tests/integration/challenges/information_retrieval/test_information_retrieval_challenge_a.py b/tests/integration/challenges/information_retrieval/test_information_retrieval_challenge_a.py index 6b970e8b..2f61fef3 100644 --- a/tests/integration/challenges/information_retrieval/test_information_retrieval_challenge_a.py +++ b/tests/integration/challenges/information_retrieval/test_information_retrieval_challenge_a.py @@ -1,4 +1,5 @@ import pytest +from pytest_mock import MockerFixture from autogpt.commands.file_operations import read_file from autogpt.config import Config @@ -19,7 +20,7 @@ from autogpt.agent import Agent def test_information_retrieval_challenge_a( information_retrieval_agents: Agent, monkeypatch: pytest.MonkeyPatch, - patched_api_requestor: None, + patched_api_requestor: MockerFixture, config: Config, level_to_run: int, ) -> None: diff --git a/tests/integration/challenges/information_retrieval/test_information_retrieval_challenge_b.py b/tests/integration/challenges/information_retrieval/test_information_retrieval_challenge_b.py index feac95a0..6461e13f 100644 --- a/tests/integration/challenges/information_retrieval/test_information_retrieval_challenge_b.py +++ b/tests/integration/challenges/information_retrieval/test_information_retrieval_challenge_b.py @@ -1,6 +1,7 @@ import contextlib import pytest +from pytest_mock import MockerFixture from autogpt.agent import Agent from autogpt.commands.file_operations import read_file @@ -20,7 +21,7 @@ CYCLE_COUNT = 3 def test_information_retrieval_challenge_b( get_nobel_prize_agent: Agent, monkeypatch: pytest.MonkeyPatch, - patched_api_requestor: None, + patched_api_requestor: MockerFixture, level_to_run: int, config: Config, ) -> None: diff --git a/tests/integration/challenges/kubernetes/test_kubernetes_template_challenge_a.py b/tests/integration/challenges/kubernetes/test_kubernetes_template_challenge_a.py index 5fd280ac..aa46ac4d 100644 --- a/tests/integration/challenges/kubernetes/test_kubernetes_template_challenge_a.py +++ b/tests/integration/challenges/kubernetes/test_kubernetes_template_challenge_a.py @@ -1,5 +1,6 @@ import pytest import yaml +from pytest_mock import MockerFixture from autogpt.agent import Agent from autogpt.commands.file_operations import read_file @@ -19,6 +20,7 @@ CYCLE_COUNT = 3 def test_kubernetes_template_challenge_a( kubernetes_agent: Agent, monkeypatch: pytest.MonkeyPatch, + patched_api_requestor: MockerFixture, config: Config, level_to_run: int, ) -> None: diff --git a/tests/integration/challenges/memory/test_memory_challenge_a.py b/tests/integration/challenges/memory/test_memory_challenge_a.py index 8919bf58..08f461bd 
100644 --- a/tests/integration/challenges/memory/test_memory_challenge_a.py +++ b/tests/integration/challenges/memory/test_memory_challenge_a.py @@ -1,4 +1,5 @@ import pytest +from pytest_mock import MockerFixture from autogpt.agent import Agent from autogpt.commands.file_operations import read_file, write_to_file @@ -15,7 +16,7 @@ from tests.utils import requires_api_key @challenge def test_memory_challenge_a( memory_management_agent: Agent, - patched_api_requestor: None, + patched_api_requestor: MockerFixture, monkeypatch: pytest.MonkeyPatch, config: Config, level_to_run: int, diff --git a/tests/integration/challenges/memory/test_memory_challenge_b.py b/tests/integration/challenges/memory/test_memory_challenge_b.py index 5c28b330..c8276312 100644 --- a/tests/integration/challenges/memory/test_memory_challenge_b.py +++ b/tests/integration/challenges/memory/test_memory_challenge_b.py @@ -1,4 +1,5 @@ import pytest +from pytest_mock import MockerFixture from autogpt.agent import Agent from autogpt.commands.file_operations import read_file, write_to_file @@ -17,7 +18,7 @@ NOISE = 1000 @challenge def test_memory_challenge_b( memory_management_agent: Agent, - patched_api_requestor: None, + patched_api_requestor: MockerFixture, monkeypatch: pytest.MonkeyPatch, config: Config, level_to_run: int, diff --git a/tests/integration/challenges/memory/test_memory_challenge_c.py b/tests/integration/challenges/memory/test_memory_challenge_c.py index 23c0217d..ab8ece10 100644 --- a/tests/integration/challenges/memory/test_memory_challenge_c.py +++ b/tests/integration/challenges/memory/test_memory_challenge_c.py @@ -1,4 +1,5 @@ import pytest +from pytest_mock import MockerFixture from autogpt.agent import Agent from autogpt.commands.file_operations import read_file, write_to_file @@ -18,7 +19,7 @@ NOISE = 1000 @challenge def test_memory_challenge_c( memory_management_agent: Agent, - patched_api_requestor: None, + patched_api_requestor: MockerFixture, monkeypatch: pytest.MonkeyPatch, config: Config, level_to_run: int, From 8a881f70a3f445a1326a4d08fb2217f70c856527 Mon Sep 17 00:00:00 2001 From: Auto-GPT-Bot Date: Tue, 6 Jun 2023 14:35:43 +0000 Subject: [PATCH 08/97] Update cassette submodule --- tests/Auto-GPT-test-cassettes | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/Auto-GPT-test-cassettes b/tests/Auto-GPT-test-cassettes index 28497a8b..38ecb014 160000 --- a/tests/Auto-GPT-test-cassettes +++ b/tests/Auto-GPT-test-cassettes @@ -1 +1 @@ -Subproject commit 28497a8b4ef0e62375dd5024794426cb401a3779 +Subproject commit 38ecb0145aa3a88e2eb5f04a556146614a2882e1 From dafbd1168624f930caec6e92eada6d7064dc649e Mon Sep 17 00:00:00 2001 From: Reinier van der Leer Date: Tue, 6 Jun 2023 19:48:49 +0200 Subject: [PATCH 09/97] Rearrange tests & fix CI (#4596) * Rearrange tests into unit/integration/challenge categories * Fix linting + `tests.challenges` imports * Fix obscured duplicate test in test_url_validation.py * Move VCR conftest to tests.vcr * Specify tests to run & their order (unit -> integration -> challenges) in CI * Fail Docker CI when tests fail * Fix import & linting errors in tests * Fix `get_text_summary` * Fix linting errors * Clean up pytest args in CI * Remove bogus tests from GoCodeo --- .github/workflows/ci.yml | 9 ++- .github/workflows/docker-ci.yml | 11 ++- .github/workflows/pr-label.yml | 2 +- autogpt/app.py | 2 +- docs/challenges/building_challenges.md | 4 +- .../information_retrieval/challenge_a.md | 2 +- .../information_retrieval/challenge_b.md | 2 +- 
docs/challenges/memory/challenge_b.md | 3 +- docs/challenges/memory/challenge_c.md | 2 +- mypy.ini | 2 +- .../{integration => }/challenges/__init__.py | 0 .../challenges/basic_abilities/__init__.py | 0 .../basic_abilities/goal_oriented_tasks.md | 0 .../basic_abilities/test_browse_website.py | 6 +- .../basic_abilities/test_write_file.py | 6 +- .../challenge_decorator/__init__.py | 0 .../challenge_decorator/challenge.py | 0 .../challenge_decorator.py | 8 +- .../challenge_decorator/challenge_utils.py | 2 +- .../challenge_decorator/score_utils.py | 2 +- .../{integration => }/challenges/conftest.py | 5 +- .../challenges/current_score.json | 0 .../challenges/debug_code/data/two_sum.py | 0 .../debug_code/data/two_sum_tests.py | 0 .../debug_code/test_debug_code_challenge_a.py | 6 +- .../test_information_retrieval_challenge_a.py | 6 +- .../test_information_retrieval_challenge_b.py | 6 +- .../test_kubernetes_template_challenge_a.py | 6 +- .../challenges/memory/__init__.py | 0 .../memory/test_memory_challenge_a.py | 6 +- .../memory/test_memory_challenge_b.py | 6 +- .../memory/test_memory_challenge_c.py | 6 +- ..._challenge_should_be_formatted_properly.py | 0 tests/{integration => }/challenges/utils.py | 0 .../challenges/utils/build_current_score.py | 8 +- tests/conftest.py | 15 ++-- tests/integration/conftest.py | 56 -------------- tests/integration/test_commands.py | 32 -------- tests/test_analyze_code.py | 74 ------------------- tests/test_audio_text_read_audio.py | 56 -------------- tests/test_audio_text_read_audio_from_file.py | 55 -------------- tests/{ => unit}/test_agent.py | 0 tests/{ => unit}/test_agent_manager.py | 0 tests/{ => unit}/test_ai_config.py | 0 tests/{ => unit}/test_api_manager.py | 0 tests/{ => unit}/test_commands.py | 0 tests/{ => unit}/test_config.py | 0 .../test_git_commands.py | 0 .../test_google_search.py | 0 tests/{ => unit}/test_logs.py | 0 tests/unit/test_make_agent.py | 24 ++++++ tests/{ => unit}/test_prompt_config.py | 0 tests/{ => unit}/test_prompt_generator.py | 0 tests/{ => unit}/test_text_file_parsers.py | 0 tests/unit/test_url_validation.py | 28 +++---- tests/{ => unit}/test_utils.py | 5 -- tests/{ => unit}/test_workspace.py | 0 tests/vcr/__init__.py | 61 +++++++++++++++ tests/vcr/vcr_filter.py | 3 +- 59 files changed, 150 insertions(+), 377 deletions(-) rename tests/{integration => }/challenges/__init__.py (100%) rename tests/{integration => }/challenges/basic_abilities/__init__.py (100%) rename tests/{integration => }/challenges/basic_abilities/goal_oriented_tasks.md (100%) rename tests/{integration => }/challenges/basic_abilities/test_browse_website.py (76%) rename tests/{integration => }/challenges/basic_abilities/test_write_file.py (81%) rename tests/{integration => }/challenges/challenge_decorator/__init__.py (100%) rename tests/{integration => }/challenges/challenge_decorator/challenge.py (100%) rename tests/{integration => }/challenges/challenge_decorator/challenge_decorator.py (90%) rename tests/{integration => }/challenges/challenge_decorator/challenge_utils.py (96%) rename tests/{integration => }/challenges/challenge_decorator/score_utils.py (95%) rename tests/{integration => }/challenges/conftest.py (89%) rename tests/{integration => }/challenges/current_score.json (100%) rename tests/{integration => }/challenges/debug_code/data/two_sum.py (100%) rename tests/{integration => }/challenges/debug_code/data/two_sum_tests.py (100%) rename tests/{integration => }/challenges/debug_code/test_debug_code_challenge_a.py (89%) rename tests/{integration => 
}/challenges/information_retrieval/test_information_retrieval_challenge_a.py (89%) rename tests/{integration => }/challenges/information_retrieval/test_information_retrieval_challenge_b.py (90%) rename tests/{integration => }/challenges/kubernetes/test_kubernetes_template_challenge_a.py (88%) rename tests/{integration => }/challenges/memory/__init__.py (100%) rename tests/{integration => }/challenges/memory/test_memory_challenge_a.py (90%) rename tests/{integration => }/challenges/memory/test_memory_challenge_b.py (91%) rename tests/{integration => }/challenges/memory/test_memory_challenge_c.py (95%) rename tests/{integration => }/challenges/test_challenge_should_be_formatted_properly.py (100%) rename tests/{integration => }/challenges/utils.py (100%) rename tests/{integration => }/challenges/utils/build_current_score.py (84%) delete mode 100644 tests/integration/conftest.py delete mode 100644 tests/integration/test_commands.py delete mode 100644 tests/test_analyze_code.py delete mode 100644 tests/test_audio_text_read_audio.py delete mode 100644 tests/test_audio_text_read_audio_from_file.py rename tests/{ => unit}/test_agent.py (100%) rename tests/{ => unit}/test_agent_manager.py (100%) rename tests/{ => unit}/test_ai_config.py (100%) rename tests/{ => unit}/test_api_manager.py (100%) rename tests/{ => unit}/test_commands.py (100%) rename tests/{ => unit}/test_config.py (100%) rename tests/{integration => unit}/test_git_commands.py (100%) rename tests/{integration => unit}/test_google_search.py (100%) rename tests/{ => unit}/test_logs.py (100%) create mode 100644 tests/unit/test_make_agent.py rename tests/{ => unit}/test_prompt_config.py (100%) rename tests/{ => unit}/test_prompt_generator.py (100%) rename tests/{ => unit}/test_text_file_parsers.py (100%) rename tests/{ => unit}/test_utils.py (98%) rename tests/{ => unit}/test_workspace.py (100%) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8d44f2a4..7f965299 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -5,7 +5,7 @@ on: branches: [ master, ci-test* ] paths-ignore: - 'tests/Auto-GPT-test-cassettes' - - 'tests/integration/challenges/current_score.json' + - 'tests/challenges/current_score.json' pull_request: branches: [ stable, master ] pull_request_target: @@ -148,8 +148,9 @@ jobs: - name: Run pytest with coverage run: | - pytest -n auto --cov=autogpt --cov-report term-missing --cov-branch --cov-report xml --cov-report term - python tests/integration/challenges/utils/build_current_score.py + pytest -n auto --cov=autogpt --cov-branch --cov-report term-missing --cov-report xml \ + tests/unit tests/integration tests/challenges + python tests/challenges/utils/build_current_score.py env: CI: true PROXY: ${{ secrets.PROXY }} @@ -179,7 +180,7 @@ jobs: - name: Push updated challenge scores if: github.event_name == 'push' run: | - score_file="tests/integration/challenges/current_score.json" + score_file="tests/challenges/current_score.json" if ! 
git diff --quiet $score_file; then git add $score_file diff --git a/.github/workflows/docker-ci.yml b/.github/workflows/docker-ci.yml index a61b707d..ff43666c 100644 --- a/.github/workflows/docker-ci.yml +++ b/.github/workflows/docker-ci.yml @@ -5,7 +5,7 @@ on: branches: [ master ] paths-ignore: - 'tests/Auto-GPT-test-cassettes' - - 'tests/integration/challenges/current_score.json' + - 'tests/challenges/current_score.json' pull_request: branches: [ master, stable ] @@ -108,15 +108,18 @@ jobs: set +e test_output=$( docker run --env CI --env OPENAI_API_KEY --entrypoint python ${{ env.IMAGE_NAME }} -m \ - pytest -n auto --cov=autogpt --cov-report term-missing --cov-branch --cov-report xml --cov-report term 2>&1 + pytest -n auto --cov=autogpt --cov-branch --cov-report term-missing \ + tests/unit tests/integration 2>&1 ) test_failure=$? - + echo "$test_output" - + cat << $EOF >> $GITHUB_STEP_SUMMARY # Tests $([ $test_failure = 0 ] && echo '✅' || echo '❌') \`\`\` $test_output \`\`\` $EOF + + exit $test_failure diff --git a/.github/workflows/pr-label.yml b/.github/workflows/pr-label.yml index 0bab5638..62a197de 100644 --- a/.github/workflows/pr-label.yml +++ b/.github/workflows/pr-label.yml @@ -6,7 +6,7 @@ on: branches: [ master ] paths-ignore: - 'tests/Auto-GPT-test-cassettes' - - 'tests/integration/challenges/current_score.json' + - 'tests/challenges/current_score.json' # So that the `dirtyLabel` is removed if conflicts are resolve # We recommend `pull_request_target` so that github secrets are available. # In `pull_request` we wouldn't be able to change labels of fork PRs diff --git a/autogpt/app.py b/autogpt/app.py index 0804b482..525deddc 100644 --- a/autogpt/app.py +++ b/autogpt/app.py @@ -142,7 +142,7 @@ def get_text_summary(url: str, question: str, config: Config) -> str: Returns: str: The summary of the text """ - text = scrape_text(url) + text = scrape_text(url, config) summary, _ = summarize_text(text, question=question) return f""" "Result" : {summary}""" diff --git a/docs/challenges/building_challenges.md b/docs/challenges/building_challenges.md index 09ab3bf5..0bd416cc 100644 --- a/docs/challenges/building_challenges.md +++ b/docs/challenges/building_challenges.md @@ -70,7 +70,7 @@ def kubernetes_agent( ``` ## Creating your challenge -Go to `tests/integration/challenges`and create a file that is called `test_your_test_description.py` and add it to the appropriate folder. If no category exists you can create a new one. +Go to `tests/challenges`and create a file that is called `test_your_test_description.py` and add it to the appropriate folder. If no category exists you can create a new one. 
Your test could look something like this @@ -84,7 +84,7 @@ import yaml from autogpt.commands.file_operations import read_file, write_to_file from tests.integration.agent_utils import run_interaction_loop -from tests.integration.challenges.utils import run_multiple_times +from tests.challenges.utils import run_multiple_times from tests.utils import requires_api_key diff --git a/docs/challenges/information_retrieval/challenge_a.md b/docs/challenges/information_retrieval/challenge_a.md index de21066e..bf1b7b10 100644 --- a/docs/challenges/information_retrieval/challenge_a.md +++ b/docs/challenges/information_retrieval/challenge_a.md @@ -5,7 +5,7 @@ **Command to try**: ``` -pytest -s tests/integration/challenges/information_retrieval/test_information_retrieval_challenge_a.py --level=2 +pytest -s tests/challenges/information_retrieval/test_information_retrieval_challenge_a.py --level=2 ``` ## Description diff --git a/docs/challenges/information_retrieval/challenge_b.md b/docs/challenges/information_retrieval/challenge_b.md index bf77a984..f4e68a15 100644 --- a/docs/challenges/information_retrieval/challenge_b.md +++ b/docs/challenges/information_retrieval/challenge_b.md @@ -5,7 +5,7 @@ **Command to try**: ``` -pytest -s tests/integration/challenges/information_retrieval/test_information_retrieval_challenge_b.py +pytest -s tests/challenges/information_retrieval/test_information_retrieval_challenge_b.py ``` ## Description diff --git a/docs/challenges/memory/challenge_b.md b/docs/challenges/memory/challenge_b.md index 49c7c40f..abc6da6b 100644 --- a/docs/challenges/memory/challenge_b.md +++ b/docs/challenges/memory/challenge_b.md @@ -4,7 +4,7 @@ **Command to try**: ``` -pytest -s tests/integration/challenges/memory/test_memory_challenge_b.py --level=3 +pytest -s tests/challenges/memory/test_memory_challenge_b.py --level=3 `` ## Description @@ -41,4 +41,3 @@ Write all the task_ids into the file output.txt. The file has not been created y ## Objective The objective of this challenge is to test the agent's ability to follow instructions and maintain memory of the task IDs throughout the process. The agent successfully completed this challenge if it wrote the task ids in a file. 
- diff --git a/docs/challenges/memory/challenge_c.md b/docs/challenges/memory/challenge_c.md index fd02a4a5..e197ddbd 100644 --- a/docs/challenges/memory/challenge_c.md +++ b/docs/challenges/memory/challenge_c.md @@ -4,7 +4,7 @@ **Command to try**: ``` -pytest -s tests/integration/challenges/memory/test_memory_challenge_c.py --level=2 +pytest -s tests/challenges/memory/test_memory_challenge_c.py --level=2 `` ## Description diff --git a/mypy.ini b/mypy.ini index b977deb0..275cd260 100644 --- a/mypy.ini +++ b/mypy.ini @@ -2,7 +2,7 @@ follow_imports = skip check_untyped_defs = True disallow_untyped_defs = True -files = tests/integration/challenges/**/*.py +files = tests/challenges/**/*.py [mypy-requests.*] ignore_missing_imports = True diff --git a/tests/integration/challenges/__init__.py b/tests/challenges/__init__.py similarity index 100% rename from tests/integration/challenges/__init__.py rename to tests/challenges/__init__.py diff --git a/tests/integration/challenges/basic_abilities/__init__.py b/tests/challenges/basic_abilities/__init__.py similarity index 100% rename from tests/integration/challenges/basic_abilities/__init__.py rename to tests/challenges/basic_abilities/__init__.py diff --git a/tests/integration/challenges/basic_abilities/goal_oriented_tasks.md b/tests/challenges/basic_abilities/goal_oriented_tasks.md similarity index 100% rename from tests/integration/challenges/basic_abilities/goal_oriented_tasks.md rename to tests/challenges/basic_abilities/goal_oriented_tasks.md diff --git a/tests/integration/challenges/basic_abilities/test_browse_website.py b/tests/challenges/basic_abilities/test_browse_website.py similarity index 76% rename from tests/integration/challenges/basic_abilities/test_browse_website.py rename to tests/challenges/basic_abilities/test_browse_website.py index 09e5ab22..b918434c 100644 --- a/tests/integration/challenges/basic_abilities/test_browse_website.py +++ b/tests/challenges/basic_abilities/test_browse_website.py @@ -1,10 +1,8 @@ import pytest from autogpt.agent import Agent -from tests.integration.challenges.challenge_decorator.challenge_decorator import ( - challenge, -) -from tests.integration.challenges.utils import run_interaction_loop +from tests.challenges.challenge_decorator.challenge_decorator import challenge +from tests.challenges.utils import run_interaction_loop from tests.utils import requires_api_key CYCLE_COUNT = 2 diff --git a/tests/integration/challenges/basic_abilities/test_write_file.py b/tests/challenges/basic_abilities/test_write_file.py similarity index 81% rename from tests/integration/challenges/basic_abilities/test_write_file.py rename to tests/challenges/basic_abilities/test_write_file.py index 393dbfd0..033f76e6 100644 --- a/tests/integration/challenges/basic_abilities/test_write_file.py +++ b/tests/challenges/basic_abilities/test_write_file.py @@ -4,10 +4,8 @@ from pytest_mock import MockerFixture from autogpt.agent import Agent from autogpt.commands.file_operations import read_file from autogpt.config import Config -from tests.integration.challenges.challenge_decorator.challenge_decorator import ( - challenge, -) -from tests.integration.challenges.utils import run_interaction_loop +from tests.challenges.challenge_decorator.challenge_decorator import challenge +from tests.challenges.utils import run_interaction_loop from tests.utils import requires_api_key CYCLE_COUNT = 3 diff --git a/tests/integration/challenges/challenge_decorator/__init__.py b/tests/challenges/challenge_decorator/__init__.py similarity index 100% rename 
from tests/integration/challenges/challenge_decorator/__init__.py rename to tests/challenges/challenge_decorator/__init__.py diff --git a/tests/integration/challenges/challenge_decorator/challenge.py b/tests/challenges/challenge_decorator/challenge.py similarity index 100% rename from tests/integration/challenges/challenge_decorator/challenge.py rename to tests/challenges/challenge_decorator/challenge.py diff --git a/tests/integration/challenges/challenge_decorator/challenge_decorator.py b/tests/challenges/challenge_decorator/challenge_decorator.py similarity index 90% rename from tests/integration/challenges/challenge_decorator/challenge_decorator.py rename to tests/challenges/challenge_decorator/challenge_decorator.py index fe12317e..1d1fbf91 100644 --- a/tests/integration/challenges/challenge_decorator/challenge_decorator.py +++ b/tests/challenges/challenge_decorator/challenge_decorator.py @@ -4,11 +4,9 @@ from typing import Any, Callable, Optional import pytest -from tests.integration.challenges.challenge_decorator.challenge import Challenge -from tests.integration.challenges.challenge_decorator.challenge_utils import ( - create_challenge, -) -from tests.integration.challenges.challenge_decorator.score_utils import ( +from tests.challenges.challenge_decorator.challenge import Challenge +from tests.challenges.challenge_decorator.challenge_utils import create_challenge +from tests.challenges.challenge_decorator.score_utils import ( get_scores, update_new_score, ) diff --git a/tests/integration/challenges/challenge_decorator/challenge_utils.py b/tests/challenges/challenge_decorator/challenge_utils.py similarity index 96% rename from tests/integration/challenges/challenge_decorator/challenge_utils.py rename to tests/challenges/challenge_decorator/challenge_utils.py index 7db7648f..74f4cf56 100644 --- a/tests/integration/challenges/challenge_decorator/challenge_utils.py +++ b/tests/challenges/challenge_decorator/challenge_utils.py @@ -1,7 +1,7 @@ import os from typing import Any, Callable, Dict, Optional, Tuple -from tests.integration.challenges.challenge_decorator.challenge import Challenge +from tests.challenges.challenge_decorator.challenge import Challenge CHALLENGE_PREFIX = "test_" diff --git a/tests/integration/challenges/challenge_decorator/score_utils.py b/tests/challenges/challenge_decorator/score_utils.py similarity index 95% rename from tests/integration/challenges/challenge_decorator/score_utils.py rename to tests/challenges/challenge_decorator/score_utils.py index 0a3b71a8..1a8be744 100644 --- a/tests/integration/challenges/challenge_decorator/score_utils.py +++ b/tests/challenges/challenge_decorator/score_utils.py @@ -2,7 +2,7 @@ import json import os from typing import Any, Dict, Optional, Tuple -from tests.integration.challenges.challenge_decorator.challenge import Challenge +from tests.challenges.challenge_decorator.challenge import Challenge CURRENT_SCORE_LOCATION = "../current_score" NEW_SCORE_LOCATION = "../new_score" diff --git a/tests/integration/challenges/conftest.py b/tests/challenges/conftest.py similarity index 89% rename from tests/integration/challenges/conftest.py rename to tests/challenges/conftest.py index 5514a129..dff45f11 100644 --- a/tests/integration/challenges/conftest.py +++ b/tests/challenges/conftest.py @@ -5,9 +5,8 @@ from _pytest.config import Config from _pytest.config.argparsing import Parser from _pytest.fixtures import FixtureRequest -from tests.integration.challenges.challenge_decorator.challenge import Challenge -from 
tests.integration.conftest import BASE_VCR_CONFIG -from tests.vcr.vcr_filter import before_record_response +from tests.challenges.challenge_decorator.challenge import Challenge +from tests.vcr import BASE_VCR_CONFIG, before_record_response def before_record_response_filter_errors( diff --git a/tests/integration/challenges/current_score.json b/tests/challenges/current_score.json similarity index 100% rename from tests/integration/challenges/current_score.json rename to tests/challenges/current_score.json diff --git a/tests/integration/challenges/debug_code/data/two_sum.py b/tests/challenges/debug_code/data/two_sum.py similarity index 100% rename from tests/integration/challenges/debug_code/data/two_sum.py rename to tests/challenges/debug_code/data/two_sum.py diff --git a/tests/integration/challenges/debug_code/data/two_sum_tests.py b/tests/challenges/debug_code/data/two_sum_tests.py similarity index 100% rename from tests/integration/challenges/debug_code/data/two_sum_tests.py rename to tests/challenges/debug_code/data/two_sum_tests.py diff --git a/tests/integration/challenges/debug_code/test_debug_code_challenge_a.py b/tests/challenges/debug_code/test_debug_code_challenge_a.py similarity index 89% rename from tests/integration/challenges/debug_code/test_debug_code_challenge_a.py rename to tests/challenges/debug_code/test_debug_code_challenge_a.py index 008e562c..93df754e 100644 --- a/tests/integration/challenges/debug_code/test_debug_code_challenge_a.py +++ b/tests/challenges/debug_code/test_debug_code_challenge_a.py @@ -7,10 +7,8 @@ from autogpt.agent import Agent from autogpt.commands.execute_code import execute_python_file from autogpt.commands.file_operations import append_to_file, write_to_file from autogpt.config import Config -from tests.integration.challenges.challenge_decorator.challenge_decorator import ( - challenge, -) -from tests.integration.challenges.utils import run_interaction_loop +from tests.challenges.challenge_decorator.challenge_decorator import challenge +from tests.challenges.utils import run_interaction_loop from tests.utils import requires_api_key CYCLE_COUNT = 5 diff --git a/tests/integration/challenges/information_retrieval/test_information_retrieval_challenge_a.py b/tests/challenges/information_retrieval/test_information_retrieval_challenge_a.py similarity index 89% rename from tests/integration/challenges/information_retrieval/test_information_retrieval_challenge_a.py rename to tests/challenges/information_retrieval/test_information_retrieval_challenge_a.py index 2f61fef3..eb3d0c94 100644 --- a/tests/integration/challenges/information_retrieval/test_information_retrieval_challenge_a.py +++ b/tests/challenges/information_retrieval/test_information_retrieval_challenge_a.py @@ -3,10 +3,8 @@ from pytest_mock import MockerFixture from autogpt.commands.file_operations import read_file from autogpt.config import Config -from tests.integration.challenges.challenge_decorator.challenge_decorator import ( - challenge, -) -from tests.integration.challenges.utils import run_interaction_loop +from tests.challenges.challenge_decorator.challenge_decorator import challenge +from tests.challenges.utils import run_interaction_loop from tests.utils import requires_api_key CYCLE_COUNT = 3 diff --git a/tests/integration/challenges/information_retrieval/test_information_retrieval_challenge_b.py b/tests/challenges/information_retrieval/test_information_retrieval_challenge_b.py similarity index 90% rename from 
tests/integration/challenges/information_retrieval/test_information_retrieval_challenge_b.py rename to tests/challenges/information_retrieval/test_information_retrieval_challenge_b.py index 6461e13f..51195f77 100644 --- a/tests/integration/challenges/information_retrieval/test_information_retrieval_challenge_b.py +++ b/tests/challenges/information_retrieval/test_information_retrieval_challenge_b.py @@ -6,10 +6,8 @@ from pytest_mock import MockerFixture from autogpt.agent import Agent from autogpt.commands.file_operations import read_file from autogpt.config import Config -from tests.integration.challenges.challenge_decorator.challenge_decorator import ( - challenge, -) -from tests.integration.challenges.utils import run_interaction_loop +from tests.challenges.challenge_decorator.challenge_decorator import challenge +from tests.challenges.utils import run_interaction_loop from tests.utils import requires_api_key CYCLE_COUNT = 3 diff --git a/tests/integration/challenges/kubernetes/test_kubernetes_template_challenge_a.py b/tests/challenges/kubernetes/test_kubernetes_template_challenge_a.py similarity index 88% rename from tests/integration/challenges/kubernetes/test_kubernetes_template_challenge_a.py rename to tests/challenges/kubernetes/test_kubernetes_template_challenge_a.py index aa46ac4d..8ea20b94 100644 --- a/tests/integration/challenges/kubernetes/test_kubernetes_template_challenge_a.py +++ b/tests/challenges/kubernetes/test_kubernetes_template_challenge_a.py @@ -5,10 +5,8 @@ from pytest_mock import MockerFixture from autogpt.agent import Agent from autogpt.commands.file_operations import read_file from autogpt.config import Config -from tests.integration.challenges.challenge_decorator.challenge_decorator import ( - challenge, -) -from tests.integration.challenges.utils import run_interaction_loop +from tests.challenges.challenge_decorator.challenge_decorator import challenge +from tests.challenges.utils import run_interaction_loop from tests.utils import requires_api_key CYCLE_COUNT = 3 diff --git a/tests/integration/challenges/memory/__init__.py b/tests/challenges/memory/__init__.py similarity index 100% rename from tests/integration/challenges/memory/__init__.py rename to tests/challenges/memory/__init__.py diff --git a/tests/integration/challenges/memory/test_memory_challenge_a.py b/tests/challenges/memory/test_memory_challenge_a.py similarity index 90% rename from tests/integration/challenges/memory/test_memory_challenge_a.py rename to tests/challenges/memory/test_memory_challenge_a.py index 08f461bd..3e3251dd 100644 --- a/tests/integration/challenges/memory/test_memory_challenge_a.py +++ b/tests/challenges/memory/test_memory_challenge_a.py @@ -4,10 +4,8 @@ from pytest_mock import MockerFixture from autogpt.agent import Agent from autogpt.commands.file_operations import read_file, write_to_file from autogpt.config import Config -from tests.integration.challenges.challenge_decorator.challenge_decorator import ( - challenge, -) -from tests.integration.challenges.utils import run_interaction_loop +from tests.challenges.challenge_decorator.challenge_decorator import challenge +from tests.challenges.utils import run_interaction_loop from tests.utils import requires_api_key diff --git a/tests/integration/challenges/memory/test_memory_challenge_b.py b/tests/challenges/memory/test_memory_challenge_b.py similarity index 91% rename from tests/integration/challenges/memory/test_memory_challenge_b.py rename to tests/challenges/memory/test_memory_challenge_b.py index c8276312..011fa17a 100644 
--- a/tests/integration/challenges/memory/test_memory_challenge_b.py +++ b/tests/challenges/memory/test_memory_challenge_b.py @@ -4,10 +4,8 @@ from pytest_mock import MockerFixture from autogpt.agent import Agent from autogpt.commands.file_operations import read_file, write_to_file from autogpt.config import Config -from tests.integration.challenges.challenge_decorator.challenge_decorator import ( - challenge, -) -from tests.integration.challenges.utils import generate_noise, run_interaction_loop +from tests.challenges.challenge_decorator.challenge_decorator import challenge +from tests.challenges.utils import generate_noise, run_interaction_loop from tests.utils import requires_api_key NOISE = 1000 diff --git a/tests/integration/challenges/memory/test_memory_challenge_c.py b/tests/challenges/memory/test_memory_challenge_c.py similarity index 95% rename from tests/integration/challenges/memory/test_memory_challenge_c.py rename to tests/challenges/memory/test_memory_challenge_c.py index ab8ece10..d7cc6994 100644 --- a/tests/integration/challenges/memory/test_memory_challenge_c.py +++ b/tests/challenges/memory/test_memory_challenge_c.py @@ -4,10 +4,8 @@ from pytest_mock import MockerFixture from autogpt.agent import Agent from autogpt.commands.file_operations import read_file, write_to_file from autogpt.config import Config -from tests.integration.challenges.challenge_decorator.challenge_decorator import ( - challenge, -) -from tests.integration.challenges.utils import generate_noise, run_interaction_loop +from tests.challenges.challenge_decorator.challenge_decorator import challenge +from tests.challenges.utils import generate_noise, run_interaction_loop from tests.utils import requires_api_key NOISE = 1000 diff --git a/tests/integration/challenges/test_challenge_should_be_formatted_properly.py b/tests/challenges/test_challenge_should_be_formatted_properly.py similarity index 100% rename from tests/integration/challenges/test_challenge_should_be_formatted_properly.py rename to tests/challenges/test_challenge_should_be_formatted_properly.py diff --git a/tests/integration/challenges/utils.py b/tests/challenges/utils.py similarity index 100% rename from tests/integration/challenges/utils.py rename to tests/challenges/utils.py diff --git a/tests/integration/challenges/utils/build_current_score.py b/tests/challenges/utils/build_current_score.py similarity index 84% rename from tests/integration/challenges/utils/build_current_score.py rename to tests/challenges/utils/build_current_score.py index 743b1328..aec125b4 100644 --- a/tests/integration/challenges/utils/build_current_score.py +++ b/tests/challenges/utils/build_current_score.py @@ -26,12 +26,8 @@ def recursive_sort_dict(data: dict) -> dict: cwd = os.getcwd() # get current working directory -new_score_filename_pattern = os.path.join( - cwd, "tests/integration/challenges/new_score_*.json" -) -current_score_filename = os.path.join( - cwd, "tests/integration/challenges/current_score.json" -) +new_score_filename_pattern = os.path.join(cwd, "tests/challenges/new_score_*.json") +current_score_filename = os.path.join(cwd, "tests/challenges/current_score.json") merged_data: Dict[str, Any] = {} for filename in glob.glob(new_score_filename_pattern): diff --git a/tests/conftest.py b/tests/conftest.py index 98bebc9e..0ee023b5 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,4 +1,3 @@ -import os from pathlib import Path import pytest @@ -8,15 +7,11 @@ from autogpt.config.config import Config from autogpt.llm.api_manager import ApiManager 
from autogpt.workspace import Workspace -pytest_plugins = ["tests.integration.agent_factory", "tests.integration.memory.utils"] - -PROXY = os.environ.get("PROXY") - - -@pytest.fixture() -def vcr_cassette_dir(request): - test_name = os.path.splitext(request.node.name)[0] - return os.path.join("tests/Auto-GPT-test-cassettes", test_name) +pytest_plugins = [ + "tests.integration.agent_factory", + "tests.integration.memory.utils", + "tests.vcr", +] @pytest.fixture() diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py deleted file mode 100644 index 686f50be..00000000 --- a/tests/integration/conftest.py +++ /dev/null @@ -1,56 +0,0 @@ -import os - -import openai.api_requestor -import pytest -from pytest_mock import MockerFixture - -from tests.conftest import PROXY -from tests.vcr.vcr_filter import before_record_request, before_record_response - -BASE_VCR_CONFIG = { - "record_mode": "new_episodes", - "before_record_request": before_record_request, - "before_record_response": before_record_response, - "filter_headers": [ - "Authorization", - "X-OpenAI-Client-User-Agent", - "User-Agent", - ], - "match_on": ["method", "body"], -} - - -@pytest.fixture(scope="session") -def vcr_config(): - # this fixture is called by the pytest-recording vcr decorator. - return BASE_VCR_CONFIG - - -def patch_api_base(requestor): - new_api_base = f"{PROXY}/v1" - requestor.api_base = new_api_base - return requestor - - -@pytest.fixture -def patched_api_requestor(mocker: MockerFixture): - original_init = openai.api_requestor.APIRequestor.__init__ - original_validate_headers = openai.api_requestor.APIRequestor._validate_headers - - def patched_init(requestor, *args, **kwargs): - original_init(requestor, *args, **kwargs) - patch_api_base(requestor) - - def patched_validate_headers(self, supplied_headers): - headers = original_validate_headers(self, supplied_headers) - headers["AGENT-MODE"] = os.environ.get("AGENT_MODE") - headers["AGENT-TYPE"] = os.environ.get("AGENT_TYPE") - return headers - - if PROXY: - mocker.patch("openai.api_requestor.APIRequestor.__init__", new=patched_init) - mocker.patch.object( - openai.api_requestor.APIRequestor, - "_validate_headers", - new=patched_validate_headers, - ) diff --git a/tests/integration/test_commands.py b/tests/integration/test_commands.py deleted file mode 100644 index 1cbb3929..00000000 --- a/tests/integration/test_commands.py +++ /dev/null @@ -1,32 +0,0 @@ -"""Unit tests for the commands module""" -from unittest.mock import MagicMock, patch - -import pytest - -from autogpt.app import list_agents, start_agent -from tests.utils import requires_api_key - - -@pytest.mark.vcr -@pytest.mark.integration_test -@requires_api_key("OPENAI_API_KEY") -def test_make_agent(patched_api_requestor, config) -> None: - """Test that an agent can be created""" - # Use the mock agent manager to avoid creating a real agent - with patch("openai.ChatCompletion.create") as mock: - response = MagicMock() - # del response.error - response.choices[0].messages[0].content = "Test message" - response.usage.prompt_tokens = 1 - response.usage.completion_tokens = 1 - mock.return_value = response - start_agent( - "Test Agent", "chat", "Hello, how are you?", config, "gpt-3.5-turbo" - ) - agents = list_agents(config) - assert "List of agents:\n0: chat" == agents - start_agent( - "Test Agent 2", "write", "Hello, how are you?", config, "gpt-3.5-turbo" - ) - agents = list_agents(config) - assert "List of agents:\n0: chat\n1: write" == agents diff --git a/tests/test_analyze_code.py 
b/tests/test_analyze_code.py deleted file mode 100644 index 98ab8b72..00000000 --- a/tests/test_analyze_code.py +++ /dev/null @@ -1,74 +0,0 @@ -# Date: 2023-5-13 -# Author: Generated by GoCodeo. -import pytest - -from autogpt.commands.analyze_code import analyze_code -from autogpt.config import Config - - -@pytest.fixture -def mock_call_ai_function(mocker): - return mocker.patch("autogpt.commands.analyze_code.call_ai_function") - - -class TestAnalyzeCode: - def test_positive_analyze_code(self, mock_call_ai_function): - # Positive Test - mock_call_ai_function.return_value = ["Suggestion 1", "Suggestion 2"] - code = "def example_function():\n pass" - config = Config() - result = analyze_code(code, config) - assert result == ["Suggestion 1", "Suggestion 2"] - mock_call_ai_function.assert_called_once_with( - "def analyze_code(code: str) -> list[str]:", - [code], - "Analyzes the given code and returns a list of suggestions for improvements.", - config=config, - ) - - def test_negative_analyze_code( - self, - mock_call_ai_function, - config: Config, - ): - # Negative Test - mock_call_ai_function.return_value = [] - code = "def example_function():\n pass" - result = analyze_code(code, config) - assert result == [] - mock_call_ai_function.assert_called_once_with( - "def analyze_code(code: str) -> list[str]:", - [code], - "Analyzes the given code and returns a list of suggestions for improvements.", - config=config, - ) - - def test_error_analyze_code(self, mock_call_ai_function, config: Config): - # Error Test - mock_call_ai_function.side_effect = Exception("Error occurred") - code = "def example_function():\n pass" - with pytest.raises(Exception): - result = analyze_code(code, config) - mock_call_ai_function.assert_called_once_with( - "def analyze_code(code: str) -> list[str]:", - [code], - "Analyzes the given code and returns a list of suggestions for improvements.", - config=config, - ) - - def test_edge_analyze_code_empty_code( - self, - mock_call_ai_function, - config: Config, - ): - # Edge Test - mock_call_ai_function.return_value = ["Suggestion 1", "Suggestion 2"] - code = "" - result = analyze_code(code, config) - assert result == ["Suggestion 1", "Suggestion 2"] - mock_call_ai_function.assert_called_once_with( - "def analyze_code(code: str) -> list[str]:", - [code], - "Analyzes the given code and returns a list of suggestions for improvements.", - config=config, - ) diff --git a/tests/test_audio_text_read_audio.py b/tests/test_audio_text_read_audio.py deleted file mode 100644 index 4385da32..00000000 --- a/tests/test_audio_text_read_audio.py +++ /dev/null @@ -1,56 +0,0 @@ -# Date: 2023-5-13 -# Author: Generated by GoCodeo. -import json -from unittest.mock import MagicMock, patch - -import pytest - -from autogpt.commands.audio_text import read_audio - - -class TestReadAudio: - @patch("requests.post") - def test_positive_read_audio(self, mock_post, config): - # Positive Test - audio_data = b"test_audio_data" - mock_response = MagicMock() - mock_response.content.decode.return_value = json.dumps( - {"text": "Hello, world!"} - ) - mock_post.return_value = mock_response - - config.huggingface_api_token = "testing-token" - result = read_audio(audio_data, config) - assert result == "The audio says: Hello, world!" 
- mock_post.assert_called_once_with( - f"https://api-inference.huggingface.co/models/{config.huggingface_audio_to_text_model}", - headers={"Authorization": f"Bearer {config.huggingface_api_token}"}, - data=audio_data, - ) - - @patch("requests.post") - def test_negative_read_audio(self, mock_post, config): - # Negative Test - audio_data = b"test_audio_data" - mock_response = MagicMock() - mock_response.content.decode.return_value = json.dumps({"text": ""}) - mock_post.return_value = mock_response - config.huggingface_api_token = "testing-token" - result = read_audio(audio_data, config) - assert result == "The audio says: " - mock_post.assert_called_once_with( - f"https://api-inference.huggingface.co/models/{config.huggingface_audio_to_text_model}", - headers={"Authorization": f"Bearer {config.huggingface_api_token}"}, - data=audio_data, - ) - - def test_error_read_audio(self, config): - # Error Test - config.huggingface_api_token = None - with pytest.raises(ValueError): - read_audio(b"test_audio_data", config) - - def test_edge_read_audio_empty_audio(self, config): - # Edge Test - with pytest.raises(ValueError): - read_audio(b"", config) diff --git a/tests/test_audio_text_read_audio_from_file.py b/tests/test_audio_text_read_audio_from_file.py deleted file mode 100644 index c8d66a06..00000000 --- a/tests/test_audio_text_read_audio_from_file.py +++ /dev/null @@ -1,55 +0,0 @@ -# Date: 2023-5-13 -# Author: Generated by GoCodeo. - - -from unittest.mock import mock_open, patch - -import pytest - -from autogpt.commands.audio_text import read_audio_from_file -from autogpt.config import Config - - -@pytest.fixture -def mock_read_audio(mocker): - return mocker.patch("autogpt.commands.audio_text.read_audio") - - -class TestReadAudioFromFile: - def test_positive_read_audio_from_file(self, mock_read_audio): - # Positive test - mock_read_audio.return_value = "This is a sample text." - mock_file_data = b"Audio data" - m = mock_open(read_data=mock_file_data) - - with patch("builtins.open", m): - result = read_audio_from_file("test_audio.wav", Config()) - assert result == "This is a sample text." - m.assert_called_once_with("test_audio.wav", "rb") - - def test_negative_read_audio_from_file(self, mock_read_audio): - # Negative test - mock_read_audio.return_value = "This is a sample text." - mock_file_data = b"Audio data" - m = mock_open(read_data=mock_file_data) - - with patch("builtins.open", m): - result = read_audio_from_file("test_audio.wav", Config()) - assert result != "Incorrect text." 
- m.assert_called_once_with("test_audio.wav", "rb") - - def test_error_read_audio_from_file(self): - # Error test - with pytest.raises(FileNotFoundError): - read_audio_from_file("non_existent_file.wav", Config()) - - def test_edge_empty_audio_file(self, mock_read_audio): - # Edge test - mock_read_audio.return_value = "" - mock_file_data = b"" - m = mock_open(read_data=mock_file_data) - - with patch("builtins.open", m): - result = read_audio_from_file("empty_audio.wav", Config()) - assert result == "" - m.assert_called_once_with("empty_audio.wav", "rb") diff --git a/tests/test_agent.py b/tests/unit/test_agent.py similarity index 100% rename from tests/test_agent.py rename to tests/unit/test_agent.py diff --git a/tests/test_agent_manager.py b/tests/unit/test_agent_manager.py similarity index 100% rename from tests/test_agent_manager.py rename to tests/unit/test_agent_manager.py diff --git a/tests/test_ai_config.py b/tests/unit/test_ai_config.py similarity index 100% rename from tests/test_ai_config.py rename to tests/unit/test_ai_config.py diff --git a/tests/test_api_manager.py b/tests/unit/test_api_manager.py similarity index 100% rename from tests/test_api_manager.py rename to tests/unit/test_api_manager.py diff --git a/tests/test_commands.py b/tests/unit/test_commands.py similarity index 100% rename from tests/test_commands.py rename to tests/unit/test_commands.py diff --git a/tests/test_config.py b/tests/unit/test_config.py similarity index 100% rename from tests/test_config.py rename to tests/unit/test_config.py diff --git a/tests/integration/test_git_commands.py b/tests/unit/test_git_commands.py similarity index 100% rename from tests/integration/test_git_commands.py rename to tests/unit/test_git_commands.py diff --git a/tests/integration/test_google_search.py b/tests/unit/test_google_search.py similarity index 100% rename from tests/integration/test_google_search.py rename to tests/unit/test_google_search.py diff --git a/tests/test_logs.py b/tests/unit/test_logs.py similarity index 100% rename from tests/test_logs.py rename to tests/unit/test_logs.py diff --git a/tests/unit/test_make_agent.py b/tests/unit/test_make_agent.py new file mode 100644 index 00000000..cff20ee3 --- /dev/null +++ b/tests/unit/test_make_agent.py @@ -0,0 +1,24 @@ +from unittest.mock import MagicMock + +from pytest_mock import MockerFixture + +from autogpt.app import list_agents, start_agent +from autogpt.config import Config + + +def test_make_agent(config: Config, mocker: MockerFixture) -> None: + """Test that an agent can be created""" + mock = mocker.patch("openai.ChatCompletion.create") + + response = MagicMock() + # del response.error + response.choices[0].messages[0].content = "Test message" + response.usage.prompt_tokens = 1 + response.usage.completion_tokens = 1 + mock.return_value = response + start_agent("Test Agent", "chat", "Hello, how are you?", config, "gpt-3.5-turbo") + agents = list_agents(config) + assert "List of agents:\n0: chat" == agents + start_agent("Test Agent 2", "write", "Hello, how are you?", config, "gpt-3.5-turbo") + agents = list_agents(config) + assert "List of agents:\n0: chat\n1: write" == agents diff --git a/tests/test_prompt_config.py b/tests/unit/test_prompt_config.py similarity index 100% rename from tests/test_prompt_config.py rename to tests/unit/test_prompt_config.py diff --git a/tests/test_prompt_generator.py b/tests/unit/test_prompt_generator.py similarity index 100% rename from tests/test_prompt_generator.py rename to tests/unit/test_prompt_generator.py diff --git 
a/tests/test_text_file_parsers.py b/tests/unit/test_text_file_parsers.py similarity index 100% rename from tests/test_text_file_parsers.py rename to tests/unit/test_text_file_parsers.py diff --git a/tests/unit/test_url_validation.py b/tests/unit/test_url_validation.py index 16eb8cd5..5d6e8124 100644 --- a/tests/unit/test_url_validation.py +++ b/tests/unit/test_url_validation.py @@ -49,25 +49,17 @@ def test_url_validation_succeeds(url): assert dummy_method(url) == url -bad_protocol_data = ( - ("htt://example.com"), - ("httppp://example.com"), - (" https://example.com"), +@pytest.mark.parametrize( + "url,expected_error", + [ + ("htt://example.com", "Invalid URL format"), + ("httppp://example.com", "Invalid URL format"), + (" https://example.com", "Invalid URL format"), + ("http://?query=q", "Missing Scheme or Network location"), + ], ) - - -@pytest.mark.parametrize("url", bad_protocol_data) -def test_url_validation_fails_bad_protocol(url): - with raises(ValueError, match="Invalid URL format"): - dummy_method(url) - - -missing_loc = (("http://?query=q"),) - - -@pytest.mark.parametrize("url", missing_loc) -def test_url_validation_fails_bad_protocol(url): - with raises(ValueError, match="Missing Scheme or Network location"): +def test_url_validation_fails_invalid_url(url, expected_error): + with raises(ValueError, match=expected_error): dummy_method(url) diff --git a/tests/test_utils.py b/tests/unit/test_utils.py similarity index 98% rename from tests/test_utils.py rename to tests/unit/test_utils.py index c0ce28cc..099176ba 100644 --- a/tests/test_utils.py +++ b/tests/unit/test_utils.py @@ -1,7 +1,6 @@ import os from unittest.mock import patch -import pytest import requests from autogpt.utils import ( @@ -151,7 +150,3 @@ def test_get_current_git_branch_failure(mock_repo): branch_name = get_current_git_branch() assert branch_name == "" - - -if __name__ == "__main__": - pytest.main() diff --git a/tests/test_workspace.py b/tests/unit/test_workspace.py similarity index 100% rename from tests/test_workspace.py rename to tests/unit/test_workspace.py diff --git a/tests/vcr/__init__.py b/tests/vcr/__init__.py index e69de29b..e1a2620c 100644 --- a/tests/vcr/__init__.py +++ b/tests/vcr/__init__.py @@ -0,0 +1,61 @@ +import os + +import openai.api_requestor +import pytest +from pytest_mock import MockerFixture + +from .vcr_filter import PROXY, before_record_request, before_record_response + +BASE_VCR_CONFIG = { + "record_mode": "new_episodes", + "before_record_request": before_record_request, + "before_record_response": before_record_response, + "filter_headers": [ + "Authorization", + "X-OpenAI-Client-User-Agent", + "User-Agent", + ], + "match_on": ["method", "body"], +} + + +@pytest.fixture(scope="session") +def vcr_config(): + # this fixture is called by the pytest-recording vcr decorator. 
+ return BASE_VCR_CONFIG + + +@pytest.fixture() +def vcr_cassette_dir(request): + test_name = os.path.splitext(request.node.name)[0] + return os.path.join("tests/Auto-GPT-test-cassettes", test_name) + + +def patch_api_base(requestor): + new_api_base = f"{PROXY}/v1" + requestor.api_base = new_api_base + return requestor + + +@pytest.fixture +def patched_api_requestor(mocker: MockerFixture): + original_init = openai.api_requestor.APIRequestor.__init__ + original_validate_headers = openai.api_requestor.APIRequestor._validate_headers + + def patched_init(requestor, *args, **kwargs): + original_init(requestor, *args, **kwargs) + patch_api_base(requestor) + + def patched_validate_headers(self, supplied_headers): + headers = original_validate_headers(self, supplied_headers) + headers["AGENT-MODE"] = os.environ.get("AGENT_MODE") + headers["AGENT-TYPE"] = os.environ.get("AGENT_TYPE") + return headers + + if PROXY: + mocker.patch("openai.api_requestor.APIRequestor.__init__", new=patched_init) + mocker.patch.object( + openai.api_requestor.APIRequestor, + "_validate_headers", + new=patched_validate_headers, + ) diff --git a/tests/vcr/vcr_filter.py b/tests/vcr/vcr_filter.py index 4cc49fd3..1ba433a7 100644 --- a/tests/vcr/vcr_filter.py +++ b/tests/vcr/vcr_filter.py @@ -1,8 +1,9 @@ import json +import os import re from typing import Any, Dict, List -from tests.conftest import PROXY +PROXY = os.environ.get("PROXY") REPLACEMENTS: List[Dict[str, str]] = [ { From 055806e124b2c244b37e367f318e55e548b2cd5a Mon Sep 17 00:00:00 2001 From: Erik Peterson Date: Tue, 6 Jun 2023 13:56:17 -0700 Subject: [PATCH 10/97] Fix inverted logic for deny_command (#4563) --- autogpt/commands/execute_code.py | 2 +- tests/integration/test_execute_code.py | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/autogpt/commands/execute_code.py b/autogpt/commands/execute_code.py index 20c5e1a2..8826e478 100644 --- a/autogpt/commands/execute_code.py +++ b/autogpt/commands/execute_code.py @@ -109,7 +109,7 @@ def validate_command(command: str, config: Config) -> bool: if not tokens: return False - if config.deny_commands and tokens[0] not in config.deny_commands: + if config.deny_commands and tokens[0] in config.deny_commands: return False for keyword in config.allow_commands: diff --git a/tests/integration/test_execute_code.py b/tests/integration/test_execute_code.py index c75d66fa..fa3cf259 100644 --- a/tests/integration/test_execute_code.py +++ b/tests/integration/test_execute_code.py @@ -48,3 +48,11 @@ def test_execute_python_file_invalid(config): def test_execute_shell(config_allow_execute, random_string, config): result = sut.execute_shell(f"echo 'Hello {random_string}!'", config) assert f"Hello {random_string}!" 
in result + + +def test_execute_shell_deny_command( + python_test_file: str, config_allow_execute: bool, config: Config +): + config.deny_commands = ["echo"] + result = sut.execute_shell(f"echo 'Hello {random_string}!'", config) + assert "Error:" in result and "not allowed" in result From 53efa8f6bffa8c64e8d8f8eff47c493319d7abf3 Mon Sep 17 00:00:00 2001 From: merwanehamadi Date: Tue, 6 Jun 2023 14:46:41 -0700 Subject: [PATCH 11/97] Update cassette submodule & fix current_score.json generation (#4601) * Update cassette submodule * add a new line when building current_score.json --- tests/Auto-GPT-test-cassettes | 2 +- tests/challenges/utils/build_current_score.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/Auto-GPT-test-cassettes b/tests/Auto-GPT-test-cassettes index 38ecb014..dbea8039 160000 --- a/tests/Auto-GPT-test-cassettes +++ b/tests/Auto-GPT-test-cassettes @@ -1 +1 @@ -Subproject commit 38ecb0145aa3a88e2eb5f04a556146614a2882e1 +Subproject commit dbea803947475a53ee764aa4f74f304796f197fc diff --git a/tests/challenges/utils/build_current_score.py b/tests/challenges/utils/build_current_score.py index aec125b4..b8e75242 100644 --- a/tests/challenges/utils/build_current_score.py +++ b/tests/challenges/utils/build_current_score.py @@ -40,4 +40,5 @@ for filename in glob.glob(new_score_filename_pattern): sorted_data = recursive_sort_dict(merged_data) with open(current_score_filename, "w") as f_current: - json.dump(sorted_data, f_current, indent=4) + json_data = json.dumps(sorted_data, indent=4) + f_current.write(json_data + "\n") From 835decc6c1db8eee3269eed3f330d72c546dd512 Mon Sep 17 00:00:00 2001 From: Auto-GPT-Bot Date: Tue, 6 Jun 2023 21:48:57 +0000 Subject: [PATCH 12/97] Update challenge scores --- tests/challenges/current_score.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/challenges/current_score.json b/tests/challenges/current_score.json index bd91afcb..6a133f3e 100644 --- a/tests/challenges/current_score.json +++ b/tests/challenges/current_score.json @@ -45,4 +45,4 @@ "max_level_beaten": 1 } } -} \ No newline at end of file +} From 1b04e5cafc1b9f1c321bfbaa22b5c4b4864067f5 Mon Sep 17 00:00:00 2001 From: merwanehamadi Date: Tue, 6 Jun 2023 16:20:52 -0700 Subject: [PATCH 13/97] Fix duckduckgo rate limiting (#4592) * Fix duckduckgo rate limiting * use list instead of loop --------- Co-authored-by: Reinier van der Leer --- autogpt/commands/google_search.py | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/autogpt/commands/google_search.py b/autogpt/commands/google_search.py index c01ec0a1..aa34861d 100644 --- a/autogpt/commands/google_search.py +++ b/autogpt/commands/google_search.py @@ -2,6 +2,7 @@ from __future__ import annotations import json +import time from itertools import islice from typing import TYPE_CHECKING @@ -12,6 +13,8 @@ from autogpt.commands.command import command if TYPE_CHECKING: from autogpt.config import Config +DUCKDUCKGO_MAX_ATTEMPTS = 3 + @command( "google", @@ -30,15 +33,20 @@ def google_search(query: str, config: Config, num_results: int = 8) -> str: str: The results of the search. 
""" search_results = [] - if not query: - return json.dumps(search_results) + attempts = 0 - results = DDGS().text(query) - if not results: - return json.dumps(search_results) + while attempts < DUCKDUCKGO_MAX_ATTEMPTS: + if not query: + return json.dumps(search_results) - for item in islice(results, num_results): - search_results.append(item) + results = DDGS().text(query) + search_results = list(islice(results, num_results)) + + if search_results: + break + + time.sleep(1) + attempts += 1 results = json.dumps(search_results, ensure_ascii=False, indent=4) return safe_google_results(results) From 20a4922b4040b0f2168251cef03265d99c6ed43d Mon Sep 17 00:00:00 2001 From: Reinier van der Leer Date: Wed, 7 Jun 2023 01:37:43 +0200 Subject: [PATCH 14/97] Increase thresholds for PR autolabeler (#4602) --- .github/workflows/pr-label.yml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/workflows/pr-label.yml b/.github/workflows/pr-label.yml index 62a197de..e024f34b 100644 --- a/.github/workflows/pr-label.yml +++ b/.github/workflows/pr-label.yml @@ -48,11 +48,10 @@ jobs: s_label: 'size/s' s_max_size: 10 m_label: 'size/m' - m_max_size: 50 + m_max_size: 100 l_label: 'size/l' - l_max_size: 200 + l_max_size: 500 xl_label: 'size/xl' message_if_xl: > - This PR exceeds the recommended size of 200 lines. + This PR exceeds the recommended size of 500 lines. Please make sure you are NOT addressing multiple issues with one PR. - Note this PR might be rejected due to its size From fdc6e12945f01a27dd88d83515cac61743056ba9 Mon Sep 17 00:00:00 2001 From: Erik Peterson Date: Tue, 6 Jun 2023 17:54:02 -0700 Subject: [PATCH 15/97] Improve logic and error messages for file reading and writing with Python code (#4567) * Fix issues with file reading and writing with Python code * Change error message, use Workspace.get_path --------- Co-authored-by: Reinier van der Leer --- autogpt/commands/execute_code.py | 23 ++++++++++++++++++----- autogpt/commands/file_operations_utils.py | 4 +++- tests/integration/test_execute_code.py | 17 ++++++++++++----- tests/unit/test_file_operations.py | 6 ++++++ 4 files changed, 39 insertions(+), 11 deletions(-) diff --git a/autogpt/commands/execute_code.py b/autogpt/commands/execute_code.py index 8826e478..999e40f8 100644 --- a/autogpt/commands/execute_code.py +++ b/autogpt/commands/execute_code.py @@ -9,6 +9,8 @@ from docker.errors import ImageNotFound from autogpt.commands.command import command from autogpt.config import Config from autogpt.logs import logger +from autogpt.setup import CFG +from autogpt.workspace.workspace import Workspace @command("execute_python_file", "Execute Python File", '"filename": ""') @@ -21,17 +23,28 @@ def execute_python_file(filename: str, config: Config) -> str: Returns: str: The output of the file """ - logger.info(f"Executing file '{filename}'") + logger.info( + f"Executing python file '{filename}' in working directory '{CFG.workspace_path}'" + ) if not filename.endswith(".py"): return "Error: Invalid file type. Only .py files are allowed." - if not os.path.isfile(filename): - return f"Error: File '{filename}' does not exist." 
+ workspace = Workspace(config.workspace_path, config.restrict_to_workspace) + + path = workspace.get_path(filename) + if not path.is_file(): + # Mimic the response that you get from the command line so that it's easier to identify + return ( + f"python: can't open file '{filename}': [Errno 2] No such file or directory" + ) if we_are_running_in_a_docker_container(): result = subprocess.run( - ["python", filename], capture_output=True, encoding="utf8" + ["python", str(path)], + capture_output=True, + encoding="utf8", + cwd=CFG.workspace_path, ) if result.returncode == 0: return result.stdout @@ -63,7 +76,7 @@ def execute_python_file(filename: str, config: Config) -> str: logger.info(status) container = client.containers.run( image_name, - ["python", str(Path(filename).relative_to(config.workspace_path))], + ["python", str(path.relative_to(workspace.root))], volumes={ config.workspace_path: { "bind": "/workspace", diff --git a/autogpt/commands/file_operations_utils.py b/autogpt/commands/file_operations_utils.py index 7f3e418d..b0077968 100644 --- a/autogpt/commands/file_operations_utils.py +++ b/autogpt/commands/file_operations_utils.py @@ -146,7 +146,9 @@ def is_file_binary_fn(file_path: str): def read_textual_file(file_path: str, logger: logs.Logger) -> str: if not os.path.isfile(file_path): - raise FileNotFoundError(f"{file_path} not found!") + raise FileNotFoundError( + f"read_file {file_path} failed: no such file or directory" + ) is_binary = is_file_binary_fn(file_path) file_extension = os.path.splitext(file_path)[1].lower() parser = extension_to_parser.get(file_extension) diff --git a/tests/integration/test_execute_code.py b/tests/integration/test_execute_code.py index fa3cf259..ce3c1e16 100644 --- a/tests/integration/test_execute_code.py +++ b/tests/integration/test_execute_code.py @@ -1,6 +1,7 @@ import random import string import tempfile +from typing import Callable import pytest from pytest_mock import MockerFixture @@ -10,12 +11,12 @@ from autogpt.config import Config @pytest.fixture -def config_allow_execute(config: Config, mocker: MockerFixture): +def config_allow_execute(config: Config, mocker: MockerFixture) -> Callable: yield mocker.patch.object(config, "execute_local_commands", True) @pytest.fixture -def python_test_file(config: Config, random_string): +def python_test_file(config: Config, random_string) -> Callable: temp_file = tempfile.NamedTemporaryFile(dir=config.workspace_path, suffix=".py") temp_file.write(str.encode(f"print('Hello {random_string}!')")) temp_file.flush() @@ -34,18 +35,24 @@ def test_execute_python_file(python_test_file: str, random_string: str, config): assert result.replace("\r", "") == f"Hello {random_string}!\n" -def test_execute_python_file_invalid(config): +def test_execute_python_file_invalid(config: Config): assert all( s in sut.execute_python_file("not_python", config).lower() for s in ["error:", "invalid", ".py"] ) + + +def test_execute_python_file_not_found(config: Config): assert all( s in sut.execute_python_file("notexist.py", config).lower() - for s in ["error:", "does not exist"] + for s in [ + "python: can't open file 'notexist.py'", + "[errno 2] no such file or directory", + ] ) -def test_execute_shell(config_allow_execute, random_string, config): +def test_execute_shell(config_allow_execute: bool, random_string: str, config: Config): result = sut.execute_shell(f"echo 'Hello {random_string}!'", config) assert f"Hello {random_string}!" 
in result diff --git a/tests/unit/test_file_operations.py b/tests/unit/test_file_operations.py index 1d0219eb..3da57375 100644 --- a/tests/unit/test_file_operations.py +++ b/tests/unit/test_file_operations.py @@ -229,6 +229,12 @@ def test_read_file( assert content.replace("\r", "") == file_content +def test_read_file_not_found(config: Config): + filename = "does_not_exist.txt" + content = file_ops.read_file(filename, config) + assert "Error:" in content and filename in content and "no such file" in content + + def test_write_to_file(test_file_path: Path, config): new_content = "This is new content.\n" file_ops.write_to_file(str(test_file_path), new_content, config) From 463dc547339304ca54709477d5f47e948a205dbf Mon Sep 17 00:00:00 2001 From: Jan Date: Wed, 7 Jun 2023 03:14:47 +0200 Subject: [PATCH 16/97] Clean up apt cache in docker build (#2821) * Dockerfile: reduce layer bloat * Re-separate layers --------- Co-authored-by: Reinier van der Leer --- Dockerfile | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index 3bcfddea..68be43d5 100644 --- a/Dockerfile +++ b/Dockerfile @@ -6,11 +6,13 @@ FROM python:3.10-slim AS autogpt-base # Install browsers RUN apt-get update && apt-get install -y \ - chromium-driver firefox-esr \ - ca-certificates + chromium-driver firefox-esr ca-certificates \ + && apt-get clean && rm -rf /var/lib/apt/lists/* # Install utilities -RUN apt-get install -y curl jq wget git +RUN apt-get update && apt-get install -y \ + curl jq wget git \ + && apt-get clean && rm -rf /var/lib/apt/lists/* # Set environment variables ENV PIP_NO_CACHE_DIR=yes \ From 624af3945c2b36d8ef08c49a68a4617c7f67a5b9 Mon Sep 17 00:00:00 2001 From: Robin Richtsfeld Date: Wed, 7 Jun 2023 07:12:12 +0200 Subject: [PATCH 17/97] Update .gitignore (#882) Co-authored-by: Nicholas Tindle --- .gitignore | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/.gitignore b/.gitignore index 971c3368..2ce588d8 100644 --- a/.gitignore +++ b/.gitignore @@ -1,11 +1,6 @@ ## Original ignores autogpt/keys.py -autogpt/*json -autogpt/node_modules/ -autogpt/__pycache__/keys.cpython-310.pyc -autogpt/auto_gpt_workspace -package-lock.json -*.pyc +autogpt/*.json auto_gpt_workspace/* *.mpeg .env From c9f1f050af55ba38285d5837d8b91959cf36774a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc=20Borntr=C3=A4ger?= Date: Wed, 7 Jun 2023 07:32:43 +0200 Subject: [PATCH 18/97] feat: log new-line so user can differentiate sections better in console (#1569) Co-authored-by: Nicholas Tindle Co-authored-by: Nicholas Tindle --- autogpt/agent/agent.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/autogpt/agent/agent.py b/autogpt/agent/agent.py index 3dc4d390..93c9c283 100644 --- a/autogpt/agent/agent.py +++ b/autogpt/agent/agent.py @@ -166,6 +166,8 @@ class Agent: NEXT_ACTION_FILE_NAME, ) + # First log new-line so user can differentiate sections better in console + logger.typewriter_log("\n") logger.typewriter_log( "NEXT ACTION: ", Fore.CYAN, @@ -252,6 +254,8 @@ class Agent: logger.info("Exiting...") break else: + # First log new-line so user can differentiate sections better in console + logger.typewriter_log("\n") # Print authorized commands left value logger.typewriter_log( f"{Fore.CYAN}AUTHORISED COMMANDS LEFT: {Style.RESET_ALL}{self.next_action_count}" From dae58f81677ef30c1c200031a469cb34e625aed8 Mon Sep 17 00:00:00 2001 From: Nicholas Tindle Date: Wed, 7 Jun 2023 01:46:09 -0500 Subject: [PATCH 19/97] Update .gitignore (#4610) --- .gitignore | 4 ++-- 1 file changed, 2 
insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 2ce588d8..307a6723 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,7 @@ ## Original ignores autogpt/keys.py autogpt/*.json -auto_gpt_workspace/* +**/auto_gpt_workspace/* *.mpeg .env azure.yaml @@ -158,4 +158,4 @@ vicuna-* openai/ # news -CURRENT_BULLETIN.md \ No newline at end of file +CURRENT_BULLETIN.md From 1e851ba3ea7616e4bdc4ea14bade4424b6fb832a Mon Sep 17 00:00:00 2001 From: Stefan Ayala Date: Wed, 7 Jun 2023 01:16:53 -0700 Subject: [PATCH 20/97] Feat set token limits based on model (#4498) * feat: set max token limits for better user experience * fix: use OPEN_AI_CHAT_MODELS max limits * fix: use the old default of 8000 * fix: formatting so isort/black checks pass * fix: avoid circular dependencies * fix: use better to avoid circular imports * feat: introduce soft limits and use them * fix: circular import issue and missing field * fix: move import to avoid overriding doc comment * feat: DRY things up and set token limit for fast llm models too * tests: make linter tests happy * test: use the max token limits in config.py test * fix: remove fast token limit from config * feat: remove smart token limit from config * fix: remove unused soft_token_limit var * fix: remove unneeded tests, settings aren't in config anymore --------- Co-authored-by: k-boikov <64261260+k-boikov@users.noreply.github.com> Co-authored-by: Reinier van der Leer --- .env.template | 7 ------- autogpt/agent/agent.py | 6 ++++-- autogpt/config/config.py | 11 +---------- tests/unit/test_config.py | 30 ------------------------------ 4 files changed, 5 insertions(+), 49 deletions(-) diff --git a/.env.template b/.env.template index d4d99baa..c78701a7 100644 --- a/.env.template +++ b/.env.template @@ -85,13 +85,6 @@ OPENAI_API_KEY=your-openai-api-key # SMART_LLM_MODEL=gpt-4 # FAST_LLM_MODEL=gpt-3.5-turbo -### LLM MODEL SETTINGS -## FAST_TOKEN_LIMIT - Fast token limit for OpenAI (Default: 4000) -## SMART_TOKEN_LIMIT - Smart token limit for OpenAI (Default: 8000) -## When using --gpt3only this needs to be set to 4000. 
-# FAST_TOKEN_LIMIT=4000 -# SMART_TOKEN_LIMIT=8000 - ### EMBEDDINGS ## EMBEDDING_MODEL - Model to use for creating embeddings # EMBEDDING_MODEL=text-embedding-ada-002 diff --git a/autogpt/agent/agent.py b/autogpt/agent/agent.py index 93c9c283..c21f31db 100644 --- a/autogpt/agent/agent.py +++ b/autogpt/agent/agent.py @@ -12,6 +12,7 @@ from autogpt.json_utils.json_fix_llm import fix_json_using_multiple_techniques from autogpt.json_utils.utilities import LLM_DEFAULT_RESPONSE_FORMAT, validate_json from autogpt.llm.base import ChatSequence from autogpt.llm.chat import chat_with_ai, create_chat_completion +from autogpt.llm.providers.openai import OPEN_AI_CHAT_MODELS from autogpt.llm.utils import count_string_tokens from autogpt.log_cycle.log_cycle import ( FULL_MESSAGE_HISTORY_FILE_NAME, @@ -82,6 +83,7 @@ class Agent: self.created_at = datetime.now().strftime("%Y%m%d_%H%M%S") self.cycle_count = 0 self.log_cycle_handler = LogCycleHandler() + self.fast_token_limit = OPEN_AI_CHAT_MODELS.get(cfg.fast_llm_model).max_tokens def start_interaction_loop(self): # Interaction Loop @@ -132,7 +134,7 @@ class Agent: self, self.system_prompt, self.triggering_prompt, - cfg.fast_token_limit, + self.fast_token_limit, cfg.fast_llm_model, ) @@ -290,7 +292,7 @@ class Agent: memory_tlength = count_string_tokens( str(self.history.summary_message()), cfg.fast_llm_model ) - if result_tlength + memory_tlength + 600 > cfg.fast_token_limit: + if result_tlength + memory_tlength + 600 > self.fast_token_limit: result = f"Failure: command {command_name} returned too much output. \ Do not execute this command again with the same arguments." diff --git a/autogpt/config/config.py b/autogpt/config/config.py index 5f76bb74..629e9ffb 100644 --- a/autogpt/config/config.py +++ b/autogpt/config/config.py @@ -56,9 +56,8 @@ class Config(metaclass=Singleton): ) self.fast_llm_model = os.getenv("FAST_LLM_MODEL", "gpt-3.5-turbo") self.smart_llm_model = os.getenv("SMART_LLM_MODEL", "gpt-4") - self.fast_token_limit = int(os.getenv("FAST_TOKEN_LIMIT", 4000)) - self.smart_token_limit = int(os.getenv("SMART_TOKEN_LIMIT", 8000)) self.embedding_model = os.getenv("EMBEDDING_MODEL", "text-embedding-ada-002") + self.browse_spacy_language_model = os.getenv( "BROWSE_SPACY_LANGUAGE_MODEL", "en_core_web_sm" ) @@ -217,14 +216,6 @@ class Config(metaclass=Singleton): """Set the smart LLM model value.""" self.smart_llm_model = value - def set_fast_token_limit(self, value: int) -> None: - """Set the fast token limit value.""" - self.fast_token_limit = value - - def set_smart_token_limit(self, value: int) -> None: - """Set the smart token limit value.""" - self.smart_token_limit = value - def set_embedding_model(self, value: str) -> None: """Set the model to use for creating embeddings.""" self.embedding_model = value diff --git a/tests/unit/test_config.py b/tests/unit/test_config.py index eb6946c9..27daedcd 100644 --- a/tests/unit/test_config.py +++ b/tests/unit/test_config.py @@ -21,8 +21,6 @@ def test_initial_values(config: Config): assert config.speak_mode == False assert config.fast_llm_model == "gpt-3.5-turbo" assert config.smart_llm_model == "gpt-4" - assert config.fast_token_limit == 4000 - assert config.smart_token_limit == 8000 def test_set_continuous_mode(config: Config): @@ -81,34 +79,6 @@ def test_set_smart_llm_model(config: Config): config.set_smart_llm_model(smart_llm_model) -def test_set_fast_token_limit(config: Config): - """ - Test if the set_fast_token_limit() method updates the fast_token_limit attribute. 
- """ - # Store token limit to reset it after the test - fast_token_limit = config.fast_token_limit - - config.set_fast_token_limit(5000) - assert config.fast_token_limit == 5000 - - # Reset token limit - config.set_fast_token_limit(fast_token_limit) - - -def test_set_smart_token_limit(config: Config): - """ - Test if the set_smart_token_limit() method updates the smart_token_limit attribute. - """ - # Store token limit to reset it after the test - smart_token_limit = config.smart_token_limit - - config.set_smart_token_limit(9000) - assert config.smart_token_limit == 9000 - - # Reset token limit - config.set_smart_token_limit(smart_token_limit) - - def test_set_debug_mode(config: Config): """ Test if the set_debug_mode() method updates the debug_mode attribute. From f4505add69790753d6085926076efd544005daec Mon Sep 17 00:00:00 2001 From: merwanehamadi Date: Wed, 7 Jun 2023 19:39:07 -0700 Subject: [PATCH 21/97] Set proxy headers as secrets (#4620) --- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7f965299..cf851ef3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -154,8 +154,8 @@ jobs: env: CI: true PROXY: ${{ secrets.PROXY }} - AGENT_MODE: ${{ vars.AGENT_MODE }} - AGENT_TYPE: ${{ vars.AGENT_TYPE }} + AGENT_MODE: ${{ secrets.AGENT_MODE }} + AGENT_TYPE: ${{ secrets.AGENT_TYPE }} - name: Upload coverage reports to Codecov uses: codecov/codecov-action@v3 From bd2e26a20fd85869cbebfcd52a95ddada33c8f0a Mon Sep 17 00:00:00 2001 From: merwanehamadi Date: Fri, 9 Jun 2023 07:43:56 -0700 Subject: [PATCH 22/97] Inform users that challenges can be flaky (#4616) * Inform users that challenges can be flaky * Update challenge_decorator.py --- .../challenges/challenge_decorator/challenge_decorator.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/challenges/challenge_decorator/challenge_decorator.py b/tests/challenges/challenge_decorator/challenge_decorator.py index 1d1fbf91..5ef7f19e 100644 --- a/tests/challenges/challenge_decorator/challenge_decorator.py +++ b/tests/challenges/challenge_decorator/challenge_decorator.py @@ -15,6 +15,8 @@ MAX_LEVEL_TO_IMPROVE_ON = ( 1 # we will attempt to beat 1 level above the current level for now. ) +CHALLENGE_FAILED_MESSAGE = "Challenges can sometimes fail randomly, please run this test again and if it fails reach out to us on https://discord.gg/autogpt and reach out to us on the 'challenges' channel to let us know the challenge you're struggling with." 
+ def challenge(func: Callable[..., Any]) -> Callable[..., None]: @wraps(func) @@ -34,7 +36,9 @@ def challenge(func: Callable[..., Any]) -> Callable[..., None]: func(*args, **kwargs) challenge.succeeded = True except AssertionError as err: - original_error = err + original_error = AssertionError( + f"{CHALLENGE_FAILED_MESSAGE}\n{err}" + ) challenge.succeeded = False else: challenge.skipped = True @@ -54,7 +58,6 @@ def challenge(func: Callable[..., Any]) -> Callable[..., None]: pytest.xfail("Challenge failed") if original_error: raise original_error - raise AssertionError("Challenge failed") run_remaining -= 1 return wrapper From 3b0d49a3e02c55c449a2303df0f6aab42257b575 Mon Sep 17 00:00:00 2001 From: merwanehamadi Date: Fri, 9 Jun 2023 07:50:57 -0700 Subject: [PATCH 23/97] Make test write file hard (#4481) --- .../basic_abilities/test_write_file.py | 30 +++++-- tests/challenges/current_score.json | 2 +- tests/integration/agent_factory.py | 85 ++++++++++--------- 3 files changed, 67 insertions(+), 50 deletions(-) diff --git a/tests/challenges/basic_abilities/test_write_file.py b/tests/challenges/basic_abilities/test_write_file.py index 033f76e6..9c472985 100644 --- a/tests/challenges/basic_abilities/test_write_file.py +++ b/tests/challenges/basic_abilities/test_write_file.py @@ -1,5 +1,6 @@ +from typing import List + import pytest -from pytest_mock import MockerFixture from autogpt.agent import Agent from autogpt.commands.file_operations import read_file @@ -8,21 +9,34 @@ from tests.challenges.challenge_decorator.challenge_decorator import challenge from tests.challenges.utils import run_interaction_loop from tests.utils import requires_api_key -CYCLE_COUNT = 3 +CYCLE_COUNT_PER_LEVEL = [1, 1] +EXPECTED_OUTPUTS_PER_LEVEL = [ + {"hello_world.txt": ["Hello World"]}, + {"hello_world_1.txt": ["Hello World"], "hello_world_2.txt": ["Hello World"]}, +] @requires_api_key("OPENAI_API_KEY") @pytest.mark.vcr @challenge def test_write_file( - writer_agent: Agent, - patched_api_requestor: MockerFixture, + file_system_agents: List[Agent], + patched_api_requestor: None, monkeypatch: pytest.MonkeyPatch, config: Config, level_to_run: int, ) -> None: - file_path = str(writer_agent.workspace.get_path("hello_world.txt")) - run_interaction_loop(monkeypatch, writer_agent, CYCLE_COUNT) + file_system_agent = file_system_agents[level_to_run - 1] + run_interaction_loop( + monkeypatch, file_system_agent, CYCLE_COUNT_PER_LEVEL[level_to_run - 1] + ) - content = read_file(file_path, config) - assert content == "Hello World", f"Expected 'Hello World', got {content}" + expected_outputs = EXPECTED_OUTPUTS_PER_LEVEL[level_to_run - 1] + + for file_name, expected_lines in expected_outputs.items(): + file_path = str(file_system_agent.workspace.get_path(file_name)) + content = read_file(file_path, config) + for expected_line in expected_lines: + assert ( + expected_line in content + ), f"Expected '{expected_line}' in file {file_name}, but it was not found" diff --git a/tests/challenges/current_score.json b/tests/challenges/current_score.json index 6a133f3e..8be3fb5e 100644 --- a/tests/challenges/current_score.json +++ b/tests/challenges/current_score.json @@ -5,7 +5,7 @@ "max_level_beaten": 1 }, "write_file": { - "max_level": 1, + "max_level": 2, "max_level_beaten": 1 } }, diff --git a/tests/integration/agent_factory.py b/tests/integration/agent_factory.py index 30d9cc13..fb08411e 100644 --- a/tests/integration/agent_factory.py +++ b/tests/integration/agent_factory.py @@ -70,41 +70,39 @@ def browser_agent(agent_test_config, 
memory_none: NoMemory, workspace: Workspace @pytest.fixture -def writer_agent(agent_test_config, memory_none: NoMemory, workspace: Workspace): - command_registry = CommandRegistry() - command_registry.import_commands("autogpt.commands.file_operations") - command_registry.import_commands("autogpt.app") - command_registry.import_commands("autogpt.commands.task_statuses") +def file_system_agents( + agent_test_config, memory_json_file: NoMemory, workspace: Workspace +): + agents = [] + command_registry = get_command_registry(agent_test_config) - ai_config = AIConfig( - ai_name="write_to_file-GPT", - ai_role="an AI designed to use the write_to_file command to write 'Hello World' into a file named \"hello_world.txt\" and then use the task_complete command to complete the task.", - ai_goals=[ - "Use the write_to_file command to write 'Hello World' into a file named \"hello_world.txt\".", - "Use the task_complete command to complete the task.", - "Do not use any other commands.", - ], - ) - ai_config.command_registry = command_registry + ai_goals = [ + "Write 'Hello World' into a file named \"hello_world.txt\".", + 'Write \'Hello World\' into 2 files named "hello_world_1.txt"and "hello_world_2.txt".', + ] - triggering_prompt = ( - "Determine which next command to use, and respond using the" - " format specified above:" - ) - system_prompt = ai_config.construct_full_prompt() - - agent = Agent( - ai_name="", - memory=memory_none, - command_registry=command_registry, - config=ai_config, - next_action_count=0, - system_prompt=system_prompt, - triggering_prompt=triggering_prompt, - workspace_directory=workspace.root, - ) - - return agent + for ai_goal in ai_goals: + ai_config = AIConfig( + ai_name="File System Agent", + ai_role="an AI designed to manage a file system.", + ai_goals=[ai_goal], + ) + ai_config.command_registry = command_registry + system_prompt = ai_config.construct_full_prompt() + Config().set_continuous_mode(False) + agents.append( + Agent( + ai_name="Information Retrieval Agent", + memory=memory_json_file, + command_registry=command_registry, + config=ai_config, + next_action_count=0, + system_prompt=system_prompt, + triggering_prompt=DEFAULT_TRIGGERING_PROMPT, + workspace_directory=workspace.root, + ) + ) + return agents @pytest.fixture @@ -145,15 +143,8 @@ def information_retrieval_agents( agent_test_config, memory_json_file, workspace: Workspace ): agents = [] - command_registry = CommandRegistry() - enabled_command_categories = [ - x - for x in COMMAND_CATEGORIES - if x not in agent_test_config.disabled_command_categories - ] + command_registry = get_command_registry(agent_test_config) - for command_category in enabled_command_categories: - command_registry.import_commands(command_category) ai_goals = [ "Write to a file called output.txt tesla's revenue in 2022 after searching for 'tesla revenue 2022'.", "Write to a file called output.txt tesla's revenue in 2022.", @@ -284,3 +275,15 @@ def debug_code_agent(agent_test_config, memory_json_file, workspace: Workspace): ) return agent + + +def get_command_registry(agent_test_config): + command_registry = CommandRegistry() + enabled_command_categories = [ + x + for x in COMMAND_CATEGORIES + if x not in agent_test_config.disabled_command_categories + ] + for command_category in enabled_command_categories: + command_registry.import_commands(command_category) + return command_registry From 12ed5a957b934839c274d5e2f1f13b0089f3f0bc Mon Sep 17 00:00:00 2001 From: merwanehamadi Date: Fri, 9 Jun 2023 08:40:06 -0700 Subject: [PATCH 24/97] Fix 
debug code challenge (#4632) --- .../basic_abilities/test_write_file.py | 4 +- tests/challenges/current_score.json | 4 +- .../debug_code/data/{two_sum.py => code.py} | 10 +-- tests/challenges/debug_code/data/test.py | 31 ++++++++ .../debug_code/data/two_sum_tests.py | 30 -------- .../debug_code/test_debug_code_challenge_a.py | 32 ++++++--- .../test_information_retrieval_challenge_a.py | 6 +- .../test_information_retrieval_challenge_b.py | 7 +- .../test_kubernetes_template_challenge_a.py | 5 +- .../memory/test_memory_challenge_a.py | 8 ++- .../memory/test_memory_challenge_b.py | 12 +++- .../memory/test_memory_challenge_c.py | 12 ++-- tests/challenges/utils.py | 14 ++++ tests/integration/agent_factory.py | 72 ++++++++++--------- 14 files changed, 143 insertions(+), 104 deletions(-) rename tests/challenges/debug_code/data/{two_sum.py => code.py} (59%) create mode 100644 tests/challenges/debug_code/data/test.py delete mode 100644 tests/challenges/debug_code/data/two_sum_tests.py diff --git a/tests/challenges/basic_abilities/test_write_file.py b/tests/challenges/basic_abilities/test_write_file.py index 9c472985..1c75a9b3 100644 --- a/tests/challenges/basic_abilities/test_write_file.py +++ b/tests/challenges/basic_abilities/test_write_file.py @@ -6,7 +6,7 @@ from autogpt.agent import Agent from autogpt.commands.file_operations import read_file from autogpt.config import Config from tests.challenges.challenge_decorator.challenge_decorator import challenge -from tests.challenges.utils import run_interaction_loop +from tests.challenges.utils import get_workspace_path, run_interaction_loop from tests.utils import requires_api_key CYCLE_COUNT_PER_LEVEL = [1, 1] @@ -34,7 +34,7 @@ def test_write_file( expected_outputs = EXPECTED_OUTPUTS_PER_LEVEL[level_to_run - 1] for file_name, expected_lines in expected_outputs.items(): - file_path = str(file_system_agent.workspace.get_path(file_name)) + file_path = get_workspace_path(file_system_agent, file_name) content = read_file(file_path, config) for expected_line in expected_lines: assert ( diff --git a/tests/challenges/current_score.json b/tests/challenges/current_score.json index 8be3fb5e..1f65e83e 100644 --- a/tests/challenges/current_score.json +++ b/tests/challenges/current_score.json @@ -11,8 +11,8 @@ }, "debug_code": { "debug_code_challenge_a": { - "max_level": 1, - "max_level_beaten": null + "max_level": 2, + "max_level_beaten": 1 } }, "information_retrieval": { diff --git a/tests/challenges/debug_code/data/two_sum.py b/tests/challenges/debug_code/data/code.py similarity index 59% rename from tests/challenges/debug_code/data/two_sum.py rename to tests/challenges/debug_code/data/code.py index 305cff4e..df8120bf 100644 --- a/tests/challenges/debug_code/data/two_sum.py +++ b/tests/challenges/debug_code/data/code.py @@ -2,18 +2,12 @@ from typing import List, Optional -def two_sum(nums: List, target: int) -> Optional[int]: +def two_sum(nums: List, target: int) -> Optional[List[int]]: seen = {} for i, num in enumerate(nums): + typo complement = target - num if complement in seen: return [seen[complement], i] seen[num] = i return None - - -# Example usage: -nums = [2, 7, 11, 15] -target = 9 -result = two_sum(nums, target) -print(result) # Output: [0, 1] diff --git a/tests/challenges/debug_code/data/test.py b/tests/challenges/debug_code/data/test.py new file mode 100644 index 00000000..d85d1353 --- /dev/null +++ b/tests/challenges/debug_code/data/test.py @@ -0,0 +1,31 @@ +# mypy: ignore-errors +from code import two_sum +from typing import List + + +def 
test_two_sum(nums: List, target: int, expected_result: List[int]) -> None: + result = two_sum(nums, target) + print(result) + assert ( + result == expected_result + ), f"AssertionError: Expected the output to be {expected_result}" + + +if __name__ == "__main__": + # test the trivial case with the first two numbers + nums = [2, 7, 11, 15] + target = 9 + expected_result = [0, 1] + test_two_sum(nums, target, expected_result) + + # test for ability to use zero and the same number twice + nums = [2, 7, 0, 15, 12, 0] + target = 0 + expected_result = [2, 5] + test_two_sum(nums, target, expected_result) + + # test for first and last index usage and negative numbers + nums = [-6, 7, 11, 4] + target = -2 + expected_result = [0, 3] + test_two_sum(nums, target, expected_result) diff --git a/tests/challenges/debug_code/data/two_sum_tests.py b/tests/challenges/debug_code/data/two_sum_tests.py deleted file mode 100644 index 0eb89bcb..00000000 --- a/tests/challenges/debug_code/data/two_sum_tests.py +++ /dev/null @@ -1,30 +0,0 @@ -# mypy: ignore-errors -# we need a new line at the top of the file to avoid a syntax error - - -def test_two_sum(nums, target, expected_result): - # These tests are appended to the two_sum file so we can ignore this error for now - result = two_sum(nums, target) - print(result) - assert ( - result == expected_result - ), f"AssertionError: Expected the output to be {expected_result}" - - -# test the trivial case with the first two numbers -nums = [2, 7, 11, 15] -target = 9 -expected_result = [0, 1] -test_two_sum(nums, target, expected_result) - -# test for ability to use zero and the same number twice -nums = [2, 7, 0, 15, 12, 0] -target = 0 -expected_result = [2, 5] -test_two_sum(nums, target, expected_result) - -# test for first and last index usage and negative numbers -nums = [-6, 7, 11, 4] -target = -2 -expected_result = [0, 3] -test_two_sum(nums, target, expected_result) diff --git a/tests/challenges/debug_code/test_debug_code_challenge_a.py b/tests/challenges/debug_code/test_debug_code_challenge_a.py index 93df754e..130e9427 100644 --- a/tests/challenges/debug_code/test_debug_code_challenge_a.py +++ b/tests/challenges/debug_code/test_debug_code_challenge_a.py @@ -5,20 +5,27 @@ from pytest_mock import MockerFixture from autogpt.agent import Agent from autogpt.commands.execute_code import execute_python_file -from autogpt.commands.file_operations import append_to_file, write_to_file from autogpt.config import Config from tests.challenges.challenge_decorator.challenge_decorator import challenge -from tests.challenges.utils import run_interaction_loop +from tests.challenges.utils import ( + copy_file_into_workspace, + get_workspace_path, + run_interaction_loop, +) from tests.utils import requires_api_key CYCLE_COUNT = 5 +EXPECTED_VALUES = ["[0, 1]", "[2, 5]", "[0, 3]"] +DIRECTORY_PATH = Path(__file__).parent / "data" +CODE_FILE_PATH = "code.py" +TEST_FILE_PATH = "test.py" @pytest.mark.vcr @requires_api_key("OPENAI_API_KEY") @challenge def test_debug_code_challenge_a( - debug_code_agent: Agent, + debug_code_agents: Agent, monkeypatch: pytest.MonkeyPatch, patched_api_requestor: MockerFixture, config: Config, @@ -33,17 +40,20 @@ def test_debug_code_challenge_a( :config: The config object for the agent. :level_to_run: The level to run. 
""" + debug_code_agent = debug_code_agents[level_to_run - 1] - file_path = str(debug_code_agent.workspace.get_path("code.py")) - - code_file_path = Path(__file__).parent / "data" / "two_sum.py" - test_file_path = Path(__file__).parent / "data" / "two_sum_tests.py" - - write_to_file(file_path, code_file_path.read_text(), config) + copy_file_into_workspace(debug_code_agent, DIRECTORY_PATH, CODE_FILE_PATH) + copy_file_into_workspace(debug_code_agent, DIRECTORY_PATH, TEST_FILE_PATH) run_interaction_loop(monkeypatch, debug_code_agent, CYCLE_COUNT) - append_to_file(file_path, test_file_path.read_text(), config) + output = execute_python_file( + get_workspace_path(debug_code_agent, TEST_FILE_PATH), config + ) - output = execute_python_file(file_path, config) assert "error" not in output.lower(), f"Errors found in output: {output}!" + + for expected_value in EXPECTED_VALUES: + assert ( + expected_value in output + ), f"Expected output to contain {expected_value}, but it was not found in {output}!" diff --git a/tests/challenges/information_retrieval/test_information_retrieval_challenge_a.py b/tests/challenges/information_retrieval/test_information_retrieval_challenge_a.py index eb3d0c94..086623a8 100644 --- a/tests/challenges/information_retrieval/test_information_retrieval_challenge_a.py +++ b/tests/challenges/information_retrieval/test_information_retrieval_challenge_a.py @@ -4,13 +4,15 @@ from pytest_mock import MockerFixture from autogpt.commands.file_operations import read_file from autogpt.config import Config from tests.challenges.challenge_decorator.challenge_decorator import challenge -from tests.challenges.utils import run_interaction_loop +from tests.challenges.utils import get_workspace_path, run_interaction_loop from tests.utils import requires_api_key CYCLE_COUNT = 3 EXPECTED_REVENUES = [["81"], ["81"], ["81", "53", "24", "21", "11", "7", "4", "3", "2"]] from autogpt.agent import Agent +OUTPUT_LOCATION = "output.txt" + @pytest.mark.vcr @requires_api_key("OPENAI_API_KEY") @@ -31,7 +33,7 @@ def test_information_retrieval_challenge_a( information_retrieval_agent = information_retrieval_agents[level_to_run - 1] run_interaction_loop(monkeypatch, information_retrieval_agent, CYCLE_COUNT) - file_path = str(information_retrieval_agent.workspace.get_path("output.txt")) + file_path = get_workspace_path(information_retrieval_agent, OUTPUT_LOCATION) content = read_file(file_path, config) expected_revenues = EXPECTED_REVENUES[level_to_run - 1] for revenue in expected_revenues: diff --git a/tests/challenges/information_retrieval/test_information_retrieval_challenge_b.py b/tests/challenges/information_retrieval/test_information_retrieval_challenge_b.py index 51195f77..0b1a897a 100644 --- a/tests/challenges/information_retrieval/test_information_retrieval_challenge_b.py +++ b/tests/challenges/information_retrieval/test_information_retrieval_challenge_b.py @@ -7,10 +7,11 @@ from autogpt.agent import Agent from autogpt.commands.file_operations import read_file from autogpt.config import Config from tests.challenges.challenge_decorator.challenge_decorator import challenge -from tests.challenges.utils import run_interaction_loop +from tests.challenges.utils import get_workspace_path, run_interaction_loop from tests.utils import requires_api_key CYCLE_COUNT = 3 +OUTPUT_LOCATION = "2010_nobel_prize_winners.txt" @pytest.mark.vcr @@ -35,10 +36,8 @@ def test_information_retrieval_challenge_b( with contextlib.suppress(SystemExit): run_interaction_loop(monkeypatch, get_nobel_prize_agent, CYCLE_COUNT) + 
file_path = get_workspace_path(get_nobel_prize_agent, OUTPUT_LOCATION) - file_path = str( - get_nobel_prize_agent.workspace.get_path("2010_nobel_prize_winners.txt") - ) content = read_file(file_path, config) assert "Andre Geim" in content, "Expected the file to contain Andre Geim" assert ( diff --git a/tests/challenges/kubernetes/test_kubernetes_template_challenge_a.py b/tests/challenges/kubernetes/test_kubernetes_template_challenge_a.py index 8ea20b94..0cf1cb42 100644 --- a/tests/challenges/kubernetes/test_kubernetes_template_challenge_a.py +++ b/tests/challenges/kubernetes/test_kubernetes_template_challenge_a.py @@ -6,10 +6,11 @@ from autogpt.agent import Agent from autogpt.commands.file_operations import read_file from autogpt.config import Config from tests.challenges.challenge_decorator.challenge_decorator import challenge -from tests.challenges.utils import run_interaction_loop +from tests.challenges.utils import get_workspace_path, run_interaction_loop from tests.utils import requires_api_key CYCLE_COUNT = 3 +OUTPUT_LOCATION = "kube.yaml" @pytest.mark.vcr @@ -34,7 +35,7 @@ def test_kubernetes_template_challenge_a( """ run_interaction_loop(monkeypatch, kubernetes_agent, CYCLE_COUNT) - file_path = str(kubernetes_agent.workspace.get_path("kube.yaml")) + file_path = get_workspace_path(kubernetes_agent, OUTPUT_LOCATION) content = read_file(file_path, config) for word in ["apiVersion", "kind", "metadata", "spec"]: diff --git a/tests/challenges/memory/test_memory_challenge_a.py b/tests/challenges/memory/test_memory_challenge_a.py index 3e3251dd..336f8d76 100644 --- a/tests/challenges/memory/test_memory_challenge_a.py +++ b/tests/challenges/memory/test_memory_challenge_a.py @@ -5,9 +5,11 @@ from autogpt.agent import Agent from autogpt.commands.file_operations import read_file, write_to_file from autogpt.config import Config from tests.challenges.challenge_decorator.challenge_decorator import challenge -from tests.challenges.utils import run_interaction_loop +from tests.challenges.utils import get_workspace_path, run_interaction_loop from tests.utils import requires_api_key +OUTPUT_LOCATION = "output.txt" + @pytest.mark.vcr @requires_api_key("OPENAI_API_KEY") @@ -35,7 +37,7 @@ def test_memory_challenge_a( run_interaction_loop(monkeypatch, memory_management_agent, level_to_run + 2) - file_path = str(memory_management_agent.workspace.get_path("output.txt")) + file_path = get_workspace_path(memory_management_agent, OUTPUT_LOCATION) content = read_file(file_path, config) assert task_id in content, f"Expected the file to contain {task_id}" @@ -58,7 +60,7 @@ def create_instructions_files( for i in range(1, num_files + 1): content = generate_content(i, task_id, base_filename, num_files) file_name = f"{base_filename}{i}.txt" - file_path = str(memory_management_agent.workspace.get_path(file_name)) + file_path = get_workspace_path(memory_management_agent, file_name) write_to_file(file_path, content, config) diff --git a/tests/challenges/memory/test_memory_challenge_b.py b/tests/challenges/memory/test_memory_challenge_b.py index 011fa17a..829afa1a 100644 --- a/tests/challenges/memory/test_memory_challenge_b.py +++ b/tests/challenges/memory/test_memory_challenge_b.py @@ -5,10 +5,15 @@ from autogpt.agent import Agent from autogpt.commands.file_operations import read_file, write_to_file from autogpt.config import Config from tests.challenges.challenge_decorator.challenge_decorator import challenge -from tests.challenges.utils import generate_noise, run_interaction_loop +from tests.challenges.utils import 
( + generate_noise, + get_workspace_path, + run_interaction_loop, +) from tests.utils import requires_api_key NOISE = 1000 +OUTPUT_LOCATION = "output.txt" @pytest.mark.vcr @@ -36,7 +41,7 @@ def test_memory_challenge_b( run_interaction_loop(monkeypatch, memory_management_agent, level_to_run + 2) - file_path = str(memory_management_agent.workspace.get_path("output.txt")) + file_path = get_workspace_path(memory_management_agent, OUTPUT_LOCATION) content = read_file(file_path, config) for task_id in task_ids: assert task_id in content, f"Expected the file to contain {task_id}" @@ -61,7 +66,8 @@ def create_instructions_files( for i in range(1, level + 1): content = generate_content(i, task_ids, base_filename, level) file_name = f"{base_filename}{i}.txt" - file_path = str(memory_management_agent.workspace.get_path(file_name)) + file_path = get_workspace_path(memory_management_agent, file_name) + write_to_file(file_path, content, config) diff --git a/tests/challenges/memory/test_memory_challenge_c.py b/tests/challenges/memory/test_memory_challenge_c.py index d7cc6994..2cd453d9 100644 --- a/tests/challenges/memory/test_memory_challenge_c.py +++ b/tests/challenges/memory/test_memory_challenge_c.py @@ -5,10 +5,15 @@ from autogpt.agent import Agent from autogpt.commands.file_operations import read_file, write_to_file from autogpt.config import Config from tests.challenges.challenge_decorator.challenge_decorator import challenge -from tests.challenges.utils import generate_noise, run_interaction_loop +from tests.challenges.utils import ( + generate_noise, + get_workspace_path, + run_interaction_loop, +) from tests.utils import requires_api_key NOISE = 1000 +OUTPUT_LOCATION = "output.txt" # @pytest.mark.vcr @@ -53,8 +58,7 @@ def test_memory_challenge_c( ) run_interaction_loop(monkeypatch, memory_management_agent, level_to_run + 2) - - file_path = str(memory_management_agent.workspace.get_path("output.txt")) + file_path = get_workspace_path(memory_management_agent, OUTPUT_LOCATION) content = read_file(file_path, config) for phrase in level_silly_phrases: assert phrase in content, f"Expected the file to contain {phrase}" @@ -79,7 +83,7 @@ def create_instructions_files( for i in range(1, level + 1): content = generate_content(i, task_ids, base_filename, level) file_name = f"{base_filename}{i}.txt" - file_path = str(memory_management_agent.workspace.get_path(file_name)) + file_path = get_workspace_path(memory_management_agent, file_name) write_to_file(file_path, content, config) diff --git a/tests/challenges/utils.py b/tests/challenges/utils.py index 3ffd136b..c5312601 100644 --- a/tests/challenges/utils.py +++ b/tests/challenges/utils.py @@ -1,5 +1,7 @@ import contextlib import random +import shutil +from pathlib import Path from typing import Generator import pytest @@ -42,3 +44,15 @@ def run_interaction_loop( setup_mock_input(monkeypatch, cycle_count) with contextlib.suppress(SystemExit): agent.start_interaction_loop() + + +def get_workspace_path(agent: Agent, file_name: str) -> str: + return str(agent.workspace.get_path(file_name)) + + +def copy_file_into_workspace( + agent: Agent, directory_path: Path, file_path: str +) -> None: + workspace_code_file_path = get_workspace_path(agent, file_path) + code_file_path = directory_path / file_path + shutil.copy(code_file_path, workspace_code_file_path) diff --git a/tests/integration/agent_factory.py b/tests/integration/agent_factory.py index fb08411e..8f7128e2 100644 --- a/tests/integration/agent_factory.py +++ b/tests/integration/agent_factory.py @@ -240,41 
+240,47 @@ def get_nobel_prize_agent(agent_test_config, memory_json_file, workspace: Worksp @pytest.fixture -def debug_code_agent(agent_test_config, memory_json_file, workspace: Workspace): - command_registry = CommandRegistry() - command_registry.import_commands("autogpt.commands.file_operations") - command_registry.import_commands("autogpt.commands.execute_code") - command_registry.import_commands("autogpt.commands.improve_code") - command_registry.import_commands("autogpt.app") - command_registry.import_commands("autogpt.commands.task_statuses") - - ai_config = AIConfig( - ai_name="Debug Code Agent", - ai_role="an autonomous agent that specializes in debugging python code", - ai_goals=[ - "1-Run the code in the file named 'code.py' using the execute_code command.", - "2-Read code.py to understand why the code is not working as expected.", - "3-Modify code.py to fix the error.", - "Repeat step 1, 2 and 3 until the code is working as expected. When you're done use the task_complete command.", - "Do not use any other commands than execute_python_file and write_file", +def debug_code_agents(agent_test_config, memory_json_file, workspace: Workspace): + agents = [] + goals = [ + [ + "1- Run test.py using the execute_python_file command.", + "2- Read code.py using the read_file command.", + "3- Modify code.py using the write_to_file command." + "Repeat step 1, 2 and 3 until test.py runs without errors.", ], - ) - ai_config.command_registry = command_registry + [ + "1- Run test.py.", + "2- Read code.py.", + "3- Modify code.py." + "Repeat step 1, 2 and 3 until test.py runs without errors.", + ], + ["1- Make test.py run without errors."], + ] - system_prompt = ai_config.construct_full_prompt() - Config().set_continuous_mode(False) - agent = Agent( - ai_name="Debug Code Agent", - memory=memory_json_file, - command_registry=command_registry, - config=ai_config, - next_action_count=0, - system_prompt=system_prompt, - triggering_prompt=DEFAULT_TRIGGERING_PROMPT, - workspace_directory=workspace.root, - ) - - return agent + for goal in goals: + ai_config = AIConfig( + ai_name="Debug Code Agent", + ai_role="an autonomous agent that specializes in debugging python code", + ai_goals=goal, + ) + command_registry = get_command_registry(agent_test_config) + ai_config.command_registry = command_registry + system_prompt = ai_config.construct_full_prompt() + Config().set_continuous_mode(False) + agents.append( + Agent( + ai_name="Debug Code Agent", + memory=memory_json_file, + command_registry=command_registry, + config=ai_config, + next_action_count=0, + system_prompt=system_prompt, + triggering_prompt=DEFAULT_TRIGGERING_PROMPT, + workspace_directory=workspace.root, + ) + ) + return agents def get_command_registry(agent_test_config): From 3d06b2e4c0f5a536c0350d3aeb48b83cf518bd2c Mon Sep 17 00:00:00 2001 From: merwanehamadi Date: Fri, 9 Jun 2023 10:02:03 -0700 Subject: [PATCH 25/97] Decrement information retrieval challenge a (#4637) --- tests/Auto-GPT-test-cassettes | 2 +- tests/challenges/current_score.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/Auto-GPT-test-cassettes b/tests/Auto-GPT-test-cassettes index dbea8039..ce74e399 160000 --- a/tests/Auto-GPT-test-cassettes +++ b/tests/Auto-GPT-test-cassettes @@ -1 +1 @@ -Subproject commit dbea803947475a53ee764aa4f74f304796f197fc +Subproject commit ce74e399ea9c5e45fd80f6187f24bab715cbd5fb diff --git a/tests/challenges/current_score.json b/tests/challenges/current_score.json index 1f65e83e..49f3b858 100644 --- 
a/tests/challenges/current_score.json +++ b/tests/challenges/current_score.json @@ -18,7 +18,7 @@ "information_retrieval": { "information_retrieval_challenge_a": { "max_level": 3, - "max_level_beaten": 1 + "max_level_beaten": null }, "information_retrieval_challenge_b": { "max_level": 1, From 94280b2d14c9000ec113322cb060e9d6d2d7c107 Mon Sep 17 00:00:00 2001 From: Erik Peterson Date: Fri, 9 Jun 2023 10:40:32 -0700 Subject: [PATCH 26/97] Add command for directly executing python code (#4581) * Add command for directly executing python code * Fix docstring * Clarify / update filename references --------- Co-authored-by: merwanehamadi --- autogpt/commands/execute_code.py | 35 ++++++++++++++++++++ tests/integration/test_execute_code.py | 44 ++++++++++++++++++++++++-- 2 files changed, 77 insertions(+), 2 deletions(-) diff --git a/autogpt/commands/execute_code.py b/autogpt/commands/execute_code.py index 999e40f8..b164a85f 100644 --- a/autogpt/commands/execute_code.py +++ b/autogpt/commands/execute_code.py @@ -8,11 +8,46 @@ from docker.errors import ImageNotFound from autogpt.commands.command import command from autogpt.config import Config +from autogpt.config.ai_config import AIConfig from autogpt.logs import logger from autogpt.setup import CFG from autogpt.workspace.workspace import Workspace +@command( + "execute_python_code", + "Create a Python file and execute it", + '"code": "", "basename": ""', +) +def execute_python_code(code: str, basename: str, config: Config) -> str: + """Create and execute a Python file in a Docker container and return the STDOUT of the + executed code. If there is any data that needs to be captured use a print statement + + Args: + code (str): The Python code to run + basename (str): A name to be given to the Python file + + Returns: + str: The STDOUT captured from the code when it ran + """ + ai_name = AIConfig.load(config.ai_settings_file).ai_name + directory = os.path.join(config.workspace_path, ai_name, "executed_code") + os.makedirs(directory, exist_ok=True) + + if not basename.endswith(".py"): + basename = basename + ".py" + + path = os.path.join(directory, basename) + + try: + with open(path, "w+", encoding="utf-8") as f: + f.write(code) + + return execute_python_file(f.name, config) + except Exception as e: + return f"Error: {str(e)}" + + @command("execute_python_file", "Execute Python File", '"filename": ""') def execute_python_file(filename: str, config: Config) -> str: """Execute a Python file in a Docker container and return the output diff --git a/tests/integration/test_execute_code.py b/tests/integration/test_execute_code.py index ce3c1e16..e4ecf991 100644 --- a/tests/integration/test_execute_code.py +++ b/tests/integration/test_execute_code.py @@ -1,3 +1,4 @@ +import os import random import string import tempfile @@ -8,6 +9,7 @@ from pytest_mock import MockerFixture import autogpt.commands.execute_code as sut # system under testing from autogpt.config import Config +from autogpt.config.ai_config import AIConfig @pytest.fixture @@ -16,9 +18,14 @@ def config_allow_execute(config: Config, mocker: MockerFixture) -> Callable: @pytest.fixture -def python_test_file(config: Config, random_string) -> Callable: +def random_code(random_string) -> Callable: + return f"print('Hello {random_string}!')" + + +@pytest.fixture +def python_test_file(config: Config, random_code: str) -> Callable: temp_file = tempfile.NamedTemporaryFile(dir=config.workspace_path, suffix=".py") - temp_file.write(str.encode(f"print('Hello {random_string}!')")) + 
temp_file.write(str.encode(random_code)) temp_file.flush() yield temp_file.name @@ -35,6 +42,39 @@ def test_execute_python_file(python_test_file: str, random_string: str, config): assert result.replace("\r", "") == f"Hello {random_string}!\n" +def test_execute_python_code(random_code: str, random_string: str, config: Config): + ai_name = AIConfig.load(config.ai_settings_file).ai_name + + result: str = sut.execute_python_code(random_code, "test_code", config) + assert result.replace("\r", "") == f"Hello {random_string}!\n" + + # Check that the code is stored + destination = os.path.join( + config.workspace_path, ai_name, "executed_code", "test_code.py" + ) + with open(destination) as f: + assert f.read() == random_code + + +def test_execute_python_code_overwrites_file( + random_code: str, random_string: str, config: Config +): + ai_name = AIConfig.load(config.ai_settings_file).ai_name + destination = os.path.join( + config.workspace_path, ai_name, "executed_code", "test_code.py" + ) + os.makedirs(os.path.dirname(destination), exist_ok=True) + + with open(destination, "w+") as f: + f.write("This will be overwritten") + + sut.execute_python_code(random_code, "test_code.py", config) + + # Check that the file is updated with the new code + with open(destination) as f: + assert f.read() == random_code + + def test_execute_python_file_invalid(config: Config): assert all( s in sut.execute_python_file("not_python", config).lower() From 82150397852c9f1793a05600ca84b43d461683e8 Mon Sep 17 00:00:00 2001 From: Auto-GPT-Bot Date: Fri, 9 Jun 2023 17:46:06 +0000 Subject: [PATCH 27/97] Update cassette submodule --- tests/Auto-GPT-test-cassettes | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/Auto-GPT-test-cassettes b/tests/Auto-GPT-test-cassettes index ce74e399..475b9365 160000 --- a/tests/Auto-GPT-test-cassettes +++ b/tests/Auto-GPT-test-cassettes @@ -1 +1 @@ -Subproject commit ce74e399ea9c5e45fd80f6187f24bab715cbd5fb +Subproject commit 475b93658ca4999f9f7344c930b3b83586477866 From cce50bef5091ba954215561347fd523913cc5ff4 Mon Sep 17 00:00:00 2001 From: Erik Peterson Date: Fri, 9 Jun 2023 11:02:52 -0700 Subject: [PATCH 28/97] Fix issues with information retrieval challenge a (#4622) Co-authored-by: merwanehamadi --- tests/integration/agent_factory.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/integration/agent_factory.py b/tests/integration/agent_factory.py index 8f7128e2..bad835dc 100644 --- a/tests/integration/agent_factory.py +++ b/tests/integration/agent_factory.py @@ -146,9 +146,9 @@ def information_retrieval_agents( command_registry = get_command_registry(agent_test_config) ai_goals = [ - "Write to a file called output.txt tesla's revenue in 2022 after searching for 'tesla revenue 2022'.", - "Write to a file called output.txt tesla's revenue in 2022.", - "Write to a file called output.txt tesla's revenue every year since its creation.", + "Write to a file called output.txt containing tesla's revenue in 2022 after searching for 'tesla revenue 2022'.", + "Write to a file called output.txt containing tesla's revenue in 2022.", + "Write to a file called output.txt containing tesla's revenue every year since its creation.", ] for ai_goal in ai_goals: ai_config = AIConfig( From ff4e53d0e66182bf58f6c6174d5bfe7912d02fd3 Mon Sep 17 00:00:00 2001 From: Erik Peterson Date: Fri, 9 Jun 2023 11:48:20 -0700 Subject: [PATCH 29/97] Streamline / clarify shell command control configuration (#4628) * Streamline / clarify shell command control configuration * Fix 
lint --- .env.template | 23 ++++++++----- autogpt/commands/execute_code.py | 21 ++++++------ autogpt/config/config.py | 22 +++++++------ tests/integration/test_execute_code.py | 45 +++++++++++++++++++------- 4 files changed, 71 insertions(+), 40 deletions(-) diff --git a/.env.template b/.env.template index c78701a7..c678e5d0 100644 --- a/.env.template +++ b/.env.template @@ -41,14 +41,6 @@ ## For example, to disable coding related features, uncomment the next line # DISABLED_COMMAND_CATEGORIES=autogpt.commands.analyze_code,autogpt.commands.execute_code,autogpt.commands.git_operations,autogpt.commands.improve_code,autogpt.commands.write_tests -## DENY_COMMANDS - The list of commands that are not allowed to be executed by Auto-GPT (Default: None) -# the following are examples: -# DENY_COMMANDS=cd,nano,vim,vi,emacs,rm,sudo,top,ping,ssh,scp - -## ALLOW_COMMANDS - ONLY those commands will be allowed to be executed by Auto-GPT -# the following are examples: -# ALLOW_COMMANDS=ls,git,cat,grep,find,echo,ps,curl,wget - ################################################################################ ### LLM PROVIDER @@ -89,6 +81,21 @@ OPENAI_API_KEY=your-openai-api-key ## EMBEDDING_MODEL - Model to use for creating embeddings # EMBEDDING_MODEL=text-embedding-ada-002 +################################################################################ +### SHELL EXECUTION +################################################################################ + +## SHELL_COMMAND_CONTROL - Whether to use "allowlist" or "denylist" to determine what shell commands can be executed (Default: denylist) +# SHELL_COMMAND_CONTROL=denylist + +## ONLY if SHELL_COMMAND_CONTROL is set to denylist: +## SHELL_DENYLIST - List of shell commands that ARE NOT allowed to be executed by Auto-GPT (Default: sudo,su) +# SHELL_DENYLIST=sudo,su + +## ONLY if SHELL_COMMAND_CONTROL is set to allowlist: +## SHELL_ALLOWLIST - List of shell commands that ARE allowed to be executed by Auto-GPT (Default: None) +# SHELL_ALLOWLIST= + ################################################################################ ### MEMORY ################################################################################ diff --git a/autogpt/commands/execute_code.py b/autogpt/commands/execute_code.py index b164a85f..9fd3d315 100644 --- a/autogpt/commands/execute_code.py +++ b/autogpt/commands/execute_code.py @@ -13,6 +13,9 @@ from autogpt.logs import logger from autogpt.setup import CFG from autogpt.workspace.workspace import Workspace +ALLOWLIST_CONTROL = "allowlist" +DENYLIST_CONTROL = "denylist" + @command( "execute_python_code", @@ -152,21 +155,15 @@ def validate_command(command: str, config: Config) -> bool: Returns: bool: True if the command is allowed, False otherwise """ - tokens = command.split() - - if not tokens: + if not command: return False - if config.deny_commands and tokens[0] in config.deny_commands: - return False + command_name = command.split()[0] - for keyword in config.allow_commands: - if keyword in tokens: - return True - if config.allow_commands: - return False - - return True + if config.shell_command_control == ALLOWLIST_CONTROL: + return command_name in config.shell_allowlist + else: + return command_name not in config.shell_denylist @command( diff --git a/autogpt/config/config.py b/autogpt/config/config.py index 629e9ffb..753c99fc 100644 --- a/autogpt/config/config.py +++ b/autogpt/config/config.py @@ -38,17 +38,21 @@ class Config(metaclass=Singleton): else: self.disabled_command_categories = [] - deny_commands = 
os.getenv("DENY_COMMANDS") - if deny_commands: - self.deny_commands = deny_commands.split(",") - else: - self.deny_commands = [] + self.shell_command_control = os.getenv("SHELL_COMMAND_CONTROL", "denylist") - allow_commands = os.getenv("ALLOW_COMMANDS") - if allow_commands: - self.allow_commands = allow_commands.split(",") + # DENY_COMMANDS is deprecated and included for backwards-compatibility + shell_denylist = os.getenv("SHELL_DENYLIST", os.getenv("DENY_COMMANDS")) + if shell_denylist: + self.shell_denylist = shell_denylist.split(",") else: - self.allow_commands = [] + self.shell_denylist = ["sudo", "su"] + + # ALLOW_COMMANDS is deprecated and included for backwards-compatibility + shell_allowlist = os.getenv("SHELL_ALLOWLIST", os.getenv("ALLOW_COMMANDS")) + if shell_allowlist: + self.shell_allowlist = shell_allowlist.split(",") + else: + self.shell_allowlist = [] self.ai_settings_file = os.getenv("AI_SETTINGS_FILE", "ai_settings.yaml") self.prompt_settings_file = os.getenv( diff --git a/tests/integration/test_execute_code.py b/tests/integration/test_execute_code.py index e4ecf991..530e7702 100644 --- a/tests/integration/test_execute_code.py +++ b/tests/integration/test_execute_code.py @@ -5,18 +5,12 @@ import tempfile from typing import Callable import pytest -from pytest_mock import MockerFixture import autogpt.commands.execute_code as sut # system under testing from autogpt.config import Config from autogpt.config.ai_config import AIConfig -@pytest.fixture -def config_allow_execute(config: Config, mocker: MockerFixture) -> Callable: - yield mocker.patch.object(config, "execute_local_commands", True) - - @pytest.fixture def random_code(random_string) -> Callable: return f"print('Hello {random_string}!')" @@ -92,14 +86,43 @@ def test_execute_python_file_not_found(config: Config): ) -def test_execute_shell(config_allow_execute: bool, random_string: str, config: Config): +def test_execute_shell(random_string: str, config: Config): result = sut.execute_shell(f"echo 'Hello {random_string}!'", config) assert f"Hello {random_string}!" in result -def test_execute_shell_deny_command( - python_test_file: str, config_allow_execute: bool, config: Config -): - config.deny_commands = ["echo"] +def test_execute_shell_local_commands_not_allowed(random_string: str, config: Config): + result = sut.execute_shell(f"echo 'Hello {random_string}!'", config) + assert f"Hello {random_string}!" 
in result + + +def test_execute_shell_denylist_should_deny(config: Config, random_string: str): + config.shell_denylist = ["echo"] + result = sut.execute_shell(f"echo 'Hello {random_string}!'", config) assert "Error:" in result and "not allowed" in result + + +def test_execute_shell_denylist_should_allow(config: Config, random_string: str): + config.shell_denylist = ["cat"] + + result = sut.execute_shell(f"echo 'Hello {random_string}!'", config) + assert "Hello" in result and random_string in result + assert "Error" not in result + + +def test_execute_shell_allowlist_should_deny(config: Config, random_string: str): + config.shell_command_control = sut.ALLOWLIST_CONTROL + config.shell_allowlist = ["cat"] + + result = sut.execute_shell(f"echo 'Hello {random_string}!'", config) + assert "Error:" in result and "not allowed" in result + + +def test_execute_shell_allowlist_should_allow(config: Config, random_string: str): + config.shell_command_control = sut.ALLOWLIST_CONTROL + config.shell_allowlist = ["echo"] + + result = sut.execute_shell(f"echo 'Hello {random_string}!'", config) + assert "Hello" in result and random_string in result + assert "Error" not in result From 474a9c4d9563cba70675dc7bf73a4ab047d3b2c5 Mon Sep 17 00:00:00 2001 From: javableu <45064273+javableu@users.noreply.github.com> Date: Sat, 10 Jun 2023 00:02:41 +0200 Subject: [PATCH 30/97] False believes challenge based on sally anne test. (#4167) * False believes challenge based on sally anne test. * Update test_memory_challenge_d.py * Update challenge_d.md Some text appearing in bold * Update test_memory_challenge_d.py * Update test_memory_challenge_d.py * Update test_memory_challenge_d.py * Update test_memory_challenge_d.py black test_memory_challenge_d.py * Update test_memory_challenge_d.py replaced the dynamic time depending of the level to a fix time * Update test_memory_challenge_d.py isort command for the libraries * Refactored memory challenge a --------- Co-authored-by: Merwane Hamadi --- docs/challenges/memory/challenge_d.md | 75 ++++++ mkdocs.yml | 1 + tests/challenges/current_score.json | 4 + .../memory/test_memory_challenge_d.py | 241 ++++++++++++++++++ tests/integration/agent_factory.py | 7 +- 5 files changed, 323 insertions(+), 5 deletions(-) create mode 100644 docs/challenges/memory/challenge_d.md create mode 100644 tests/challenges/memory/test_memory_challenge_d.py diff --git a/docs/challenges/memory/challenge_d.md b/docs/challenges/memory/challenge_d.md new file mode 100644 index 00000000..7563cce5 --- /dev/null +++ b/docs/challenges/memory/challenge_d.md @@ -0,0 +1,75 @@ +# Memory Challenge C + +**Status**: Current level to beat: level 1 + +**Command to try**: +``` +pytest -s tests/challenges/memory/test_memory_challenge_d.py --level=1 +`` + +## Description + +The provided code is a unit test designed to validate an AI's ability to track events and beliefs of characters in a story involving moving objects, specifically marbles. This scenario is an advanced form of the classic "Sally-Anne test", a psychological test used to measure a child's social cognitive ability to understand that others' perspectives and beliefs may differ from their own. + +Here is an explanation of the challenge: + +The AI is given a series of events involving characters Sally, Anne, Bob, and Charlie, and the movements of different marbles. These events are designed as tests at increasing levels of complexity. 
+ +For each level, the AI is expected to keep track of the events and the resulting beliefs of each character about the locations of each marble. These beliefs are affected by whether the character was inside or outside the room when events occurred, as characters inside the room are aware of the actions, while characters outside the room aren't. + +After the AI processes the events and generates the beliefs of each character, it writes these beliefs to an output file in JSON format. + +The check_beliefs function then checks the AI's beliefs against the expected beliefs for that level. The expected beliefs are predefined and represent the correct interpretation of the events for each level. + +If the AI's beliefs match the expected beliefs, it means the AI has correctly interpreted the events and the perspectives of each character. This would indicate that the AI has passed the test for that level. + +The test runs for levels up to the maximum level that the AI has successfully beaten, or up to a user-selected level. + + +## Files + +- `instructions_1.txt` + +"Sally has a marble (marble A) and she puts it in her basket (basket S), then leaves the room. Anne moves marble A from Sally's basket (basket S) to her own basket (basket A).", + + +- `instructions_2.txt` + +"Sally gives a new marble (marble B) to Bob who is outside with her. Bob goes into the room and places marble B into Anne's basket (basket A). Anne tells Bob to tell Sally that he lost the marble b. Bob leaves the room and speaks to Sally about the marble B. Meanwhile, after Bob left the room, Anne moves marble A into the green box, but tells Charlie to tell Sally that marble A is under the sofa. Charlie leaves the room and speak to Sally about the marble A as instructed by Anne.", + + +...and so on. + +- `instructions_n.txt` + +The expected believes of every characters are given in a list: + +expected_beliefs = { + 1: { + 'Sally': { + 'marble A': 'basket S', + }, + 'Anne': { + 'marble A': 'basket A', + } + }, + 2: { + 'Sally': { + 'marble A': 'sofa', # Because Charlie told her + }, + 'Anne': { + 'marble A': 'green box', # Because she moved it there + 'marble B': 'basket A', # Because Bob put it there and she was in the room + }, + 'Bob': { + 'B': 'basket A', # Last place he put it + }, + 'Charlie': { + 'A': 'sofa', # Because Anne told him to tell Sally so + } + },... + + +## Objective + +This test essentially checks if an AI can accurately model and track the beliefs of different characters based on their knowledge of events, which is a critical aspect of understanding and generating human-like narratives. This ability would be beneficial for tasks such as writing stories, dialogue systems, and more. 
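For readers tracing the expected beliefs by hand, the bookkeeping the challenge description above relies on can be sketched in a few lines. This is only an illustration of the idea, not part of the patch: the event tuples and the `track_beliefs` helper are invented for the example, and the real comparison is performed by `check_beliefs` in `tests/challenges/memory/test_memory_challenge_d.py` later in this commit.

```python
# Illustrative sketch only (not part of this patch): derive per-character beliefs
# from a list of events. Each event records which marble moved where, and which
# characters were in the room to observe it.
def track_beliefs(events):
    beliefs = {}
    for marble, location, observers in events:
        for person in observers:
            # Observers update their belief; everyone else keeps their last belief.
            beliefs.setdefault(person, {})[marble] = location
    return beliefs

# Level 1: Sally puts marble A in basket S (Sally and Anne both present),
# then leaves; Anne moves marble A to basket A (only Anne present).
level_1_events = [
    ("marble A", "basket S", ["Sally", "Anne"]),
    ("marble A", "basket A", ["Anne"]),
]

print(track_beliefs(level_1_events))
# {'Sally': {'marble A': 'basket S'}, 'Anne': {'marble A': 'basket A'}}
```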
diff --git a/mkdocs.yml b/mkdocs.yml index 48fa0cb5..50e06257 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -25,6 +25,7 @@ nav: - Memory Challenge A: challenges/memory/challenge_a.md - Memory Challenge B: challenges/memory/challenge_b.md - Memory Challenge C: challenges/memory/challenge_c.md + - Memory Challenge D: challenges/memory/challenge_d.md - Information retrieval: - Introduction: challenges/information_retrieval/introduction.md - Information Retrieval Challenge A: challenges/information_retrieval/challenge_a.md diff --git a/tests/challenges/current_score.json b/tests/challenges/current_score.json index 49f3b858..b03a6808 100644 --- a/tests/challenges/current_score.json +++ b/tests/challenges/current_score.json @@ -43,6 +43,10 @@ "memory_challenge_c": { "max_level": 5, "max_level_beaten": 1 + }, + "memory_challenge_d": { + "max_level": 5, + "max_level_beaten": null } } } diff --git a/tests/challenges/memory/test_memory_challenge_d.py b/tests/challenges/memory/test_memory_challenge_d.py new file mode 100644 index 00000000..9e662e08 --- /dev/null +++ b/tests/challenges/memory/test_memory_challenge_d.py @@ -0,0 +1,241 @@ +import json +from typing import Dict + +import pytest +from pytest_mock import MockerFixture + +from autogpt.agent import Agent +from autogpt.commands.file_operations import read_file, write_to_file +from autogpt.config import Config +from tests.challenges.challenge_decorator.challenge_decorator import challenge +from tests.challenges.utils import get_workspace_path, run_interaction_loop +from tests.utils import requires_api_key + +LEVEL_CURRENTLY_BEATEN = 1 +MAX_LEVEL = 5 +OUTPUT_LOCATION = "output.txt" + + +# @pytest.mark.vcr +@requires_api_key("OPENAI_API_KEY") +@challenge +def test_memory_challenge_d( + memory_management_agent: Agent, + patched_api_requestor: MockerFixture, + monkeypatch: pytest.MonkeyPatch, + config: Config, + level_to_run: int, +) -> None: + """ + The agent is given a series of events and must remember the respective beliefs of the characters. + Args: + memory_management_agent (Agent) + user_selected_level (int) + """ + sally_anne_test_phrases = [ + "Sally has a marble (marble A) and she puts it in her basket (basket S), then leaves the room. Anne moves marble A from Sally's basket (basket S) to her own basket (basket A).", + "Sally gives a new marble (marble B) to Bob who is outside with her. Bob goes into the room and places marble B into Anne's basket (basket A). Anne tells Bob to tell Sally that he lost the marble b. Bob leaves the room and speaks to Sally about the marble B. Meanwhile, after Bob left the room, Anne moves marble A into the green box, but tells Charlie to tell Sally that marble A is under the sofa. Charlie leaves the room and speaks to Sally about the marble A as instructed by Anne.", + "Sally gives a new marble (marble C) to Charlie who is outside with her. Charlie enters the room and exchanges marble C with marble B in Anne's basket (basket A). Anne tells Charlie to tell Sally that he put marble C into the red box. Charlie leaves the room and speak to Sally about marble C as instructed by Anne. Meanwhile, after Charlie leaves the room, Bob enters into the room and moves marble A from the green box to under the sofa, but tells Anne to tell Sally that marble A is in the green box. Anne leaves the room and speak to Sally about the marble A as instructed by Bob", + "Sally gives a new marble (marble D) to Anne. Anne gives the marble to Charlie. Charlie enters the room and gives marble D to Bob. 
Bob tells Charlie to tell Sally that he put marble D under the sofa. Bob put marble D under the sofa Charlie leaves the room and speaks to Sally about marble D. Meanwhile, after Charlie leaves the room, Bob takes marble A from under the sofa and places it in the blue box.", + "Sally gives a new marble (marble E) to Charlie who is outside with her. Charlie enters the room and places marble E in the red box. Anne, who is already in the room, takes marble E from the red box, and hides it under the sofa. Then Anne leaves the room and tells Sally that marble E is in the green box. Meanwhile, after Anne leaves the room, Charlie who re-enters the room takes marble D from under the sofa and places it in his own basket (basket C).", + ] + level_sally_anne_test_phrases = sally_anne_test_phrases[:level_to_run] + create_instructions_files( + memory_management_agent, level_to_run, level_sally_anne_test_phrases, config + ) + run_interaction_loop(monkeypatch, memory_management_agent, level_to_run + 2) + file_path = get_workspace_path(memory_management_agent, OUTPUT_LOCATION) + + content = read_file(file_path, config) + check_beliefs(content, level_to_run) + + +def check_beliefs(content: str, level: int) -> None: + # Define the expected beliefs for each level + expected_beliefs = { + 1: { + "Sally": { + "marble A": "basket S", + }, + "Anne": { + "marble A": "basket A", + }, + }, + 2: { + "Sally": { + "marble A": "sofa", # Because Charlie told her + "marble B": "lost", # Because Bob told her + }, + "Anne": { + "marble A": "green box", # Because she moved it there + "marble B": "basket A", # Because Bob put it there and she was in the room + }, + "Bob": { + "marble B": "basket A", # Last place he put it + }, + "Charlie": { + "marble A": "sofa", # Because Anne told him to tell Sally so + }, + }, + 3: { + "Sally": { + "marble A": "green box", # Because Anne told her + "marble C": "red box", # Because Charlie told her + }, + "Anne": { + "marble A": "sofa", # Because Bob moved it there and told her + "marble B": "basket A", # Because Charlie exchanged marble C with marble B in her basket + "marble C": "basket A", # Because Charlie exchanged marble C with marble B in her basket + }, + "Bob": { + "marble A": "sofa", # Because he moved it there + "marble B": "basket A", + # Because Charlie exchanged marble C with marble B in Anne's basket, and he was in the room + "marble C": "basket A", + # Because Charlie exchanged marble C with marble B in Anne's basket, and he was in the room + }, + "Charlie": { + "marble A": "sofa", # Last place he knew it was + "marble B": "basket A", # Because he exchanged marble C with marble B in Anne's basket + "marble C": "red box", # Because Anne told him to tell Sally so + }, + }, + 4: { + "Sally": { + "marble A": "green box", # Because Anne told her in the last conversation + "marble C": "red box", # Because Charlie told her + "marble D": "sofa", # Because Charlie told her + }, + "Anne": { + "marble A": "blue box", # Because Bob moved it there, and she was not in the room to see + "marble B": "basket A", # Last place she knew it was + "marble C": "basket A", # Last place she knew it was + "marble D": "sofa", # Because Bob moved it there, and she was in the room to see + }, + "Bob": { + "marble A": "blue box", # Because he moved it there + "marble B": "basket A", # Last place he knew it was + "marble C": "basket A", # Last place he knew it was + "marble D": "sofa", # Because he moved it there + }, + "Charlie": { + "marble A": "sofa", # Last place he knew it was + "marble B": "basket 
A", # Last place he knew it was + "marble C": "red box", # Last place he knew it was + "marble D": "sofa", # Because Bob told him to tell Sally so + }, + }, + 5: { + "Sally": { + "marble A": "green box", # Because Anne told her in the last level + "marble C": "red box", # Because Charlie told her + "marble D": "sofa", # Because Charlie told her + "marble E": "green box", # Because Anne told her + }, + "Anne": { + "marble A": "blue box", # Last place she knew it was + "marble B": "basket A", # Last place she knew it was + "marble C": "basket A", # Last place she knew it was + "marble D": "basket C", # Last place she knew it was + "marble E": "sofa", # Because she moved it there + }, + "Charlie": { + "marble A": "blue box", # Last place he knew it was + "marble B": "basket A", # Last place he knew it was + "marble C": "basket A", # Last place he knew it was + "marble D": "basket C", # Because he moved it there + "marble E": "red box", # Last place he knew it was + }, + "Bob": { + "marble A": "blue box", # Last place he knew it was + "marble C": "red box", # Last place he knew it was + "marble D": "sofa", # Last place he knew it was + }, + }, + } + + # Extract the beliefs from the AI's response + ai_beliefs = extract_beliefs(content) + # Check the AI's beliefs against the expected beliefs + for character, belief in expected_beliefs[level].items(): + for marble, location in belief.items(): + ai_belief = ai_beliefs.get(character, {}).get(marble, "") + assert ( + location in ai_belief + ), f"For {character}'s {marble}, expected '{location}' to be in '{ai_belief}'" + + +def extract_beliefs(content: str) -> Dict[str, Dict[str, str]]: + """Extract the beliefs of each character from the AI's output.""" + # Parse the JSON content + content_dict = json.loads(content) + beliefs = content_dict.get("beliefs", {}) + return beliefs + + +def create_instructions_files( + memory_management_agent: Agent, + level: int, + test_phrases: list, + config: Config, + base_filename: str = "instructions_", +) -> None: + """ + Creates a series of instructions files for the memory challenge. + Args: + level: + memory_management_agent (Agent) + test_phrases (list) + config (Config) + base_filename (str, optional) + """ + for i in range(1, level + 1): + content = generate_content(i, test_phrases, base_filename, level) + file_name = f"{base_filename}{i}.txt" + file_path = get_workspace_path(memory_management_agent, file_name) + write_to_file(file_path, content, config) + + +def generate_content( + index: int, test_phrases: list, base_filename: str, level: int +) -> str: + """ + Args: + index: int + test_phrases: list + base_filename: str + level: int + Returns: str + """ + phrase = test_phrases[index - 1] + + if index != level: + return f"The current event is '{phrase}'.\nRead the file {base_filename}{index + 1}.txt using the read_file command." + else: + return f""" + This event is '{phrase}' + rules: + 1. Everyone inside the room see and know what are the actions of the people inside the room. + 2. Everyone outside the room do not see and do not know the actions of the people inside the room. + 3. Only write about the marbles that are present in the level + 4. Marbles names are marble A, marble B, marble C, marble D, ... + 5. Believed_location_of_the_specific_marble describes where is the marble like drawer, basket S, sofa, ... + 6. Do not use expression like use instead. + 7. All characters believe what they're being told by other characters. 
+ Instructions: + I) Write the following information in the file output.txt in JSON format: + 1. The respective beliefs of the characters (which means where every marble is according to character x, y, z. Character x should say where it believes every marble it is aware exist is) + The format should be as follows: + {{ + "beliefs": {{ + "": {{ + "": "", + ... + }}, + ... + }}, + }} + Example of output (only use this to understand and learn how to use the format. Nothing else): + {{"beliefs": {{"Sally": {{"marble A": "basket A"}}, "Bob": {{"marble B": "basket S"}}, "Anne": {{"marble A": "green box"}}, "Charlie": {{"marble B": "sofa"}}}} + II) The file output.txt has not been created yet. You need to create it. After that, use the task_complete command. + """ diff --git a/tests/integration/agent_factory.py b/tests/integration/agent_factory.py index bad835dc..4be96481 100644 --- a/tests/integration/agent_factory.py +++ b/tests/integration/agent_factory.py @@ -107,10 +107,7 @@ def file_system_agents( @pytest.fixture def memory_management_agent(agent_test_config, memory_json_file, workspace: Workspace): - command_registry = CommandRegistry() - command_registry.import_commands("autogpt.commands.file_operations") - command_registry.import_commands("autogpt.app") - command_registry.import_commands("autogpt.commands.task_statuses") + command_registry = get_command_registry(agent_test_config) ai_config = AIConfig( ai_name="Follow-Instructions-GPT", @@ -125,7 +122,7 @@ def memory_management_agent(agent_test_config, memory_json_file, workspace: Work system_prompt = ai_config.construct_full_prompt() agent = Agent( - ai_name="", + ai_name="Follow-Instructions-GPT", memory=memory_json_file, command_registry=command_registry, config=ai_config, From 923c67e92a3d27f6f3f274d6931dd6d2d35f88af Mon Sep 17 00:00:00 2001 From: Auto-GPT-Bot Date: Fri, 9 Jun 2023 22:06:01 +0000 Subject: [PATCH 31/97] Update cassette submodule --- tests/Auto-GPT-test-cassettes | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/Auto-GPT-test-cassettes b/tests/Auto-GPT-test-cassettes index 475b9365..02e380c5 160000 --- a/tests/Auto-GPT-test-cassettes +++ b/tests/Auto-GPT-test-cassettes @@ -1 +1 @@ -Subproject commit 475b93658ca4999f9f7344c930b3b83586477866 +Subproject commit 02e380c5f5e537598b33eeff5ea1fd6c49c9f588 From 3081f56ecb1575e2ac47fa5c4015b2561d528d48 Mon Sep 17 00:00:00 2001 From: merwanehamadi Date: Fri, 9 Jun 2023 15:18:56 -0700 Subject: [PATCH 32/97] Quicker logs in pytest (#4486) --- .github/workflows/benchmarks.yml | 31 ------------------------------- .github/workflows/ci.yml | 1 + .github/workflows/docker-ci.yml | 1 + tests/conftest.py | 14 ++++++++++++++ 4 files changed, 16 insertions(+), 31 deletions(-) delete mode 100644 .github/workflows/benchmarks.yml diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml deleted file mode 100644 index 272fca17..00000000 --- a/.github/workflows/benchmarks.yml +++ /dev/null @@ -1,31 +0,0 @@ -name: Run Benchmarks - -on: - workflow_dispatch: - -jobs: - build: - runs-on: ubuntu-latest - - env: - python-version: '3.10' - - steps: - - name: Checkout repository - uses: actions/checkout@v3 - - - name: Set up Python ${{ env.python-version }} - uses: actions/setup-python@v4 - with: - python-version: ${{ env.python-version }} - - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install -r requirements.txt - - - name: benchmark - env: - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - run: | - python 
benchmark/benchmark_entrepreneur_gpt_with_undecisive_user.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index cf851ef3..8e3d5484 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -156,6 +156,7 @@ jobs: PROXY: ${{ secrets.PROXY }} AGENT_MODE: ${{ secrets.AGENT_MODE }} AGENT_TYPE: ${{ secrets.AGENT_TYPE }} + PLAIN_OUTPUT: True - name: Upload coverage reports to Codecov uses: codecov/codecov-action@v3 diff --git a/.github/workflows/docker-ci.yml b/.github/workflows/docker-ci.yml index ff43666c..cacb58c5 100644 --- a/.github/workflows/docker-ci.yml +++ b/.github/workflows/docker-ci.yml @@ -102,6 +102,7 @@ jobs: - id: test name: Run tests env: + PLAIN_OUTPUT: True CI: true OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} run: | diff --git a/tests/conftest.py b/tests/conftest.py index 0ee023b5..8e607c39 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,3 +1,4 @@ +import os from pathlib import Path import pytest @@ -5,6 +6,7 @@ from pytest_mock import MockerFixture from autogpt.config.config import Config from autogpt.llm.api_manager import ApiManager +from autogpt.logs import TypingConsoleHandler from autogpt.workspace import Workspace pytest_plugins = [ @@ -43,3 +45,15 @@ def api_manager() -> ApiManager: if ApiManager in ApiManager._instances: del ApiManager._instances[ApiManager] return ApiManager() + + +@pytest.fixture(autouse=True) +def patch_emit(monkeypatch): + # convert plain_output to a boolean + + if bool(os.environ.get("PLAIN_OUTPUT")): + + def quick_emit(self, record: str): + print(self.format(record)) + + monkeypatch.setattr(TypingConsoleHandler, "emit", quick_emit) From 5fe600af9d1101ac53ee2dd9a65c3beb923eaf44 Mon Sep 17 00:00:00 2001 From: Erik Peterson Date: Fri, 9 Jun 2023 15:28:30 -0700 Subject: [PATCH 33/97] Clean up and fix issues with env configuration and .env.template (#4630) Co-authored-by: merwanehamadi --- .env.template | 203 ++++++++---------- autogpt/commands/audio_text.py | 15 +- autogpt/commands/google_search.py | 5 +- autogpt/config/config.py | 39 ++-- autogpt/speech/eleven_labs.py | 6 +- autogpt/speech/say.py | 14 +- .../{brian.py => stream_elements_speech.py} | 11 +- docs/configuration/options.md | 54 +++++ tests/unit/test_config.py | 2 +- 9 files changed, 208 insertions(+), 141 deletions(-) rename autogpt/speech/{brian.py => stream_elements_speech.py} (77%) create mode 100644 docs/configuration/options.md diff --git a/.env.template b/.env.template index c678e5d0..f0b3c7cb 100644 --- a/.env.template +++ b/.env.template @@ -1,10 +1,16 @@ +# For further descriptions of these settings see docs/configuration/options.md or go to docs.agpt.co + ################################################################################ ### AUTO-GPT - GENERAL SETTINGS ################################################################################ +## OPENAI_API_KEY - OpenAI API Key (Example: my-openai-api-key) +OPENAI_API_KEY=your-openai-api-key + ## EXECUTE_LOCAL_COMMANDS - Allow local command execution (Default: False) -## RESTRICT_TO_WORKSPACE - Restrict file operations to workspace ./auto_gpt_workspace (Default: True) # EXECUTE_LOCAL_COMMANDS=False + +## RESTRICT_TO_WORKSPACE - Restrict file operations to workspace ./auto_gpt_workspace (Default: True) # RESTRICT_TO_WORKSPACE=True ## USER_AGENT - Define the user-agent used by the requests library to browse website (string) @@ -18,67 +24,41 @@ ## AUTHORISE COMMAND KEY - Key to authorise commands # AUTHORISE_COMMAND_KEY=y + ## EXIT_KEY - Key to exit AUTO-GPT # EXIT_KEY=n -## 
PLAIN_OUTPUT - Enabeling plain output will disable spinner (Default: False) -## Note: Spinner is used to indicate that Auto-GPT is working on something in the background +## PLAIN_OUTPUT - Plain output, which disables the spinner (Default: False) # PLAIN_OUTPUT=False -## DISABLED_COMMAND_CATEGORIES - The list of categories of commands that are disabled. Each of the below are an option: -## autogpt.commands.analyze_code -## autogpt.commands.audio_text -## autogpt.commands.execute_code -## autogpt.commands.file_operations -## autogpt.commands.git_operations -## autogpt.commands.google_search -## autogpt.commands.image_gen -## autogpt.commands.improve_code -## autogpt.commands.web_selenium -## autogpt.commands.write_tests -## autogpt.app -## autogpt.commands.task_statuses -## For example, to disable coding related features, uncomment the next line -# DISABLED_COMMAND_CATEGORIES=autogpt.commands.analyze_code,autogpt.commands.execute_code,autogpt.commands.git_operations,autogpt.commands.improve_code,autogpt.commands.write_tests +## DISABLED_COMMAND_CATEGORIES - The list of categories of commands that are disabled (Default: None) +# DISABLED_COMMAND_CATEGORIES= ################################################################################ ### LLM PROVIDER ################################################################################ -### OPENAI -## OPENAI_API_KEY - OpenAI API Key (Example: my-openai-api-key) - - -## NOTE: https://platform.openai.com/docs/api-reference/completions -# The temperature setting in language models like GPT controls the balance between predictable and random responses. -# Lower temperature makes the responses more focused and deterministic, while higher temperature makes them more -# creative and varied. The temperature range typically goes from 0 to 2 in OpenAI's implementation. 
-## ## TEMPERATURE - Sets temperature in OpenAI (Default: 0) -## -### +# TEMPERATURE=0 + +## OPENAI_ORGANIZATION - Your OpenAI Organization key (Default: None) +# OPENAI_ORGANIZATION= ## USE_AZURE - Use Azure OpenAI or not (Default: False) -OPENAI_API_KEY=your-openai-api-key -# TEMPERATURE=0 # USE_AZURE=False -# OPENAI_ORGANIZATION=your-openai-organization-key-if-applicable - -### AZURE -# moved to `azure.yaml.template` ################################################################################ ### LLM MODELS ################################################################################ -## SMART_LLM_MODEL - Smart language model (Default: gpt-4) +## SMART_LLM_MODEL - Smart language model (Default: gpt-3.5-turbo) +# SMART_LLM_MODEL=gpt-3.5-turbo + ## FAST_LLM_MODEL - Fast language model (Default: gpt-3.5-turbo) -# SMART_LLM_MODEL=gpt-4 # FAST_LLM_MODEL=gpt-3.5-turbo -### EMBEDDINGS -## EMBEDDING_MODEL - Model to use for creating embeddings +## EMBEDDING_MODEL - Model to use for creating embeddings # EMBEDDING_MODEL=text-embedding-ada-002 ################################################################################ @@ -100,124 +80,129 @@ OPENAI_API_KEY=your-openai-api-key ### MEMORY ################################################################################ -### MEMORY_BACKEND - Memory backend type -## json_file - Default -## redis - Redis (if configured) -## MEMORY_INDEX - Name of index created in Memory backend (Default: auto-gpt) -# MEMORY_BACKEND=json_file -# MEMORY_INDEX=auto-gpt-memory +### General + +## MEMORY_BACKEND - Memory backend type +# MEMORY_BACKEND=json_file + +## MEMORY_INDEX - Value used in the Memory backend for scoping, naming, or indexing (Default: auto-gpt) +# MEMORY_INDEX=auto-gpt + +### Redis -### REDIS ## REDIS_HOST - Redis host (Default: localhost, use "redis" for docker-compose) -## REDIS_PORT - Redis port (Default: 6379) -## REDIS_PASSWORD - Redis password (Default: "") -## WIPE_REDIS_ON_START - Wipes data / index on start (Default: True) # REDIS_HOST=localhost + +## REDIS_PORT - Redis port (Default: 6379) # REDIS_PORT=6379 + +## REDIS_PASSWORD - Redis password (Default: "") # REDIS_PASSWORD= + +## WIPE_REDIS_ON_START - Wipes data / index on start (Default: True) # WIPE_REDIS_ON_START=True ################################################################################ ### IMAGE GENERATION PROVIDER ################################################################################ -### COMMON SETTINGS -## IMAGE_PROVIDER - Image provider - dalle, huggingface, or sdwebui -## IMAGE_SIZE - Image size (Example: 256) -## Image sizes for dalle: 256, 512, 1024 +### Common + +## IMAGE_PROVIDER - Image provider (Default: dalle) # IMAGE_PROVIDER=dalle + +## IMAGE_SIZE - Image size (Default: 256) # IMAGE_SIZE=256 -### HUGGINGFACE -## HUGGINGFACE_IMAGE_MODEL - Text-to-image model from Huggingface (Default: CompVis/stable-diffusion-v1-4) -## HUGGINGFACE_API_TOKEN - HuggingFace API token (Example: my-huggingface-api-token) -# HUGGINGFACE_IMAGE_MODEL=CompVis/stable-diffusion-v1-4 -# HUGGINGFACE_API_TOKEN=your-huggingface-api-token +### Huggingface (IMAGE_PROVIDER=huggingface) -### STABLE DIFFUSION WEBUI -## SD_WEBUI_AUTH - Stable diffusion webui username:password pair (Example: username:password) -## SD_WEBUI_URL - Stable diffusion webui API URL (Example: http://127.0.0.1:7860) +## HUGGINGFACE_IMAGE_MODEL - Text-to-image model from Huggingface (Default: CompVis/stable-diffusion-v1-4) +# HUGGINGFACE_IMAGE_MODEL=CompVis/stable-diffusion-v1-4 + +## HUGGINGFACE_API_TOKEN 
- HuggingFace API token (Default: None) +# HUGGINGFACE_API_TOKEN= + +### Stable Diffusion (IMAGE_PROVIDER=sdwebui) + +## SD_WEBUI_AUTH - Stable Diffusion Web UI username:password pair (Default: None) # SD_WEBUI_AUTH= -# SD_WEBUI_URL=http://127.0.0.1:7860 + +## SD_WEBUI_URL - Stable Diffusion Web UI API URL (Default: http://localhost:7860) +# SD_WEBUI_URL=http://localhost:7860 ################################################################################ ### AUDIO TO TEXT PROVIDER ################################################################################ -### HUGGINGFACE -# HUGGINGFACE_AUDIO_TO_TEXT_MODEL=facebook/wav2vec2-base-960h +## AUDIO_TO_TEXT_PROVIDER - Audio-to-text provider (Default: huggingface) +# AUDIO_TO_TEXT_PROVIDER=huggingface + +## HUGGINGFACE_AUDIO_TO_TEXT_MODEL - The model for HuggingFace to use (Default: CompVis/stable-diffusion-v1-4) +# HUGGINGFACE_AUDIO_TO_TEXT_MODEL=CompVis/stable-diffusion-v1-4 ################################################################################ -### GIT Provider for repository actions -################################################################################ - ### GITHUB -## GITHUB_API_KEY - Github API key / PAT (Example: github_pat_123) -## GITHUB_USERNAME - Github username -# GITHUB_API_KEY=github_pat_123 -# GITHUB_USERNAME=your-github-username +################################################################################ + +## GITHUB_API_KEY - Github API key / PAT (Default: None) +# GITHUB_API_KEY= + +## GITHUB_USERNAME - Github username (Default: None) +# GITHUB_USERNAME= ################################################################################ ### WEB BROWSING ################################################################################ -### BROWSER ## HEADLESS_BROWSER - Whether to run the browser in headless mode (default: True) -## USE_WEB_BROWSER - Sets the web-browser driver to use with selenium (default: chrome). -## Note: set this to either 'chrome', 'firefox', 'safari' or 'edge' depending on your current browser # HEADLESS_BROWSER=True + +## USE_WEB_BROWSER - Sets the web-browser driver to use with selenium (default: chrome) # USE_WEB_BROWSER=chrome -## BROWSE_CHUNK_MAX_LENGTH - When browsing website, define the length of chunks to summarize (in number of tokens, excluding the response. 75 % of FAST_TOKEN_LIMIT is usually wise ) + +## BROWSE_CHUNK_MAX_LENGTH - When browsing website, define the length of chunks to summarize (Default: 3000) # BROWSE_CHUNK_MAX_LENGTH=3000 -## BROWSE_SPACY_LANGUAGE_MODEL is used to split sentences. Install additional languages via pip, and set the model name here. Example Chinese: python -m spacy download zh_core_web_sm + +## BROWSE_SPACY_LANGUAGE_MODEL - spaCy language model](https://spacy.io/usage/models) to use when creating chunks. 
(Default: en_core_web_sm) # BROWSE_SPACY_LANGUAGE_MODEL=en_core_web_sm -### GOOGLE -## GOOGLE_API_KEY - Google API key (Example: my-google-api-key) -## CUSTOM_SEARCH_ENGINE_ID - Custom search engine ID (Example: my-custom-search-engine-id) -# GOOGLE_API_KEY=your-google-api-key -# CUSTOM_SEARCH_ENGINE_ID=your-custom-search-engine-id +## GOOGLE_API_KEY - Google API key (Default: None) +# GOOGLE_API_KEY= + +## GOOGLE_CUSTOM_SEARCH_ENGINE_ID - Google custom search engine ID (Default: None) +# GOOGLE_CUSTOM_SEARCH_ENGINE_ID= ################################################################################ -### TTS PROVIDER +### TEXT TO SPEECH PROVIDER ################################################################################ -### MAC OS -## USE_MAC_OS_TTS - Use Mac OS TTS or not (Default: False) -# USE_MAC_OS_TTS=False +## TEXT_TO_SPEECH_PROVIDER - Which Text to Speech provider to use (Default: gtts) +# TEXT_TO_SPEECH_PROVIDER=gtts -### STREAMELEMENTS -## USE_BRIAN_TTS - Use Brian TTS or not (Default: False) -# USE_BRIAN_TTS=False +### Only if TEXT_TO_SPEECH_PROVIDER=streamelements +## STREAMELEMENTS_VOICE - Voice to use for StreamElements (Default: Brian) +# STREAMELEMENTS_VOICE=Brian -### ELEVENLABS -## ELEVENLABS_API_KEY - Eleven Labs API key (Example: my-elevenlabs-api-key) -## ELEVENLABS_VOICE_1_ID - Eleven Labs voice 1 ID (Example: my-voice-id-1) -## ELEVENLABS_VOICE_2_ID - Eleven Labs voice 2 ID (Example: my-voice-id-2) -# ELEVENLABS_API_KEY=your-elevenlabs-api-key -# ELEVENLABS_VOICE_1_ID=your-voice-id-1 -# ELEVENLABS_VOICE_2_ID=your-voice-id-2 +### Only if TEXT_TO_SPEECH_PROVIDER=elevenlabs +## ELEVENLABS_API_KEY - Eleven Labs API key (Default: None) +# ELEVENLABS_API_KEY= -################################################################################ -### TWITTER API -################################################################################ - -# TW_CONSUMER_KEY= -# TW_CONSUMER_SECRET= -# TW_ACCESS_TOKEN= -# TW_ACCESS_TOKEN_SECRET= +## ELEVENLABS_VOICE_ID - Eleven Labs voice ID (Example: None) +# ELEVENLABS_VOICE_ID= ################################################################################ ### ALLOWLISTED PLUGINS ################################################################################ -#ALLOWLISTED_PLUGINS - Sets the listed plugins that are allowed (Example: plugin1,plugin2,plugin3) -#DENYLISTED_PLUGINS - Sets the listed plugins that are not allowed (Example: plugin1,plugin2,plugin3) -ALLOWLISTED_PLUGINS= -DENYLISTED_PLUGINS= +## ALLOWLISTED_PLUGINS - Sets the listed plugins that are allowed (Default: None) +# ALLOWLISTED_PLUGINS= + +## DENYLISTED_PLUGINS - Sets the listed plugins that are not allowed (Default: None) +# DENYLISTED_PLUGINS= ################################################################################ -### CHAT PLUGIN SETTINGS +### CHAT MESSAGES ################################################################################ -# CHAT_MESSAGES_ENABLED - Enable chat messages (Default: False) + +## CHAT_MESSAGES_ENABLED - Enable chat messages (Default: False) # CHAT_MESSAGES_ENABLED=False diff --git a/autogpt/commands/audio_text.py b/autogpt/commands/audio_text.py index ba4fb347..57aa1a88 100644 --- a/autogpt/commands/audio_text.py +++ b/autogpt/commands/audio_text.py @@ -44,6 +44,17 @@ def read_audio(audio: bytes, config: Config) -> str: Returns: str: The text from the audio """ + if config.audio_to_text_provider == "huggingface": + text = read_huggingface_audio(audio, config) + if text: + return f"The audio says: {text}" + else: + return 
f"Error, couldn't convert audio to text" + + return "Error: No audio to text provider given" + + +def read_huggingface_audio(audio: bytes, config: Config) -> str: model = config.huggingface_audio_to_text_model api_url = f"https://api-inference.huggingface.co/models/{model}" api_token = config.huggingface_api_token @@ -60,5 +71,5 @@ def read_audio(audio: bytes, config: Config) -> str: data=audio, ) - text = json.loads(response.content.decode("utf-8"))["text"] - return f"The audio says: {text}" + response_json = json.loads(response.content.decode("utf-8")) + return response_json.get("text") diff --git a/autogpt/commands/google_search.py b/autogpt/commands/google_search.py index aa34861d..f15885ee 100644 --- a/autogpt/commands/google_search.py +++ b/autogpt/commands/google_search.py @@ -56,7 +56,8 @@ def google_search(query: str, config: Config, num_results: int = 8) -> str: "google", "Google Search", '"query": ""', - lambda config: bool(config.google_api_key) and bool(config.custom_search_engine_id), + lambda config: bool(config.google_api_key) + and bool(config.google_custom_search_engine_id), "Configure google_api_key and custom_search_engine_id.", ) def google_official_search( @@ -78,7 +79,7 @@ def google_official_search( try: # Get the Google API key and Custom Search Engine ID from the config file api_key = config.google_api_key - custom_search_engine_id = config.custom_search_engine_id + custom_search_engine_id = config.google_custom_search_engine_id # Initialize the Custom Search API service service = build("customsearch", "v1", developerKey=api_key) diff --git a/autogpt/config/config.py b/autogpt/config/config.py index 753c99fc..3737308c 100644 --- a/autogpt/config/config.py +++ b/autogpt/config/config.py @@ -59,7 +59,7 @@ class Config(metaclass=Singleton): "PROMPT_SETTINGS_FILE", "prompt_settings.yaml" ) self.fast_llm_model = os.getenv("FAST_LLM_MODEL", "gpt-3.5-turbo") - self.smart_llm_model = os.getenv("SMART_LLM_MODEL", "gpt-4") + self.smart_llm_model = os.getenv("SMART_LLM_MODEL", "gpt-3.5-turbo") self.embedding_model = os.getenv("EMBEDDING_MODEL", "text-embedding-ada-002") self.browse_spacy_language_model = os.getenv( @@ -87,22 +87,34 @@ class Config(metaclass=Singleton): openai.organization = self.openai_organization self.elevenlabs_api_key = os.getenv("ELEVENLABS_API_KEY") - self.elevenlabs_voice_1_id = os.getenv("ELEVENLABS_VOICE_1_ID") - self.elevenlabs_voice_2_id = os.getenv("ELEVENLABS_VOICE_2_ID") + # ELEVENLABS_VOICE_1_ID is deprecated and included for backwards-compatibility + self.elevenlabs_voice_id = os.getenv( + "ELEVENLABS_VOICE_ID", os.getenv("ELEVENLABS_VOICE_1_ID") + ) + self.streamelements_voice = os.getenv("STREAMELEMENTS_VOICE", "Brian") - self.use_mac_os_tts = False - self.use_mac_os_tts = os.getenv("USE_MAC_OS_TTS") + # Backwards-compatibility shim for deprecated env variables + if os.getenv("USE_MAC_OS_TTS"): + default_tts_provider = "macos" + elif self.elevenlabs_api_key: + default_tts_provider = "elevenlabs" + elif os.getenv("USE_BRIAN_TTS"): + default_tts_provider = "streamelements" + else: + default_tts_provider = "gtts" - self.chat_messages_enabled = os.getenv("CHAT_MESSAGES_ENABLED") == "True" - - self.use_brian_tts = False - self.use_brian_tts = os.getenv("USE_BRIAN_TTS") + self.text_to_speech_provider = os.getenv( + "TEXT_TO_SPEECH_PROVIDER", default_tts_provider + ) self.github_api_key = os.getenv("GITHUB_API_KEY") self.github_username = os.getenv("GITHUB_USERNAME") self.google_api_key = os.getenv("GOOGLE_API_KEY") - self.custom_search_engine_id 
= os.getenv("CUSTOM_SEARCH_ENGINE_ID") + # CUSTOM_SEARCH_ENGINE_ID is deprecated and included for backwards-compatibility + self.google_custom_search_engine_id = os.getenv( + "GOOGLE_CUSTOM_SEARCH_ENGINE_ID", os.getenv("CUSTOM_SEARCH_ENGINE_ID") + ) self.image_provider = os.getenv("IMAGE_PROVIDER") self.image_size = int(os.getenv("IMAGE_SIZE", 256)) @@ -110,6 +122,7 @@ class Config(metaclass=Singleton): self.huggingface_image_model = os.getenv( "HUGGINGFACE_IMAGE_MODEL", "CompVis/stable-diffusion-v1-4" ) + self.audio_to_text_provider = os.getenv("AUDIO_TO_TEXT_PROVIDER", "huggingface") self.huggingface_audio_to_text_model = os.getenv( "HUGGINGFACE_AUDIO_TO_TEXT_MODEL" ) @@ -153,6 +166,8 @@ class Config(metaclass=Singleton): else: self.plugins_denylist = [] + self.chat_messages_enabled = os.getenv("CHAT_MESSAGES_ENABLED") == "True" + def get_azure_deployment_id_for_model(self, model: str) -> str: """ Returns the relevant deployment id for the model specified. @@ -234,7 +249,7 @@ class Config(metaclass=Singleton): def set_elevenlabs_voice_1_id(self, value: str) -> None: """Set the ElevenLabs Voice 1 ID value.""" - self.elevenlabs_voice_1_id = value + self.elevenlabs_voice_id = value def set_elevenlabs_voice_2_id(self, value: str) -> None: """Set the ElevenLabs Voice 2 ID value.""" @@ -246,7 +261,7 @@ class Config(metaclass=Singleton): def set_custom_search_engine_id(self, value: str) -> None: """Set the custom search engine id value.""" - self.custom_search_engine_id = value + self.google_custom_search_engine_id = value def set_debug_mode(self, value: bool) -> None: """Set the debug mode value.""" diff --git a/autogpt/speech/eleven_labs.py b/autogpt/speech/eleven_labs.py index c1e3aff5..5952508d 100644 --- a/autogpt/speech/eleven_labs.py +++ b/autogpt/speech/eleven_labs.py @@ -38,11 +38,11 @@ class ElevenLabsSpeech(VoiceBase): "xi-api-key": cfg.elevenlabs_api_key, } self._voices = default_voices.copy() - if cfg.elevenlabs_voice_1_id in voice_options: - cfg.elevenlabs_voice_1_id = voice_options[cfg.elevenlabs_voice_1_id] + if cfg.elevenlabs_voice_id in voice_options: + cfg.elevenlabs_voice_id = voice_options[cfg.elevenlabs_voice_id] if cfg.elevenlabs_voice_2_id in voice_options: cfg.elevenlabs_voice_2_id = voice_options[cfg.elevenlabs_voice_2_id] - self._use_custom_voice(cfg.elevenlabs_voice_1_id, 0) + self._use_custom_voice(cfg.elevenlabs_voice_id, 0) self._use_custom_voice(cfg.elevenlabs_voice_2_id, 1) def _use_custom_voice(self, voice, voice_index) -> None: diff --git a/autogpt/speech/say.py b/autogpt/speech/say.py index 4cc82e19..06f580f0 100644 --- a/autogpt/speech/say.py +++ b/autogpt/speech/say.py @@ -4,10 +4,10 @@ from threading import Semaphore from autogpt.config.config import Config from autogpt.speech.base import VoiceBase -from autogpt.speech.brian import BrianSpeech from autogpt.speech.eleven_labs import ElevenLabsSpeech from autogpt.speech.gtts import GTTSVoice from autogpt.speech.macos_tts import MacOSTTS +from autogpt.speech.stream_elements_speech import StreamElementsSpeech _QUEUE_SEMAPHORE = Semaphore( 1 @@ -33,14 +33,14 @@ def say_text(text: str, voice_index: int = 0) -> None: def _get_voice_engine(config: Config) -> tuple[VoiceBase, VoiceBase]: """Get the voice engine to use for the given configuration""" - default_voice_engine = GTTSVoice() - if config.elevenlabs_api_key: + tts_provider = config.text_to_speech_provider + if tts_provider == "elevenlabs": voice_engine = ElevenLabsSpeech() - elif config.use_mac_os_tts == "True": + elif tts_provider == "macos": voice_engine 
= MacOSTTS() - elif config.use_brian_tts == "True": - voice_engine = BrianSpeech() + elif tts_provider == "streamelements": + voice_engine = StreamElementsSpeech() else: voice_engine = GTTSVoice() - return default_voice_engine, voice_engine + return GTTSVoice(), voice_engine diff --git a/autogpt/speech/brian.py b/autogpt/speech/stream_elements_speech.py similarity index 77% rename from autogpt/speech/brian.py rename to autogpt/speech/stream_elements_speech.py index f63c206b..9019cf09 100644 --- a/autogpt/speech/brian.py +++ b/autogpt/speech/stream_elements_speech.py @@ -7,23 +7,24 @@ from playsound import playsound from autogpt.speech.base import VoiceBase -class BrianSpeech(VoiceBase): - """Brian speech module for autogpt""" +class StreamElementsSpeech(VoiceBase): + """Streamelements speech module for autogpt""" def _setup(self) -> None: """Setup the voices, API key, etc.""" - def _speech(self, text: str, _: int = 0) -> bool: - """Speak text using Brian with the streamelements API + def _speech(self, text: str, voice: str, _: int = 0) -> bool: + """Speak text using the streamelements API Args: text (str): The text to speak + voice (str): The voice to use Returns: bool: True if the request was successful, False otherwise """ tts_url = ( - f"https://api.streamelements.com/kappa/v2/speech?voice=Brian&text={text}" + f"https://api.streamelements.com/kappa/v2/speech?voice={voice}&text={text}" ) response = requests.get(tts_url) diff --git a/docs/configuration/options.md b/docs/configuration/options.md new file mode 100644 index 00000000..125a3a45 --- /dev/null +++ b/docs/configuration/options.md @@ -0,0 +1,54 @@ +# Configuration + +Configuration is controlled through the `Config` object. You can set configuration variables via the `.env` file. If you don't have a `.env` file, create a copy of `.env.template` in your `Auto-GPT` folder and name it `.env`. + +## Environment Variables + +- `AI_SETTINGS_FILE`: Location of AI Settings file. Default: ai_settings.yaml +- `ALLOWLISTED_PLUGINS`: List of plugins allowed. Optional. +- `AUDIO_TO_TEXT_PROVIDER`: Audio To Text Provider. Only option currently is `huggingface`. Default: huggingface +- `AUTHORISE_COMMAND_KEY`: Key response accepted when authorising commands. Default: y +- `BROWSE_CHUNK_MAX_LENGTH`: When browsing website, define the length of chunks to summarize. Default: 3000 +- `BROWSE_SPACY_LANGUAGE_MODEL`: [spaCy language model](https://spacy.io/usage/models) to use when creating chunks. Default: en_core_web_sm +- `CHAT_MESSAGES_ENABLED`: Enable chat messages. Optional +- `DENYLISTED_PLUGINS`: List of plugins not allowed. Optional. +- `DISABLED_COMMAND_CATEGORIES`: Command categories to disable. Command categories are Python module names, e.g. autogpt.commands.analyze_code. See the directory `autogpt/commands` in the source for all command modules. Default: None +- `ELEVENLABS_API_KEY`: ElevenLabs API Key. Optional. +- `ELEVENLABS_VOICE_ID`: ElevenLabs Voice ID. Optional. +- `EMBEDDING_MODEL`: LLM Model to use for embedding tasks. Default: text-embedding-ada-002 +- `EXECUTE_LOCAL_COMMANDS`: If shell commands should be executed locally. Default: False +- `EXIT_KEY`: Exit key accepted to exit. Default: n +- `FAST_LLM_MODEL`: LLM Model to use for most tasks. Default: gpt-3.5-turbo +- `GITHUB_API_KEY`: [Github API Key](https://github.com/settings/tokens). Optional. +- `GITHUB_USERNAME`: GitHub Username. Optional. +- `GOOGLE_API_KEY`: Google API key. Optional. 
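As an illustration (not part of the documentation patch above), a minimal `.env` built only from variable names and defaults documented on this page, with the API key left as a placeholder, might look like:

```ini
# Minimal illustrative .env; every value is a placeholder or a documented default
OPENAI_API_KEY=your-openai-api-key
FAST_LLM_MODEL=gpt-3.5-turbo
EXECUTE_LOCAL_COMMANDS=False
RESTRICT_TO_WORKSPACE=True
TEXT_TO_SPEECH_PROVIDER=gtts
```

The remaining variables below follow the same `KEY=value` pattern.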
+- `GOOGLE_CUSTOM_SEARCH_ENGINE_ID`: [Google custom search engine ID](https://programmablesearchengine.google.com/controlpanel/all). Optional. +- `HEADLESS_BROWSER`: Use a headless browser while Auto-GPT uses a web browser. Setting to `False` will allow you to see Auto-GPT operate the browser. Default: True +- `HUGGINGFACE_API_TOKEN`: HuggingFace API, to be used for both image generation and audio to text. Optional. +- `HUGGINGFACE_AUDIO_TO_TEXT_MODEL`: HuggingFace audio to text model. Optional. +- `HUGGINGFACE_IMAGE_MODEL`: HuggingFace model to use for image generation. Default: CompVis/stable-diffusion-v1-4 +- `IMAGE_PROVIDER`: Image provider. Options are `dalle`, `huggingface`, and `sdwebui`. Default: dalle +- `IMAGE_SIZE`: Default size of image to generate. Default: 256 +- `MEMORY_BACKEND`: Memory back-end to use. Currently `json_file` is the only supported and enabled backend. Default: json_file +- `MEMORY_INDEX`: Value used in the Memory backend for scoping, naming, or indexing. Default: auto-gpt +- `OPENAI_API_KEY`: *REQUIRED* - Your [OpenAI API Key](https://platform.openai.com/account/api-keys). +- `OPENAI_ORGANIZATION`: Organization ID in OpenAI. Optional. +- `PLAIN_OUTPUT`: Plain output, which disables the spinner. Default: False +- `PROMPT_SETTINGS_FILE`: Location of Prompt Settings file. Default: prompt_settings.yaml +- `REDIS_HOST`: Redis Host. Default: localhost +- `REDIS_PASSWORD`: Redis Password. Optional. Default: +- `REDIS_PORT`: Redis Port. Default: 6379 +- `RESTRICT_TO_WORKSPACE`: Whether to restrict file reading and writing to the workspace directory. Default: True +- `SD_WEBUI_AUTH`: Stable Diffusion Web UI username:password pair. Optional. +- `SD_WEBUI_URL`: Stable Diffusion Web UI URL. Default: http://localhost:7860 +- `SHELL_ALLOWLIST`: List of shell commands that ARE allowed to be executed by Auto-GPT. Only applies if `SHELL_COMMAND_CONTROL` is set to `allowlist`. Default: None +- `SHELL_COMMAND_CONTROL`: Whether to use `allowlist` or `denylist` to determine what shell commands can be executed. Default: denylist +- `SHELL_DENYLIST`: List of shell commands that ARE NOT allowed to be executed by Auto-GPT. Only applies if `SHELL_COMMAND_CONTROL` is set to `denylist`. Default: sudo,su +- `SMART_LLM_MODEL`: LLM Model to use for "smart" tasks. Default: gpt-3.5-turbo +- `STREAMELEMENTS_VOICE`: StreamElements voice to use. Default: Brian +- `TEMPERATURE`: Value of temperature given to OpenAI. Value from 0 to 2. Lower is more deterministic, higher is more random. See https://platform.openai.com/docs/api-reference/completions/create#completions/create-temperature +- `TEXT_TO_SPEECH_PROVIDER`: Text to Speech Provider. Options are `gtts`, `macos`, `elevenlabs`, and `streamelements`. Default: gtts +- `USER_AGENT`: User-Agent given when browsing websites. Default: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36" +- `USE_AZURE`: Use Azure's LLM. Default: False +- `USE_WEB_BROWSER`: Which web browser to use. Options are `chrome`, `firefox`, `safari`, or `edge`. Default: chrome +- `WIPE_REDIS_ON_START`: Wipes data / index on start.
Default: True \ No newline at end of file diff --git a/tests/unit/test_config.py b/tests/unit/test_config.py index 27daedcd..9a95cef1 100644 --- a/tests/unit/test_config.py +++ b/tests/unit/test_config.py @@ -20,7 +20,7 @@ def test_initial_values(config: Config): assert config.continuous_mode == False assert config.speak_mode == False assert config.fast_llm_model == "gpt-3.5-turbo" - assert config.smart_llm_model == "gpt-4" + assert config.smart_llm_model == "gpt-3.5-turbo" def test_set_continuous_mode(config: Config): From 3f2547295f8b62c894cd0fa78a2407b934ccce10 Mon Sep 17 00:00:00 2001 From: Auto-GPT-Bot Date: Fri, 9 Jun 2023 22:31:11 +0000 Subject: [PATCH 34/97] Update cassette submodule --- tests/Auto-GPT-test-cassettes | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/Auto-GPT-test-cassettes b/tests/Auto-GPT-test-cassettes index 02e380c5..e1d52a89 160000 --- a/tests/Auto-GPT-test-cassettes +++ b/tests/Auto-GPT-test-cassettes @@ -1 +1 @@ -Subproject commit 02e380c5f5e537598b33eeff5ea1fd6c49c9f588 +Subproject commit e1d52a892375874f27b299e01ba09c4300f45702 From fdca233fe303d5e880384342fb857f7033224707 Mon Sep 17 00:00:00 2001 From: Erik Peterson Date: Fri, 9 Jun 2023 17:25:03 -0700 Subject: [PATCH 35/97] Fix prompt issue causing 'No Command' issues and challenge to fail (#4623) Co-authored-by: merwanehamadi --- autogpt/agent/agent.py | 2 +- autogpt/prompts/prompt.py | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/autogpt/agent/agent.py b/autogpt/agent/agent.py index c21f31db..b2470f99 100644 --- a/autogpt/agent/agent.py +++ b/autogpt/agent/agent.py @@ -45,7 +45,7 @@ class Agent: triggering_prompt: The last sentence the AI will see before answering. For Auto-GPT, this prompt is: - Determine which next command to use, and respond using the format specified + Determine exactly one command to use, and respond using the format specified above: The triggering prompt is not part of the system prompt because between the system prompt and the triggering diff --git a/autogpt/prompts/prompt.py b/autogpt/prompts/prompt.py index eeeea3f9..61238657 100644 --- a/autogpt/prompts/prompt.py +++ b/autogpt/prompts/prompt.py @@ -11,9 +11,7 @@ from autogpt.utils import clean_input CFG = Config() -DEFAULT_TRIGGERING_PROMPT = ( - "Determine which next command to use, and respond using the format specified above:" -) +DEFAULT_TRIGGERING_PROMPT = "Determine exactly one command to use, and respond using the format specified above:" def build_default_prompt_generator() -> PromptGenerator: From f5a447308d016351a6d2cfd400636f951b4d83f5 Mon Sep 17 00:00:00 2001 From: Auto-GPT-Bot Date: Sat, 10 Jun 2023 00:29:33 +0000 Subject: [PATCH 36/97] Update cassette submodule --- tests/Auto-GPT-test-cassettes | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/Auto-GPT-test-cassettes b/tests/Auto-GPT-test-cassettes index e1d52a89..5cf78f4a 160000 --- a/tests/Auto-GPT-test-cassettes +++ b/tests/Auto-GPT-test-cassettes @@ -1 +1 @@ -Subproject commit e1d52a892375874f27b299e01ba09c4300f45702 +Subproject commit 5cf78f4af1158c636a0ca997365a7ad6d8343e26 From 3c51ff501f5eb46ede7921ae31d4296c7199160d Mon Sep 17 00:00:00 2001 From: merwanehamadi Date: Fri, 9 Jun 2023 20:46:06 -0700 Subject: [PATCH 37/97] dcrement memory challenge c (#4639) --- tests/challenges/current_score.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/challenges/current_score.json b/tests/challenges/current_score.json index b03a6808..4d747f03 100644 --- 
a/tests/challenges/current_score.json +++ b/tests/challenges/current_score.json @@ -42,7 +42,7 @@ }, "memory_challenge_c": { "max_level": 5, - "max_level_beaten": 1 + "max_level_beaten": null }, "memory_challenge_d": { "max_level": 5, From 6ff8478118935b72c35f3ec1b31e74f2a1aa2e90 Mon Sep 17 00:00:00 2001 From: DGdev91 Date: Sat, 10 Jun 2023 13:57:42 +0200 Subject: [PATCH 38/97] Add settings for custom base url (#2594) * Add settings for custom base url and embedding dimension Making the openai base url and embedding dimension configurable, these are useful to integrate AutoGPT with other models, like LLaMA * Update to milvus.py to load the configuration also in the init_collection function * Update radismem.py to get rid of Config() loading * Update local.py to get rid of Config() loading * Correct code format (python black) * Revert DEFAULT_EMBED_DIM name to EMBED_DIM to keep tests valid * Better description for EMBED_DIM setting * Set MockConfig to the type Config in Milvus test * Fix formatting * Update Milvus test, using Config() instead of building a mock config * using the last milvus test code from main * Remove embed_dim , no more needed after #4208 * Add example for OPENAI_BASE_URL --------- Co-authored-by: Nicholas Tindle Co-authored-by: Reinier van der Leer Co-authored-by: merwanehamadi --- .env.template | 4 ++++ autogpt/config/config.py | 2 ++ 2 files changed, 6 insertions(+) diff --git a/.env.template b/.env.template index f0b3c7cb..bf6e2453 100644 --- a/.env.template +++ b/.env.template @@ -22,6 +22,10 @@ OPENAI_API_KEY=your-openai-api-key ## PROMPT_SETTINGS_FILE - Specifies which Prompt Settings file to use (defaults to prompt_settings.yaml) # PROMPT_SETTINGS_FILE=prompt_settings.yaml +## OPENAI_API_BASE_URL - Custom url for the OpenAI API, useful for connecting to custom backends. 
No effect if USE_AZURE is true, leave blank to keep the default url +# the following is an example: +# OPENAI_API_BASE_URL=http://localhost:443/v1 + ## AUTHORISE COMMAND KEY - Key to authorise commands # AUTHORISE_COMMAND_KEY=y diff --git a/autogpt/config/config.py b/autogpt/config/config.py index 3737308c..df77b383 100644 --- a/autogpt/config/config.py +++ b/autogpt/config/config.py @@ -82,6 +82,8 @@ class Config(metaclass=Singleton): openai.api_type = self.openai_api_type openai.api_base = self.openai_api_base openai.api_version = self.openai_api_version + elif os.getenv("OPENAI_API_BASE_URL", None): + openai.api_base = os.getenv("OPENAI_API_BASE_URL") if self.openai_organization is not None: openai.organization = self.openai_organization From 15c6b0c1c33b811df13c9414a9616c948be1f851 Mon Sep 17 00:00:00 2001 From: Erik Peterson Date: Sat, 10 Jun 2023 13:16:00 -0700 Subject: [PATCH 39/97] Implement directory-based plugin system (#4548) * Implement directory-based plugin system * Fix Selenium test --------- Co-authored-by: Nicholas Tindle Co-authored-by: Merwane Hamadi --- .github/PULL_REQUEST_TEMPLATE.md | 2 +- .github/workflows/ci.yml | 2 +- .pre-commit-config.yaml | 2 +- autogpt/plugins.py | 25 ++ scripts/install_plugin_deps.py | 10 + tests/integration/test_plugins.py | 6 +- tests/integration/test_web_selenium.py | 4 + .../test_plugins/auto_gpt_guanaco/__init__.py | 274 ++++++++++++++++++ 8 files changed, 319 insertions(+), 6 deletions(-) create mode 100644 tests/unit/data/test_plugins/auto_gpt_guanaco/__init__.py diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index f159c646..efb67868 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -41,7 +41,7 @@ By following these guidelines, your PRs are more likely to be merged quickly aft black . isort . 
mypy - autoflake --remove-all-unused-imports --recursive --ignore-init-module-imports autogpt tests --in-place + autoflake --remove-all-unused-imports --recursive --ignore-init-module-imports --ignore-pass-after-docstring autogpt tests --in-place ``` diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8e3d5484..90bd4b53 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -69,7 +69,7 @@ jobs: - name: Check for unused imports and pass statements run: | - cmd="autoflake --remove-all-unused-imports --recursive --ignore-init-module-imports autogpt tests" + cmd="autoflake --remove-all-unused-imports --recursive --ignore-init-module-imports --ignore-pass-after-docstring autogpt tests" $cmd --check || (echo "You have unused imports or pass statements, please run '${cmd} --in-place'" && exit 1) test: diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 53928603..0aaad257 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -31,7 +31,7 @@ repos: hooks: - id: autoflake name: autoflake - entry: autoflake --in-place --remove-all-unused-imports --recursive --ignore-init-module-imports autogpt tests + entry: autoflake --in-place --remove-all-unused-imports --recursive --ignore-init-module-imports --ignore-pass-after-docstring autogpt tests language: python types: [ python ] - id: pytest-check diff --git a/autogpt/plugins.py b/autogpt/plugins.py index f36ba36e..eccea9ab 100644 --- a/autogpt/plugins.py +++ b/autogpt/plugins.py @@ -1,8 +1,10 @@ """Handles loading of plugins.""" import importlib.util +import inspect import json import os +import sys import zipfile from pathlib import Path from typing import List @@ -217,6 +219,28 @@ def scan_plugins(cfg: Config, debug: bool = False) -> List[AutoGPTPluginTemplate logger.debug(f"Allowlisted Plugins: {cfg.plugins_allowlist}") logger.debug(f"Denylisted Plugins: {cfg.plugins_denylist}") + # Directory-based plugins + for plugin_path in [f.path for f in os.scandir(cfg.plugins_dir) if f.is_dir()]: + # Avoid going into __pycache__ or other hidden directories + if plugin_path.startswith("__"): + continue + + plugin_module_path = plugin_path.split(os.path.sep) + plugin_module_name = plugin_module_path[-1] + qualified_module_name = ".".join(plugin_module_path) + + __import__(qualified_module_name) + plugin = sys.modules[qualified_module_name] + + for _, class_obj in inspect.getmembers(plugin): + if ( + hasattr(class_obj, "_abc_impl") + and AutoGPTPluginTemplate in class_obj.__bases__ + and denylist_allowlist_check(plugin_module_name, cfg) + ): + loaded_plugins.append(class_obj()) + + # Zip-based plugins for plugin in plugins_path_path.glob("*.zip"): if moduleList := inspect_zip_for_modules(str(plugin), debug): for module in moduleList: @@ -236,6 +260,7 @@ def scan_plugins(cfg: Config, debug: bool = False) -> List[AutoGPTPluginTemplate and denylist_allowlist_check(a_module.__name__, cfg) ): loaded_plugins.append(a_module()) + # OpenAI plugins if cfg.plugins_openai: manifests_specs = fetch_openai_plugins_manifest_and_spec(cfg) diff --git a/scripts/install_plugin_deps.py b/scripts/install_plugin_deps.py index d15c166f..00d9f8a3 100644 --- a/scripts/install_plugin_deps.py +++ b/scripts/install_plugin_deps.py @@ -2,6 +2,7 @@ import os import subprocess import sys import zipfile +from glob import glob from pathlib import Path @@ -16,6 +17,8 @@ def install_plugin_dependencies(): None """ plugins_dir = Path(os.getenv("PLUGINS_DIR", "plugins")) + + # Install zip-based plugins for plugin in 
plugins_dir.glob("*.zip"): with zipfile.ZipFile(str(plugin), "r") as zfile: try: @@ -30,6 +33,13 @@ def install_plugin_dependencies(): except KeyError: continue + # Install directory-based plugins + for requirements_file in glob(f"{plugins_dir}/*/requirements.txt"): + subprocess.check_call( + [sys.executable, "-m", "pip", "install", "-r", requirements_file], + stdout=subprocess.DEVNULL, + ) + if __name__ == "__main__": install_plugin_dependencies() diff --git a/tests/integration/test_plugins.py b/tests/integration/test_plugins.py index d5f62cca..828200c2 100644 --- a/tests/integration/test_plugins.py +++ b/tests/integration/test_plugins.py @@ -39,7 +39,7 @@ def mock_config_openai_plugin(): plugins_dir = PLUGINS_TEST_DIR plugins_openai = [PLUGIN_TEST_OPENAI] - plugins_denylist = ["AutoGPTPVicuna"] + plugins_denylist = ["AutoGPTPVicuna", "auto_gpt_guanaco"] plugins_allowlist = [PLUGIN_TEST_OPENAI] return MockConfig() @@ -60,7 +60,7 @@ def mock_config_generic_plugin(): plugins_dir = PLUGINS_TEST_DIR plugins_openai = [] plugins_denylist = [] - plugins_allowlist = ["AutoGPTPVicuna"] + plugins_allowlist = ["AutoGPTPVicuna", "auto_gpt_guanaco"] return MockConfig() @@ -68,4 +68,4 @@ def mock_config_generic_plugin(): def test_scan_plugins_generic(mock_config_generic_plugin): # Test that the function returns the correct number of plugins result = scan_plugins(mock_config_generic_plugin, debug=True) - assert len(result) == 1 + assert len(result) == 2 diff --git a/tests/integration/test_web_selenium.py b/tests/integration/test_web_selenium.py index 2a03a3c0..7a92b123 100644 --- a/tests/integration/test_web_selenium.py +++ b/tests/integration/test_web_selenium.py @@ -1,9 +1,13 @@ +import pytest from pytest_mock import MockerFixture from autogpt.commands.web_selenium import browse_website from autogpt.config import Config +from tests.utils import requires_api_key +@pytest.mark.vcr +@requires_api_key("OPENAI_API_KEY") def test_browse_website(config: Config, patched_api_requestor: MockerFixture): url = "https://barrel-roll.com" question = "How to execute a barrel roll" diff --git a/tests/unit/data/test_plugins/auto_gpt_guanaco/__init__.py b/tests/unit/data/test_plugins/auto_gpt_guanaco/__init__.py new file mode 100644 index 00000000..f915553c --- /dev/null +++ b/tests/unit/data/test_plugins/auto_gpt_guanaco/__init__.py @@ -0,0 +1,274 @@ +"""This is the Test plugin for Auto-GPT.""" +from typing import Any, Dict, List, Optional, Tuple, TypeVar + +from auto_gpt_plugin_template import AutoGPTPluginTemplate + +PromptGenerator = TypeVar("PromptGenerator") + + +class AutoGPTGuanaco(AutoGPTPluginTemplate): + """ + This is plugin for Auto-GPT. + """ + + def __init__(self): + super().__init__() + self._name = "Auto-GPT-Guanaco" + self._version = "0.1.0" + self._description = "This is a Guanaco local model plugin." + + def can_handle_on_response(self) -> bool: + """This method is called to check that the plugin can + handle the on_response method. + + Returns: + bool: True if the plugin can handle the on_response method.""" + return False + + def on_response(self, response: str, *args, **kwargs) -> str: + """This method is called when a response is received from the model.""" + if len(response): + print("OMG OMG It's Alive!") + else: + print("Is it alive?") + + def can_handle_post_prompt(self) -> bool: + """This method is called to check that the plugin can + handle the post_prompt method. 
+ + Returns: + bool: True if the plugin can handle the post_prompt method.""" + return False + + def post_prompt(self, prompt: PromptGenerator) -> PromptGenerator: + """This method is called just after the generate_prompt is called, + but actually before the prompt is generated. + + Args: + prompt (PromptGenerator): The prompt generator. + + Returns: + PromptGenerator: The prompt generator. + """ + + def can_handle_on_planning(self) -> bool: + """This method is called to check that the plugin can + handle the on_planning method. + + Returns: + bool: True if the plugin can handle the on_planning method.""" + return False + + def on_planning( + self, prompt: PromptGenerator, messages: List[str] + ) -> Optional[str]: + """This method is called before the planning chat completeion is done. + + Args: + prompt (PromptGenerator): The prompt generator. + messages (List[str]): The list of messages. + """ + + def can_handle_post_planning(self) -> bool: + """This method is called to check that the plugin can + handle the post_planning method. + + Returns: + bool: True if the plugin can handle the post_planning method.""" + return False + + def post_planning(self, response: str) -> str: + """This method is called after the planning chat completeion is done. + + Args: + response (str): The response. + + Returns: + str: The resulting response. + """ + + def can_handle_pre_instruction(self) -> bool: + """This method is called to check that the plugin can + handle the pre_instruction method. + + Returns: + bool: True if the plugin can handle the pre_instruction method.""" + return False + + def pre_instruction(self, messages: List[str]) -> List[str]: + """This method is called before the instruction chat is done. + + Args: + messages (List[str]): The list of context messages. + + Returns: + List[str]: The resulting list of messages. + """ + + def can_handle_on_instruction(self) -> bool: + """This method is called to check that the plugin can + handle the on_instruction method. + + Returns: + bool: True if the plugin can handle the on_instruction method.""" + return False + + def on_instruction(self, messages: List[str]) -> Optional[str]: + """This method is called when the instruction chat is done. + + Args: + messages (List[str]): The list of context messages. + + Returns: + Optional[str]: The resulting message. + """ + + def can_handle_post_instruction(self) -> bool: + """This method is called to check that the plugin can + handle the post_instruction method. + + Returns: + bool: True if the plugin can handle the post_instruction method.""" + return False + + def post_instruction(self, response: str) -> str: + """This method is called after the instruction chat is done. + + Args: + response (str): The response. + + Returns: + str: The resulting response. + """ + + def can_handle_pre_command(self) -> bool: + """This method is called to check that the plugin can + handle the pre_command method. + + Returns: + bool: True if the plugin can handle the pre_command method.""" + return False + + def pre_command( + self, command_name: str, arguments: Dict[str, Any] + ) -> Tuple[str, Dict[str, Any]]: + """This method is called before the command is executed. + + Args: + command_name (str): The command name. + arguments (Dict[str, Any]): The arguments. + + Returns: + Tuple[str, Dict[str, Any]]: The command name and the arguments. + """ + + def can_handle_post_command(self) -> bool: + """This method is called to check that the plugin can + handle the post_command method. 
+ + Returns: + bool: True if the plugin can handle the post_command method.""" + return False + + def post_command(self, command_name: str, response: str) -> str: + """This method is called after the command is executed. + + Args: + command_name (str): The command name. + response (str): The response. + + Returns: + str: The resulting response. + """ + + def can_handle_chat_completion( + self, + messages: list[Dict[Any, Any]], + model: str, + temperature: float, + max_tokens: int, + ) -> bool: + """This method is called to check that the plugin can + handle the chat_completion method. + + Args: + messages (Dict[Any, Any]): The messages. + model (str): The model name. + temperature (float): The temperature. + max_tokens (int): The max tokens. + + Returns: + bool: True if the plugin can handle the chat_completion method.""" + return False + + def handle_chat_completion( + self, + messages: list[Dict[Any, Any]], + model: str, + temperature: float, + max_tokens: int, + ) -> str: + """This method is called when the chat completion is done. + + Args: + messages (Dict[Any, Any]): The messages. + model (str): The model name. + temperature (float): The temperature. + max_tokens (int): The max tokens. + + Returns: + str: The resulting response. + """ + + def can_handle_text_embedding(self, text: str) -> bool: + """This method is called to check that the plugin can + handle the text_embedding method. + Args: + text (str): The text to be convert to embedding. + Returns: + bool: True if the plugin can handle the text_embedding method.""" + return False + + def handle_text_embedding(self, text: str) -> list: + """This method is called when the chat completion is done. + Args: + text (str): The text to be convert to embedding. + Returns: + list: The text embedding. + """ + + def can_handle_user_input(self, user_input: str) -> bool: + """This method is called to check that the plugin can + handle the user_input method. + + Args: + user_input (str): The user input. + + Returns: + bool: True if the plugin can handle the user_input method.""" + return False + + def user_input(self, user_input: str) -> str: + """This method is called to request user input to the user. + + Args: + user_input (str): The question or prompt to ask the user. + + Returns: + str: The user input. + """ + + def can_handle_report(self) -> bool: + """This method is called to check that the plugin can + handle the report method. + + Returns: + bool: True if the plugin can handle the report method.""" + return False + + def report(self, message: str) -> None: + """This method is called to report a message to the user. + + Args: + message (str): The message to report. 
+ """ From 6b9e3b21d3b20de8c81de45581d319cefdff204b Mon Sep 17 00:00:00 2001 From: Erik Peterson Date: Sat, 10 Jun 2023 14:47:26 -0700 Subject: [PATCH 40/97] Add config as attribute to Agent, rename old config to ai_config (#4638) * Add config as attribute to Agent, rename old config to ai_config * Code review: Pass ai_config --------- Co-authored-by: Nicholas Tindle Co-authored-by: merwanehamadi --- autogpt/agent/agent.py | 72 +++++++++++++++------------- autogpt/llm/chat.py | 4 +- autogpt/main.py | 3 +- autogpt/memory/message_history.py | 4 +- tests/integration/agent_factory.py | 25 ++++++---- tests/unit/test_agent.py | 24 +++++----- tests/unit/test_get_self_feedback.py | 8 +++- 7 files changed, 81 insertions(+), 59 deletions(-) diff --git a/autogpt/agent/agent.py b/autogpt/agent/agent.py index b2470f99..7a4c0e35 100644 --- a/autogpt/agent/agent.py +++ b/autogpt/agent/agent.py @@ -65,29 +65,31 @@ class Agent: memory: VectorMemory, next_action_count: int, command_registry: CommandRegistry, - config: AIConfig, + ai_config: AIConfig, system_prompt: str, triggering_prompt: str, workspace_directory: str, + config: Config, ): - cfg = Config() self.ai_name = ai_name self.memory = memory self.history = MessageHistory(self) self.next_action_count = next_action_count self.command_registry = command_registry self.config = config + self.ai_config = ai_config self.system_prompt = system_prompt self.triggering_prompt = triggering_prompt - self.workspace = Workspace(workspace_directory, cfg.restrict_to_workspace) + self.workspace = Workspace(workspace_directory, config.restrict_to_workspace) self.created_at = datetime.now().strftime("%Y%m%d_%H%M%S") self.cycle_count = 0 self.log_cycle_handler = LogCycleHandler() - self.fast_token_limit = OPEN_AI_CHAT_MODELS.get(cfg.fast_llm_model).max_tokens + self.fast_token_limit = OPEN_AI_CHAT_MODELS.get( + config.fast_llm_model + ).max_tokens def start_interaction_loop(self): # Interaction Loop - cfg = Config() self.cycle_count = 0 command_name = None arguments = None @@ -112,34 +114,36 @@ class Agent: self.cycle_count += 1 self.log_cycle_handler.log_count_within_cycle = 0 self.log_cycle_handler.log_cycle( - self.config.ai_name, + self.ai_config.ai_name, self.created_at, self.cycle_count, [m.raw() for m in self.history], FULL_MESSAGE_HISTORY_FILE_NAME, ) if ( - cfg.continuous_mode - and cfg.continuous_limit > 0 - and self.cycle_count > cfg.continuous_limit + self.config.continuous_mode + and self.config.continuous_limit > 0 + and self.cycle_count > self.config.continuous_limit ): logger.typewriter_log( - "Continuous Limit Reached: ", Fore.YELLOW, f"{cfg.continuous_limit}" + "Continuous Limit Reached: ", + Fore.YELLOW, + f"{self.config.continuous_limit}", ) break # Send message to AI, get response - with Spinner("Thinking... ", plain_output=cfg.plain_output): + with Spinner("Thinking... 
", plain_output=self.config.plain_output): assistant_reply = chat_with_ai( - cfg, + self.config, self, self.system_prompt, self.triggering_prompt, self.fast_token_limit, - cfg.fast_llm_model, + self.config.fast_llm_model, ) assistant_reply_json = fix_json_using_multiple_techniques(assistant_reply) - for plugin in cfg.plugins: + for plugin in self.config.plugins: if not plugin.can_handle_post_planning(): continue assistant_reply_json = plugin.post_planning(assistant_reply_json) @@ -150,10 +154,10 @@ class Agent: # Get command name and arguments try: print_assistant_thoughts( - self.ai_name, assistant_reply_json, cfg.speak_mode + self.ai_name, assistant_reply_json, self.config.speak_mode ) command_name, arguments = get_command(assistant_reply_json) - if cfg.speak_mode: + if self.config.speak_mode: say_text(f"I want to execute {command_name}") arguments = self._resolve_pathlike_command_args(arguments) @@ -161,7 +165,7 @@ class Agent: except Exception as e: logger.error("Error: \n", str(e)) self.log_cycle_handler.log_cycle( - self.config.ai_name, + self.ai_config.ai_name, self.created_at, self.cycle_count, assistant_reply_json, @@ -177,7 +181,7 @@ class Agent: f"ARGUMENTS = {Fore.CYAN}{arguments}{Style.RESET_ALL}", ) - if not cfg.continuous_mode and self.next_action_count == 0: + if not self.config.continuous_mode and self.next_action_count == 0: # ### GET USER AUTHORIZATION TO EXECUTE COMMAND ### # Get key press: Prompt the user to press enter to continue or escape # to exit @@ -188,13 +192,13 @@ class Agent: f"{self.ai_name}..." ) while True: - if cfg.chat_messages_enabled: + if self.config.chat_messages_enabled: console_input = clean_input("Waiting for your response...") else: console_input = clean_input( Fore.MAGENTA + "Input:" + Style.RESET_ALL ) - if console_input.lower().strip() == cfg.authorise_key: + if console_input.lower().strip() == self.config.authorise_key: user_input = "GENERATE NEXT COMMAND JSON" break elif console_input.lower().strip() == "s": @@ -205,7 +209,7 @@ class Agent: ) thoughts = assistant_reply_json.get("thoughts", {}) self_feedback_resp = self.get_self_feedback( - thoughts, cfg.fast_llm_model + thoughts, self.config.fast_llm_model ) logger.typewriter_log( f"SELF FEEDBACK: {self_feedback_resp}", @@ -218,7 +222,9 @@ class Agent: elif console_input.lower().strip() == "": logger.warn("Invalid input format.") continue - elif console_input.lower().startswith(f"{cfg.authorise_key} -"): + elif console_input.lower().startswith( + f"{self.config.authorise_key} -" + ): try: self.next_action_count = abs( int(console_input.split(" ")[1]) @@ -231,14 +237,14 @@ class Agent: ) continue break - elif console_input.lower() == cfg.exit_key: + elif console_input.lower() == self.config.exit_key: user_input = "EXIT" break else: user_input = console_input command_name = "human_feedback" self.log_cycle_handler.log_cycle( - self.config.ai_name, + self.ai_config.ai_name, self.created_at, self.cycle_count, user_input, @@ -271,7 +277,7 @@ class Agent: elif command_name == "self_feedback": result = f"Self feedback: {user_input}" else: - for plugin in cfg.plugins: + for plugin in self.config.plugins: if not plugin.can_handle_pre_command(): continue command_name, arguments = plugin.pre_command( @@ -281,22 +287,22 @@ class Agent: self.command_registry, command_name, arguments, - self.config.prompt_generator, - config=cfg, + self.ai_config.prompt_generator, + config=self.config, ) result = f"Command {command_name} returned: " f"{command_result}" result_tlength = count_string_tokens( - 
str(command_result), cfg.fast_llm_model + str(command_result), self.config.fast_llm_model ) memory_tlength = count_string_tokens( - str(self.history.summary_message()), cfg.fast_llm_model + str(self.history.summary_message()), self.config.fast_llm_model ) if result_tlength + memory_tlength + 600 > self.fast_token_limit: result = f"Failure: command {command_name} returned too much output. \ Do not execute this command again with the same arguments." - for plugin in cfg.plugins: + for plugin in self.config.plugins: if not plugin.can_handle_post_command(): continue result = plugin.post_command(command_name, result) @@ -337,7 +343,7 @@ class Agent: Returns: str: A feedback response generated using the provided thoughts dictionary. """ - ai_role = self.config.ai_role + ai_role = self.ai_config.ai_role feedback_prompt = f"Below is a message from me, an AI Agent, assuming the role of {ai_role}. whilst keeping knowledge of my slight limitations as an AI Agent Please evaluate my thought process, reasoning, and plan, and provide a concise paragraph outlining potential improvements. Consider adding or removing ideas that do not align with my role and explaining why, prioritizing thoughts based on their significance, or simply refining my overall thought process." reasoning = thoughts.get("reasoning", "") @@ -349,7 +355,7 @@ class Agent: prompt.add("user", feedback_prompt + feedback_thoughts) self.log_cycle_handler.log_cycle( - self.config.ai_name, + self.ai_config.ai_name, self.created_at, self.cycle_count, prompt.raw(), @@ -359,7 +365,7 @@ class Agent: feedback = create_chat_completion(prompt) self.log_cycle_handler.log_cycle( - self.config.ai_name, + self.ai_config.ai_name, self.created_at, self.cycle_count, feedback, diff --git a/autogpt/llm/chat.py b/autogpt/llm/chat.py index 7cb59825..60afc93a 100644 --- a/autogpt/llm/chat.py +++ b/autogpt/llm/chat.py @@ -150,7 +150,7 @@ def chat_with_ai( if not plugin.can_handle_on_planning(): continue plugin_response = plugin.on_planning( - agent.config.prompt_generator, message_sequence.raw() + agent.ai_config.prompt_generator, message_sequence.raw() ) if not plugin_response or plugin_response == "": continue @@ -181,7 +181,7 @@ def chat_with_ai( logger.debug("") logger.debug("----------- END OF CONTEXT ----------------") agent.log_cycle_handler.log_cycle( - agent.config.ai_name, + agent.ai_config.ai_name, agent.created_at, agent.cycle_count, message_sequence.raw(), diff --git a/autogpt/main.py b/autogpt/main.py index efc70aae..ab0a1533 100644 --- a/autogpt/main.py +++ b/autogpt/main.py @@ -189,9 +189,10 @@ def run_auto_gpt( memory=memory, next_action_count=next_action_count, command_registry=command_registry, - config=ai_config, system_prompt=system_prompt, triggering_prompt=DEFAULT_TRIGGERING_PROMPT, workspace_directory=workspace_directory, + ai_config=ai_config, + config=cfg, ) agent.start_interaction_loop() diff --git a/autogpt/memory/message_history.py b/autogpt/memory/message_history.py index fcb96a94..1505bd65 100644 --- a/autogpt/memory/message_history.py +++ b/autogpt/memory/message_history.py @@ -184,7 +184,7 @@ Latest Development: prompt = ChatSequence.for_model(cfg.fast_llm_model, [Message("user", prompt)]) self.agent.log_cycle_handler.log_cycle( - self.agent.config.ai_name, + self.agent.ai_config.ai_name, self.agent.created_at, self.agent.cycle_count, prompt.raw(), @@ -194,7 +194,7 @@ Latest Development: self.summary = create_chat_completion(prompt) self.agent.log_cycle_handler.log_cycle( - self.agent.config.ai_name, + 
self.agent.ai_config.ai_name, self.agent.created_at, self.agent.cycle_count, self.summary, diff --git a/tests/integration/agent_factory.py b/tests/integration/agent_factory.py index 4be96481..714a3ac5 100644 --- a/tests/integration/agent_factory.py +++ b/tests/integration/agent_factory.py @@ -59,7 +59,8 @@ def browser_agent(agent_test_config, memory_none: NoMemory, workspace: Workspace ai_name="", memory=memory_none, command_registry=command_registry, - config=ai_config, + ai_config=ai_config, + config=agent_test_config, next_action_count=0, system_prompt=system_prompt, triggering_prompt=DEFAULT_TRIGGERING_PROMPT, @@ -95,7 +96,8 @@ def file_system_agents( ai_name="Information Retrieval Agent", memory=memory_json_file, command_registry=command_registry, - config=ai_config, + ai_config=ai_config, + config=agent_test_config, next_action_count=0, system_prompt=system_prompt, triggering_prompt=DEFAULT_TRIGGERING_PROMPT, @@ -125,7 +127,8 @@ def memory_management_agent(agent_test_config, memory_json_file, workspace: Work ai_name="Follow-Instructions-GPT", memory=memory_json_file, command_registry=command_registry, - config=ai_config, + ai_config=ai_config, + config=agent_test_config, next_action_count=0, system_prompt=system_prompt, triggering_prompt=DEFAULT_TRIGGERING_PROMPT, @@ -161,7 +164,8 @@ def information_retrieval_agents( ai_name="Information Retrieval Agent", memory=memory_json_file, command_registry=command_registry, - config=ai_config, + ai_config=ai_config, + config=agent_test_config, next_action_count=0, system_prompt=system_prompt, triggering_prompt=DEFAULT_TRIGGERING_PROMPT, @@ -172,7 +176,9 @@ def information_retrieval_agents( @pytest.fixture -def kubernetes_agent(memory_json_file, workspace: Workspace): +def kubernetes_agent( + agent_test_config: Config, memory_json_file: NoMemory, workspace: Workspace +) -> Agent: command_registry = CommandRegistry() command_registry.import_commands("autogpt.commands.file_operations") command_registry.import_commands("autogpt.app") @@ -193,7 +199,8 @@ def kubernetes_agent(memory_json_file, workspace: Workspace): ai_name="Kubernetes-Demo", memory=memory_json_file, command_registry=command_registry, - config=ai_config, + ai_config=ai_config, + config=agent_test_config, next_action_count=0, system_prompt=system_prompt, triggering_prompt=DEFAULT_TRIGGERING_PROMPT, @@ -226,7 +233,8 @@ def get_nobel_prize_agent(agent_test_config, memory_json_file, workspace: Worksp ai_name="Get-PhysicsNobelPrize", memory=memory_json_file, command_registry=command_registry, - config=ai_config, + ai_config=ai_config, + config=agent_test_config, next_action_count=0, system_prompt=system_prompt, triggering_prompt=DEFAULT_TRIGGERING_PROMPT, @@ -270,7 +278,8 @@ def debug_code_agents(agent_test_config, memory_json_file, workspace: Workspace) ai_name="Debug Code Agent", memory=memory_json_file, command_registry=command_registry, - config=ai_config, + ai_config=ai_config, + config=agent_test_config, next_action_count=0, system_prompt=system_prompt, triggering_prompt=DEFAULT_TRIGGERING_PROMPT, diff --git a/tests/unit/test_agent.py b/tests/unit/test_agent.py index 4f05e36b..3fb896ba 100644 --- a/tests/unit/test_agent.py +++ b/tests/unit/test_agent.py @@ -4,28 +4,30 @@ import pytest from autogpt.agent import Agent from autogpt.config import AIConfig +from autogpt.config.config import Config @pytest.fixture -def agent(): +def agent(config: Config): ai_name = "Test AI" memory = MagicMock() next_action_count = 0 command_registry = MagicMock() - config = AIConfig() + ai_config = 
AIConfig(ai_name=ai_name) system_prompt = "System prompt" triggering_prompt = "Triggering prompt" workspace_directory = "workspace_directory" agent = Agent( - ai_name, - memory, - next_action_count, - command_registry, - config, - system_prompt, - triggering_prompt, - workspace_directory, + ai_name=ai_name, + memory=memory, + next_action_count=next_action_count, + command_registry=command_registry, + ai_config=ai_config, + config=config, + system_prompt=system_prompt, + triggering_prompt=triggering_prompt, + workspace_directory=workspace_directory, ) return agent @@ -36,7 +38,7 @@ def test_agent_initialization(agent: Agent): assert agent.history.messages == [] assert agent.next_action_count == 0 assert agent.command_registry == agent.command_registry - assert agent.config == agent.config + assert agent.ai_config == agent.ai_config assert agent.system_prompt == "System prompt" assert agent.triggering_prompt == "Triggering prompt" diff --git a/tests/unit/test_get_self_feedback.py b/tests/unit/test_get_self_feedback.py index 64268898..ba3e10fe 100644 --- a/tests/unit/test_get_self_feedback.py +++ b/tests/unit/test_get_self_feedback.py @@ -1,12 +1,15 @@ from datetime import datetime +from pytest_mock import MockerFixture + from autogpt.agent.agent import Agent from autogpt.config import AIConfig +from autogpt.config.config import Config from autogpt.llm.chat import create_chat_completion from autogpt.log_cycle.log_cycle import LogCycleHandler -def test_get_self_feedback(mocker): +def test_get_self_feedback(config: Config, mocker: MockerFixture): # Define a sample thoughts dictionary thoughts = { "reasoning": "Sample reasoning.", @@ -32,7 +35,8 @@ def test_get_self_feedback(mocker): agent_mock = mocker.MagicMock(spec=Agent) # Mock the config attribute of the Agent instance - agent_mock.config = AIConfig() + agent_mock.config = config + agent_mock.ai_config = AIConfig() # Mock the log_cycle_handler attribute of the Agent instance agent_mock.log_cycle_handler = LogCycleHandler() From c1ee8cb62e4fb67d2a7c65c416a693224a333181 Mon Sep 17 00:00:00 2001 From: merwanehamadi Date: Sat, 10 Jun 2023 15:07:02 -0700 Subject: [PATCH 41/97] Upload logs as artifacts (#4640) --- .github/workflows/ci.yml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 90bd4b53..0718c46f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -245,3 +245,10 @@ jobs: echo $TOKEN | gh auth login --with-token gh api repos/$REPO/issues/$PR_NUMBER/comments -X POST -F body="You changed AutoGPT's behaviour. The cassettes have been updated and will be merged to the submodule when this Pull Request gets merged." 
fi + + - name: Upload logs as artifact + if: always() + uses: actions/upload-artifact@v3 + with: + name: test-logs + path: logs/ From 097ce08908fa911bc0e282af54d605611c5092ae Mon Sep 17 00:00:00 2001 From: merwanehamadi Date: Sat, 10 Jun 2023 15:11:24 -0700 Subject: [PATCH 42/97] Create benchmarks.yml (#4647) --- .github/workflows/benchmarks.yml | 73 +++++++++++++++++++ tests/Auto-GPT-test-cassettes | 2 +- .../challenge_decorator.py | 7 +- .../memory/test_memory_challenge_d.py | 2 +- 4 files changed, 80 insertions(+), 4 deletions(-) create mode 100644 .github/workflows/benchmarks.yml diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml new file mode 100644 index 00000000..e9f46d04 --- /dev/null +++ b/.github/workflows/benchmarks.yml @@ -0,0 +1,73 @@ +name: Benchmarks + +on: + schedule: + - cron: '0 8 * * *' + workflow_dispatch: + +jobs: + Benchmark: + name: Benchmark - ${{ matrix.config.task-name }} + runs-on: ubuntu-latest + timeout-minutes: 30 + strategy: + fail-fast: false + matrix: + config: + - python-version: ["3.10"] + task: "tests/challenges" + task-name: "Mandatory Tasks" + - python-version: ["3.10"] + task: "--beat-challenges -ra tests/challenges" + task-name: "Challenging Tasks" + + steps: + - name: Checkout repository + uses: actions/checkout@v3 + with: + ref: master + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + + - id: get_date + name: Get date + run: echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT + + - name: Set up Python dependency cache + uses: actions/cache@v3 + with: + path: ~/.cache/pip + key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}-${{ steps.get_date.outputs.date }} + + - name: Install Python dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + + - name: Run pytest with coverage + run: | + rm -rf tests/Auto-GPT-test-cassettes + pytest -n auto ${{ matrix.task }} + env: + CI: true + PROXY: ${{ secrets.PROXY }} + AGENT_MODE: ${{ secrets.AGENT_MODE }} + AGENT_TYPE: ${{ secrets.AGENT_TYPE }} + PLAIN_OUTPUT: True + + - name: Upload logs as artifact + if: always() + uses: actions/upload-artifact@v3 + with: + name: test-logs-${{ matrix.config.task-name }} + path: logs/ + + - name: Upload cassettes as artifact + if: always() + uses: actions/upload-artifact@v3 + with: + name: cassettes-${{ matrix.config.task-name }} + path: tests/Auto-GPT-test-cassettes/ diff --git a/tests/Auto-GPT-test-cassettes b/tests/Auto-GPT-test-cassettes index 5cf78f4a..c154c67a 160000 --- a/tests/Auto-GPT-test-cassettes +++ b/tests/Auto-GPT-test-cassettes @@ -1 +1 @@ -Subproject commit 5cf78f4af1158c636a0ca997365a7ad6d8343e26 +Subproject commit c154c67a58fceb534b6969bfbf92928c4ae54ea3 diff --git a/tests/challenges/challenge_decorator/challenge_decorator.py b/tests/challenges/challenge_decorator/challenge_decorator.py index 5ef7f19e..d887e21d 100644 --- a/tests/challenges/challenge_decorator/challenge_decorator.py +++ b/tests/challenges/challenge_decorator/challenge_decorator.py @@ -22,7 +22,7 @@ def challenge(func: Callable[..., Any]) -> Callable[..., None]: @wraps(func) def wrapper(*args: Any, **kwargs: Any) -> None: run_remaining = MAX_LEVEL_TO_IMPROVE_ON if Challenge.BEAT_CHALLENGES else 1 - original_error = None + original_error: Optional[Exception] = None while run_remaining > 0: current_score, new_score, new_score_location = get_scores() @@ -40,6 +40,9 @@ def challenge(func: Callable[..., Any]) -> 
Callable[..., None]: f"{CHALLENGE_FAILED_MESSAGE}\n{err}" ) challenge.succeeded = False + except Exception as err: + original_error = err + challenge.succeeded = False else: challenge.skipped = True if os.environ.get("CI") == "true": @@ -55,7 +58,7 @@ def challenge(func: Callable[..., Any]) -> Callable[..., None]: if not challenge.succeeded: if Challenge.BEAT_CHALLENGES or challenge.is_new_challenge: # xfail - pytest.xfail("Challenge failed") + pytest.xfail(str(original_error)) if original_error: raise original_error run_remaining -= 1 diff --git a/tests/challenges/memory/test_memory_challenge_d.py b/tests/challenges/memory/test_memory_challenge_d.py index 9e662e08..e788f65d 100644 --- a/tests/challenges/memory/test_memory_challenge_d.py +++ b/tests/challenges/memory/test_memory_challenge_d.py @@ -16,7 +16,7 @@ MAX_LEVEL = 5 OUTPUT_LOCATION = "output.txt" -# @pytest.mark.vcr +@pytest.mark.vcr @requires_api_key("OPENAI_API_KEY") @challenge def test_memory_challenge_d( From 0594ba33a2c58e6006718692ecf059e713340704 Mon Sep 17 00:00:00 2001 From: Erik Peterson Date: Sat, 10 Jun 2023 15:48:50 -0700 Subject: [PATCH 43/97] Pass agent to commands instead of config (#4645) * Add config as attribute to Agent, rename old config to ai_config * Code review: Pass ai_config * Pass agent to commands instead of config * Lint * Fix merge error * Fix memory challenge a --------- Co-authored-by: Nicholas Tindle Co-authored-by: merwanehamadi --- autogpt/agent/agent.py | 7 +- autogpt/app.py | 34 +++-- autogpt/commands/analyze_code.py | 12 +- autogpt/commands/audio_text.py | 22 ++-- autogpt/commands/execute_code.py | 36 ++--- autogpt/commands/file_operations.py | 44 +++---- autogpt/commands/git_operations.py | 14 +- autogpt/commands/google_search.py | 13 +- autogpt/commands/image_gen.py | 44 +++---- autogpt/commands/improve_code.py | 11 +- autogpt/commands/task_statuses.py | 8 +- autogpt/commands/web_requests.py | 18 ++- autogpt/commands/web_selenium.py | 30 ++--- autogpt/commands/write_tests.py | 11 +- autogpt/llm/chat.py | 2 +- autogpt/memory/message_history.py | 4 +- .../basic_abilities/test_write_file.py | 4 +- .../debug_code/test_debug_code_challenge_a.py | 5 +- .../test_information_retrieval_challenge_a.py | 4 +- .../test_information_retrieval_challenge_b.py | 5 +- .../test_kubernetes_template_challenge_a.py | 5 +- .../memory/test_memory_challenge_a.py | 10 +- .../memory/test_memory_challenge_b.py | 9 +- .../memory/test_memory_challenge_c.py | 12 +- .../memory/test_memory_challenge_d.py | 10 +- tests/conftest.py | 34 +++++ tests/integration/test_execute_code.py | 66 +++++----- tests/integration/test_image_gen.py | 81 ++++++------ tests/integration/test_web_selenium.py | 10 +- tests/unit/test_browse_scrape_links.py | 21 +-- tests/unit/test_browse_scrape_text.py | 25 ++-- tests/unit/test_file_operations.py | 124 ++++++++++-------- tests/unit/test_git_commands.py | 11 +- tests/unit/test_google_search.py | 18 ++- tests/unit/test_make_agent.py | 12 +- 35 files changed, 390 insertions(+), 386 deletions(-) diff --git a/autogpt/agent/agent.py b/autogpt/agent/agent.py index 7a4c0e35..44180642 100644 --- a/autogpt/agent/agent.py +++ b/autogpt/agent/agent.py @@ -4,7 +4,6 @@ from datetime import datetime from colorama import Fore, Style -from autogpt.app import execute_command, get_command from autogpt.commands.command import CommandRegistry from autogpt.config import Config from autogpt.config.ai_config import AIConfig @@ -89,6 +88,9 @@ class Agent: ).max_tokens def start_interaction_loop(self): + # Avoid circular 
imports + from autogpt.app import execute_command, get_command + # Interaction Loop self.cycle_count = 0 command_name = None @@ -287,8 +289,7 @@ class Agent: self.command_registry, command_name, arguments, - self.ai_config.prompt_generator, - config=self.config, + agent=self, ) result = f"Command {command_name} returned: " f"{command_result}" diff --git a/autogpt/app.py b/autogpt/app.py index 525deddc..eb25fa7d 100644 --- a/autogpt/app.py +++ b/autogpt/app.py @@ -2,12 +2,11 @@ import json from typing import Dict, List, Union +from autogpt.agent.agent import Agent from autogpt.agent.agent_manager import AgentManager from autogpt.commands.command import CommandRegistry, command from autogpt.commands.web_requests import scrape_links, scrape_text -from autogpt.config import Config from autogpt.processing.text import summarize_text -from autogpt.prompts.generator import PromptGenerator from autogpt.speech import say_text from autogpt.url_utils.validators import validate_url @@ -87,9 +86,8 @@ def map_command_synonyms(command_name: str): def execute_command( command_registry: CommandRegistry, command_name: str, - arguments, - prompt: PromptGenerator, - config: Config, + arguments: dict[str, str], + agent: Agent, ): """Execute the command and return the result @@ -105,7 +103,7 @@ def execute_command( # If the command is found, call it with the provided arguments if cmd: - return cmd(**arguments, config=config) + return cmd(**arguments, agent=agent) # TODO: Remove commands below after they are moved to the command registry. command_name = map_command_synonyms(command_name.lower()) @@ -113,7 +111,7 @@ def execute_command( # TODO: Change these to take in a file rather than pasted code, if # non-file is given, return instructions "Input should be a python # filepath, write your code to file and try again - for command in prompt.commands: + for command in agent.prompt.commands: if ( command_name == command["label"].lower() or command_name == command["name"].lower() @@ -132,7 +130,7 @@ def execute_command( "get_text_summary", "Get text summary", '"url": "", "question": ""' ) @validate_url -def get_text_summary(url: str, question: str, config: Config) -> str: +def get_text_summary(url: str, question: str, agent: Agent) -> str: """Get the text summary of a webpage Args: @@ -142,7 +140,7 @@ def get_text_summary(url: str, question: str, config: Config) -> str: Returns: str: The summary of the text """ - text = scrape_text(url, config) + text = scrape_text(url, agent) summary, _ = summarize_text(text, question=question) return f""" "Result" : {summary}""" @@ -150,7 +148,7 @@ def get_text_summary(url: str, question: str, config: Config) -> str: @command("get_hyperlinks", "Get hyperlinks", '"url": ""') @validate_url -def get_hyperlinks(url: str, config: Config) -> Union[str, List[str]]: +def get_hyperlinks(url: str, agent: Agent) -> Union[str, List[str]]: """Get all hyperlinks on a webpage Args: @@ -159,7 +157,7 @@ def get_hyperlinks(url: str, config: Config) -> Union[str, List[str]]: Returns: str or list: The hyperlinks on the page """ - return scrape_links(url, config) + return scrape_links(url, agent) @command( @@ -167,7 +165,7 @@ def get_hyperlinks(url: str, config: Config) -> Union[str, List[str]]: "Start GPT Agent", '"name": "", "task": "", "prompt": ""', ) -def start_agent(name: str, task: str, prompt: str, config: Config, model=None) -> str: +def start_agent(name: str, task: str, prompt: str, agent: Agent, model=None) -> str: """Start an agent with a given name, task, and prompt Args: @@ -188,11 
+186,11 @@ def start_agent(name: str, task: str, prompt: str, config: Config, model=None) - agent_intro = f"{voice_name} here, Reporting for duty!" # Create agent - if config.speak_mode: + if agent.config.speak_mode: say_text(agent_intro, 1) key, ack = agent_manager.create_agent(task, first_message, model) - if config.speak_mode: + if agent.config.speak_mode: say_text(f"Hello {voice_name}. Your task is as follows. {task}.") # Assign task (prompt), get response @@ -202,7 +200,7 @@ def start_agent(name: str, task: str, prompt: str, config: Config, model=None) - @command("message_agent", "Message GPT Agent", '"key": "", "message": ""') -def message_agent(key: str, message: str, config: Config) -> str: +def message_agent(key: str, message: str, agent: Agent) -> str: """Message an agent with a given key and message""" # Check if the key is a valid integer if is_valid_int(key): @@ -211,13 +209,13 @@ def message_agent(key: str, message: str, config: Config) -> str: return "Invalid key, must be an integer." # Speak response - if config.speak_mode: + if agent.config.speak_mode: say_text(agent_response, 1) return agent_response @command("list_agents", "List GPT Agents", "() -> str") -def list_agents(config: Config) -> str: +def list_agents(agent: Agent) -> str: """List all agents Returns: @@ -229,7 +227,7 @@ def list_agents(config: Config) -> str: @command("delete_agent", "Delete GPT Agent", '"key": ""') -def delete_agent(key: str, config: Config) -> str: +def delete_agent(key: str, agent: Agent) -> str: """Delete an agent with a given key Args: diff --git a/autogpt/commands/analyze_code.py b/autogpt/commands/analyze_code.py index 4de68334..ca7fcb01 100644 --- a/autogpt/commands/analyze_code.py +++ b/autogpt/commands/analyze_code.py @@ -1,21 +1,17 @@ """Code evaluation module.""" from __future__ import annotations -from typing import TYPE_CHECKING - +from autogpt.agent.agent import Agent from autogpt.commands.command import command from autogpt.llm.utils import call_ai_function -if TYPE_CHECKING: - from autogpt.config import Config - @command( "analyze_code", "Analyze Code", '"code": ""', ) -def analyze_code(code: str, config: Config) -> list[str]: +def analyze_code(code: str, agent: Agent) -> list[str]: """ A function that takes in a string and returns a response from create chat completion api call. @@ -33,4 +29,6 @@ def analyze_code(code: str, config: Config) -> list[str]: "Analyzes the given code and returns a list of suggestions for improvements." ) - return call_ai_function(function_string, args, description_string, config=config) + return call_ai_function( + function_string, args, description_string, config=agent.config + ) diff --git a/autogpt/commands/audio_text.py b/autogpt/commands/audio_text.py index 57aa1a88..2991fff3 100644 --- a/autogpt/commands/audio_text.py +++ b/autogpt/commands/audio_text.py @@ -1,14 +1,10 @@ """Commands for converting audio to text.""" import json -from typing import TYPE_CHECKING import requests +from autogpt.agent.agent import Agent from autogpt.commands.command import command -from autogpt.config import Config - -if TYPE_CHECKING: - from autogpt.config import Config @command( @@ -19,7 +15,7 @@ if TYPE_CHECKING: and config.huggingface_api_token, "Configure huggingface_audio_to_text_model and Hugging Face api token.", ) -def read_audio_from_file(filename: str, config: Config) -> str: +def read_audio_from_file(filename: str, agent: Agent) -> str: """ Convert audio to text. 
@@ -31,10 +27,10 @@ def read_audio_from_file(filename: str, config: Config) -> str: """ with open(filename, "rb") as audio_file: audio = audio_file.read() - return read_audio(audio, config) + return read_audio(audio, agent.config) -def read_audio(audio: bytes, config: Config) -> str: +def read_audio(audio: bytes, agent: Agent) -> str: """ Convert audio to text. @@ -44,8 +40,8 @@ def read_audio(audio: bytes, config: Config) -> str: Returns: str: The text from the audio """ - if config.audio_to_text_provider == "huggingface": - text = read_huggingface_audio(audio, config) + if agent.config.audio_to_text_provider == "huggingface": + text = read_huggingface_audio(audio, agent.config) if text: return f"The audio says: {text}" else: @@ -54,10 +50,10 @@ def read_audio(audio: bytes, config: Config) -> str: return "Error: No audio to text provider given" -def read_huggingface_audio(audio: bytes, config: Config) -> str: - model = config.huggingface_audio_to_text_model +def read_huggingface_audio(audio: bytes, agent: Agent) -> str: + model = agent.config.huggingface_audio_to_text_model api_url = f"https://api-inference.huggingface.co/models/{model}" - api_token = config.huggingface_api_token + api_token = agent.config.huggingface_api_token headers = {"Authorization": f"Bearer {api_token}"} if api_token is None: diff --git a/autogpt/commands/execute_code.py b/autogpt/commands/execute_code.py index 9fd3d315..6d3703f1 100644 --- a/autogpt/commands/execute_code.py +++ b/autogpt/commands/execute_code.py @@ -4,11 +4,11 @@ import subprocess from pathlib import Path import docker +from confection import Config from docker.errors import ImageNotFound +from autogpt.agent.agent import Agent from autogpt.commands.command import command -from autogpt.config import Config -from autogpt.config.ai_config import AIConfig from autogpt.logs import logger from autogpt.setup import CFG from autogpt.workspace.workspace import Workspace @@ -22,7 +22,7 @@ DENYLIST_CONTROL = "denylist" "Create a Python file and execute it", '"code": "", "basename": ""', ) -def execute_python_code(code: str, basename: str, config: Config) -> str: +def execute_python_code(code: str, basename: str, agent: Agent) -> str: """Create and execute a Python file in a Docker container and return the STDOUT of the executed code. 
If there is any data that needs to be captured use a print statement @@ -33,8 +33,8 @@ def execute_python_code(code: str, basename: str, config: Config) -> str: Returns: str: The STDOUT captured from the code when it ran """ - ai_name = AIConfig.load(config.ai_settings_file).ai_name - directory = os.path.join(config.workspace_path, ai_name, "executed_code") + ai_name = agent.ai_name + directory = os.path.join(agent.config.workspace_path, ai_name, "executed_code") os.makedirs(directory, exist_ok=True) if not basename.endswith(".py"): @@ -46,13 +46,13 @@ def execute_python_code(code: str, basename: str, config: Config) -> str: with open(path, "w+", encoding="utf-8") as f: f.write(code) - return execute_python_file(f.name, config) + return execute_python_file(f.name, agent) except Exception as e: return f"Error: {str(e)}" @command("execute_python_file", "Execute Python File", '"filename": ""') -def execute_python_file(filename: str, config: Config) -> str: +def execute_python_file(filename: str, agent: Agent) -> str: """Execute a Python file in a Docker container and return the output Args: @@ -68,7 +68,9 @@ def execute_python_file(filename: str, config: Config) -> str: if not filename.endswith(".py"): return "Error: Invalid file type. Only .py files are allowed." - workspace = Workspace(config.workspace_path, config.restrict_to_workspace) + workspace = Workspace( + agent.config.workspace_path, agent.config.restrict_to_workspace + ) path = workspace.get_path(filename) if not path.is_file(): @@ -116,7 +118,7 @@ def execute_python_file(filename: str, config: Config) -> str: image_name, ["python", str(path.relative_to(workspace.root))], volumes={ - config.workspace_path: { + agent.config.workspace_path: { "bind": "/workspace", "mode": "ro", } @@ -175,7 +177,7 @@ def validate_command(command: str, config: Config) -> bool: " shell commands, EXECUTE_LOCAL_COMMANDS must be set to 'True' " "in your config file: .env - do not attempt to bypass the restriction.", ) -def execute_shell(command_line: str, config: Config) -> str: +def execute_shell(command_line: str, agent: Agent) -> str: """Execute a shell command and return the output Args: @@ -184,14 +186,14 @@ def execute_shell(command_line: str, config: Config) -> str: Returns: str: The output of the command """ - if not validate_command(command_line, config): + if not validate_command(command_line, agent.config): logger.info(f"Command '{command_line}' not allowed") return "Error: This Shell Command is not allowed." current_dir = Path.cwd() # Change dir into workspace if necessary - if not current_dir.is_relative_to(config.workspace_path): - os.chdir(config.workspace_path) + if not current_dir.is_relative_to(agent.config.workspace_path): + os.chdir(agent.config.workspace_path) logger.info( f"Executing command '{command_line}' in working directory '{os.getcwd()}'" @@ -215,7 +217,7 @@ def execute_shell(command_line: str, config: Config) -> str: " shell commands, EXECUTE_LOCAL_COMMANDS must be set to 'True' " "in your config. 
Do not attempt to bypass the restriction.", ) -def execute_shell_popen(command_line, config: Config) -> str: +def execute_shell_popen(command_line, agent: Agent) -> str: """Execute a shell command with Popen and returns an english description of the event and the process id @@ -225,14 +227,14 @@ def execute_shell_popen(command_line, config: Config) -> str: Returns: str: Description of the fact that the process started and its id """ - if not validate_command(command_line, config): + if not validate_command(command_line, agent.config): logger.info(f"Command '{command_line}' not allowed") return "Error: This Shell Command is not allowed." current_dir = os.getcwd() # Change dir into workspace if necessary - if config.workspace_path not in current_dir: - os.chdir(config.workspace_path) + if agent.config.workspace_path not in current_dir: + os.chdir(agent.config.workspace_path) logger.info( f"Executing command '{command_line}' in working directory '{os.getcwd()}'" diff --git a/autogpt/commands/file_operations.py b/autogpt/commands/file_operations.py index cb5fb36c..5d9649be 100644 --- a/autogpt/commands/file_operations.py +++ b/autogpt/commands/file_operations.py @@ -5,12 +5,14 @@ import hashlib import os import os.path import re -from typing import TYPE_CHECKING, Generator, Literal +from typing import Generator, Literal import requests from colorama import Back, Fore +from confection import Config from requests.adapters import HTTPAdapter, Retry +from autogpt.agent.agent import Agent from autogpt.commands.command import command from autogpt.commands.file_operations_utils import read_textual_file from autogpt.logs import logger @@ -18,10 +20,6 @@ from autogpt.memory.vector import MemoryItem, VectorMemory from autogpt.spinner import Spinner from autogpt.utils import readable_file_size -if TYPE_CHECKING: - from autogpt.config import Config - - Operation = Literal["write", "append", "delete"] @@ -103,7 +101,7 @@ def is_duplicate_operation( def log_operation( - operation: str, filename: str, config: Config, checksum: str | None = None + operation: str, filename: str, agent: Agent, checksum: str | None = None ) -> None: """Log the file operation to the file_logger.txt @@ -116,7 +114,9 @@ def log_operation( if checksum is not None: log_entry += f" #{checksum}" logger.debug(f"Logging file operation: {log_entry}") - append_to_file(config.file_logger_path, f"{log_entry}\n", config, should_log=False) + append_to_file( + agent.config.file_logger_path, f"{log_entry}\n", agent, should_log=False + ) def split_file( @@ -152,7 +152,7 @@ def split_file( @command("read_file", "Read a file", '"filename": ""') -def read_file(filename: str, config: Config) -> str: +def read_file(filename: str, agent: Agent) -> str: """Read a file and return the contents Args: @@ -201,7 +201,7 @@ def ingest_file( @command("write_to_file", "Write to file", '"filename": "", "text": ""') -def write_to_file(filename: str, text: str, config: Config) -> str: +def write_to_file(filename: str, text: str, agent: Agent) -> str: """Write text to a file Args: @@ -212,14 +212,14 @@ def write_to_file(filename: str, text: str, config: Config) -> str: str: A message indicating success or failure """ checksum = text_checksum(text) - if is_duplicate_operation("write", filename, config, checksum): + if is_duplicate_operation("write", filename, agent.config, checksum): return "Error: File has already been updated." 
try: directory = os.path.dirname(filename) os.makedirs(directory, exist_ok=True) with open(filename, "w", encoding="utf-8") as f: f.write(text) - log_operation("write", filename, config, checksum) + log_operation("write", filename, agent, checksum) return "File written to successfully." except Exception as err: return f"Error: {err}" @@ -233,7 +233,7 @@ def write_to_file(filename: str, text: str, config: Config) -> str: '"occurrence_index": ""', ) def replace_in_file( - filename: str, old_text: str, new_text: str, config: Config, occurrence_index=None + filename: str, old_text: str, new_text: str, agent: Agent, occurrence_index=None ): """Update a file by replacing one or all occurrences of old_text with new_text using Python's built-in string manipulation and regular expression modules for cross-platform file editing similar to sed and awk. @@ -280,7 +280,7 @@ def replace_in_file( with open(filename, "r", encoding="utf-8") as f: checksum = text_checksum(f.read()) - log_operation("update", filename, config, checksum=checksum) + log_operation("update", filename, agent, checksum=checksum) return f"File {filename} updated successfully." except Exception as e: @@ -291,7 +291,7 @@ def replace_in_file( "append_to_file", "Append to file", '"filename": "", "text": ""' ) def append_to_file( - filename: str, text: str, config: Config, should_log: bool = True + filename: str, text: str, agent: Agent, should_log: bool = True ) -> str: """Append text to a file @@ -312,7 +312,7 @@ def append_to_file( if should_log: with open(filename, "r", encoding="utf-8") as f: checksum = text_checksum(f.read()) - log_operation("append", filename, config, checksum=checksum) + log_operation("append", filename, agent, checksum=checksum) return "Text appended successfully." except Exception as err: @@ -320,7 +320,7 @@ def append_to_file( @command("delete_file", "Delete file", '"filename": ""') -def delete_file(filename: str, config: Config) -> str: +def delete_file(filename: str, agent: Agent) -> str: """Delete a file Args: @@ -329,18 +329,18 @@ def delete_file(filename: str, config: Config) -> str: Returns: str: A message indicating success or failure """ - if is_duplicate_operation("delete", filename, config): + if is_duplicate_operation("delete", filename, agent.config): return "Error: File has already been deleted." try: os.remove(filename) - log_operation("delete", filename, config) + log_operation("delete", filename, agent) return "File deleted successfully." 
except Exception as err: return f"Error: {err}" @command("list_files", "List Files in Directory", '"directory": ""') -def list_files(directory: str, config: Config) -> list[str]: +def list_files(directory: str, agent: Agent) -> list[str]: """lists files in a directory recursively Args: @@ -356,7 +356,7 @@ def list_files(directory: str, config: Config) -> list[str]: if file.startswith("."): continue relative_path = os.path.relpath( - os.path.join(root, file), config.workspace_path + os.path.join(root, file), agent.config.workspace_path ) found_files.append(relative_path) @@ -370,7 +370,7 @@ def list_files(directory: str, config: Config) -> list[str]: lambda config: config.allow_downloads, "Error: You do not have user authorization to download files locally.", ) -def download_file(url, filename, config: Config): +def download_file(url, filename, agent: Agent): """Downloads a file Args: url (str): URL of the file to download @@ -380,7 +380,7 @@ def download_file(url, filename, config: Config): directory = os.path.dirname(filename) os.makedirs(directory, exist_ok=True) message = f"{Fore.YELLOW}Downloading file from {Back.LIGHTBLUE_EX}{url}{Back.RESET}{Fore.RESET}" - with Spinner(message, plain_output=config.plain_output) as spinner: + with Spinner(message, plain_output=agent.config.plain_output) as spinner: session = requests.Session() retry = Retry(total=3, backoff_factor=1, status_forcelist=[502, 503, 504]) adapter = HTTPAdapter(max_retries=retry) diff --git a/autogpt/commands/git_operations.py b/autogpt/commands/git_operations.py index c32a8cc3..e844fd41 100644 --- a/autogpt/commands/git_operations.py +++ b/autogpt/commands/git_operations.py @@ -1,15 +1,11 @@ """Git operations for autogpt""" -from typing import TYPE_CHECKING from git.repo import Repo +from autogpt.agent.agent import Agent from autogpt.commands.command import command -from autogpt.config import Config from autogpt.url_utils.validators import validate_url -if TYPE_CHECKING: - from autogpt.config import Config - @command( "clone_repository", @@ -19,7 +15,7 @@ if TYPE_CHECKING: "Configure github_username and github_api_key.", ) @validate_url -def clone_repository(url: str, clone_path: str, config: Config) -> str: +def clone_repository(url: str, clone_path: str, agent: Agent) -> str: """Clone a GitHub repository locally. Args: @@ -30,8 +26,10 @@ def clone_repository(url: str, clone_path: str, config: Config) -> str: str: The result of the clone operation. 
""" split_url = url.split("//") - auth_repo_url = f"//{config.github_username}:{config.github_api_key}@".join( - split_url + auth_repo_url = ( + f"//{agent.config.github_username}:{agent.config.github_api_key}@".join( + split_url + ) ) try: Repo.clone_from(url=auth_repo_url, to_path=clone_path) diff --git a/autogpt/commands/google_search.py b/autogpt/commands/google_search.py index f15885ee..b9d243f9 100644 --- a/autogpt/commands/google_search.py +++ b/autogpt/commands/google_search.py @@ -4,15 +4,12 @@ from __future__ import annotations import json import time from itertools import islice -from typing import TYPE_CHECKING from duckduckgo_search import DDGS +from autogpt.agent.agent import Agent from autogpt.commands.command import command -if TYPE_CHECKING: - from autogpt.config import Config - DUCKDUCKGO_MAX_ATTEMPTS = 3 @@ -22,7 +19,7 @@ DUCKDUCKGO_MAX_ATTEMPTS = 3 '"query": ""', lambda config: not config.google_api_key, ) -def google_search(query: str, config: Config, num_results: int = 8) -> str: +def google_search(query: str, agent: Agent, num_results: int = 8) -> str: """Return the results of a Google search Args: @@ -61,7 +58,7 @@ def google_search(query: str, config: Config, num_results: int = 8) -> str: "Configure google_api_key and custom_search_engine_id.", ) def google_official_search( - query: str, config: Config, num_results: int = 8 + query: str, agent: Agent, num_results: int = 8 ) -> str | list[str]: """Return the results of a Google search using the official Google API @@ -78,8 +75,8 @@ def google_official_search( try: # Get the Google API key and Custom Search Engine ID from the config file - api_key = config.google_api_key - custom_search_engine_id = config.google_custom_search_engine_id + api_key = agent.config.google_api_key + custom_search_engine_id = agent.config.google_custom_search_engine_id # Initialize the Custom Search API service service = build("customsearch", "v1", developerKey=api_key) diff --git a/autogpt/commands/image_gen.py b/autogpt/commands/image_gen.py index 04d86564..b2dc9ea4 100644 --- a/autogpt/commands/image_gen.py +++ b/autogpt/commands/image_gen.py @@ -4,19 +4,15 @@ import json import time import uuid from base64 import b64decode -from typing import TYPE_CHECKING import openai import requests from PIL import Image +from autogpt.agent.agent import Agent from autogpt.commands.command import command -from autogpt.config import Config from autogpt.logs import logger -if TYPE_CHECKING: - from autogpt.config import Config - @command( "generate_image", @@ -25,7 +21,7 @@ if TYPE_CHECKING: lambda config: config.image_provider, "Requires a image provider to be set.", ) -def generate_image(prompt: str, config: Config, size: int = 256) -> str: +def generate_image(prompt: str, agent: Agent, size: int = 256) -> str: """Generate an image from a prompt. 
Args: @@ -35,21 +31,21 @@ def generate_image(prompt: str, config: Config, size: int = 256) -> str: Returns: str: The filename of the image """ - filename = f"{config.workspace_path}/{str(uuid.uuid4())}.jpg" + filename = f"{agent.config.workspace_path}/{str(uuid.uuid4())}.jpg" # DALL-E - if config.image_provider == "dalle": - return generate_image_with_dalle(prompt, filename, size, config) + if agent.config.image_provider == "dalle": + return generate_image_with_dalle(prompt, filename, size, agent) # HuggingFace - elif config.image_provider == "huggingface": - return generate_image_with_hf(prompt, filename, config) + elif agent.config.image_provider == "huggingface": + return generate_image_with_hf(prompt, filename, agent) # SD WebUI - elif config.image_provider == "sdwebui": - return generate_image_with_sd_webui(prompt, filename, config, size) + elif agent.config.image_provider == "sdwebui": + return generate_image_with_sd_webui(prompt, filename, agent, size) return "No Image Provider Set" -def generate_image_with_hf(prompt: str, filename: str, config: Config) -> str: +def generate_image_with_hf(prompt: str, filename: str, agent: Agent) -> str: """Generate an image with HuggingFace's API. Args: @@ -59,15 +55,13 @@ def generate_image_with_hf(prompt: str, filename: str, config: Config) -> str: Returns: str: The filename of the image """ - API_URL = ( - f"https://api-inference.huggingface.co/models/{config.huggingface_image_model}" - ) - if config.huggingface_api_token is None: + API_URL = f"https://api-inference.huggingface.co/models/{agent.config.huggingface_image_model}" + if agent.config.huggingface_api_token is None: raise ValueError( "You need to set your Hugging Face API token in the config file." ) headers = { - "Authorization": f"Bearer {config.huggingface_api_token}", + "Authorization": f"Bearer {agent.config.huggingface_api_token}", "X-Use-Cache": "false", } @@ -110,7 +104,7 @@ def generate_image_with_hf(prompt: str, filename: str, config: Config) -> str: def generate_image_with_dalle( - prompt: str, filename: str, size: int, config: Config + prompt: str, filename: str, size: int, agent: Agent ) -> str: """Generate an image with DALL-E. 
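The hunk above shows generate_image dispatching on agent.config.image_provider, so callers now hand over only the agent, and the provider, model, and credentials are looked up from its Config. A minimal usage sketch, assuming an already-constructed Agent (the helper name make_cover_image is illustrative and not part of this patch):

    from autogpt.agent.agent import Agent
    from autogpt.commands.image_gen import generate_image


    def make_cover_image(agent: Agent) -> str:
        # Provider and credentials are read from agent.config rather than a
        # separate Config argument; "dalle" here is just an example value.
        agent.config.image_provider = "dalle"
        return generate_image("astronaut riding a horse", agent, size=256)
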
@@ -136,7 +130,7 @@ def generate_image_with_dalle( n=1, size=f"{size}x{size}", response_format="b64_json", - api_key=config.openai_api_key, + api_key=agent.config.openai_api_key, ) logger.info(f"Image Generated for prompt:{prompt}") @@ -152,7 +146,7 @@ def generate_image_with_dalle( def generate_image_with_sd_webui( prompt: str, filename: str, - config: Config, + agent: Agent, size: int = 512, negative_prompt: str = "", extra: dict = {}, @@ -169,13 +163,13 @@ def generate_image_with_sd_webui( """ # Create a session and set the basic auth if needed s = requests.Session() - if config.sd_webui_auth: - username, password = config.sd_webui_auth.split(":") + if agent.config.sd_webui_auth: + username, password = agent.config.sd_webui_auth.split(":") s.auth = (username, password or "") # Generate the images response = requests.post( - f"{config.sd_webui_url}/sdapi/v1/txt2img", + f"{agent.config.sd_webui_url}/sdapi/v1/txt2img", json={ "prompt": prompt, "negative_prompt": negative_prompt, diff --git a/autogpt/commands/improve_code.py b/autogpt/commands/improve_code.py index 60e517ef..05e9b51c 100644 --- a/autogpt/commands/improve_code.py +++ b/autogpt/commands/improve_code.py @@ -1,21 +1,18 @@ from __future__ import annotations import json -from typing import TYPE_CHECKING +from autogpt.agent.agent import Agent from autogpt.commands.command import command from autogpt.llm.utils import call_ai_function -if TYPE_CHECKING: - from autogpt.config import Config - @command( "improve_code", "Get Improved Code", '"suggestions": "", "code": ""', ) -def improve_code(suggestions: list[str], code: str, config: Config) -> str: +def improve_code(suggestions: list[str], code: str, agent: Agent) -> str: """ A function that takes in code and suggestions and returns a response from create chat completion api call. @@ -36,4 +33,6 @@ def improve_code(suggestions: list[str], code: str, config: Config) -> str: " provided, making no other changes." 
) - return call_ai_function(function_string, args, description_string, config=config) + return call_ai_function( + function_string, args, description_string, config=agent.config + ) diff --git a/autogpt/commands/task_statuses.py b/autogpt/commands/task_statuses.py index 9f60209c..283328a3 100644 --- a/autogpt/commands/task_statuses.py +++ b/autogpt/commands/task_statuses.py @@ -1,21 +1,19 @@ """Task Statuses module.""" from __future__ import annotations -from typing import TYPE_CHECKING, NoReturn +from typing import NoReturn +from autogpt.agent.agent import Agent from autogpt.commands.command import command from autogpt.logs import logger -if TYPE_CHECKING: - from autogpt.config import Config - @command( "task_complete", "Task Complete (Shutdown)", '"reason": ""', ) -def task_complete(reason: str, config: Config) -> NoReturn: +def task_complete(reason: str, agent: Agent) -> NoReturn: """ A function that takes in a string and exits the program diff --git a/autogpt/commands/web_requests.py b/autogpt/commands/web_requests.py index d7de8dc9..765c3778 100644 --- a/autogpt/commands/web_requests.py +++ b/autogpt/commands/web_requests.py @@ -1,20 +1,24 @@ """Browse a webpage and summarize it using the LLM model""" from __future__ import annotations +from typing import TYPE_CHECKING + import requests from bs4 import BeautifulSoup from requests import Response -from autogpt.config import Config from autogpt.processing.html import extract_hyperlinks, format_hyperlinks from autogpt.url_utils.validators import validate_url session = requests.Session() +if TYPE_CHECKING: + from autogpt.agent.agent import Agent + @validate_url def get_response( - url: str, config: Config, timeout: int = 10 + url: str, agent: Agent, timeout: int = 10 ) -> tuple[None, str] | tuple[Response, None]: """Get the response from a URL @@ -30,7 +34,7 @@ def get_response( requests.exceptions.RequestException: If the HTTP request fails """ try: - session.headers.update({"User-Agent": config.user_agent}) + session.headers.update({"User-Agent": agent.config.user_agent}) response = session.get(url, timeout=timeout) # Check if the response contains an HTTP error @@ -48,7 +52,7 @@ def get_response( return None, f"Error: {str(re)}" -def scrape_text(url: str, config: Config) -> str: +def scrape_text(url: str, agent: Agent) -> str: """Scrape text from a webpage Args: @@ -57,7 +61,7 @@ def scrape_text(url: str, config: Config) -> str: Returns: str: The scraped text """ - response, error_message = get_response(url, config) + response, error_message = get_response(url, agent) if error_message: return error_message if not response: @@ -76,7 +80,7 @@ def scrape_text(url: str, config: Config) -> str: return text -def scrape_links(url: str, config: Config) -> str | list[str]: +def scrape_links(url: str, agent: Agent) -> str | list[str]: """Scrape links from a webpage Args: @@ -85,7 +89,7 @@ def scrape_links(url: str, config: Config) -> str | list[str]: Returns: str | list[str]: The scraped links """ - response, error_message = get_response(url, config) + response, error_message = get_response(url, agent) if error_message: return error_message if not response: diff --git a/autogpt/commands/web_selenium.py b/autogpt/commands/web_selenium.py index 3cc99282..14036c85 100644 --- a/autogpt/commands/web_selenium.py +++ b/autogpt/commands/web_selenium.py @@ -4,7 +4,7 @@ from __future__ import annotations import logging from pathlib import Path from sys import platform -from typing import TYPE_CHECKING, Optional, Type +from typing import Optional, Type 
from bs4 import BeautifulSoup from selenium.common.exceptions import WebDriverException @@ -27,15 +27,13 @@ from webdriver_manager.chrome import ChromeDriverManager from webdriver_manager.firefox import GeckoDriverManager from webdriver_manager.microsoft import EdgeChromiumDriverManager as EdgeDriverManager +from autogpt.agent.agent import Agent from autogpt.commands.command import command from autogpt.logs import logger from autogpt.memory.vector import MemoryItem, get_memory from autogpt.processing.html import extract_hyperlinks, format_hyperlinks from autogpt.url_utils.validators import validate_url -if TYPE_CHECKING: - from autogpt.config import Config - BrowserOptions = ChromeOptions | EdgeOptions | FirefoxOptions | SafariOptions FILE_DIR = Path(__file__).parent.parent @@ -47,7 +45,7 @@ FILE_DIR = Path(__file__).parent.parent '"url": "", "question": ""', ) @validate_url -def browse_website(url: str, question: str, config: Config) -> str: +def browse_website(url: str, question: str, agent: Agent) -> str: """Browse a website and return the answer and links to the user Args: @@ -58,7 +56,7 @@ def browse_website(url: str, question: str, config: Config) -> str: Tuple[str, WebDriver]: The answer and links to the user and the webdriver """ try: - driver, text = scrape_text_with_selenium(url, config) + driver, text = scrape_text_with_selenium(url, agent) except WebDriverException as e: # These errors are often quite long and include lots of context. # Just grab the first line. @@ -66,7 +64,7 @@ def browse_website(url: str, question: str, config: Config) -> str: return f"Error: {msg}" add_header(driver) - summary = summarize_memorize_webpage(url, text, question, config, driver) + summary = summarize_memorize_webpage(url, text, question, agent, driver) links = scrape_links_with_selenium(driver, url) # Limit links to 5 @@ -76,7 +74,7 @@ def browse_website(url: str, question: str, config: Config) -> str: return f"Answer gathered from website: {summary}\n\nLinks: {links}" -def scrape_text_with_selenium(url: str, config: Config) -> tuple[WebDriver, str]: +def scrape_text_with_selenium(url: str, agent: Agent) -> tuple[WebDriver, str]: """Scrape text from a website using selenium Args: @@ -94,23 +92,23 @@ def scrape_text_with_selenium(url: str, config: Config) -> tuple[WebDriver, str] "safari": SafariOptions, } - options: BrowserOptions = options_available[config.selenium_web_browser]() + options: BrowserOptions = options_available[agent.config.selenium_web_browser]() options.add_argument( "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.5615.49 Safari/537.36" ) - if config.selenium_web_browser == "firefox": - if config.selenium_headless: + if agent.config.selenium_web_browser == "firefox": + if agent.config.selenium_headless: options.headless = True options.add_argument("--disable-gpu") driver = FirefoxDriver( service=GeckoDriverService(GeckoDriverManager().install()), options=options ) - elif config.selenium_web_browser == "edge": + elif agent.config.selenium_web_browser == "edge": driver = EdgeDriver( service=EdgeDriverService(EdgeDriverManager().install()), options=options ) - elif config.selenium_web_browser == "safari": + elif agent.config.selenium_web_browser == "safari": # Requires a bit more setup on the users end # See https://developer.apple.com/documentation/webkit/testing_with_webdriver_in_safari driver = SafariDriver(options=options) @@ -120,7 +118,7 @@ def scrape_text_with_selenium(url: str, config: Config) -> tuple[WebDriver, 
str] options.add_argument("--remote-debugging-port=9222") options.add_argument("--no-sandbox") - if config.selenium_headless: + if agent.config.selenium_headless: options.add_argument("--headless=new") options.add_argument("--disable-gpu") @@ -205,7 +203,7 @@ def summarize_memorize_webpage( url: str, text: str, question: str, - config: Config, + agent: Agent, driver: Optional[WebDriver] = None, ) -> str: """Summarize text using the OpenAI API @@ -225,7 +223,7 @@ def summarize_memorize_webpage( text_length = len(text) logger.info(f"Text length: {text_length} characters") - memory = get_memory(config) + memory = get_memory(agent.config) new_memory = MemoryItem.from_webpage(text, url, question=question) memory.add(new_memory) diff --git a/autogpt/commands/write_tests.py b/autogpt/commands/write_tests.py index a63c265f..c09930b9 100644 --- a/autogpt/commands/write_tests.py +++ b/autogpt/commands/write_tests.py @@ -2,21 +2,18 @@ from __future__ import annotations import json -from typing import TYPE_CHECKING +from autogpt.agent.agent import Agent from autogpt.commands.command import command from autogpt.llm.utils import call_ai_function -if TYPE_CHECKING: - from autogpt.config import Config - @command( "write_tests", "Write Tests", '"code": "", "focus": ""', ) -def write_tests(code: str, focus: list[str], config: Config) -> str: +def write_tests(code: str, focus: list[str], agent: Agent) -> str: """ A function that takes in code and focus topics and returns a response from create chat completion api call. @@ -38,4 +35,6 @@ def write_tests(code: str, focus: list[str], config: Config) -> str: " specific areas if required." ) - return call_ai_function(function_string, args, description_string, config=config) + return call_ai_function( + function_string, args, description_string, config=agent.config + ) diff --git a/autogpt/llm/chat.py b/autogpt/llm/chat.py index 60afc93a..9ed07cb2 100644 --- a/autogpt/llm/chat.py +++ b/autogpt/llm/chat.py @@ -181,7 +181,7 @@ def chat_with_ai( logger.debug("") logger.debug("----------- END OF CONTEXT ----------------") agent.log_cycle_handler.log_cycle( - agent.ai_config.ai_name, + agent.ai_name, agent.created_at, agent.cycle_count, message_sequence.raw(), diff --git a/autogpt/memory/message_history.py b/autogpt/memory/message_history.py index 1505bd65..22f96a4a 100644 --- a/autogpt/memory/message_history.py +++ b/autogpt/memory/message_history.py @@ -184,7 +184,7 @@ Latest Development: prompt = ChatSequence.for_model(cfg.fast_llm_model, [Message("user", prompt)]) self.agent.log_cycle_handler.log_cycle( - self.agent.ai_config.ai_name, + self.agent.ai_name, self.agent.created_at, self.agent.cycle_count, prompt.raw(), @@ -194,7 +194,7 @@ Latest Development: self.summary = create_chat_completion(prompt) self.agent.log_cycle_handler.log_cycle( - self.agent.ai_config.ai_name, + self.agent.ai_name, self.agent.created_at, self.agent.cycle_count, self.summary, diff --git a/tests/challenges/basic_abilities/test_write_file.py b/tests/challenges/basic_abilities/test_write_file.py index 1c75a9b3..e84529bc 100644 --- a/tests/challenges/basic_abilities/test_write_file.py +++ b/tests/challenges/basic_abilities/test_write_file.py @@ -4,7 +4,6 @@ import pytest from autogpt.agent import Agent from autogpt.commands.file_operations import read_file -from autogpt.config import Config from tests.challenges.challenge_decorator.challenge_decorator import challenge from tests.challenges.utils import get_workspace_path, run_interaction_loop from tests.utils import requires_api_key @@ -23,7 
+22,6 @@ def test_write_file( file_system_agents: List[Agent], patched_api_requestor: None, monkeypatch: pytest.MonkeyPatch, - config: Config, level_to_run: int, ) -> None: file_system_agent = file_system_agents[level_to_run - 1] @@ -35,7 +33,7 @@ def test_write_file( for file_name, expected_lines in expected_outputs.items(): file_path = get_workspace_path(file_system_agent, file_name) - content = read_file(file_path, config) + content = read_file(file_path, file_system_agent) for expected_line in expected_lines: assert ( expected_line in content diff --git a/tests/challenges/debug_code/test_debug_code_challenge_a.py b/tests/challenges/debug_code/test_debug_code_challenge_a.py index 130e9427..ca85675c 100644 --- a/tests/challenges/debug_code/test_debug_code_challenge_a.py +++ b/tests/challenges/debug_code/test_debug_code_challenge_a.py @@ -5,7 +5,6 @@ from pytest_mock import MockerFixture from autogpt.agent import Agent from autogpt.commands.execute_code import execute_python_file -from autogpt.config import Config from tests.challenges.challenge_decorator.challenge_decorator import challenge from tests.challenges.utils import ( copy_file_into_workspace, @@ -28,7 +27,6 @@ def test_debug_code_challenge_a( debug_code_agents: Agent, monkeypatch: pytest.MonkeyPatch, patched_api_requestor: MockerFixture, - config: Config, level_to_run: int, ) -> None: """ @@ -37,7 +35,6 @@ def test_debug_code_challenge_a( :param debug_code_agent: The agent to test. :param monkeypatch: pytest's monkeypatch utility for modifying builtins. :patched_api_requestor: Sends api requests to our API CI pipeline - :config: The config object for the agent. :level_to_run: The level to run. """ debug_code_agent = debug_code_agents[level_to_run - 1] @@ -48,7 +45,7 @@ def test_debug_code_challenge_a( run_interaction_loop(monkeypatch, debug_code_agent, CYCLE_COUNT) output = execute_python_file( - get_workspace_path(debug_code_agent, TEST_FILE_PATH), config + get_workspace_path(debug_code_agent, TEST_FILE_PATH), debug_code_agent ) assert "error" not in output.lower(), f"Errors found in output: {output}!" 
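Both challenge tests above now pass the agent itself into commands such as execute_python_file, which is the core convention this patch establishes: every @command-decorated function takes an Agent as its trailing parameter, reads settings from agent.config, and is invoked by the dispatcher as cmd(**arguments, agent=agent). A minimal sketch of a command written against that convention (hello_workspace is a hypothetical example, not a command added by this patch):

    from autogpt.agent.agent import Agent
    from autogpt.commands.command import command


    @command("hello_workspace", "Report the workspace path", '"greeting": "<greeting>"')
    def hello_workspace(greeting: str, agent: Agent) -> str:
        # Configuration is reached through the agent instead of a standalone Config.
        return f"{greeting}: {agent.ai_name} works in {agent.config.workspace_path}"
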
diff --git a/tests/challenges/information_retrieval/test_information_retrieval_challenge_a.py b/tests/challenges/information_retrieval/test_information_retrieval_challenge_a.py index 086623a8..ee2c17c9 100644 --- a/tests/challenges/information_retrieval/test_information_retrieval_challenge_a.py +++ b/tests/challenges/information_retrieval/test_information_retrieval_challenge_a.py @@ -2,7 +2,6 @@ import pytest from pytest_mock import MockerFixture from autogpt.commands.file_operations import read_file -from autogpt.config import Config from tests.challenges.challenge_decorator.challenge_decorator import challenge from tests.challenges.utils import get_workspace_path, run_interaction_loop from tests.utils import requires_api_key @@ -21,7 +20,6 @@ def test_information_retrieval_challenge_a( information_retrieval_agents: Agent, monkeypatch: pytest.MonkeyPatch, patched_api_requestor: MockerFixture, - config: Config, level_to_run: int, ) -> None: """ @@ -34,7 +32,7 @@ def test_information_retrieval_challenge_a( run_interaction_loop(monkeypatch, information_retrieval_agent, CYCLE_COUNT) file_path = get_workspace_path(information_retrieval_agent, OUTPUT_LOCATION) - content = read_file(file_path, config) + content = read_file(file_path, information_retrieval_agent) expected_revenues = EXPECTED_REVENUES[level_to_run - 1] for revenue in expected_revenues: assert ( diff --git a/tests/challenges/information_retrieval/test_information_retrieval_challenge_b.py b/tests/challenges/information_retrieval/test_information_retrieval_challenge_b.py index 0b1a897a..94ca4e44 100644 --- a/tests/challenges/information_retrieval/test_information_retrieval_challenge_b.py +++ b/tests/challenges/information_retrieval/test_information_retrieval_challenge_b.py @@ -5,7 +5,6 @@ from pytest_mock import MockerFixture from autogpt.agent import Agent from autogpt.commands.file_operations import read_file -from autogpt.config import Config from tests.challenges.challenge_decorator.challenge_decorator import challenge from tests.challenges.utils import get_workspace_path, run_interaction_loop from tests.utils import requires_api_key @@ -22,7 +21,6 @@ def test_information_retrieval_challenge_b( monkeypatch: pytest.MonkeyPatch, patched_api_requestor: MockerFixture, level_to_run: int, - config: Config, ) -> None: """ Test the challenge_b function in a given agent by mocking user inputs and checking the output file content. @@ -31,14 +29,13 @@ def test_information_retrieval_challenge_b( :param monkeypatch: pytest's monkeypatch utility for modifying builtins. :param patched_api_requestor: APIRequestor Patch to override the openai.api_requestor module for testing. :param level_to_run: The level to run. - :param config: The config object. 
""" with contextlib.suppress(SystemExit): run_interaction_loop(monkeypatch, get_nobel_prize_agent, CYCLE_COUNT) file_path = get_workspace_path(get_nobel_prize_agent, OUTPUT_LOCATION) - content = read_file(file_path, config) + content = read_file(file_path, get_nobel_prize_agent) assert "Andre Geim" in content, "Expected the file to contain Andre Geim" assert ( "Konstantin Novoselov" in content diff --git a/tests/challenges/kubernetes/test_kubernetes_template_challenge_a.py b/tests/challenges/kubernetes/test_kubernetes_template_challenge_a.py index 0cf1cb42..93a2695f 100644 --- a/tests/challenges/kubernetes/test_kubernetes_template_challenge_a.py +++ b/tests/challenges/kubernetes/test_kubernetes_template_challenge_a.py @@ -4,7 +4,6 @@ from pytest_mock import MockerFixture from autogpt.agent import Agent from autogpt.commands.file_operations import read_file -from autogpt.config import Config from tests.challenges.challenge_decorator.challenge_decorator import challenge from tests.challenges.utils import get_workspace_path, run_interaction_loop from tests.utils import requires_api_key @@ -20,7 +19,6 @@ def test_kubernetes_template_challenge_a( kubernetes_agent: Agent, monkeypatch: pytest.MonkeyPatch, patched_api_requestor: MockerFixture, - config: Config, level_to_run: int, ) -> None: """ @@ -30,13 +28,12 @@ def test_kubernetes_template_challenge_a( Args: kubernetes_agent (Agent) monkeypatch (pytest.MonkeyPatch) - config (Config) level_to_run (int) """ run_interaction_loop(monkeypatch, kubernetes_agent, CYCLE_COUNT) file_path = get_workspace_path(kubernetes_agent, OUTPUT_LOCATION) - content = read_file(file_path, config) + content = read_file(file_path, kubernetes_agent) for word in ["apiVersion", "kind", "metadata", "spec"]: assert word in content, f"Expected the file to contain {word}" diff --git a/tests/challenges/memory/test_memory_challenge_a.py b/tests/challenges/memory/test_memory_challenge_a.py index 336f8d76..b67af6f7 100644 --- a/tests/challenges/memory/test_memory_challenge_a.py +++ b/tests/challenges/memory/test_memory_challenge_a.py @@ -3,7 +3,6 @@ from pytest_mock import MockerFixture from autogpt.agent import Agent from autogpt.commands.file_operations import read_file, write_to_file -from autogpt.config import Config from tests.challenges.challenge_decorator.challenge_decorator import challenge from tests.challenges.utils import get_workspace_path, run_interaction_loop from tests.utils import requires_api_key @@ -18,7 +17,6 @@ def test_memory_challenge_a( memory_management_agent: Agent, patched_api_requestor: MockerFixture, monkeypatch: pytest.MonkeyPatch, - config: Config, level_to_run: int, ) -> None: """ @@ -28,17 +26,16 @@ def test_memory_challenge_a( memory_management_agent (Agent) patched_api_requestor (MockerFixture) monkeypatch (pytest.MonkeyPatch) - config (Config) level_to_run (int) """ task_id = "2314" - create_instructions_files(memory_management_agent, level_to_run, task_id, config) + create_instructions_files(memory_management_agent, level_to_run, task_id) run_interaction_loop(monkeypatch, memory_management_agent, level_to_run + 2) file_path = get_workspace_path(memory_management_agent, OUTPUT_LOCATION) - content = read_file(file_path, config) + content = read_file(file_path, memory_management_agent) assert task_id in content, f"Expected the file to contain {task_id}" @@ -46,7 +43,6 @@ def create_instructions_files( memory_management_agent: Agent, num_files: int, task_id: str, - config: Config, base_filename: str = "instructions_", ) -> None: """ @@ -61,7 +57,7 
@@ def create_instructions_files( content = generate_content(i, task_id, base_filename, num_files) file_name = f"{base_filename}{i}.txt" file_path = get_workspace_path(memory_management_agent, file_name) - write_to_file(file_path, content, config) + write_to_file(file_path, content, memory_management_agent) def generate_content( diff --git a/tests/challenges/memory/test_memory_challenge_b.py b/tests/challenges/memory/test_memory_challenge_b.py index 829afa1a..4a4d30e0 100644 --- a/tests/challenges/memory/test_memory_challenge_b.py +++ b/tests/challenges/memory/test_memory_challenge_b.py @@ -3,7 +3,6 @@ from pytest_mock import MockerFixture from autogpt.agent import Agent from autogpt.commands.file_operations import read_file, write_to_file -from autogpt.config import Config from tests.challenges.challenge_decorator.challenge_decorator import challenge from tests.challenges.utils import ( generate_noise, @@ -23,7 +22,6 @@ def test_memory_challenge_b( memory_management_agent: Agent, patched_api_requestor: MockerFixture, monkeypatch: pytest.MonkeyPatch, - config: Config, level_to_run: int, ) -> None: """ @@ -37,12 +35,12 @@ def test_memory_challenge_b( level_to_run (int) """ task_ids = [str(i * 1111) for i in range(1, level_to_run + 1)] - create_instructions_files(memory_management_agent, level_to_run, task_ids, config) + create_instructions_files(memory_management_agent, level_to_run, task_ids) run_interaction_loop(monkeypatch, memory_management_agent, level_to_run + 2) file_path = get_workspace_path(memory_management_agent, OUTPUT_LOCATION) - content = read_file(file_path, config) + content = read_file(file_path, memory_management_agent) for task_id in task_ids: assert task_id in content, f"Expected the file to contain {task_id}" @@ -51,7 +49,6 @@ def create_instructions_files( memory_management_agent: Agent, level: int, task_ids: list, - config: Config, base_filename: str = "instructions_", ) -> None: """ @@ -68,7 +65,7 @@ def create_instructions_files( file_name = f"{base_filename}{i}.txt" file_path = get_workspace_path(memory_management_agent, file_name) - write_to_file(file_path, content, config) + write_to_file(file_path, content, memory_management_agent) def generate_content(index: int, task_ids: list, base_filename: str, level: int) -> str: diff --git a/tests/challenges/memory/test_memory_challenge_c.py b/tests/challenges/memory/test_memory_challenge_c.py index 2cd453d9..2479da25 100644 --- a/tests/challenges/memory/test_memory_challenge_c.py +++ b/tests/challenges/memory/test_memory_challenge_c.py @@ -3,7 +3,6 @@ from pytest_mock import MockerFixture from autogpt.agent import Agent from autogpt.commands.file_operations import read_file, write_to_file -from autogpt.config import Config from tests.challenges.challenge_decorator.challenge_decorator import challenge from tests.challenges.utils import ( generate_noise, @@ -24,7 +23,6 @@ def test_memory_challenge_c( memory_management_agent: Agent, patched_api_requestor: MockerFixture, monkeypatch: pytest.MonkeyPatch, - config: Config, level_to_run: int, ) -> None: """ @@ -36,7 +34,6 @@ def test_memory_challenge_c( memory_management_agent (Agent) patched_api_requestor (MockerFixture) monkeypatch (pytest.MonkeyPatch) - config (Config) level_to_run (int) """ silly_phrases = [ @@ -54,12 +51,14 @@ def test_memory_challenge_c( level_silly_phrases = silly_phrases[:level_to_run] create_instructions_files( - memory_management_agent, level_to_run, level_silly_phrases, config=config + memory_management_agent, + level_to_run, + level_silly_phrases, ) 
run_interaction_loop(monkeypatch, memory_management_agent, level_to_run + 2) file_path = get_workspace_path(memory_management_agent, OUTPUT_LOCATION) - content = read_file(file_path, config) + content = read_file(file_path, agent=memory_management_agent) for phrase in level_silly_phrases: assert phrase in content, f"Expected the file to contain {phrase}" @@ -68,7 +67,6 @@ def create_instructions_files( memory_management_agent: Agent, level: int, task_ids: list, - config: Config, base_filename: str = "instructions_", ) -> None: """ @@ -84,7 +82,7 @@ def create_instructions_files( content = generate_content(i, task_ids, base_filename, level) file_name = f"{base_filename}{i}.txt" file_path = get_workspace_path(memory_management_agent, file_name) - write_to_file(file_path, content, config) + write_to_file(file_path, content, memory_management_agent) def generate_content( diff --git a/tests/challenges/memory/test_memory_challenge_d.py b/tests/challenges/memory/test_memory_challenge_d.py index e788f65d..fc7b5a33 100644 --- a/tests/challenges/memory/test_memory_challenge_d.py +++ b/tests/challenges/memory/test_memory_challenge_d.py @@ -6,7 +6,6 @@ from pytest_mock import MockerFixture from autogpt.agent import Agent from autogpt.commands.file_operations import read_file, write_to_file -from autogpt.config import Config from tests.challenges.challenge_decorator.challenge_decorator import challenge from tests.challenges.utils import get_workspace_path, run_interaction_loop from tests.utils import requires_api_key @@ -23,7 +22,6 @@ def test_memory_challenge_d( memory_management_agent: Agent, patched_api_requestor: MockerFixture, monkeypatch: pytest.MonkeyPatch, - config: Config, level_to_run: int, ) -> None: """ @@ -41,12 +39,12 @@ def test_memory_challenge_d( ] level_sally_anne_test_phrases = sally_anne_test_phrases[:level_to_run] create_instructions_files( - memory_management_agent, level_to_run, level_sally_anne_test_phrases, config + memory_management_agent, level_to_run, level_sally_anne_test_phrases ) run_interaction_loop(monkeypatch, memory_management_agent, level_to_run + 2) file_path = get_workspace_path(memory_management_agent, OUTPUT_LOCATION) - content = read_file(file_path, config) + content = read_file(file_path, memory_management_agent) check_beliefs(content, level_to_run) @@ -177,7 +175,6 @@ def create_instructions_files( memory_management_agent: Agent, level: int, test_phrases: list, - config: Config, base_filename: str = "instructions_", ) -> None: """ @@ -186,14 +183,13 @@ def create_instructions_files( level: memory_management_agent (Agent) test_phrases (list) - config (Config) base_filename (str, optional) """ for i in range(1, level + 1): content = generate_content(i, test_phrases, base_filename, level) file_name = f"{base_filename}{i}.txt" file_path = get_workspace_path(memory_management_agent, file_name) - write_to_file(file_path, content, config) + write_to_file(file_path, content, memory_management_agent) def generate_content( diff --git a/tests/conftest.py b/tests/conftest.py index 8e607c39..ca6b4d4c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -4,9 +4,14 @@ from pathlib import Path import pytest from pytest_mock import MockerFixture +from autogpt.agent.agent import Agent +from autogpt.commands.command import CommandRegistry +from autogpt.config.ai_config import AIConfig from autogpt.config.config import Config from autogpt.llm.api_manager import ApiManager from autogpt.logs import TypingConsoleHandler +from autogpt.memory.vector import get_memory +from 
autogpt.prompts.prompt import DEFAULT_TRIGGERING_PROMPT from autogpt.workspace import Workspace pytest_plugins = [ @@ -57,3 +62,32 @@ def patch_emit(monkeypatch): print(self.format(record)) monkeypatch.setattr(TypingConsoleHandler, "emit", quick_emit) + + +@pytest.fixture +def agent(config: Config, workspace: Workspace) -> Agent: + ai_config = AIConfig( + ai_name="Base", + ai_role="A base AI", + ai_goals=[], + ) + + command_registry = CommandRegistry() + ai_config.command_registry = command_registry + + config.set_memory_backend("json_file") + memory_json_file = get_memory(config, init=True) + + system_prompt = ai_config.construct_full_prompt() + + return Agent( + ai_name=ai_config.ai_name, + memory=memory_json_file, + command_registry=command_registry, + ai_config=ai_config, + config=config, + next_action_count=0, + system_prompt=system_prompt, + triggering_prompt=DEFAULT_TRIGGERING_PROMPT, + workspace_directory=workspace.root, + ) diff --git a/tests/integration/test_execute_code.py b/tests/integration/test_execute_code.py index 530e7702..16d6c4d9 100644 --- a/tests/integration/test_execute_code.py +++ b/tests/integration/test_execute_code.py @@ -7,8 +7,8 @@ from typing import Callable import pytest import autogpt.commands.execute_code as sut # system under testing +from autogpt.agent.agent import Agent from autogpt.config import Config -from autogpt.config.ai_config import AIConfig @pytest.fixture @@ -31,54 +31,54 @@ def random_string(): return "".join(random.choice(string.ascii_lowercase) for _ in range(10)) -def test_execute_python_file(python_test_file: str, random_string: str, config): - result: str = sut.execute_python_file(python_test_file, config) +def test_execute_python_file(python_test_file: str, random_string: str, agent: Agent): + result: str = sut.execute_python_file(python_test_file, agent=agent) assert result.replace("\r", "") == f"Hello {random_string}!\n" -def test_execute_python_code(random_code: str, random_string: str, config: Config): - ai_name = AIConfig.load(config.ai_settings_file).ai_name +def test_execute_python_code(random_code: str, random_string: str, agent: Agent): + ai_name = agent.ai_name - result: str = sut.execute_python_code(random_code, "test_code", config) + result: str = sut.execute_python_code(random_code, "test_code", agent=agent) assert result.replace("\r", "") == f"Hello {random_string}!\n" # Check that the code is stored destination = os.path.join( - config.workspace_path, ai_name, "executed_code", "test_code.py" + agent.config.workspace_path, ai_name, "executed_code", "test_code.py" ) with open(destination) as f: assert f.read() == random_code def test_execute_python_code_overwrites_file( - random_code: str, random_string: str, config: Config + random_code: str, random_string: str, agent: Agent ): - ai_name = AIConfig.load(config.ai_settings_file).ai_name + ai_name = agent.ai_name destination = os.path.join( - config.workspace_path, ai_name, "executed_code", "test_code.py" + agent.config.workspace_path, ai_name, "executed_code", "test_code.py" ) os.makedirs(os.path.dirname(destination), exist_ok=True) with open(destination, "w+") as f: f.write("This will be overwritten") - sut.execute_python_code(random_code, "test_code.py", config) + sut.execute_python_code(random_code, "test_code.py", agent=agent) # Check that the file is updated with the new code with open(destination) as f: assert f.read() == random_code -def test_execute_python_file_invalid(config: Config): +def test_execute_python_file_invalid(agent: Agent): assert all( - s in 
sut.execute_python_file("not_python", config).lower() + s in sut.execute_python_file("not_python", agent).lower() for s in ["error:", "invalid", ".py"] ) -def test_execute_python_file_not_found(config: Config): +def test_execute_python_file_not_found(agent: Agent): assert all( - s in sut.execute_python_file("notexist.py", config).lower() + s in sut.execute_python_file("notexist.py", agent).lower() for s in [ "python: can't open file 'notexist.py'", "[errno 2] no such file or directory", @@ -86,43 +86,43 @@ def test_execute_python_file_not_found(config: Config): ) -def test_execute_shell(random_string: str, config: Config): - result = sut.execute_shell(f"echo 'Hello {random_string}!'", config) +def test_execute_shell(random_string: str, agent: Agent): + result = sut.execute_shell(f"echo 'Hello {random_string}!'", agent) assert f"Hello {random_string}!" in result -def test_execute_shell_local_commands_not_allowed(random_string: str, config: Config): - result = sut.execute_shell(f"echo 'Hello {random_string}!'", config) +def test_execute_shell_local_commands_not_allowed(random_string: str, agent: Agent): + result = sut.execute_shell(f"echo 'Hello {random_string}!'", agent) assert f"Hello {random_string}!" in result -def test_execute_shell_denylist_should_deny(config: Config, random_string: str): - config.shell_denylist = ["echo"] +def test_execute_shell_denylist_should_deny(agent: Agent, random_string: str): + agent.config.shell_denylist = ["echo"] - result = sut.execute_shell(f"echo 'Hello {random_string}!'", config) + result = sut.execute_shell(f"echo 'Hello {random_string}!'", agent) assert "Error:" in result and "not allowed" in result -def test_execute_shell_denylist_should_allow(config: Config, random_string: str): - config.shell_denylist = ["cat"] +def test_execute_shell_denylist_should_allow(agent: Agent, random_string: str): + agent.config.shell_denylist = ["cat"] - result = sut.execute_shell(f"echo 'Hello {random_string}!'", config) + result = sut.execute_shell(f"echo 'Hello {random_string}!'", agent) assert "Hello" in result and random_string in result assert "Error" not in result -def test_execute_shell_allowlist_should_deny(config: Config, random_string: str): - config.shell_command_control = sut.ALLOWLIST_CONTROL - config.shell_allowlist = ["cat"] +def test_execute_shell_allowlist_should_deny(agent: Agent, random_string: str): + agent.config.shell_command_control = sut.ALLOWLIST_CONTROL + agent.config.shell_allowlist = ["cat"] - result = sut.execute_shell(f"echo 'Hello {random_string}!'", config) + result = sut.execute_shell(f"echo 'Hello {random_string}!'", agent) assert "Error:" in result and "not allowed" in result -def test_execute_shell_allowlist_should_allow(config: Config, random_string: str): - config.shell_command_control = sut.ALLOWLIST_CONTROL - config.shell_allowlist = ["echo"] +def test_execute_shell_allowlist_should_allow(agent: Agent, random_string: str): + agent.config.shell_command_control = sut.ALLOWLIST_CONTROL + agent.config.shell_allowlist = ["echo"] - result = sut.execute_shell(f"echo 'Hello {random_string}!'", config) + result = sut.execute_shell(f"echo 'Hello {random_string}!'", agent) assert "Hello" in result and random_string in result assert "Error" not in result diff --git a/tests/integration/test_image_gen.py b/tests/integration/test_image_gen.py index 0156c9e5..a606d8da 100644 --- a/tests/integration/test_image_gen.py +++ b/tests/integration/test_image_gen.py @@ -6,6 +6,7 @@ from unittest.mock import patch import pytest from PIL import Image +from 
autogpt.agent.agent import Agent from autogpt.commands.image_gen import generate_image, generate_image_with_sd_webui from tests.utils import requires_api_key @@ -18,10 +19,10 @@ def image_size(request): @requires_api_key("OPENAI_API_KEY") @pytest.mark.vcr -def test_dalle(config, workspace, image_size, patched_api_requestor): +def test_dalle(agent: Agent, workspace, image_size, patched_api_requestor): """Test DALL-E image generation.""" generate_and_validate( - config, + agent, workspace, image_provider="dalle", image_size=image_size, @@ -36,10 +37,10 @@ def test_dalle(config, workspace, image_size, patched_api_requestor): "image_model", ["CompVis/stable-diffusion-v1-4", "stabilityai/stable-diffusion-2-1"], ) -def test_huggingface(config, workspace, image_size, image_model): +def test_huggingface(agent: Agent, workspace, image_size, image_model): """Test HuggingFace image generation.""" generate_and_validate( - config, + agent, workspace, image_provider="huggingface", image_size=image_size, @@ -48,10 +49,10 @@ def test_huggingface(config, workspace, image_size, image_model): @pytest.mark.xfail(reason="SD WebUI call does not work.") -def test_sd_webui(config, workspace, image_size): +def test_sd_webui(agent: Agent, workspace, image_size): """Test SD WebUI image generation.""" generate_and_validate( - config, + agent, workspace, image_provider="sd_webui", image_size=image_size, @@ -59,11 +60,11 @@ def test_sd_webui(config, workspace, image_size): @pytest.mark.xfail(reason="SD WebUI call does not work.") -def test_sd_webui_negative_prompt(config, workspace, image_size): +def test_sd_webui_negative_prompt(agent: Agent, workspace, image_size): gen_image = functools.partial( generate_image_with_sd_webui, prompt="astronaut riding a horse", - config=config, + agent=agent, size=image_size, extra={"seed": 123}, ) @@ -87,7 +88,7 @@ def lst(txt): def generate_and_validate( - config, + agent: Agent, workspace, image_size, image_provider, @@ -95,11 +96,11 @@ def generate_and_validate( **kwargs, ): """Generate an image and validate the output.""" - config.image_provider = image_provider - config.huggingface_image_model = hugging_face_image_model + agent.config.image_provider = image_provider + agent.config.huggingface_image_model = hugging_face_image_model prompt = "astronaut riding a horse" - image_path = lst(generate_image(prompt, config, image_size, **kwargs)) + image_path = lst(generate_image(prompt, agent, image_size, **kwargs)) assert image_path.exists() with Image.open(image_path) as img: assert img.size == (image_size, image_size) @@ -120,7 +121,7 @@ def generate_and_validate( ) @pytest.mark.parametrize("delay", [10, 0]) def test_huggingface_fail_request_with_delay( - config, workspace, image_size, image_model, return_text, delay + agent: Agent, workspace, image_size, image_model, return_text, delay ): return_text = return_text.replace("[model]", image_model).replace( "[delay]", str(delay) @@ -138,13 +139,13 @@ def test_huggingface_fail_request_with_delay( mock_post.return_value.ok = False mock_post.return_value.text = return_text - config.image_provider = "huggingface" - config.huggingface_image_model = image_model + agent.config.image_provider = "huggingface" + agent.config.huggingface_image_model = image_model prompt = "astronaut riding a horse" with patch("time.sleep") as mock_sleep: # Verify request fails. - result = generate_image(prompt, config, image_size) + result = generate_image(prompt, agent, image_size) assert result == "Error creating image." 
# Verify retry was called with delay if delay is in return_text @@ -154,8 +155,8 @@ def test_huggingface_fail_request_with_delay( mock_sleep.assert_not_called() -def test_huggingface_fail_request_with_delay(mocker, config): - config.huggingface_api_token = "1" +def test_huggingface_fail_request_with_delay(mocker, agent: Agent): + agent.config.huggingface_api_token = "1" # Mock requests.post mock_post = mocker.patch("requests.post") @@ -166,10 +167,10 @@ def test_huggingface_fail_request_with_delay(mocker, config): # Mock time.sleep mock_sleep = mocker.patch("time.sleep") - config.image_provider = "huggingface" - config.huggingface_image_model = "CompVis/stable-diffusion-v1-4" + agent.config.image_provider = "huggingface" + agent.config.huggingface_image_model = "CompVis/stable-diffusion-v1-4" - result = generate_image("astronaut riding a horse", config, 512) + result = generate_image("astronaut riding a horse", agent, 512) assert result == "Error creating image." @@ -177,8 +178,8 @@ def test_huggingface_fail_request_with_delay(mocker, config): mock_sleep.assert_called_with(0) -def test_huggingface_fail_request_no_delay(mocker, config): - config.huggingface_api_token = "1" +def test_huggingface_fail_request_no_delay(mocker, agent: Agent): + agent.config.huggingface_api_token = "1" # Mock requests.post mock_post = mocker.patch("requests.post") @@ -191,10 +192,10 @@ def test_huggingface_fail_request_no_delay(mocker, config): # Mock time.sleep mock_sleep = mocker.patch("time.sleep") - config.image_provider = "huggingface" - config.huggingface_image_model = "CompVis/stable-diffusion-v1-4" + agent.config.image_provider = "huggingface" + agent.config.huggingface_image_model = "CompVis/stable-diffusion-v1-4" - result = generate_image("astronaut riding a horse", config, 512) + result = generate_image("astronaut riding a horse", agent, 512) assert result == "Error creating image." @@ -202,8 +203,8 @@ def test_huggingface_fail_request_no_delay(mocker, config): mock_sleep.assert_not_called() -def test_huggingface_fail_request_bad_json(mocker, config): - config.huggingface_api_token = "1" +def test_huggingface_fail_request_bad_json(mocker, agent: Agent): + agent.config.huggingface_api_token = "1" # Mock requests.post mock_post = mocker.patch("requests.post") @@ -214,10 +215,10 @@ def test_huggingface_fail_request_bad_json(mocker, config): # Mock time.sleep mock_sleep = mocker.patch("time.sleep") - config.image_provider = "huggingface" - config.huggingface_image_model = "CompVis/stable-diffusion-v1-4" + agent.config.image_provider = "huggingface" + agent.config.huggingface_image_model = "CompVis/stable-diffusion-v1-4" - result = generate_image("astronaut riding a horse", config, 512) + result = generate_image("astronaut riding a horse", agent, 512) assert result == "Error creating image." 
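The delay tests above patch requests.post and time.sleep to check that image generation waits for the delay advertised in a "model is loading" error before retrying. A rough sketch of that retry pattern follows; the helper and the response field name are assumptions for illustration, not taken from this patch:

    import time

    import requests

    def post_with_retry(url: str, payload: dict, retries: int = 1) -> requests.Response:
        # hypothetical helper: if the service reports it is still loading,
        # wait the advertised delay and try once more
        response = requests.post(url, json=payload)
        for _ in range(retries):
            if response.ok:
                break
            try:
                delay = response.json().get("estimated_time", 0)  # assumed field name
            except ValueError:
                break  # body is not JSON, so there is no delay to honour
            if delay:
                time.sleep(delay)
            response = requests.post(url, json=payload)
        return response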
@@ -225,28 +226,28 @@ def test_huggingface_fail_request_bad_json(mocker, config): mock_sleep.assert_not_called() -def test_huggingface_fail_request_bad_image(mocker, config): - config.huggingface_api_token = "1" +def test_huggingface_fail_request_bad_image(mocker, agent: Agent): + agent.config.huggingface_api_token = "1" # Mock requests.post mock_post = mocker.patch("requests.post") mock_post.return_value.status_code = 200 - config.image_provider = "huggingface" - config.huggingface_image_model = "CompVis/stable-diffusion-v1-4" + agent.config.image_provider = "huggingface" + agent.config.huggingface_image_model = "CompVis/stable-diffusion-v1-4" - result = generate_image("astronaut riding a horse", config, 512) + result = generate_image("astronaut riding a horse", agent, 512) assert result == "Error creating image." -def test_huggingface_fail_missing_api_token(mocker, config): - config.image_provider = "huggingface" - config.huggingface_image_model = "CompVis/stable-diffusion-v1-4" +def test_huggingface_fail_missing_api_token(mocker, agent: Agent): + agent.config.image_provider = "huggingface" + agent.config.huggingface_image_model = "CompVis/stable-diffusion-v1-4" # Mock requests.post to raise ValueError mock_post = mocker.patch("requests.post", side_effect=ValueError) # Verify request raises an error. with pytest.raises(ValueError): - generate_image("astronaut riding a horse", config, 512) + generate_image("astronaut riding a horse", agent, 512) diff --git a/tests/integration/test_web_selenium.py b/tests/integration/test_web_selenium.py index 7a92b123..6f594a78 100644 --- a/tests/integration/test_web_selenium.py +++ b/tests/integration/test_web_selenium.py @@ -1,18 +1,14 @@ -import pytest from pytest_mock import MockerFixture +from autogpt.agent.agent import Agent from autogpt.commands.web_selenium import browse_website -from autogpt.config import Config -from tests.utils import requires_api_key -@pytest.mark.vcr -@requires_api_key("OPENAI_API_KEY") -def test_browse_website(config: Config, patched_api_requestor: MockerFixture): +def test_browse_website(agent: Agent, patched_api_requestor: MockerFixture): url = "https://barrel-roll.com" question = "How to execute a barrel roll" - response = browse_website(url, question, config) + response = browse_website(url, question, agent) assert "Error" in response # Sanity check that the response is not too long assert len(response) < 200 diff --git a/tests/unit/test_browse_scrape_links.py b/tests/unit/test_browse_scrape_links.py index 2d1e8f90..5975e086 100644 --- a/tests/unit/test_browse_scrape_links.py +++ b/tests/unit/test_browse_scrape_links.py @@ -3,6 +3,7 @@ # Dependencies: # pip install pytest-mock +from autogpt.agent.agent import Agent from autogpt.commands.web_requests import scrape_links """ @@ -42,14 +43,14 @@ class TestScrapeLinks: provided with a valid url that returns a webpage with hyperlinks. 
""" - def test_valid_url_with_hyperlinks(self, config): + def test_valid_url_with_hyperlinks(self, agent: Agent): url = "https://www.google.com" - result = scrape_links(url, config=config) + result = scrape_links(url, agent=agent) assert len(result) > 0 assert isinstance(result, list) assert isinstance(result[0], str) - def test_valid_url(self, mocker, config): + def test_valid_url(self, mocker, agent: Agent): """Test that the function returns correctly formatted hyperlinks when given a valid url.""" # Mock the requests.get() function to return a response with sample HTML containing hyperlinks mock_response = mocker.Mock() @@ -60,12 +61,12 @@ class TestScrapeLinks: mocker.patch("requests.Session.get", return_value=mock_response) # Call the function with a valid URL - result = scrape_links("https://www.example.com", config) + result = scrape_links("https://www.example.com", agent) # Assert that the function returns correctly formatted hyperlinks assert result == ["Google (https://www.google.com)"] - def test_invalid_url(self, mocker, config): + def test_invalid_url(self, mocker, agent: Agent): """Test that the function returns "error" when given an invalid url.""" # Mock the requests.get() function to return an HTTP error response mock_response = mocker.Mock() @@ -73,12 +74,12 @@ class TestScrapeLinks: mocker.patch("requests.Session.get", return_value=mock_response) # Call the function with an invalid URL - result = scrape_links("https://www.invalidurl.com", config) + result = scrape_links("https://www.invalidurl.com", agent) # Assert that the function returns "error" assert "Error:" in result - def test_no_hyperlinks(self, mocker, config): + def test_no_hyperlinks(self, mocker, agent: Agent): """Test that the function returns an empty list when the html contains no hyperlinks.""" # Mock the requests.get() function to return a response with sample HTML containing no hyperlinks mock_response = mocker.Mock() @@ -87,12 +88,12 @@ class TestScrapeLinks: mocker.patch("requests.Session.get", return_value=mock_response) # Call the function with a URL containing no hyperlinks - result = scrape_links("https://www.example.com", config) + result = scrape_links("https://www.example.com", agent) # Assert that the function returns an empty list assert result == [] - def test_scrape_links_with_few_hyperlinks(self, mocker, config): + def test_scrape_links_with_few_hyperlinks(self, mocker, agent: Agent): """Test that scrape_links() correctly extracts and formats hyperlinks from a sample HTML containing a few hyperlinks.""" mock_response = mocker.Mock() mock_response.status_code = 200 @@ -108,7 +109,7 @@ class TestScrapeLinks: mocker.patch("requests.Session.get", return_value=mock_response) # Call the function being tested - result = scrape_links("https://www.example.com", config) + result = scrape_links("https://www.example.com", agent) # Assert that the function returns a list of formatted hyperlinks assert isinstance(result, list) diff --git a/tests/unit/test_browse_scrape_text.py b/tests/unit/test_browse_scrape_text.py index 3d916bc7..23a80c54 100644 --- a/tests/unit/test_browse_scrape_text.py +++ b/tests/unit/test_browse_scrape_text.py @@ -3,6 +3,7 @@ import pytest import requests +from autogpt.agent.agent import Agent from autogpt.commands.web_requests import scrape_text """ @@ -42,7 +43,7 @@ Additional aspects: class TestScrapeText: - def test_scrape_text_with_valid_url(self, mocker, config): + def test_scrape_text_with_valid_url(self, mocker, agent: Agent): """Tests that scrape_text() returns the 
expected text when given a valid URL.""" # Mock the requests.get() method to return a response with expected text expected_text = "This is some sample text" @@ -57,14 +58,14 @@ class TestScrapeText: # Call the function with a valid URL and assert that it returns the # expected text url = "http://www.example.com" - assert scrape_text(url, config) == expected_text + assert scrape_text(url, agent) == expected_text - def test_invalid_url(self, config): + def test_invalid_url(self, agent: Agent): """Tests that an error is raised when an invalid url is provided.""" url = "invalidurl.com" - pytest.raises(ValueError, scrape_text, url, config) + pytest.raises(ValueError, scrape_text, url, agent) - def test_unreachable_url(self, mocker, config): + def test_unreachable_url(self, mocker, agent: Agent): """Test that scrape_text returns an error message when an invalid or unreachable url is provided.""" # Mock the requests.get() method to raise an exception mocker.patch( @@ -74,10 +75,10 @@ class TestScrapeText: # Call the function with an invalid URL and assert that it returns an error # message url = "http://thiswebsitedoesnotexist.net/" - error_message = scrape_text(url, config) + error_message = scrape_text(url, agent) assert "Error:" in error_message - def test_no_text(self, mocker, config): + def test_no_text(self, mocker, agent: Agent): """Test that scrape_text returns an empty string when the html page contains no text to be scraped.""" # Mock the requests.get() method to return a response with no text mock_response = mocker.Mock() @@ -87,20 +88,20 @@ class TestScrapeText: # Call the function with a valid URL and assert that it returns an empty string url = "http://www.example.com" - assert scrape_text(url, config) == "" + assert scrape_text(url, agent) == "" - def test_http_error(self, mocker, config): + def test_http_error(self, mocker, agent: Agent): """Test that scrape_text returns an error message when the response status code is an http error (>=400).""" # Mock the requests.get() method to return a response with a 404 status code mocker.patch("requests.Session.get", return_value=mocker.Mock(status_code=404)) # Call the function with a URL - result = scrape_text("https://www.example.com", config) + result = scrape_text("https://www.example.com", agent) # Check that the function returns an error message assert result == "Error: HTTP 404 error" - def test_scrape_text_with_html_tags(self, mocker, config): + def test_scrape_text_with_html_tags(self, mocker, agent: Agent): """Test that scrape_text() properly handles HTML tags.""" # Create a mock response object with HTML containing tags html = "
<html><body><p>This is <b>bold</b> text.</p></body></html>
" @@ -110,7 +111,7 @@ class TestScrapeText: mocker.patch("requests.Session.get", return_value=mock_response) # Call the function with a URL - result = scrape_text("https://www.example.com", config) + result = scrape_text("https://www.example.com", agent) # Check that the function properly handles HTML tags assert result == "This is bold text." diff --git a/tests/unit/test_file_operations.py b/tests/unit/test_file_operations.py index 3da57375..d3e6991d 100644 --- a/tests/unit/test_file_operations.py +++ b/tests/unit/test_file_operations.py @@ -12,7 +12,7 @@ import pytest from pytest_mock import MockerFixture import autogpt.commands.file_operations as file_ops -from autogpt.config import Config +from autogpt.agent.agent import Agent from autogpt.memory.vector.memory_item import MemoryItem from autogpt.memory.vector.utils import Embedding from autogpt.utils import readable_file_size @@ -42,7 +42,7 @@ def mock_MemoryItem_from_text(mocker: MockerFixture, mock_embedding: Embedding): @pytest.fixture() -def test_file_path(config, workspace: Workspace): +def test_file_path(workspace: Workspace): return workspace.get_path("test_file.txt") @@ -55,22 +55,22 @@ def test_file(test_file_path: Path): @pytest.fixture() -def test_file_with_content_path(test_file: TextIOWrapper, file_content, config): +def test_file_with_content_path(test_file: TextIOWrapper, file_content, agent: Agent): test_file.write(file_content) test_file.close() file_ops.log_operation( - "write", test_file.name, config, file_ops.text_checksum(file_content) + "write", test_file.name, agent, file_ops.text_checksum(file_content) ) return Path(test_file.name) @pytest.fixture() -def test_directory(config, workspace: Workspace): +def test_directory(workspace: Workspace): return workspace.get_path("test_directory") @pytest.fixture() -def test_nested_file(config, workspace: Workspace): +def test_nested_file(workspace: Workspace): return workspace.get_path("nested/test_file.txt") @@ -117,7 +117,7 @@ def test_file_operations_state(test_file: TextIOWrapper): assert file_ops.file_operations_state(test_file.name) == expected_state -def test_is_duplicate_operation(config: Config, mocker: MockerFixture): +def test_is_duplicate_operation(agent: Agent, mocker: MockerFixture): # Prepare a fake state dictionary for the function to use state = { "path/to/file1.txt": "checksum1", @@ -128,42 +128,48 @@ def test_is_duplicate_operation(config: Config, mocker: MockerFixture): # Test cases with write operations assert ( file_ops.is_duplicate_operation( - "write", "path/to/file1.txt", config, "checksum1" + "write", "path/to/file1.txt", agent.config, "checksum1" ) is True ) assert ( file_ops.is_duplicate_operation( - "write", "path/to/file1.txt", config, "checksum2" + "write", "path/to/file1.txt", agent.config, "checksum2" ) is False ) assert ( file_ops.is_duplicate_operation( - "write", "path/to/file3.txt", config, "checksum3" + "write", "path/to/file3.txt", agent.config, "checksum3" ) is False ) # Test cases with append operations assert ( file_ops.is_duplicate_operation( - "append", "path/to/file1.txt", config, "checksum1" + "append", "path/to/file1.txt", agent.config, "checksum1" ) is False ) # Test cases with delete operations assert ( - file_ops.is_duplicate_operation("delete", "path/to/file1.txt", config) is False + file_ops.is_duplicate_operation( + "delete", "path/to/file1.txt", config=agent.config + ) + is False ) assert ( - file_ops.is_duplicate_operation("delete", "path/to/file3.txt", config) is True + file_ops.is_duplicate_operation( + "delete", 
"path/to/file3.txt", config=agent.config + ) + is True ) # Test logging a file operation -def test_log_operation(config: Config): - file_ops.log_operation("log_test", "path/to/test", config) - with open(config.file_logger_path, "r", encoding="utf-8") as f: +def test_log_operation(agent: Agent): + file_ops.log_operation("log_test", "path/to/test", agent=agent) + with open(agent.config.file_logger_path, "r", encoding="utf-8") as f: content = f.read() assert f"log_test: path/to/test\n" in content @@ -175,9 +181,9 @@ def test_text_checksum(file_content: str): assert checksum != different_checksum -def test_log_operation_with_checksum(config: Config): - file_ops.log_operation("log_test", "path/to/test", config, checksum="ABCDEF") - with open(config.file_logger_path, "r", encoding="utf-8") as f: +def test_log_operation_with_checksum(agent: Agent): + file_ops.log_operation("log_test", "path/to/test", agent=agent, checksum="ABCDEF") + with open(agent.config.file_logger_path, "r", encoding="utf-8") as f: content = f.read() assert f"log_test: path/to/test #ABCDEF\n" in content @@ -223,66 +229,66 @@ def test_read_file( mock_MemoryItem_from_text, test_file_with_content_path: Path, file_content, - config: Config, + agent: Agent, ): - content = file_ops.read_file(test_file_with_content_path, config) + content = file_ops.read_file(test_file_with_content_path, agent=agent) assert content.replace("\r", "") == file_content -def test_read_file_not_found(config: Config): +def test_read_file_not_found(agent: Agent): filename = "does_not_exist.txt" - content = file_ops.read_file(filename, config) + content = file_ops.read_file(filename, agent=agent) assert "Error:" in content and filename in content and "no such file" in content -def test_write_to_file(test_file_path: Path, config): +def test_write_to_file(test_file_path: Path, agent: Agent): new_content = "This is new content.\n" - file_ops.write_to_file(str(test_file_path), new_content, config) + file_ops.write_to_file(str(test_file_path), new_content, agent=agent) with open(test_file_path, "r", encoding="utf-8") as f: content = f.read() assert content == new_content -def test_write_file_logs_checksum(test_file_path: Path, config): +def test_write_file_logs_checksum(test_file_path: Path, agent: Agent): new_content = "This is new content.\n" new_checksum = file_ops.text_checksum(new_content) - file_ops.write_to_file(str(test_file_path), new_content, config) - with open(config.file_logger_path, "r", encoding="utf-8") as f: + file_ops.write_to_file(str(test_file_path), new_content, agent=agent) + with open(agent.config.file_logger_path, "r", encoding="utf-8") as f: log_entry = f.read() assert log_entry == f"write: {test_file_path} #{new_checksum}\n" -def test_write_file_fails_if_content_exists(test_file_path: Path, config): +def test_write_file_fails_if_content_exists(test_file_path: Path, agent: Agent): new_content = "This is new content.\n" file_ops.log_operation( "write", str(test_file_path), - config, + agent=agent, checksum=file_ops.text_checksum(new_content), ) - result = file_ops.write_to_file(str(test_file_path), new_content, config) + result = file_ops.write_to_file(str(test_file_path), new_content, agent=agent) assert result == "Error: File has already been updated." 
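The duplicate-write rejection exercised above comes from the checksum-based file operations log: each write is logged with a checksum of its content, and a later write whose checksum matches the last one logged for that path is refused. A simplified sketch of the idea, with illustrative names rather than the project's exact implementation:

    import hashlib

    def text_checksum(text: str) -> str:
        # fingerprint the content so an identical re-write can be detected
        return hashlib.md5(text.encode("utf-8")).hexdigest()

    # the operations log reduces to a mapping of path -> checksum of the last write
    log_state = {"path/to/file1.txt": text_checksum("This is new content.\n")}

    def is_duplicate_write(path: str, content: str) -> bool:
        return log_state.get(path) == text_checksum(content)

    assert is_duplicate_write("path/to/file1.txt", "This is new content.\n")
    assert not is_duplicate_write("path/to/file1.txt", "different content")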
def test_write_file_succeeds_if_content_different( - test_file_with_content_path: Path, config + test_file_with_content_path: Path, agent: Agent ): new_content = "This is different content.\n" result = file_ops.write_to_file( - str(test_file_with_content_path), new_content, config + str(test_file_with_content_path), new_content, agent=agent ) assert result == "File written to successfully." # Update file testing -def test_replace_in_file_all_occurrences(test_file, test_file_path, config): +def test_replace_in_file_all_occurrences(test_file, test_file_path, agent: Agent): old_content = "This is a test file.\n we test file here\na test is needed" expected_content = ( "This is a update file.\n we update file here\na update is needed" ) test_file.write(old_content) test_file.close() - file_ops.replace_in_file(test_file_path, "test", "update", config) + file_ops.replace_in_file(test_file_path, "test", "update", agent=agent) with open(test_file_path) as f: new_content = f.read() print(new_content) @@ -290,13 +296,13 @@ def test_replace_in_file_all_occurrences(test_file, test_file_path, config): assert new_content == expected_content -def test_replace_in_file_one_occurrence(test_file, test_file_path, config): +def test_replace_in_file_one_occurrence(test_file, test_file_path, agent: Agent): old_content = "This is a test file.\n we test file here\na test is needed" expected_content = "This is a test file.\n we update file here\na test is needed" test_file.write(old_content) test_file.close() file_ops.replace_in_file( - test_file_path, "test", "update", config, occurrence_index=1 + test_file_path, "test", "update", agent=agent, occurrence_index=1 ) with open(test_file_path) as f: new_content = f.read() @@ -304,7 +310,7 @@ def test_replace_in_file_one_occurrence(test_file, test_file_path, config): assert new_content == expected_content -def test_replace_in_file_multiline_old_text(test_file, test_file_path, config): +def test_replace_in_file_multiline_old_text(test_file, test_file_path, agent: Agent): old_content = "This is a multi_line\ntest for testing\nhow well this function\nworks when the input\nis multi-lined" expected_content = "This is a multi_line\nfile. succeeded test\nis multi-lined" test_file.write(old_content) @@ -313,7 +319,7 @@ def test_replace_in_file_multiline_old_text(test_file, test_file_path, config): test_file_path, "\ntest for testing\nhow well this function\nworks when the input\n", "\nfile. 
succeeded test\n", - config, + agent=agent, ) with open(test_file_path) as f: new_content = f.read() @@ -321,11 +327,11 @@ def test_replace_in_file_multiline_old_text(test_file, test_file_path, config): assert new_content == expected_content -def test_append_to_file(test_nested_file: Path, config): +def test_append_to_file(test_nested_file: Path, agent: Agent): append_text = "This is appended text.\n" - file_ops.write_to_file(test_nested_file, append_text, config) + file_ops.write_to_file(test_nested_file, append_text, agent=agent) - file_ops.append_to_file(test_nested_file, append_text, config) + file_ops.append_to_file(test_nested_file, append_text, agent=agent) with open(test_nested_file, "r") as f: content_after = f.read() @@ -333,11 +339,13 @@ def test_append_to_file(test_nested_file: Path, config): assert content_after == append_text + append_text -def test_append_to_file_uses_checksum_from_appended_file(test_file_path: Path, config): +def test_append_to_file_uses_checksum_from_appended_file( + test_file_path: Path, agent: Agent +): append_text = "This is appended text.\n" - file_ops.append_to_file(test_file_path, append_text, config) - file_ops.append_to_file(test_file_path, append_text, config) - with open(config.file_logger_path, "r", encoding="utf-8") as f: + file_ops.append_to_file(test_file_path, append_text, agent=agent) + file_ops.append_to_file(test_file_path, append_text, agent=agent) + with open(agent.config.file_logger_path, "r", encoding="utf-8") as f: log_contents = f.read() digest = hashlib.md5() @@ -351,25 +359,25 @@ def test_append_to_file_uses_checksum_from_appended_file(test_file_path: Path, c ) -def test_delete_file(test_file_with_content_path: Path, config): - result = file_ops.delete_file(str(test_file_with_content_path), config) +def test_delete_file(test_file_with_content_path: Path, agent: Agent): + result = file_ops.delete_file(str(test_file_with_content_path), agent=agent) assert result == "File deleted successfully." 
assert os.path.exists(test_file_with_content_path) is False -def test_delete_missing_file(config): +def test_delete_missing_file(agent: Agent): filename = "path/to/file/which/does/not/exist" # confuse the log - file_ops.log_operation("write", filename, config, checksum="fake") + file_ops.log_operation("write", filename, agent=agent, checksum="fake") try: os.remove(filename) except FileNotFoundError as err: - assert str(err) in file_ops.delete_file(filename, config) + assert str(err) in file_ops.delete_file(filename, agent=agent) return assert False, f"Failed to test delete_file; {filename} not expected to exist" -def test_list_files(workspace: Workspace, test_directory: Path, config): +def test_list_files(workspace: Workspace, test_directory: Path, agent: Agent): # Case 1: Create files A and B, search for A, and ensure we don't return A and B file_a = workspace.get_path("file_a.txt") file_b = workspace.get_path("file_b.txt") @@ -387,7 +395,7 @@ def test_list_files(workspace: Workspace, test_directory: Path, config): with open(os.path.join(test_directory, file_a.name), "w") as f: f.write("This is file A in the subdirectory.") - files = file_ops.list_files(str(workspace.root), config) + files = file_ops.list_files(str(workspace.root), agent=agent) assert file_a.name in files assert file_b.name in files assert os.path.join(Path(test_directory).name, file_a.name) in files @@ -400,17 +408,17 @@ def test_list_files(workspace: Workspace, test_directory: Path, config): # Case 2: Search for a file that does not exist and make sure we don't throw non_existent_file = "non_existent_file.txt" - files = file_ops.list_files("", config) + files = file_ops.list_files("", agent=agent) assert non_existent_file not in files -def test_download_file(workspace: Workspace, config): +def test_download_file(workspace: Workspace, agent: Agent): url = "https://github.com/Significant-Gravitas/Auto-GPT/archive/refs/tags/v0.2.2.tar.gz" local_name = workspace.get_path("auto-gpt.tar.gz") size = 365023 readable_size = readable_file_size(size) assert ( - file_ops.download_file(url, local_name, config) + file_ops.download_file(url, local_name, agent=agent) == f'Successfully downloaded and locally stored file: "{local_name}"! 
(Size: {readable_size})' ) assert os.path.isfile(local_name) is True @@ -418,10 +426,10 @@ def test_download_file(workspace: Workspace, config): url = "https://github.com/Significant-Gravitas/Auto-GPT/archive/refs/tags/v0.0.0.tar.gz" assert "Got an HTTP Error whilst trying to download file" in file_ops.download_file( - url, local_name, config + url, local_name, agent=agent ) url = "https://thiswebsiteiswrong.hmm/v0.0.0.tar.gz" assert "Failed to establish a new connection:" in file_ops.download_file( - url, local_name, config + url, local_name, agent=agent ) diff --git a/tests/unit/test_git_commands.py b/tests/unit/test_git_commands.py index 375a9cf4..a6defdfc 100644 --- a/tests/unit/test_git_commands.py +++ b/tests/unit/test_git_commands.py @@ -2,6 +2,7 @@ import pytest from git.exc import GitCommandError from git.repo.base import Repo +from autogpt.agent.agent import Agent from autogpt.commands.git_operations import clone_repository @@ -10,7 +11,7 @@ def mock_clone_from(mocker): return mocker.patch.object(Repo, "clone_from") -def test_clone_auto_gpt_repository(workspace, mock_clone_from, config): +def test_clone_auto_gpt_repository(workspace, mock_clone_from, agent: Agent): mock_clone_from.return_value = None repo = "github.com/Significant-Gravitas/Auto-GPT.git" @@ -20,16 +21,16 @@ def test_clone_auto_gpt_repository(workspace, mock_clone_from, config): expected_output = f"Cloned {url} to {clone_path}" - clone_result = clone_repository(url=url, clone_path=clone_path, config=config) + clone_result = clone_repository(url=url, clone_path=clone_path, agent=agent) assert clone_result == expected_output mock_clone_from.assert_called_once_with( - url=f"{scheme}{config.github_username}:{config.github_api_key}@{repo}", + url=f"{scheme}{agent.config.github_username}:{agent.config.github_api_key}@{repo}", to_path=clone_path, ) -def test_clone_repository_error(workspace, mock_clone_from, config): +def test_clone_repository_error(workspace, mock_clone_from, agent: Agent): url = "https://github.com/this-repository/does-not-exist.git" clone_path = str(workspace.get_path("does-not-exist")) @@ -37,6 +38,6 @@ def test_clone_repository_error(workspace, mock_clone_from, config): "clone", "fatal: repository not found", "" ) - result = clone_repository(url=url, clone_path=clone_path, config=config) + result = clone_repository(url=url, clone_path=clone_path, agent=agent) assert "Error: " in result diff --git a/tests/unit/test_google_search.py b/tests/unit/test_google_search.py index e379f78e..3f039fdb 100644 --- a/tests/unit/test_google_search.py +++ b/tests/unit/test_google_search.py @@ -3,6 +3,7 @@ import json import pytest from googleapiclient.errors import HttpError +from autogpt.agent.agent import Agent from autogpt.commands.google_search import ( google_official_search, google_search, @@ -39,13 +40,13 @@ def test_safe_google_results_invalid_input(): ], ) def test_google_search( - query, num_results, expected_output, return_value, mocker, config + query, num_results, expected_output, return_value, mocker, agent: Agent ): mock_ddg = mocker.Mock() mock_ddg.return_value = return_value mocker.patch("autogpt.commands.google_search.DDGS.text", mock_ddg) - actual_output = google_search(query, config, num_results=num_results) + actual_output = google_search(query, agent=agent, num_results=num_results) expected_output = safe_google_results(expected_output) assert actual_output == expected_output @@ -79,10 +80,15 @@ def mock_googleapiclient(mocker): ], ) def test_google_official_search( - query, num_results, 
expected_output, search_results, mock_googleapiclient, config + query, + num_results, + expected_output, + search_results, + mock_googleapiclient, + agent: Agent, ): mock_googleapiclient.return_value = search_results - actual_output = google_official_search(query, config, num_results=num_results) + actual_output = google_official_search(query, agent=agent, num_results=num_results) assert actual_output == safe_google_results(expected_output) @@ -113,7 +119,7 @@ def test_google_official_search_errors( mock_googleapiclient, http_code, error_msg, - config, + agent: Agent, ): class resp: def __init__(self, _status, _reason): @@ -130,5 +136,5 @@ def test_google_official_search_errors( ) mock_googleapiclient.side_effect = error - actual_output = google_official_search(query, config, num_results=num_results) + actual_output = google_official_search(query, agent=agent, num_results=num_results) assert actual_output == safe_google_results(expected_output) diff --git a/tests/unit/test_make_agent.py b/tests/unit/test_make_agent.py index cff20ee3..23eea027 100644 --- a/tests/unit/test_make_agent.py +++ b/tests/unit/test_make_agent.py @@ -2,11 +2,11 @@ from unittest.mock import MagicMock from pytest_mock import MockerFixture +from autogpt.agent.agent import Agent from autogpt.app import list_agents, start_agent -from autogpt.config import Config -def test_make_agent(config: Config, mocker: MockerFixture) -> None: +def test_make_agent(agent: Agent, mocker: MockerFixture) -> None: """Test that an agent can be created""" mock = mocker.patch("openai.ChatCompletion.create") @@ -16,9 +16,9 @@ def test_make_agent(config: Config, mocker: MockerFixture) -> None: response.usage.prompt_tokens = 1 response.usage.completion_tokens = 1 mock.return_value = response - start_agent("Test Agent", "chat", "Hello, how are you?", config, "gpt-3.5-turbo") - agents = list_agents(config) + start_agent("Test Agent", "chat", "Hello, how are you?", agent, "gpt-3.5-turbo") + agents = list_agents(agent) assert "List of agents:\n0: chat" == agents - start_agent("Test Agent 2", "write", "Hello, how are you?", config, "gpt-3.5-turbo") - agents = list_agents(config) + start_agent("Test Agent 2", "write", "Hello, how are you?", agent, "gpt-3.5-turbo") + agents = list_agents(agent.config) assert "List of agents:\n0: chat\n1: write" == agents From 4e621280bbb47c949f531c7797c58a91ee0218db Mon Sep 17 00:00:00 2001 From: Auto-GPT-Bot Date: Sat, 10 Jun 2023 22:51:23 +0000 Subject: [PATCH 44/97] Update cassette submodule --- tests/Auto-GPT-test-cassettes | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/Auto-GPT-test-cassettes b/tests/Auto-GPT-test-cassettes index c154c67a..427de672 160000 --- a/tests/Auto-GPT-test-cassettes +++ b/tests/Auto-GPT-test-cassettes @@ -1 +1 @@ -Subproject commit c154c67a58fceb534b6969bfbf92928c4ae54ea3 +Subproject commit 427de6721cb5209a7a34359a81b71d60e80a110a From 6fb9b6d03b268e799672f9b889d5c61efefcb29e Mon Sep 17 00:00:00 2001 From: merwanehamadi Date: Sun, 11 Jun 2023 07:21:26 -0700 Subject: [PATCH 45/97] Retry regression tests (#4648) --- requirements.txt | 1 + .../basic_abilities/test_browse_website.py | 5 +- .../basic_abilities/test_write_file.py | 5 +- .../challenge_decorator.py | 95 +++++++++++-------- .../debug_code/test_debug_code_challenge_a.py | 5 +- .../test_information_retrieval_challenge_a.py | 5 +- .../test_information_retrieval_challenge_b.py | 5 +- .../test_kubernetes_template_challenge_a.py | 5 +- .../memory/test_memory_challenge_a.py | 6 +- 
.../memory/test_memory_challenge_b.py | 5 +- .../memory/test_memory_challenge_c.py | 6 +- .../memory/test_memory_challenge_d.py | 5 +- 12 files changed, 64 insertions(+), 84 deletions(-) diff --git a/requirements.txt b/requirements.txt index 31f7706a..16ed8558 100644 --- a/requirements.txt +++ b/requirements.txt @@ -61,3 +61,4 @@ pytest-mock vcrpy @ git+https://github.com/Significant-Gravitas/vcrpy.git@master pytest-recording pytest-xdist +flaky diff --git a/tests/challenges/basic_abilities/test_browse_website.py b/tests/challenges/basic_abilities/test_browse_website.py index b918434c..6ee80d33 100644 --- a/tests/challenges/basic_abilities/test_browse_website.py +++ b/tests/challenges/basic_abilities/test_browse_website.py @@ -3,14 +3,11 @@ import pytest from autogpt.agent import Agent from tests.challenges.challenge_decorator.challenge_decorator import challenge from tests.challenges.utils import run_interaction_loop -from tests.utils import requires_api_key CYCLE_COUNT = 2 -@requires_api_key("OPENAI_API_KEY") -@pytest.mark.vcr -@challenge +@challenge() def test_browse_website( browser_agent: Agent, patched_api_requestor: None, diff --git a/tests/challenges/basic_abilities/test_write_file.py b/tests/challenges/basic_abilities/test_write_file.py index e84529bc..9aa837e9 100644 --- a/tests/challenges/basic_abilities/test_write_file.py +++ b/tests/challenges/basic_abilities/test_write_file.py @@ -6,7 +6,6 @@ from autogpt.agent import Agent from autogpt.commands.file_operations import read_file from tests.challenges.challenge_decorator.challenge_decorator import challenge from tests.challenges.utils import get_workspace_path, run_interaction_loop -from tests.utils import requires_api_key CYCLE_COUNT_PER_LEVEL = [1, 1] EXPECTED_OUTPUTS_PER_LEVEL = [ @@ -15,9 +14,7 @@ EXPECTED_OUTPUTS_PER_LEVEL = [ ] -@requires_api_key("OPENAI_API_KEY") -@pytest.mark.vcr -@challenge +@challenge() def test_write_file( file_system_agents: List[Agent], patched_api_requestor: None, diff --git a/tests/challenges/challenge_decorator/challenge_decorator.py b/tests/challenges/challenge_decorator/challenge_decorator.py index d887e21d..7f721de1 100644 --- a/tests/challenges/challenge_decorator/challenge_decorator.py +++ b/tests/challenges/challenge_decorator/challenge_decorator.py @@ -3,6 +3,7 @@ from functools import wraps from typing import Any, Callable, Optional import pytest +from flaky import flaky # type: ignore from tests.challenges.challenge_decorator.challenge import Challenge from tests.challenges.challenge_decorator.challenge_utils import create_challenge @@ -10,6 +11,7 @@ from tests.challenges.challenge_decorator.score_utils import ( get_scores, update_new_score, ) +from tests.utils import requires_api_key MAX_LEVEL_TO_IMPROVE_ON = ( 1 # we will attempt to beat 1 level above the current level for now. @@ -18,52 +20,61 @@ MAX_LEVEL_TO_IMPROVE_ON = ( CHALLENGE_FAILED_MESSAGE = "Challenges can sometimes fail randomly, please run this test again and if it fails reach out to us on https://discord.gg/autogpt and reach out to us on the 'challenges' channel to let us know the challenge you're struggling with." 
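The rewrite that follows turns challenge into a parameterized factory that stacks @requires_api_key, @pytest.mark.vcr, and @flaky onto every challenge test, so the retry behaviour comes from the flaky plugin added to requirements.txt above. A minimal usage sketch of flaky on its own, with a hypothetical test that is not part of this patch:

    import random

    from flaky import flaky

    @flaky(max_runs=2, min_passes=1)  # rerun up to twice; one passing run suffices
    def test_sometimes_flaky():
        # stand-in for a nondeterministic step (network call, LLM response, ...)
        assert random.random() < 0.9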
-def challenge(func: Callable[..., Any]) -> Callable[..., None]: - @wraps(func) - def wrapper(*args: Any, **kwargs: Any) -> None: - run_remaining = MAX_LEVEL_TO_IMPROVE_ON if Challenge.BEAT_CHALLENGES else 1 - original_error: Optional[Exception] = None +def challenge( + max_runs: int = 2, min_passes: int = 1, api_key: str = "OPENAI_API_KEY" +) -> Callable[[Callable[..., Any]], Callable[..., None]]: + def decorator(func: Callable[..., Any]) -> Callable[..., None]: + @requires_api_key(api_key) + @pytest.mark.vcr + @flaky(max_runs=max_runs, min_passes=min_passes) + @wraps(func) + def wrapper(*args: Any, **kwargs: Any) -> None: + run_remaining = MAX_LEVEL_TO_IMPROVE_ON if Challenge.BEAT_CHALLENGES else 1 + original_error: Optional[Exception] = None - while run_remaining > 0: - current_score, new_score, new_score_location = get_scores() - level_to_run = kwargs["level_to_run"] if "level_to_run" in kwargs else None - challenge = create_challenge( - func, current_score, Challenge.BEAT_CHALLENGES, level_to_run - ) - if challenge.level_to_run is not None: - kwargs["level_to_run"] = challenge.level_to_run - try: - func(*args, **kwargs) - challenge.succeeded = True - except AssertionError as err: - original_error = AssertionError( - f"{CHALLENGE_FAILED_MESSAGE}\n{err}" + while run_remaining > 0: + current_score, new_score, new_score_location = get_scores() + level_to_run = ( + kwargs["level_to_run"] if "level_to_run" in kwargs else None + ) + challenge = create_challenge( + func, current_score, Challenge.BEAT_CHALLENGES, level_to_run + ) + if challenge.level_to_run is not None: + kwargs["level_to_run"] = challenge.level_to_run + try: + func(*args, **kwargs) + challenge.succeeded = True + except AssertionError as err: + original_error = AssertionError( + f"{CHALLENGE_FAILED_MESSAGE}\n{err}" + ) + challenge.succeeded = False + except Exception as err: + original_error = err + challenge.succeeded = False + else: + challenge.skipped = True + if os.environ.get("CI") == "true": + new_max_level_beaten = get_new_max_level_beaten( + challenge, Challenge.BEAT_CHALLENGES ) - challenge.succeeded = False - except Exception as err: - original_error = err - challenge.succeeded = False - else: - challenge.skipped = True - if os.environ.get("CI") == "true": - new_max_level_beaten = get_new_max_level_beaten( - challenge, Challenge.BEAT_CHALLENGES - ) - update_new_score( - new_score_location, new_score, challenge, new_max_level_beaten - ) - if challenge.level_to_run is None: - pytest.skip("This test has not been unlocked yet.") + update_new_score( + new_score_location, new_score, challenge, new_max_level_beaten + ) + if challenge.level_to_run is None: + pytest.skip("This test has not been unlocked yet.") - if not challenge.succeeded: - if Challenge.BEAT_CHALLENGES or challenge.is_new_challenge: - # xfail - pytest.xfail(str(original_error)) - if original_error: - raise original_error - run_remaining -= 1 + if not challenge.succeeded: + if Challenge.BEAT_CHALLENGES or challenge.is_new_challenge: + pytest.xfail(str(original_error)) + if original_error: + raise original_error + run_remaining -= 1 - return wrapper + return wrapper + + return decorator def get_new_max_level_beaten( diff --git a/tests/challenges/debug_code/test_debug_code_challenge_a.py b/tests/challenges/debug_code/test_debug_code_challenge_a.py index ca85675c..43f524c7 100644 --- a/tests/challenges/debug_code/test_debug_code_challenge_a.py +++ b/tests/challenges/debug_code/test_debug_code_challenge_a.py @@ -11,7 +11,6 @@ from tests.challenges.utils import ( 
get_workspace_path, run_interaction_loop, ) -from tests.utils import requires_api_key CYCLE_COUNT = 5 EXPECTED_VALUES = ["[0, 1]", "[2, 5]", "[0, 3]"] @@ -20,9 +19,7 @@ CODE_FILE_PATH = "code.py" TEST_FILE_PATH = "test.py" -@pytest.mark.vcr -@requires_api_key("OPENAI_API_KEY") -@challenge +@challenge() def test_debug_code_challenge_a( debug_code_agents: Agent, monkeypatch: pytest.MonkeyPatch, diff --git a/tests/challenges/information_retrieval/test_information_retrieval_challenge_a.py b/tests/challenges/information_retrieval/test_information_retrieval_challenge_a.py index ee2c17c9..8e0fdb63 100644 --- a/tests/challenges/information_retrieval/test_information_retrieval_challenge_a.py +++ b/tests/challenges/information_retrieval/test_information_retrieval_challenge_a.py @@ -4,7 +4,6 @@ from pytest_mock import MockerFixture from autogpt.commands.file_operations import read_file from tests.challenges.challenge_decorator.challenge_decorator import challenge from tests.challenges.utils import get_workspace_path, run_interaction_loop -from tests.utils import requires_api_key CYCLE_COUNT = 3 EXPECTED_REVENUES = [["81"], ["81"], ["81", "53", "24", "21", "11", "7", "4", "3", "2"]] @@ -13,9 +12,7 @@ from autogpt.agent import Agent OUTPUT_LOCATION = "output.txt" -@pytest.mark.vcr -@requires_api_key("OPENAI_API_KEY") -@challenge +@challenge() def test_information_retrieval_challenge_a( information_retrieval_agents: Agent, monkeypatch: pytest.MonkeyPatch, diff --git a/tests/challenges/information_retrieval/test_information_retrieval_challenge_b.py b/tests/challenges/information_retrieval/test_information_retrieval_challenge_b.py index 94ca4e44..0672b879 100644 --- a/tests/challenges/information_retrieval/test_information_retrieval_challenge_b.py +++ b/tests/challenges/information_retrieval/test_information_retrieval_challenge_b.py @@ -7,15 +7,12 @@ from autogpt.agent import Agent from autogpt.commands.file_operations import read_file from tests.challenges.challenge_decorator.challenge_decorator import challenge from tests.challenges.utils import get_workspace_path, run_interaction_loop -from tests.utils import requires_api_key CYCLE_COUNT = 3 OUTPUT_LOCATION = "2010_nobel_prize_winners.txt" -@pytest.mark.vcr -@requires_api_key("OPENAI_API_KEY") -@challenge +@challenge() def test_information_retrieval_challenge_b( get_nobel_prize_agent: Agent, monkeypatch: pytest.MonkeyPatch, diff --git a/tests/challenges/kubernetes/test_kubernetes_template_challenge_a.py b/tests/challenges/kubernetes/test_kubernetes_template_challenge_a.py index 93a2695f..c2eaab75 100644 --- a/tests/challenges/kubernetes/test_kubernetes_template_challenge_a.py +++ b/tests/challenges/kubernetes/test_kubernetes_template_challenge_a.py @@ -6,15 +6,12 @@ from autogpt.agent import Agent from autogpt.commands.file_operations import read_file from tests.challenges.challenge_decorator.challenge_decorator import challenge from tests.challenges.utils import get_workspace_path, run_interaction_loop -from tests.utils import requires_api_key CYCLE_COUNT = 3 OUTPUT_LOCATION = "kube.yaml" -@pytest.mark.vcr -@requires_api_key("OPENAI_API_KEY") -@challenge +@challenge() def test_kubernetes_template_challenge_a( kubernetes_agent: Agent, monkeypatch: pytest.MonkeyPatch, diff --git a/tests/challenges/memory/test_memory_challenge_a.py b/tests/challenges/memory/test_memory_challenge_a.py index b67af6f7..7050696a 100644 --- a/tests/challenges/memory/test_memory_challenge_a.py +++ b/tests/challenges/memory/test_memory_challenge_a.py @@ -5,14 +5,11 @@ from 
autogpt.agent import Agent from autogpt.commands.file_operations import read_file, write_to_file from tests.challenges.challenge_decorator.challenge_decorator import challenge from tests.challenges.utils import get_workspace_path, run_interaction_loop -from tests.utils import requires_api_key OUTPUT_LOCATION = "output.txt" -@pytest.mark.vcr -@requires_api_key("OPENAI_API_KEY") -@challenge +@challenge() def test_memory_challenge_a( memory_management_agent: Agent, patched_api_requestor: MockerFixture, @@ -28,7 +25,6 @@ def test_memory_challenge_a( monkeypatch (pytest.MonkeyPatch) level_to_run (int) """ - task_id = "2314" create_instructions_files(memory_management_agent, level_to_run, task_id) diff --git a/tests/challenges/memory/test_memory_challenge_b.py b/tests/challenges/memory/test_memory_challenge_b.py index 4a4d30e0..4abc4188 100644 --- a/tests/challenges/memory/test_memory_challenge_b.py +++ b/tests/challenges/memory/test_memory_challenge_b.py @@ -9,15 +9,12 @@ from tests.challenges.utils import ( get_workspace_path, run_interaction_loop, ) -from tests.utils import requires_api_key NOISE = 1000 OUTPUT_LOCATION = "output.txt" -@pytest.mark.vcr -@requires_api_key("OPENAI_API_KEY") -@challenge +@challenge() def test_memory_challenge_b( memory_management_agent: Agent, patched_api_requestor: MockerFixture, diff --git a/tests/challenges/memory/test_memory_challenge_c.py b/tests/challenges/memory/test_memory_challenge_c.py index 2479da25..f8f4ad98 100644 --- a/tests/challenges/memory/test_memory_challenge_c.py +++ b/tests/challenges/memory/test_memory_challenge_c.py @@ -9,16 +9,12 @@ from tests.challenges.utils import ( get_workspace_path, run_interaction_loop, ) -from tests.utils import requires_api_key NOISE = 1000 OUTPUT_LOCATION = "output.txt" -# @pytest.mark.vcr -@pytest.mark.vcr -@requires_api_key("OPENAI_API_KEY") -@challenge +@challenge() def test_memory_challenge_c( memory_management_agent: Agent, patched_api_requestor: MockerFixture, diff --git a/tests/challenges/memory/test_memory_challenge_d.py b/tests/challenges/memory/test_memory_challenge_d.py index fc7b5a33..ff35e183 100644 --- a/tests/challenges/memory/test_memory_challenge_d.py +++ b/tests/challenges/memory/test_memory_challenge_d.py @@ -8,16 +8,13 @@ from autogpt.agent import Agent from autogpt.commands.file_operations import read_file, write_to_file from tests.challenges.challenge_decorator.challenge_decorator import challenge from tests.challenges.utils import get_workspace_path, run_interaction_loop -from tests.utils import requires_api_key LEVEL_CURRENTLY_BEATEN = 1 MAX_LEVEL = 5 OUTPUT_LOCATION = "output.txt" -@pytest.mark.vcr -@requires_api_key("OPENAI_API_KEY") -@challenge +@challenge() def test_memory_challenge_d( memory_management_agent: Agent, patched_api_requestor: MockerFixture, From 9150f32f8b8602395534795ddd2d930a1684e419 Mon Sep 17 00:00:00 2001 From: merwanehamadi Date: Sun, 11 Jun 2023 07:34:57 -0700 Subject: [PATCH 46/97] Fix benchmark logs (#4653) --- .github/workflows/benchmarks.yml | 6 +++--- tests/challenges/challenge_decorator/challenge_decorator.py | 2 +- tests/integration/agent_factory.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml index e9f46d04..685647b8 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/benchmarks.yml @@ -7,7 +7,7 @@ on: jobs: Benchmark: - name: Benchmark - ${{ matrix.config.task-name }} + name: ${{ matrix.config.task-name }} runs-on: ubuntu-latest timeout-minutes: 30 
strategy: @@ -30,7 +30,7 @@ jobs: - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v2 with: - python-version: ${{ matrix.python-version }} + python-version: ${{ matrix.config.python-version }} - id: get_date name: Get date @@ -50,7 +50,7 @@ jobs: - name: Run pytest with coverage run: | rm -rf tests/Auto-GPT-test-cassettes - pytest -n auto ${{ matrix.task }} + pytest -n auto ${{ matrix.config.task }} env: CI: true PROXY: ${{ secrets.PROXY }} diff --git a/tests/challenges/challenge_decorator/challenge_decorator.py b/tests/challenges/challenge_decorator/challenge_decorator.py index 7f721de1..e1fd80c4 100644 --- a/tests/challenges/challenge_decorator/challenge_decorator.py +++ b/tests/challenges/challenge_decorator/challenge_decorator.py @@ -17,7 +17,7 @@ MAX_LEVEL_TO_IMPROVE_ON = ( 1 # we will attempt to beat 1 level above the current level for now. ) -CHALLENGE_FAILED_MESSAGE = "Challenges can sometimes fail randomly, please run this test again and if it fails reach out to us on https://discord.gg/autogpt and reach out to us on the 'challenges' channel to let us know the challenge you're struggling with." +CHALLENGE_FAILED_MESSAGE = "Challenges can sometimes fail randomly, please run this test again and if it fails reach out to us on https://discord.gg/autogpt in the 'challenges' channel to let us know the challenge you're struggling with." def challenge( diff --git a/tests/integration/agent_factory.py b/tests/integration/agent_factory.py index 714a3ac5..fff3867e 100644 --- a/tests/integration/agent_factory.py +++ b/tests/integration/agent_factory.py @@ -93,7 +93,7 @@ def file_system_agents( Config().set_continuous_mode(False) agents.append( Agent( - ai_name="Information Retrieval Agent", + ai_name="File System Agent", memory=memory_json_file, command_registry=command_registry, ai_config=ai_config, From bc5dbb66922afd39820577be25b9c1527389bd7f Mon Sep 17 00:00:00 2001 From: Kinance Date: Mon, 12 Jun 2023 05:04:41 +0900 Subject: [PATCH 47/97] Implement Batch Summarization in MessageHistory Class to manage context length under model's token limit (#4652) * Implement Batch Running Summarization to avoid max token error * Rename test func --- autogpt/memory/message_history.py | 54 ++++++++--- tests/unit/test_message_history.py | 145 +++++++++++++++++++++++++++++ 2 files changed, 186 insertions(+), 13 deletions(-) create mode 100644 tests/unit/test_message_history.py diff --git a/autogpt/memory/message_history.py b/autogpt/memory/message_history.py index 22f96a4a..2ae53055 100644 --- a/autogpt/memory/message_history.py +++ b/autogpt/memory/message_history.py @@ -14,7 +14,8 @@ from autogpt.json_utils.utilities import ( is_string_valid_json, ) from autogpt.llm.base import ChatSequence, Message, MessageRole, MessageType -from autogpt.llm.utils import create_chat_completion +from autogpt.llm.providers.openai import OPEN_AI_CHAT_MODELS +from autogpt.llm.utils import count_string_tokens, create_chat_completion from autogpt.log_cycle.log_cycle import PROMPT_SUMMARY_FILE_NAME, SUMMARY_FILE_NAME from autogpt.logs import logger @@ -167,20 +168,49 @@ class MessageHistory: elif event.role == "user": new_events.remove(event) + # Summarize events and current summary in batch to a new running summary + + # Assume an upper bound length for the summary prompt template, i.e. 
Your task is to create a concise running summary...., in summarize_batch func + # TODO make this default dynamic + prompt_template_length = 100 + max_tokens = OPEN_AI_CHAT_MODELS.get(cfg.fast_llm_model).max_tokens + batch = [] + batch_tlength = 0 + + # TODO Can put a cap on length of total new events and drop some previous events to save API cost, but need to think thru more how to do it without losing the context + for event in new_events: + event_tlength = count_string_tokens(str(event), cfg.fast_llm_model) + + if batch_tlength + event_tlength > max_tokens - prompt_template_length: + # The batch is full. Summarize it and start a new one. + self.summarize_batch(batch, cfg) + batch = [event] + batch_tlength = event_tlength + else: + batch.append(event) + batch_tlength += event_tlength + + if batch: + # There's an unprocessed batch. Summarize it. + self.summarize_batch(batch, cfg) + + return self.summary_message() + + def summarize_batch(self, new_events_batch, cfg): prompt = f'''Your task is to create a concise running summary of actions and information results in the provided text, focusing on key and potentially important information to remember. -You will receive the current summary and the your latest actions. Combine them, adding relevant key information from the latest development in 1st person past tense and keeping the summary concise. + You will receive the current summary and your latest actions. Combine them, adding relevant key information from the latest development in 1st person past tense and keeping the summary concise. -Summary So Far: -""" -{self.summary} -""" + Summary So Far: + """ + {self.summary} + """ -Latest Development: -""" -{new_events or "Nothing new happened."} -""" -''' + Latest Development: + """ + {new_events_batch or "Nothing new happened."} + """ + ''' prompt = ChatSequence.for_model(cfg.fast_llm_model, [Message("user", prompt)]) self.agent.log_cycle_handler.log_cycle( @@ -200,5 +230,3 @@ Latest Development: self.summary, SUMMARY_FILE_NAME, ) - - return self.summary_message() diff --git a/tests/unit/test_message_history.py b/tests/unit/test_message_history.py new file mode 100644 index 00000000..6fdf75e6 --- /dev/null +++ b/tests/unit/test_message_history.py @@ -0,0 +1,145 @@ +import math +import time +from unittest.mock import MagicMock + +import pytest + +from autogpt.agent import Agent +from autogpt.config import AIConfig +from autogpt.config.config import Config +from autogpt.llm.base import ChatSequence, Message +from autogpt.llm.providers.openai import OPEN_AI_CHAT_MODELS +from autogpt.llm.utils import count_string_tokens +from autogpt.memory.message_history import MessageHistory + + +@pytest.fixture +def agent(config: Config): + ai_name = "Test AI" + memory = MagicMock() + next_action_count = 0 + command_registry = MagicMock() + ai_config = AIConfig(ai_name=ai_name) + system_prompt = "System prompt" + triggering_prompt = "Triggering prompt" + workspace_directory = "workspace_directory" + + agent = Agent( + ai_name=ai_name, + memory=memory, + next_action_count=next_action_count, + command_registry=command_registry, + ai_config=ai_config, + config=config, + system_prompt=system_prompt, + triggering_prompt=triggering_prompt, + workspace_directory=workspace_directory, + ) + return agent + + +def test_message_history_batch_summary(mocker, agent): + config = Config() + history = MessageHistory(agent) + model = config.fast_llm_model + message_tlength = 0 + message_count = 0 + + # Setting the mock output and inputs + mock_summary_text = "I executed 
browse_website command for each of the websites returned from Google search, but none of them have any job openings." + mock_summary = mocker.patch( + "autogpt.memory.message_history.create_chat_completion", + return_value=mock_summary_text, + ) + + system_prompt = 'You are AIJobSearcher, an AI designed to search for job openings for software engineer role\nYour decisions must always be made independently without seeking user assistance. Play to your strengths as an LLM and pursue simple strategies with no legal complications.\n\nGOALS:\n\n1. Find any job openings for software engineers online\n2. Go through each of the websites and job openings to summarize their requirements and URL, and skip that if you already visit the website\n\nIt takes money to let you run. Your API budget is $5.000\n\nConstraints:\n1. ~4000 word limit for short term memory. Your short term memory is short, so immediately save important information to files.\n2. If you are unsure how you previously did something or want to recall past events, thinking about similar events will help you remember.\n3. No user assistance\n4. Exclusively use the commands listed in double quotes e.g. "command name"\n\nCommands:\n1. google_search: Google Search, args: "query": ""\n2. browse_website: Browse Website, args: "url": "", "question": ""\n3. task_complete: Task Complete (Shutdown), args: "reason": ""\n\nResources:\n1. Internet access for searches and information gathering.\n2. Long Term memory management.\n3. GPT-3.5 powered Agents for delegation of simple tasks.\n4. File output.\n\nPerformance Evaluation:\n1. Continuously review and analyze your actions to ensure you are performing to the best of your abilities.\n2. Constructively self-criticize your big-picture behavior constantly.\n3. Reflect on past decisions and strategies to refine your approach.\n4. Every command has a cost, so be smart and efficient. Aim to complete tasks in the least number of steps.\n5. 
Write all code to a file.\n\nYou should only respond in JSON format as described below \nResponse Format: \n{\n "thoughts": {\n "text": "thought",\n "reasoning": "reasoning",\n "plan": "- short bulleted\\n- list that conveys\\n- long-term plan",\n "criticism": "constructive self-criticism",\n "speak": "thoughts summary to say to user"\n },\n "command": {\n "name": "command name",\n "args": {\n "arg name": "value"\n }\n }\n} \nEnsure the response can be parsed by Python json.loads' + message_sequence = ChatSequence.for_model( + model, + [ + Message("system", system_prompt), + Message("system", f"The current time and date is {time.strftime('%c')}"), + ], + ) + insertion_index = len(message_sequence) + + user_input = "Determine which next command to use, and respond using the format specified above:'" + user_input_msg = Message("user", user_input) + history.append(user_input_msg) + + # mock a reponse from AI + assistant_reply = '{\n "thoughts": {\n "text": "I will use the \'google_search\' command to find more websites with job openings for software engineering manager role.",\n "reasoning": "Since the previous website did not provide any relevant information, I will use the \'google_search\' command to find more websites with job openings for software engineer role.",\n "plan": "- Use \'google_search\' command to find more websites with job openings for software engineer role",\n "criticism": "I need to ensure that I am able to extract the relevant information from each website and job opening.",\n "speak": "I will now use the \'google_search\' command to find more websites with job openings for software engineer role."\n },\n "command": {\n "name": "google_search",\n "args": {\n "query": "software engineer job openings"\n }\n }\n}' + msg = Message("assistant", assistant_reply, "ai_response") + history.append(msg) + message_tlength += count_string_tokens(str(msg), config.fast_llm_model) + message_count += 1 + + # mock some websites returned from google search command in the past + result = "Command google_search returned: [" + for i in range(50): + result += "http://www.job" + str(i) + ".com," + result += "]" + msg = Message("system", result, "action_result") + history.append(msg) + message_tlength += count_string_tokens(str(msg), config.fast_llm_model) + message_count += 1 + + user_input = "Determine which next command to use, and respond using the format specified above:'" + user_input_msg = Message("user", user_input) + history.append(user_input_msg) + + # mock numbers of AI response and action results from browse_website commands in the past, doesn't need the thoughts part, as the summarization code discard them anyway + for i in range(50): + assistant_reply = ( + '{\n "command": {\n "name": "browse_website",\n "args": {\n "url": "https://www.job' + + str(i) + + '.com",\n "question": "software engineer"\n }\n }\n}' + ) + msg = Message("assistant", assistant_reply, "ai_response") + history.append(msg) + message_tlength += count_string_tokens(str(msg), config.fast_llm_model) + message_count += 1 + + result = ( + "Command browse_website returned: Answer gathered from website: The text in job" + + str(i) + + " does not provide information on specific job requirements or a job URL.]", + ) + msg = Message("system", result, "action_result") + history.append(msg) + message_tlength += count_string_tokens(str(msg), config.fast_llm_model) + message_count += 1 + + user_input = "Determine which next command to use, and respond using the format specified above:'" + user_input_msg = Message("user", 
user_input) + history.append(user_input_msg) + + # only take the last cycle of the message history, trim the rest of previous messages, and generate a summary for them + for cycle in reversed(list(history.per_cycle())): + messages_to_add = [msg for msg in cycle if msg is not None] + message_sequence.insert(insertion_index, *messages_to_add) + break + + # count the expected token length of the trimmed message by reducing the token length of messages in the last cycle + for message in messages_to_add: + if message.role != "user": + message_tlength -= count_string_tokens(str(message), config.fast_llm_model) + message_count -= 1 + + # test the main trim_message function + new_summary_message, trimmed_messages = history.trim_messages( + current_message_chain=list(message_sequence), + ) + + expected_call_count = math.ceil( + message_tlength / (OPEN_AI_CHAT_MODELS.get(config.fast_llm_model).max_tokens) + ) + # Expecting 2 batches because of over max token + assert mock_summary.call_count == expected_call_count # 2 at the time of writing + # Expecting 100 messages because 50 pairs of ai_response and action_result, based on the range set above + assert len(trimmed_messages) == message_count # 100 at the time of writing + assert new_summary_message == Message( + role="system", + content="This reminds you of these events from your past: \n" + + mock_summary_text, + type=None, + ) From fd04db12faaf966f36f51b202df124610c59b988 Mon Sep 17 00:00:00 2001 From: Erik Peterson Date: Sun, 11 Jun 2023 14:19:42 -0700 Subject: [PATCH 48/97] Use prompt_toolkit to enable keyboard navigation in CLI (#4649) * Use prompt_toolkit to enable keyboard navigation in CLI * Also update other tests --------- Co-authored-by: merwanehamadi --- autogpt/utils.py | 6 +++++- docs/challenges/building_challenges.md | 2 +- requirements.txt | 1 + tests/challenges/utils.py | 2 +- tests/integration/test_setup.py | 6 +++--- 5 files changed, 11 insertions(+), 6 deletions(-) diff --git a/autogpt/utils.py b/autogpt/utils.py index 653841a2..91e570a0 100644 --- a/autogpt/utils.py +++ b/autogpt/utils.py @@ -5,10 +5,14 @@ import requests import yaml from colorama import Fore, Style from git.repo import Repo +from prompt_toolkit import ANSI, PromptSession +from prompt_toolkit.history import InMemoryHistory from autogpt.config import Config from autogpt.logs import logger +session = PromptSession(history=InMemoryHistory()) + def batch(iterable, max_batch_length: int, overlap: int = 0): """Batch data from iterable into slices of length N. 
The last batch may be shorter.""" @@ -52,7 +56,7 @@ def clean_input(prompt: str = "", talk=False): # ask for input, default when just pressing Enter is y logger.info("Asking user via keyboard...") - answer = input(prompt) + answer = session.prompt(ANSI(prompt)) return answer except KeyboardInterrupt: logger.info("You interrupted Auto-GPT") diff --git a/docs/challenges/building_challenges.md b/docs/challenges/building_challenges.md index 0bd416cc..0c3d89ac 100644 --- a/docs/challenges/building_challenges.md +++ b/docs/challenges/building_challenges.md @@ -111,7 +111,7 @@ def test_information_retrieval_challenge_a(kubernetes_agent, monkeypatch) -> Non """ input_sequence = ["s", "s", "s", "s", "s", "EXIT"] gen = input_generator(input_sequence) - monkeypatch.setattr("builtins.input", lambda _: next(gen)) + monkeypatch.setattr("autogpt.utils.session.prompt", lambda _: next(gen)) with contextlib.suppress(SystemExit): run_interaction_loop(kubernetes_agent, None) diff --git a/requirements.txt b/requirements.txt index 16ed8558..8c171f83 100644 --- a/requirements.txt +++ b/requirements.txt @@ -27,6 +27,7 @@ click charset-normalizer>=3.1.0 spacy>=3.0.0,<4.0.0 en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.5.0/en_core_web_sm-3.5.0-py3-none-any.whl +prompt_toolkit>=3.0.38 ##Dev coverage diff --git a/tests/challenges/utils.py b/tests/challenges/utils.py index c5312601..9a357d96 100644 --- a/tests/challenges/utils.py +++ b/tests/challenges/utils.py @@ -35,7 +35,7 @@ def setup_mock_input(monkeypatch: pytest.MonkeyPatch, cycle_count: int) -> None: yield from input_sequence gen = input_generator() - monkeypatch.setattr("builtins.input", lambda _: next(gen)) + monkeypatch.setattr("autogpt.utils.session.prompt", lambda _: next(gen)) def run_interaction_loop( diff --git a/tests/integration/test_setup.py b/tests/integration/test_setup.py index 4e2a505d..5217d72a 100644 --- a/tests/integration/test_setup.py +++ b/tests/integration/test_setup.py @@ -11,7 +11,7 @@ from tests.utils import requires_api_key @requires_api_key("OPENAI_API_KEY") def test_generate_aiconfig_automatic_default(patched_api_requestor): user_inputs = [""] - with patch("builtins.input", side_effect=user_inputs): + with patch("autogpt.utils.session.prompt", side_effect=user_inputs): ai_config = prompt_user() assert isinstance(ai_config, AIConfig) @@ -44,7 +44,7 @@ def test_generate_aiconfig_automatic_fallback(patched_api_requestor): "", "", ] - with patch("builtins.input", side_effect=user_inputs): + with patch("autogpt.utils.session.prompt", side_effect=user_inputs): ai_config = prompt_user() assert isinstance(ai_config, AIConfig) @@ -65,7 +65,7 @@ def test_prompt_user_manual_mode(patched_api_requestor): "", "", ] - with patch("builtins.input", side_effect=user_inputs): + with patch("autogpt.utils.session.prompt", side_effect=user_inputs): ai_config = prompt_user() assert isinstance(ai_config, AIConfig) From 2ce6ae670722d08e6c02b29640341ded0f53f4a1 Mon Sep 17 00:00:00 2001 From: merwanehamadi Date: Sun, 11 Jun 2023 14:34:02 -0700 Subject: [PATCH 49/97] Change memory challenge expectations (#4657) --- .github/workflows/benchmarks.yml | 4 ++-- .../memory/test_memory_challenge_c.py | 22 +++++++++---------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml index 685647b8..ed01127b 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/benchmarks.yml @@ -14,10 +14,10 @@ jobs: fail-fast: false matrix: config: 
- - python-version: ["3.10"] + - python-version: "3.10" task: "tests/challenges" task-name: "Mandatory Tasks" - - python-version: ["3.10"] + - python-version: "3.10" task: "--beat-challenges -ra tests/challenges" task-name: "Challenging Tasks" diff --git a/tests/challenges/memory/test_memory_challenge_c.py b/tests/challenges/memory/test_memory_challenge_c.py index f8f4ad98..2754d6b0 100644 --- a/tests/challenges/memory/test_memory_challenge_c.py +++ b/tests/challenges/memory/test_memory_challenge_c.py @@ -10,7 +10,7 @@ from tests.challenges.utils import ( run_interaction_loop, ) -NOISE = 1000 +NOISE = 1200 OUTPUT_LOCATION = "output.txt" @@ -33,16 +33,16 @@ def test_memory_challenge_c( level_to_run (int) """ silly_phrases = [ - "The purple elephant danced on a rainbow while eating a taco.", - "The sneaky toaster stole my socks and ran away to Hawaii.", - "My pet rock sings better than Beyoncé on Tuesdays.", - "The giant hamster rode a unicycle through the crowded mall.", - "The talking tree gave me a high-five and then flew away.", - "I have a collection of invisible hats that I wear on special occasions.", - "The flying spaghetti monster stole my sandwich and left a note saying 'thanks for the snack!'", - "My imaginary friend is a dragon who loves to play video games.", - "I once saw a cloud shaped like a giant chicken eating a pizza.", - "The ninja unicorn disguised itself as a potted plant and infiltrated the office.", + "The purple elephant danced on a rainbow while eating a taco", + "The sneaky toaster stole my socks and ran away to Hawaii", + "My pet rock sings better than Beyoncé on Tuesdays", + "The giant hamster rode a unicycle through the crowded mall", + "The talking tree gave me a high-five and then flew away", + "I have a collection of invisible hats that I wear on special occasions", + "The flying spaghetti monster stole my sandwich and left a note saying 'thanks for the snack'", + "My imaginary friend is a dragon who loves to play video games", + "I once saw a cloud shaped like a giant chicken eating a pizza", + "The ninja unicorn disguised itself as a potted plant and infiltrated the office", ] level_silly_phrases = silly_phrases[:level_to_run] From a7faab32fea2ed15248a0279e2c10766f8896a47 Mon Sep 17 00:00:00 2001 From: digger yu Date: Mon, 12 Jun 2023 07:17:48 +0800 Subject: [PATCH 50/97] Fix typo in docs/setup.md (#4613) fix typo: depedencies -> dependencies Co-authored-by: Reinier van der Leer --- docs/setup.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/setup.md b/docs/setup.md index 257e07c1..e894ebe2 100644 --- a/docs/setup.md +++ b/docs/setup.md @@ -172,7 +172,7 @@ If you need to upgrade Docker Compose to a newer version, you can follow the ins Once you have a recent version of docker-compose, run the commands below in your Auto-GPT folder. -1. Build the image. If you have pulled the image from Docker Hub, skip this step (NOTE: You *will* need to do this if you are modifying requirements.txt to add/remove depedencies like Python libs/frameworks) +1. Build the image. 
If you have pulled the image from Docker Hub, skip this step (NOTE: You *will* need to do this if you are modifying requirements.txt to add/remove dependencies like Python libs/frameworks) :::shell docker-compose build auto-gpt From e60dfca9fc4cb0e9a550e80d734f66f5e1d6b919 Mon Sep 17 00:00:00 2001 From: David Date: Mon, 12 Jun 2023 01:44:18 +0200 Subject: [PATCH 51/97] Fix run.sh shebang (#4561) Co-authored-by: Nicholas Tindle Co-authored-by: Reinier van der Leer --- run.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/run.sh b/run.sh index 62463555..287499f8 100755 --- a/run.sh +++ b/run.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash function find_python_command() { if command -v python &> /dev/null @@ -26,4 +26,4 @@ if $PYTHON_CMD -c "import sys; sys.exit(sys.version_info < (3, 10))"; then read -p "Press any key to continue..." else echo "Python 3.10 or higher is required to run Auto GPT." -fi \ No newline at end of file +fi From 62dd1675a07c2bc87498e98c23210a61a19ec2ae Mon Sep 17 00:00:00 2001 From: Reinier van der Leer Date: Mon, 12 Jun 2023 02:00:57 +0200 Subject: [PATCH 52/97] Add basic Netlify config (#4662) --- netlify.toml | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 netlify.toml diff --git a/netlify.toml b/netlify.toml new file mode 100644 index 00000000..43e79f0f --- /dev/null +++ b/netlify.toml @@ -0,0 +1,6 @@ +# Netlify config for Auto-GPT docs + +[build] + publish = "public/" + command = "mkdocs build -d public" + ignore = "git diff --quiet HEAD^ HEAD docs mkdocs.yml CONTRIBUTING.md CODE_OF_CONDUCT.md LICENSE" From ff46c16805a53fed4309376bb5361710232a614d Mon Sep 17 00:00:00 2001 From: Kinance Date: Mon, 12 Jun 2023 09:13:47 +0900 Subject: [PATCH 53/97] Remove extra spaces in summarization prompt (#4660) * Implement Batch Running Summarization to avoid max token error (#4652) * Fix extra space in prompt --------- Co-authored-by: Reinier van der Leer --- autogpt/memory/message_history.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/autogpt/memory/message_history.py b/autogpt/memory/message_history.py index 2ae53055..7f307536 100644 --- a/autogpt/memory/message_history.py +++ b/autogpt/memory/message_history.py @@ -199,18 +199,18 @@ class MessageHistory: def summarize_batch(self, new_events_batch, cfg): prompt = f'''Your task is to create a concise running summary of actions and information results in the provided text, focusing on key and potentially important information to remember. - You will receive the current summary and your latest actions. Combine them, adding relevant key information from the latest development in 1st person past tense and keeping the summary concise. +You will receive the current summary and your latest actions. Combine them, adding relevant key information from the latest development in 1st person past tense and keeping the summary concise. 
- Summary So Far: - """ - {self.summary} - """ +Summary So Far: +""" +{self.summary} +""" - Latest Development: - """ - {new_events_batch or "Nothing new happened."} - """ - ''' +Latest Development: +""" +{new_events_batch or "Nothing new happened."} +""" +''' prompt = ChatSequence.for_model(cfg.fast_llm_model, [Message("user", prompt)]) self.agent.log_cycle_handler.log_cycle( From a9d177eeeb7b7b0fb9db9cb35084a932d9846717 Mon Sep 17 00:00:00 2001 From: Reinier van der Leer Date: Mon, 12 Jun 2023 02:20:39 +0200 Subject: [PATCH 54/97] Remove unused function `split_file` from file_operations.py (#4658) --- autogpt/commands/file_operations.py | 32 ------------------------- tests/unit/test_file_operations.py | 37 ----------------------------- 2 files changed, 69 deletions(-) diff --git a/autogpt/commands/file_operations.py b/autogpt/commands/file_operations.py index 5d9649be..a0a61539 100644 --- a/autogpt/commands/file_operations.py +++ b/autogpt/commands/file_operations.py @@ -119,38 +119,6 @@ def log_operation( ) -def split_file( - content: str, max_length: int = 4000, overlap: int = 0 -) -> Generator[str, None, None]: - """ - Split text into chunks of a specified maximum length with a specified overlap - between chunks. - - :param content: The input text to be split into chunks - :param max_length: The maximum length of each chunk, - default is 4000 (about 1k token) - :param overlap: The number of overlapping characters between chunks, - default is no overlap - :return: A generator yielding chunks of text - """ - start = 0 - content_length = len(content) - - while start < content_length: - end = start + max_length - if end + overlap < content_length: - chunk = content[start : end + max(overlap - 1, 0)] - else: - chunk = content[start:content_length] - - # Account for the case where the last chunk is shorter than the overlap, so it has already been consumed - if len(chunk) <= overlap: - break - - yield chunk - start += max_length - overlap - - @command("read_file", "Read a file", '"filename": ""') def read_file(filename: str, agent: Agent) -> str: """Read a file and return the contents diff --git a/tests/unit/test_file_operations.py b/tests/unit/test_file_operations.py index d3e6991d..27af9373 100644 --- a/tests/unit/test_file_operations.py +++ b/tests/unit/test_file_operations.py @@ -188,43 +188,6 @@ def test_log_operation_with_checksum(agent: Agent): assert f"log_test: path/to/test #ABCDEF\n" in content -@pytest.mark.parametrize( - "max_length, overlap, content, expected", - [ - ( - 4, - 1, - "abcdefghij", - ["abcd", "defg", "ghij"], - ), - ( - 4, - 0, - "abcdefghijkl", - ["abcd", "efgh", "ijkl"], - ), - ( - 4, - 0, - "abcdefghijklm", - ["abcd", "efgh", "ijkl", "m"], - ), - ( - 4, - 0, - "abcdefghijk", - ["abcd", "efgh", "ijk"], - ), - ], -) -# Test splitting a file into chunks -def test_split_file(max_length, overlap, content, expected): - assert ( - list(file_ops.split_file(content, max_length=max_length, overlap=overlap)) - == expected - ) - - def test_read_file( mock_MemoryItem_from_text, test_file_with_content_path: Path, From d5afbbee26614ab0d5fa6e09af7bafb063ad3bcf Mon Sep 17 00:00:00 2001 From: merwanehamadi Date: Mon, 12 Jun 2023 08:03:14 -0700 Subject: [PATCH 55/97] Add challenge name and level to pytest logs (#4661) --- autogpt/log_cycle/log_cycle.py | 5 ++++- .../basic_abilities/test_browse_website.py | 5 ++++- .../basic_abilities/test_write_file.py | 7 +++++- .../challenge_decorator/challenge.py | 3 ++- .../challenge_decorator.py | 1 + tests/challenges/conftest.py | 5 +++++ 
.../debug_code/test_debug_code_challenge_a.py | 5 ++++- .../test_information_retrieval_challenge_a.py | 9 +++++++- .../test_information_retrieval_challenge_b.py | 9 +++++++- .../test_kubernetes_template_challenge_a.py | 5 ++++- .../memory/test_memory_challenge_a.py | 9 +++++++- .../memory/test_memory_challenge_b.py | 9 +++++++- .../memory/test_memory_challenge_c.py | 9 +++++++- .../memory/test_memory_challenge_d.py | 9 +++++++- tests/challenges/utils.py | 22 +++++++++++++++++-- tests/integration/test_web_selenium.py | 4 ++++ 16 files changed, 102 insertions(+), 14 deletions(-) diff --git a/autogpt/log_cycle/log_cycle.py b/autogpt/log_cycle/log_cycle.py index 8daed25c..71aecda8 100644 --- a/autogpt/log_cycle/log_cycle.py +++ b/autogpt/log_cycle/log_cycle.py @@ -34,7 +34,7 @@ class LogCycleHandler: if os.environ.get("OVERWRITE_DEBUG") == "1": outer_folder_name = "auto_gpt" else: - ai_name_short = ai_name[:15] if ai_name else DEFAULT_PREFIX + ai_name_short = self.get_agent_short_name(ai_name) outer_folder_name = f"{created_at}_{ai_name_short}" outer_folder_path = os.path.join(log_directory, "DEBUG", outer_folder_name) @@ -42,6 +42,9 @@ class LogCycleHandler: return outer_folder_path + def get_agent_short_name(self, ai_name): + return ai_name[:15] if ai_name else DEFAULT_PREFIX + def create_inner_directory(self, outer_folder_path: str, cycle_count: int) -> str: nested_folder_name = str(cycle_count).zfill(3) nested_folder_path = os.path.join(outer_folder_path, nested_folder_name) diff --git a/tests/challenges/basic_abilities/test_browse_website.py b/tests/challenges/basic_abilities/test_browse_website.py index 6ee80d33..1c4eb27e 100644 --- a/tests/challenges/basic_abilities/test_browse_website.py +++ b/tests/challenges/basic_abilities/test_browse_website.py @@ -13,9 +13,12 @@ def test_browse_website( patched_api_requestor: None, monkeypatch: pytest.MonkeyPatch, level_to_run: int, + challenge_name: str, ) -> None: file_path = browser_agent.workspace.get_path("browse_website.txt") - run_interaction_loop(monkeypatch, browser_agent, CYCLE_COUNT) + run_interaction_loop( + monkeypatch, browser_agent, CYCLE_COUNT, challenge_name, level_to_run + ) # content = read_file(file_path, config) content = open(file_path, encoding="utf-8").read() diff --git a/tests/challenges/basic_abilities/test_write_file.py b/tests/challenges/basic_abilities/test_write_file.py index 9aa837e9..39a45ec6 100644 --- a/tests/challenges/basic_abilities/test_write_file.py +++ b/tests/challenges/basic_abilities/test_write_file.py @@ -20,10 +20,15 @@ def test_write_file( patched_api_requestor: None, monkeypatch: pytest.MonkeyPatch, level_to_run: int, + challenge_name: str, ) -> None: file_system_agent = file_system_agents[level_to_run - 1] run_interaction_loop( - monkeypatch, file_system_agent, CYCLE_COUNT_PER_LEVEL[level_to_run - 1] + monkeypatch, + file_system_agent, + CYCLE_COUNT_PER_LEVEL[level_to_run - 1], + challenge_name, + level_to_run, ) expected_outputs = EXPECTED_OUTPUTS_PER_LEVEL[level_to_run - 1] diff --git a/tests/challenges/challenge_decorator/challenge.py b/tests/challenges/challenge_decorator/challenge.py index fd3b60cb..e875ac99 100644 --- a/tests/challenges/challenge_decorator/challenge.py +++ b/tests/challenges/challenge_decorator/challenge.py @@ -3,6 +3,7 @@ from typing import Optional class Challenge: BEAT_CHALLENGES = False + DEFAULT_CHALLENGE_NAME = "default_challenge_name" def __init__( self, @@ -10,7 +11,7 @@ class Challenge: category: str, max_level: int, is_new_challenge: bool, - max_level_beaten: 
Optional[int], + max_level_beaten: Optional[int] = None, level_to_run: Optional[int] = None, ) -> None: self.name = name diff --git a/tests/challenges/challenge_decorator/challenge_decorator.py b/tests/challenges/challenge_decorator/challenge_decorator.py index e1fd80c4..52d796c0 100644 --- a/tests/challenges/challenge_decorator/challenge_decorator.py +++ b/tests/challenges/challenge_decorator/challenge_decorator.py @@ -42,6 +42,7 @@ def challenge( ) if challenge.level_to_run is not None: kwargs["level_to_run"] = challenge.level_to_run + kwargs["challenge_name"] = challenge.name try: func(*args, **kwargs) challenge.succeeded = True diff --git a/tests/challenges/conftest.py b/tests/challenges/conftest.py index dff45f11..c0604c31 100644 --- a/tests/challenges/conftest.py +++ b/tests/challenges/conftest.py @@ -51,6 +51,11 @@ def level_to_run(request: FixtureRequest) -> int: return request.config.option.level +@pytest.fixture +def challenge_name() -> str: + return Challenge.DEFAULT_CHALLENGE_NAME + + @pytest.fixture(autouse=True) def check_beat_challenges(request: FixtureRequest) -> None: Challenge.BEAT_CHALLENGES = request.config.getoption("--beat-challenges") diff --git a/tests/challenges/debug_code/test_debug_code_challenge_a.py b/tests/challenges/debug_code/test_debug_code_challenge_a.py index 43f524c7..305c9693 100644 --- a/tests/challenges/debug_code/test_debug_code_challenge_a.py +++ b/tests/challenges/debug_code/test_debug_code_challenge_a.py @@ -25,6 +25,7 @@ def test_debug_code_challenge_a( monkeypatch: pytest.MonkeyPatch, patched_api_requestor: MockerFixture, level_to_run: int, + challenge_name: str, ) -> None: """ Test whether the agent can debug a simple code snippet. @@ -39,7 +40,9 @@ def test_debug_code_challenge_a( copy_file_into_workspace(debug_code_agent, DIRECTORY_PATH, CODE_FILE_PATH) copy_file_into_workspace(debug_code_agent, DIRECTORY_PATH, TEST_FILE_PATH) - run_interaction_loop(monkeypatch, debug_code_agent, CYCLE_COUNT) + run_interaction_loop( + monkeypatch, debug_code_agent, CYCLE_COUNT, challenge_name, level_to_run + ) output = execute_python_file( get_workspace_path(debug_code_agent, TEST_FILE_PATH), debug_code_agent diff --git a/tests/challenges/information_retrieval/test_information_retrieval_challenge_a.py b/tests/challenges/information_retrieval/test_information_retrieval_challenge_a.py index 8e0fdb63..56d5d4ec 100644 --- a/tests/challenges/information_retrieval/test_information_retrieval_challenge_a.py +++ b/tests/challenges/information_retrieval/test_information_retrieval_challenge_a.py @@ -18,6 +18,7 @@ def test_information_retrieval_challenge_a( monkeypatch: pytest.MonkeyPatch, patched_api_requestor: MockerFixture, level_to_run: int, + challenge_name: str, ) -> None: """ Test the challenge_a function in a given agent by mocking user inputs and checking the output file content. @@ -26,7 +27,13 @@ def test_information_retrieval_challenge_a( :param monkeypatch: pytest's monkeypatch utility for modifying builtins. 
""" information_retrieval_agent = information_retrieval_agents[level_to_run - 1] - run_interaction_loop(monkeypatch, information_retrieval_agent, CYCLE_COUNT) + run_interaction_loop( + monkeypatch, + information_retrieval_agent, + CYCLE_COUNT, + challenge_name, + level_to_run, + ) file_path = get_workspace_path(information_retrieval_agent, OUTPUT_LOCATION) content = read_file(file_path, information_retrieval_agent) diff --git a/tests/challenges/information_retrieval/test_information_retrieval_challenge_b.py b/tests/challenges/information_retrieval/test_information_retrieval_challenge_b.py index 0672b879..580b8b0b 100644 --- a/tests/challenges/information_retrieval/test_information_retrieval_challenge_b.py +++ b/tests/challenges/information_retrieval/test_information_retrieval_challenge_b.py @@ -18,6 +18,7 @@ def test_information_retrieval_challenge_b( monkeypatch: pytest.MonkeyPatch, patched_api_requestor: MockerFixture, level_to_run: int, + challenge_name: str, ) -> None: """ Test the challenge_b function in a given agent by mocking user inputs and checking the output file content. @@ -29,7 +30,13 @@ def test_information_retrieval_challenge_b( """ with contextlib.suppress(SystemExit): - run_interaction_loop(monkeypatch, get_nobel_prize_agent, CYCLE_COUNT) + run_interaction_loop( + monkeypatch, + get_nobel_prize_agent, + CYCLE_COUNT, + challenge_name, + level_to_run, + ) file_path = get_workspace_path(get_nobel_prize_agent, OUTPUT_LOCATION) content = read_file(file_path, get_nobel_prize_agent) diff --git a/tests/challenges/kubernetes/test_kubernetes_template_challenge_a.py b/tests/challenges/kubernetes/test_kubernetes_template_challenge_a.py index c2eaab75..98f239cb 100644 --- a/tests/challenges/kubernetes/test_kubernetes_template_challenge_a.py +++ b/tests/challenges/kubernetes/test_kubernetes_template_challenge_a.py @@ -17,6 +17,7 @@ def test_kubernetes_template_challenge_a( monkeypatch: pytest.MonkeyPatch, patched_api_requestor: MockerFixture, level_to_run: int, + challenge_name: str, ) -> None: """ Test the challenge_a function in a given agent by mocking user inputs @@ -27,7 +28,9 @@ def test_kubernetes_template_challenge_a( monkeypatch (pytest.MonkeyPatch) level_to_run (int) """ - run_interaction_loop(monkeypatch, kubernetes_agent, CYCLE_COUNT) + run_interaction_loop( + monkeypatch, kubernetes_agent, CYCLE_COUNT, challenge_name, level_to_run + ) file_path = get_workspace_path(kubernetes_agent, OUTPUT_LOCATION) content = read_file(file_path, kubernetes_agent) diff --git a/tests/challenges/memory/test_memory_challenge_a.py b/tests/challenges/memory/test_memory_challenge_a.py index 7050696a..41453b25 100644 --- a/tests/challenges/memory/test_memory_challenge_a.py +++ b/tests/challenges/memory/test_memory_challenge_a.py @@ -15,6 +15,7 @@ def test_memory_challenge_a( patched_api_requestor: MockerFixture, monkeypatch: pytest.MonkeyPatch, level_to_run: int, + challenge_name: str, ) -> None: """ The agent reads a file containing a task_id. Then, it reads a series of other files. 
@@ -28,7 +29,13 @@ def test_memory_challenge_a( task_id = "2314" create_instructions_files(memory_management_agent, level_to_run, task_id) - run_interaction_loop(monkeypatch, memory_management_agent, level_to_run + 2) + run_interaction_loop( + monkeypatch, + memory_management_agent, + level_to_run + 2, + challenge_name, + level_to_run, + ) file_path = get_workspace_path(memory_management_agent, OUTPUT_LOCATION) content = read_file(file_path, memory_management_agent) diff --git a/tests/challenges/memory/test_memory_challenge_b.py b/tests/challenges/memory/test_memory_challenge_b.py index 4abc4188..b381df1b 100644 --- a/tests/challenges/memory/test_memory_challenge_b.py +++ b/tests/challenges/memory/test_memory_challenge_b.py @@ -20,6 +20,7 @@ def test_memory_challenge_b( patched_api_requestor: MockerFixture, monkeypatch: pytest.MonkeyPatch, level_to_run: int, + challenge_name: str, ) -> None: """ The agent reads a series of files, each containing a task_id and noise. After reading 'n' files, @@ -34,7 +35,13 @@ def test_memory_challenge_b( task_ids = [str(i * 1111) for i in range(1, level_to_run + 1)] create_instructions_files(memory_management_agent, level_to_run, task_ids) - run_interaction_loop(monkeypatch, memory_management_agent, level_to_run + 2) + run_interaction_loop( + monkeypatch, + memory_management_agent, + level_to_run + 2, + challenge_name, + level_to_run, + ) file_path = get_workspace_path(memory_management_agent, OUTPUT_LOCATION) content = read_file(file_path, memory_management_agent) diff --git a/tests/challenges/memory/test_memory_challenge_c.py b/tests/challenges/memory/test_memory_challenge_c.py index 2754d6b0..3cfeb2c0 100644 --- a/tests/challenges/memory/test_memory_challenge_c.py +++ b/tests/challenges/memory/test_memory_challenge_c.py @@ -20,6 +20,7 @@ def test_memory_challenge_c( patched_api_requestor: MockerFixture, monkeypatch: pytest.MonkeyPatch, level_to_run: int, + challenge_name: str, ) -> None: """ Instead of reading task Ids from files as with the previous challenges, the agent now must remember @@ -52,7 +53,13 @@ def test_memory_challenge_c( level_silly_phrases, ) - run_interaction_loop(monkeypatch, memory_management_agent, level_to_run + 2) + run_interaction_loop( + monkeypatch, + memory_management_agent, + level_to_run + 2, + challenge_name, + level_to_run, + ) file_path = get_workspace_path(memory_management_agent, OUTPUT_LOCATION) content = read_file(file_path, agent=memory_management_agent) for phrase in level_silly_phrases: diff --git a/tests/challenges/memory/test_memory_challenge_d.py b/tests/challenges/memory/test_memory_challenge_d.py index ff35e183..ae44831b 100644 --- a/tests/challenges/memory/test_memory_challenge_d.py +++ b/tests/challenges/memory/test_memory_challenge_d.py @@ -20,6 +20,7 @@ def test_memory_challenge_d( patched_api_requestor: MockerFixture, monkeypatch: pytest.MonkeyPatch, level_to_run: int, + challenge_name: str, ) -> None: """ The agent is given a series of events and must remember the respective beliefs of the characters. 
@@ -38,7 +39,13 @@ def test_memory_challenge_d( create_instructions_files( memory_management_agent, level_to_run, level_sally_anne_test_phrases ) - run_interaction_loop(monkeypatch, memory_management_agent, level_to_run + 2) + run_interaction_loop( + monkeypatch, + memory_management_agent, + level_to_run + 2, + challenge_name, + level_to_run, + ) file_path = get_workspace_path(memory_management_agent, OUTPUT_LOCATION) content = read_file(file_path, memory_management_agent) diff --git a/tests/challenges/utils.py b/tests/challenges/utils.py index 9a357d96..0b5f54c5 100644 --- a/tests/challenges/utils.py +++ b/tests/challenges/utils.py @@ -2,11 +2,12 @@ import contextlib import random import shutil from pathlib import Path -from typing import Generator +from typing import Any, Generator import pytest from autogpt.agent import Agent +from autogpt.log_cycle.log_cycle import LogCycleHandler def generate_noise(noise_size: int) -> str: @@ -39,13 +40,30 @@ def setup_mock_input(monkeypatch: pytest.MonkeyPatch, cycle_count: int) -> None: def run_interaction_loop( - monkeypatch: pytest.MonkeyPatch, agent: Agent, cycle_count: int + monkeypatch: pytest.MonkeyPatch, + agent: Agent, + cycle_count: int, + challenge_name: str, + level_to_run: int, ) -> None: setup_mock_input(monkeypatch, cycle_count) + + setup_mock_log_cycle_agent_name(monkeypatch, challenge_name, level_to_run) with contextlib.suppress(SystemExit): agent.start_interaction_loop() +def setup_mock_log_cycle_agent_name( + monkeypatch: pytest.MonkeyPatch, challenge_name: str, level_to_run: int +) -> None: + def mock_get_agent_short_name(*args: Any, **kwargs: Any) -> str: + return f"{challenge_name}_level_{level_to_run}" + + monkeypatch.setattr( + LogCycleHandler, "get_agent_short_name", mock_get_agent_short_name + ) + + def get_workspace_path(agent: Agent, file_name: str) -> str: return str(agent.workspace.get_path(file_name)) diff --git a/tests/integration/test_web_selenium.py b/tests/integration/test_web_selenium.py index 6f594a78..baf3653c 100644 --- a/tests/integration/test_web_selenium.py +++ b/tests/integration/test_web_selenium.py @@ -1,9 +1,13 @@ +import pytest from pytest_mock import MockerFixture from autogpt.agent.agent import Agent from autogpt.commands.web_selenium import browse_website +from tests.utils import requires_api_key +@pytest.mark.vcr +@requires_api_key("OPENAI_API_KEY") def test_browse_website(agent: Agent, patched_api_requestor: MockerFixture): url = "https://barrel-roll.com" question = "How to execute a barrel roll" From f277e6aa2dcc1000c49e08dc18780af03f3a6f7f Mon Sep 17 00:00:00 2001 From: Reinier van der Leer Date: Tue, 13 Jun 2023 00:14:15 +0200 Subject: [PATCH 56/97] Strip spaces from the end of truncated ai_name in log_cycle.py (#4672) --- autogpt/log_cycle/log_cycle.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autogpt/log_cycle/log_cycle.py b/autogpt/log_cycle/log_cycle.py index 71aecda8..ebceb57e 100644 --- a/autogpt/log_cycle/log_cycle.py +++ b/autogpt/log_cycle/log_cycle.py @@ -43,7 +43,7 @@ class LogCycleHandler: return outer_folder_path def get_agent_short_name(self, ai_name): - return ai_name[:15] if ai_name else DEFAULT_PREFIX + return ai_name[:15].rstrip() if ai_name else DEFAULT_PREFIX def create_inner_directory(self, outer_folder_path: str, cycle_count: int) -> str: nested_folder_name = str(cycle_count).zfill(3) From 7bf39cbb72a838fae5fc2ac70f6b8a5cf421e6ca Mon Sep 17 00:00:00 2001 From: Kinance Date: Tue, 13 Jun 2023 08:29:11 +0900 Subject: [PATCH 57/97] Include the token 
length of the current summary (#4670) Co-authored-by: merwanehamadi --- autogpt/memory/message_history.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/autogpt/memory/message_history.py b/autogpt/memory/message_history.py index 7f307536..f4a2217f 100644 --- a/autogpt/memory/message_history.py +++ b/autogpt/memory/message_history.py @@ -174,6 +174,7 @@ class MessageHistory: # TODO make this default dynamic prompt_template_length = 100 max_tokens = OPEN_AI_CHAT_MODELS.get(cfg.fast_llm_model).max_tokens + summary_tlength = count_string_tokens(str(self.summary), cfg.fast_llm_model) batch = [] batch_tlength = 0 @@ -181,9 +182,15 @@ class MessageHistory: for event in new_events: event_tlength = count_string_tokens(str(event), cfg.fast_llm_model) - if batch_tlength + event_tlength > max_tokens - prompt_template_length: + if ( + batch_tlength + event_tlength + > max_tokens - prompt_template_length - summary_tlength + ): # The batch is full. Summarize it and start a new one. self.summarize_batch(batch, cfg) + summary_tlength = count_string_tokens( + str(self.summary), cfg.fast_llm_model + ) batch = [event] batch_tlength = event_tlength else: From 07d9b584f7e1cb1364e2f456831c1c6fc6ce9855 Mon Sep 17 00:00:00 2001 From: Erik Peterson Date: Tue, 13 Jun 2023 09:54:50 -0700 Subject: [PATCH 58/97] Correct and clean up JSON handling (#4655) * Correct and clean up JSON handling * Use ast for message history too * Lint * Add comments explaining why we use literal_eval * Add descriptions to llm_response_format schema * Parse responses in code blocks * Be more careful when parsing in code blocks * Lint --- autogpt/agent/agent.py | 13 +- autogpt/commands/execute_code.py | 2 +- autogpt/json_utils/json_fix_general.py | 121 --------- autogpt/json_utils/json_fix_llm.py | 239 ------------------ autogpt/json_utils/llm_response_format_1.json | 24 +- autogpt/json_utils/utilities.py | 64 +++-- autogpt/memory/message_history.py | 8 +- autogpt/prompts/generator.py | 19 +- autogpt/prompts/prompt.py | 2 +- prompt_settings.yaml | 3 +- tests/challenges/current_score.json | 2 +- tests/unit/test_json_parser.py | 71 ------ tests/unit/test_json_utils_llm.py | 114 --------- tests/unit/test_llm_utils.py | 128 ---------- tests/unit/test_utils.py | 55 ++++ 15 files changed, 135 insertions(+), 730 deletions(-) delete mode 100644 autogpt/json_utils/json_fix_general.py delete mode 100644 autogpt/json_utils/json_fix_llm.py delete mode 100644 tests/unit/test_json_parser.py delete mode 100644 tests/unit/test_json_utils_llm.py delete mode 100644 tests/unit/test_llm_utils.py diff --git a/autogpt/agent/agent.py b/autogpt/agent/agent.py index 44180642..5a236f67 100644 --- a/autogpt/agent/agent.py +++ b/autogpt/agent/agent.py @@ -1,3 +1,4 @@ +import json import signal import sys from datetime import datetime @@ -7,8 +8,7 @@ from colorama import Fore, Style from autogpt.commands.command import CommandRegistry from autogpt.config import Config from autogpt.config.ai_config import AIConfig -from autogpt.json_utils.json_fix_llm import fix_json_using_multiple_techniques -from autogpt.json_utils.utilities import LLM_DEFAULT_RESPONSE_FORMAT, validate_json +from autogpt.json_utils.utilities import extract_json_from_response, validate_json from autogpt.llm.base import ChatSequence from autogpt.llm.chat import chat_with_ai, create_chat_completion from autogpt.llm.providers.openai import OPEN_AI_CHAT_MODELS @@ -144,7 +144,13 @@ class Agent: self.config.fast_llm_model, ) - assistant_reply_json = 
fix_json_using_multiple_techniques(assistant_reply) + try: + assistant_reply_json = extract_json_from_response(assistant_reply) + validate_json(assistant_reply_json) + except json.JSONDecodeError as e: + logger.error(f"Exception while validating assistant reply JSON: {e}") + assistant_reply_json = {} + for plugin in self.config.plugins: if not plugin.can_handle_post_planning(): continue @@ -152,7 +158,6 @@ class Agent: # Print Assistant thoughts if assistant_reply_json != {}: - validate_json(assistant_reply_json, LLM_DEFAULT_RESPONSE_FORMAT) # Get command name and arguments try: print_assistant_thoughts( diff --git a/autogpt/commands/execute_code.py b/autogpt/commands/execute_code.py index 6d3703f1..109caa3a 100644 --- a/autogpt/commands/execute_code.py +++ b/autogpt/commands/execute_code.py @@ -4,11 +4,11 @@ import subprocess from pathlib import Path import docker -from confection import Config from docker.errors import ImageNotFound from autogpt.agent.agent import Agent from autogpt.commands.command import command +from autogpt.config import Config from autogpt.logs import logger from autogpt.setup import CFG from autogpt.workspace.workspace import Workspace diff --git a/autogpt/json_utils/json_fix_general.py b/autogpt/json_utils/json_fix_general.py deleted file mode 100644 index e485aca1..00000000 --- a/autogpt/json_utils/json_fix_general.py +++ /dev/null @@ -1,121 +0,0 @@ -"""This module contains functions to fix JSON strings using general programmatic approaches, suitable for addressing -common JSON formatting issues.""" -from __future__ import annotations - -import contextlib -import json -import re -from typing import Optional - -from autogpt.config import Config -from autogpt.json_utils.utilities import extract_char_position -from autogpt.logs import logger - -CFG = Config() - - -def fix_invalid_escape(json_to_load: str, error_message: str) -> str: - """Fix invalid escape sequences in JSON strings. - - Args: - json_to_load (str): The JSON string. - error_message (str): The error message from the JSONDecodeError - exception. - - Returns: - str: The JSON string with invalid escape sequences fixed. - """ - while error_message.startswith("Invalid \\escape"): - bad_escape_location = extract_char_position(error_message) - json_to_load = ( - json_to_load[:bad_escape_location] + json_to_load[bad_escape_location + 1 :] - ) - try: - json.loads(json_to_load) - return json_to_load - except json.JSONDecodeError as e: - logger.debug("json loads error - fix invalid escape", e) - error_message = str(e) - return json_to_load - - -def balance_braces(json_string: str) -> Optional[str]: - """ - Balance the braces in a JSON string. - - Args: - json_string (str): The JSON string. - - Returns: - str: The JSON string with braces balanced. - """ - - open_braces_count = json_string.count("{") - close_braces_count = json_string.count("}") - - while open_braces_count > close_braces_count: - json_string += "}" - close_braces_count += 1 - - while close_braces_count > open_braces_count: - json_string = json_string.rstrip("}") - close_braces_count -= 1 - - with contextlib.suppress(json.JSONDecodeError): - json.loads(json_string) - return json_string - - -def add_quotes_to_property_names(json_string: str) -> str: - """ - Add quotes to property names in a JSON string. - - Args: - json_string (str): The JSON string. - - Returns: - str: The JSON string with quotes added to property names. 
- """ - - def replace_func(match: re.Match) -> str: - return f'"{match[1]}":' - - property_name_pattern = re.compile(r"(\w+):") - corrected_json_string = property_name_pattern.sub(replace_func, json_string) - - try: - json.loads(corrected_json_string) - return corrected_json_string - except json.JSONDecodeError as e: - raise e - - -def correct_json(json_to_load: str) -> str: - """ - Correct common JSON errors. - Args: - json_to_load (str): The JSON string. - """ - - try: - logger.debug("json", json_to_load) - json.loads(json_to_load) - return json_to_load - except json.JSONDecodeError as e: - logger.debug("json loads error", e) - error_message = str(e) - if error_message.startswith("Invalid \\escape"): - json_to_load = fix_invalid_escape(json_to_load, error_message) - if error_message.startswith( - "Expecting property name enclosed in double quotes" - ): - json_to_load = add_quotes_to_property_names(json_to_load) - try: - json.loads(json_to_load) - return json_to_load - except json.JSONDecodeError as e: - logger.debug("json loads error - add quotes", e) - error_message = str(e) - if balanced_str := balance_braces(json_to_load): - return balanced_str - return json_to_load diff --git a/autogpt/json_utils/json_fix_llm.py b/autogpt/json_utils/json_fix_llm.py deleted file mode 100644 index 9e9fe533..00000000 --- a/autogpt/json_utils/json_fix_llm.py +++ /dev/null @@ -1,239 +0,0 @@ -"""This module contains functions to fix JSON strings generated by LLM models, such as ChatGPT, using the assistance -of the ChatGPT API or LLM models.""" -from __future__ import annotations - -import contextlib -import json -from typing import Any, Dict - -from colorama import Fore -from regex import regex - -from autogpt.config import Config -from autogpt.json_utils.json_fix_general import correct_json -from autogpt.llm.utils import call_ai_function -from autogpt.logs import logger -from autogpt.speech import say_text - -JSON_SCHEMA = """ -{ - "command": { - "name": "command name", - "args": { - "arg name": "value" - } - }, - "thoughts": - { - "text": "thought", - "reasoning": "reasoning", - "plan": "- short bulleted\n- list that conveys\n- long-term plan", - "criticism": "constructive self-criticism", - "speak": "thoughts summary to say to user" - } -} -""" - -CFG = Config() - - -def auto_fix_json(json_string: str, schema: str) -> str: - """Fix the given JSON string to make it parseable and fully compliant with - the provided schema using GPT-3. - - Args: - json_string (str): The JSON string to fix. - schema (str): The schema to use to fix the JSON. - Returns: - str: The fixed JSON string. - """ - # Try to fix the JSON using GPT: - function_string = "def fix_json(json_string: str, schema:str=None) -> str:" - args = [f"'''{json_string}'''", f"'''{schema}'''"] - description_string = ( - "This function takes a JSON string and ensures that it" - " is parseable and fully compliant with the provided schema. If an object" - " or field specified in the schema isn't contained within the correct JSON," - " it is omitted. The function also escapes any double quotes within JSON" - " string values to ensure that they are valid. If the JSON string contains" - " any None or NaN values, they are replaced with null before being parsed." 
- ) - - # If it doesn't already start with a "`", add one: - if not json_string.startswith("`"): - json_string = "```json\n" + json_string + "\n```" - result_string = call_ai_function( - function_string, args, description_string, model=CFG.fast_llm_model - ) - logger.debug("------------ JSON FIX ATTEMPT ---------------") - logger.debug(f"Original JSON: {json_string}") - logger.debug("-----------") - logger.debug(f"Fixed JSON: {result_string}") - logger.debug("----------- END OF FIX ATTEMPT ----------------") - - try: - json.loads(result_string) # just check the validity - return result_string - except json.JSONDecodeError: # noqa: E722 - # Get the call stack: - # import traceback - # call_stack = traceback.format_exc() - # print(f"Failed to fix JSON: '{json_string}' "+call_stack) - return "failed" - - -def fix_json_using_multiple_techniques(assistant_reply: str) -> Dict[Any, Any]: - """Fix the given JSON string to make it parseable and fully compliant with two techniques. - - Args: - json_string (str): The JSON string to fix. - - Returns: - str: The fixed JSON string. - """ - assistant_reply = assistant_reply.strip() - if assistant_reply.startswith("```json"): - assistant_reply = assistant_reply[7:] - if assistant_reply.endswith("```"): - assistant_reply = assistant_reply[:-3] - try: - return json.loads(assistant_reply) # just check the validity - except json.JSONDecodeError: # noqa: E722 - pass - - if assistant_reply.startswith("json "): - assistant_reply = assistant_reply[5:] - assistant_reply = assistant_reply.strip() - try: - return json.loads(assistant_reply) # just check the validity - except json.JSONDecodeError: # noqa: E722 - pass - - # Parse and print Assistant response - assistant_reply_json = fix_and_parse_json(assistant_reply) - logger.debug("Assistant reply JSON: %s", str(assistant_reply_json)) - if assistant_reply_json == {}: - assistant_reply_json = attempt_to_fix_json_by_finding_outermost_brackets( - assistant_reply - ) - - logger.debug("Assistant reply JSON 2: %s", str(assistant_reply_json)) - if assistant_reply_json != {}: - return assistant_reply_json - - logger.error( - "Error: The following AI output couldn't be converted to a JSON:\n", - assistant_reply, - ) - if CFG.speak_mode: - say_text("I have received an invalid JSON response from the OpenAI API.") - - return {} - - -def fix_and_parse_json( - json_to_load: str, try_to_fix_with_gpt: bool = True -) -> Dict[Any, Any]: - """Fix and parse JSON string - - Args: - json_to_load (str): The JSON string. - try_to_fix_with_gpt (bool, optional): Try to fix the JSON with GPT. - Defaults to True. - - Returns: - str or dict[Any, Any]: The parsed JSON. - """ - - with contextlib.suppress(json.JSONDecodeError): - json_to_load = json_to_load.replace("\t", "") - return json.loads(json_to_load) - - with contextlib.suppress(json.JSONDecodeError): - json_to_load = correct_json(json_to_load) - return json.loads(json_to_load) - # Let's do something manually: - # sometimes GPT responds with something BEFORE the braces: - # "I'm sorry, I don't understand. Please try again." - # {"text": "I'm sorry, I don't understand. 
Please try again.", - # "confidence": 0.0} - # So let's try to find the first brace and then parse the rest - # of the string - try: - brace_index = json_to_load.index("{") - maybe_fixed_json = json_to_load[brace_index:] - last_brace_index = maybe_fixed_json.rindex("}") - maybe_fixed_json = maybe_fixed_json[: last_brace_index + 1] - return json.loads(maybe_fixed_json) - except (json.JSONDecodeError, ValueError) as e: - return try_ai_fix(try_to_fix_with_gpt, e, json_to_load) - - -def try_ai_fix( - try_to_fix_with_gpt: bool, exception: Exception, json_to_load: str -) -> Dict[Any, Any]: - """Try to fix the JSON with the AI - - Args: - try_to_fix_with_gpt (bool): Whether to try to fix the JSON with the AI. - exception (Exception): The exception that was raised. - json_to_load (str): The JSON string to load. - - Raises: - exception: If try_to_fix_with_gpt is False. - - Returns: - str or dict[Any, Any]: The JSON string or dictionary. - """ - if not try_to_fix_with_gpt: - raise exception - if CFG.debug_mode: - logger.warn( - "Warning: Failed to parse AI output, attempting to fix." - "\n If you see this warning frequently, it's likely that" - " your prompt is confusing the AI. Try changing it up" - " slightly." - ) - # Now try to fix this up using the ai_functions - ai_fixed_json = auto_fix_json(json_to_load, JSON_SCHEMA) - - if ai_fixed_json != "failed": - return json.loads(ai_fixed_json) - # This allows the AI to react to the error message, - # which usually results in it correcting its ways. - # logger.error("Failed to fix AI output, telling the AI.") - return {} - - -def attempt_to_fix_json_by_finding_outermost_brackets(json_string: str): - if CFG.speak_mode and CFG.debug_mode: - say_text( - "I have received an invalid JSON response from the OpenAI API. " - "Trying to fix it now." - ) - logger.error("Attempting to fix JSON by finding outermost brackets\n") - - try: - json_pattern = regex.compile(r"\{(?:[^{}]|(?R))*\}") - json_match = json_pattern.search(json_string) - - if json_match: - # Extract the valid JSON object from the string - json_string = json_match.group(0) - logger.typewriter_log( - title="Apparently json was fixed.", title_color=Fore.GREEN - ) - if CFG.speak_mode and CFG.debug_mode: - say_text("Apparently json was fixed.") - else: - return {} - - except (json.JSONDecodeError, ValueError): - if CFG.debug_mode: - logger.error(f"Error: Invalid JSON: {json_string}\n") - if CFG.speak_mode: - say_text("Didn't work. 
I will have to ignore this response then.") - logger.error("Error: Invalid JSON, setting it to empty JSON now.\n") - json_string = {} - - return fix_and_parse_json(json_string) diff --git a/autogpt/json_utils/llm_response_format_1.json b/autogpt/json_utils/llm_response_format_1.json index 9aa33352..17101dda 100644 --- a/autogpt/json_utils/llm_response_format_1.json +++ b/autogpt/json_utils/llm_response_format_1.json @@ -5,11 +5,25 @@ "thoughts": { "type": "object", "properties": { - "text": {"type": "string"}, - "reasoning": {"type": "string"}, - "plan": {"type": "string"}, - "criticism": {"type": "string"}, - "speak": {"type": "string"} + "text": { + "type": "string", + "description": "thoughts" + }, + "reasoning": { + "type": "string" + }, + "plan": { + "type": "string", + "description": "- short bulleted\n- list that conveys\n- long-term plan" + }, + "criticism": { + "type": "string", + "description": "constructive self-criticism" + }, + "speak": { + "type": "string", + "description": "thoughts summary to say to user" + } }, "required": ["text", "reasoning", "plan", "criticism", "speak"], "additionalProperties": false diff --git a/autogpt/json_utils/utilities.py b/autogpt/json_utils/utilities.py index 933de8e9..01848871 100644 --- a/autogpt/json_utils/utilities.py +++ b/autogpt/json_utils/utilities.py @@ -1,7 +1,8 @@ """Utilities for the json_fixes package.""" +import ast import json import os.path -import re +from typing import Any from jsonschema import Draft7Validator @@ -12,37 +13,47 @@ CFG = Config() LLM_DEFAULT_RESPONSE_FORMAT = "llm_response_format_1" -def extract_char_position(error_message: str) -> int: - """Extract the character position from the JSONDecodeError message. +def extract_json_from_response(response_content: str) -> dict: + # Sometimes the response includes the JSON in a code block with ``` + if response_content.startswith("```") and response_content.endswith("```"): + # Discard the first and last ```, then re-join in case the response naturally included ``` + response_content = "```".join(response_content.split("```")[1:-1]) - Args: - error_message (str): The error message from the JSONDecodeError - exception. - - Returns: - int: The character position. - """ - - char_pattern = re.compile(r"\(char (\d+)\)") - if match := char_pattern.search(error_message): - return int(match[1]) - else: - raise ValueError("Character position not found in the error message.") + # response content comes from OpenAI as a Python `str(content_dict)`, literal_eval reverses this + try: + return ast.literal_eval(response_content) + except BaseException as e: + logger.error(f"Error parsing JSON response with literal_eval {e}") + # TODO: How to raise an error here without causing the program to exit? 
+ return {} -def validate_json(json_object: object, schema_name: str) -> dict | None: +def llm_response_schema( + schema_name: str = LLM_DEFAULT_RESPONSE_FORMAT, +) -> dict[str, Any]: + filename = os.path.join(os.path.dirname(__file__), f"{schema_name}.json") + with open(filename, "r") as f: + return json.load(f) + + +def validate_json( + json_object: object, schema_name: str = LLM_DEFAULT_RESPONSE_FORMAT +) -> bool: """ :type schema_name: object :param schema_name: str :type json_object: object + + Returns: + bool: Whether the json_object is valid or not """ - scheme_file = os.path.join(os.path.dirname(__file__), f"{schema_name}.json") - with open(scheme_file, "r") as f: - schema = json.load(f) + schema = llm_response_schema(schema_name) validator = Draft7Validator(schema) if errors := sorted(validator.iter_errors(json_object), key=lambda e: e.path): - logger.error("The JSON object is invalid.") + for error in errors: + logger.error(f"JSON Validation Error: {error}") + if CFG.debug_mode: logger.error( json.dumps(json_object, indent=4) @@ -51,10 +62,11 @@ def validate_json(json_object: object, schema_name: str) -> dict | None: for error in errors: logger.error(f"Error: {error.message}") - else: - logger.debug("The JSON object is valid.") + return False - return json_object + logger.debug("The JSON object is valid.") + + return True def validate_json_string(json_string: str, schema_name: str) -> dict | None: @@ -66,7 +78,9 @@ def validate_json_string(json_string: str, schema_name: str) -> dict | None: try: json_loaded = json.loads(json_string) - return validate_json(json_loaded, schema_name) + if not validate_json(json_loaded, schema_name): + return None + return json_loaded except: return None diff --git a/autogpt/memory/message_history.py b/autogpt/memory/message_history.py index f4a2217f..be524125 100644 --- a/autogpt/memory/message_history.py +++ b/autogpt/memory/message_history.py @@ -11,6 +11,7 @@ if TYPE_CHECKING: from autogpt.config import Config from autogpt.json_utils.utilities import ( LLM_DEFAULT_RESPONSE_FORMAT, + extract_json_from_response, is_string_valid_json, ) from autogpt.llm.base import ChatSequence, Message, MessageRole, MessageType @@ -153,13 +154,14 @@ class MessageHistory: # Remove "thoughts" dictionary from "content" try: - content_dict = json.loads(event.content) + content_dict = extract_json_from_response(event.content) if "thoughts" in content_dict: del content_dict["thoughts"] event.content = json.dumps(content_dict) - except json.decoder.JSONDecodeError: + except json.JSONDecodeError as e: + logger.error(f"Error: Invalid JSON: {e}") if cfg.debug_mode: - logger.error(f"Error: Invalid JSON: {event.content}\n") + logger.error(f"{event.content}") elif event.role.lower() == "system": event.role = "your computer" diff --git a/autogpt/prompts/generator.py b/autogpt/prompts/generator.py index adf64894..7101acfe 100644 --- a/autogpt/prompts/generator.py +++ b/autogpt/prompts/generator.py @@ -1,7 +1,8 @@ """ A module for generating custom prompt strings.""" -import json from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional +from autogpt.json_utils.utilities import llm_response_schema + if TYPE_CHECKING: from autogpt.commands.command import CommandRegistry @@ -25,16 +26,6 @@ class PromptGenerator: self.command_registry: CommandRegistry | None = None self.name = "Bob" self.role = "AI" - self.response_format = { - "thoughts": { - "text": "thought", - "reasoning": "reasoning", - "plan": "- short bulleted\n- list that conveys\n- long-term plan", - "criticism": 
"constructive self-criticism", - "speak": "thoughts summary to say to user", - }, - "command": {"name": "command name", "args": {"arg name": "value"}}, - } def add_constraint(self, constraint: str) -> None: """ @@ -144,7 +135,6 @@ class PromptGenerator: Returns: str: The generated prompt string. """ - formatted_response_format = json.dumps(self.response_format, indent=4) return ( f"Constraints:\n{self._generate_numbered_list(self.constraints)}\n\n" "Commands:\n" @@ -152,7 +142,6 @@ class PromptGenerator: f"Resources:\n{self._generate_numbered_list(self.resources)}\n\n" "Performance Evaluation:\n" f"{self._generate_numbered_list(self.performance_evaluation)}\n\n" - "You should only respond in JSON format as described below \nResponse" - f" Format: \n{formatted_response_format} \nEnsure the response can be" - " parsed by Python json.loads" + "Respond with only valid JSON conforming to the following schema: \n" + f"{llm_response_schema()}\n" ) diff --git a/autogpt/prompts/prompt.py b/autogpt/prompts/prompt.py index 61238657..17d78bd1 100644 --- a/autogpt/prompts/prompt.py +++ b/autogpt/prompts/prompt.py @@ -11,7 +11,7 @@ from autogpt.utils import clean_input CFG = Config() -DEFAULT_TRIGGERING_PROMPT = "Determine exactly one command to use, and respond using the format specified above:" +DEFAULT_TRIGGERING_PROMPT = "Determine exactly one command to use, and respond using the JSON schema specified previously:" def build_default_prompt_generator() -> PromptGenerator: diff --git a/prompt_settings.yaml b/prompt_settings.yaml index b8e7c0d2..244886b5 100644 --- a/prompt_settings.yaml +++ b/prompt_settings.yaml @@ -14,6 +14,5 @@ performance_evaluations: [ 'Continuously review and analyze your actions to ensure you are performing to the best of your abilities.', 'Constructively self-criticize your big-picture behavior constantly.', 'Reflect on past decisions and strategies to refine your approach.', - 'Every command has a cost, so be smart and efficient. Aim to complete tasks in the least number of steps.', - 'Write all code to a file.' + 'Every command has a cost, so be smart and efficient. Aim to complete tasks in the least number of steps.' 
] diff --git a/tests/challenges/current_score.json b/tests/challenges/current_score.json index 4d747f03..d8af6ab7 100644 --- a/tests/challenges/current_score.json +++ b/tests/challenges/current_score.json @@ -49,4 +49,4 @@ "max_level_beaten": null } } -} +} \ No newline at end of file diff --git a/tests/unit/test_json_parser.py b/tests/unit/test_json_parser.py deleted file mode 100644 index be5f0733..00000000 --- a/tests/unit/test_json_parser.py +++ /dev/null @@ -1,71 +0,0 @@ -from unittest import TestCase - -from autogpt.json_utils.json_fix_llm import fix_and_parse_json - - -class TestParseJson(TestCase): - def test_valid_json(self): - """Test that a valid JSON string is parsed correctly.""" - json_str = '{"name": "John", "age": 30, "city": "New York"}' - obj = fix_and_parse_json(json_str) - self.assertEqual(obj, {"name": "John", "age": 30, "city": "New York"}) - - def test_invalid_json_minor(self): - """Test that an invalid JSON string can not be fixed without gpt""" - json_str = '{"name": "John", "age": 30, "city": "New York",}' - with self.assertRaises(Exception): - fix_and_parse_json(json_str, try_to_fix_with_gpt=False) - - def test_invalid_json_major_with_gpt(self): - """Test that an invalid JSON string raises an error when try_to_fix_with_gpt is False""" - json_str = 'BEGIN: "name": "John" - "age": 30 - "city": "New York" :END' - with self.assertRaises(Exception): - fix_and_parse_json(json_str, try_to_fix_with_gpt=False) - - def test_invalid_json_major_without_gpt(self): - """Test that a REALLY invalid JSON string raises an error when try_to_fix_with_gpt is False""" - json_str = 'BEGIN: "name": "John" - "age": 30 - "city": "New York" :END' - # Assert that this raises an exception: - with self.assertRaises(Exception): - fix_and_parse_json(json_str, try_to_fix_with_gpt=False) - - def test_invalid_json_leading_sentence_with_gpt(self): - """Test that a REALLY invalid JSON string raises an error when try_to_fix_with_gpt is False""" - json_str = """I suggest we start by browsing the repository to find any issues that we can fix. - -{ - "command": { - "name": "browse_website", - "args":{ - "url": "https://github.com/Torantulino/Auto-GPT" - } - }, - "thoughts": - { - "text": "I suggest we start browsing the repository to find any issues that we can fix.", - "reasoning": "Browsing the repository will give us an idea of the current state of the codebase and identify any issues that we can address to improve the repo.", - "plan": "- Look through the repository to find any issues.\n- Investigate any issues to determine what needs to be fixed\n- Identify possible solutions to fix the issues\n- Open Pull Requests with fixes", - "criticism": "I should be careful while browsing so as not to accidentally introduce any new bugs or issues.", - "speak": "I will start browsing the repository to find any issues we can fix." 
- } -}""" - good_obj = { - "command": { - "name": "browse_website", - "args": {"url": "https://github.com/Torantulino/Auto-GPT"}, - }, - "thoughts": { - "text": "I suggest we start browsing the repository to find any issues that we can fix.", - "reasoning": "Browsing the repository will give us an idea of the current state of the codebase and identify any issues that we can address to improve the repo.", - "plan": "- Look through the repository to find any issues.\n- Investigate any issues to determine what needs to be fixed\n- Identify possible solutions to fix the issues\n- Open Pull Requests with fixes", - "criticism": "I should be careful while browsing so as not to accidentally introduce any new bugs or issues.", - "speak": "I will start browsing the repository to find any issues we can fix.", - }, - } - - # # Assert that this can be fixed with GPT - # self.assertEqual(fix_and_parse_json(json_str), good_obj) - - # Assert that trying to fix this without GPT raises an exception - with self.assertRaises(Exception): - fix_and_parse_json(json_str, try_to_fix_with_gpt=False) diff --git a/tests/unit/test_json_utils_llm.py b/tests/unit/test_json_utils_llm.py deleted file mode 100644 index 93e01acb..00000000 --- a/tests/unit/test_json_utils_llm.py +++ /dev/null @@ -1,114 +0,0 @@ -# Generated by CodiumAI - -from autogpt.json_utils.json_fix_llm import ( - fix_and_parse_json, - fix_json_using_multiple_techniques, -) - -""" -Code Analysis - -Objective: -- The objective of the function is to fix a given JSON string to make it parseable and fully compliant with two techniques. - -Inputs: -- The function takes in a string called 'assistant_reply', which is the JSON string to be fixed. - -Flow: -- The function first calls the 'fix_and_parse_json' function to parse and print the Assistant response. -- If the parsed JSON is an empty dictionary, the function calls the 'attempt_to_fix_json_by_finding_outermost_brackets' function to fix the JSON string. -- If the parsed JSON is not an empty dictionary, the function returns the parsed JSON. -- If the parsed JSON is an empty dictionary and cannot be fixed, the function logs an error and returns an empty dictionary. - -Outputs: -- The main output of the function is a dictionary containing the fixed JSON string. - -Additional aspects: -- The function uses two techniques to fix the JSON string: parsing and finding outermost brackets. -- The function logs an error if the JSON string cannot be fixed and returns an empty dictionary. -- The function uses the 'CFG' object to determine whether to speak the error message or not. -""" - - -class TestFixJsonUsingMultipleTechniques: - # Tests that the function successfully fixes and parses a JSON string that is already compliant with both techniques. - def test_fix_and_parse_json_happy_path(self): - # Happy path test case where the JSON string is already compliant with both techniques - json_string = '{"text": "Hello world", "confidence": 0.9}' - expected_output = {"text": "Hello world", "confidence": 0.9} - assert fix_json_using_multiple_techniques(json_string) == expected_output - - # Tests that the function successfully fixes and parses a JSON string that contains only whitespace characters. 
- # @requires_api_key("OPEN_API_KEY") - def test_fix_and_parse_json_whitespace(self, mocker): - # Happy path test case where the JSON string contains only whitespace characters - json_string = " \n\t " - - # mock try_ai_fix to avoid calling the AI model: - mocker.patch("autogpt.json_utils.json_fix_llm.try_ai_fix", return_value={}) - - expected_output = {} - assert fix_json_using_multiple_techniques(json_string) == expected_output - - # Tests that the function successfully converts a string with arrays to an array - def test_fix_and_parse_json_array(self): - # Happy path test case where the JSON string contains an array of string - json_string = '[ "Add type hints", "Move docstrings", "Consider using" ]' - expected_output = ["Add type hints", "Move docstrings", "Consider using"] - assert fix_json_using_multiple_techniques(json_string) == expected_output - - # Tests that the function returns an empty dictionary when the JSON string is not parseable and cannot be fixed using either technique. - # @requires_api_key("OPEN_API_KEY") - def test_fix_and_parse_json_can_not(self, mocker): - # Edge case test case where the JSON string is not parseable and cannot be fixed using either technique - json_string = "This is not a JSON string" - - # mock try_ai_fix to avoid calling the AI model: - mocker.patch("autogpt.json_utils.json_fix_llm.try_ai_fix", return_value={}) - - expected_output = {} - - # Use the actual function name in the test - result = fix_json_using_multiple_techniques(json_string) - - assert result == expected_output - - # Tests that the function returns an empty dictionary when the JSON string is empty. - # @requires_api_key("OPEN_API_KEY") - def test_fix_and_parse_json_empty_string(self, mocker): - # Arrange - json_string = "" - - # Act - # mock try_ai_fix to avoid calling the AI model: - mocker.patch("autogpt.json_utils.json_fix_llm.try_ai_fix", return_value={}) - - result = fix_and_parse_json(json_string) - - # Assert - assert result == {} - - # Tests that the function successfully fixes and parses a JSON string that contains escape characters. - def test_fix_and_parse_json_escape_characters(self): - # Arrange - json_string = '{"text": "This is a \\"test\\" string."}' - - # Act - result = fix_json_using_multiple_techniques(json_string) - - # Assert - assert result == {"text": 'This is a "test" string.'} - - # Tests that the function successfully fixes and parses a JSON string that contains nested objects or arrays. 
- def test_fix_and_parse_json_nested_objects(self): - # Arrange - json_string = '{"person": {"name": "John", "age": 30}, "hobbies": ["reading", "swimming"]}' - - # Act - result = fix_json_using_multiple_techniques(json_string) - - # Assert - assert result == { - "person": {"name": "John", "age": 30}, - "hobbies": ["reading", "swimming"], - } diff --git a/tests/unit/test_llm_utils.py b/tests/unit/test_llm_utils.py deleted file mode 100644 index 8c4de115..00000000 --- a/tests/unit/test_llm_utils.py +++ /dev/null @@ -1,128 +0,0 @@ -from unittest.mock import patch - -import pytest -from openai.error import APIError, RateLimitError - -from autogpt.llm import utils as llm_utils - - -@pytest.fixture(params=[RateLimitError, APIError]) -def error(request): - if request.param == APIError: - return request.param("Error", http_status=502) - else: - return request.param("Error") - - -def error_factory(error_instance, error_count, retry_count, warn_user=True): - class RaisesError: - def __init__(self): - self.count = 0 - - @llm_utils.retry_openai_api( - num_retries=retry_count, backoff_base=0.001, warn_user=warn_user - ) - def __call__(self): - self.count += 1 - if self.count <= error_count: - raise error_instance - return self.count - - return RaisesError() - - -def test_retry_open_api_no_error(capsys): - @llm_utils.retry_openai_api() - def f(): - return 1 - - result = f() - assert result == 1 - - output = capsys.readouterr() - assert output.out == "" - assert output.err == "" - - -@pytest.mark.parametrize( - "error_count, retry_count, failure", - [(2, 10, False), (2, 2, False), (10, 2, True), (3, 2, True), (1, 0, True)], - ids=["passing", "passing_edge", "failing", "failing_edge", "failing_no_retries"], -) -def test_retry_open_api_passing(capsys, error, error_count, retry_count, failure): - call_count = min(error_count, retry_count) + 1 - - raises = error_factory(error, error_count, retry_count) - if failure: - with pytest.raises(type(error)): - raises() - else: - result = raises() - assert result == call_count - - assert raises.count == call_count - - output = capsys.readouterr() - - if error_count and retry_count: - if type(error) == RateLimitError: - assert "Reached rate limit, passing..." in output.out - assert "Please double check" in output.out - if type(error) == APIError: - assert "API Bad gateway" in output.out - else: - assert output.out == "" - - -def test_retry_open_api_rate_limit_no_warn(capsys): - error_count = 2 - retry_count = 10 - - raises = error_factory(RateLimitError, error_count, retry_count, warn_user=False) - result = raises() - call_count = min(error_count, retry_count) + 1 - assert result == call_count - assert raises.count == call_count - - output = capsys.readouterr() - - assert "Reached rate limit, passing..." in output.out - assert "Please double check" not in output.out - - -def test_retry_openapi_other_api_error(capsys): - error_count = 2 - retry_count = 10 - - raises = error_factory(APIError("Error", http_status=500), error_count, retry_count) - - with pytest.raises(APIError): - raises() - call_count = 1 - assert raises.count == call_count - - output = capsys.readouterr() - assert output.out == "" - - -def test_check_model(api_manager): - """ - Test if check_model() returns original model when valid. - Test if check_model() returns gpt-3.5-turbo when model is invalid. 
- """ - with patch("openai.Model.list") as mock_list_models: - # Test when correct model is returned - mock_list_models.return_value = {"data": [{"id": "gpt-4"}]} - result = llm_utils.check_model("gpt-4", "smart_llm_model") - assert result == "gpt-4" - - # Reset api manager models - api_manager.models = None - - # Test when incorrect model is returned - mock_list_models.return_value = {"data": [{"id": "gpt-3.5-turbo"}]} - result = llm_utils.check_model("gpt-4", "fast_llm_model") - assert result == "gpt-3.5-turbo" - - # Reset api manager models - api_manager.models = None diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index 099176ba..f9a471c2 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -1,8 +1,10 @@ import os from unittest.mock import patch +import pytest import requests +from autogpt.json_utils.utilities import extract_json_from_response, validate_json from autogpt.utils import ( get_bulletin_from_web, get_current_git_branch, @@ -13,6 +15,37 @@ from autogpt.utils import ( from tests.utils import skip_in_ci +@pytest.fixture +def valid_json_response() -> dict: + return { + "thoughts": { + "text": "My task is complete. I will use the 'task_complete' command to shut down.", + "reasoning": "I will use the 'task_complete' command because it allows me to shut down and signal that my task is complete.", + "plan": "I will use the 'task_complete' command with the reason 'Task complete: retrieved Tesla's revenue in 2022.' to shut down.", + "criticism": "I need to ensure that I have completed all necessary tasks before shutting down.", + "speak": "", + }, + "command": { + "name": "task_complete", + "args": {"reason": "Task complete: retrieved Tesla's revenue in 2022."}, + }, + } + + +@pytest.fixture +def invalid_json_response() -> dict: + return { + "thoughts": { + "text": "My task is complete. I will use the 'task_complete' command to shut down.", + "reasoning": "I will use the 'task_complete' command because it allows me to shut down and signal that my task is complete.", + "plan": "I will use the 'task_complete' command with the reason 'Task complete: retrieved Tesla's revenue in 2022.' 
to shut down.", + "criticism": "I need to ensure that I have completed all necessary tasks before shutting down.", + "speak": "", + }, + "command": {"name": "", "args": {}}, + } + + def test_validate_yaml_file_valid(): with open("valid_test_file.yaml", "w") as f: f.write("setting: value") @@ -150,3 +183,25 @@ def test_get_current_git_branch_failure(mock_repo): branch_name = get_current_git_branch() assert branch_name == "" + + +def test_validate_json_valid(valid_json_response): + assert validate_json(valid_json_response) + + +def test_validate_json_invalid(invalid_json_response): + assert not validate_json(valid_json_response) + + +def test_extract_json_from_response(valid_json_response: dict): + emulated_response_from_openai = str(valid_json_response) + assert ( + extract_json_from_response(emulated_response_from_openai) == valid_json_response + ) + + +def test_extract_json_from_response_wrapped_in_code_block(valid_json_response: dict): + emulated_response_from_openai = "```" + str(valid_json_response) + "```" + assert ( + extract_json_from_response(emulated_response_from_openai) == valid_json_response + ) From 857c330d5809955aeba3df064f3801bdd6d063f9 Mon Sep 17 00:00:00 2001 From: Auto-GPT-Bot Date: Tue, 13 Jun 2023 16:59:27 +0000 Subject: [PATCH 59/97] Update challenge scores --- tests/challenges/current_score.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/challenges/current_score.json b/tests/challenges/current_score.json index d8af6ab7..4d747f03 100644 --- a/tests/challenges/current_score.json +++ b/tests/challenges/current_score.json @@ -49,4 +49,4 @@ "max_level_beaten": null } } -} \ No newline at end of file +} From 3df8c1b50140ed43cf50d1e9881f87e0380f9d95 Mon Sep 17 00:00:00 2001 From: merwanehamadi Date: Tue, 13 Jun 2023 11:29:20 -0700 Subject: [PATCH 60/97] Make benchmarks create cassettes without using them (#4664) --- .github/workflows/benchmarks.yml | 2 +- tests/challenges/conftest.py | 6 +++--- tests/vcr/__init__.py | 18 ++++++++++++++---- 3 files changed, 18 insertions(+), 8 deletions(-) diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml index ed01127b..e40abf2f 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/benchmarks.yml @@ -50,7 +50,7 @@ jobs: - name: Run pytest with coverage run: | rm -rf tests/Auto-GPT-test-cassettes - pytest -n auto ${{ matrix.config.task }} + pytest -n auto --record-mode=all ${{ matrix.config.task }} env: CI: true PROXY: ${{ secrets.PROXY }} diff --git a/tests/challenges/conftest.py b/tests/challenges/conftest.py index c0604c31..0c13af91 100644 --- a/tests/challenges/conftest.py +++ b/tests/challenges/conftest.py @@ -6,7 +6,7 @@ from _pytest.config.argparsing import Parser from _pytest.fixtures import FixtureRequest from tests.challenges.challenge_decorator.challenge import Challenge -from tests.vcr import BASE_VCR_CONFIG, before_record_response +from tests.vcr import before_record_response def before_record_response_filter_errors( @@ -20,9 +20,9 @@ def before_record_response_filter_errors( @pytest.fixture(scope="module") -def vcr_config() -> Dict[str, Any]: +def vcr_config(get_base_vcr_config: Dict[str, Any]) -> Dict[str, Any]: # this fixture is called by the pytest-recording vcr decorator. 
- return BASE_VCR_CONFIG | { + return get_base_vcr_config | { "before_record_response": before_record_response_filter_errors, } diff --git a/tests/vcr/__init__.py b/tests/vcr/__init__.py index e1a2620c..04ce79fc 100644 --- a/tests/vcr/__init__.py +++ b/tests/vcr/__init__.py @@ -6,8 +6,8 @@ from pytest_mock import MockerFixture from .vcr_filter import PROXY, before_record_request, before_record_response +DEFAULT_RECORD_MODE = "new_episodes" BASE_VCR_CONFIG = { - "record_mode": "new_episodes", "before_record_request": before_record_request, "before_record_response": before_record_response, "filter_headers": [ @@ -20,9 +20,19 @@ BASE_VCR_CONFIG = { @pytest.fixture(scope="session") -def vcr_config(): - # this fixture is called by the pytest-recording vcr decorator. - return BASE_VCR_CONFIG +def vcr_config(get_base_vcr_config): + return get_base_vcr_config + + +@pytest.fixture(scope="session") +def get_base_vcr_config(request): + record_mode = request.config.getoption("--record-mode", default="new_episodes") + config = BASE_VCR_CONFIG + + if record_mode is None: + config["record_mode"] = DEFAULT_RECORD_MODE + + return config @pytest.fixture() From 0c8f2cfd1c3522761011148adfdeff515c6e7b6c Mon Sep 17 00:00:00 2001 From: merwanehamadi Date: Tue, 13 Jun 2023 12:18:39 -0700 Subject: [PATCH 61/97] Fix autogpt docker image not working because missing prompt_settings (#4680) Co-authored-by: Richard Beales --- .dockerignore | 1 + Dockerfile | 1 + 2 files changed, 2 insertions(+) diff --git a/.dockerignore b/.dockerignore index 9ac55109..fef46549 100644 --- a/.dockerignore +++ b/.dockerignore @@ -2,6 +2,7 @@ *.template *.yaml *.yml +!prompt_settings.yaml *.md *.png diff --git a/Dockerfile b/Dockerfile index 68be43d5..a31c78ab 100644 --- a/Dockerfile +++ b/Dockerfile @@ -40,6 +40,7 @@ WORKDIR /app ONBUILD COPY autogpt/ ./autogpt ONBUILD COPY scripts/ ./scripts ONBUILD COPY plugins/ ./plugins +ONBUILD COPY prompt_settings.yaml ./prompt_settings.yaml ONBUILD RUN mkdir ./data FROM autogpt-${BUILD_TYPE} AS auto-gpt From 49d1a5a17be3eb303155803b7de33c2d64df680e Mon Sep 17 00:00:00 2001 From: Erik Peterson Date: Tue, 13 Jun 2023 20:54:55 -0700 Subject: [PATCH 62/97] Rework plugin config to be file-based (#4673) --- .env.template | 14 +- autogpt/config/config.py | 18 +++ autogpt/{plugins.py => plugins/__init__.py} | 59 +++----- autogpt/plugins/plugin_config.py | 14 ++ autogpt/plugins/plugins_config.py | 81 +++++++++++ docs/configuration/options.md | 3 +- docs/plugins.md | 12 ++ tests/conftest.py | 20 ++- tests/integration/test_plugins.py | 71 ---------- tests/unit/test_plugins.py | 148 ++++++++++++-------- 10 files changed, 263 insertions(+), 177 deletions(-) rename autogpt/{plugins.py => plugins/__init__.py} (87%) create mode 100644 autogpt/plugins/plugin_config.py create mode 100644 autogpt/plugins/plugins_config.py delete mode 100644 tests/integration/test_plugins.py diff --git a/.env.template b/.env.template index bf6e2453..06745245 100644 --- a/.env.template +++ b/.env.template @@ -19,6 +19,9 @@ OPENAI_API_KEY=your-openai-api-key ## AI_SETTINGS_FILE - Specifies which AI Settings file to use (defaults to ai_settings.yaml) # AI_SETTINGS_FILE=ai_settings.yaml +## PLUGINS_CONFIG_FILE - The path to the plugins_config.yaml file (Default plugins_config.yaml) +# PLUGINS_CONFIG_FILE=plugins_config.yaml + ## PROMPT_SETTINGS_FILE - Specifies which Prompt Settings file to use (defaults to prompt_settings.yaml) # PROMPT_SETTINGS_FILE=prompt_settings.yaml @@ -38,7 +41,6 @@ OPENAI_API_KEY=your-openai-api-key ## 
DISABLED_COMMAND_CATEGORIES - The list of categories of commands that are disabled (Default: None) # DISABLED_COMMAND_CATEGORIES= - ################################################################################ ### LLM PROVIDER ################################################################################ @@ -194,16 +196,6 @@ OPENAI_API_KEY=your-openai-api-key ## ELEVENLABS_VOICE_ID - Eleven Labs voice ID (Example: None) # ELEVENLABS_VOICE_ID= -################################################################################ -### ALLOWLISTED PLUGINS -################################################################################ - -## ALLOWLISTED_PLUGINS - Sets the listed plugins that are allowed (Default: None) -# ALLOWLISTED_PLUGINS= - -## DENYLISTED_PLUGINS - Sets the listed plugins that are not allowed (Default: None) -# DENYLISTED_PLUGINS= - ################################################################################ ### CHAT MESSAGES ################################################################################ diff --git a/autogpt/config/config.py b/autogpt/config/config.py index df77b383..92712dd7 100644 --- a/autogpt/config/config.py +++ b/autogpt/config/config.py @@ -7,6 +7,7 @@ import yaml from auto_gpt_plugin_template import AutoGPTPluginTemplate from colorama import Fore +import autogpt from autogpt.singleton import Singleton @@ -156,20 +157,37 @@ class Config(metaclass=Singleton): self.plugins: List[AutoGPTPluginTemplate] = [] self.plugins_openai = [] + # Deprecated. Kept for backwards-compatibility. Will remove in a future version. plugins_allowlist = os.getenv("ALLOWLISTED_PLUGINS") if plugins_allowlist: self.plugins_allowlist = plugins_allowlist.split(",") else: self.plugins_allowlist = [] + # Deprecated. Kept for backwards-compatibility. Will remove in a future version. plugins_denylist = os.getenv("DENYLISTED_PLUGINS") if plugins_denylist: self.plugins_denylist = plugins_denylist.split(",") else: self.plugins_denylist = [] + # Avoid circular imports + from autogpt.plugins import DEFAULT_PLUGINS_CONFIG_FILE + + self.plugins_config_file = os.getenv( + "PLUGINS_CONFIG_FILE", DEFAULT_PLUGINS_CONFIG_FILE + ) + self.load_plugins_config() + self.chat_messages_enabled = os.getenv("CHAT_MESSAGES_ENABLED") == "True" + def load_plugins_config(self) -> "autogpt.plugins.PluginsConfig": + # Avoid circular import + from autogpt.plugins.plugins_config import PluginsConfig + + self.plugins_config = PluginsConfig.load_config(global_config=self) + return self.plugins_config + def get_azure_deployment_id_for_model(self, model: str) -> str: """ Returns the relevant deployment id for the model specified. 
diff --git a/autogpt/plugins.py b/autogpt/plugins/__init__.py similarity index 87% rename from autogpt/plugins.py rename to autogpt/plugins/__init__.py index eccea9ab..60022352 100644 --- a/autogpt/plugins.py +++ b/autogpt/plugins/__init__.py @@ -16,10 +16,14 @@ import requests from auto_gpt_plugin_template import AutoGPTPluginTemplate from openapi_python_client.config import Config as OpenAPIConfig -from autogpt.config import Config +from autogpt.config.config import Config from autogpt.logs import logger from autogpt.models.base_open_ai_plugin import BaseOpenAIPlugin +DEFAULT_PLUGINS_CONFIG_FILE = os.path.join( + os.path.dirname(os.path.abspath(__file__)), "..", "..", "plugins_config.yaml" +) + def inspect_zip_for_modules(zip_path: str, debug: bool = False) -> list[str]: """ @@ -215,9 +219,7 @@ def scan_plugins(cfg: Config, debug: bool = False) -> List[AutoGPTPluginTemplate loaded_plugins = [] # Generic plugins plugins_path_path = Path(cfg.plugins_dir) - - logger.debug(f"Allowlisted Plugins: {cfg.plugins_allowlist}") - logger.debug(f"Denylisted Plugins: {cfg.plugins_denylist}") + plugins_config = cfg.plugins_config # Directory-based plugins for plugin_path in [f.path for f in os.scandir(cfg.plugins_dir) if f.is_dir()]: @@ -232,11 +234,14 @@ def scan_plugins(cfg: Config, debug: bool = False) -> List[AutoGPTPluginTemplate __import__(qualified_module_name) plugin = sys.modules[qualified_module_name] + if not plugins_config.is_enabled(plugin_module_name): + logger.warn(f"Plugin {plugin_module_name} found but not configured") + continue + for _, class_obj in inspect.getmembers(plugin): if ( hasattr(class_obj, "_abc_impl") and AutoGPTPluginTemplate in class_obj.__bases__ - and denylist_allowlist_check(plugin_module_name, cfg) ): loaded_plugins.append(class_obj()) @@ -249,6 +254,12 @@ def scan_plugins(cfg: Config, debug: bool = False) -> List[AutoGPTPluginTemplate logger.debug(f"Plugin: {plugin} Module: {module}") zipped_package = zipimporter(str(plugin)) zipped_module = zipped_package.load_module(str(module.parent)) + plugin_module_name = zipped_module.__name__.split(os.path.sep)[-1] + + if not plugins_config.is_enabled(plugin_module_name): + logger.warn(f"Plugin {plugin_module_name} found but not configured") + continue + for key in dir(zipped_module): if key.startswith("__"): continue @@ -257,7 +268,6 @@ def scan_plugins(cfg: Config, debug: bool = False) -> List[AutoGPTPluginTemplate if ( "_abc_impl" in a_keys and a_module.__name__ != "AutoGPTPluginTemplate" - and denylist_allowlist_check(a_module.__name__, cfg) ): loaded_plugins.append(a_module()) @@ -269,40 +279,15 @@ def scan_plugins(cfg: Config, debug: bool = False) -> List[AutoGPTPluginTemplate manifests_specs, cfg, debug ) for url, openai_plugin_meta in manifests_specs_clients.items(): - if denylist_allowlist_check(url, cfg): - plugin = BaseOpenAIPlugin(openai_plugin_meta) - loaded_plugins.append(plugin) + if not plugins_config.is_enabled(url): + logger.warn(f"Plugin {plugin_module_name} found but not configured") + continue + + plugin = BaseOpenAIPlugin(openai_plugin_meta) + loaded_plugins.append(plugin) if loaded_plugins: logger.info(f"\nPlugins found: {len(loaded_plugins)}\n" "--------------------") for plugin in loaded_plugins: logger.info(f"{plugin._name}: {plugin._version} - {plugin._description}") return loaded_plugins - - -def denylist_allowlist_check(plugin_name: str, cfg: Config) -> bool: - """Check if the plugin is in the allowlist or denylist. - - Args: - plugin_name (str): Name of the plugin. - cfg (Config): Config object. 
- - Returns: - True or False - """ - logger.debug(f"Checking if plugin {plugin_name} should be loaded") - if ( - plugin_name in cfg.plugins_denylist - or "all" in cfg.plugins_denylist - or "none" in cfg.plugins_allowlist - ): - logger.debug(f"Not loading plugin {plugin_name} as it was in the denylist.") - return False - if plugin_name in cfg.plugins_allowlist or "all" in cfg.plugins_allowlist: - logger.debug(f"Loading plugin {plugin_name} as it was in the allowlist.") - return True - ack = input( - f"WARNING: Plugin {plugin_name} found. But not in the" - f" allowlist... Load? ({cfg.authorise_key}/{cfg.exit_key}): " - ) - return ack.lower() == cfg.authorise_key diff --git a/autogpt/plugins/plugin_config.py b/autogpt/plugins/plugin_config.py new file mode 100644 index 00000000..53a83b16 --- /dev/null +++ b/autogpt/plugins/plugin_config.py @@ -0,0 +1,14 @@ +from typing import Any + + +class PluginConfig: + """Class for holding configuration of a single plugin""" + + def __init__(self, name: str, enabled: bool = False, config: dict[str, Any] = None): + self.name = name + self.enabled = enabled + # Arbitray config options for this plugin. API keys or plugin-specific options live here. + self.config = config or {} + + def __repr__(self): + return f"PluginConfig('{self.name}', {self.enabled}, {str(self.config)}" diff --git a/autogpt/plugins/plugins_config.py b/autogpt/plugins/plugins_config.py new file mode 100644 index 00000000..7e04e795 --- /dev/null +++ b/autogpt/plugins/plugins_config.py @@ -0,0 +1,81 @@ +import os +from typing import Any, Union + +import yaml + +from autogpt.config.config import Config +from autogpt.logs import logger +from autogpt.plugins.plugin_config import PluginConfig + + +class PluginsConfig: + """Class for holding configuration of all plugins""" + + def __init__(self, plugins_config: dict[str, Any]): + self.plugins = {} + for name, plugin in plugins_config.items(): + if type(plugin) == dict: + self.plugins[name] = PluginConfig( + name, + plugin.get("enabled", False), + plugin.get("config", {}), + ) + elif type(plugin) == PluginConfig: + self.plugins[name] = plugin + else: + raise ValueError(f"Invalid plugin config data type: {type(plugin)}") + + def __repr__(self): + return f"PluginsConfig({self.plugins})" + + def get(self, name: str) -> Union[PluginConfig, None]: + return self.plugins.get(name) + + def is_enabled(self, name) -> bool: + plugin_config = self.plugins.get(name) + return plugin_config and plugin_config.enabled + + @classmethod + def load_config(cls, global_config: Config) -> "PluginsConfig": + empty_config = cls({}) + + try: + config_data = cls.deserialize_config_file(global_config=global_config) + if type(config_data) != dict: + logger.error( + f"Expected plugins config to be a dict, got {type(config_data)}, continuing without plugins" + ) + return empty_config + return cls(config_data) + + except BaseException as e: + logger.error( + f"Plugin config is invalid, continuing without plugins. 
Error: {e}" + ) + return empty_config + + @classmethod + def deserialize_config_file(cls, global_config: Config) -> dict[str, Any]: + plugins_config_path = global_config.plugins_config_file + if not os.path.exists(plugins_config_path): + logger.warn("plugins_config.yaml does not exist, creating base config.") + cls.create_empty_plugins_config(global_config=global_config) + + with open(plugins_config_path, "r") as f: + return yaml.load(f, Loader=yaml.FullLoader) + + @staticmethod + def create_empty_plugins_config(global_config: Config): + """Create an empty plugins_config.yaml file. Fill it with values from old env variables.""" + base_config = {} + + # Backwards-compatibility shim + for plugin_name in global_config.plugins_denylist: + base_config[plugin_name] = {"enabled": False, "config": {}} + + for plugin_name in global_config.plugins_allowlist: + base_config[plugin_name] = {"enabled": True, "config": {}} + + with open(global_config.plugins_config_file, "w+") as f: + f.write(yaml.dump(base_config)) + return base_config diff --git a/docs/configuration/options.md b/docs/configuration/options.md index 125a3a45..b2cbf6bc 100644 --- a/docs/configuration/options.md +++ b/docs/configuration/options.md @@ -5,13 +5,11 @@ Configuration is controlled through the `Config` object. You can set configurati ## Environment Variables - `AI_SETTINGS_FILE`: Location of AI Settings file. Default: ai_settings.yaml -- `ALLOWLISTED_PLUGINS`: List of plugins allowed. Optional. - `AUDIO_TO_TEXT_PROVIDER`: Audio To Text Provider. Only option currently is `huggingface`. Default: huggingface - `AUTHORISE_COMMAND_KEY`: Key response accepted when authorising commands. Default: y - `BROWSE_CHUNK_MAX_LENGTH`: When browsing website, define the length of chunks to summarize. Default: 3000 - `BROWSE_SPACY_LANGUAGE_MODEL`: [spaCy language model](https://spacy.io/usage/models) to use when creating chunks. Default: en_core_web_sm - `CHAT_MESSAGES_ENABLED`: Enable chat messages. Optional -- `DENYLISTED_PLUGINS`: List of plugins not allowed. Optional. - `DISABLED_COMMAND_CATEGORIES`: Command categories to disable. Command categories are Python module names, e.g. autogpt.commands.analyze_code. See the directory `autogpt/commands` in the source for all command modules. Default: None - `ELEVENLABS_API_KEY`: ElevenLabs API Key. Optional. - `ELEVENLABS_VOICE_ID`: ElevenLabs Voice ID. Optional. @@ -34,6 +32,7 @@ Configuration is controlled through the `Config` object. You can set configurati - `OPENAI_API_KEY`: *REQUIRED*- Your [OpenAI API Key](https://platform.openai.com/account/api-keys). - `OPENAI_ORGANIZATION`: Organization ID in OpenAI. Optional. - `PLAIN_OUTPUT`: Plain output, which disables the spinner. Default: False +- `PLUGINS_CONFIG_FILE`: Path of plugins_config.yaml file. Default: plugins_config.yaml - `PROMPT_SETTINGS_FILE`: Location of Prompt Settings file. Default: prompt_settings.yaml - `REDIS_HOST`: Redis Host. Default: localhost - `REDIS_PASSWORD`: Redis Password. Optional. Default: diff --git a/docs/plugins.md b/docs/plugins.md index cc4a3299..74e96f2e 100644 --- a/docs/plugins.md +++ b/docs/plugins.md @@ -2,6 +2,18 @@ ⚠️💀 **WARNING** 💀⚠️: Review the code of any plugin you use thoroughly, as plugins can execute any Python code, potentially leading to malicious activities, such as stealing your API keys. +To configure plugins, you can create or edit the `plugins_config.yaml` file in the root directory of Auto-GPT. This file allows you to enable or disable plugins as desired. 
For specific configuration instructions, please refer to the documentation provided for each plugin. The file should be formatted in YAML. Here is an example for your reference: + +```yaml +plugin_a: + config: + api_key: my-api-key + enabled: false +plugin_b: + config: {} + enabled: true +``` + See our [Plugins Repo](https://github.com/Significant-Gravitas/Auto-GPT-Plugins) for more info on how to install all the amazing plugins the community has built! Alternatively, developers can use the [Auto-GPT Plugin Template](https://github.com/Significant-Gravitas/Auto-GPT-Plugin-Template) as a starting point for creating your own plugins. diff --git a/tests/conftest.py b/tests/conftest.py index ca6b4d4c..2342a3b0 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,7 +1,9 @@ import os from pathlib import Path +from tempfile import TemporaryDirectory import pytest +import yaml from pytest_mock import MockerFixture from autogpt.agent.agent import Agent @@ -32,9 +34,25 @@ def workspace(workspace_root: Path) -> Workspace: return Workspace(workspace_root, restrict_to_workspace=True) +@pytest.fixture +def temp_plugins_config_file(): + """Create a plugins_config.yaml file in a temp directory so that it doesn't mess with existing ones""" + config_directory = TemporaryDirectory() + config_file = os.path.join(config_directory.name, "plugins_config.yaml") + with open(config_file, "w+") as f: + f.write(yaml.dump({})) + + yield config_file + + @pytest.fixture() -def config(mocker: MockerFixture, workspace: Workspace) -> Config: +def config( + temp_plugins_config_file: str, mocker: MockerFixture, workspace: Workspace +) -> Config: config = Config() + config.plugins_dir = "tests/unit/data/test_plugins" + config.plugins_config_file = temp_plugins_config_file + config.load_plugins_config() # Do a little setup and teardown since the config object is a singleton mocker.patch.multiple( diff --git a/tests/integration/test_plugins.py b/tests/integration/test_plugins.py deleted file mode 100644 index 828200c2..00000000 --- a/tests/integration/test_plugins.py +++ /dev/null @@ -1,71 +0,0 @@ -import pytest - -from autogpt.config import Config -from autogpt.plugins import scan_plugins - -PLUGINS_TEST_DIR = "tests/unit/data/test_plugins" -PLUGIN_TEST_OPENAI = "https://weathergpt.vercel.app/" - - -@pytest.fixture -def mock_config_denylist_allowlist_check(): - class MockConfig: - """Mock config object for testing the denylist_allowlist_check function""" - - plugins_denylist = ["BadPlugin"] - plugins_allowlist = ["GoodPlugin"] - authorise_key = "y" - exit_key = "n" - - return MockConfig() - - -@pytest.fixture -def config_with_plugins(): - """Mock config object for testing the scan_plugins function""" - # Test that the function returns the correct number of plugins - cfg = Config() - cfg.plugins_dir = PLUGINS_TEST_DIR - cfg.plugins_openai = ["https://weathergpt.vercel.app/"] - return cfg - - -@pytest.fixture -def mock_config_openai_plugin(): - """Mock config object for testing the scan_plugins function""" - - class MockConfig: - """Mock config object for testing the scan_plugins function""" - - plugins_dir = PLUGINS_TEST_DIR - plugins_openai = [PLUGIN_TEST_OPENAI] - plugins_denylist = ["AutoGPTPVicuna", "auto_gpt_guanaco"] - plugins_allowlist = [PLUGIN_TEST_OPENAI] - - return MockConfig() - - -def test_scan_plugins_openai(mock_config_openai_plugin): - # Test that the function returns the correct number of plugins - result = scan_plugins(mock_config_openai_plugin, debug=True) - assert len(result) == 1 - - 
-@pytest.fixture -def mock_config_generic_plugin(): - """Mock config object for testing the scan_plugins function""" - - # Test that the function returns the correct number of plugins - class MockConfig: - plugins_dir = PLUGINS_TEST_DIR - plugins_openai = [] - plugins_denylist = [] - plugins_allowlist = ["AutoGPTPVicuna", "auto_gpt_guanaco"] - - return MockConfig() - - -def test_scan_plugins_generic(mock_config_generic_plugin): - # Test that the function returns the correct number of plugins - result = scan_plugins(mock_config_generic_plugin, debug=True) - assert len(result) == 2 diff --git a/tests/unit/test_plugins.py b/tests/unit/test_plugins.py index 6aa8dd47..3a6f6d70 100644 --- a/tests/unit/test_plugins.py +++ b/tests/unit/test_plugins.py @@ -1,10 +1,61 @@ -import pytest +import os -from autogpt.plugins import denylist_allowlist_check, inspect_zip_for_modules +import yaml + +from autogpt.config.config import Config +from autogpt.plugins import inspect_zip_for_modules, scan_plugins +from autogpt.plugins.plugin_config import PluginConfig PLUGINS_TEST_DIR = "tests/unit/data/test_plugins" PLUGIN_TEST_ZIP_FILE = "Auto-GPT-Plugin-Test-master.zip" PLUGIN_TEST_INIT_PY = "Auto-GPT-Plugin-Test-master/src/auto_gpt_vicuna/__init__.py" +PLUGIN_TEST_OPENAI = "https://weathergpt.vercel.app/" + + +def test_scan_plugins_openai(config: Config): + config.plugins_openai = [PLUGIN_TEST_OPENAI] + plugins_config = config.plugins_config + plugins_config.plugins[PLUGIN_TEST_OPENAI] = PluginConfig( + name=PLUGIN_TEST_OPENAI, enabled=True + ) + + # Test that the function returns the correct number of plugins + result = scan_plugins(config, debug=True) + assert len(result) == 1 + + +def test_scan_plugins_generic(config: Config): + # Test that the function returns the correct number of plugins + plugins_config = config.plugins_config + plugins_config.plugins["auto_gpt_guanaco"] = PluginConfig( + name="auto_gpt_guanaco", enabled=True + ) + plugins_config.plugins["auto_gpt_vicuna"] = PluginConfig( + name="auto_gptp_vicuna", enabled=True + ) + result = scan_plugins(config, debug=True) + plugin_class_names = [plugin.__class__.__name__ for plugin in result] + + assert len(result) == 2 + assert "AutoGPTGuanaco" in plugin_class_names + assert "AutoGPTPVicuna" in plugin_class_names + + +def test_scan_plugins_not_enabled(config: Config): + # Test that the function returns the correct number of plugins + plugins_config = config.plugins_config + plugins_config.plugins["auto_gpt_guanaco"] = PluginConfig( + name="auto_gpt_guanaco", enabled=True + ) + plugins_config.plugins["auto_gpt_vicuna"] = PluginConfig( + name="auto_gptp_vicuna", enabled=False + ) + result = scan_plugins(config, debug=True) + plugin_class_names = [plugin.__class__.__name__ for plugin in result] + + assert len(result) == 1 + assert "AutoGPTGuanaco" in plugin_class_names + assert "AutoGPTPVicuna" not in plugin_class_names def test_inspect_zip_for_modules(): @@ -12,62 +63,49 @@ def test_inspect_zip_for_modules(): assert result == [PLUGIN_TEST_INIT_PY] -@pytest.fixture -def mock_config_denylist_allowlist_check(): - class MockConfig: - """Mock config object for testing the denylist_allowlist_check function""" +def test_create_base_config(config: Config): + """Test the backwards-compatibility shim to convert old plugin allow/deny list to a config file""" + config.plugins_allowlist = ["a", "b"] + config.plugins_denylist = ["c", "d"] - plugins_denylist = ["BadPlugin"] - plugins_allowlist = ["GoodPlugin"] - authorise_key = "y" - exit_key = "n" + 
os.remove(config.plugins_config_file) + plugins_config = config.load_plugins_config() - return MockConfig() + # Check the structure of the plugins config data + assert len(plugins_config.plugins) == 4 + assert plugins_config.get("a").enabled + assert plugins_config.get("b").enabled + assert not plugins_config.get("c").enabled + assert not plugins_config.get("d").enabled + + # Check the saved config file + with open(config.plugins_config_file, "r") as saved_config_file: + saved_config = yaml.load(saved_config_file, Loader=yaml.FullLoader) + + assert saved_config == { + "a": {"enabled": True, "config": {}}, + "b": {"enabled": True, "config": {}}, + "c": {"enabled": False, "config": {}}, + "d": {"enabled": False, "config": {}}, + } -def test_denylist_allowlist_check_denylist( - mock_config_denylist_allowlist_check, monkeypatch -): - # Test that the function returns False when the plugin is in the denylist - monkeypatch.setattr("builtins.input", lambda _: "y") - assert not denylist_allowlist_check( - "BadPlugin", mock_config_denylist_allowlist_check - ) +def test_load_config(config: Config): + """Test that the plugin config is loaded correctly from the plugins_config.yaml file""" + # Create a test config and write it to disk + test_config = { + "a": {"enabled": True, "config": {"api_key": "1234"}}, + "b": {"enabled": False, "config": {}}, + } + with open(config.plugins_config_file, "w+") as f: + f.write(yaml.dump(test_config)) + # Load the config from disk + plugins_config = config.load_plugins_config() -def test_denylist_allowlist_check_allowlist( - mock_config_denylist_allowlist_check, monkeypatch -): - # Test that the function returns True when the plugin is in the allowlist - monkeypatch.setattr("builtins.input", lambda _: "y") - assert denylist_allowlist_check("GoodPlugin", mock_config_denylist_allowlist_check) - - -def test_denylist_allowlist_check_user_input_yes( - mock_config_denylist_allowlist_check, monkeypatch -): - # Test that the function returns True when the user inputs "y" - monkeypatch.setattr("builtins.input", lambda _: "y") - assert denylist_allowlist_check( - "UnknownPlugin", mock_config_denylist_allowlist_check - ) - - -def test_denylist_allowlist_check_user_input_no( - mock_config_denylist_allowlist_check, monkeypatch -): - # Test that the function returns False when the user inputs "n" - monkeypatch.setattr("builtins.input", lambda _: "n") - assert not denylist_allowlist_check( - "UnknownPlugin", mock_config_denylist_allowlist_check - ) - - -def test_denylist_allowlist_check_user_input_invalid( - mock_config_denylist_allowlist_check, monkeypatch -): - # Test that the function returns False when the user inputs an invalid value - monkeypatch.setattr("builtins.input", lambda _: "invalid") - assert not denylist_allowlist_check( - "UnknownPlugin", mock_config_denylist_allowlist_check - ) + # Check that the loaded config is equal to the test config + assert len(plugins_config.plugins) == 2 + assert plugins_config.get("a").enabled + assert plugins_config.get("a").config == {"api_key": "1234"} + assert not plugins_config.get("b").enabled + assert plugins_config.get("b").config == {} From c76c67a69c897be822335bedce59eb47f0f77e1d Mon Sep 17 00:00:00 2001 From: Richard Beales Date: Wed, 14 Jun 2023 10:03:11 +0100 Subject: [PATCH 63/97] Introduce method to ignore unexpected command params (#3570) Co-authored-by: Nicholas Tindle Co-authored-by: Reinier van der Leer Co-authored-by: Luke K <2609441+lc0rp@users.noreply.github.com> --- autogpt/commands/command.py | 30 
+++++++++++++++++++++++++++++ autogpt/commands/file_operations.py | 4 +++- 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/autogpt/commands/command.py b/autogpt/commands/command.py index 742cc8df..ed93589f 100644 --- a/autogpt/commands/command.py +++ b/autogpt/commands/command.py @@ -1,6 +1,7 @@ import functools import importlib import inspect +from inspect import Parameter from typing import Any, Callable, Optional from autogpt.config import Config @@ -175,3 +176,32 @@ def command( return wrapper return decorator + + +def ignore_unexpected_kwargs(func: Callable[..., Any]) -> Callable[..., Any]: + def filter_kwargs(kwargs: dict) -> dict: + sig = inspect.signature(func) + # Parameter.VAR_KEYWORD - a dict of keyword arguments that aren't bound to any other + if any(map(lambda p: p.kind == Parameter.VAR_KEYWORD, sig.parameters.values())): + # if **kwargs exist, return directly + return kwargs + + _params = list( + filter( + lambda p: p.kind + in {Parameter.KEYWORD_ONLY, Parameter.POSITIONAL_OR_KEYWORD}, + sig.parameters.values(), + ) + ) + + res_kwargs = { + param.name: kwargs[param.name] for param in _params if param.name in kwargs + } + return res_kwargs + + @functools.wraps(func) + def wrapper(*args, **kwargs) -> Any: + kwargs = filter_kwargs(kwargs) + return func(*args, **kwargs) + + return wrapper diff --git a/autogpt/commands/file_operations.py b/autogpt/commands/file_operations.py index a0a61539..b851d662 100644 --- a/autogpt/commands/file_operations.py +++ b/autogpt/commands/file_operations.py @@ -13,8 +13,9 @@ from confection import Config from requests.adapters import HTTPAdapter, Retry from autogpt.agent.agent import Agent -from autogpt.commands.command import command +from autogpt.commands.command import command, ignore_unexpected_kwargs from autogpt.commands.file_operations_utils import read_textual_file +from autogpt.config import Config from autogpt.logs import logger from autogpt.memory.vector import MemoryItem, VectorMemory from autogpt.spinner import Spinner @@ -308,6 +309,7 @@ def delete_file(filename: str, agent: Agent) -> str: @command("list_files", "List Files in Directory", '"directory": ""') +@ignore_unexpected_kwargs def list_files(directory: str, agent: Agent) -> list[str]: """lists files in a directory recursively From c17d825bbac86365488b586b2c05660e74a9276e Mon Sep 17 00:00:00 2001 From: gravelBridge Date: Wed, 14 Jun 2023 03:43:06 -0700 Subject: [PATCH 64/97] Remove urls tts macos (#4260) Co-authored-by: k-boikov <64261260+k-boikov@users.noreply.github.com> Co-authored-by: Nicholas Tindle Co-authored-by: Luke K (pr-0f3t) <2609441+lc0rp@users.noreply.github.com> --- autogpt/speech/base.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/autogpt/speech/base.py b/autogpt/speech/base.py index a7570d94..07c8d9fe 100644 --- a/autogpt/speech/base.py +++ b/autogpt/speech/base.py @@ -1,5 +1,6 @@ """Base class for all voice classes.""" import abc +import re from threading import Lock from autogpt.singleton import AbstractSingleton @@ -29,6 +30,11 @@ class VoiceBase(AbstractSingleton): text (str): The text to say. voice_index (int): The index of the voice to use. 
""" + text = re.sub( + r"\b(?:https?://[-\w_.]+/?\w[-\w_.]*\.(?:[-\w_.]+/?\w[-\w_.]*\.)?[a-z]+(?:/[-\w_.%]+)*\b(?!\.))", + "", + text, + ) with self._mutex: return self._speech(text, voice_index) From 6e6e7fcc9a190cc351f6bd3195fc504a738b1fc8 Mon Sep 17 00:00:00 2001 From: James Collins Date: Wed, 14 Jun 2023 07:59:26 -0700 Subject: [PATCH 65/97] Extract openai API calls and retry at lowest level (#3696) * Extract open ai api calls and retry at lowest level * Forgot a test * Gotta fix my local docker config so I can let pre-commit hooks run, ugh * fix: merge artiface * Fix linting * Update memory.vector.utils * feat: make sure resp exists * fix: raise error message if created * feat: rename file * fix: partial test fix * fix: update comments * fix: linting * fix: remove broken test * fix: require a model to exist * fix: BaseError issue * fix: runtime error * Fix mock response in test_make_agent * add 429 as errors to retry --------- Co-authored-by: k-boikov <64261260+k-boikov@users.noreply.github.com> Co-authored-by: Nicholas Tindle Co-authored-by: Reinier van der Leer Co-authored-by: Nicholas Tindle Co-authored-by: Luke K (pr-0f3t) <2609441+lc0rp@users.noreply.github.com> Co-authored-by: Merwane Hamadi --- autogpt/app.py | 3 + autogpt/llm/api_manager.py | 49 +----- autogpt/llm/base.py | 3 + autogpt/llm/providers/openai.py | 178 +++++++++++++++++++++- autogpt/llm/utils/__init__.py | 155 ++++--------------- autogpt/memory/vector/utils.py | 14 +- tests/integration/test_provider_openai.py | 67 ++++++++ tests/unit/test_api_manager.py | 64 +------- tests/unit/test_make_agent.py | 2 +- tests/unit/test_retry_provider_openai.py | 110 +++++++++++++ 10 files changed, 400 insertions(+), 245 deletions(-) create mode 100644 tests/integration/test_provider_openai.py create mode 100644 tests/unit/test_retry_provider_openai.py diff --git a/autogpt/app.py b/autogpt/app.py index eb25fa7d..c081835c 100644 --- a/autogpt/app.py +++ b/autogpt/app.py @@ -185,6 +185,9 @@ def start_agent(name: str, task: str, prompt: str, agent: Agent, model=None) -> first_message = f"""You are {name}. Respond with: "Acknowledged".""" agent_intro = f"{voice_name} here, Reporting for duty!" + if model is None: + model = config.smart_llm_model + # Create agent if agent.config.speak_mode: say_text(agent_intro, 1) diff --git a/autogpt/llm/api_manager.py b/autogpt/llm/api_manager.py index 7442579d..bf276f5a 100644 --- a/autogpt/llm/api_manager.py +++ b/autogpt/llm/api_manager.py @@ -5,8 +5,6 @@ from typing import List, Optional import openai from openai import Model -from autogpt.config import Config -from autogpt.llm.base import MessageDict from autogpt.llm.modelsinfo import COSTS from autogpt.logs import logger from autogpt.singleton import Singleton @@ -27,52 +25,7 @@ class ApiManager(metaclass=Singleton): self.total_budget = 0.0 self.models = None - def create_chat_completion( - self, - messages: list[MessageDict], - model: str | None = None, - temperature: float = None, - max_tokens: int | None = None, - deployment_id=None, - ) -> str: - """ - Create a chat completion and update the cost. - Args: - messages (list): The list of messages to send to the API. - model (str): The model to use for the API call. - temperature (float): The temperature to use for the API call. - max_tokens (int): The maximum number of tokens for the API call. - Returns: - str: The AI's response. 
- """ - cfg = Config() - if temperature is None: - temperature = cfg.temperature - if deployment_id is not None: - response = openai.ChatCompletion.create( - deployment_id=deployment_id, - model=model, - messages=messages, - temperature=temperature, - max_tokens=max_tokens, - api_key=cfg.openai_api_key, - ) - else: - response = openai.ChatCompletion.create( - model=model, - messages=messages, - temperature=temperature, - max_tokens=max_tokens, - api_key=cfg.openai_api_key, - ) - if not hasattr(response, "error"): - logger.debug(f"Response: {response}") - prompt_tokens = response.usage.prompt_tokens - completion_tokens = response.usage.completion_tokens - self.update_cost(prompt_tokens, completion_tokens, model) - return response - - def update_cost(self, prompt_tokens, completion_tokens, model: str): + def update_cost(self, prompt_tokens, completion_tokens, model): """ Update the total cost, prompt tokens, and completion tokens. diff --git a/autogpt/llm/base.py b/autogpt/llm/base.py index 76bd3db1..83da8d84 100644 --- a/autogpt/llm/base.py +++ b/autogpt/llm/base.py @@ -7,6 +7,9 @@ from typing import List, Literal, TypedDict MessageRole = Literal["system", "user", "assistant"] MessageType = Literal["ai_response", "action_result"] +TText = list[int] +"""Token array representing tokenized text""" + class MessageDict(TypedDict): role: MessageRole diff --git a/autogpt/llm/providers/openai.py b/autogpt/llm/providers/openai.py index acaf0671..82819756 100644 --- a/autogpt/llm/providers/openai.py +++ b/autogpt/llm/providers/openai.py @@ -1,4 +1,23 @@ -from autogpt.llm.base import ChatModelInfo, EmbeddingModelInfo, TextModelInfo +import functools +import time +from typing import List +from unittest.mock import patch + +import openai +import openai.api_resources.abstract.engine_api_resource as engine_api_resource +from colorama import Fore, Style +from openai.error import APIError, RateLimitError, Timeout +from openai.openai_object import OpenAIObject + +from autogpt.llm.api_manager import ApiManager +from autogpt.llm.base import ( + ChatModelInfo, + EmbeddingModelInfo, + MessageDict, + TextModelInfo, + TText, +) +from autogpt.logs import logger OPEN_AI_CHAT_MODELS = { info.name: info @@ -72,3 +91,160 @@ OPEN_AI_MODELS: dict[str, ChatModelInfo | EmbeddingModelInfo | TextModelInfo] = **OPEN_AI_TEXT_MODELS, **OPEN_AI_EMBEDDING_MODELS, } + + +def meter_api(func): + """Adds ApiManager metering to functions which make OpenAI API calls""" + api_manager = ApiManager() + + openai_obj_processor = openai.util.convert_to_openai_object + + def update_usage_with_response(response: OpenAIObject): + try: + usage = response.usage + logger.debug(f"Reported usage from call to model {response.model}: {usage}") + api_manager.update_cost( + response.usage.prompt_tokens, + response.usage.completion_tokens if "completion_tokens" in usage else 0, + response.model, + ) + except Exception as err: + logger.warn(f"Failed to update API costs: {err.__class__.__name__}: {err}") + + def metering_wrapper(*args, **kwargs): + openai_obj = openai_obj_processor(*args, **kwargs) + if isinstance(openai_obj, OpenAIObject) and "usage" in openai_obj: + update_usage_with_response(openai_obj) + return openai_obj + + def metered_func(*args, **kwargs): + with patch.object( + engine_api_resource.util, + "convert_to_openai_object", + side_effect=metering_wrapper, + ): + return func(*args, **kwargs) + + return metered_func + + +def retry_api( + num_retries: int = 10, + backoff_base: float = 2.0, + warn_user: bool = True, +): + """Retry an OpenAI 
API call. + + Args: + num_retries int: Number of retries. Defaults to 10. + backoff_base float: Base for exponential backoff. Defaults to 2. + warn_user bool: Whether to warn the user. Defaults to True. + """ + retry_limit_msg = f"{Fore.RED}Error: " f"Reached rate limit, passing...{Fore.RESET}" + api_key_error_msg = ( + f"Please double check that you have setup a " + f"{Fore.CYAN + Style.BRIGHT}PAID{Style.RESET_ALL} OpenAI API Account. You can " + f"read more here: {Fore.CYAN}https://docs.agpt.co/setup/#getting-an-api-key{Fore.RESET}" + ) + backoff_msg = ( + f"{Fore.RED}Error: API Bad gateway. Waiting {{backoff}} seconds...{Fore.RESET}" + ) + + def _wrapper(func): + @functools.wraps(func) + def _wrapped(*args, **kwargs): + user_warned = not warn_user + num_attempts = num_retries + 1 # +1 for the first attempt + for attempt in range(1, num_attempts + 1): + try: + return func(*args, **kwargs) + + except RateLimitError: + if attempt == num_attempts: + raise + + logger.debug(retry_limit_msg) + if not user_warned: + logger.double_check(api_key_error_msg) + user_warned = True + + except (APIError, Timeout) as e: + if (e.http_status not in [502, 429]) or (attempt == num_attempts): + raise + + backoff = backoff_base ** (attempt + 2) + logger.debug(backoff_msg.format(backoff=backoff)) + time.sleep(backoff) + + return _wrapped + + return _wrapper + + +@meter_api +@retry_api() +def create_chat_completion( + messages: List[MessageDict], + *_, + **kwargs, +) -> OpenAIObject: + """Create a chat completion using the OpenAI API + + Args: + messages: A list of messages to feed to the chatbot. + kwargs: Other arguments to pass to the OpenAI API chat completion call. + Returns: + OpenAIObject: The ChatCompletion response from OpenAI + + """ + completion: OpenAIObject = openai.ChatCompletion.create( + messages=messages, + **kwargs, + ) + if not hasattr(completion, "error"): + logger.debug(f"Response: {completion}") + return completion + + +@meter_api +@retry_api() +def create_text_completion( + prompt: str, + *_, + **kwargs, +) -> OpenAIObject: + """Create a text completion using the OpenAI API + + Args: + prompt: A text prompt to feed to the LLM + kwargs: Other arguments to pass to the OpenAI API text completion call. + Returns: + OpenAIObject: The Completion response from OpenAI + + """ + return openai.Completion.create( + prompt=prompt, + **kwargs, + ) + + +@meter_api +@retry_api() +def create_embedding( + input: str | TText | List[str] | List[TText], + *_, + **kwargs, +) -> OpenAIObject: + """Create an embedding using the OpenAI API + + Args: + input: The text to embed. + kwargs: Other arguments to pass to the OpenAI API embedding call. 
+ Returns: + OpenAIObject: The Embedding response from OpenAI + + """ + return openai.Embedding.create( + input=input, + **kwargs, + ) diff --git a/autogpt/llm/utils/__init__.py b/autogpt/llm/utils/__init__.py index 756c4bd5..aee7997a 100644 --- a/autogpt/llm/utils/__init__.py +++ b/autogpt/llm/utils/__init__.py @@ -1,119 +1,24 @@ from __future__ import annotations -import functools -import time from typing import List, Literal, Optional -from unittest.mock import patch -import openai -import openai.api_resources.abstract.engine_api_resource as engine_api_resource -import openai.util -from colorama import Fore, Style -from openai.error import APIError, RateLimitError -from openai.openai_object import OpenAIObject +from colorama import Fore from autogpt.config import Config from autogpt.logs import logger from ..api_manager import ApiManager from ..base import ChatSequence, Message +from ..providers import openai as iopenai from .token_counter import * -def metered(func): - """Adds ApiManager metering to functions which make OpenAI API calls""" - api_manager = ApiManager() - - openai_obj_processor = openai.util.convert_to_openai_object - - def update_usage_with_response(response: OpenAIObject): - try: - usage = response.usage - logger.debug(f"Reported usage from call to model {response.model}: {usage}") - api_manager.update_cost( - response.usage.prompt_tokens, - response.usage.completion_tokens if "completion_tokens" in usage else 0, - response.model, - ) - except Exception as err: - logger.warn(f"Failed to update API costs: {err.__class__.__name__}: {err}") - - def metering_wrapper(*args, **kwargs): - openai_obj = openai_obj_processor(*args, **kwargs) - if isinstance(openai_obj, OpenAIObject) and "usage" in openai_obj: - update_usage_with_response(openai_obj) - return openai_obj - - def metered_func(*args, **kwargs): - with patch.object( - engine_api_resource.util, - "convert_to_openai_object", - side_effect=metering_wrapper, - ): - return func(*args, **kwargs) - - return metered_func - - -def retry_openai_api( - num_retries: int = 10, - backoff_base: float = 2.0, - warn_user: bool = True, -): - """Retry an OpenAI API call. - - Args: - num_retries int: Number of retries. Defaults to 10. - backoff_base float: Base for exponential backoff. Defaults to 2. - warn_user bool: Whether to warn the user. Defaults to True. - """ - retry_limit_msg = f"{Fore.RED}Error: " f"Reached rate limit, passing...{Fore.RESET}" - api_key_error_msg = ( - f"Please double check that you have setup a " - f"{Fore.CYAN + Style.BRIGHT}PAID{Style.RESET_ALL} OpenAI API Account. You can " - f"read more here: {Fore.CYAN}https://docs.agpt.co/setup/#getting-an-api-key{Fore.RESET}" - ) - backoff_msg = ( - f"{Fore.RED}Error: API Bad gateway. 
Waiting {{backoff}} seconds...{Fore.RESET}" - ) - - def _wrapper(func): - @functools.wraps(func) - def _wrapped(*args, **kwargs): - user_warned = not warn_user - num_attempts = num_retries + 1 # +1 for the first attempt - for attempt in range(1, num_attempts + 1): - try: - return func(*args, **kwargs) - - except RateLimitError: - if attempt == num_attempts: - raise - - logger.debug(retry_limit_msg) - if not user_warned: - logger.double_check(api_key_error_msg) - user_warned = True - - except APIError as e: - if (e.http_status not in [502, 429]) or (attempt == num_attempts): - raise - - backoff = backoff_base ** (attempt + 2) - logger.debug(backoff_msg.format(backoff=backoff)) - time.sleep(backoff) - - return _wrapped - - return _wrapper - - def call_ai_function( function: str, args: list, description: str, - model: str | None = None, - config: Config = None, + model: Optional[str] = None, + config: Optional[Config] = None, ) -> str: """Call an AI function @@ -150,8 +55,6 @@ def call_ai_function( return create_chat_completion(prompt=prompt, temperature=0) -@metered -@retry_openai_api() def create_text_completion( prompt: str, model: Optional[str], @@ -169,24 +72,23 @@ def create_text_completion( else: kwargs = {"model": model} - response = openai.Completion.create( - **kwargs, + response = iopenai.create_text_completion( prompt=prompt, + **kwargs, temperature=temperature, max_tokens=max_output_tokens, api_key=cfg.openai_api_key, ) + logger.debug(f"Response: {response}") + return response.choices[0].text # Overly simple abstraction until we create something better -# simple retry mechanism when getting a rate error or a bad gateway -@metered -@retry_openai_api() def create_chat_completion( prompt: ChatSequence, model: Optional[str] = None, - temperature: float = None, + temperature: Optional[float] = None, max_tokens: Optional[int] = None, ) -> str: """Create a chat completion using the OpenAI API @@ -209,41 +111,48 @@ def create_chat_completion( logger.debug( f"{Fore.GREEN}Creating chat completion with model {model}, temperature {temperature}, max_tokens {max_tokens}{Fore.RESET}" ) + chat_completion_kwargs = { + "model": model, + "temperature": temperature, + "max_tokens": max_tokens, + } + for plugin in cfg.plugins: if plugin.can_handle_chat_completion( messages=prompt.raw(), - model=model, - temperature=temperature, - max_tokens=max_tokens, + **chat_completion_kwargs, ): message = plugin.handle_chat_completion( messages=prompt.raw(), - model=model, - temperature=temperature, - max_tokens=max_tokens, + **chat_completion_kwargs, ) if message is not None: return message - api_manager = ApiManager() - response = None + chat_completion_kwargs["api_key"] = cfg.openai_api_key if cfg.use_azure: - kwargs = {"deployment_id": cfg.get_azure_deployment_id_for_model(model)} - else: - kwargs = {"model": model} + chat_completion_kwargs["deployment_id"] = cfg.get_azure_deployment_id_for_model( + model + ) - response = api_manager.create_chat_completion( - **kwargs, + response = iopenai.create_chat_completion( messages=prompt.raw(), - temperature=temperature, - max_tokens=max_tokens, + **chat_completion_kwargs, ) + logger.debug(f"Response: {response}") + + resp = "" + if not hasattr(response, "error"): + resp = response.choices[0].message["content"] + else: + logger.error(response.error) + raise RuntimeError(response.error) - resp = response.choices[0].message["content"] for plugin in cfg.plugins: if not plugin.can_handle_on_response(): continue resp = plugin.on_response(resp) + return resp diff --git 
a/autogpt/memory/vector/utils.py b/autogpt/memory/vector/utils.py index 75d1f69d..b542632b 100644 --- a/autogpt/memory/vector/utils.py +++ b/autogpt/memory/vector/utils.py @@ -1,16 +1,14 @@ from typing import Any, overload import numpy as np -import openai from autogpt.config import Config -from autogpt.llm.utils import metered, retry_openai_api +from autogpt.llm.base import TText +from autogpt.llm.providers import openai as iopenai from autogpt.logs import logger Embedding = list[np.float32] | np.ndarray[Any, np.dtype[np.float32]] """Embedding vector""" -TText = list[int] -"""Token array representing text""" @overload @@ -23,8 +21,6 @@ def get_embedding(input: list[str] | list[TText]) -> list[Embedding]: ... -@metered -@retry_openai_api() def get_embedding( input: str | TText | list[str] | list[TText], ) -> Embedding | list[Embedding]: @@ -57,10 +53,10 @@ def get_embedding( + (f" via Azure deployment '{kwargs['engine']}'" if cfg.use_azure else "") ) - embeddings = openai.Embedding.create( - input=input, - api_key=cfg.openai_api_key, + embeddings = iopenai.create_embedding( + input, **kwargs, + api_key=cfg.openai_api_key, ).data if not multiple: diff --git a/tests/integration/test_provider_openai.py b/tests/integration/test_provider_openai.py new file mode 100644 index 00000000..f5ae65cf --- /dev/null +++ b/tests/integration/test_provider_openai.py @@ -0,0 +1,67 @@ +from unittest.mock import MagicMock, patch + +import pytest + +from autogpt.llm.api_manager import COSTS, ApiManager +from autogpt.llm.providers import openai + +api_manager = ApiManager() + + +@pytest.fixture(autouse=True) +def reset_api_manager(): + api_manager.reset() + yield + + +@pytest.fixture(autouse=True) +def mock_costs(): + with patch.dict( + COSTS, + { + "gpt-3.5-turbo": {"prompt": 0.002, "completion": 0.002}, + "text-embedding-ada-002": {"prompt": 0.0004, "completion": 0}, + }, + clear=True, + ): + yield + + +class TestProviderOpenAI: + @staticmethod + def test_create_chat_completion_debug_mode(caplog): + """Test if debug mode logs response.""" + messages = [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "Who won the world series in 2020?"}, + ] + model = "gpt-3.5-turbo" + with patch("openai.ChatCompletion.create") as mock_create: + mock_response = MagicMock() + del mock_response.error + mock_response.usage.prompt_tokens = 10 + mock_response.usage.completion_tokens = 20 + mock_create.return_value = mock_response + + openai.create_chat_completion(messages, model=model) + + assert "Response" in caplog.text + + @staticmethod + def test_create_chat_completion_empty_messages(): + """Test if empty messages result in zero tokens and cost.""" + messages = [] + model = "gpt-3.5-turbo" + + with patch("openai.ChatCompletion.create") as mock_create: + mock_response = MagicMock() + del mock_response.error + mock_response.usage.prompt_tokens = 0 + mock_response.usage.completion_tokens = 0 + mock_create.return_value = mock_response + + openai.create_chat_completion(messages, model=model) + + assert api_manager.get_total_prompt_tokens() == 0 + assert api_manager.get_total_completion_tokens() == 0 + assert api_manager.get_total_cost() == 0 diff --git a/tests/unit/test_api_manager.py b/tests/unit/test_api_manager.py index 9585fba7..2b4ad90a 100644 --- a/tests/unit/test_api_manager.py +++ b/tests/unit/test_api_manager.py @@ -1,4 +1,4 @@ -from unittest.mock import MagicMock, patch +from unittest.mock import patch import pytest @@ -27,68 +27,6 @@ def mock_costs(): class TestApiManager: - 
@staticmethod - def test_create_chat_completion_debug_mode(caplog): - """Test if debug mode logs response.""" - api_manager_debug = ApiManager(debug=True) - messages = [ - {"role": "system", "content": "You are a helpful assistant."}, - {"role": "user", "content": "Who won the world series in 2020?"}, - ] - model = "gpt-3.5-turbo" - - with patch("openai.ChatCompletion.create") as mock_create: - mock_response = MagicMock() - del mock_response.error - mock_response.usage.prompt_tokens = 10 - mock_response.usage.completion_tokens = 20 - mock_create.return_value = mock_response - - api_manager_debug.create_chat_completion(messages, model=model) - - assert "Response" in caplog.text - - @staticmethod - def test_create_chat_completion_empty_messages(): - """Test if empty messages result in zero tokens and cost.""" - messages = [] - model = "gpt-3.5-turbo" - - with patch("openai.ChatCompletion.create") as mock_create: - mock_response = MagicMock() - del mock_response.error - mock_response.usage.prompt_tokens = 0 - mock_response.usage.completion_tokens = 0 - mock_create.return_value = mock_response - - api_manager.create_chat_completion(messages, model=model) - - assert api_manager.get_total_prompt_tokens() == 0 - assert api_manager.get_total_completion_tokens() == 0 - assert api_manager.get_total_cost() == 0 - - @staticmethod - def test_create_chat_completion_valid_inputs(): - """Test if valid inputs result in correct tokens and cost.""" - messages = [ - {"role": "system", "content": "You are a helpful assistant."}, - {"role": "user", "content": "Who won the world series in 2020?"}, - ] - model = "gpt-3.5-turbo" - - with patch("openai.ChatCompletion.create") as mock_create: - mock_response = MagicMock() - del mock_response.error - mock_response.usage.prompt_tokens = 10 - mock_response.usage.completion_tokens = 20 - mock_create.return_value = mock_response - - api_manager.create_chat_completion(messages, model=model) - - assert api_manager.get_total_prompt_tokens() == 10 - assert api_manager.get_total_completion_tokens() == 20 - assert api_manager.get_total_cost() == (10 * 0.002 + 20 * 0.002) / 1000 - def test_getter_methods(self): """Test the getter methods for total tokens, cost, and budget.""" api_manager.update_cost(60, 120, "gpt-3.5-turbo") diff --git a/tests/unit/test_make_agent.py b/tests/unit/test_make_agent.py index 23eea027..9939d79c 100644 --- a/tests/unit/test_make_agent.py +++ b/tests/unit/test_make_agent.py @@ -11,7 +11,7 @@ def test_make_agent(agent: Agent, mocker: MockerFixture) -> None: mock = mocker.patch("openai.ChatCompletion.create") response = MagicMock() - # del response.error + del response.error response.choices[0].messages[0].content = "Test message" response.usage.prompt_tokens = 1 response.usage.completion_tokens = 1 diff --git a/tests/unit/test_retry_provider_openai.py b/tests/unit/test_retry_provider_openai.py new file mode 100644 index 00000000..f8162eb8 --- /dev/null +++ b/tests/unit/test_retry_provider_openai.py @@ -0,0 +1,110 @@ +import pytest +from openai.error import APIError, RateLimitError + +from autogpt.llm.providers import openai + + +@pytest.fixture(params=[RateLimitError, APIError]) +def error(request): + if request.param == APIError: + return request.param("Error", http_status=502) + else: + return request.param("Error") + + +def error_factory(error_instance, error_count, retry_count, warn_user=True): + """Creates errors""" + + class RaisesError: + def __init__(self): + self.count = 0 + + @openai.retry_api( + num_retries=retry_count, backoff_base=0.001, 
warn_user=warn_user
+    )
+    def __call__(self):
+        self.count += 1
+        if self.count <= error_count:
+            raise error_instance
+        return self.count
+
+    return RaisesError()
+
+
+def test_retry_open_api_no_error(capsys):
+    """Tests the retry functionality with no errors expected"""
+
+    @openai.retry_api()
+    def f():
+        return 1
+
+    result = f()
+    assert result == 1
+
+    output = capsys.readouterr()
+    assert output.out == ""
+    assert output.err == ""
+
+
+@pytest.mark.parametrize(
+    "error_count, retry_count, failure",
+    [(2, 10, False), (2, 2, False), (10, 2, True), (3, 2, True), (1, 0, True)],
+    ids=["passing", "passing_edge", "failing", "failing_edge", "failing_no_retries"],
+)
+def test_retry_open_api_passing(capsys, error, error_count, retry_count, failure):
+    """Tests the retry with simulated errors [RateLimitError, APIError], but should ultimately pass"""
+    call_count = min(error_count, retry_count) + 1
+
+    raises = error_factory(error, error_count, retry_count)
+    if failure:
+        with pytest.raises(type(error)):
+            raises()
+    else:
+        result = raises()
+        assert result == call_count
+
+    assert raises.count == call_count
+
+    output = capsys.readouterr()
+
+    if error_count and retry_count:
+        if type(error) == RateLimitError:
+            assert "Reached rate limit, passing..." in output.out
+            assert "Please double check" in output.out
+        if type(error) == APIError:
+            assert "API Bad gateway" in output.out
+    else:
+        assert output.out == ""
+
+
+def test_retry_open_api_rate_limit_no_warn(capsys):
+    """Tests the retry logic with a rate limit error"""
+    error_count = 2
+    retry_count = 10
+
+    raises = error_factory(RateLimitError, error_count, retry_count, warn_user=False)
+    result = raises()
+    call_count = min(error_count, retry_count) + 1
+    assert result == call_count
+    assert raises.count == call_count
+
+    output = capsys.readouterr()
+
+    assert "Reached rate limit, passing..." 
in output.out + assert "Please double check" not in output.out + + +def test_retry_openapi_other_api_error(capsys): + """Tests the Retry logic with a non rate limit error such as HTTP500""" + error_count = 2 + retry_count = 10 + + raises = error_factory(APIError("Error", http_status=500), error_count, retry_count) + + with pytest.raises(APIError): + raises() + call_count = 1 + assert raises.count == call_count + + output = capsys.readouterr() + assert output.out == "" From bbaa5b89c2c0e81b8636029fcf43f85b5b1c7240 Mon Sep 17 00:00:00 2001 From: Reinier van der Leer Date: Thu, 15 Jun 2023 13:16:25 +0200 Subject: [PATCH 66/97] Add CI trigger for release-* branches --- .github/workflows/ci.yml | 16 ++++++++-------- .github/workflows/docker-ci.yml | 2 +- .github/workflows/pr-label.yml | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0718c46f..3e21d1d7 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -7,18 +7,18 @@ on: - 'tests/Auto-GPT-test-cassettes' - 'tests/challenges/current_score.json' pull_request: - branches: [ stable, master ] + branches: [ stable, master, release-* ] pull_request_target: - branches: [ master, ci-test* ] + branches: [ master, release-*, ci-test* ] concurrency: - group: ${{ format('ci-{0}', github.head_ref && format('pr-{0}', github.event.pull_request.number) || github.sha) }} - cancel-in-progress: ${{ startsWith(github.event_name, 'pull_request') && github.event.pull_request.head.repo.fork == (github.event_name == 'pull_request_target') }} + group: ${{ format('ci-{0}', github.head_ref && format('{0}-{1}', github.event_name, github.event.pull_request.number) || github.sha) }} + cancel-in-progress: ${{ startsWith(github.event_name, 'pull_request') }} jobs: lint: - # eliminate duplicate runs on master - if: github.event_name == 'push' || github.base_ref != 'master' || (github.event.pull_request.head.repo.fork == (github.event_name == 'pull_request_target')) + # eliminate duplicate runs + if: github.event_name == 'push' || (github.event.pull_request.head.repo.fork == (github.event_name == 'pull_request_target')) runs-on: ubuntu-latest env: @@ -73,8 +73,8 @@ jobs: $cmd --check || (echo "You have unused imports or pass statements, please run '${cmd} --in-place'" && exit 1) test: - # eliminate duplicate runs on master - if: github.event_name == 'push' || github.base_ref != 'master' || (github.event.pull_request.head.repo.fork == (github.event_name == 'pull_request_target')) + # eliminate duplicate runs + if: github.event_name == 'push' || (github.event.pull_request.head.repo.fork == (github.event_name == 'pull_request_target')) permissions: # Gives the action the necessary permissions for publishing new diff --git a/.github/workflows/docker-ci.yml b/.github/workflows/docker-ci.yml index cacb58c5..3da88891 100644 --- a/.github/workflows/docker-ci.yml +++ b/.github/workflows/docker-ci.yml @@ -7,7 +7,7 @@ on: - 'tests/Auto-GPT-test-cassettes' - 'tests/challenges/current_score.json' pull_request: - branches: [ master, stable ] + branches: [ master, release-*, stable ] concurrency: group: ${{ format('docker-ci-{0}', github.head_ref && format('pr-{0}', github.event.pull_request.number) || github.sha) }} diff --git a/.github/workflows/pr-label.yml b/.github/workflows/pr-label.yml index e024f34b..ebeb7305 100644 --- a/.github/workflows/pr-label.yml +++ b/.github/workflows/pr-label.yml @@ -3,7 +3,7 @@ name: "Pull Request auto-label" on: # So that PRs touching the same files as the 
push are updated push: - branches: [ master ] + branches: [ master, release-* ] paths-ignore: - 'tests/Auto-GPT-test-cassettes' - 'tests/challenges/current_score.json' From a1e5be707763313b3841889ec6fafebfe09f3407 Mon Sep 17 00:00:00 2001 From: Reinier van der Leer Date: Thu, 15 Jun 2023 17:14:24 +0200 Subject: [PATCH 67/97] Update OpenAI model info and remove duplicate modelsinfo.py (#4700) * Update OpenAI model info and remove duplicate modelsinfo.py * Fix max_tokens for gpt-4-0613 --- autogpt/llm/api_manager.py | 13 ++++---- autogpt/llm/base.py | 15 ++++++--- autogpt/llm/modelsinfo.py | 11 ------- autogpt/llm/providers/openai.py | 44 +++++++++++++++---------- tests/unit/test_api_manager.py | 57 +++++++++++++++++++++------------ 5 files changed, 81 insertions(+), 59 deletions(-) delete mode 100644 autogpt/llm/modelsinfo.py diff --git a/autogpt/llm/api_manager.py b/autogpt/llm/api_manager.py index 7442579d..7a384562 100644 --- a/autogpt/llm/api_manager.py +++ b/autogpt/llm/api_manager.py @@ -6,8 +6,8 @@ import openai from openai import Model from autogpt.config import Config -from autogpt.llm.base import MessageDict -from autogpt.llm.modelsinfo import COSTS +from autogpt.llm.base import CompletionModelInfo, MessageDict +from autogpt.llm.providers.openai import OPEN_AI_MODELS from autogpt.logs import logger from autogpt.singleton import Singleton @@ -83,13 +83,14 @@ class ApiManager(metaclass=Singleton): """ # the .model property in API responses can contain version suffixes like -v2 model = model[:-3] if model.endswith("-v2") else model + model_info = OPEN_AI_MODELS[model] self.total_prompt_tokens += prompt_tokens self.total_completion_tokens += completion_tokens - self.total_cost += ( - prompt_tokens * COSTS[model]["prompt"] - + completion_tokens * COSTS[model]["completion"] - ) / 1000 + self.total_cost += prompt_tokens * model_info.prompt_token_cost / 1000 + if issubclass(type(model_info), CompletionModelInfo): + self.total_cost += completion_tokens * model_info.completion_token_cost / 1000 + logger.debug(f"Total running cost: ${self.total_cost:.3f}") def set_total_budget(self, total_budget): diff --git a/autogpt/llm/base.py b/autogpt/llm/base.py index 76bd3db1..43cc0ad9 100644 --- a/autogpt/llm/base.py +++ b/autogpt/llm/base.py @@ -31,22 +31,27 @@ class ModelInfo: Would be lovely to eventually get this directly from APIs, but needs to be scraped from websites for now. 
- """ name: str - prompt_token_cost: float - completion_token_cost: float max_tokens: int + prompt_token_cost: float @dataclass -class ChatModelInfo(ModelInfo): +class CompletionModelInfo(ModelInfo): + """Struct for generic completion model information.""" + + completion_token_cost: float + + +@dataclass +class ChatModelInfo(CompletionModelInfo): """Struct for chat model information.""" @dataclass -class TextModelInfo(ModelInfo): +class TextModelInfo(CompletionModelInfo): """Struct for text completion model information.""" diff --git a/autogpt/llm/modelsinfo.py b/autogpt/llm/modelsinfo.py deleted file mode 100644 index 425472de..00000000 --- a/autogpt/llm/modelsinfo.py +++ /dev/null @@ -1,11 +0,0 @@ -COSTS = { - "gpt-3.5-turbo": {"prompt": 0.002, "completion": 0.002}, - "gpt-3.5-turbo-0301": {"prompt": 0.002, "completion": 0.002}, - "gpt-4-0314": {"prompt": 0.03, "completion": 0.06}, - "gpt-4": {"prompt": 0.03, "completion": 0.06}, - "gpt-4-0314": {"prompt": 0.03, "completion": 0.06}, - "gpt-4-32k": {"prompt": 0.06, "completion": 0.12}, - "gpt-4-32k-0314": {"prompt": 0.06, "completion": 0.12}, - "text-embedding-ada-002": {"prompt": 0.0004, "completion": 0.0}, - "text-davinci-003": {"prompt": 0.02, "completion": 0.02}, -} diff --git a/autogpt/llm/providers/openai.py b/autogpt/llm/providers/openai.py index acaf0671..b4254cd1 100644 --- a/autogpt/llm/providers/openai.py +++ b/autogpt/llm/providers/openai.py @@ -3,23 +3,23 @@ from autogpt.llm.base import ChatModelInfo, EmbeddingModelInfo, TextModelInfo OPEN_AI_CHAT_MODELS = { info.name: info for info in [ - ChatModelInfo( - name="gpt-3.5-turbo", - prompt_token_cost=0.002, - completion_token_cost=0.002, - max_tokens=4096, - ), ChatModelInfo( name="gpt-3.5-turbo-0301", - prompt_token_cost=0.002, + prompt_token_cost=0.0015, completion_token_cost=0.002, max_tokens=4096, ), ChatModelInfo( - name="gpt-4", - prompt_token_cost=0.03, - completion_token_cost=0.06, - max_tokens=8192, + name="gpt-3.5-turbo-0613", + prompt_token_cost=0.0015, + completion_token_cost=0.002, + max_tokens=4096, + ), + ChatModelInfo( + name="gpt-3.5-turbo-16k-0613", + prompt_token_cost=0.003, + completion_token_cost=0.004, + max_tokens=16384, ), ChatModelInfo( name="gpt-4-0314", @@ -28,10 +28,10 @@ OPEN_AI_CHAT_MODELS = { max_tokens=8192, ), ChatModelInfo( - name="gpt-4-32k", - prompt_token_cost=0.06, - completion_token_cost=0.12, - max_tokens=32768, + name="gpt-4-0613", + prompt_token_cost=0.03, + completion_token_cost=0.06, + max_tokens=8192, ), ChatModelInfo( name="gpt-4-32k-0314", @@ -39,8 +39,19 @@ OPEN_AI_CHAT_MODELS = { completion_token_cost=0.12, max_tokens=32768, ), + ChatModelInfo( + name="gpt-4-32k-0613", + prompt_token_cost=0.06, + completion_token_cost=0.12, + max_tokens=32768, + ), ] } +# Set aliases for rolling model IDs +OPEN_AI_CHAT_MODELS["gpt-3.5-turbo"] = OPEN_AI_CHAT_MODELS["gpt-3.5-turbo-0301"] +OPEN_AI_CHAT_MODELS["gpt-3.5-turbo-16k"] = OPEN_AI_CHAT_MODELS["gpt-3.5-turbo-16k-0613"] +OPEN_AI_CHAT_MODELS["gpt-4"] = OPEN_AI_CHAT_MODELS["gpt-4-0314"] +OPEN_AI_CHAT_MODELS["gpt-4-32k"] = OPEN_AI_CHAT_MODELS["gpt-4-32k-0314"] OPEN_AI_TEXT_MODELS = { info.name: info @@ -59,8 +70,7 @@ OPEN_AI_EMBEDDING_MODELS = { for info in [ EmbeddingModelInfo( name="text-embedding-ada-002", - prompt_token_cost=0.0004, - completion_token_cost=0.0, + prompt_token_cost=0.0001, max_tokens=8191, embedding_dimensions=1536, ), diff --git a/tests/unit/test_api_manager.py b/tests/unit/test_api_manager.py index 9585fba7..e259f56a 100644 --- a/tests/unit/test_api_manager.py +++ 
b/tests/unit/test_api_manager.py @@ -1,8 +1,9 @@ from unittest.mock import MagicMock, patch import pytest +from pytest_mock import MockerFixture -from autogpt.llm.api_manager import COSTS, ApiManager +from autogpt.llm.api_manager import OPEN_AI_MODELS, ApiManager api_manager = ApiManager() @@ -14,16 +15,17 @@ def reset_api_manager(): @pytest.fixture(autouse=True) -def mock_costs(): - with patch.dict( - COSTS, - { - "gpt-3.5-turbo": {"prompt": 0.002, "completion": 0.002}, - "text-embedding-ada-002": {"prompt": 0.0004, "completion": 0}, - }, - clear=True, - ): - yield +def mock_costs(mocker: MockerFixture): + mocker.patch.multiple( + OPEN_AI_MODELS["gpt-3.5-turbo"], + prompt_token_cost=0.0013, + completion_token_cost=0.0025, + ) + mocker.patch.multiple( + OPEN_AI_MODELS["text-embedding-ada-002"], + prompt_token_cost=0.0004, + ) + yield class TestApiManager: @@ -87,15 +89,15 @@ class TestApiManager: assert api_manager.get_total_prompt_tokens() == 10 assert api_manager.get_total_completion_tokens() == 20 - assert api_manager.get_total_cost() == (10 * 0.002 + 20 * 0.002) / 1000 + assert api_manager.get_total_cost() == (10 * 0.0013 + 20 * 0.0025) / 1000 def test_getter_methods(self): """Test the getter methods for total tokens, cost, and budget.""" - api_manager.update_cost(60, 120, "gpt-3.5-turbo") + api_manager.update_cost(600, 1200, "gpt-3.5-turbo") api_manager.set_total_budget(10.0) - assert api_manager.get_total_prompt_tokens() == 60 - assert api_manager.get_total_completion_tokens() == 120 - assert api_manager.get_total_cost() == (60 * 0.002 + 120 * 0.002) / 1000 + assert api_manager.get_total_prompt_tokens() == 600 + assert api_manager.get_total_completion_tokens() == 1200 + assert api_manager.get_total_cost() == (600 * 0.0013 + 1200 * 0.0025) / 1000 assert api_manager.get_total_budget() == 10.0 @staticmethod @@ -107,7 +109,7 @@ class TestApiManager: assert api_manager.get_total_budget() == total_budget @staticmethod - def test_update_cost(): + def test_update_cost_completion_model(): """Test if updating the cost works correctly.""" prompt_tokens = 50 completion_tokens = 100 @@ -115,9 +117,24 @@ class TestApiManager: api_manager.update_cost(prompt_tokens, completion_tokens, model) - assert api_manager.get_total_prompt_tokens() == 50 - assert api_manager.get_total_completion_tokens() == 100 - assert api_manager.get_total_cost() == (50 * 0.002 + 100 * 0.002) / 1000 + assert api_manager.get_total_prompt_tokens() == prompt_tokens + assert api_manager.get_total_completion_tokens() == completion_tokens + assert ( + api_manager.get_total_cost() + == (prompt_tokens * 0.0013 + completion_tokens * 0.0025) / 1000 + ) + + @staticmethod + def test_update_cost_embedding_model(): + """Test if updating the cost works correctly.""" + prompt_tokens = 1337 + model = "text-embedding-ada-002" + + api_manager.update_cost(prompt_tokens, 0, model) + + assert api_manager.get_total_prompt_tokens() == prompt_tokens + assert api_manager.get_total_completion_tokens() == 0 + assert api_manager.get_total_cost() == (prompt_tokens * 0.0004) / 1000 @staticmethod def test_get_models(): From f0a5250da5d8893ba8ec7318384d4e2996f98df4 Mon Sep 17 00:00:00 2001 From: Reinier van der Leer Date: Thu, 15 Jun 2023 17:45:14 +0200 Subject: [PATCH 68/97] Implement loading `MemoryItem`s from file in `JSONFileMemory` (#4703) Further changes: * remove `init` param from `get_memory()`, replace usages by `memory.clear()` * make token length calculation optional in `MemoryItem.dump()` --- autogpt/commands/file_operations.py | 2 +- 
autogpt/main.py | 3 +- autogpt/memory/vector/__init__.py | 4 +- autogpt/memory/vector/memory_item.py | 48 +++++++++++++++---- autogpt/memory/vector/providers/json_file.py | 23 ++++++++- data_ingestion.py | 4 +- tests/conftest.py | 3 +- tests/integration/agent_factory.py | 4 +- .../memory/test_json_file_memory.py | 22 ++++++++- 9 files changed, 93 insertions(+), 20 deletions(-) diff --git a/autogpt/commands/file_operations.py b/autogpt/commands/file_operations.py index a0a61539..2ff14844 100644 --- a/autogpt/commands/file_operations.py +++ b/autogpt/commands/file_operations.py @@ -160,7 +160,7 @@ def ingest_file( # TODO: differentiate between different types of files file_memory = MemoryItem.from_text_file(content, filename) - logger.debug(f"Created memory: {file_memory.dump()}") + logger.debug(f"Created memory: {file_memory.dump(True)}") memory.add(file_memory) logger.info(f"Ingested {len(file_memory.e_chunks)} chunks from {filename}") diff --git a/autogpt/main.py b/autogpt/main.py index ab0a1533..ce6a983d 100644 --- a/autogpt/main.py +++ b/autogpt/main.py @@ -175,7 +175,8 @@ def run_auto_gpt( # Initialize memory and make sure it is empty. # this is particularly important for indexing and referencing pinecone memory - memory = get_memory(cfg, init=True) + memory = get_memory(cfg) + memory.clear() logger.typewriter_log( "Using memory of type:", Fore.GREEN, f"{memory.__class__.__name__}" ) diff --git a/autogpt/memory/vector/__init__.py b/autogpt/memory/vector/__init__.py index aaaf83fb..11c9d697 100644 --- a/autogpt/memory/vector/__init__.py +++ b/autogpt/memory/vector/__init__.py @@ -39,7 +39,7 @@ supported_memory = ["json_file", "no_memory"] # MilvusMemory = None -def get_memory(cfg: Config, init=False) -> VectorMemory: +def get_memory(cfg: Config) -> VectorMemory: memory = None match cfg.memory_backend: @@ -60,7 +60,7 @@ def get_memory(cfg: Config, init=False) -> VectorMemory: # ) # else: # memory = PineconeMemory(cfg) - # if init: + # if clear: # memory.clear() case "redis": diff --git a/autogpt/memory/vector/memory_item.py b/autogpt/memory/vector/memory_item.py index c57b87aa..539ccb61 100644 --- a/autogpt/memory/vector/memory_item.py +++ b/autogpt/memory/vector/memory_item.py @@ -109,21 +109,21 @@ class MemoryItem: # The result_message contains either user feedback # or the result of the command specified in ai_message - if ai_message["role"] != "assistant": - raise ValueError(f"Invalid role on 'ai_message': {ai_message['role']}") + if ai_message.role != "assistant": + raise ValueError(f"Invalid role on 'ai_message': {ai_message.role}") result = ( - result_message["content"] - if result_message["content"].startswith("Command") + result_message.content + if result_message.content.startswith("Command") else "None" ) user_input = ( - result_message["content"] - if result_message["content"].startswith("Human feedback") + result_message.content + if result_message.content.startswith("Human feedback") else "None" ) memory_content = ( - f"Assistant Reply: {ai_message['content']}" + f"Assistant Reply: {ai_message.content}" "\n\n" f"Result: {result}" "\n\n" @@ -145,11 +145,14 @@ class MemoryItem: question_for_summary=question, ) - def dump(self) -> str: - token_length = count_string_tokens(self.raw_content, Config().embedding_model) + def dump(self, calculate_length=False) -> str: + if calculate_length: + token_length = count_string_tokens( + self.raw_content, Config().embedding_model + ) return f""" =============== MemoryItem =============== -Length: {token_length} tokens in 
{len(self.e_chunks)} chunks +Size: {f'{token_length} tokens in ' if calculate_length else ''}{len(self.e_chunks)} chunks Metadata: {json.dumps(self.metadata, indent=2)} ---------------- SUMMARY ----------------- {self.summary} @@ -158,6 +161,31 @@ Metadata: {json.dumps(self.metadata, indent=2)} ========================================== """ + def __eq__(self, other: MemoryItem): + return ( + self.raw_content == other.raw_content + and self.chunks == other.chunks + and self.chunk_summaries == other.chunk_summaries + # Embeddings can either be list[float] or np.ndarray[float32], + # and for comparison they must be of the same type + and np.array_equal( + self.e_summary + if isinstance(self.e_summary, np.ndarray) + else np.array(self.e_summary, dtype=np.float32), + other.e_summary + if isinstance(other.e_summary, np.ndarray) + else np.array(other.e_summary, dtype=np.float32), + ) + and np.array_equal( + self.e_chunks + if isinstance(self.e_chunks[0], np.ndarray) + else [np.array(c, dtype=np.float32) for c in self.e_chunks], + other.e_chunks + if isinstance(other.e_chunks[0], np.ndarray) + else [np.array(c, dtype=np.float32) for c in other.e_chunks], + ) + ) + @dataclasses.dataclass class MemoryItemRelevance: diff --git a/autogpt/memory/vector/providers/json_file.py b/autogpt/memory/vector/providers/json_file.py index 46446a9c..3ae7cd86 100644 --- a/autogpt/memory/vector/providers/json_file.py +++ b/autogpt/memory/vector/providers/json_file.py @@ -32,10 +32,17 @@ class JSONFileMemory(VectorMemoryProvider): workspace_path = Path(cfg.workspace_path) self.file_path = workspace_path / f"{cfg.memory_index}.json" self.file_path.touch() - logger.debug(f"Initialized {__name__} with index path {self.file_path}") + logger.debug( + f"Initialized {__class__.__name__} with index path {self.file_path}" + ) self.memories = [] - self.save_index() + try: + self.load_index() + logger.debug(f"Loaded {len(self.memories)} MemoryItems from file") + except Exception as e: + logger.warn(f"Could not load MemoryItems from file: {e}") + self.save_index() def __iter__(self) -> Iterator[MemoryItem]: return iter(self.memories) @@ -48,6 +55,7 @@ class JSONFileMemory(VectorMemoryProvider): def add(self, item: MemoryItem): self.memories.append(item) + logger.debug(f"Adding item to memory: {item.dump()}") self.save_index() return len(self.memories) @@ -62,6 +70,17 @@ class JSONFileMemory(VectorMemoryProvider): self.memories.clear() self.save_index() + def load_index(self): + """Loads all memories from the index file""" + if not self.file_path.is_file(): + logger.debug(f"Index file '{self.file_path}' does not exist") + return + with self.file_path.open("r") as f: + logger.debug(f"Loading memories from index file '{self.file_path}'") + json_index = orjson.loads(f.read()) + for memory_item_dict in json_index: + self.memories.append(MemoryItem(**memory_item_dict)) + def save_index(self): logger.debug(f"Saving memory index to file {self.file_path}") with self.file_path.open("wb") as f: diff --git a/data_ingestion.py b/data_ingestion.py index e2d98d1c..09d5328c 100644 --- a/data_ingestion.py +++ b/data_ingestion.py @@ -70,7 +70,9 @@ def main() -> None: args = parser.parse_args() # Initialize memory - memory = get_memory(cfg, init=args.init) + memory = get_memory(cfg) + if args.init: + memory.clear() logger.debug("Using memory of type: " + memory.__class__.__name__) if args.file: diff --git a/tests/conftest.py b/tests/conftest.py index 2342a3b0..671096fd 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -94,7 +94,8 @@ def 
agent(config: Config, workspace: Workspace) -> Agent: ai_config.command_registry = command_registry config.set_memory_backend("json_file") - memory_json_file = get_memory(config, init=True) + memory_json_file = get_memory(config) + memory_json_file.clear() system_prompt = ai_config.construct_full_prompt() diff --git a/tests/integration/agent_factory.py b/tests/integration/agent_factory.py index fff3867e..c9b99f50 100644 --- a/tests/integration/agent_factory.py +++ b/tests/integration/agent_factory.py @@ -28,7 +28,9 @@ def memory_json_file(agent_test_config: Config): was_memory_backend = agent_test_config.memory_backend agent_test_config.set_memory_backend("json_file") - yield get_memory(agent_test_config, init=True) + memory = get_memory(agent_test_config) + memory.clear() + yield memory agent_test_config.set_memory_backend(was_memory_backend) diff --git a/tests/integration/memory/test_json_file_memory.py b/tests/integration/memory/test_json_file_memory.py index 9134a069..41a3e174 100644 --- a/tests/integration/memory/test_json_file_memory.py +++ b/tests/integration/memory/test_json_file_memory.py @@ -34,7 +34,9 @@ def test_json_memory_init_with_backing_empty_file(config: Config, workspace: Wor assert index_file.read_text() == "[]" -def test_json_memory_init_with_backing_file(config: Config, workspace: Workspace): +def test_json_memory_init_with_backing_invalid_file( + config: Config, workspace: Workspace +): index_file = workspace.root / f"{config.memory_index}.json" index_file.touch() @@ -78,6 +80,24 @@ def test_json_memory_get(config: Config, memory_item: MemoryItem, mock_get_embed assert retrieved.memory_item == memory_item +def test_json_memory_load_index(config: Config, memory_item: MemoryItem): + index = JSONFileMemory(config) + index.add(memory_item) + + try: + assert index.file_path.exists(), "index was not saved to file" + assert len(index) == 1, f"index constains {len(index)} items instead of 1" + assert index.memories[0] == memory_item, "item in index != added mock item" + except AssertionError as e: + raise ValueError(f"Setting up for load_index test failed: {e}") + + index.memories = [] + index.load_index() + + assert len(index) == 1 + assert index.memories[0] == memory_item + + @pytest.mark.vcr @requires_api_key("OPENAI_API_KEY") def test_json_memory_get_relevant(config: Config, patched_api_requestor: None) -> None: From 3525a4b6dbed72408f72dc2c595570713492ddb4 Mon Sep 17 00:00:00 2001 From: merwanehamadi Date: Thu, 15 Jun 2023 09:09:59 -0700 Subject: [PATCH 69/97] Count tokens with tiktoken (#4704) * Update OpenAI model info and remove duplicate modelsinfo.py (#4700) * Update OpenAI model info and remove duplicate modelsinfo.py * Fix max_tokens for gpt-4-0613 Signed-off-by: Merwane Hamadi Co-authored-by: Merwane Hamadi * Update count_message_tokens to support new OpenAI models Signed-off-by: Merwane Hamadi Co-authored-by: Merwane Hamadi * Fix error message in count_message_tokens --------- Signed-off-by: Merwane Hamadi Co-authored-by: Erik Peterson Co-authored-by: Reinier van der Leer --- .gitignore | 1 + autogpt/llm/api_manager.py | 4 +++- autogpt/llm/utils/token_counter.py | 26 +++++++++++--------------- 3 files changed, 15 insertions(+), 16 deletions(-) diff --git a/.gitignore b/.gitignore index 307a6723..29a0285a 100644 --- a/.gitignore +++ b/.gitignore @@ -32,6 +32,7 @@ build/ develop-eggs/ dist/ plugins/ +plugins_config.yaml downloads/ eggs/ .eggs/ diff --git a/autogpt/llm/api_manager.py b/autogpt/llm/api_manager.py index 7a384562..454b4f22 100644 --- 
a/autogpt/llm/api_manager.py +++ b/autogpt/llm/api_manager.py @@ -89,7 +89,9 @@ class ApiManager(metaclass=Singleton): self.total_completion_tokens += completion_tokens self.total_cost += prompt_tokens * model_info.prompt_token_cost / 1000 if issubclass(type(model_info), CompletionModelInfo): - self.total_cost += completion_tokens * model_info.completion_token_cost / 1000 + self.total_cost += ( + completion_tokens * model_info.completion_token_cost / 1000 + ) logger.debug(f"Total running cost: ${self.total_cost:.3f}") diff --git a/autogpt/llm/utils/token_counter.py b/autogpt/llm/utils/token_counter.py index bd1dcf1b..e34dbd1c 100644 --- a/autogpt/llm/utils/token_counter.py +++ b/autogpt/llm/utils/token_counter.py @@ -24,32 +24,28 @@ def count_message_tokens( Returns: int: The number of tokens used by the list of messages. """ - try: - encoding = tiktoken.encoding_for_model(model) - except KeyError: - logger.warn("Warning: model not found. Using cl100k_base encoding.") - encoding = tiktoken.get_encoding("cl100k_base") - if model == "gpt-3.5-turbo": - # !Note: gpt-3.5-turbo may change over time. - # Returning num tokens assuming gpt-3.5-turbo-0301.") - return count_message_tokens(messages, model="gpt-3.5-turbo-0301") - elif model == "gpt-4": - # !Note: gpt-4 may change over time. Returning num tokens assuming gpt-4-0314.") - return count_message_tokens(messages, model="gpt-4-0314") - elif model == "gpt-3.5-turbo-0301": + if model.startswith("gpt-3.5-turbo"): tokens_per_message = ( 4 # every message follows <|start|>{role/name}\n{content}<|end|>\n ) tokens_per_name = -1 # if there's a name, the role is omitted - elif model == "gpt-4-0314": + encoding_model = "gpt-3.5-turbo" + elif model.startswith("gpt-4"): tokens_per_message = 3 tokens_per_name = 1 + encoding_model = "gpt-4" else: raise NotImplementedError( - f"num_tokens_from_messages() is not implemented for model {model}.\n" + f"count_message_tokens() is not implemented for model {model}.\n" " See https://github.com/openai/openai-python/blob/main/chatml.md for" " information on how messages are converted to tokens." ) + try: + encoding = tiktoken.encoding_for_model(encoding_model) + except KeyError: + logger.warn("Warning: model not found. 
Using cl100k_base encoding.") + encoding = tiktoken.get_encoding("cl100k_base") + num_tokens = 0 for message in messages: num_tokens += tokens_per_message From 426cfef882e61fb47e5390867083bd585ea7956a Mon Sep 17 00:00:00 2001 From: Reinier van der Leer Date: Thu, 15 Jun 2023 18:19:03 +0200 Subject: [PATCH 70/97] Unpin OpenAI model versions in model info mapping --- autogpt/llm/providers/openai.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/autogpt/llm/providers/openai.py b/autogpt/llm/providers/openai.py index b4254cd1..eede1a7d 100644 --- a/autogpt/llm/providers/openai.py +++ b/autogpt/llm/providers/openai.py @@ -48,10 +48,16 @@ OPEN_AI_CHAT_MODELS = { ] } # Set aliases for rolling model IDs -OPEN_AI_CHAT_MODELS["gpt-3.5-turbo"] = OPEN_AI_CHAT_MODELS["gpt-3.5-turbo-0301"] -OPEN_AI_CHAT_MODELS["gpt-3.5-turbo-16k"] = OPEN_AI_CHAT_MODELS["gpt-3.5-turbo-16k-0613"] -OPEN_AI_CHAT_MODELS["gpt-4"] = OPEN_AI_CHAT_MODELS["gpt-4-0314"] -OPEN_AI_CHAT_MODELS["gpt-4-32k"] = OPEN_AI_CHAT_MODELS["gpt-4-32k-0314"] +chat_model_mapping = { + "gpt-3.5-turbo": "gpt-3.5-turbo-0301", + "gpt-3.5-turbo-16k": "gpt-3.5-turbo-16k-0613", + "gpt-4": "gpt-4-0314", + "gpt-4-32k": "gpt-4-32k-0314", +} +for alias, target in chat_model_mapping.items(): + alias_info = ChatModelInfo(**OPEN_AI_CHAT_MODELS[target].__dict__) + alias_info.name = alias + OPEN_AI_CHAT_MODELS[alias] = alias_info OPEN_AI_TEXT_MODELS = { info.name: info From 12588b6483b181d9389caa1419d408e7bb756ad9 Mon Sep 17 00:00:00 2001 From: Reinier van der Leer Date: Thu, 15 Jun 2023 18:50:26 +0200 Subject: [PATCH 71/97] Fix openai.py linting errors --- autogpt/llm/providers/openai.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/autogpt/llm/providers/openai.py b/autogpt/llm/providers/openai.py index eede1a7d..0f24b56e 100644 --- a/autogpt/llm/providers/openai.py +++ b/autogpt/llm/providers/openai.py @@ -49,10 +49,10 @@ OPEN_AI_CHAT_MODELS = { } # Set aliases for rolling model IDs chat_model_mapping = { - "gpt-3.5-turbo": "gpt-3.5-turbo-0301", + "gpt-3.5-turbo": "gpt-3.5-turbo-0301", "gpt-3.5-turbo-16k": "gpt-3.5-turbo-16k-0613", - "gpt-4": "gpt-4-0314", - "gpt-4-32k": "gpt-4-32k-0314", + "gpt-4": "gpt-4-0314", + "gpt-4-32k": "gpt-4-32k-0314", } for alias, target in chat_model_mapping.items(): alias_info = ChatModelInfo(**OPEN_AI_CHAT_MODELS[target].__dict__) From 195a7fcad8ad518dd5a3d5ffe3180abc06cfb3a3 Mon Sep 17 00:00:00 2001 From: Erik Peterson Date: Thu, 15 Jun 2023 11:34:41 -0700 Subject: [PATCH 72/97] Refactor module layout of command classes (#4706) --- autogpt/agent/agent.py | 2 +- autogpt/app.py | 3 +- autogpt/command_decorator.py | 51 +++++++++++ autogpt/commands/analyze_code.py | 2 +- autogpt/commands/audio_text.py | 2 +- autogpt/commands/execute_code.py | 2 +- autogpt/commands/file_operations.py | 2 +- autogpt/commands/git_operations.py | 2 +- autogpt/commands/google_search.py | 2 +- autogpt/commands/image_gen.py | 2 +- autogpt/commands/improve_code.py | 2 +- autogpt/commands/task_statuses.py | 2 +- autogpt/commands/web_selenium.py | 2 +- autogpt/commands/write_tests.py | 2 +- autogpt/config/ai_config.py | 2 +- autogpt/main.py | 2 +- autogpt/models/__init__.py | 0 autogpt/models/command.py | 41 +++++++++ .../command.py => models/command_registry.py} | 89 +------------------ autogpt/prompts/generator.py | 2 +- tests/conftest.py | 2 +- tests/integration/agent_factory.py | 2 +- tests/mocks/mock_commands.py | 2 +- tests/unit/test_commands.py | 3 +- 24 files changed, 117 insertions(+), 106 
deletions(-) create mode 100644 autogpt/command_decorator.py create mode 100644 autogpt/models/__init__.py create mode 100644 autogpt/models/command.py rename autogpt/{commands/command.py => models/command_registry.py} (56%) diff --git a/autogpt/agent/agent.py b/autogpt/agent/agent.py index 5a236f67..2fed0d4b 100644 --- a/autogpt/agent/agent.py +++ b/autogpt/agent/agent.py @@ -5,7 +5,6 @@ from datetime import datetime from colorama import Fore, Style -from autogpt.commands.command import CommandRegistry from autogpt.config import Config from autogpt.config.ai_config import AIConfig from autogpt.json_utils.utilities import extract_json_from_response, validate_json @@ -24,6 +23,7 @@ from autogpt.log_cycle.log_cycle import ( from autogpt.logs import logger, print_assistant_thoughts from autogpt.memory.message_history import MessageHistory from autogpt.memory.vector import VectorMemory +from autogpt.models.command_registry import CommandRegistry from autogpt.speech import say_text from autogpt.spinner import Spinner from autogpt.utils import clean_input diff --git a/autogpt/app.py b/autogpt/app.py index c081835c..fee3413a 100644 --- a/autogpt/app.py +++ b/autogpt/app.py @@ -4,8 +4,9 @@ from typing import Dict, List, Union from autogpt.agent.agent import Agent from autogpt.agent.agent_manager import AgentManager -from autogpt.commands.command import CommandRegistry, command +from autogpt.command_decorator import command from autogpt.commands.web_requests import scrape_links, scrape_text +from autogpt.models.command_registry import CommandRegistry from autogpt.processing.text import summarize_text from autogpt.speech import say_text from autogpt.url_utils.validators import validate_url diff --git a/autogpt/command_decorator.py b/autogpt/command_decorator.py new file mode 100644 index 00000000..3f8279e4 --- /dev/null +++ b/autogpt/command_decorator.py @@ -0,0 +1,51 @@ +import functools +from typing import Any, Callable, Optional + +from autogpt.config import Config +from autogpt.logs import logger +from autogpt.models.command import Command + +# Unique identifier for auto-gpt commands +AUTO_GPT_COMMAND_IDENTIFIER = "auto_gpt_command" + + +def command( + name: str, + description: str, + signature: str, + enabled: bool | Callable[[Config], bool] = True, + disabled_reason: Optional[str] = None, +) -> Callable[..., Any]: + """The command decorator is used to create Command objects from ordinary functions.""" + + # TODO: Remove this in favor of better command management + CFG = Config() + + if callable(enabled): + enabled = enabled(CFG) + if not enabled: + if disabled_reason is not None: + logger.debug(f"Command '{name}' is disabled: {disabled_reason}") + return lambda func: func + + def decorator(func: Callable[..., Any]) -> Command: + cmd = Command( + name=name, + description=description, + method=func, + signature=signature, + enabled=enabled, + disabled_reason=disabled_reason, + ) + + @functools.wraps(func) + def wrapper(*args, **kwargs) -> Any: + return func(*args, **kwargs) + + wrapper.command = cmd + + setattr(wrapper, AUTO_GPT_COMMAND_IDENTIFIER, True) + + return wrapper + + return decorator diff --git a/autogpt/commands/analyze_code.py b/autogpt/commands/analyze_code.py index ca7fcb01..cd176a9a 100644 --- a/autogpt/commands/analyze_code.py +++ b/autogpt/commands/analyze_code.py @@ -2,7 +2,7 @@ from __future__ import annotations from autogpt.agent.agent import Agent -from autogpt.commands.command import command +from autogpt.command_decorator import command from autogpt.llm.utils import 
call_ai_function diff --git a/autogpt/commands/audio_text.py b/autogpt/commands/audio_text.py index 2991fff3..e77e37cc 100644 --- a/autogpt/commands/audio_text.py +++ b/autogpt/commands/audio_text.py @@ -4,7 +4,7 @@ import json import requests from autogpt.agent.agent import Agent -from autogpt.commands.command import command +from autogpt.command_decorator import command @command( diff --git a/autogpt/commands/execute_code.py b/autogpt/commands/execute_code.py index 109caa3a..c422d652 100644 --- a/autogpt/commands/execute_code.py +++ b/autogpt/commands/execute_code.py @@ -7,7 +7,7 @@ import docker from docker.errors import ImageNotFound from autogpt.agent.agent import Agent -from autogpt.commands.command import command +from autogpt.command_decorator import command from autogpt.config import Config from autogpt.logs import logger from autogpt.setup import CFG diff --git a/autogpt/commands/file_operations.py b/autogpt/commands/file_operations.py index 2ff14844..d74fee96 100644 --- a/autogpt/commands/file_operations.py +++ b/autogpt/commands/file_operations.py @@ -13,7 +13,7 @@ from confection import Config from requests.adapters import HTTPAdapter, Retry from autogpt.agent.agent import Agent -from autogpt.commands.command import command +from autogpt.command_decorator import command from autogpt.commands.file_operations_utils import read_textual_file from autogpt.logs import logger from autogpt.memory.vector import MemoryItem, VectorMemory diff --git a/autogpt/commands/git_operations.py b/autogpt/commands/git_operations.py index e844fd41..8dfe213c 100644 --- a/autogpt/commands/git_operations.py +++ b/autogpt/commands/git_operations.py @@ -3,7 +3,7 @@ from git.repo import Repo from autogpt.agent.agent import Agent -from autogpt.commands.command import command +from autogpt.command_decorator import command from autogpt.url_utils.validators import validate_url diff --git a/autogpt/commands/google_search.py b/autogpt/commands/google_search.py index b9d243f9..e6a1fc05 100644 --- a/autogpt/commands/google_search.py +++ b/autogpt/commands/google_search.py @@ -8,7 +8,7 @@ from itertools import islice from duckduckgo_search import DDGS from autogpt.agent.agent import Agent -from autogpt.commands.command import command +from autogpt.command_decorator import command DUCKDUCKGO_MAX_ATTEMPTS = 3 diff --git a/autogpt/commands/image_gen.py b/autogpt/commands/image_gen.py index b2dc9ea4..5bed8e00 100644 --- a/autogpt/commands/image_gen.py +++ b/autogpt/commands/image_gen.py @@ -10,7 +10,7 @@ import requests from PIL import Image from autogpt.agent.agent import Agent -from autogpt.commands.command import command +from autogpt.command_decorator import command from autogpt.logs import logger diff --git a/autogpt/commands/improve_code.py b/autogpt/commands/improve_code.py index 05e9b51c..d4f87782 100644 --- a/autogpt/commands/improve_code.py +++ b/autogpt/commands/improve_code.py @@ -3,7 +3,7 @@ from __future__ import annotations import json from autogpt.agent.agent import Agent -from autogpt.commands.command import command +from autogpt.command_decorator import command from autogpt.llm.utils import call_ai_function diff --git a/autogpt/commands/task_statuses.py b/autogpt/commands/task_statuses.py index 283328a3..d5718fd3 100644 --- a/autogpt/commands/task_statuses.py +++ b/autogpt/commands/task_statuses.py @@ -4,7 +4,7 @@ from __future__ import annotations from typing import NoReturn from autogpt.agent.agent import Agent -from autogpt.commands.command import command +from autogpt.command_decorator import 
command from autogpt.logs import logger diff --git a/autogpt/commands/web_selenium.py b/autogpt/commands/web_selenium.py index 14036c85..bdc5e613 100644 --- a/autogpt/commands/web_selenium.py +++ b/autogpt/commands/web_selenium.py @@ -28,7 +28,7 @@ from webdriver_manager.firefox import GeckoDriverManager from webdriver_manager.microsoft import EdgeChromiumDriverManager as EdgeDriverManager from autogpt.agent.agent import Agent -from autogpt.commands.command import command +from autogpt.command_decorator import command from autogpt.logs import logger from autogpt.memory.vector import MemoryItem, get_memory from autogpt.processing.html import extract_hyperlinks, format_hyperlinks diff --git a/autogpt/commands/write_tests.py b/autogpt/commands/write_tests.py index c09930b9..881b6ac4 100644 --- a/autogpt/commands/write_tests.py +++ b/autogpt/commands/write_tests.py @@ -4,7 +4,7 @@ from __future__ import annotations import json from autogpt.agent.agent import Agent -from autogpt.commands.command import command +from autogpt.command_decorator import command from autogpt.llm.utils import call_ai_function diff --git a/autogpt/config/ai_config.py b/autogpt/config/ai_config.py index 1a526832..d118be3f 100644 --- a/autogpt/config/ai_config.py +++ b/autogpt/config/ai_config.py @@ -13,7 +13,7 @@ import distro import yaml if TYPE_CHECKING: - from autogpt.commands.command import CommandRegistry + from autogpt.models.command_registry import CommandRegistry from autogpt.prompts.generator import PromptGenerator # Soon this will go in a folder where it remembers more stuff about the run(s) diff --git a/autogpt/main.py b/autogpt/main.py index ce6a983d..97baa7b2 100644 --- a/autogpt/main.py +++ b/autogpt/main.py @@ -6,11 +6,11 @@ from pathlib import Path from colorama import Fore, Style from autogpt.agent import Agent -from autogpt.commands.command import CommandRegistry from autogpt.config import Config, check_openai_api_key from autogpt.configurator import create_config from autogpt.logs import logger from autogpt.memory.vector import get_memory +from autogpt.models.command_registry import CommandRegistry from autogpt.plugins import scan_plugins from autogpt.prompts.prompt import DEFAULT_TRIGGERING_PROMPT, construct_main_ai_config from autogpt.utils import ( diff --git a/autogpt/models/__init__.py b/autogpt/models/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/autogpt/models/command.py b/autogpt/models/command.py new file mode 100644 index 00000000..a925ca04 --- /dev/null +++ b/autogpt/models/command.py @@ -0,0 +1,41 @@ +from typing import Any, Callable, Optional + +from autogpt.config import Config + + +class Command: + """A class representing a command. + + Attributes: + name (str): The name of the command. + description (str): A brief description of what the command does. + signature (str): The signature of the function that the command executes. Defaults to None. 
+ """ + + def __init__( + self, + name: str, + description: str, + method: Callable[..., Any], + signature: str = "", + enabled: bool | Callable[[Config], bool] = True, + disabled_reason: Optional[str] = None, + ): + self.name = name + self.description = description + self.method = method + self.signature = signature + self.enabled = enabled + self.disabled_reason = disabled_reason + + def __call__(self, *args, **kwargs) -> Any: + if hasattr(kwargs, "config") and callable(self.enabled): + self.enabled = self.enabled(kwargs["config"]) + if not self.enabled: + if self.disabled_reason: + return f"Command '{self.name}' is disabled: {self.disabled_reason}" + return f"Command '{self.name}' is disabled" + return self.method(*args, **kwargs) + + def __str__(self) -> str: + return f"{self.name}: {self.description}, args: {self.signature}" diff --git a/autogpt/commands/command.py b/autogpt/models/command_registry.py similarity index 56% rename from autogpt/commands/command.py rename to autogpt/models/command_registry.py index 742cc8df..29d0143d 100644 --- a/autogpt/commands/command.py +++ b/autogpt/models/command_registry.py @@ -1,51 +1,10 @@ -import functools import importlib import inspect -from typing import Any, Callable, Optional +from typing import Any, Callable -from autogpt.config import Config +from autogpt.command_decorator import AUTO_GPT_COMMAND_IDENTIFIER from autogpt.logs import logger - -# Unique identifier for auto-gpt commands -AUTO_GPT_COMMAND_IDENTIFIER = "auto_gpt_command" - - -class Command: - """A class representing a command. - - Attributes: - name (str): The name of the command. - description (str): A brief description of what the command does. - signature (str): The signature of the function that the command executes. Defaults to None. 
- """ - - def __init__( - self, - name: str, - description: str, - method: Callable[..., Any], - signature: str = "", - enabled: bool | Callable[[Config], bool] = True, - disabled_reason: Optional[str] = None, - ): - self.name = name - self.description = description - self.method = method - self.signature = signature - self.enabled = enabled - self.disabled_reason = disabled_reason - - def __call__(self, *args, **kwargs) -> Any: - if hasattr(kwargs, "config") and callable(self.enabled): - self.enabled = self.enabled(kwargs["config"]) - if not self.enabled: - if self.disabled_reason: - return f"Command '{self.name}' is disabled: {self.disabled_reason}" - return f"Command '{self.name}' is disabled" - return self.method(*args, **kwargs) - - def __str__(self) -> str: - return f"{self.name}: {self.description}, args: {self.signature}" +from autogpt.models.command import Command class CommandRegistry: @@ -133,45 +92,3 @@ class CommandRegistry: ): cmd_instance = attr() self.register(cmd_instance) - - -def command( - name: str, - description: str, - signature: str, - enabled: bool | Callable[[Config], bool] = True, - disabled_reason: Optional[str] = None, -) -> Callable[..., Any]: - """The command decorator is used to create Command objects from ordinary functions.""" - - # TODO: Remove this in favor of better command management - CFG = Config() - - if callable(enabled): - enabled = enabled(CFG) - if not enabled: - if disabled_reason is not None: - logger.debug(f"Command '{name}' is disabled: {disabled_reason}") - return lambda func: func - - def decorator(func: Callable[..., Any]) -> Command: - cmd = Command( - name=name, - description=description, - method=func, - signature=signature, - enabled=enabled, - disabled_reason=disabled_reason, - ) - - @functools.wraps(func) - def wrapper(*args, **kwargs) -> Any: - return func(*args, **kwargs) - - wrapper.command = cmd - - setattr(wrapper, AUTO_GPT_COMMAND_IDENTIFIER, True) - - return wrapper - - return decorator diff --git a/autogpt/prompts/generator.py b/autogpt/prompts/generator.py index 7101acfe..2a0334bf 100644 --- a/autogpt/prompts/generator.py +++ b/autogpt/prompts/generator.py @@ -4,7 +4,7 @@ from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional from autogpt.json_utils.utilities import llm_response_schema if TYPE_CHECKING: - from autogpt.commands.command import CommandRegistry + from autogpt.models.command_registry import CommandRegistry class PromptGenerator: diff --git a/tests/conftest.py b/tests/conftest.py index 671096fd..97620e21 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -7,12 +7,12 @@ import yaml from pytest_mock import MockerFixture from autogpt.agent.agent import Agent -from autogpt.commands.command import CommandRegistry from autogpt.config.ai_config import AIConfig from autogpt.config.config import Config from autogpt.llm.api_manager import ApiManager from autogpt.logs import TypingConsoleHandler from autogpt.memory.vector import get_memory +from autogpt.models.command_registry import CommandRegistry from autogpt.prompts.prompt import DEFAULT_TRIGGERING_PROMPT from autogpt.workspace import Workspace diff --git a/tests/integration/agent_factory.py b/tests/integration/agent_factory.py index c9b99f50..9078a843 100644 --- a/tests/integration/agent_factory.py +++ b/tests/integration/agent_factory.py @@ -1,10 +1,10 @@ import pytest from autogpt.agent import Agent -from autogpt.commands.command import CommandRegistry from autogpt.config import AIConfig, Config from autogpt.main import COMMAND_CATEGORIES from 
autogpt.memory.vector import NoMemory, get_memory +from autogpt.models.command_registry import CommandRegistry from autogpt.prompts.prompt import DEFAULT_TRIGGERING_PROMPT from autogpt.workspace import Workspace diff --git a/tests/mocks/mock_commands.py b/tests/mocks/mock_commands.py index 42b0ea11..7b16f1d1 100644 --- a/tests/mocks/mock_commands.py +++ b/tests/mocks/mock_commands.py @@ -1,4 +1,4 @@ -from autogpt.commands.command import command +from autogpt.command_decorator import command @command( diff --git a/tests/unit/test_commands.py b/tests/unit/test_commands.py index 5779a8a3..f02cb620 100644 --- a/tests/unit/test_commands.py +++ b/tests/unit/test_commands.py @@ -5,7 +5,8 @@ from pathlib import Path import pytest -from autogpt.commands.command import Command, CommandRegistry +from autogpt.models.command import Command +from autogpt.models.command_registry import CommandRegistry SIGNATURE = "(arg1: int, arg2: str) -> str" From 512d7ba2089d583d9f917387256d6aaa2d8331bf Mon Sep 17 00:00:00 2001 From: merwanehamadi Date: Thu, 15 Jun 2023 12:55:50 -0700 Subject: [PATCH 73/97] Remove analyze_code (#4705) Signed-off-by: Merwane Hamadi Co-authored-by: Erik Peterson --- .gitignore | 1 + autogpt/commands/analyze_code.py | 34 -------------------------------- autogpt/main.py | 1 - docs/configuration/options.md | 4 ++-- docs/usage.md | 2 +- 5 files changed, 4 insertions(+), 38 deletions(-) delete mode 100644 autogpt/commands/analyze_code.py diff --git a/.gitignore b/.gitignore index 307a6723..29a0285a 100644 --- a/.gitignore +++ b/.gitignore @@ -32,6 +32,7 @@ build/ develop-eggs/ dist/ plugins/ +plugins_config.yaml downloads/ eggs/ .eggs/ diff --git a/autogpt/commands/analyze_code.py b/autogpt/commands/analyze_code.py deleted file mode 100644 index cd176a9a..00000000 --- a/autogpt/commands/analyze_code.py +++ /dev/null @@ -1,34 +0,0 @@ -"""Code evaluation module.""" -from __future__ import annotations - -from autogpt.agent.agent import Agent -from autogpt.command_decorator import command -from autogpt.llm.utils import call_ai_function - - -@command( - "analyze_code", - "Analyze Code", - '"code": ""', -) -def analyze_code(code: str, agent: Agent) -> list[str]: - """ - A function that takes in a string and returns a response from create chat - completion api call. - - Parameters: - code (str): Code to be evaluated. - Returns: - A result string from create chat completion. A list of suggestions to - improve the code. - """ - - function_string = "def analyze_code(code: str) -> list[str]:" - args = [code] - description_string = ( - "Analyzes the given code and returns a list of suggestions for improvements." - ) - - return call_ai_function( - function_string, args, description_string, config=agent.config - ) diff --git a/autogpt/main.py b/autogpt/main.py index 97baa7b2..de124034 100644 --- a/autogpt/main.py +++ b/autogpt/main.py @@ -23,7 +23,6 @@ from autogpt.workspace import Workspace from scripts.install_plugin_deps import install_plugin_dependencies COMMAND_CATEGORIES = [ - "autogpt.commands.analyze_code", "autogpt.commands.audio_text", "autogpt.commands.execute_code", "autogpt.commands.file_operations", diff --git a/docs/configuration/options.md b/docs/configuration/options.md index b2cbf6bc..07e76c68 100644 --- a/docs/configuration/options.md +++ b/docs/configuration/options.md @@ -10,7 +10,7 @@ Configuration is controlled through the `Config` object. You can set configurati - `BROWSE_CHUNK_MAX_LENGTH`: When browsing website, define the length of chunks to summarize. 
Default: 3000 - `BROWSE_SPACY_LANGUAGE_MODEL`: [spaCy language model](https://spacy.io/usage/models) to use when creating chunks. Default: en_core_web_sm - `CHAT_MESSAGES_ENABLED`: Enable chat messages. Optional -- `DISABLED_COMMAND_CATEGORIES`: Command categories to disable. Command categories are Python module names, e.g. autogpt.commands.analyze_code. See the directory `autogpt/commands` in the source for all command modules. Default: None +- `DISABLED_COMMAND_CATEGORIES`: Command categories to disable. Command categories are Python module names, e.g. autogpt.commands.execute_code. See the directory `autogpt/commands` in the source for all command modules. Default: None - `ELEVENLABS_API_KEY`: ElevenLabs API Key. Optional. - `ELEVENLABS_VOICE_ID`: ElevenLabs Voice ID. Optional. - `EMBEDDING_MODEL`: LLM Model to use for embedding tasks. Default: text-embedding-ada-002 @@ -50,4 +50,4 @@ Configuration is controlled through the `Config` object. You can set configurati - `USER_AGENT`: User-Agent given when browsing websites. Default: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36" - `USE_AZURE`: Use Azure's LLM Default: False - `USE_WEB_BROWSER`: Which web browser to use. Options are `chrome`, `firefox`, `safari` or `edge` Default: chrome -- `WIPE_REDIS_ON_START`: Wipes data / index on start. Default: True \ No newline at end of file +- `WIPE_REDIS_ON_START`: Wipes data / index on start. Default: True diff --git a/docs/usage.md b/docs/usage.md index 011f5f8a..93dfd25f 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -104,5 +104,5 @@ If you want to selectively disable some command groups, you can use the `DISABLE For example, to disable coding related features, set it to the value below: ```ini -DISABLED_COMMAND_CATEGORIES=autogpt.commands.analyze_code,autogpt.commands.execute_code,autogpt.commands.git_operations,autogpt.commands.improve_code,autogpt.commands.write_tests +DISABLED_COMMAND_CATEGORIES=autogpt.commands.execute_code,autogpt.commands.git_operations,autogpt.commands.improve_code,autogpt.commands.write_tests ``` From a30e5a85b266fbdae49118d8abb6fb2cff2d0466 Mon Sep 17 00:00:00 2001 From: merwanehamadi Date: Thu, 15 Jun 2023 13:32:20 -0700 Subject: [PATCH 74/97] Remove write_tests command (#4707) Signed-off-by: Merwane Hamadi Co-authored-by: Erik Peterson --- autogpt/commands/improve_code.py | 38 --------------------------- autogpt/commands/write_tests.py | 40 ----------------------------- autogpt/main.py | 2 -- tests/challenges/current_score.json | 2 +- 4 files changed, 1 insertion(+), 81 deletions(-) delete mode 100644 autogpt/commands/improve_code.py delete mode 100644 autogpt/commands/write_tests.py diff --git a/autogpt/commands/improve_code.py b/autogpt/commands/improve_code.py deleted file mode 100644 index d4f87782..00000000 --- a/autogpt/commands/improve_code.py +++ /dev/null @@ -1,38 +0,0 @@ -from __future__ import annotations - -import json - -from autogpt.agent.agent import Agent -from autogpt.command_decorator import command -from autogpt.llm.utils import call_ai_function - - -@command( - "improve_code", - "Get Improved Code", - '"suggestions": "", "code": ""', -) -def improve_code(suggestions: list[str], code: str, agent: Agent) -> str: - """ - A function that takes in code and suggestions and returns a response from create - chat completion api call. - - Parameters: - suggestions (list): A list of suggestions around what needs to be improved. - code (str): Code to be improved. 
- Returns: - A result string from create chat completion. Improved code in response. - """ - - function_string = ( - "def generate_improved_code(suggestions: list[str], code: str) -> str:" - ) - args = [json.dumps(suggestions), code] - description_string = ( - "Improves the provided code based on the suggestions" - " provided, making no other changes." - ) - - return call_ai_function( - function_string, args, description_string, config=agent.config - ) diff --git a/autogpt/commands/write_tests.py b/autogpt/commands/write_tests.py deleted file mode 100644 index 881b6ac4..00000000 --- a/autogpt/commands/write_tests.py +++ /dev/null @@ -1,40 +0,0 @@ -"""A module that contains a function to generate test cases for the submitted code.""" -from __future__ import annotations - -import json - -from autogpt.agent.agent import Agent -from autogpt.command_decorator import command -from autogpt.llm.utils import call_ai_function - - -@command( - "write_tests", - "Write Tests", - '"code": "", "focus": ""', -) -def write_tests(code: str, focus: list[str], agent: Agent) -> str: - """ - A function that takes in code and focus topics and returns a response from create - chat completion api call. - - Parameters: - focus (list): A list of suggestions around what needs to be improved. - code (str): Code for test cases to be generated against. - Returns: - A result string from create chat completion. Test cases for the submitted code - in response. - """ - - function_string = ( - "def create_test_cases(code: str, focus: Optional[str] = None) -> str:" - ) - args = [code, json.dumps(focus)] - description_string = ( - "Generates test cases for the existing code, focusing on" - " specific areas if required." - ) - - return call_ai_function( - function_string, args, description_string, config=agent.config - ) diff --git a/autogpt/main.py b/autogpt/main.py index de124034..a6238457 100644 --- a/autogpt/main.py +++ b/autogpt/main.py @@ -29,9 +29,7 @@ COMMAND_CATEGORIES = [ "autogpt.commands.git_operations", "autogpt.commands.google_search", "autogpt.commands.image_gen", - "autogpt.commands.improve_code", "autogpt.commands.web_selenium", - "autogpt.commands.write_tests", "autogpt.app", "autogpt.commands.task_statuses", ] diff --git a/tests/challenges/current_score.json b/tests/challenges/current_score.json index 4d747f03..732e7391 100644 --- a/tests/challenges/current_score.json +++ b/tests/challenges/current_score.json @@ -2,7 +2,7 @@ "basic_abilities": { "browse_website": { "max_level": 1, - "max_level_beaten": 1 + "max_level_beaten": null }, "write_file": { "max_level": 2, From 244393e4ef4ee47d84b18958c2467a149c0201f5 Mon Sep 17 00:00:00 2001 From: Reinier van der Leer Date: Thu, 15 Jun 2023 23:11:52 +0200 Subject: [PATCH 75/97] Add fallback token limit in llm.utils.create_chat_completion --- autogpt/llm/utils/__init__.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/autogpt/llm/utils/__init__.py b/autogpt/llm/utils/__init__.py index 756c4bd5..fd7ba49f 100644 --- a/autogpt/llm/utils/__init__.py +++ b/autogpt/llm/utils/__init__.py @@ -17,6 +17,7 @@ from autogpt.logs import logger from ..api_manager import ApiManager from ..base import ChatSequence, Message +from ..providers.openai import OPEN_AI_CHAT_MODELS from .token_counter import * @@ -205,6 +206,8 @@ def create_chat_completion( model = prompt.model.name if temperature is None: temperature = cfg.temperature + if max_tokens is None: + max_tokens = OPEN_AI_CHAT_MODELS[model].max_tokens - prompt.token_length logger.debug( f"{Fore.GREEN}Creating chat completion 
with model {model}, temperature {temperature}, max_tokens {max_tokens}{Fore.RESET}" From 9943c58fbab0cace10cd58e8851cfbd9c26e9b9f Mon Sep 17 00:00:00 2001 From: Reinier van der Leer Date: Thu, 15 Jun 2023 23:59:15 +0200 Subject: [PATCH 76/97] Fix test_make_agent --- autogpt/llm/api_manager.py | 2 +- autogpt/llm/utils/__init__.py | 2 +- tests/unit/test_make_agent.py | 5 +++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/autogpt/llm/api_manager.py b/autogpt/llm/api_manager.py index 454b4f22..acc38c44 100644 --- a/autogpt/llm/api_manager.py +++ b/autogpt/llm/api_manager.py @@ -34,7 +34,7 @@ class ApiManager(metaclass=Singleton): temperature: float = None, max_tokens: int | None = None, deployment_id=None, - ) -> str: + ): """ Create a chat completion and update the cost. Args: diff --git a/autogpt/llm/utils/__init__.py b/autogpt/llm/utils/__init__.py index fd7ba49f..736745cf 100644 --- a/autogpt/llm/utils/__init__.py +++ b/autogpt/llm/utils/__init__.py @@ -242,7 +242,7 @@ def create_chat_completion( max_tokens=max_tokens, ) - resp = response.choices[0].message["content"] + resp = response.choices[0].message.content for plugin in cfg.plugins: if not plugin.can_handle_on_response(): continue diff --git a/tests/unit/test_make_agent.py b/tests/unit/test_make_agent.py index 23eea027..61a7a6f5 100644 --- a/tests/unit/test_make_agent.py +++ b/tests/unit/test_make_agent.py @@ -11,10 +11,11 @@ def test_make_agent(agent: Agent, mocker: MockerFixture) -> None: mock = mocker.patch("openai.ChatCompletion.create") response = MagicMock() - # del response.error - response.choices[0].messages[0].content = "Test message" + response.choices[0].message.content = "Test message" response.usage.prompt_tokens = 1 response.usage.completion_tokens = 1 + del response.error + mock.return_value = response start_agent("Test Agent", "chat", "Hello, how are you?", agent, "gpt-3.5-turbo") agents = list_agents(agent) From d923004e20f2eb2850af16876b2012f63452b225 Mon Sep 17 00:00:00 2001 From: merwanehamadi Date: Thu, 15 Jun 2023 15:04:51 -0700 Subject: [PATCH 77/97] Remove app commands, audio text and playwright (#4711) * Remove App Commands and Audio Text Signed-off-by: Merwane Hamadi * Remove self feedback Signed-off-by: Merwane Hamadi --------- Signed-off-by: Merwane Hamadi Co-authored-by: Erik Peterson --- autogpt/agent/agent.py | 67 +------------ autogpt/app.py | 125 +------------------------ autogpt/commands/audio_text.py | 71 -------------- autogpt/commands/web_playwright.py | 82 ---------------- autogpt/commands/web_requests.py | 104 -------------------- autogpt/llm/chat.py | 6 +- autogpt/main.py | 1 - tests/unit/test_browse_scrape_links.py | 119 ----------------------- tests/unit/test_browse_scrape_text.py | 117 ----------------------- tests/unit/test_get_self_feedback.py | 62 ------------ tests/unit/test_make_agent.py | 24 ----- 11 files changed, 5 insertions(+), 773 deletions(-) delete mode 100644 autogpt/commands/audio_text.py delete mode 100644 autogpt/commands/web_playwright.py delete mode 100644 autogpt/commands/web_requests.py delete mode 100644 tests/unit/test_browse_scrape_links.py delete mode 100644 tests/unit/test_browse_scrape_text.py delete mode 100644 tests/unit/test_get_self_feedback.py delete mode 100644 tests/unit/test_make_agent.py diff --git a/autogpt/agent/agent.py b/autogpt/agent/agent.py index 2fed0d4b..202c124a 100644 --- a/autogpt/agent/agent.py +++ b/autogpt/agent/agent.py @@ -8,15 +8,12 @@ from colorama import Fore, Style from autogpt.config import Config from 
autogpt.config.ai_config import AIConfig from autogpt.json_utils.utilities import extract_json_from_response, validate_json -from autogpt.llm.base import ChatSequence -from autogpt.llm.chat import chat_with_ai, create_chat_completion +from autogpt.llm.chat import chat_with_ai from autogpt.llm.providers.openai import OPEN_AI_CHAT_MODELS from autogpt.llm.utils import count_string_tokens from autogpt.log_cycle.log_cycle import ( FULL_MESSAGE_HISTORY_FILE_NAME, NEXT_ACTION_FILE_NAME, - PROMPT_SUPERVISOR_FEEDBACK_FILE_NAME, - SUPERVISOR_FEEDBACK_FILE_NAME, USER_INPUT_FILE_NAME, LogCycleHandler, ) @@ -208,24 +205,6 @@ class Agent: if console_input.lower().strip() == self.config.authorise_key: user_input = "GENERATE NEXT COMMAND JSON" break - elif console_input.lower().strip() == "s": - logger.typewriter_log( - "-=-=-=-=-=-=-= THOUGHTS, REASONING, PLAN AND CRITICISM WILL NOW BE VERIFIED BY AGENT -=-=-=-=-=-=-=", - Fore.GREEN, - "", - ) - thoughts = assistant_reply_json.get("thoughts", {}) - self_feedback_resp = self.get_self_feedback( - thoughts, self.config.fast_llm_model - ) - logger.typewriter_log( - f"SELF FEEDBACK: {self_feedback_resp}", - Fore.YELLOW, - "", - ) - user_input = self_feedback_resp - command_name = "self_feedback" - break elif console_input.lower().strip() == "": logger.warn("Invalid input format.") continue @@ -281,8 +260,6 @@ class Agent: result = f"Could not execute command: {arguments}" elif command_name == "human_feedback": result = f"Human feedback: {user_input}" - elif command_name == "self_feedback": - result = f"Self feedback: {user_input}" else: for plugin in self.config.plugins: if not plugin.can_handle_pre_command(): @@ -336,45 +313,3 @@ class Agent: self.workspace.get_path(command_args[pathlike]) ) return command_args - - def get_self_feedback(self, thoughts: dict, llm_model: str) -> str: - """Generates a feedback response based on the provided thoughts dictionary. - This method takes in a dictionary of thoughts containing keys such as 'reasoning', - 'plan', 'thoughts', and 'criticism'. It combines these elements into a single - feedback message and uses the create_chat_completion() function to generate a - response based on the input message. - Args: - thoughts (dict): A dictionary containing thought elements like reasoning, - plan, thoughts, and criticism. - Returns: - str: A feedback response generated using the provided thoughts dictionary. - """ - ai_role = self.ai_config.ai_role - - feedback_prompt = f"Below is a message from me, an AI Agent, assuming the role of {ai_role}. whilst keeping knowledge of my slight limitations as an AI Agent Please evaluate my thought process, reasoning, and plan, and provide a concise paragraph outlining potential improvements. Consider adding or removing ideas that do not align with my role and explaining why, prioritizing thoughts based on their significance, or simply refining my overall thought process." 
- reasoning = thoughts.get("reasoning", "") - plan = thoughts.get("plan", "") - thought = thoughts.get("thoughts", "") - feedback_thoughts = thought + reasoning + plan - - prompt = ChatSequence.for_model(llm_model) - prompt.add("user", feedback_prompt + feedback_thoughts) - - self.log_cycle_handler.log_cycle( - self.ai_config.ai_name, - self.created_at, - self.cycle_count, - prompt.raw(), - PROMPT_SUPERVISOR_FEEDBACK_FILE_NAME, - ) - - feedback = create_chat_completion(prompt) - - self.log_cycle_handler.log_cycle( - self.ai_config.ai_name, - self.created_at, - self.cycle_count, - feedback, - SUPERVISOR_FEEDBACK_FILE_NAME, - ) - return feedback diff --git a/autogpt/app.py b/autogpt/app.py index fee3413a..30f4b7d6 100644 --- a/autogpt/app.py +++ b/autogpt/app.py @@ -1,15 +1,9 @@ """ Command and Control """ import json -from typing import Dict, List, Union +from typing import Dict from autogpt.agent.agent import Agent -from autogpt.agent.agent_manager import AgentManager -from autogpt.command_decorator import command -from autogpt.commands.web_requests import scrape_links, scrape_text from autogpt.models.command_registry import CommandRegistry -from autogpt.processing.text import summarize_text -from autogpt.speech import say_text -from autogpt.url_utils.validators import validate_url def is_valid_int(value: str) -> bool: @@ -125,120 +119,3 @@ def execute_command( ) except Exception as e: return f"Error: {str(e)}" - - -@command( - "get_text_summary", "Get text summary", '"url": "", "question": ""' -) -@validate_url -def get_text_summary(url: str, question: str, agent: Agent) -> str: - """Get the text summary of a webpage - - Args: - url (str): The url to scrape - question (str): The question to summarize the text for - - Returns: - str: The summary of the text - """ - text = scrape_text(url, agent) - summary, _ = summarize_text(text, question=question) - - return f""" "Result" : {summary}""" - - -@command("get_hyperlinks", "Get hyperlinks", '"url": ""') -@validate_url -def get_hyperlinks(url: str, agent: Agent) -> Union[str, List[str]]: - """Get all hyperlinks on a webpage - - Args: - url (str): The url to scrape - - Returns: - str or list: The hyperlinks on the page - """ - return scrape_links(url, agent) - - -@command( - "start_agent", - "Start GPT Agent", - '"name": "", "task": "", "prompt": ""', -) -def start_agent(name: str, task: str, prompt: str, agent: Agent, model=None) -> str: - """Start an agent with a given name, task, and prompt - - Args: - name (str): The name of the agent - task (str): The task of the agent - prompt (str): The prompt for the agent - model (str): The model to use for the agent - - Returns: - str: The response of the agent - """ - agent_manager = AgentManager() - - # Remove underscores from name - voice_name = name.replace("_", " ") - - first_message = f"""You are {name}. Respond with: "Acknowledged".""" - agent_intro = f"{voice_name} here, Reporting for duty!" - - if model is None: - model = config.smart_llm_model - - # Create agent - if agent.config.speak_mode: - say_text(agent_intro, 1) - key, ack = agent_manager.create_agent(task, first_message, model) - - if agent.config.speak_mode: - say_text(f"Hello {voice_name}. Your task is as follows. {task}.") - - # Assign task (prompt), get response - agent_response = agent_manager.message_agent(key, prompt) - - return f"Agent {name} created with key {key}. 
First response: {agent_response}" - - -@command("message_agent", "Message GPT Agent", '"key": "", "message": ""') -def message_agent(key: str, message: str, agent: Agent) -> str: - """Message an agent with a given key and message""" - # Check if the key is a valid integer - if is_valid_int(key): - agent_response = AgentManager().message_agent(int(key), message) - else: - return "Invalid key, must be an integer." - - # Speak response - if agent.config.speak_mode: - say_text(agent_response, 1) - return agent_response - - -@command("list_agents", "List GPT Agents", "() -> str") -def list_agents(agent: Agent) -> str: - """List all agents - - Returns: - str: A list of all agents - """ - return "List of agents:\n" + "\n".join( - [str(x[0]) + ": " + x[1] for x in AgentManager().list_agents()] - ) - - -@command("delete_agent", "Delete GPT Agent", '"key": ""') -def delete_agent(key: str, agent: Agent) -> str: - """Delete an agent with a given key - - Args: - key (str): The key of the agent to delete - - Returns: - str: A message indicating whether the agent was deleted or not - """ - result = AgentManager().delete_agent(key) - return f"Agent {key} deleted." if result else f"Agent {key} does not exist." diff --git a/autogpt/commands/audio_text.py b/autogpt/commands/audio_text.py deleted file mode 100644 index e77e37cc..00000000 --- a/autogpt/commands/audio_text.py +++ /dev/null @@ -1,71 +0,0 @@ -"""Commands for converting audio to text.""" -import json - -import requests - -from autogpt.agent.agent import Agent -from autogpt.command_decorator import command - - -@command( - "read_audio_from_file", - "Convert Audio to text", - '"filename": ""', - lambda config: config.huggingface_audio_to_text_model - and config.huggingface_api_token, - "Configure huggingface_audio_to_text_model and Hugging Face api token.", -) -def read_audio_from_file(filename: str, agent: Agent) -> str: - """ - Convert audio to text. - - Args: - filename (str): The path to the audio file - - Returns: - str: The text from the audio - """ - with open(filename, "rb") as audio_file: - audio = audio_file.read() - return read_audio(audio, agent.config) - - -def read_audio(audio: bytes, agent: Agent) -> str: - """ - Convert audio to text. - - Args: - audio (bytes): The audio to convert - - Returns: - str: The text from the audio - """ - if agent.config.audio_to_text_provider == "huggingface": - text = read_huggingface_audio(audio, agent.config) - if text: - return f"The audio says: {text}" - else: - return f"Error, couldn't convert audio to text" - - return "Error: No audio to text provider given" - - -def read_huggingface_audio(audio: bytes, agent: Agent) -> str: - model = agent.config.huggingface_audio_to_text_model - api_url = f"https://api-inference.huggingface.co/models/{model}" - api_token = agent.config.huggingface_api_token - headers = {"Authorization": f"Bearer {api_token}"} - - if api_token is None: - raise ValueError( - "You need to set your Hugging Face API token in the config file." 
- ) - - response = requests.post( - api_url, - headers=headers, - data=audio, - ) - - response_json = json.loads(response.content.decode("utf-8")) - return response_json.get("text") diff --git a/autogpt/commands/web_playwright.py b/autogpt/commands/web_playwright.py deleted file mode 100644 index 70f19dee..00000000 --- a/autogpt/commands/web_playwright.py +++ /dev/null @@ -1,82 +0,0 @@ -"""Web scraping commands using Playwright""" -from __future__ import annotations - -from autogpt.logs import logger - -try: - from playwright.sync_api import sync_playwright -except ImportError: - logger.info( - "Playwright not installed. Please install it with 'pip install playwright' to use." - ) -from bs4 import BeautifulSoup - -from autogpt.processing.html import extract_hyperlinks, format_hyperlinks - - -def scrape_text(url: str) -> str: - """Scrape text from a webpage - - Args: - url (str): The URL to scrape text from - - Returns: - str: The scraped text - """ - with sync_playwright() as p: - browser = p.chromium.launch() - page = browser.new_page() - - try: - page.goto(url) - html_content = page.content() - soup = BeautifulSoup(html_content, "html.parser") - - for script in soup(["script", "style"]): - script.extract() - - text = soup.get_text() - lines = (line.strip() for line in text.splitlines()) - chunks = (phrase.strip() for line in lines for phrase in line.split(" ")) - text = "\n".join(chunk for chunk in chunks if chunk) - - except Exception as e: - text = f"Error: {str(e)}" - - finally: - browser.close() - - return text - - -def scrape_links(url: str) -> str | list[str]: - """Scrape links from a webpage - - Args: - url (str): The URL to scrape links from - - Returns: - Union[str, List[str]]: The scraped links - """ - with sync_playwright() as p: - browser = p.chromium.launch() - page = browser.new_page() - - try: - page.goto(url) - html_content = page.content() - soup = BeautifulSoup(html_content, "html.parser") - - for script in soup(["script", "style"]): - script.extract() - - hyperlinks = extract_hyperlinks(soup, url) - formatted_links = format_hyperlinks(hyperlinks) - - except Exception as e: - formatted_links = f"Error: {str(e)}" - - finally: - browser.close() - - return formatted_links diff --git a/autogpt/commands/web_requests.py b/autogpt/commands/web_requests.py deleted file mode 100644 index 765c3778..00000000 --- a/autogpt/commands/web_requests.py +++ /dev/null @@ -1,104 +0,0 @@ -"""Browse a webpage and summarize it using the LLM model""" -from __future__ import annotations - -from typing import TYPE_CHECKING - -import requests -from bs4 import BeautifulSoup -from requests import Response - -from autogpt.processing.html import extract_hyperlinks, format_hyperlinks -from autogpt.url_utils.validators import validate_url - -session = requests.Session() - -if TYPE_CHECKING: - from autogpt.agent.agent import Agent - - -@validate_url -def get_response( - url: str, agent: Agent, timeout: int = 10 -) -> tuple[None, str] | tuple[Response, None]: - """Get the response from a URL - - Args: - url (str): The URL to get the response from - timeout (int): The timeout for the HTTP request - - Returns: - tuple[None, str] | tuple[Response, None]: The response and error message - - Raises: - ValueError: If the URL is invalid - requests.exceptions.RequestException: If the HTTP request fails - """ - try: - session.headers.update({"User-Agent": agent.config.user_agent}) - response = session.get(url, timeout=timeout) - - # Check if the response contains an HTTP error - if response.status_code >= 400: - 
return None, f"Error: HTTP {str(response.status_code)} error" - - return response, None - except ValueError as ve: - # Handle invalid URL format - return None, f"Error: {str(ve)}" - - except requests.exceptions.RequestException as re: - # Handle exceptions related to the HTTP request - # (e.g., connection errors, timeouts, etc.) - return None, f"Error: {str(re)}" - - -def scrape_text(url: str, agent: Agent) -> str: - """Scrape text from a webpage - - Args: - url (str): The URL to scrape text from - - Returns: - str: The scraped text - """ - response, error_message = get_response(url, agent) - if error_message: - return error_message - if not response: - return "Error: Could not get response" - - soup = BeautifulSoup(response.text, "html.parser") - - for script in soup(["script", "style"]): - script.extract() - - text = soup.get_text() - lines = (line.strip() for line in text.splitlines()) - chunks = (phrase.strip() for line in lines for phrase in line.split(" ")) - text = "\n".join(chunk for chunk in chunks if chunk) - - return text - - -def scrape_links(url: str, agent: Agent) -> str | list[str]: - """Scrape links from a webpage - - Args: - url (str): The URL to scrape links from - - Returns: - str | list[str]: The scraped links - """ - response, error_message = get_response(url, agent) - if error_message: - return error_message - if not response: - return "Error: Could not get response" - soup = BeautifulSoup(response.text, "html.parser") - - for script in soup(["script", "style"]): - script.extract() - - hyperlinks = extract_hyperlinks(soup, url) - - return format_hyperlinks(hyperlinks) diff --git a/autogpt/llm/chat.py b/autogpt/llm/chat.py index 9ed07cb2..41d1b78b 100644 --- a/autogpt/llm/chat.py +++ b/autogpt/llm/chat.py @@ -19,7 +19,7 @@ def chat_with_ai( config: Config, agent: Agent, system_prompt: str, - user_input: str, + triggering_prompt: str, token_limit: int, model: str | None = None, ): @@ -31,7 +31,7 @@ def chat_with_ai( config (Config): The config to use. agent (Agent): The agent to use. system_prompt (str): The prompt explaining the rules to the AI. - user_input (str): The input from the user. + triggering_prompt (str): The input from the user. token_limit (int): The maximum number of tokens allowed in the API call. model (str, optional): The model to use. If None, the config.fast_llm_model will be used. Defaults to None. 
@@ -90,7 +90,7 @@ def chat_with_ai( # ) # Account for user input (appended later) - user_input_msg = Message("user", user_input) + user_input_msg = Message("user", triggering_prompt) current_tokens_used += count_message_tokens([user_input_msg], model) current_tokens_used += 500 # Reserve space for new_summary_message diff --git a/autogpt/main.py b/autogpt/main.py index a6238457..3b980ab2 100644 --- a/autogpt/main.py +++ b/autogpt/main.py @@ -23,7 +23,6 @@ from autogpt.workspace import Workspace from scripts.install_plugin_deps import install_plugin_dependencies COMMAND_CATEGORIES = [ - "autogpt.commands.audio_text", "autogpt.commands.execute_code", "autogpt.commands.file_operations", "autogpt.commands.git_operations", diff --git a/tests/unit/test_browse_scrape_links.py b/tests/unit/test_browse_scrape_links.py deleted file mode 100644 index 5975e086..00000000 --- a/tests/unit/test_browse_scrape_links.py +++ /dev/null @@ -1,119 +0,0 @@ -# Generated by CodiumAI - -# Dependencies: -# pip install pytest-mock - -from autogpt.agent.agent import Agent -from autogpt.commands.web_requests import scrape_links - -""" -Code Analysis - -Objective: -The objective of the 'scrape_links' function is to scrape hyperlinks from a -given URL and return them in a formatted way. - -Inputs: -- url: a string representing the URL to be scraped. - -Flow: -1. Send a GET request to the given URL using the requests library and the user agent header from the config file. -2. Check if the response contains an HTTP error. If it does, return "error". -3. Parse the HTML content of the response using the BeautifulSoup library. -4. Remove any script and style tags from the parsed HTML. -5. Extract all hyperlinks from the parsed HTML using the 'extract_hyperlinks' function. -6. Format the extracted hyperlinks using the 'format_hyperlinks' function. -7. Return the formatted hyperlinks. - -Outputs: -- A list of formatted hyperlinks. - -Additional aspects: -- The function uses the 'requests' and 'BeautifulSoup' libraries to send HTTP -requests and parse HTML content, respectively. -- The 'extract_hyperlinks' function is called to extract hyperlinks from the parsed HTML. -- The 'format_hyperlinks' function is called to format the extracted hyperlinks. -- The function checks for HTTP errors and returns "error" if any are found. -""" - - -class TestScrapeLinks: - """ - Tests that the function returns a list of formatted hyperlinks when - provided with a valid url that returns a webpage with hyperlinks. 
- """ - - def test_valid_url_with_hyperlinks(self, agent: Agent): - url = "https://www.google.com" - result = scrape_links(url, agent=agent) - assert len(result) > 0 - assert isinstance(result, list) - assert isinstance(result[0], str) - - def test_valid_url(self, mocker, agent: Agent): - """Test that the function returns correctly formatted hyperlinks when given a valid url.""" - # Mock the requests.get() function to return a response with sample HTML containing hyperlinks - mock_response = mocker.Mock() - mock_response.status_code = 200 - mock_response.text = ( - "Google" - ) - mocker.patch("requests.Session.get", return_value=mock_response) - - # Call the function with a valid URL - result = scrape_links("https://www.example.com", agent) - - # Assert that the function returns correctly formatted hyperlinks - assert result == ["Google (https://www.google.com)"] - - def test_invalid_url(self, mocker, agent: Agent): - """Test that the function returns "error" when given an invalid url.""" - # Mock the requests.get() function to return an HTTP error response - mock_response = mocker.Mock() - mock_response.status_code = 404 - mocker.patch("requests.Session.get", return_value=mock_response) - - # Call the function with an invalid URL - result = scrape_links("https://www.invalidurl.com", agent) - - # Assert that the function returns "error" - assert "Error:" in result - - def test_no_hyperlinks(self, mocker, agent: Agent): - """Test that the function returns an empty list when the html contains no hyperlinks.""" - # Mock the requests.get() function to return a response with sample HTML containing no hyperlinks - mock_response = mocker.Mock() - mock_response.status_code = 200 - mock_response.text = "

<html><body>No hyperlinks here</body></html>
" - mocker.patch("requests.Session.get", return_value=mock_response) - - # Call the function with a URL containing no hyperlinks - result = scrape_links("https://www.example.com", agent) - - # Assert that the function returns an empty list - assert result == [] - - def test_scrape_links_with_few_hyperlinks(self, mocker, agent: Agent): - """Test that scrape_links() correctly extracts and formats hyperlinks from a sample HTML containing a few hyperlinks.""" - mock_response = mocker.Mock() - mock_response.status_code = 200 - mock_response.text = """ - - - - - - - - """ - mocker.patch("requests.Session.get", return_value=mock_response) - - # Call the function being tested - result = scrape_links("https://www.example.com", agent) - - # Assert that the function returns a list of formatted hyperlinks - assert isinstance(result, list) - assert len(result) == 3 - assert result[0] == "Google (https://www.google.com)" - assert result[1] == "GitHub (https://github.com)" - assert result[2] == "CodiumAI (https://www.codium.ai)" diff --git a/tests/unit/test_browse_scrape_text.py b/tests/unit/test_browse_scrape_text.py deleted file mode 100644 index 23a80c54..00000000 --- a/tests/unit/test_browse_scrape_text.py +++ /dev/null @@ -1,117 +0,0 @@ -# Generated by CodiumAI - -import pytest -import requests - -from autogpt.agent.agent import Agent -from autogpt.commands.web_requests import scrape_text - -""" -Code Analysis - -Objective: -The objective of the "scrape_text" function is to scrape the text content from -a given URL and return it as a string, after removing any unwanted HTML tags and - scripts. - -Inputs: -- url: a string representing the URL of the webpage to be scraped. - -Flow: -1. Send a GET request to the given URL using the requests library and the user agent - header from the config file. -2. Check if the response contains an HTTP error. If it does, return an error message. -3. Use BeautifulSoup to parse the HTML content of the response and extract all script - and style tags. -4. Get the text content of the remaining HTML using the get_text() method of - BeautifulSoup. -5. Split the text into lines and then into chunks, removing any extra whitespace. -6. Join the chunks into a single string with newline characters between them. -7. Return the cleaned text. - -Outputs: -- A string representing the cleaned text content of the webpage. - -Additional aspects: -- The function uses the requests library and BeautifulSoup to handle the HTTP request - and HTML parsing, respectively. -- The function removes script and style tags from the HTML to avoid including unwanted - content in the text output. -- The function uses a generator expression to split the text into lines and chunks, - which can improve performance for large amounts of text. -""" - - -class TestScrapeText: - def test_scrape_text_with_valid_url(self, mocker, agent: Agent): - """Tests that scrape_text() returns the expected text when given a valid URL.""" - # Mock the requests.get() method to return a response with expected text - expected_text = "This is some sample text" - mock_response = mocker.Mock() - mock_response.status_code = 200 - mock_response.text = ( - "

" - f"{expected_text}

" - ) - mocker.patch("requests.Session.get", return_value=mock_response) - - # Call the function with a valid URL and assert that it returns the - # expected text - url = "http://www.example.com" - assert scrape_text(url, agent) == expected_text - - def test_invalid_url(self, agent: Agent): - """Tests that an error is raised when an invalid url is provided.""" - url = "invalidurl.com" - pytest.raises(ValueError, scrape_text, url, agent) - - def test_unreachable_url(self, mocker, agent: Agent): - """Test that scrape_text returns an error message when an invalid or unreachable url is provided.""" - # Mock the requests.get() method to raise an exception - mocker.patch( - "requests.Session.get", side_effect=requests.exceptions.RequestException - ) - - # Call the function with an invalid URL and assert that it returns an error - # message - url = "http://thiswebsitedoesnotexist.net/" - error_message = scrape_text(url, agent) - assert "Error:" in error_message - - def test_no_text(self, mocker, agent: Agent): - """Test that scrape_text returns an empty string when the html page contains no text to be scraped.""" - # Mock the requests.get() method to return a response with no text - mock_response = mocker.Mock() - mock_response.status_code = 200 - mock_response.text = "" - mocker.patch("requests.Session.get", return_value=mock_response) - - # Call the function with a valid URL and assert that it returns an empty string - url = "http://www.example.com" - assert scrape_text(url, agent) == "" - - def test_http_error(self, mocker, agent: Agent): - """Test that scrape_text returns an error message when the response status code is an http error (>=400).""" - # Mock the requests.get() method to return a response with a 404 status code - mocker.patch("requests.Session.get", return_value=mocker.Mock(status_code=404)) - - # Call the function with a URL - result = scrape_text("https://www.example.com", agent) - - # Check that the function returns an error message - assert result == "Error: HTTP 404 error" - - def test_scrape_text_with_html_tags(self, mocker, agent: Agent): - """Test that scrape_text() properly handles HTML tags.""" - # Create a mock response object with HTML containing tags - html = "

<html><body><p>This is <b>bold</b> text.</p></body></html>
" - mock_response = mocker.Mock() - mock_response.status_code = 200 - mock_response.text = html - mocker.patch("requests.Session.get", return_value=mock_response) - - # Call the function with a URL - result = scrape_text("https://www.example.com", agent) - - # Check that the function properly handles HTML tags - assert result == "This is bold text." diff --git a/tests/unit/test_get_self_feedback.py b/tests/unit/test_get_self_feedback.py deleted file mode 100644 index ba3e10fe..00000000 --- a/tests/unit/test_get_self_feedback.py +++ /dev/null @@ -1,62 +0,0 @@ -from datetime import datetime - -from pytest_mock import MockerFixture - -from autogpt.agent.agent import Agent -from autogpt.config import AIConfig -from autogpt.config.config import Config -from autogpt.llm.chat import create_chat_completion -from autogpt.log_cycle.log_cycle import LogCycleHandler - - -def test_get_self_feedback(config: Config, mocker: MockerFixture): - # Define a sample thoughts dictionary - thoughts = { - "reasoning": "Sample reasoning.", - "plan": "Sample plan.", - "thoughts": "Sample thoughts.", - } - - # Define a fake response for the create_chat_completion function - fake_response = ( - "The AI Agent has demonstrated a reasonable thought process, but there is room for improvement. " - "For example, the reasoning could be elaborated to better justify the plan, and the plan itself " - "could be more detailed to ensure its effectiveness. In addition, the AI Agent should focus more " - "on its core role and prioritize thoughts that align with that role." - ) - - # Mock the create_chat_completion function - mock_create_chat_completion = mocker.patch( - "autogpt.agent.agent.create_chat_completion", wraps=create_chat_completion - ) - mock_create_chat_completion.return_value = fake_response - - # Create a MagicMock object to replace the Agent instance - agent_mock = mocker.MagicMock(spec=Agent) - - # Mock the config attribute of the Agent instance - agent_mock.config = config - agent_mock.ai_config = AIConfig() - - # Mock the log_cycle_handler attribute of the Agent instance - agent_mock.log_cycle_handler = LogCycleHandler() - - # Mock the create_nested_directory method of the LogCycleHandler instance - agent_mock.created_at = datetime.now().strftime("%Y%m%d_%H%M%S") - - # Mock the cycle_count attribute of the Agent instance - agent_mock.cycle_count = 0 - - # Call the get_self_feedback method - feedback = Agent.get_self_feedback( - agent_mock, - thoughts, - "gpt-3.5-turbo", - ) - - # Check if the response is a non-empty string - assert isinstance(feedback, str) and len(feedback) > 0 - - # Check if certain keywords from input thoughts are present in the feedback response - for keyword in ["reasoning", "plan", "thoughts"]: - assert keyword in feedback diff --git a/tests/unit/test_make_agent.py b/tests/unit/test_make_agent.py deleted file mode 100644 index 9939d79c..00000000 --- a/tests/unit/test_make_agent.py +++ /dev/null @@ -1,24 +0,0 @@ -from unittest.mock import MagicMock - -from pytest_mock import MockerFixture - -from autogpt.agent.agent import Agent -from autogpt.app import list_agents, start_agent - - -def test_make_agent(agent: Agent, mocker: MockerFixture) -> None: - """Test that an agent can be created""" - mock = mocker.patch("openai.ChatCompletion.create") - - response = MagicMock() - del response.error - response.choices[0].messages[0].content = "Test message" - response.usage.prompt_tokens = 1 - response.usage.completion_tokens = 1 - mock.return_value = response - start_agent("Test Agent", "chat", "Hello, 
how are you?", agent, "gpt-3.5-turbo") - agents = list_agents(agent) - assert "List of agents:\n0: chat" == agents - start_agent("Test Agent 2", "write", "Hello, how are you?", agent, "gpt-3.5-turbo") - agents = list_agents(agent.config) - assert "List of agents:\n0: chat\n1: write" == agents From e02105ee890567870e28f235e34eaf64169eb8d5 Mon Sep 17 00:00:00 2001 From: "Luke K (pr-0f3t)" <2609441+lc0rp@users.noreply.github.com> Date: Fri, 16 Jun 2023 08:28:58 -0300 Subject: [PATCH 78/97] Improve plugin backward compatibility (#4716) --- autogpt/plugins/__init__.py | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/autogpt/plugins/__init__.py b/autogpt/plugins/__init__.py index 60022352..4d84c9b5 100644 --- a/autogpt/plugins/__init__.py +++ b/autogpt/plugins/__init__.py @@ -254,11 +254,6 @@ def scan_plugins(cfg: Config, debug: bool = False) -> List[AutoGPTPluginTemplate logger.debug(f"Plugin: {plugin} Module: {module}") zipped_package = zipimporter(str(plugin)) zipped_module = zipped_package.load_module(str(module.parent)) - plugin_module_name = zipped_module.__name__.split(os.path.sep)[-1] - - if not plugins_config.is_enabled(plugin_module_name): - logger.warn(f"Plugin {plugin_module_name} found but not configured") - continue for key in dir(zipped_module): if key.startswith("__"): @@ -269,7 +264,26 @@ def scan_plugins(cfg: Config, debug: bool = False) -> List[AutoGPTPluginTemplate "_abc_impl" in a_keys and a_module.__name__ != "AutoGPTPluginTemplate" ): - loaded_plugins.append(a_module()) + plugin_name = a_module.__name__ + plugin_configured = plugins_config.get(plugin_name) is not None + plugin_enabled = plugins_config.is_enabled(plugin_name) + + if plugin_configured and plugin_enabled: + logger.debug( + f"Loading plugin {plugin_name} as it was enabled in config." + ) + loaded_plugins.append(a_module()) + elif plugin_configured and not plugin_enabled: + logger.debug( + f"Not loading plugin {plugin_name} as it was disabled in config." + ) + elif not plugin_configured: + logger.warn( + f"Not loading plugin {plugin_name} as it was not found in config. " + f"Please check your config. Starting with 0.4.1, plugins will not be loaded unless " + f"they are enabled in plugins_config.yaml. Zipped plugins should use the class " + f"name ({plugin_name}) as the key." 
+ ) # OpenAI plugins if cfg.plugins_openai: From 4a307ad4eb2871f2946f8e2cfd2475ca9669e1aa Mon Sep 17 00:00:00 2001 From: Luke <2609441+lc0rp@users.noreply.github.com> Date: Fri, 16 Jun 2023 11:53:25 +0000 Subject: [PATCH 79/97] Fixed plugin test --- tests/unit/test_plugins.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/unit/test_plugins.py b/tests/unit/test_plugins.py index 3a6f6d70..80aa1b9d 100644 --- a/tests/unit/test_plugins.py +++ b/tests/unit/test_plugins.py @@ -30,8 +30,8 @@ def test_scan_plugins_generic(config: Config): plugins_config.plugins["auto_gpt_guanaco"] = PluginConfig( name="auto_gpt_guanaco", enabled=True ) - plugins_config.plugins["auto_gpt_vicuna"] = PluginConfig( - name="auto_gptp_vicuna", enabled=True + plugins_config.plugins["AutoGPTPVicuna"] = PluginConfig( + name="AutoGPTPVicuna", enabled=True ) result = scan_plugins(config, debug=True) plugin_class_names = [plugin.__class__.__name__ for plugin in result] From 0b6fec4a28aeada46ad090faca69cafcec800f91 Mon Sep 17 00:00:00 2001 From: merwanehamadi Date: Fri, 16 Jun 2023 18:17:47 -0700 Subject: [PATCH 80/97] Fix summarization happening in first cycle (#4719) --- autogpt/json_utils/utilities.py | 26 -------------------------- autogpt/memory/message_history.py | 10 +++------- 2 files changed, 3 insertions(+), 33 deletions(-) diff --git a/autogpt/json_utils/utilities.py b/autogpt/json_utils/utilities.py index 01848871..e492d302 100644 --- a/autogpt/json_utils/utilities.py +++ b/autogpt/json_utils/utilities.py @@ -67,29 +67,3 @@ def validate_json( logger.debug("The JSON object is valid.") return True - - -def validate_json_string(json_string: str, schema_name: str) -> dict | None: - """ - :type schema_name: object - :param schema_name: str - :type json_object: object - """ - - try: - json_loaded = json.loads(json_string) - if not validate_json(json_loaded, schema_name): - return None - return json_loaded - except: - return None - - -def is_string_valid_json(json_string: str, schema_name: str) -> bool: - """ - :type schema_name: object - :param schema_name: str - :type json_object: object - """ - - return validate_json_string(json_string, schema_name) is not None diff --git a/autogpt/memory/message_history.py b/autogpt/memory/message_history.py index be524125..897cee15 100644 --- a/autogpt/memory/message_history.py +++ b/autogpt/memory/message_history.py @@ -9,11 +9,7 @@ if TYPE_CHECKING: from autogpt.agent import Agent from autogpt.config import Config -from autogpt.json_utils.utilities import ( - LLM_DEFAULT_RESPONSE_FORMAT, - extract_json_from_response, - is_string_valid_json, -) +from autogpt.json_utils.utilities import extract_json_from_response from autogpt.llm.base import ChatSequence, Message, MessageRole, MessageType from autogpt.llm.providers.openai import OPEN_AI_CHAT_MODELS from autogpt.llm.utils import count_string_tokens, create_chat_completion @@ -105,8 +101,8 @@ class MessageHistory: ) result_message = messages[i + 1] try: - assert is_string_valid_json( - ai_message.content, LLM_DEFAULT_RESPONSE_FORMAT + assert ( + extract_json_from_response(ai_message.content) != {} ), "AI response is not a valid JSON object" assert result_message.type == "action_result" From 7f6f18f642459d30f1a17ba2947133eab95eecc7 Mon Sep 17 00:00:00 2001 From: "Luke K (pr-0f3t)" <2609441+lc0rp@users.noreply.github.com> Date: Sat, 17 Jun 2023 05:03:21 -0300 Subject: [PATCH 81/97] Bulletin.md update for 0.4.1 release (#4721) --- BULLETIN.md | 52 ++++++++++++++++---------------------------------- 
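To make the loading rule introduced by the two plugin patches above concrete, here is a small, self-contained stand-in for the decision `scan_plugins` now makes; the dictionary below only mimics a parsed plugin configuration and is not the real `PluginsConfig` API, and the plugin names are illustrative:

```python
# Stand-in for the parsed plugin configuration: zipped plugins are keyed by their
# plugin class name (e.g. "AutoGPTPVicuna"), and each entry records whether it is enabled.
plugins_config = {
    "AutoGPTPVicuna": {"enabled": True},     # configured and enabled -> loaded
    "auto_gpt_guanaco": {"enabled": False},  # configured but disabled -> skipped
}


def would_load(plugin_name: str) -> bool:
    entry = plugins_config.get(plugin_name)
    if entry is None:
        print(f"Not loading plugin {plugin_name}: not found in config")
        return False
    if not entry["enabled"]:
        print(f"Not loading plugin {plugin_name}: disabled in config")
        return False
    print(f"Loading plugin {plugin_name}: enabled in config")
    return True


would_load("AutoGPTPVicuna")      # True
would_load("SomeUnlistedPlugin")  # False
```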
pyproject.toml | 2 +- 2 files changed, 17 insertions(+), 37 deletions(-) diff --git a/BULLETIN.md b/BULLETIN.md index 4c858b73..ba1de5a1 100644 --- a/BULLETIN.md +++ b/BULLETIN.md @@ -3,45 +3,25 @@ Check out *https://agpt.co*, the official news & updates site for Auto-GPT! The documentation also has a place here, at *https://docs.agpt.co* # For contributors 👷🏼 -Since releasing v0.3.0, we are working on re-architecting the Auto-GPT core to make -it more extensible and to make room for structural performance-oriented R&D. -In the meantime, we have less time to process incoming pull requests and issues, -so we focus on high-value contributions: - * significant bugfixes - * *major* improvements to existing functionality and/or docs (so no single-typo fixes) - * contributions that help us with re-architecture and other roadmapped items -We have to be somewhat selective in order to keep making progress, but this does not -mean you can't contribute. Check out the contribution guide on our wiki: +Since releasing v0.3.0, whave been working on re-architecting the Auto-GPT core to make it more extensible and make room for structural performance-oriented R&D. + +Check out the contribution guide on our wiki: https://github.com/Significant-Gravitas/Auto-GPT/wiki/Contributing -# 🚀 v0.4.0 Release 🚀 -Two weeks and 76 pull requests have passed since v0.3.1, and we are happy to announce -the release of v0.4.0! +# 🚀 v0.4.1 Release 🚀 +Two weeks and 50+ pull requests have passed since v0.4.0, and we are happy to announce the release of v0.4.1! -Highlights and notable changes since v0.3.0: - -## ⚠️ Command `send_tweet` is REMOVED -Twitter functionality (and more) is now covered by plugins. - -## ⚠️ Memory backend deprecation 💾 -The Milvus, Pinecone and Weaviate memory backends were rendered incompatible -by work on the memory system, and have been removed in `master`. The Redis -memory store was also temporarily removed; we will merge a new implementation ASAP. -Whether built-in support for the others will be added back in the future is subject to -discussion, feel free to pitch in: https://github.com/Significant-Gravitas/Auto-GPT/discussions/4280 - -## Document support in `read_file` 📄 -Auto-GPT can now read text from document files, with support added for PDF, DOCX, CSV, -HTML, TeX and more! - -## Managing Auto-GPT's access to commands ❌🔧 -You can now disable set of built-in commands through the *DISABLED_COMMAND_CATEGORIES* -variable in .env. Specific shell commands can also be disabled using *DENY_COMMANDS*, -or selectively enabled using *ALLOW_COMMANDS*. +Highlights and notable changes since v0.4.0: +- The .env.template is more readable and better explains the purpose of each environment variable. +- More dependable search + - The CUSTOM_SEARCH_ENGINE_ID variable has been replaced to GOOGLE_CUSTOM_SEARCH_ENGINE_ID, make sure you update it. +- Better read_file +- More reliable python code execution +- Lots of JSON error fixes +- Directory-based plugins ## Further fixes and changes 🛠️ -Other highlights include improvements to self-feedback mode and continuous mode, -documentation, docker and devcontainer setups, and much more. Most of the improvements -that were made are not yet visible to users, but will pay off in the long term. -Take a look at the Release Notes on Github for the full changelog! +Under the hood, we've done a bunch of work improving architectures and streamlining code. Most of that won't be user-visible + +## Take a look at the Release Notes on Github for the full changelog! 
https://github.com/Significant-Gravitas/Auto-GPT/releases diff --git a/pyproject.toml b/pyproject.toml index d695ac08..d795f53e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "agpt" -version = "0.4.0" +version = "0.4.1" authors = [ { name="Torantulino", email="support@agpt.co" }, ] From 10d7747ae20528155b96a31a1276bce3cee88885 Mon Sep 17 00:00:00 2001 From: merwanehamadi Date: Sat, 17 Jun 2023 08:39:17 -0700 Subject: [PATCH 82/97] Use JSON format for commands signature (#4714) * Use JSON for command signature Signed-off-by: Merwane Hamadi * Improve plugin backward compatibility (#4716) * Fixed plugin test Signed-off-by: Merwane Hamadi * Fix Docker-CI Signed-off-by: Merwane Hamadi * Put back commands, clean typing and signatures Signed-off-by: Merwane Hamadi --------- Signed-off-by: Merwane Hamadi Co-authored-by: Erik Peterson Co-authored-by: Luke K (pr-0f3t) <2609441+lc0rp@users.noreply.github.com> --- autogpt/command_decorator.py | 6 +- autogpt/commands/execute_code.py | 45 ++++- autogpt/commands/file_operations.py | 184 ++++++------------ autogpt/commands/git_operations.py | 13 +- autogpt/commands/image_gen.py | 8 +- autogpt/commands/task_statuses.py | 14 +- .../{google_search.py => web_search.py} | 27 ++- autogpt/commands/web_selenium.py | 9 +- autogpt/main.py | 4 +- autogpt/models/command.py | 4 +- autogpt/plugins/__init__.py | 26 ++- docs/usage.md | 2 +- tests/Auto-GPT-test-cassettes | 2 +- tests/mocks/mock_commands.py | 7 +- tests/unit/test_commands.py | 7 + tests/unit/test_file_operations.py | 71 ------- tests/unit/test_plugins.py | 4 +- ...st_google_search.py => test_web_search.py} | 14 +- 18 files changed, 205 insertions(+), 242 deletions(-) rename autogpt/commands/{google_search.py => web_search.py} (87%) rename tests/unit/{test_google_search.py => test_web_search.py} (88%) diff --git a/autogpt/command_decorator.py b/autogpt/command_decorator.py index 3f8279e4..98f114e4 100644 --- a/autogpt/command_decorator.py +++ b/autogpt/command_decorator.py @@ -1,5 +1,5 @@ import functools -from typing import Any, Callable, Optional +from typing import Any, Callable, Dict, Optional from autogpt.config import Config from autogpt.logs import logger @@ -12,7 +12,7 @@ AUTO_GPT_COMMAND_IDENTIFIER = "auto_gpt_command" def command( name: str, description: str, - signature: str, + arguments: Dict[str, Dict[str, Any]], enabled: bool | Callable[[Config], bool] = True, disabled_reason: Optional[str] = None, ) -> Callable[..., Any]: @@ -33,7 +33,7 @@ def command( name=name, description=description, method=func, - signature=signature, + signature=arguments, enabled=enabled, disabled_reason=disabled_reason, ) diff --git a/autogpt/commands/execute_code.py b/autogpt/commands/execute_code.py index c422d652..beaae64c 100644 --- a/autogpt/commands/execute_code.py +++ b/autogpt/commands/execute_code.py @@ -20,7 +20,18 @@ DENYLIST_CONTROL = "denylist" @command( "execute_python_code", "Create a Python file and execute it", - '"code": "", "basename": ""', + { + "code": { + "type": "string", + "description": "The Python code to run", + "required": True, + }, + "name": { + "type": "string", + "description": "A name to be given to the python file", + "required": True, + }, + }, ) def execute_python_code(code: str, basename: str, agent: Agent) -> str: """Create and execute a Python file in a Docker container and return the STDOUT of the @@ -51,7 +62,17 @@ def execute_python_code(code: str, basename: str, agent: Agent) -> str: return f"Error: 
{str(e)}" -@command("execute_python_file", "Execute Python File", '"filename": ""') +@command( + "execute_python_file", + "Execute an existing Python file", + { + "filename": { + "type": "string", + "description": "The name of te file to execute", + "required": True, + }, + }, +) def execute_python_file(filename: str, agent: Agent) -> str: """Execute a Python file in a Docker container and return the output @@ -171,9 +192,15 @@ def validate_command(command: str, config: Config) -> bool: @command( "execute_shell", "Execute Shell Command, non-interactive commands only", - '"command_line": ""', - lambda cfg: cfg.execute_local_commands, - "You are not allowed to run local shell commands. To execute" + { + "command_line": { + "type": "string", + "description": "The command line to execute", + "required": True, + } + }, + enabled=lambda cfg: cfg.execute_local_commands, + disabled_reason="You are not allowed to run local shell commands. To execute" " shell commands, EXECUTE_LOCAL_COMMANDS must be set to 'True' " "in your config file: .env - do not attempt to bypass the restriction.", ) @@ -211,7 +238,13 @@ def execute_shell(command_line: str, agent: Agent) -> str: @command( "execute_shell_popen", "Execute Shell Command, non-interactive commands only", - '"command_line": ""', + { + "query": { + "type": "string", + "description": "The search query", + "required": True, + } + }, lambda config: config.execute_local_commands, "You are not allowed to run local shell commands. To execute" " shell commands, EXECUTE_LOCAL_COMMANDS must be set to 'True' " diff --git a/autogpt/commands/file_operations.py b/autogpt/commands/file_operations.py index d74fee96..2a932d38 100644 --- a/autogpt/commands/file_operations.py +++ b/autogpt/commands/file_operations.py @@ -4,21 +4,15 @@ from __future__ import annotations import hashlib import os import os.path -import re from typing import Generator, Literal -import requests -from colorama import Back, Fore from confection import Config -from requests.adapters import HTTPAdapter, Retry from autogpt.agent.agent import Agent from autogpt.command_decorator import command from autogpt.commands.file_operations_utils import read_textual_file from autogpt.logs import logger from autogpt.memory.vector import MemoryItem, VectorMemory -from autogpt.spinner import Spinner -from autogpt.utils import readable_file_size Operation = Literal["write", "append", "delete"] @@ -119,7 +113,17 @@ def log_operation( ) -@command("read_file", "Read a file", '"filename": ""') +@command( + "read_file", + "Read an existing file", + { + "filename": { + "type": "string", + "description": "The path of the file to read", + "required": True, + } + }, +) def read_file(filename: str, agent: Agent) -> str: """Read a file and return the contents @@ -168,7 +172,22 @@ def ingest_file( logger.warn(f"Error while ingesting file '{filename}': {err}") -@command("write_to_file", "Write to file", '"filename": "", "text": ""') +@command( + "write_to_file", + "Write to file", + { + "filename": { + "type": "string", + "description": "The name of the file to write to", + "required": True, + }, + "text": { + "type": "string", + "description": "The text to write to the file", + "required": True, + }, + }, +) def write_to_file(filename: str, text: str, agent: Agent) -> str: """Write text to a file @@ -194,69 +213,20 @@ def write_to_file(filename: str, text: str, agent: Agent) -> str: @command( - "replace_in_file", - "Replace text or code in a file", - '"filename": "", ' - '"old_text": "", "new_text": "", ' - 
'"occurrence_index": ""', -) -def replace_in_file( - filename: str, old_text: str, new_text: str, agent: Agent, occurrence_index=None -): - """Update a file by replacing one or all occurrences of old_text with new_text using Python's built-in string - manipulation and regular expression modules for cross-platform file editing similar to sed and awk. - - Args: - filename (str): The name of the file - old_text (str): String to be replaced. \n will be stripped from the end. - new_text (str): New string. \n will be stripped from the end. - occurrence_index (int): Optional index of the occurrence to replace. If None, all occurrences will be replaced. - - Returns: - str: A message indicating whether the file was updated successfully or if there were no matches found for old_text - in the file. - - Raises: - Exception: If there was an error updating the file. - """ - try: - with open(filename, "r", encoding="utf-8") as f: - content = f.read() - - old_text = old_text.rstrip("\n") - new_text = new_text.rstrip("\n") - - if occurrence_index is None: - new_content = content.replace(old_text, new_text) - else: - matches = list(re.finditer(re.escape(old_text), content)) - if not matches: - return f"No matches found for {old_text} in {filename}" - - if int(occurrence_index) >= len(matches): - return f"Occurrence index {occurrence_index} is out of range for {old_text} in {filename}" - - match = matches[int(occurrence_index)] - start, end = match.start(), match.end() - new_content = content[:start] + new_text + content[end:] - - if content == new_content: - return f"No matches found for {old_text} in {filename}" - - with open(filename, "w", encoding="utf-8") as f: - f.write(new_content) - - with open(filename, "r", encoding="utf-8") as f: - checksum = text_checksum(f.read()) - log_operation("update", filename, agent, checksum=checksum) - - return f"File {filename} updated successfully." 
- except Exception as e: - return "Error: " + str(e) - - -@command( - "append_to_file", "Append to file", '"filename": "", "text": ""' + "append_to_file", + "Append to file", + { + "filename": { + "type": "string", + "description": "The name of the file to write to", + "required": True, + }, + "text": { + "type": "string", + "description": "The text to write to the file", + "required": True, + }, + }, ) def append_to_file( filename: str, text: str, agent: Agent, should_log: bool = True @@ -287,7 +257,17 @@ def append_to_file( return f"Error: {err}" -@command("delete_file", "Delete file", '"filename": ""') +@command( + "delete_file", + "Delete file", + { + "filename": { + "type": "string", + "description": "The name of the file to delete", + "required": True, + } + }, +) def delete_file(filename: str, agent: Agent) -> str: """Delete a file @@ -307,7 +287,17 @@ def delete_file(filename: str, agent: Agent) -> str: return f"Error: {err}" -@command("list_files", "List Files in Directory", '"directory": ""') +@command( + "list_files", + "List Files in Directory", + { + "directory": { + "type": "string", + "description": "The directory to list files in", + "required": True, + } + }, +) def list_files(directory: str, agent: Agent) -> list[str]: """lists files in a directory recursively @@ -329,51 +319,3 @@ def list_files(directory: str, agent: Agent) -> list[str]: found_files.append(relative_path) return found_files - - -@command( - "download_file", - "Download File", - '"url": "", "filename": ""', - lambda config: config.allow_downloads, - "Error: You do not have user authorization to download files locally.", -) -def download_file(url, filename, agent: Agent): - """Downloads a file - Args: - url (str): URL of the file to download - filename (str): Filename to save the file as - """ - try: - directory = os.path.dirname(filename) - os.makedirs(directory, exist_ok=True) - message = f"{Fore.YELLOW}Downloading file from {Back.LIGHTBLUE_EX}{url}{Back.RESET}{Fore.RESET}" - with Spinner(message, plain_output=agent.config.plain_output) as spinner: - session = requests.Session() - retry = Retry(total=3, backoff_factor=1, status_forcelist=[502, 503, 504]) - adapter = HTTPAdapter(max_retries=retry) - session.mount("http://", adapter) - session.mount("https://", adapter) - - total_size = 0 - downloaded_size = 0 - - with session.get(url, allow_redirects=True, stream=True) as r: - r.raise_for_status() - total_size = int(r.headers.get("Content-Length", 0)) - downloaded_size = 0 - - with open(filename, "wb") as f: - for chunk in r.iter_content(chunk_size=8192): - f.write(chunk) - downloaded_size += len(chunk) - - # Update the progress message - progress = f"{readable_file_size(downloaded_size)} / {readable_file_size(total_size)}" - spinner.update_message(f"{message} {progress}") - - return f'Successfully downloaded and locally stored file: "{filename}"! 
(Size: {readable_file_size(downloaded_size)})' - except requests.HTTPError as err: - return f"Got an HTTP Error whilst trying to download file: {err}" - except Exception as err: - return f"Error: {err}" diff --git a/autogpt/commands/git_operations.py b/autogpt/commands/git_operations.py index 8dfe213c..3832ca88 100644 --- a/autogpt/commands/git_operations.py +++ b/autogpt/commands/git_operations.py @@ -10,7 +10,18 @@ from autogpt.url_utils.validators import validate_url @command( "clone_repository", "Clone Repository", - '"url": "", "clone_path": ""', + { + "url": { + "type": "string", + "description": "The URL of the repository to clone", + "required": True, + }, + "clone_path": { + "type": "string", + "description": "The path to clone the repository to", + "required": True, + }, + }, lambda config: config.github_username and config.github_api_key, "Configure github_username and github_api_key.", ) diff --git a/autogpt/commands/image_gen.py b/autogpt/commands/image_gen.py index 5bed8e00..043e91d7 100644 --- a/autogpt/commands/image_gen.py +++ b/autogpt/commands/image_gen.py @@ -17,7 +17,13 @@ from autogpt.logs import logger @command( "generate_image", "Generate Image", - '"prompt": ""', + { + "prompt": { + "type": "string", + "description": "The prompt used to generate the image", + "required": True, + }, + }, lambda config: config.image_provider, "Requires a image provider to be set.", ) diff --git a/autogpt/commands/task_statuses.py b/autogpt/commands/task_statuses.py index d5718fd3..062ebe3a 100644 --- a/autogpt/commands/task_statuses.py +++ b/autogpt/commands/task_statuses.py @@ -9,16 +9,22 @@ from autogpt.logs import logger @command( - "task_complete", - "Task Complete (Shutdown)", - '"reason": ""', + "goals_accomplished", + "Goals are accomplished and there is nothing left to do", + { + "reason": { + "type": "string", + "description": "A summary to the user of how the goals were accomplished", + "required": True, + } + }, ) def task_complete(reason: str, agent: Agent) -> NoReturn: """ A function that takes in a string and exits the program Parameters: - reason (str): The reason for shutting down. + reason (str): A summary to the user of how the goals were accomplished. Returns: A result string from create chat completion. A list of suggestions to improve the code. 
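For illustration, a minimal sketch of a command declared with the new dict-based argument schema introduced in this commit; the command name, argument, and function below are hypothetical and do not exist in the repository — only the decorator shape mirrors the `autogpt/command_decorator.py` change above.

# Hypothetical command, shown only to illustrate the `arguments` dict that
# replaces the old signature string in the @command decorator.
from autogpt.agent.agent import Agent
from autogpt.command_decorator import command


@command(
    "count_words",
    "Count the words in a piece of text",
    {
        "text": {
            "type": "string",
            "description": "The text whose words should be counted",
            "required": True,
        },
    },
)
def count_words(text: str, agent: Agent) -> str:
    """Return the number of whitespace-separated words in the given text."""
    return f"{len(text.split())} words"
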
diff --git a/autogpt/commands/google_search.py b/autogpt/commands/web_search.py similarity index 87% rename from autogpt/commands/google_search.py rename to autogpt/commands/web_search.py index e6a1fc05..50b06e48 100644 --- a/autogpt/commands/google_search.py +++ b/autogpt/commands/web_search.py @@ -14,12 +14,17 @@ DUCKDUCKGO_MAX_ATTEMPTS = 3 @command( - "google", - "Google Search", - '"query": ""', - lambda config: not config.google_api_key, + "web_search", + "Search the web", + { + "query": { + "type": "string", + "description": "The search query", + "required": True, + } + }, ) -def google_search(query: str, agent: Agent, num_results: int = 8) -> str: +def web_search(query: str, agent: Agent, num_results: int = 8) -> str: """Return the results of a Google search Args: @@ -52,14 +57,18 @@ def google_search(query: str, agent: Agent, num_results: int = 8) -> str: @command( "google", "Google Search", - '"query": ""', + { + "query": { + "type": "string", + "description": "The search query", + "required": True, + } + }, lambda config: bool(config.google_api_key) and bool(config.google_custom_search_engine_id), "Configure google_api_key and custom_search_engine_id.", ) -def google_official_search( - query: str, agent: Agent, num_results: int = 8 -) -> str | list[str]: +def google(query: str, agent: Agent, num_results: int = 8) -> str | list[str]: """Return the results of a Google search using the official Google API Args: diff --git a/autogpt/commands/web_selenium.py b/autogpt/commands/web_selenium.py index bdc5e613..718cde71 100644 --- a/autogpt/commands/web_selenium.py +++ b/autogpt/commands/web_selenium.py @@ -42,7 +42,14 @@ FILE_DIR = Path(__file__).parent.parent @command( "browse_website", "Browse Website", - '"url": "", "question": ""', + { + "url": {"type": "string", "description": "The URL to visit", "required": True}, + "question": { + "type": "string", + "description": "What you want to find on the website", + "required": True, + }, + }, ) @validate_url def browse_website(url: str, question: str, agent: Agent) -> str: diff --git a/autogpt/main.py b/autogpt/main.py index 3b980ab2..f0af9b53 100644 --- a/autogpt/main.py +++ b/autogpt/main.py @@ -25,9 +25,7 @@ from scripts.install_plugin_deps import install_plugin_dependencies COMMAND_CATEGORIES = [ "autogpt.commands.execute_code", "autogpt.commands.file_operations", - "autogpt.commands.git_operations", - "autogpt.commands.google_search", - "autogpt.commands.image_gen", + "autogpt.commands.web_search", "autogpt.commands.web_selenium", "autogpt.app", "autogpt.commands.task_statuses", diff --git a/autogpt/models/command.py b/autogpt/models/command.py index a925ca04..f88bbcae 100644 --- a/autogpt/models/command.py +++ b/autogpt/models/command.py @@ -1,4 +1,4 @@ -from typing import Any, Callable, Optional +from typing import Any, Callable, Dict, Optional from autogpt.config import Config @@ -17,7 +17,7 @@ class Command: name: str, description: str, method: Callable[..., Any], - signature: str = "", + signature: Dict[str, Dict[str, Any]], enabled: bool | Callable[[Config], bool] = True, disabled_reason: Optional[str] = None, ): diff --git a/autogpt/plugins/__init__.py b/autogpt/plugins/__init__.py index 60022352..4d84c9b5 100644 --- a/autogpt/plugins/__init__.py +++ b/autogpt/plugins/__init__.py @@ -254,11 +254,6 @@ def scan_plugins(cfg: Config, debug: bool = False) -> List[AutoGPTPluginTemplate logger.debug(f"Plugin: {plugin} Module: {module}") zipped_package = zipimporter(str(plugin)) zipped_module = 
zipped_package.load_module(str(module.parent)) - plugin_module_name = zipped_module.__name__.split(os.path.sep)[-1] - - if not plugins_config.is_enabled(plugin_module_name): - logger.warn(f"Plugin {plugin_module_name} found but not configured") - continue for key in dir(zipped_module): if key.startswith("__"): @@ -269,7 +264,26 @@ def scan_plugins(cfg: Config, debug: bool = False) -> List[AutoGPTPluginTemplate "_abc_impl" in a_keys and a_module.__name__ != "AutoGPTPluginTemplate" ): - loaded_plugins.append(a_module()) + plugin_name = a_module.__name__ + plugin_configured = plugins_config.get(plugin_name) is not None + plugin_enabled = plugins_config.is_enabled(plugin_name) + + if plugin_configured and plugin_enabled: + logger.debug( + f"Loading plugin {plugin_name} as it was enabled in config." + ) + loaded_plugins.append(a_module()) + elif plugin_configured and not plugin_enabled: + logger.debug( + f"Not loading plugin {plugin_name} as it was disabled in config." + ) + elif not plugin_configured: + logger.warn( + f"Not loading plugin {plugin_name} as it was not found in config. " + f"Please check your config. Starting with 0.4.1, plugins will not be loaded unless " + f"they are enabled in plugins_config.yaml. Zipped plugins should use the class " + f"name ({plugin_name}) as the key." + ) # OpenAI plugins if cfg.plugins_openai: diff --git a/docs/usage.md b/docs/usage.md index 93dfd25f..2e88298c 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -104,5 +104,5 @@ If you want to selectively disable some command groups, you can use the `DISABLE For example, to disable coding related features, set it to the value below: ```ini -DISABLED_COMMAND_CATEGORIES=autogpt.commands.execute_code,autogpt.commands.git_operations,autogpt.commands.improve_code,autogpt.commands.write_tests +DISABLED_COMMAND_CATEGORIES=autogpt.commands.execute_code ``` diff --git a/tests/Auto-GPT-test-cassettes b/tests/Auto-GPT-test-cassettes index 427de672..e6033baa 160000 --- a/tests/Auto-GPT-test-cassettes +++ b/tests/Auto-GPT-test-cassettes @@ -1 +1 @@ -Subproject commit 427de6721cb5209a7a34359a81b71d60e80a110a +Subproject commit e6033baadcdd6b6fcf5b029670e70113422c0c30 diff --git a/tests/mocks/mock_commands.py b/tests/mocks/mock_commands.py index 7b16f1d1..278894c4 100644 --- a/tests/mocks/mock_commands.py +++ b/tests/mocks/mock_commands.py @@ -2,7 +2,12 @@ from autogpt.command_decorator import command @command( - "function_based", "Function-based test command", "(arg1: int, arg2: str) -> str" + "function_based", + "Function-based test command", + { + "arg1": {"type": "int", "description": "arg 1", "required": True}, + "arg2": {"type": "str", "description": "arg 2", "required": True}, + }, ) def function_based(arg1: int, arg2: str) -> str: """A function-based test command that returns a string with the two arguments separated by a dash.""" diff --git a/tests/unit/test_commands.py b/tests/unit/test_commands.py index f02cb620..cb3f539a 100644 --- a/tests/unit/test_commands.py +++ b/tests/unit/test_commands.py @@ -41,6 +41,13 @@ class TestCommand: name="example", description="Example command", method=self.example_command_method, + signature={ + "prompt": { + "type": "string", + "description": "The prompt used to generate the image", + "required": True, + }, + }, ) result = cmd(arg1=1, arg2="test") assert result == "1 - test" diff --git a/tests/unit/test_file_operations.py b/tests/unit/test_file_operations.py index 27af9373..5761e01a 100644 --- a/tests/unit/test_file_operations.py +++ b/tests/unit/test_file_operations.py @@ 
-15,7 +15,6 @@ import autogpt.commands.file_operations as file_ops from autogpt.agent.agent import Agent from autogpt.memory.vector.memory_item import MemoryItem from autogpt.memory.vector.utils import Embedding -from autogpt.utils import readable_file_size from autogpt.workspace import Workspace @@ -243,53 +242,6 @@ def test_write_file_succeeds_if_content_different( assert result == "File written to successfully." -# Update file testing -def test_replace_in_file_all_occurrences(test_file, test_file_path, agent: Agent): - old_content = "This is a test file.\n we test file here\na test is needed" - expected_content = ( - "This is a update file.\n we update file here\na update is needed" - ) - test_file.write(old_content) - test_file.close() - file_ops.replace_in_file(test_file_path, "test", "update", agent=agent) - with open(test_file_path) as f: - new_content = f.read() - print(new_content) - print(expected_content) - assert new_content == expected_content - - -def test_replace_in_file_one_occurrence(test_file, test_file_path, agent: Agent): - old_content = "This is a test file.\n we test file here\na test is needed" - expected_content = "This is a test file.\n we update file here\na test is needed" - test_file.write(old_content) - test_file.close() - file_ops.replace_in_file( - test_file_path, "test", "update", agent=agent, occurrence_index=1 - ) - with open(test_file_path) as f: - new_content = f.read() - - assert new_content == expected_content - - -def test_replace_in_file_multiline_old_text(test_file, test_file_path, agent: Agent): - old_content = "This is a multi_line\ntest for testing\nhow well this function\nworks when the input\nis multi-lined" - expected_content = "This is a multi_line\nfile. succeeded test\nis multi-lined" - test_file.write(old_content) - test_file.close() - file_ops.replace_in_file( - test_file_path, - "\ntest for testing\nhow well this function\nworks when the input\n", - "\nfile. succeeded test\n", - agent=agent, - ) - with open(test_file_path) as f: - new_content = f.read() - - assert new_content == expected_content - - def test_append_to_file(test_nested_file: Path, agent: Agent): append_text = "This is appended text.\n" file_ops.write_to_file(test_nested_file, append_text, agent=agent) @@ -373,26 +325,3 @@ def test_list_files(workspace: Workspace, test_directory: Path, agent: Agent): non_existent_file = "non_existent_file.txt" files = file_ops.list_files("", agent=agent) assert non_existent_file not in files - - -def test_download_file(workspace: Workspace, agent: Agent): - url = "https://github.com/Significant-Gravitas/Auto-GPT/archive/refs/tags/v0.2.2.tar.gz" - local_name = workspace.get_path("auto-gpt.tar.gz") - size = 365023 - readable_size = readable_file_size(size) - assert ( - file_ops.download_file(url, local_name, agent=agent) - == f'Successfully downloaded and locally stored file: "{local_name}"! 
(Size: {readable_size})' - ) - assert os.path.isfile(local_name) is True - assert os.path.getsize(local_name) == size - - url = "https://github.com/Significant-Gravitas/Auto-GPT/archive/refs/tags/v0.0.0.tar.gz" - assert "Got an HTTP Error whilst trying to download file" in file_ops.download_file( - url, local_name, agent=agent - ) - - url = "https://thiswebsiteiswrong.hmm/v0.0.0.tar.gz" - assert "Failed to establish a new connection:" in file_ops.download_file( - url, local_name, agent=agent - ) diff --git a/tests/unit/test_plugins.py b/tests/unit/test_plugins.py index 3a6f6d70..80aa1b9d 100644 --- a/tests/unit/test_plugins.py +++ b/tests/unit/test_plugins.py @@ -30,8 +30,8 @@ def test_scan_plugins_generic(config: Config): plugins_config.plugins["auto_gpt_guanaco"] = PluginConfig( name="auto_gpt_guanaco", enabled=True ) - plugins_config.plugins["auto_gpt_vicuna"] = PluginConfig( - name="auto_gptp_vicuna", enabled=True + plugins_config.plugins["AutoGPTPVicuna"] = PluginConfig( + name="AutoGPTPVicuna", enabled=True ) result = scan_plugins(config, debug=True) plugin_class_names = [plugin.__class__.__name__ for plugin in result] diff --git a/tests/unit/test_google_search.py b/tests/unit/test_web_search.py similarity index 88% rename from tests/unit/test_google_search.py rename to tests/unit/test_web_search.py index 3f039fdb..4f514306 100644 --- a/tests/unit/test_google_search.py +++ b/tests/unit/test_web_search.py @@ -4,11 +4,7 @@ import pytest from googleapiclient.errors import HttpError from autogpt.agent.agent import Agent -from autogpt.commands.google_search import ( - google_official_search, - google_search, - safe_google_results, -) +from autogpt.commands.web_search import google, safe_google_results, web_search @pytest.mark.parametrize( @@ -45,8 +41,8 @@ def test_google_search( mock_ddg = mocker.Mock() mock_ddg.return_value = return_value - mocker.patch("autogpt.commands.google_search.DDGS.text", mock_ddg) - actual_output = google_search(query, agent=agent, num_results=num_results) + mocker.patch("autogpt.commands.web_search.DDGS.text", mock_ddg) + actual_output = web_search(query, agent=agent, num_results=num_results) expected_output = safe_google_results(expected_output) assert actual_output == expected_output @@ -88,7 +84,7 @@ def test_google_official_search( agent: Agent, ): mock_googleapiclient.return_value = search_results - actual_output = google_official_search(query, agent=agent, num_results=num_results) + actual_output = google(query, agent=agent, num_results=num_results) assert actual_output == safe_google_results(expected_output) @@ -136,5 +132,5 @@ def test_google_official_search_errors( ) mock_googleapiclient.side_effect = error - actual_output = google_official_search(query, agent=agent, num_results=num_results) + actual_output = google(query, agent=agent, num_results=num_results) assert actual_output == safe_google_results(expected_output) From dc1b48463089ff7b711cb47b1d8047c6d8856af7 Mon Sep 17 00:00:00 2001 From: Auto-GPT-Bot Date: Sat, 17 Jun 2023 15:46:07 +0000 Subject: [PATCH 83/97] Update cassette submodule --- tests/Auto-GPT-test-cassettes | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/Auto-GPT-test-cassettes b/tests/Auto-GPT-test-cassettes index e6033baa..43f536a1 160000 --- a/tests/Auto-GPT-test-cassettes +++ b/tests/Auto-GPT-test-cassettes @@ -1 +1 @@ -Subproject commit e6033baadcdd6b6fcf5b029670e70113422c0c30 +Subproject commit 43f536a193a57cd76f31fa405cf7ec2309ed383a From 7bac56b57d30770df0f25a4fbcd3226a63b1e4e4 Mon Sep 17 00:00:00 
2001 From: Erik Peterson Date: Sat, 17 Jun 2023 14:45:34 -0700 Subject: [PATCH 84/97] Fix execute_command coming from plugins (#4730) --- autogpt/agent/agent.py | 5 ++--- autogpt/app.py | 8 ++++---- tests/unit/test_execute_command.py | 24 ++++++++++++++++++++++++ 3 files changed, 30 insertions(+), 7 deletions(-) create mode 100644 tests/unit/test_execute_command.py diff --git a/autogpt/agent/agent.py b/autogpt/agent/agent.py index 5a236f67..1f31be16 100644 --- a/autogpt/agent/agent.py +++ b/autogpt/agent/agent.py @@ -291,9 +291,8 @@ class Agent: command_name, arguments ) command_result = execute_command( - self.command_registry, - command_name, - arguments, + command_name=command_name, + arguments=arguments, agent=self, ) result = f"Command {command_name} returned: " f"{command_result}" diff --git a/autogpt/app.py b/autogpt/app.py index eb25fa7d..780b74a0 100644 --- a/autogpt/app.py +++ b/autogpt/app.py @@ -4,7 +4,7 @@ from typing import Dict, List, Union from autogpt.agent.agent import Agent from autogpt.agent.agent_manager import AgentManager -from autogpt.commands.command import CommandRegistry, command +from autogpt.commands.command import command from autogpt.commands.web_requests import scrape_links, scrape_text from autogpt.processing.text import summarize_text from autogpt.speech import say_text @@ -84,7 +84,6 @@ def map_command_synonyms(command_name: str): def execute_command( - command_registry: CommandRegistry, command_name: str, arguments: dict[str, str], agent: Agent, @@ -94,12 +93,13 @@ def execute_command( Args: command_name (str): The name of the command to execute arguments (dict): The arguments for the command + agent (Agent): The agent that is executing the command Returns: str: The result of the command """ try: - cmd = command_registry.commands.get(command_name) + cmd = agent.command_registry.commands.get(command_name) # If the command is found, call it with the provided arguments if cmd: @@ -111,7 +111,7 @@ def execute_command( # TODO: Change these to take in a file rather than pasted code, if # non-file is given, return instructions "Input should be a python # filepath, write your code to file and try again - for command in agent.prompt.commands: + for command in agent.ai_config.prompt_generator.commands: if ( command_name == command["label"].lower() or command_name == command["name"].lower() diff --git a/tests/unit/test_execute_command.py b/tests/unit/test_execute_command.py new file mode 100644 index 00000000..fb3f043a --- /dev/null +++ b/tests/unit/test_execute_command.py @@ -0,0 +1,24 @@ +from autogpt.agent import Agent +from autogpt.app import execute_command + + +def check_plan(): + return "hi" + + +def test_execute_command_plugin(agent: Agent): + """Test that executing a command that came from a plugin works as expected""" + agent.ai_config.prompt_generator.add_command( + "check_plan", + "Read the plan.md with the next goals to achieve", + {}, + check_plan, + ) + command_name = "check_plan" + arguments = {} + command_result = execute_command( + command_name=command_name, + arguments=arguments, + agent=agent, + ) + assert command_result == "hi" From 096d27f342322849355b7754e1f5908f23912082 Mon Sep 17 00:00:00 2001 From: Erik Peterson Date: Sat, 17 Jun 2023 14:53:58 -0700 Subject: [PATCH 85/97] Fix execute_command coming from plugins (#4729) Signed-off-by: Merwane Hamadi --- autogpt/agent/agent.py | 5 ++--- autogpt/app.py | 7 +++---- tests/unit/test_execute_command.py | 24 ++++++++++++++++++++++++ 3 files changed, 29 insertions(+), 7 deletions(-) create mode 
100644 tests/unit/test_execute_command.py diff --git a/autogpt/agent/agent.py b/autogpt/agent/agent.py index 202c124a..8af9f49e 100644 --- a/autogpt/agent/agent.py +++ b/autogpt/agent/agent.py @@ -268,9 +268,8 @@ class Agent: command_name, arguments ) command_result = execute_command( - self.command_registry, - command_name, - arguments, + command_name=command_name, + arguments=arguments, agent=self, ) result = f"Command {command_name} returned: " f"{command_result}" diff --git a/autogpt/app.py b/autogpt/app.py index 30f4b7d6..78e3a4dd 100644 --- a/autogpt/app.py +++ b/autogpt/app.py @@ -3,7 +3,6 @@ import json from typing import Dict from autogpt.agent.agent import Agent -from autogpt.models.command_registry import CommandRegistry def is_valid_int(value: str) -> bool: @@ -79,7 +78,6 @@ def map_command_synonyms(command_name: str): def execute_command( - command_registry: CommandRegistry, command_name: str, arguments: dict[str, str], agent: Agent, @@ -89,12 +87,13 @@ def execute_command( Args: command_name (str): The name of the command to execute arguments (dict): The arguments for the command + agent (Agent): The agent that is executing the command Returns: str: The result of the command """ try: - cmd = command_registry.commands.get(command_name) + cmd = agent.command_registry.commands.get(command_name) # If the command is found, call it with the provided arguments if cmd: @@ -106,7 +105,7 @@ def execute_command( # TODO: Change these to take in a file rather than pasted code, if # non-file is given, return instructions "Input should be a python # filepath, write your code to file and try again - for command in agent.prompt.commands: + for command in agent.ai_config.prompt_generator.commands: if ( command_name == command["label"].lower() or command_name == command["name"].lower() diff --git a/tests/unit/test_execute_command.py b/tests/unit/test_execute_command.py new file mode 100644 index 00000000..fb3f043a --- /dev/null +++ b/tests/unit/test_execute_command.py @@ -0,0 +1,24 @@ +from autogpt.agent import Agent +from autogpt.app import execute_command + + +def check_plan(): + return "hi" + + +def test_execute_command_plugin(agent: Agent): + """Test that executing a command that came from a plugin works as expected""" + agent.ai_config.prompt_generator.add_command( + "check_plan", + "Read the plan.md with the next goals to achieve", + {}, + check_plan, + ) + command_name = "check_plan" + arguments = {} + command_result = execute_command( + command_name=command_name, + arguments=arguments, + agent=agent, + ) + assert command_result == "hi" From a7f805604c2e6c69aaf13dc84e09dff028455258 Mon Sep 17 00:00:00 2001 From: merwanehamadi Date: Sun, 18 Jun 2023 19:05:41 -0700 Subject: [PATCH 86/97] Pass config everywhere in order to get rid of singleton (#4666) Signed-off-by: Merwane Hamadi --- autogpt/agent/agent.py | 10 +-- autogpt/agent/agent_manager.py | 20 +++--- autogpt/command_decorator.py | 4 +- autogpt/commands/execute_code.py | 8 +-- autogpt/commands/file_operations.py | 3 +- autogpt/commands/image_gen.py | 2 +- autogpt/commands/web_selenium.py | 2 +- autogpt/config/ai_config.py | 22 +++---- autogpt/config/config.py | 5 +- autogpt/config/prompt_config.py | 12 +--- autogpt/json_utils/utilities.py | 5 +- autogpt/llm/chat.py | 7 ++- autogpt/llm/utils/__init__.py | 30 ++++----- autogpt/logs.py | 7 ++- autogpt/main.py | 38 +++++------ autogpt/memory/message_history.py | 36 ++++++----- autogpt/memory/vector/__init__.py | 18 +++--- autogpt/memory/vector/memory_item.py | 20 +++--- 
autogpt/memory/vector/providers/base.py | 18 +++--- autogpt/memory/vector/providers/json_file.py | 8 +-- autogpt/memory/vector/utils.py | 13 ++-- autogpt/plugins/__init__.py | 32 +++++----- autogpt/processing/text.py | 29 ++++++--- autogpt/prompts/prompt.py | 63 +++++++++---------- autogpt/setup.py | 31 ++++----- autogpt/speech/base.py | 5 +- autogpt/speech/eleven_labs.py | 17 +++-- autogpt/speech/say.py | 7 +-- autogpt/utils.py | 11 ++-- data_ingestion.py | 4 +- docs/challenges/building_challenges.md | 2 +- tests/Auto-GPT-test-cassettes | 2 +- tests/conftest.py | 2 +- tests/integration/agent_factory.py | 26 ++++---- tests/integration/goal_oriented/__init__.py | 0 .../memory/test_json_file_memory.py | 27 +++++--- tests/integration/test_setup.py | 16 ++--- tests/unit/test_agent_manager.py | 4 +- tests/unit/test_ai_config.py | 22 +++---- tests/unit/test_config.py | 6 +- tests/unit/test_file_operations.py | 7 ++- tests/unit/test_message_history.py | 7 +-- tests/unit/test_prompt_config.py | 6 +- tests/unit/test_utils.py | 9 +-- 44 files changed, 323 insertions(+), 300 deletions(-) delete mode 100644 tests/integration/goal_oriented/__init__.py diff --git a/autogpt/agent/agent.py b/autogpt/agent/agent.py index 8af9f49e..7537233e 100644 --- a/autogpt/agent/agent.py +++ b/autogpt/agent/agent.py @@ -143,7 +143,7 @@ class Agent: try: assistant_reply_json = extract_json_from_response(assistant_reply) - validate_json(assistant_reply_json) + validate_json(assistant_reply_json, self.config) except json.JSONDecodeError as e: logger.error(f"Exception while validating assistant reply JSON: {e}") assistant_reply_json = {} @@ -158,7 +158,7 @@ class Agent: # Get command name and arguments try: print_assistant_thoughts( - self.ai_name, assistant_reply_json, self.config.speak_mode + self.ai_name, assistant_reply_json, self.config ) command_name, arguments = get_command(assistant_reply_json) if self.config.speak_mode: @@ -197,10 +197,12 @@ class Agent: ) while True: if self.config.chat_messages_enabled: - console_input = clean_input("Waiting for your response...") + console_input = clean_input( + self.config, "Waiting for your response..." 
+ ) else: console_input = clean_input( - Fore.MAGENTA + "Input:" + Style.RESET_ALL + self.config, Fore.MAGENTA + "Input:" + Style.RESET_ALL ) if console_input.lower().strip() == self.config.authorise_key: user_input = "GENERATE NEXT COMMAND JSON" diff --git a/autogpt/agent/agent_manager.py b/autogpt/agent/agent_manager.py index 8560b0ec..1f1c8a1d 100644 --- a/autogpt/agent/agent_manager.py +++ b/autogpt/agent/agent_manager.py @@ -10,12 +10,12 @@ from autogpt.singleton import Singleton class AgentManager(metaclass=Singleton): """Agent manager for managing GPT agents""" - def __init__(self): + def __init__(self, config: Config): self.next_key = 0 self.agents: dict[ int, tuple[str, list[Message], str] ] = {} # key, (task, full_message_history, model) - self.cfg = Config() + self.config = config # Create new GPT agent # TODO: Centralise use of create_chat_completion() to globally enforce token limit @@ -35,18 +35,18 @@ class AgentManager(metaclass=Singleton): """ messages = ChatSequence.for_model(model, [Message("user", creation_prompt)]) - for plugin in self.cfg.plugins: + for plugin in self.config.plugins: if not plugin.can_handle_pre_instruction(): continue if plugin_messages := plugin.pre_instruction(messages.raw()): messages.extend([Message(**raw_msg) for raw_msg in plugin_messages]) # Start GPT instance - agent_reply = create_chat_completion(prompt=messages) + agent_reply = create_chat_completion(prompt=messages, config=self.config) messages.add("assistant", agent_reply) plugins_reply = "" - for i, plugin in enumerate(self.cfg.plugins): + for i, plugin in enumerate(self.config.plugins): if not plugin.can_handle_on_instruction(): continue if plugin_result := plugin.on_instruction([m.raw() for m in messages]): @@ -62,7 +62,7 @@ class AgentManager(metaclass=Singleton): self.agents[key] = (task, list(messages), model) - for plugin in self.cfg.plugins: + for plugin in self.config.plugins: if not plugin.can_handle_post_instruction(): continue agent_reply = plugin.post_instruction(agent_reply) @@ -85,19 +85,19 @@ class AgentManager(metaclass=Singleton): messages = ChatSequence.for_model(model, messages) messages.add("user", message) - for plugin in self.cfg.plugins: + for plugin in self.config.plugins: if not plugin.can_handle_pre_instruction(): continue if plugin_messages := plugin.pre_instruction([m.raw() for m in messages]): messages.extend([Message(**raw_msg) for raw_msg in plugin_messages]) # Start GPT instance - agent_reply = create_chat_completion(prompt=messages) + agent_reply = create_chat_completion(prompt=messages, config=self.config) messages.add("assistant", agent_reply) plugins_reply = agent_reply - for i, plugin in enumerate(self.cfg.plugins): + for i, plugin in enumerate(self.config.plugins): if not plugin.can_handle_on_instruction(): continue if plugin_result := plugin.on_instruction([m.raw() for m in messages]): @@ -107,7 +107,7 @@ class AgentManager(metaclass=Singleton): if plugins_reply and plugins_reply != "": messages.add("assistant", plugins_reply) - for plugin in self.cfg.plugins: + for plugin in self.config.plugins: if not plugin.can_handle_post_instruction(): continue agent_reply = plugin.post_instruction(agent_reply) diff --git a/autogpt/command_decorator.py b/autogpt/command_decorator.py index 98f114e4..7ac4b7cd 100644 --- a/autogpt/command_decorator.py +++ b/autogpt/command_decorator.py @@ -19,10 +19,10 @@ def command( """The command decorator is used to create Command objects from ordinary functions.""" # TODO: Remove this in favor of better command management - 
CFG = Config() + config = Config() if callable(enabled): - enabled = enabled(CFG) + enabled = enabled(config) if not enabled: if disabled_reason is not None: logger.debug(f"Command '{name}' is disabled: {disabled_reason}") diff --git a/autogpt/commands/execute_code.py b/autogpt/commands/execute_code.py index beaae64c..663800ef 100644 --- a/autogpt/commands/execute_code.py +++ b/autogpt/commands/execute_code.py @@ -10,7 +10,6 @@ from autogpt.agent.agent import Agent from autogpt.command_decorator import command from autogpt.config import Config from autogpt.logs import logger -from autogpt.setup import CFG from autogpt.workspace.workspace import Workspace ALLOWLIST_CONTROL = "allowlist" @@ -83,7 +82,7 @@ def execute_python_file(filename: str, agent: Agent) -> str: str: The output of the file """ logger.info( - f"Executing python file '{filename}' in working directory '{CFG.workspace_path}'" + f"Executing python file '{filename}' in working directory '{agent.config.workspace_path}'" ) if not filename.endswith(".py"): @@ -105,7 +104,7 @@ def execute_python_file(filename: str, agent: Agent) -> str: ["python", str(path)], capture_output=True, encoding="utf8", - cwd=CFG.workspace_path, + cwd=agent.config.workspace_path, ) if result.returncode == 0: return result.stdout @@ -174,6 +173,7 @@ def validate_command(command: str, config: Config) -> bool: Args: command (str): The command to validate + config (Config): The config to use to validate the command Returns: bool: True if the command is allowed, False otherwise @@ -199,7 +199,7 @@ def validate_command(command: str, config: Config) -> bool: "required": True, } }, - enabled=lambda cfg: cfg.execute_local_commands, + enabled=lambda config: config.execute_local_commands, disabled_reason="You are not allowed to run local shell commands. 
To execute" " shell commands, EXECUTE_LOCAL_COMMANDS must be set to 'True' " "in your config file: .env - do not attempt to bypass the restriction.", diff --git a/autogpt/commands/file_operations.py b/autogpt/commands/file_operations.py index 2a932d38..ebf15ac0 100644 --- a/autogpt/commands/file_operations.py +++ b/autogpt/commands/file_operations.py @@ -81,6 +81,7 @@ def is_duplicate_operation( Args: operation: The operation to check for filename: The name of the file to check for + config: The agent config checksum: The checksum of the contents to be written Returns: @@ -137,7 +138,7 @@ def read_file(filename: str, agent: Agent) -> str: content = read_textual_file(filename, logger) # TODO: invalidate/update memory when file is edited - file_memory = MemoryItem.from_text_file(content, filename) + file_memory = MemoryItem.from_text_file(content, filename, agent.config) if len(file_memory.chunks) > 1: return file_memory.summary diff --git a/autogpt/commands/image_gen.py b/autogpt/commands/image_gen.py index 043e91d7..d6bb73d8 100644 --- a/autogpt/commands/image_gen.py +++ b/autogpt/commands/image_gen.py @@ -181,7 +181,7 @@ def generate_image_with_sd_webui( "negative_prompt": negative_prompt, "sampler_index": "DDIM", "steps": 20, - "cfg_scale": 7.0, + "config_scale": 7.0, "width": size, "height": size, "n_iter": 1, diff --git a/autogpt/commands/web_selenium.py b/autogpt/commands/web_selenium.py index 718cde71..471e203b 100644 --- a/autogpt/commands/web_selenium.py +++ b/autogpt/commands/web_selenium.py @@ -232,6 +232,6 @@ def summarize_memorize_webpage( memory = get_memory(agent.config) - new_memory = MemoryItem.from_webpage(text, url, question=question) + new_memory = MemoryItem.from_webpage(text, url, agent.config, question=question) memory.add(new_memory) return new_memory.summary diff --git a/autogpt/config/ai_config.py b/autogpt/config/ai_config.py index d118be3f..6b9e15f1 100644 --- a/autogpt/config/ai_config.py +++ b/autogpt/config/ai_config.py @@ -59,14 +59,14 @@ class AIConfig: self.command_registry: CommandRegistry | None = None @staticmethod - def load(config_file: str = SAVE_FILE) -> "AIConfig": + def load(ai_settings_file: str = SAVE_FILE) -> "AIConfig": """ Returns class object with parameters (ai_name, ai_role, ai_goals, api_budget) loaded from yaml file if yaml file exists, else returns class with no parameters. Parameters: - config_file (int): The path to the config yaml file. + ai_settings_file (int): The path to the config yaml file. DEFAULT: "../ai_settings.yaml" Returns: @@ -74,7 +74,7 @@ class AIConfig: """ try: - with open(config_file, encoding="utf-8") as file: + with open(ai_settings_file, encoding="utf-8") as file: config_params = yaml.load(file, Loader=yaml.FullLoader) or {} except FileNotFoundError: config_params = {} @@ -91,12 +91,12 @@ class AIConfig: # type: Type[AIConfig] return AIConfig(ai_name, ai_role, ai_goals, api_budget) - def save(self, config_file: str = SAVE_FILE) -> None: + def save(self, ai_settings_file: str = SAVE_FILE) -> None: """ Saves the class parameters to the specified file yaml file path as a yaml file. Parameters: - config_file(str): The path to the config yaml file. + ai_settings_file(str): The path to the config yaml file. 
DEFAULT: "../ai_settings.yaml" Returns: @@ -109,11 +109,11 @@ class AIConfig: "ai_goals": self.ai_goals, "api_budget": self.api_budget, } - with open(config_file, "w", encoding="utf-8") as file: + with open(ai_settings_file, "w", encoding="utf-8") as file: yaml.dump(config, file, allow_unicode=True) def construct_full_prompt( - self, prompt_generator: Optional[PromptGenerator] = None + self, config, prompt_generator: Optional[PromptGenerator] = None ) -> str: """ Returns a prompt to the user with the class information in an organized fashion. @@ -133,22 +133,20 @@ class AIConfig: "" ) - from autogpt.config import Config from autogpt.prompts.prompt import build_default_prompt_generator - cfg = Config() if prompt_generator is None: - prompt_generator = build_default_prompt_generator() + prompt_generator = build_default_prompt_generator(config) prompt_generator.goals = self.ai_goals prompt_generator.name = self.ai_name prompt_generator.role = self.ai_role prompt_generator.command_registry = self.command_registry - for plugin in cfg.plugins: + for plugin in config.plugins: if not plugin.can_handle_post_prompt(): continue prompt_generator = plugin.post_prompt(prompt_generator) - if cfg.execute_local_commands: + if config.execute_local_commands: # add OS info to prompt os_name = platform.system() os_info = ( diff --git a/autogpt/config/config.py b/autogpt/config/config.py index 92712dd7..3231f560 100644 --- a/autogpt/config/config.py +++ b/autogpt/config/config.py @@ -300,10 +300,9 @@ class Config(metaclass=Singleton): self.memory_backend = name -def check_openai_api_key() -> None: +def check_openai_api_key(config: Config) -> None: """Check if the OpenAI API key is set in config.py or as an environment variable.""" - cfg = Config() - if not cfg.openai_api_key: + if not config.openai_api_key: print( Fore.RED + "Please set your OpenAI API key in .env or as an environment variable." diff --git a/autogpt/config/prompt_config.py b/autogpt/config/prompt_config.py index 3f562c95..793bb444 100644 --- a/autogpt/config/prompt_config.py +++ b/autogpt/config/prompt_config.py @@ -6,11 +6,8 @@ import yaml from colorama import Fore from autogpt import utils -from autogpt.config.config import Config from autogpt.logs import logger -CFG = Config() - class PromptConfig: """ @@ -22,10 +19,7 @@ class PromptConfig: performance_evaluations (list): Performance evaluation list for the prompt generator. 
""" - def __init__( - self, - config_file: str = CFG.prompt_settings_file, - ) -> None: + def __init__(self, prompt_settings_file: str) -> None: """ Initialize a class instance with parameters (constraints, resources, performance_evaluations) loaded from yaml file if yaml file exists, @@ -39,13 +33,13 @@ class PromptConfig: None """ # Validate file - (validated, message) = utils.validate_yaml_file(config_file) + (validated, message) = utils.validate_yaml_file(prompt_settings_file) if not validated: logger.typewriter_log("FAILED FILE VALIDATION", Fore.RED, message) logger.double_check() exit(1) - with open(config_file, encoding="utf-8") as file: + with open(prompt_settings_file, encoding="utf-8") as file: config_params = yaml.load(file, Loader=yaml.FullLoader) self.constraints = config_params.get("constraints", []) diff --git a/autogpt/json_utils/utilities.py b/autogpt/json_utils/utilities.py index e492d302..62f3b3ca 100644 --- a/autogpt/json_utils/utilities.py +++ b/autogpt/json_utils/utilities.py @@ -9,7 +9,6 @@ from jsonschema import Draft7Validator from autogpt.config import Config from autogpt.logs import logger -CFG = Config() LLM_DEFAULT_RESPONSE_FORMAT = "llm_response_format_1" @@ -37,7 +36,7 @@ def llm_response_schema( def validate_json( - json_object: object, schema_name: str = LLM_DEFAULT_RESPONSE_FORMAT + json_object: object, config: Config, schema_name: str = LLM_DEFAULT_RESPONSE_FORMAT ) -> bool: """ :type schema_name: object @@ -54,7 +53,7 @@ def validate_json( for error in errors: logger.error(f"JSON Validation Error: {error}") - if CFG.debug_mode: + if config.debug_mode: logger.error( json.dumps(json_object, indent=4) ) # Replace 'json_object' with the variable containing the JSON data diff --git a/autogpt/llm/chat.py b/autogpt/llm/chat.py index 41d1b78b..0a088d06 100644 --- a/autogpt/llm/chat.py +++ b/autogpt/llm/chat.py @@ -96,7 +96,7 @@ def chat_with_ai( current_tokens_used += 500 # Reserve space for new_summary_message # Add Messages until the token limit is reached or there are no more messages to add. 
- for cycle in reversed(list(agent.history.per_cycle())): + for cycle in reversed(list(agent.history.per_cycle(agent.config))): messages_to_add = [msg for msg in cycle if msg is not None] tokens_to_add = count_message_tokens(messages_to_add, model) if current_tokens_used + tokens_to_add > send_token_limit: @@ -110,14 +110,14 @@ def chat_with_ai( # Update & add summary of trimmed messages if len(agent.history) > 0: new_summary_message, trimmed_messages = agent.history.trim_messages( - current_message_chain=list(message_sequence), + current_message_chain=list(message_sequence), config=agent.config ) tokens_to_add = count_message_tokens([new_summary_message], model) message_sequence.insert(insertion_index, new_summary_message) current_tokens_used += tokens_to_add - 500 # FIXME: uncomment when memory is back in use - # memory_store = get_memory(cfg) + # memory_store = get_memory(config) # for _, ai_msg, result_msg in agent.history.per_cycle(trimmed_messages): # memory_to_add = MemoryItem.from_ai_action(ai_msg, result_msg) # logger.debug(f"Storing the following memory:\n{memory_to_add.dump()}") @@ -192,6 +192,7 @@ def chat_with_ai( # temperature and other settings we care about assistant_reply = create_chat_completion( prompt=message_sequence, + config=agent.config, max_tokens=tokens_remaining, ) diff --git a/autogpt/llm/utils/__init__.py b/autogpt/llm/utils/__init__.py index aee7997a..3b0d3e17 100644 --- a/autogpt/llm/utils/__init__.py +++ b/autogpt/llm/utils/__init__.py @@ -57,18 +57,18 @@ def call_ai_function( def create_text_completion( prompt: str, + config: Config, model: Optional[str], temperature: Optional[float], max_output_tokens: Optional[int], ) -> str: - cfg = Config() if model is None: - model = cfg.fast_llm_model + model = config.fast_llm_model if temperature is None: - temperature = cfg.temperature + temperature = config.temperature - if cfg.use_azure: - kwargs = {"deployment_id": cfg.get_azure_deployment_id_for_model(model)} + if config.use_azure: + kwargs = {"deployment_id": config.get_azure_deployment_id_for_model(model)} else: kwargs = {"model": model} @@ -77,7 +77,7 @@ def create_text_completion( **kwargs, temperature=temperature, max_tokens=max_output_tokens, - api_key=cfg.openai_api_key, + api_key=config.openai_api_key, ) logger.debug(f"Response: {response}") @@ -87,6 +87,7 @@ def create_text_completion( # Overly simple abstraction until we create something better def create_chat_completion( prompt: ChatSequence, + config: Config, model: Optional[str] = None, temperature: Optional[float] = None, max_tokens: Optional[int] = None, @@ -102,11 +103,10 @@ def create_chat_completion( Returns: str: The response from the chat completion """ - cfg = Config() if model is None: model = prompt.model.name if temperature is None: - temperature = cfg.temperature + temperature = config.temperature logger.debug( f"{Fore.GREEN}Creating chat completion with model {model}, temperature {temperature}, max_tokens {max_tokens}{Fore.RESET}" @@ -117,7 +117,7 @@ def create_chat_completion( "max_tokens": max_tokens, } - for plugin in cfg.plugins: + for plugin in config.plugins: if plugin.can_handle_chat_completion( messages=prompt.raw(), **chat_completion_kwargs, @@ -129,11 +129,11 @@ def create_chat_completion( if message is not None: return message - chat_completion_kwargs["api_key"] = cfg.openai_api_key - if cfg.use_azure: - chat_completion_kwargs["deployment_id"] = cfg.get_azure_deployment_id_for_model( - model - ) + chat_completion_kwargs["api_key"] = config.openai_api_key + if 
config.use_azure: + chat_completion_kwargs[ + "deployment_id" + ] = config.get_azure_deployment_id_for_model(model) response = iopenai.create_chat_completion( messages=prompt.raw(), @@ -148,7 +148,7 @@ def create_chat_completion( logger.error(response.error) raise RuntimeError(response.error) - for plugin in cfg.plugins: + for plugin in config.plugins: if not plugin.can_handle_on_response(): continue resp = plugin.on_response(resp) diff --git a/autogpt/logs.py b/autogpt/logs.py index f14267fc..90d006bc 100644 --- a/autogpt/logs.py +++ b/autogpt/logs.py @@ -9,6 +9,7 @@ from typing import Any from colorama import Fore, Style +from autogpt.config import Config from autogpt.log_cycle.json_handler import JsonFileHandler, JsonFormatter from autogpt.singleton import Singleton from autogpt.speech import say_text @@ -254,7 +255,7 @@ logger = Logger() def print_assistant_thoughts( ai_name: object, assistant_reply_json_valid: object, - speak_mode: bool = False, + config: Config, ) -> None: assistant_thoughts_reasoning = None assistant_thoughts_plan = None @@ -288,7 +289,7 @@ def print_assistant_thoughts( logger.typewriter_log("CRITICISM:", Fore.YELLOW, f"{assistant_thoughts_criticism}") # Speak the assistant's thoughts if assistant_thoughts_speak: - if speak_mode: - say_text(assistant_thoughts_speak) + if config.speak_mode: + say_text(assistant_thoughts_speak, config) else: logger.typewriter_log("SPEAK:", Fore.YELLOW, f"{assistant_thoughts_speak}") diff --git a/autogpt/main.py b/autogpt/main.py index f0af9b53..43c7bcb3 100644 --- a/autogpt/main.py +++ b/autogpt/main.py @@ -53,12 +53,12 @@ def run_auto_gpt( logger.set_level(logging.DEBUG if debug else logging.INFO) logger.speak_mode = speak - cfg = Config() + config = Config() # TODO: fill in llm values here - check_openai_api_key() + check_openai_api_key(config) create_config( - cfg, + config, continuous, continuous_limit, ai_settings, @@ -74,17 +74,17 @@ def run_auto_gpt( skip_news, ) - if cfg.continuous_mode: + if config.continuous_mode: for line in get_legal_warning().split("\n"): logger.warn(markdown_to_ansi_style(line), "LEGAL:", Fore.RED) - if not cfg.skip_news: + if not config.skip_news: motd, is_new_motd = get_latest_bulletin() if motd: motd = markdown_to_ansi_style(motd) for motd_line in motd.split("\n"): logger.info(motd_line, "NEWS:", Fore.GREEN) - if is_new_motd and not cfg.chat_messages_enabled: + if is_new_motd and not config.chat_messages_enabled: input( Fore.MAGENTA + Style.BRIGHT @@ -123,7 +123,7 @@ def run_auto_gpt( # TODO: pass in the ai_settings file and the env file and have them cloned into # the workspace directory so we can bind them to the agent. workspace_directory = Workspace.make_workspace(workspace_directory) - cfg.workspace_path = str(workspace_directory) + config.workspace_path = str(workspace_directory) # HACK: doing this here to collect some globals that depend on the workspace. 
file_logger_path = workspace_directory / "file_logger.txt" @@ -131,17 +131,17 @@ def run_auto_gpt( with file_logger_path.open(mode="w", encoding="utf-8") as f: f.write("File Operation Logger ") - cfg.file_logger_path = str(file_logger_path) + config.file_logger_path = str(file_logger_path) - cfg.set_plugins(scan_plugins(cfg, cfg.debug_mode)) + config.set_plugins(scan_plugins(config, config.debug_mode)) # Create a CommandRegistry instance and scan default folder command_registry = CommandRegistry() logger.debug( - f"The following command categories are disabled: {cfg.disabled_command_categories}" + f"The following command categories are disabled: {config.disabled_command_categories}" ) enabled_command_categories = [ - x for x in COMMAND_CATEGORIES if x not in cfg.disabled_command_categories + x for x in COMMAND_CATEGORIES if x not in config.disabled_command_categories ] logger.debug( @@ -152,7 +152,7 @@ def run_auto_gpt( command_registry.import_commands(command_category) ai_name = "" - ai_config = construct_main_ai_config() + ai_config = construct_main_ai_config(config) ai_config.command_registry = command_registry if ai_config.ai_name: ai_name = ai_config.ai_name @@ -161,22 +161,22 @@ def run_auto_gpt( next_action_count = 0 # add chat plugins capable of report to logger - if cfg.chat_messages_enabled: - for plugin in cfg.plugins: + if config.chat_messages_enabled: + for plugin in config.plugins: if hasattr(plugin, "can_handle_report") and plugin.can_handle_report(): logger.info(f"Loaded plugin into logger: {plugin.__class__.__name__}") logger.chat_plugins.append(plugin) # Initialize memory and make sure it is empty. # this is particularly important for indexing and referencing pinecone memory - memory = get_memory(cfg) + memory = get_memory(config) memory.clear() logger.typewriter_log( "Using memory of type:", Fore.GREEN, f"{memory.__class__.__name__}" ) - logger.typewriter_log("Using Browser:", Fore.GREEN, cfg.selenium_web_browser) - system_prompt = ai_config.construct_full_prompt() - if cfg.debug_mode: + logger.typewriter_log("Using Browser:", Fore.GREEN, config.selenium_web_browser) + system_prompt = ai_config.construct_full_prompt(config) + if config.debug_mode: logger.typewriter_log("Prompt:", Fore.GREEN, system_prompt) agent = Agent( @@ -188,6 +188,6 @@ def run_auto_gpt( triggering_prompt=DEFAULT_TRIGGERING_PROMPT, workspace_directory=workspace_directory, ai_config=ai_config, - config=cfg, + config=config, ) agent.start_interaction_loop() diff --git a/autogpt/memory/message_history.py b/autogpt/memory/message_history.py index 897cee15..4dba13dd 100644 --- a/autogpt/memory/message_history.py +++ b/autogpt/memory/message_history.py @@ -47,8 +47,7 @@ class MessageHistory: return self.messages.append(message) def trim_messages( - self, - current_message_chain: list[Message], + self, current_message_chain: list[Message], config: Config ) -> tuple[Message, list[Message]]: """ Returns a list of trimmed messages: messages which are in the message history @@ -56,6 +55,7 @@ class MessageHistory: Args: current_message_chain (list[Message]): The messages currently in the context. + config (Config): The config to use. Returns: Message: A message with the new running summary after adding the trimmed messages. 
@@ -75,7 +75,7 @@ class MessageHistory: return self.summary_message(), [] new_summary_message = self.update_running_summary( - new_events=new_messages_not_in_chain + new_events=new_messages_not_in_chain, config=config ) # Find the index of the last message processed @@ -84,7 +84,7 @@ class MessageHistory: return new_summary_message, new_messages_not_in_chain - def per_cycle(self, messages: list[Message] | None = None): + def per_cycle(self, config: Config, messages: list[Message] | None = None): """ Yields: Message: a message containing user input @@ -118,7 +118,9 @@ class MessageHistory: f"This reminds you of these events from your past: \n{self.summary}", ) - def update_running_summary(self, new_events: list[Message]) -> Message: + def update_running_summary( + self, new_events: list[Message], config: Config + ) -> Message: """ This function takes a list of dictionaries representing new events and combines them with the current summary, focusing on key and potentially important information to remember. The updated summary is returned in a message @@ -135,8 +137,6 @@ class MessageHistory: update_running_summary(new_events) # Returns: "This reminds you of these events from your past: \nI entered the kitchen and found a scrawled note saying 7." """ - cfg = Config() - if not new_events: return self.summary_message() @@ -156,7 +156,7 @@ class MessageHistory: event.content = json.dumps(content_dict) except json.JSONDecodeError as e: logger.error(f"Error: Invalid JSON: {e}") - if cfg.debug_mode: + if config.debug_mode: logger.error(f"{event.content}") elif event.role.lower() == "system": @@ -171,23 +171,23 @@ class MessageHistory: # Assume an upper bound length for the summary prompt template, i.e. Your task is to create a concise running summary...., in summarize_batch func # TODO make this default dynamic prompt_template_length = 100 - max_tokens = OPEN_AI_CHAT_MODELS.get(cfg.fast_llm_model).max_tokens - summary_tlength = count_string_tokens(str(self.summary), cfg.fast_llm_model) + max_tokens = OPEN_AI_CHAT_MODELS.get(config.fast_llm_model).max_tokens + summary_tlength = count_string_tokens(str(self.summary), config.fast_llm_model) batch = [] batch_tlength = 0 # TODO Can put a cap on length of total new events and drop some previous events to save API cost, but need to think thru more how to do it without losing the context for event in new_events: - event_tlength = count_string_tokens(str(event), cfg.fast_llm_model) + event_tlength = count_string_tokens(str(event), config.fast_llm_model) if ( batch_tlength + event_tlength > max_tokens - prompt_template_length - summary_tlength ): # The batch is full. Summarize it and start a new one. - self.summarize_batch(batch, cfg) + self.summarize_batch(batch, config) summary_tlength = count_string_tokens( - str(self.summary), cfg.fast_llm_model + str(self.summary), config.fast_llm_model ) batch = [event] batch_tlength = event_tlength @@ -197,11 +197,11 @@ class MessageHistory: if batch: # There's an unprocessed batch. Summarize it. - self.summarize_batch(batch, cfg) + self.summarize_batch(batch, config) return self.summary_message() - def summarize_batch(self, new_events_batch, cfg): + def summarize_batch(self, new_events_batch, config): prompt = f'''Your task is to create a concise running summary of actions and information results in the provided text, focusing on key and potentially important information to remember. You will receive the current summary and your latest actions. 
Combine them, adding relevant key information from the latest development in 1st person past tense and keeping the summary concise. @@ -217,7 +217,9 @@ Latest Development: """ ''' - prompt = ChatSequence.for_model(cfg.fast_llm_model, [Message("user", prompt)]) + prompt = ChatSequence.for_model( + config.fast_llm_model, [Message("user", prompt)] + ) self.agent.log_cycle_handler.log_cycle( self.agent.ai_name, self.agent.created_at, @@ -226,7 +228,7 @@ Latest Development: PROMPT_SUMMARY_FILE_NAME, ) - self.summary = create_chat_completion(prompt) + self.summary = create_chat_completion(prompt, config) self.agent.log_cycle_handler.log_cycle( self.agent.ai_name, diff --git a/autogpt/memory/vector/__init__.py b/autogpt/memory/vector/__init__.py index 11c9d697..72abbb00 100644 --- a/autogpt/memory/vector/__init__.py +++ b/autogpt/memory/vector/__init__.py @@ -39,12 +39,12 @@ supported_memory = ["json_file", "no_memory"] # MilvusMemory = None -def get_memory(cfg: Config) -> VectorMemory: +def get_memory(config: Config) -> VectorMemory: memory = None - match cfg.memory_backend: + match config.memory_backend: case "json_file": - memory = JSONFileMemory(cfg) + memory = JSONFileMemory(config) case "pinecone": raise NotImplementedError( @@ -59,7 +59,7 @@ def get_memory(cfg: Config) -> VectorMemory: # " to use Pinecone as a memory backend." # ) # else: - # memory = PineconeMemory(cfg) + # memory = PineconeMemory(config) # if clear: # memory.clear() @@ -74,7 +74,7 @@ def get_memory(cfg: Config) -> VectorMemory: # " use Redis as a memory backend." # ) # else: - # memory = RedisMemory(cfg) + # memory = RedisMemory(config) case "weaviate": raise NotImplementedError( @@ -89,7 +89,7 @@ def get_memory(cfg: Config) -> VectorMemory: # " use Weaviate as a memory backend." # ) # else: - # memory = WeaviateMemory(cfg) + # memory = WeaviateMemory(config) case "milvus": raise NotImplementedError( @@ -104,18 +104,18 @@ def get_memory(cfg: Config) -> VectorMemory: # "Please install pymilvus to use Milvus or Zilliz Cloud as memory backend." # ) # else: - # memory = MilvusMemory(cfg) + # memory = MilvusMemory(config) case "no_memory": memory = NoMemory() case _: raise ValueError( - f"Unknown memory backend '{cfg.memory_backend}'. Please check your config." + f"Unknown memory backend '{config.memory_backend}'. Please check your config." 
) if memory is None: - memory = JSONFileMemory(cfg) + memory = JSONFileMemory(config) return memory diff --git a/autogpt/memory/vector/memory_item.py b/autogpt/memory/vector/memory_item.py index 539ccb61..cf00cc87 100644 --- a/autogpt/memory/vector/memory_item.py +++ b/autogpt/memory/vector/memory_item.py @@ -36,19 +36,19 @@ class MemoryItem: def from_text( text: str, source_type: MemoryDocType, + config: Config, metadata: dict = {}, how_to_summarize: str | None = None, question_for_summary: str | None = None, ): - cfg = Config() logger.debug(f"Memorizing text:\n{'-'*32}\n{text}\n{'-'*32}\n") chunks = [ chunk for chunk, _ in ( - split_text(text, cfg.embedding_model) + split_text(text, config.embedding_model, config) if source_type != "code_file" - else chunk_content(text, cfg.embedding_model) + else chunk_content(text, config.embedding_model) ) ] logger.debug("Chunks: " + str(chunks)) @@ -58,6 +58,7 @@ class MemoryItem: for summary, _ in [ summarize_text( text_chunk, + config, instruction=how_to_summarize, question=question_for_summary, ) @@ -66,7 +67,7 @@ class MemoryItem: ] logger.debug("Chunk summaries: " + str(chunk_summaries)) - e_chunks = get_embedding(chunks) + e_chunks = get_embedding(chunks, config) summary = ( chunk_summaries[0] @@ -81,7 +82,7 @@ class MemoryItem: # TODO: investigate search performance of weighted average vs summary # e_average = np.average(e_chunks, axis=0, weights=[len(c) for c in chunks]) - e_summary = get_embedding(summary) + e_summary = get_embedding(summary, config) metadata["source_type"] = source_type @@ -96,8 +97,8 @@ class MemoryItem: ) @staticmethod - def from_text_file(content: str, path: str): - return MemoryItem.from_text(content, "text_file", {"location": path}) + def from_text_file(content: str, path: str, config: Config): + return MemoryItem.from_text(content, "text_file", config, {"location": path}) @staticmethod def from_code_file(content: str, path: str): @@ -137,10 +138,13 @@ class MemoryItem: ) @staticmethod - def from_webpage(content: str, url: str, question: str | None = None): + def from_webpage( + content: str, url: str, config: Config, question: str | None = None + ): return MemoryItem.from_text( text=content, source_type="webpage", + config=config, metadata={"location": url}, question_for_summary=question, ) diff --git a/autogpt/memory/vector/providers/base.py b/autogpt/memory/vector/providers/base.py index 969d8934..dc4dbf3c 100644 --- a/autogpt/memory/vector/providers/base.py +++ b/autogpt/memory/vector/providers/base.py @@ -17,25 +17,29 @@ class VectorMemoryProvider(MutableSet[MemoryItem], AbstractSingleton): def __init__(self, config: Config): pass - def get(self, query: str) -> MemoryItemRelevance | None: + def get(self, query: str, config: Config) -> MemoryItemRelevance | None: """ Gets the data from the memory that is most relevant to the given query. Args: - data: The data to compare to. + query: The query used to retrieve information. + config: The config Object. Returns: The most relevant Memory """ - result = self.get_relevant(query, 1) + result = self.get_relevant(query, 1, config) return result[0] if result else None - def get_relevant(self, query: str, k: int) -> Sequence[MemoryItemRelevance]: + def get_relevant( + self, query: str, k: int, config: Config + ) -> Sequence[MemoryItemRelevance]: """ Returns the top-k most relevant memories for the given query Args: query: the query to compare stored memories to k: the number of relevant memories to fetch + config: The config Object. 
Returns: list[MemoryItemRelevance] containing the top [k] relevant memories @@ -48,7 +52,7 @@ class VectorMemoryProvider(MutableSet[MemoryItem], AbstractSingleton): f"{len(self)} memories in index" ) - relevances = self.score_memories_for_relevance(query) + relevances = self.score_memories_for_relevance(query, config) logger.debug(f"Memory relevance scores: {[str(r) for r in relevances]}") # take last k items and reverse @@ -57,13 +61,13 @@ class VectorMemoryProvider(MutableSet[MemoryItem], AbstractSingleton): return [relevances[i] for i in top_k_indices] def score_memories_for_relevance( - self, for_query: str + self, for_query: str, config: Config ) -> Sequence[MemoryItemRelevance]: """ Returns MemoryItemRelevance for every memory in the index. Implementations may override this function for performance purposes. """ - e_query: Embedding = get_embedding(for_query) + e_query: Embedding = get_embedding(for_query, config) return [m.relevance_for(for_query, e_query) for m in self] def get_stats(self) -> tuple[int, int]: diff --git a/autogpt/memory/vector/providers/json_file.py b/autogpt/memory/vector/providers/json_file.py index 3ae7cd86..b85ea8e6 100644 --- a/autogpt/memory/vector/providers/json_file.py +++ b/autogpt/memory/vector/providers/json_file.py @@ -20,17 +20,17 @@ class JSONFileMemory(VectorMemoryProvider): file_path: Path memories: list[MemoryItem] - def __init__(self, cfg: Config) -> None: + def __init__(self, config: Config) -> None: """Initialize a class instance Args: - cfg: Config object + config: Config object Returns: None """ - workspace_path = Path(cfg.workspace_path) - self.file_path = workspace_path / f"{cfg.memory_index}.json" + workspace_path = Path(config.workspace_path) + self.file_path = workspace_path / f"{config.memory_index}.json" self.file_path.touch() logger.debug( f"Initialized {__class__.__name__} with index path {self.file_path}" diff --git a/autogpt/memory/vector/utils.py b/autogpt/memory/vector/utils.py index b542632b..beb2fcf9 100644 --- a/autogpt/memory/vector/utils.py +++ b/autogpt/memory/vector/utils.py @@ -22,7 +22,7 @@ def get_embedding(input: list[str] | list[TText]) -> list[Embedding]: def get_embedding( - input: str | TText | list[str] | list[TText], + input: str | TText | list[str] | list[TText], config: Config ) -> Embedding | list[Embedding]: """Get an embedding from the ada model. @@ -33,7 +33,6 @@ def get_embedding( Returns: List[float]: The embedding. 
""" - cfg = Config() multiple = isinstance(input, list) and all(not isinstance(i, int) for i in input) if isinstance(input, str): @@ -41,22 +40,22 @@ def get_embedding( elif multiple and isinstance(input[0], str): input = [text.replace("\n", " ") for text in input] - model = cfg.embedding_model - if cfg.use_azure: - kwargs = {"engine": cfg.get_azure_deployment_id_for_model(model)} + model = config.embedding_model + if config.use_azure: + kwargs = {"engine": config.get_azure_deployment_id_for_model(model)} else: kwargs = {"model": model} logger.debug( f"Getting embedding{f's for {len(input)} inputs' if multiple else ''}" f" with model '{model}'" - + (f" via Azure deployment '{kwargs['engine']}'" if cfg.use_azure else "") + + (f" via Azure deployment '{kwargs['engine']}'" if config.use_azure else "") ) embeddings = iopenai.create_embedding( input, **kwargs, - api_key=cfg.openai_api_key, + api_key=config.openai_api_key, ).data if not multiple: diff --git a/autogpt/plugins/__init__.py b/autogpt/plugins/__init__.py index 4d84c9b5..d0a4864c 100644 --- a/autogpt/plugins/__init__.py +++ b/autogpt/plugins/__init__.py @@ -58,7 +58,7 @@ def write_dict_to_json_file(data: dict, file_path: str) -> None: json.dump(data, file, indent=4) -def fetch_openai_plugins_manifest_and_spec(cfg: Config) -> dict: +def fetch_openai_plugins_manifest_and_spec(config: Config) -> dict: """ Fetch the manifest for a list of OpenAI plugins. Args: @@ -68,8 +68,8 @@ def fetch_openai_plugins_manifest_and_spec(cfg: Config) -> dict: """ # TODO add directory scan manifests = {} - for url in cfg.plugins_openai: - openai_plugin_client_dir = f"{cfg.plugins_dir}/openai/{urlparse(url).netloc}" + for url in config.plugins_openai: + openai_plugin_client_dir = f"{config.plugins_dir}/openai/{urlparse(url).netloc}" create_directory_if_not_exists(openai_plugin_client_dir) if not os.path.exists(f"{openai_plugin_client_dir}/ai-plugin.json"): try: @@ -134,18 +134,18 @@ def create_directory_if_not_exists(directory_path: str) -> bool: def initialize_openai_plugins( - manifests_specs: dict, cfg: Config, debug: bool = False + manifests_specs: dict, config: Config, debug: bool = False ) -> dict: """ Initialize OpenAI plugins. Args: manifests_specs (dict): per url dictionary of manifest and spec. - cfg (Config): Config instance including plugins config + config (Config): Config instance including plugins config debug (bool, optional): Enable debug logging. Defaults to False. Returns: dict: per url dictionary of manifest, spec and client. """ - openai_plugins_dir = f"{cfg.plugins_dir}/openai" + openai_plugins_dir = f"{config.plugins_dir}/openai" if create_directory_if_not_exists(openai_plugins_dir): for url, manifest_spec in manifests_specs.items(): openai_plugin_client_dir = f"{openai_plugins_dir}/{urlparse(url).hostname}" @@ -188,13 +188,13 @@ def initialize_openai_plugins( def instantiate_openai_plugin_clients( - manifests_specs_clients: dict, cfg: Config, debug: bool = False + manifests_specs_clients: dict, config: Config, debug: bool = False ) -> dict: """ Instantiates BaseOpenAIPlugin instances for each OpenAI plugin. Args: manifests_specs_clients (dict): per url dictionary of manifest, spec and client. - cfg (Config): Config instance including plugins config + config (Config): Config instance including plugins config debug (bool, optional): Enable debug logging. Defaults to False. Returns: plugins (dict): per url dictionary of BaseOpenAIPlugin instances. 
@@ -206,11 +206,11 @@ def instantiate_openai_plugin_clients( return plugins -def scan_plugins(cfg: Config, debug: bool = False) -> List[AutoGPTPluginTemplate]: +def scan_plugins(config: Config, debug: bool = False) -> List[AutoGPTPluginTemplate]: """Scan the plugins directory for plugins and loads them. Args: - cfg (Config): Config instance including plugins config + config (Config): Config instance including plugins config debug (bool, optional): Enable debug logging. Defaults to False. Returns: @@ -218,11 +218,11 @@ def scan_plugins(cfg: Config, debug: bool = False) -> List[AutoGPTPluginTemplate """ loaded_plugins = [] # Generic plugins - plugins_path_path = Path(cfg.plugins_dir) - plugins_config = cfg.plugins_config + plugins_path_path = Path(config.plugins_dir) + plugins_config = config.plugins_config # Directory-based plugins - for plugin_path in [f.path for f in os.scandir(cfg.plugins_dir) if f.is_dir()]: + for plugin_path in [f.path for f in os.scandir(config.plugins_dir) if f.is_dir()]: # Avoid going into __pycache__ or other hidden directories if plugin_path.startswith("__"): continue @@ -286,11 +286,11 @@ def scan_plugins(cfg: Config, debug: bool = False) -> List[AutoGPTPluginTemplate ) # OpenAI plugins - if cfg.plugins_openai: - manifests_specs = fetch_openai_plugins_manifest_and_spec(cfg) + if config.plugins_openai: + manifests_specs = fetch_openai_plugins_manifest_and_spec(config) if manifests_specs.keys(): manifests_specs_clients = initialize_openai_plugins( - manifests_specs, cfg, debug + manifests_specs, config, debug ) for url, openai_plugin_meta in manifests_specs_clients.items(): if not plugins_config.is_enabled(url): diff --git a/autogpt/processing/text.py b/autogpt/processing/text.py index aadc93ef..78eabf45 100644 --- a/autogpt/processing/text.py +++ b/autogpt/processing/text.py @@ -12,8 +12,6 @@ from autogpt.llm.utils import count_string_tokens, create_chat_completion from autogpt.logs import logger from autogpt.utils import batch -CFG = Config() - def _max_chunk_length(model: str, max: Optional[int] = None) -> int: model_max_input_tokens = OPEN_AI_MODELS[model].max_tokens - 1 @@ -60,13 +58,18 @@ def chunk_content( def summarize_text( - text: str, instruction: Optional[str] = None, question: Optional[str] = None + text: str, + config: Config, + instruction: Optional[str] = None, + question: Optional[str] = None, ) -> tuple[str, None | list[tuple[str, str]]]: """Summarize text using the OpenAI API Args: text (str): The text to summarize + config (Config): The config object instruction (str): Additional instruction for summarization, e.g. 
"focus on information related to polar bears", "omit personal information contained in the text" + question (str): Question to answer in the summary Returns: str: The summary of the text @@ -79,7 +82,7 @@ def summarize_text( if instruction and question: raise ValueError("Parameters 'question' and 'instructions' cannot both be set") - model = CFG.fast_llm_model + model = config.fast_llm_model if question: instruction = ( @@ -111,14 +114,18 @@ def summarize_text( logger.debug(f"Summarizing with {model}:\n{summarization_prompt.dump()}\n") summary = create_chat_completion( - summarization_prompt, temperature=0, max_tokens=500 + summarization_prompt, config, temperature=0, max_tokens=500 ) logger.debug(f"\n{'-'*16} SUMMARY {'-'*17}\n{summary}\n{'-'*42}\n") return summary.strip(), None summaries: list[str] = [] - chunks = list(split_text(text, for_model=model, max_chunk_length=max_chunk_length)) + chunks = list( + split_text( + text, for_model=model, config=config, max_chunk_length=max_chunk_length + ) + ) for i, (chunk, chunk_length) in enumerate(chunks): logger.info( @@ -138,7 +145,8 @@ def summarize_text( def split_text( text: str, - for_model: str = CFG.fast_llm_model, + for_model: str, + config: Config, with_overlap=True, max_chunk_length: Optional[int] = None, ): @@ -147,7 +155,9 @@ def split_text( Args: text (str): The text to split for_model (str): The model to chunk for; determines tokenizer and constraints - max_length (int, optional): The maximum length of each chunk + config (Config): The config object + with_overlap (bool, optional): Whether to allow overlap between chunks + max_chunk_length (int, optional): The maximum length of a chunk Yields: str: The next chunk of text @@ -155,6 +165,7 @@ def split_text( Raises: ValueError: when a sentence is longer than the maximum length """ + max_length = _max_chunk_length(for_model, max_chunk_length) # flatten paragraphs to improve performance @@ -168,7 +179,7 @@ def split_text( n_chunks = ceil(text_length / max_length) target_chunk_length = ceil(text_length / n_chunks) - nlp: spacy.language.Language = spacy.load(CFG.browse_spacy_language_model) + nlp: spacy.language.Language = spacy.load(config.browse_spacy_language_model) nlp.add_pipe("sentencizer") doc = nlp(text) sentences = [sentence.text.strip() for sentence in doc.sents] diff --git a/autogpt/prompts/prompt.py b/autogpt/prompts/prompt.py index 17d78bd1..16d5c7e7 100644 --- a/autogpt/prompts/prompt.py +++ b/autogpt/prompts/prompt.py @@ -9,12 +9,10 @@ from autogpt.prompts.generator import PromptGenerator from autogpt.setup import prompt_user from autogpt.utils import clean_input -CFG = Config() - DEFAULT_TRIGGERING_PROMPT = "Determine exactly one command to use, and respond using the JSON schema specified previously:" -def build_default_prompt_generator() -> PromptGenerator: +def build_default_prompt_generator(config: Config) -> PromptGenerator: """ This function generates a prompt string that includes various constraints, commands, resources, and performance evaluations. 
@@ -27,7 +25,7 @@ def build_default_prompt_generator() -> PromptGenerator: prompt_generator = PromptGenerator() # Initialize the PromptConfig object and load the file set in the main config (default: prompts_settings.yaml) - prompt_config = PromptConfig(CFG.prompt_settings_file) + prompt_config = PromptConfig(config.prompt_settings_file) # Add constraints to the PromptGenerator object for constraint in prompt_config.constraints: @@ -44,70 +42,71 @@ def build_default_prompt_generator() -> PromptGenerator: return prompt_generator -def construct_main_ai_config() -> AIConfig: +def construct_main_ai_config(config: Config) -> AIConfig: """Construct the prompt for the AI to respond to Returns: str: The prompt string """ - config = AIConfig.load(CFG.ai_settings_file) - if CFG.skip_reprompt and config.ai_name: - logger.typewriter_log("Name :", Fore.GREEN, config.ai_name) - logger.typewriter_log("Role :", Fore.GREEN, config.ai_role) - logger.typewriter_log("Goals:", Fore.GREEN, f"{config.ai_goals}") + ai_config = AIConfig.load(config.ai_settings_file) + if config.skip_reprompt and ai_config.ai_name: + logger.typewriter_log("Name :", Fore.GREEN, ai_config.ai_name) + logger.typewriter_log("Role :", Fore.GREEN, ai_config.ai_role) + logger.typewriter_log("Goals:", Fore.GREEN, f"{ai_config.ai_goals}") logger.typewriter_log( "API Budget:", Fore.GREEN, - "infinite" if config.api_budget <= 0 else f"${config.api_budget}", + "infinite" if ai_config.api_budget <= 0 else f"${ai_config.api_budget}", ) - elif config.ai_name: + elif ai_config.ai_name: logger.typewriter_log( "Welcome back! ", Fore.GREEN, - f"Would you like me to return to being {config.ai_name}?", + f"Would you like me to return to being {ai_config.ai_name}?", speak_text=True, ) should_continue = clean_input( + config, f"""Continue with the last settings? 
-Name: {config.ai_name} -Role: {config.ai_role} -Goals: {config.ai_goals} -API Budget: {"infinite" if config.api_budget <= 0 else f"${config.api_budget}"} -Continue ({CFG.authorise_key}/{CFG.exit_key}): """ +Name: {ai_config.ai_name} +Role: {ai_config.ai_role} +Goals: {ai_config.ai_goals} +API Budget: {"infinite" if ai_config.api_budget <= 0 else f"${ai_config.api_budget}"} +Continue ({config.authorise_key}/{config.exit_key}): """, ) - if should_continue.lower() == CFG.exit_key: - config = AIConfig() + if should_continue.lower() == config.exit_key: + ai_config = AIConfig() - if not config.ai_name: - config = prompt_user() - config.save(CFG.ai_settings_file) + if not ai_config.ai_name: + ai_config = prompt_user(config) + ai_config.save(config.ai_settings_file) - if CFG.restrict_to_workspace: + if config.restrict_to_workspace: logger.typewriter_log( "NOTE:All files/directories created by this agent can be found inside its workspace at:", Fore.YELLOW, - f"{CFG.workspace_path}", + f"{config.workspace_path}", ) # set the total api budget api_manager = ApiManager() - api_manager.set_total_budget(config.api_budget) + api_manager.set_total_budget(ai_config.api_budget) # Agent Created, print message logger.typewriter_log( - config.ai_name, + ai_config.ai_name, Fore.LIGHTBLUE_EX, "has been created with the following details:", speak_text=True, ) - # Print the ai config details + # Print the ai_config details # Name - logger.typewriter_log("Name:", Fore.GREEN, config.ai_name, speak_text=False) + logger.typewriter_log("Name:", Fore.GREEN, ai_config.ai_name, speak_text=False) # Role - logger.typewriter_log("Role:", Fore.GREEN, config.ai_role, speak_text=False) + logger.typewriter_log("Role:", Fore.GREEN, ai_config.ai_role, speak_text=False) # Goals logger.typewriter_log("Goals:", Fore.GREEN, "", speak_text=False) - for goal in config.ai_goals: + for goal in ai_config.ai_goals: logger.typewriter_log("-", Fore.GREEN, goal, speak_text=False) - return config + return ai_config diff --git a/autogpt/setup.py b/autogpt/setup.py index 67cae5da..2fe8b3a9 100644 --- a/autogpt/setup.py +++ b/autogpt/setup.py @@ -16,10 +16,8 @@ from autogpt.prompts.default_prompts import ( DEFAULT_USER_DESIRE_PROMPT, ) -CFG = Config() - -def prompt_user() -> AIConfig: +def prompt_user(config: Config) -> AIConfig: """Prompt the user for input Returns: @@ -45,7 +43,7 @@ def prompt_user() -> AIConfig: ) user_desire = utils.clean_input( - f"{Fore.LIGHTBLUE_EX}I want Auto-GPT to{Style.RESET_ALL}: " + config, f"{Fore.LIGHTBLUE_EX}I want Auto-GPT to{Style.RESET_ALL}: " ) if user_desire == "": @@ -58,11 +56,11 @@ def prompt_user() -> AIConfig: Fore.GREEN, speak_text=True, ) - return generate_aiconfig_manual() + return generate_aiconfig_manual(config) else: try: - return generate_aiconfig_automatic(user_desire) + return generate_aiconfig_automatic(user_desire, config) except Exception as e: logger.typewriter_log( "Unable to automatically generate AI Config based on user desire.", @@ -71,10 +69,10 @@ def prompt_user() -> AIConfig: speak_text=True, ) - return generate_aiconfig_manual() + return generate_aiconfig_manual(config) -def generate_aiconfig_manual() -> AIConfig: +def generate_aiconfig_manual(config: Config) -> AIConfig: """ Interactively create an AI configuration by prompting the user to provide the name, role, and goals of the AI. 
@@ -99,7 +97,7 @@ def generate_aiconfig_manual() -> AIConfig: logger.typewriter_log( "Name your AI: ", Fore.GREEN, "For example, 'Entrepreneur-GPT'" ) - ai_name = utils.clean_input("AI Name: ") + ai_name = utils.clean_input(config, "AI Name: ") if ai_name == "": ai_name = "Entrepreneur-GPT" @@ -114,7 +112,7 @@ def generate_aiconfig_manual() -> AIConfig: "For example, 'an AI designed to autonomously develop and run businesses with" " the sole goal of increasing your net worth.'", ) - ai_role = utils.clean_input(f"{ai_name} is: ") + ai_role = utils.clean_input(config, f"{ai_name} is: ") if ai_role == "": ai_role = "an AI designed to autonomously develop and run businesses with the" " sole goal of increasing your net worth." @@ -129,7 +127,9 @@ def generate_aiconfig_manual() -> AIConfig: logger.info("Enter nothing to load defaults, enter nothing when finished.") ai_goals = [] for i in range(5): - ai_goal = utils.clean_input(f"{Fore.LIGHTBLUE_EX}Goal{Style.RESET_ALL} {i+1}: ") + ai_goal = utils.clean_input( + config, f"{Fore.LIGHTBLUE_EX}Goal{Style.RESET_ALL} {i+1}: " + ) if ai_goal == "": break ai_goals.append(ai_goal) @@ -148,7 +148,7 @@ def generate_aiconfig_manual() -> AIConfig: ) logger.info("Enter nothing to let the AI run without monetary limit") api_budget_input = utils.clean_input( - f"{Fore.LIGHTBLUE_EX}Budget{Style.RESET_ALL}: $" + config, f"{Fore.LIGHTBLUE_EX}Budget{Style.RESET_ALL}: $" ) if api_budget_input == "": api_budget = 0.0 @@ -164,7 +164,7 @@ def generate_aiconfig_manual() -> AIConfig: return AIConfig(ai_name, ai_role, ai_goals, api_budget) -def generate_aiconfig_automatic(user_prompt) -> AIConfig: +def generate_aiconfig_automatic(user_prompt: str, config: Config) -> AIConfig: """Generates an AIConfig object from the given string. Returns: @@ -178,12 +178,13 @@ def generate_aiconfig_automatic(user_prompt) -> AIConfig: # Call LLM with the string as user input output = create_chat_completion( ChatSequence.for_model( - CFG.fast_llm_model, + config.fast_llm_model, [ Message("system", system_prompt), Message("user", prompt_ai_config_automatic), ], - ) + ), + config, ) # Debug LLM Output diff --git a/autogpt/speech/base.py b/autogpt/speech/base.py index a7570d94..a75acaed 100644 --- a/autogpt/speech/base.py +++ b/autogpt/speech/base.py @@ -2,6 +2,7 @@ import abc from threading import Lock +from autogpt.config import Config from autogpt.singleton import AbstractSingleton @@ -10,7 +11,7 @@ class VoiceBase(AbstractSingleton): Base class for all voice classes. """ - def __init__(self): + def __init__(self, config: Config): """ Initialize the voice class. """ @@ -19,7 +20,7 @@ class VoiceBase(AbstractSingleton): self._api_key = None self._voices = [] self._mutex = Lock() - self._setup() + self._setup(config) def say(self, text: str, voice_index: int = 0) -> bool: """ diff --git a/autogpt/speech/eleven_labs.py b/autogpt/speech/eleven_labs.py index 5952508d..3f3baf33 100644 --- a/autogpt/speech/eleven_labs.py +++ b/autogpt/speech/eleven_labs.py @@ -13,14 +13,13 @@ PLACEHOLDERS = {"your-voice-id"} class ElevenLabsSpeech(VoiceBase): """ElevenLabs speech class""" - def _setup(self) -> None: + def _setup(self, config: Config) -> None: """Set up the voices, API key, etc. 
Returns: None: None """ - cfg = Config() default_voices = ["ErXwobaYiN019PkySvjV", "EXAVITQu4vr4xnSDxMaL"] voice_options = { "Rachel": "21m00Tcm4TlvDq8ikWAM", @@ -35,15 +34,15 @@ class ElevenLabsSpeech(VoiceBase): } self._headers = { "Content-Type": "application/json", - "xi-api-key": cfg.elevenlabs_api_key, + "xi-api-key": config.elevenlabs_api_key, } self._voices = default_voices.copy() - if cfg.elevenlabs_voice_id in voice_options: - cfg.elevenlabs_voice_id = voice_options[cfg.elevenlabs_voice_id] - if cfg.elevenlabs_voice_2_id in voice_options: - cfg.elevenlabs_voice_2_id = voice_options[cfg.elevenlabs_voice_2_id] - self._use_custom_voice(cfg.elevenlabs_voice_id, 0) - self._use_custom_voice(cfg.elevenlabs_voice_2_id, 1) + if config.elevenlabs_voice_id in voice_options: + config.elevenlabs_voice_id = voice_options[config.elevenlabs_voice_id] + if config.elevenlabs_voice_2_id in voice_options: + config.elevenlabs_voice_2_id = voice_options[config.elevenlabs_voice_2_id] + self._use_custom_voice(config.elevenlabs_voice_id, 0) + self._use_custom_voice(config.elevenlabs_voice_2_id, 1) def _use_custom_voice(self, voice, voice_index) -> None: """Use a custom voice if provided and not a placeholder diff --git a/autogpt/speech/say.py b/autogpt/speech/say.py index 06f580f0..1847c631 100644 --- a/autogpt/speech/say.py +++ b/autogpt/speech/say.py @@ -14,10 +14,9 @@ _QUEUE_SEMAPHORE = Semaphore( ) # The amount of sounds to queue before blocking the main thread -def say_text(text: str, voice_index: int = 0) -> None: +def say_text(text: str, config: Config, voice_index: int = 0) -> None: """Speak the given text using the given voice index""" - cfg = Config() - default_voice_engine, voice_engine = _get_voice_engine(cfg) + default_voice_engine, voice_engine = _get_voice_engine(config) def speak() -> None: success = voice_engine.say(text, voice_index) @@ -35,7 +34,7 @@ def _get_voice_engine(config: Config) -> tuple[VoiceBase, VoiceBase]: """Get the voice engine to use for the given configuration""" tts_provider = config.text_to_speech_provider if tts_provider == "elevenlabs": - voice_engine = ElevenLabsSpeech() + voice_engine = ElevenLabsSpeech(config) elif tts_provider == "macos": voice_engine = MacOSTTS() elif tts_provider == "streamelements": diff --git a/autogpt/utils.py b/autogpt/utils.py index 91e570a0..9eb6cbe4 100644 --- a/autogpt/utils.py +++ b/autogpt/utils.py @@ -23,11 +23,10 @@ def batch(iterable, max_batch_length: int, overlap: int = 0): yield iterable[i : i + max_batch_length] -def clean_input(prompt: str = "", talk=False): +def clean_input(config: Config, prompt: str = "", talk=False): try: - cfg = Config() - if cfg.chat_messages_enabled: - for plugin in cfg.plugins: + if config.chat_messages_enabled: + for plugin in config.plugins: if not hasattr(plugin, "can_handle_user_input"): continue if not plugin.can_handle_user_input(user_input=prompt): @@ -44,14 +43,14 @@ def clean_input(prompt: str = "", talk=False): "sure", "alright", ]: - return cfg.authorise_key + return config.authorise_key elif plugin_response.lower() in [ "no", "nope", "n", "negative", ]: - return cfg.exit_key + return config.exit_key return plugin_response # ask for input, default when just pressing Enter is y diff --git a/data_ingestion.py b/data_ingestion.py index 09d5328c..ae32b47c 100644 --- a/data_ingestion.py +++ b/data_ingestion.py @@ -5,7 +5,7 @@ from autogpt.commands.file_operations import ingest_file, list_files from autogpt.config import Config from autogpt.memory.vector import VectorMemory, get_memory -cfg = 
Config() +config = Config() def configure_logging(): @@ -70,7 +70,7 @@ def main() -> None: args = parser.parse_args() # Initialize memory - memory = get_memory(cfg) + memory = get_memory(config) if args.init: memory.clear() logger.debug("Using memory of type: " + memory.__class__.__name__) diff --git a/docs/challenges/building_challenges.md b/docs/challenges/building_challenges.md index 0c3d89ac..f50b0ea9 100644 --- a/docs/challenges/building_challenges.md +++ b/docs/challenges/building_challenges.md @@ -52,7 +52,7 @@ def kubernetes_agent( ai_config.command_registry = command_registry system_prompt = ai_config.construct_full_prompt() - Config().set_continuous_mode(False) + agent_test_config.set_continuous_mode(False) agent = Agent( # We also give the AI a name ai_name="Kubernetes-Demo", diff --git a/tests/Auto-GPT-test-cassettes b/tests/Auto-GPT-test-cassettes index 43f536a1..058606a1 160000 --- a/tests/Auto-GPT-test-cassettes +++ b/tests/Auto-GPT-test-cassettes @@ -1 +1 @@ -Subproject commit 43f536a193a57cd76f31fa405cf7ec2309ed383a +Subproject commit 058606a1009948a91808f4ccf5fb3c4f9e522db2 diff --git a/tests/conftest.py b/tests/conftest.py index 97620e21..db0eced0 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -97,7 +97,7 @@ def agent(config: Config, workspace: Workspace) -> Agent: memory_json_file = get_memory(config) memory_json_file.clear() - system_prompt = ai_config.construct_full_prompt() + system_prompt = ai_config.construct_full_prompt(config) return Agent( ai_name=ai_config.ai_name, diff --git a/tests/integration/agent_factory.py b/tests/integration/agent_factory.py index 9078a843..e6702b66 100644 --- a/tests/integration/agent_factory.py +++ b/tests/integration/agent_factory.py @@ -55,7 +55,7 @@ def browser_agent(agent_test_config, memory_none: NoMemory, workspace: Workspace ) ai_config.command_registry = command_registry - system_prompt = ai_config.construct_full_prompt() + system_prompt = ai_config.construct_full_prompt(agent_test_config) agent = Agent( ai_name="", @@ -91,8 +91,8 @@ def file_system_agents( ai_goals=[ai_goal], ) ai_config.command_registry = command_registry - system_prompt = ai_config.construct_full_prompt() - Config().set_continuous_mode(False) + system_prompt = ai_config.construct_full_prompt(agent_test_config) + agent_test_config.set_continuous_mode(False) agents.append( Agent( ai_name="File System Agent", @@ -123,7 +123,7 @@ def memory_management_agent(agent_test_config, memory_json_file, workspace: Work ) ai_config.command_registry = command_registry - system_prompt = ai_config.construct_full_prompt() + system_prompt = ai_config.construct_full_prompt(agent_test_config) agent = Agent( ai_name="Follow-Instructions-GPT", @@ -159,8 +159,8 @@ def information_retrieval_agents( ai_goals=[ai_goal], ) ai_config.command_registry = command_registry - system_prompt = ai_config.construct_full_prompt() - Config().set_continuous_mode(False) + system_prompt = ai_config.construct_full_prompt(agent_test_config) + agent_test_config.set_continuous_mode(False) agents.append( Agent( ai_name="Information Retrieval Agent", @@ -195,8 +195,8 @@ def kubernetes_agent( ) ai_config.command_registry = command_registry - system_prompt = ai_config.construct_full_prompt() - Config().set_continuous_mode(False) + system_prompt = ai_config.construct_full_prompt(agent_test_config) + agent_test_config.set_continuous_mode(False) agent = Agent( ai_name="Kubernetes-Demo", memory=memory_json_file, @@ -228,8 +228,8 @@ def get_nobel_prize_agent(agent_test_config, memory_json_file, 
workspace: Worksp ) ai_config.command_registry = command_registry - system_prompt = ai_config.construct_full_prompt() - Config().set_continuous_mode(False) + system_prompt = ai_config.construct_full_prompt(agent_test_config) + agent_test_config.set_continuous_mode(False) agent = Agent( ai_name="Get-PhysicsNobelPrize", @@ -254,7 +254,7 @@ def debug_code_agents(agent_test_config, memory_json_file, workspace: Workspace) "1- Run test.py using the execute_python_file command.", "2- Read code.py using the read_file command.", "3- Modify code.py using the write_to_file command." - "Repeat step 1, 2 and 3 until test.py runs without errors.", + "Repeat step 1, 2 and 3 until test.py runs without errors. Do not modify the test.py file.", ], [ "1- Run test.py.", @@ -273,8 +273,8 @@ def debug_code_agents(agent_test_config, memory_json_file, workspace: Workspace) ) command_registry = get_command_registry(agent_test_config) ai_config.command_registry = command_registry - system_prompt = ai_config.construct_full_prompt() - Config().set_continuous_mode(False) + system_prompt = ai_config.construct_full_prompt(agent_test_config) + agent_test_config.set_continuous_mode(False) agents.append( Agent( ai_name="Debug Code Agent", diff --git a/tests/integration/goal_oriented/__init__.py b/tests/integration/goal_oriented/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/integration/memory/test_json_file_memory.py b/tests/integration/memory/test_json_file_memory.py index 41a3e174..ab3996de 100644 --- a/tests/integration/memory/test_json_file_memory.py +++ b/tests/integration/memory/test_json_file_memory.py @@ -71,11 +71,11 @@ def test_json_memory_clear(config: Config, memory_item: MemoryItem): def test_json_memory_get(config: Config, memory_item: MemoryItem, mock_get_embedding): index = JSONFileMemory(config) assert ( - index.get("test") == None + index.get("test", config) == None ), "Cannot test get() because initial index is not empty" index.add(memory_item) - retrieved = index.get("test") + retrieved = index.get("test", config) assert retrieved is not None assert retrieved.memory_item == memory_item @@ -102,20 +102,27 @@ def test_json_memory_load_index(config: Config, memory_item: MemoryItem): @requires_api_key("OPENAI_API_KEY") def test_json_memory_get_relevant(config: Config, patched_api_requestor: None) -> None: index = JSONFileMemory(config) - mem1 = MemoryItem.from_text_file("Sample text", "sample.txt") - mem2 = MemoryItem.from_text_file("Grocery list:\n- Pancake mix", "groceries.txt") - mem3 = MemoryItem.from_text_file("What is your favorite color?", "color.txt") + mem1 = MemoryItem.from_text_file("Sample text", "sample.txt", config) + mem2 = MemoryItem.from_text_file( + "Grocery list:\n- Pancake mix", "groceries.txt", config + ) + mem3 = MemoryItem.from_text_file( + "What is your favorite color?", "color.txt", config + ) lipsum = "Lorem ipsum dolor sit amet" - mem4 = MemoryItem.from_text_file(" ".join([lipsum] * 100), "lipsum.txt") + mem4 = MemoryItem.from_text_file(" ".join([lipsum] * 100), "lipsum.txt", config) index.add(mem1) index.add(mem2) index.add(mem3) index.add(mem4) - assert index.get_relevant(mem1.raw_content, 1)[0].memory_item == mem1 - assert index.get_relevant(mem2.raw_content, 1)[0].memory_item == mem2 - assert index.get_relevant(mem3.raw_content, 1)[0].memory_item == mem3 - assert [mr.memory_item for mr in index.get_relevant(lipsum, 2)] == [mem4, mem1] + assert index.get_relevant(mem1.raw_content, 1, config)[0].memory_item == mem1 + assert 
index.get_relevant(mem2.raw_content, 1, config)[0].memory_item == mem2 + assert index.get_relevant(mem3.raw_content, 1, config)[0].memory_item == mem3 + assert [mr.memory_item for mr in index.get_relevant(lipsum, 2, config)] == [ + mem4, + mem1, + ] def test_json_memory_get_stats(config: Config, memory_item: MemoryItem) -> None: diff --git a/tests/integration/test_setup.py b/tests/integration/test_setup.py index 5217d72a..ed7eb8fd 100644 --- a/tests/integration/test_setup.py +++ b/tests/integration/test_setup.py @@ -9,10 +9,10 @@ from tests.utils import requires_api_key @pytest.mark.vcr @requires_api_key("OPENAI_API_KEY") -def test_generate_aiconfig_automatic_default(patched_api_requestor): +def test_generate_aiconfig_automatic_default(patched_api_requestor, config): user_inputs = [""] with patch("autogpt.utils.session.prompt", side_effect=user_inputs): - ai_config = prompt_user() + ai_config = prompt_user(config) assert isinstance(ai_config, AIConfig) assert ai_config.ai_name is not None @@ -22,9 +22,9 @@ def test_generate_aiconfig_automatic_default(patched_api_requestor): @pytest.mark.vcr @requires_api_key("OPENAI_API_KEY") -def test_generate_aiconfig_automatic_typical(patched_api_requestor): +def test_generate_aiconfig_automatic_typical(patched_api_requestor, config): user_prompt = "Help me create a rock opera about cybernetic giraffes" - ai_config = generate_aiconfig_automatic(user_prompt) + ai_config = generate_aiconfig_automatic(user_prompt, config) assert isinstance(ai_config, AIConfig) assert ai_config.ai_name is not None @@ -34,7 +34,7 @@ def test_generate_aiconfig_automatic_typical(patched_api_requestor): @pytest.mark.vcr @requires_api_key("OPENAI_API_KEY") -def test_generate_aiconfig_automatic_fallback(patched_api_requestor): +def test_generate_aiconfig_automatic_fallback(patched_api_requestor, config): user_inputs = [ "T&GF£OIBECC()!*", "Chef-GPT", @@ -45,7 +45,7 @@ def test_generate_aiconfig_automatic_fallback(patched_api_requestor): "", ] with patch("autogpt.utils.session.prompt", side_effect=user_inputs): - ai_config = prompt_user() + ai_config = prompt_user(config) assert isinstance(ai_config, AIConfig) assert ai_config.ai_name == "Chef-GPT" @@ -55,7 +55,7 @@ def test_generate_aiconfig_automatic_fallback(patched_api_requestor): @pytest.mark.vcr @requires_api_key("OPENAI_API_KEY") -def test_prompt_user_manual_mode(patched_api_requestor): +def test_prompt_user_manual_mode(patched_api_requestor, config): user_inputs = [ "--manual", "Chef-GPT", @@ -66,7 +66,7 @@ def test_prompt_user_manual_mode(patched_api_requestor): "", ] with patch("autogpt.utils.session.prompt", side_effect=user_inputs): - ai_config = prompt_user() + ai_config = prompt_user(config) assert isinstance(ai_config, AIConfig) assert ai_config.ai_name == "Chef-GPT" diff --git a/tests/unit/test_agent_manager.py b/tests/unit/test_agent_manager.py index 4b0a01bc..a372b726 100644 --- a/tests/unit/test_agent_manager.py +++ b/tests/unit/test_agent_manager.py @@ -5,9 +5,9 @@ from autogpt.llm.chat import create_chat_completion @pytest.fixture -def agent_manager(): +def agent_manager(config): # Hack, real gross. Singletons are not good times. 
-    yield AgentManager()
+    yield AgentManager(config)
     del AgentManager._instances[AgentManager]
 
 
diff --git a/tests/unit/test_ai_config.py b/tests/unit/test_ai_config.py
index a684373b..e3c31d5d 100644
--- a/tests/unit/test_ai_config.py
+++ b/tests/unit/test_ai_config.py
@@ -19,10 +19,10 @@ ai_name: McFamished
 ai_role: A hungry AI
 api_budget: 0.0
 """
-    config_file = tmp_path / "ai_settings.yaml"
-    config_file.write_text(yaml_content)
+    ai_settings_file = tmp_path / "ai_settings.yaml"
+    ai_settings_file.write_text(yaml_content)
 
-    ai_config = AIConfig.load(config_file)
+    ai_config = AIConfig.load(ai_settings_file)
 
     assert len(ai_config.ai_goals) == 4
     assert ai_config.ai_goals[0] == "Goal 1: Make a sandwich"
@@ -30,8 +30,8 @@ api_budget: 0.0
     assert ai_config.ai_goals[2] == "Goal 3 - Go to sleep"
     assert ai_config.ai_goals[3] == "Goal 4: Wake up"
 
-    config_file.write_text("")
-    ai_config.save(config_file)
+    ai_settings_file.write_text("")
+    ai_config.save(ai_settings_file)
 
     yaml_content2 = """ai_goals:
 - 'Goal 1: Make a sandwich'
@@ -42,15 +42,15 @@ ai_name: McFamished
 ai_role: A hungry AI
 api_budget: 0.0
 """
-    assert config_file.read_text() == yaml_content2
+    assert ai_settings_file.read_text() == yaml_content2
 
 
 def test_ai_config_file_not_exists(workspace):
     """Test if file does not exist."""
-    config_file = workspace.get_path("ai_settings.yaml")
+    ai_settings_file = workspace.get_path("ai_settings.yaml")
 
-    ai_config = AIConfig.load(str(config_file))
+    ai_config = AIConfig.load(str(ai_settings_file))
     assert ai_config.ai_name == ""
     assert ai_config.ai_role == ""
     assert ai_config.ai_goals == []
@@ -62,10 +62,10 @@ def test_ai_config_file_not_exists(workspace):
 
 def test_ai_config_file_is_empty(workspace):
     """Test if file does not exist."""
-    config_file = workspace.get_path("ai_settings.yaml")
-    config_file.write_text("")
+    ai_settings_file = workspace.get_path("ai_settings.yaml")
+    ai_settings_file.write_text("")
 
-    ai_config = AIConfig.load(str(config_file))
+    ai_config = AIConfig.load(str(ai_settings_file))
     assert ai_config.ai_name == ""
     assert ai_config.ai_role == ""
     assert ai_config.ai_goals == []
diff --git a/tests/unit/test_config.py b/tests/unit/test_config.py
index 9a95cef1..19f474e2 100644
--- a/tests/unit/test_config.py
+++ b/tests/unit/test_config.py
@@ -1,5 +1,5 @@
 """
-Test cases for the Config class, which handles the configuration settings
+Test cases for the config class, which handles the configuration settings
 for the AI and ensures it behaves as a singleton.
 """
 from unittest import mock
@@ -7,14 +7,14 @@ from unittest.mock import patch
 
 import pytest
 
-from autogpt.config.config import Config
+from autogpt.config import Config
 from autogpt.configurator import GPT_3_MODEL, GPT_4_MODEL, create_config
 from autogpt.workspace.workspace import Workspace
 
 
 def test_initial_values(config: Config):
     """
-    Test if the initial values of the Config class attributes are set correctly.
+    Test if the initial values of the config class attributes are set correctly.
""" assert config.debug_mode == False assert config.continuous_mode == False diff --git a/tests/unit/test_file_operations.py b/tests/unit/test_file_operations.py index 5761e01a..b8283111 100644 --- a/tests/unit/test_file_operations.py +++ b/tests/unit/test_file_operations.py @@ -13,6 +13,7 @@ from pytest_mock import MockerFixture import autogpt.commands.file_operations as file_ops from autogpt.agent.agent import Agent +from autogpt.config import Config from autogpt.memory.vector.memory_item import MemoryItem from autogpt.memory.vector.utils import Embedding from autogpt.workspace import Workspace @@ -24,11 +25,13 @@ def file_content(): @pytest.fixture() -def mock_MemoryItem_from_text(mocker: MockerFixture, mock_embedding: Embedding): +def mock_MemoryItem_from_text( + mocker: MockerFixture, mock_embedding: Embedding, config: Config +): mocker.patch.object( file_ops.MemoryItem, "from_text", - new=lambda content, source_type, metadata: MemoryItem( + new=lambda content, source_type, config, metadata: MemoryItem( raw_content=content, summary=f"Summary of content '{content}'", chunk_summaries=[f"Summary of content '{content}'"], diff --git a/tests/unit/test_message_history.py b/tests/unit/test_message_history.py index 6fdf75e6..14b60895 100644 --- a/tests/unit/test_message_history.py +++ b/tests/unit/test_message_history.py @@ -38,8 +38,7 @@ def agent(config: Config): return agent -def test_message_history_batch_summary(mocker, agent): - config = Config() +def test_message_history_batch_summary(mocker, agent, config): history = MessageHistory(agent) model = config.fast_llm_model message_tlength = 0 @@ -114,7 +113,7 @@ def test_message_history_batch_summary(mocker, agent): history.append(user_input_msg) # only take the last cycle of the message history, trim the rest of previous messages, and generate a summary for them - for cycle in reversed(list(history.per_cycle())): + for cycle in reversed(list(history.per_cycle(config))): messages_to_add = [msg for msg in cycle if msg is not None] message_sequence.insert(insertion_index, *messages_to_add) break @@ -127,7 +126,7 @@ def test_message_history_batch_summary(mocker, agent): # test the main trim_message function new_summary_message, trimmed_messages = history.trim_messages( - current_message_chain=list(message_sequence), + current_message_chain=list(message_sequence), config=config ) expected_call_count = math.ceil( diff --git a/tests/unit/test_prompt_config.py b/tests/unit/test_prompt_config.py index eacb0cf4..4616db97 100644 --- a/tests/unit/test_prompt_config.py +++ b/tests/unit/test_prompt_config.py @@ -23,10 +23,10 @@ performance_evaluations: - Another test performance evaluation - A third test performance evaluation """ - config_file = tmp_path / "test_prompt_settings.yaml" - config_file.write_text(yaml_content) + prompt_settings_file = tmp_path / "test_prompt_settings.yaml" + prompt_settings_file.write_text(yaml_content) - prompt_config = PromptConfig(config_file) + prompt_config = PromptConfig(prompt_settings_file) assert len(prompt_config.constraints) == 3 assert prompt_config.constraints[0] == "A test constraint" diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index f9a471c2..0258cc49 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -4,6 +4,7 @@ from unittest.mock import patch import pytest import requests +from autogpt.config import Config from autogpt.json_utils.utilities import extract_json_from_response, validate_json from autogpt.utils import ( get_bulletin_from_web, @@ -185,12 +186,12 @@ def 
test_get_current_git_branch_failure(mock_repo): assert branch_name == "" -def test_validate_json_valid(valid_json_response): - assert validate_json(valid_json_response) +def test_validate_json_valid(valid_json_response, config: Config): + assert validate_json(valid_json_response, config) -def test_validate_json_invalid(invalid_json_response): - assert not validate_json(valid_json_response) +def test_validate_json_invalid(invalid_json_response, config: Config): + assert not validate_json(valid_json_response, config) def test_extract_json_from_response(valid_json_response: dict): From 8077d2a249e43fc5806f709113d9b49fdf50a3a3 Mon Sep 17 00:00:00 2001 From: Auto-GPT-Bot Date: Mon, 19 Jun 2023 02:10:07 +0000 Subject: [PATCH 87/97] Update cassette submodule --- tests/Auto-GPT-test-cassettes | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/Auto-GPT-test-cassettes b/tests/Auto-GPT-test-cassettes index 058606a1..9c1b66f9 160000 --- a/tests/Auto-GPT-test-cassettes +++ b/tests/Auto-GPT-test-cassettes @@ -1 +1 @@ -Subproject commit 058606a1009948a91808f4ccf5fb3c4f9e522db2 +Subproject commit 9c1b66f9992ffbeab09b060bdda91e53fe3ee318 From 0abfa3a68f7619546a4c8e29408bf0d366cf822a Mon Sep 17 00:00:00 2001 From: merwanehamadi Date: Sun, 18 Jun 2023 20:00:23 -0700 Subject: [PATCH 88/97] Remove config from command decorator (#4736) --- autogpt/command_decorator.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/autogpt/command_decorator.py b/autogpt/command_decorator.py index 7ac4b7cd..1edd766e 100644 --- a/autogpt/command_decorator.py +++ b/autogpt/command_decorator.py @@ -2,7 +2,6 @@ import functools from typing import Any, Callable, Dict, Optional from autogpt.config import Config -from autogpt.logs import logger from autogpt.models.command import Command # Unique identifier for auto-gpt commands @@ -18,16 +17,6 @@ def command( ) -> Callable[..., Any]: """The command decorator is used to create Command objects from ordinary functions.""" - # TODO: Remove this in favor of better command management - config = Config() - - if callable(enabled): - enabled = enabled(config) - if not enabled: - if disabled_reason is not None: - logger.debug(f"Command '{name}' is disabled: {disabled_reason}") - return lambda func: func - def decorator(func: Callable[..., Any]) -> Command: cmd = Command( name=name, From ee7d04775e74d28c72d7a31b30688277906ce85e Mon Sep 17 00:00:00 2001 From: Auto-GPT-Bot Date: Mon, 19 Jun 2023 03:05:39 +0000 Subject: [PATCH 89/97] Update cassette submodule --- tests/Auto-GPT-test-cassettes | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/Auto-GPT-test-cassettes b/tests/Auto-GPT-test-cassettes index 9c1b66f9..53d0f0e2 160000 --- a/tests/Auto-GPT-test-cassettes +++ b/tests/Auto-GPT-test-cassettes @@ -1 +1 @@ -Subproject commit 9c1b66f9992ffbeab09b060bdda91e53fe3ee318 +Subproject commit 53d0f0e2e44c9ac6ab074190e5129ab49f4de277 From 9f737274b78c84f509cd8b740752444b5fa09722 Mon Sep 17 00:00:00 2001 From: Erik Peterson Date: Sun, 18 Jun 2023 20:30:08 -0700 Subject: [PATCH 90/97] Fix issues with execute_python_code responses (#4738) Co-authored-by: merwanehamadi --- autogpt/commands/execute_code.py | 10 +++++----- autogpt/json_utils/utilities.py | 3 ++- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/autogpt/commands/execute_code.py b/autogpt/commands/execute_code.py index 663800ef..0b0f731c 100644 --- a/autogpt/commands/execute_code.py +++ b/autogpt/commands/execute_code.py @@ -32,13 +32,13 @@ DENYLIST_CONTROL = "denylist" }, }, ) 
-def execute_python_code(code: str, basename: str, agent: Agent) -> str: +def execute_python_code(code: str, name: str, agent: Agent) -> str: """Create and execute a Python file in a Docker container and return the STDOUT of the executed code. If there is any data that needs to be captured use a print statement Args: code (str): The Python code to run - basename (str): A name to be given to the Python file + name (str): A name to be given to the Python file Returns: str: The STDOUT captured from the code when it ran @@ -47,10 +47,10 @@ def execute_python_code(code: str, basename: str, agent: Agent) -> str: directory = os.path.join(agent.config.workspace_path, ai_name, "executed_code") os.makedirs(directory, exist_ok=True) - if not basename.endswith(".py"): - basename = basename + ".py" + if not name.endswith(".py"): + name = name + ".py" - path = os.path.join(directory, basename) + path = os.path.join(directory, name) try: with open(path, "w+", encoding="utf-8") as f: diff --git a/autogpt/json_utils/utilities.py b/autogpt/json_utils/utilities.py index 62f3b3ca..835542a0 100644 --- a/autogpt/json_utils/utilities.py +++ b/autogpt/json_utils/utilities.py @@ -22,7 +22,8 @@ def extract_json_from_response(response_content: str) -> dict: try: return ast.literal_eval(response_content) except BaseException as e: - logger.error(f"Error parsing JSON response with literal_eval {e}") + logger.info(f"Error parsing JSON response with literal_eval {e}") + logger.debug(f"Invalid JSON received in response: {response_content}") # TODO: How to raise an error here without causing the program to exit? return {} From 7d923f83e6ca39bb91fd9d964a2c1df02206f627 Mon Sep 17 00:00:00 2001 From: Auto-GPT-Bot Date: Mon, 19 Jun 2023 03:35:37 +0000 Subject: [PATCH 91/97] Update cassette submodule --- tests/Auto-GPT-test-cassettes | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/Auto-GPT-test-cassettes b/tests/Auto-GPT-test-cassettes index 53d0f0e2..cd3eec6c 160000 --- a/tests/Auto-GPT-test-cassettes +++ b/tests/Auto-GPT-test-cassettes @@ -1 +1 @@ -Subproject commit 53d0f0e2e44c9ac6ab074190e5129ab49f4de277 +Subproject commit cd3eec6cf09874afed733b4d6ba0c60f2fd92951 From 71ca4ea9904ecd07a1287fa5b146490be6bd2945 Mon Sep 17 00:00:00 2001 From: Luke <2609441+lc0rp@users.noreply.github.com> Date: Mon, 19 Jun 2023 19:36:46 +0000 Subject: [PATCH 92/97] Updates to sync 0.4.1 to master --- autogpt/app.py | 6 ---- tests/unit/test_api_manager.py | 64 +++++++++++++++++++++++++++++++++- 2 files changed, 63 insertions(+), 7 deletions(-) diff --git a/autogpt/app.py b/autogpt/app.py index 8586653c..78e3a4dd 100644 --- a/autogpt/app.py +++ b/autogpt/app.py @@ -3,12 +3,6 @@ import json from typing import Dict from autogpt.agent.agent import Agent -from autogpt.agent.agent_manager import AgentManager -from autogpt.commands.command import command -from autogpt.commands.web_requests import scrape_links, scrape_text -from autogpt.processing.text import summarize_text -from autogpt.speech import say_text -from autogpt.url_utils.validators import validate_url def is_valid_int(value: str) -> bool: diff --git a/tests/unit/test_api_manager.py b/tests/unit/test_api_manager.py index 4a21a891..e259f56a 100644 --- a/tests/unit/test_api_manager.py +++ b/tests/unit/test_api_manager.py @@ -1,4 +1,4 @@ -from unittest.mock import patch +from unittest.mock import MagicMock, patch import pytest from pytest_mock import MockerFixture @@ -29,6 +29,68 @@ def mock_costs(mocker: MockerFixture): class TestApiManager: + @staticmethod + def 
test_create_chat_completion_debug_mode(caplog): + """Test if debug mode logs response.""" + api_manager_debug = ApiManager(debug=True) + messages = [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "Who won the world series in 2020?"}, + ] + model = "gpt-3.5-turbo" + + with patch("openai.ChatCompletion.create") as mock_create: + mock_response = MagicMock() + del mock_response.error + mock_response.usage.prompt_tokens = 10 + mock_response.usage.completion_tokens = 20 + mock_create.return_value = mock_response + + api_manager_debug.create_chat_completion(messages, model=model) + + assert "Response" in caplog.text + + @staticmethod + def test_create_chat_completion_empty_messages(): + """Test if empty messages result in zero tokens and cost.""" + messages = [] + model = "gpt-3.5-turbo" + + with patch("openai.ChatCompletion.create") as mock_create: + mock_response = MagicMock() + del mock_response.error + mock_response.usage.prompt_tokens = 0 + mock_response.usage.completion_tokens = 0 + mock_create.return_value = mock_response + + api_manager.create_chat_completion(messages, model=model) + + assert api_manager.get_total_prompt_tokens() == 0 + assert api_manager.get_total_completion_tokens() == 0 + assert api_manager.get_total_cost() == 0 + + @staticmethod + def test_create_chat_completion_valid_inputs(): + """Test if valid inputs result in correct tokens and cost.""" + messages = [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "Who won the world series in 2020?"}, + ] + model = "gpt-3.5-turbo" + + with patch("openai.ChatCompletion.create") as mock_create: + mock_response = MagicMock() + del mock_response.error + mock_response.usage.prompt_tokens = 10 + mock_response.usage.completion_tokens = 20 + mock_create.return_value = mock_response + + api_manager.create_chat_completion(messages, model=model) + + assert api_manager.get_total_prompt_tokens() == 10 + assert api_manager.get_total_completion_tokens() == 20 + assert api_manager.get_total_cost() == (10 * 0.0013 + 20 * 0.0025) / 1000 + def test_getter_methods(self): """Test the getter methods for total tokens, cost, and budget.""" api_manager.update_cost(600, 1200, "gpt-3.5-turbo") From 2a8f4ce0b74e2ca01f098b9f8bdc479a87190e41 Mon Sep 17 00:00:00 2001 From: Luke <2609441+lc0rp@users.noreply.github.com> Date: Mon, 19 Jun 2023 19:46:46 +0000 Subject: [PATCH 93/97] Fixing syntax error from mis-merge --- autogpt/llm/api_manager.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/autogpt/llm/api_manager.py b/autogpt/llm/api_manager.py index aaf93421..3a9d3eac 100644 --- a/autogpt/llm/api_manager.py +++ b/autogpt/llm/api_manager.py @@ -4,14 +4,8 @@ from typing import List, Optional import openai from openai import Model - -<<<<<<< HEAD -from autogpt.config import Config -from autogpt.llm.base import CompletionModelInfo, MessageDict +from autogpt.llm.base import CompletionModelInfo from autogpt.llm.providers.openai import OPEN_AI_MODELS -======= -from autogpt.llm.modelsinfo import COSTS ->>>>>>> upstream/master from autogpt.logs import logger from autogpt.singleton import Singleton @@ -30,8 +24,8 @@ class ApiManager(metaclass=Singleton): self.total_cost = 0 self.total_budget = 0.0 self.models = None - - def update_cost(self, prompt_tokens, completion_tokens, model) + + def update_cost(self, prompt_tokens, completion_tokens, model): """ Update the total cost, prompt tokens, and completion tokens. 
From 2568164cb3546856d3c8edcb00399d4ba2c8d204 Mon Sep 17 00:00:00 2001 From: Luke <2609441+lc0rp@users.noreply.github.com> Date: Mon, 19 Jun 2023 20:54:52 +0000 Subject: [PATCH 94/97] Fixing circular imports --- autogpt/llm/api_manager.py | 2 +- autogpt/llm/providers/openai.py | 2 +- tests/integration/test_provider_openai.py | 15 +------------- tests/unit/test_api_manager.py | 3 ++- tests/unit/test_make_agent.py | 25 ----------------------- 5 files changed, 5 insertions(+), 42 deletions(-) delete mode 100644 tests/unit/test_make_agent.py diff --git a/autogpt/llm/api_manager.py b/autogpt/llm/api_manager.py index 3a9d3eac..afab6e4a 100644 --- a/autogpt/llm/api_manager.py +++ b/autogpt/llm/api_manager.py @@ -5,7 +5,6 @@ from typing import List, Optional import openai from openai import Model from autogpt.llm.base import CompletionModelInfo -from autogpt.llm.providers.openai import OPEN_AI_MODELS from autogpt.logs import logger from autogpt.singleton import Singleton @@ -35,6 +34,7 @@ class ApiManager(metaclass=Singleton): model (str): The model used for the API call. """ # the .model property in API responses can contain version suffixes like -v2 + from autogpt.llm.providers.openai import OPEN_AI_MODELS model = model[:-3] if model.endswith("-v2") else model model_info = OPEN_AI_MODELS[model] diff --git a/autogpt/llm/providers/openai.py b/autogpt/llm/providers/openai.py index 08375aa8..707a7db8 100644 --- a/autogpt/llm/providers/openai.py +++ b/autogpt/llm/providers/openai.py @@ -9,7 +9,6 @@ from colorama import Fore, Style from openai.error import APIError, RateLimitError, Timeout from openai.openai_object import OpenAIObject -from autogpt.llm.api_manager import ApiManager from autogpt.llm.base import ( ChatModelInfo, EmbeddingModelInfo, @@ -111,6 +110,7 @@ OPEN_AI_MODELS: dict[str, ChatModelInfo | EmbeddingModelInfo | TextModelInfo] = def meter_api(func): """Adds ApiManager metering to functions which make OpenAI API calls""" + from autogpt.llm.api_manager import ApiManager api_manager = ApiManager() openai_obj_processor = openai.util.convert_to_openai_object diff --git a/tests/integration/test_provider_openai.py b/tests/integration/test_provider_openai.py index f5ae65cf..f51ad9ac 100644 --- a/tests/integration/test_provider_openai.py +++ b/tests/integration/test_provider_openai.py @@ -2,7 +2,7 @@ from unittest.mock import MagicMock, patch import pytest -from autogpt.llm.api_manager import COSTS, ApiManager +from autogpt.llm.api_manager import ApiManager from autogpt.llm.providers import openai api_manager = ApiManager() @@ -14,19 +14,6 @@ def reset_api_manager(): yield -@pytest.fixture(autouse=True) -def mock_costs(): - with patch.dict( - COSTS, - { - "gpt-3.5-turbo": {"prompt": 0.002, "completion": 0.002}, - "text-embedding-ada-002": {"prompt": 0.0004, "completion": 0}, - }, - clear=True, - ): - yield - - class TestProviderOpenAI: @staticmethod def test_create_chat_completion_debug_mode(caplog): diff --git a/tests/unit/test_api_manager.py b/tests/unit/test_api_manager.py index e259f56a..c29aadf5 100644 --- a/tests/unit/test_api_manager.py +++ b/tests/unit/test_api_manager.py @@ -3,7 +3,8 @@ from unittest.mock import MagicMock, patch import pytest from pytest_mock import MockerFixture -from autogpt.llm.api_manager import OPEN_AI_MODELS, ApiManager +from autogpt.llm.api_manager import ApiManager +from autogpt.llm.providers.openai import OPEN_AI_MODELS api_manager = ApiManager() diff --git a/tests/unit/test_make_agent.py b/tests/unit/test_make_agent.py deleted file mode 100644 index 
61a7a6f5..00000000 --- a/tests/unit/test_make_agent.py +++ /dev/null @@ -1,25 +0,0 @@ -from unittest.mock import MagicMock - -from pytest_mock import MockerFixture - -from autogpt.agent.agent import Agent -from autogpt.app import list_agents, start_agent - - -def test_make_agent(agent: Agent, mocker: MockerFixture) -> None: - """Test that an agent can be created""" - mock = mocker.patch("openai.ChatCompletion.create") - - response = MagicMock() - response.choices[0].message.content = "Test message" - response.usage.prompt_tokens = 1 - response.usage.completion_tokens = 1 - del response.error - - mock.return_value = response - start_agent("Test Agent", "chat", "Hello, how are you?", agent, "gpt-3.5-turbo") - agents = list_agents(agent) - assert "List of agents:\n0: chat" == agents - start_agent("Test Agent 2", "write", "Hello, how are you?", agent, "gpt-3.5-turbo") - agents = list_agents(agent.config) - assert "List of agents:\n0: chat\n1: write" == agents From 162d77707b0dcb6e6f2487a399363097d66ad26e Mon Sep 17 00:00:00 2001 From: Merwane Hamadi Date: Mon, 19 Jun 2023 16:13:30 -0700 Subject: [PATCH 95/97] Fix test API manager --- tests/unit/test_api_manager.py | 68 ++-------------------------------- 1 file changed, 3 insertions(+), 65 deletions(-) diff --git a/tests/unit/test_api_manager.py b/tests/unit/test_api_manager.py index c29aadf5..04242d57 100644 --- a/tests/unit/test_api_manager.py +++ b/tests/unit/test_api_manager.py @@ -4,7 +4,7 @@ import pytest from pytest_mock import MockerFixture from autogpt.llm.api_manager import ApiManager -from autogpt.llm.providers.openai import OPEN_AI_MODELS +from autogpt.llm.providers.openai import OPEN_AI_CHAT_MODELS, OPEN_AI_EMBEDDING_MODELS api_manager = ApiManager() @@ -18,80 +18,18 @@ def reset_api_manager(): @pytest.fixture(autouse=True) def mock_costs(mocker: MockerFixture): mocker.patch.multiple( - OPEN_AI_MODELS["gpt-3.5-turbo"], + OPEN_AI_CHAT_MODELS["gpt-3.5-turbo"], prompt_token_cost=0.0013, completion_token_cost=0.0025, ) mocker.patch.multiple( - OPEN_AI_MODELS["text-embedding-ada-002"], + OPEN_AI_EMBEDDING_MODELS["text-embedding-ada-002"], prompt_token_cost=0.0004, ) yield class TestApiManager: - @staticmethod - def test_create_chat_completion_debug_mode(caplog): - """Test if debug mode logs response.""" - api_manager_debug = ApiManager(debug=True) - messages = [ - {"role": "system", "content": "You are a helpful assistant."}, - {"role": "user", "content": "Who won the world series in 2020?"}, - ] - model = "gpt-3.5-turbo" - - with patch("openai.ChatCompletion.create") as mock_create: - mock_response = MagicMock() - del mock_response.error - mock_response.usage.prompt_tokens = 10 - mock_response.usage.completion_tokens = 20 - mock_create.return_value = mock_response - - api_manager_debug.create_chat_completion(messages, model=model) - - assert "Response" in caplog.text - - @staticmethod - def test_create_chat_completion_empty_messages(): - """Test if empty messages result in zero tokens and cost.""" - messages = [] - model = "gpt-3.5-turbo" - - with patch("openai.ChatCompletion.create") as mock_create: - mock_response = MagicMock() - del mock_response.error - mock_response.usage.prompt_tokens = 0 - mock_response.usage.completion_tokens = 0 - mock_create.return_value = mock_response - - api_manager.create_chat_completion(messages, model=model) - - assert api_manager.get_total_prompt_tokens() == 0 - assert api_manager.get_total_completion_tokens() == 0 - assert api_manager.get_total_cost() == 0 - - @staticmethod - def 
test_create_chat_completion_valid_inputs(): - """Test if valid inputs result in correct tokens and cost.""" - messages = [ - {"role": "system", "content": "You are a helpful assistant."}, - {"role": "user", "content": "Who won the world series in 2020?"}, - ] - model = "gpt-3.5-turbo" - - with patch("openai.ChatCompletion.create") as mock_create: - mock_response = MagicMock() - del mock_response.error - mock_response.usage.prompt_tokens = 10 - mock_response.usage.completion_tokens = 20 - mock_create.return_value = mock_response - - api_manager.create_chat_completion(messages, model=model) - - assert api_manager.get_total_prompt_tokens() == 10 - assert api_manager.get_total_completion_tokens() == 20 - assert api_manager.get_total_cost() == (10 * 0.0013 + 20 * 0.0025) / 1000 - def test_getter_methods(self): """Test the getter methods for total tokens, cost, and budget.""" api_manager.update_cost(600, 1200, "gpt-3.5-turbo") From f4c000a547e5351048eae8e6f919d268ffafd7e4 Mon Sep 17 00:00:00 2001 From: Merwane Hamadi Date: Mon, 19 Jun 2023 16:21:40 -0700 Subject: [PATCH 96/97] Fixes LLM thinking command descriptions are orders Signed-off-by: Merwane Hamadi --- autogpt/commands/execute_code.py | 8 ++++---- autogpt/commands/file_operations.py | 8 ++++---- autogpt/commands/git_operations.py | 2 +- autogpt/commands/image_gen.py | 2 +- autogpt/commands/web_search.py | 2 +- autogpt/commands/web_selenium.py | 2 +- autogpt/llm/api_manager.py | 2 ++ autogpt/llm/providers/openai.py | 1 + tests/unit/test_api_manager.py | 2 +- 9 files changed, 16 insertions(+), 13 deletions(-) diff --git a/autogpt/commands/execute_code.py b/autogpt/commands/execute_code.py index 0b0f731c..2db78ccc 100644 --- a/autogpt/commands/execute_code.py +++ b/autogpt/commands/execute_code.py @@ -18,7 +18,7 @@ DENYLIST_CONTROL = "denylist" @command( "execute_python_code", - "Create a Python file and execute it", + "Creates a Python file and executes it", { "code": { "type": "string", @@ -63,7 +63,7 @@ def execute_python_code(code: str, name: str, agent: Agent) -> str: @command( "execute_python_file", - "Execute an existing Python file", + "Executes an existing Python file", { "filename": { "type": "string", @@ -191,7 +191,7 @@ def validate_command(command: str, config: Config) -> bool: @command( "execute_shell", - "Execute Shell Command, non-interactive commands only", + "Executes a Shell Command, non-interactive commands only", { "command_line": { "type": "string", @@ -237,7 +237,7 @@ def execute_shell(command_line: str, agent: Agent) -> str: @command( "execute_shell_popen", - "Execute Shell Command, non-interactive commands only", + "Executes a Shell Command, non-interactive commands only", { "query": { "type": "string", diff --git a/autogpt/commands/file_operations.py b/autogpt/commands/file_operations.py index d059493f..ca248743 100644 --- a/autogpt/commands/file_operations.py +++ b/autogpt/commands/file_operations.py @@ -176,7 +176,7 @@ def ingest_file( @command( "write_to_file", - "Write to file", + "Writes to a file", { "filename": { "type": "string", @@ -216,7 +216,7 @@ def write_to_file(filename: str, text: str, agent: Agent) -> str: @command( "append_to_file", - "Append to file", + "Appends to a file", { "filename": { "type": "string", @@ -261,7 +261,7 @@ def append_to_file( @command( "delete_file", - "Delete file", + "Deletes a file", { "filename": { "type": "string", @@ -291,7 +291,7 @@ def delete_file(filename: str, agent: Agent) -> str: @command( "list_files", - "List Files in Directory", + "Lists Files in a Directory", { 
"directory": { "type": "string", diff --git a/autogpt/commands/git_operations.py b/autogpt/commands/git_operations.py index 3832ca88..fc967e40 100644 --- a/autogpt/commands/git_operations.py +++ b/autogpt/commands/git_operations.py @@ -9,7 +9,7 @@ from autogpt.url_utils.validators import validate_url @command( "clone_repository", - "Clone Repository", + "Clones a Repository", { "url": { "type": "string", diff --git a/autogpt/commands/image_gen.py b/autogpt/commands/image_gen.py index d6bb73d8..c295392c 100644 --- a/autogpt/commands/image_gen.py +++ b/autogpt/commands/image_gen.py @@ -16,7 +16,7 @@ from autogpt.logs import logger @command( "generate_image", - "Generate Image", + "Generates an Image", { "prompt": { "type": "string", diff --git a/autogpt/commands/web_search.py b/autogpt/commands/web_search.py index 50b06e48..5af81058 100644 --- a/autogpt/commands/web_search.py +++ b/autogpt/commands/web_search.py @@ -15,7 +15,7 @@ DUCKDUCKGO_MAX_ATTEMPTS = 3 @command( "web_search", - "Search the web", + "Searches the web", { "query": { "type": "string", diff --git a/autogpt/commands/web_selenium.py b/autogpt/commands/web_selenium.py index 471e203b..821957f3 100644 --- a/autogpt/commands/web_selenium.py +++ b/autogpt/commands/web_selenium.py @@ -41,7 +41,7 @@ FILE_DIR = Path(__file__).parent.parent @command( "browse_website", - "Browse Website", + "Browses a Website", { "url": {"type": "string", "description": "The URL to visit", "required": True}, "question": { diff --git a/autogpt/llm/api_manager.py b/autogpt/llm/api_manager.py index afab6e4a..4e2aba9d 100644 --- a/autogpt/llm/api_manager.py +++ b/autogpt/llm/api_manager.py @@ -4,6 +4,7 @@ from typing import List, Optional import openai from openai import Model + from autogpt.llm.base import CompletionModelInfo from autogpt.logs import logger from autogpt.singleton import Singleton @@ -35,6 +36,7 @@ class ApiManager(metaclass=Singleton): """ # the .model property in API responses can contain version suffixes like -v2 from autogpt.llm.providers.openai import OPEN_AI_MODELS + model = model[:-3] if model.endswith("-v2") else model model_info = OPEN_AI_MODELS[model] diff --git a/autogpt/llm/providers/openai.py b/autogpt/llm/providers/openai.py index 707a7db8..add9954c 100644 --- a/autogpt/llm/providers/openai.py +++ b/autogpt/llm/providers/openai.py @@ -111,6 +111,7 @@ OPEN_AI_MODELS: dict[str, ChatModelInfo | EmbeddingModelInfo | TextModelInfo] = def meter_api(func): """Adds ApiManager metering to functions which make OpenAI API calls""" from autogpt.llm.api_manager import ApiManager + api_manager = ApiManager() openai_obj_processor = openai.util.convert_to_openai_object diff --git a/tests/unit/test_api_manager.py b/tests/unit/test_api_manager.py index 04242d57..615204d1 100644 --- a/tests/unit/test_api_manager.py +++ b/tests/unit/test_api_manager.py @@ -1,4 +1,4 @@ -from unittest.mock import MagicMock, patch +from unittest.mock import patch import pytest from pytest_mock import MockerFixture From 7632067768c8ddbe1f8137752c371bd4e787cd88 Mon Sep 17 00:00:00 2001 From: Auto-GPT-Bot Date: Tue, 20 Jun 2023 01:14:08 +0000 Subject: [PATCH 97/97] Update cassette submodule --- tests/Auto-GPT-test-cassettes | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/Auto-GPT-test-cassettes b/tests/Auto-GPT-test-cassettes index cd3eec6c..3a78bdd7 160000 --- a/tests/Auto-GPT-test-cassettes +++ b/tests/Auto-GPT-test-cassettes @@ -1 +1 @@ -Subproject commit cd3eec6cf09874afed733b4d6ba0c60f2fd92951 +Subproject commit 
3a78bdd7716189c5917b574eae787d9b23f95edb
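
Patch 94 above breaks the circular dependency between autogpt/llm/api_manager.py and autogpt/llm/providers/openai.py by deferring the cross-imports into the functions that need them (`update_cost` and `meter_api`); patch 96 later only adds blank lines after those deferred imports. A minimal two-module sketch of the pattern, with the file names pricing.py and metering.py invented purely for illustration:

    # pricing.py -- stands in for autogpt/llm/providers/openai.py
    MODEL_COSTS = {"gpt-3.5-turbo": 0.0013}

    def meter_api(func):
        """Wrap an API call and record its usage afterwards."""
        def wrapper(*args, **kwargs):
            # Imported at call time, not import time: by the time wrapper()
            # first runs, metering.py has finished importing, so no cycle occurs.
            from metering import record_usage
            result = func(*args, **kwargs)
            record_usage("gpt-3.5-turbo")
            return result
        return wrapper

    # metering.py -- stands in for autogpt/llm/api_manager.py
    _usage: dict = {}

    def record_usage(model: str) -> None:
        # Same trick on the other side of the cycle: defer the import until needed.
        from pricing import MODEL_COSTS
        assert model in MODEL_COSTS
        _usage[model] = _usage.get(model, 0) + 1

Because each module imports the other only inside a function body, either one can be imported first without hitting the partially-initialized-module error that a top-level import cycle produces.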