diff --git a/agbenchmark/ReportManager.py b/agbenchmark/ReportManager.py
index cae13595..202574f9 100644
--- a/agbenchmark/ReportManager.py
+++ b/agbenchmark/ReportManager.py
@@ -3,7 +3,7 @@ import os
 import sys
 import time
 from datetime import datetime
-from typing import Any, Dict
+from typing import Any, Dict, Optional
 
 from agbenchmark.utils import get_highest_success_difficulty
 
@@ -37,8 +37,18 @@ class ReportManager:
         with open(self.filename, "w") as f:
             json.dump(self.tests, f, indent=4)
 
-    def add_test(self, test_name: str, test_details: dict | list) -> None:
-        self.tests[test_name] = test_details
+    def add_test(
+        self,
+        test_name: str,
+        test_details: dict | list,
+        agent_name: Optional[str] = None,
+    ) -> None:
+        """Record a test result, nested under ``agent_name`` when one is given."""
+        # A falsy agent_name (None or "") keeps the flat top-level layout.
+        target = self.tests
+        if agent_name:
+            target = self.tests.setdefault(agent_name, {})
+        target[test_name] = test_details
 
         self.save()
 
diff --git a/agbenchmark/conftest.py b/agbenchmark/conftest.py
index 245df485..4a62af0b 100644
--- a/agbenchmark/conftest.py
+++ b/agbenchmark/conftest.py
@@ -15,7 +15,7 @@ from agbenchmark.start_benchmark import (
     REGRESSION_TESTS_PATH,
     get_regression_data,
 )
-from agbenchmark.utils import calculate_success_percentage
+from agbenchmark.utils import AGENT_NAME, calculate_success_percentage
 
 
 def resolve_workspace(workspace: str) -> str:
@@ -128,9 +128,10 @@ regression_manager = ReportManager(REGRESSION_TESTS_PATH)
 # user facing reporting information
 info_manager = ReportManager(INFO_TESTS_PATH)
 
-INTERNAL_LOGS = Path(__file__).resolve().parent  # agbenchmark/conftest.py
+INTERNAL_LOGS_PATH = Path(__file__).resolve().parent / "reports"
+
 # internal db step in replacement track pass/fail rate
-internal_info = ReportManager(str(INTERNAL_LOGS / "internal_info.json"))
+internal_info = ReportManager(str(INTERNAL_LOGS_PATH / "internal_info.json"))
 
 
 def pytest_runtest_makereport(item: Any, call: Any) -> None:
@@ -171,11 +172,22 @@ def pytest_runtest_makereport(item: Any, call: Any) -> None:
                 regression_manager.remove_test(test_name)
             info_details["metrics"]["fail_reason"] = str(call.excinfo.value)
 
-        prev_test_results: list[bool] = internal_info.tests.get(test_name, [])
+        prev_test_results: list[bool]
+        agent_tests: dict[str, list[bool]] = {}
+
+        # results may be nested under the agent's name, so look there first
+        if AGENT_NAME:
+            agent_tests = internal_info.tests.get(AGENT_NAME, {})
+
+        if agent_tests:
+            prev_test_results = agent_tests.get(test_name, [])
+        else:
+            prev_test_results = internal_info.tests.get(test_name, [])
+
         if not mock:
             # only add if it's an actual test
             prev_test_results.append(info_details["metrics"]["success"])
-            internal_info.add_test(test_name, prev_test_results)
+            internal_info.add_test(test_name, prev_test_results, AGENT_NAME)
 
             # can calculate success rate regardless of mock
             info_details["metrics"]["success_%"] = calculate_success_percentage(
diff --git a/agbenchmark/internal_info.json b/agbenchmark/internal_info.json
deleted file mode 100644
index 0e34ad7a..00000000
--- a/agbenchmark/internal_info.json
+++ /dev/null
@@ -1,83 +0,0 @@
-{
-    "TestBasicMemory": [
-        true,
-        true,
-        true
-    ],
-    "TestBasicRetrieval": [
-        true,
-        true,
-        true
-    ],
-    "TestCreateSimpleWebServer": [
-        false,
-        false,
-        false
-    ],
-    "TestDebugSimpleTypoWithGuidance": [
-        false,
-        false,
-        false,
-        false,
-        false
-    ],
-    "TestDebugSimpleTypoWithoutGuidance": [
-        false,
-        false,
-        false
-    ],
-    "TestReadFile": [
-        true,
-        true,
-        true,
-        true
-    ],
-    "TestRememberMultipleIds": [
-        true,
-        true,
-        true
-    ],
-    "TestRememberMultipleIdsWithNoise": [
-        true,
-        true,
-        true
-    ],
-    "TestRememberMultiplePhrasesWithNoise": [
-        true,
-        true,
-        true
-    ],
-    "TestRetrieval2": [
-        true,
-        true,
-        true
-    ],
-    "TestRetrieval3": [
-        true,
-        true,
-        true
-    ],
-    "TestSearch": [
-        true,
-        true,
-        true,
-        true
-    ],
-    "TestWriteFile": [
-        true,
-        true,
-        true,
-        false,
-        false,
-        false,
-        false,
-        true,
-        false,
-        true,
-        false,
-        false,
-        false,
-        false,
-        true
-    ]
-}
\ No newline at end of file
diff --git a/agbenchmark/reports/internal_info.json b/agbenchmark/reports/internal_info.json
new file mode 100644
index 00000000..97b525c0
--- /dev/null
+++ b/agbenchmark/reports/internal_info.json
@@ -0,0 +1,40 @@
+{
+  "mini-agi": {
+    "TestBasicMemory": [true, true, true],
+    "TestBasicRetrieval": [true, true, true],
+    "TestCreateSimpleWebServer": [false, false, false],
+    "TestDebugSimpleTypoWithGuidance": [
+      false,
+      false,
+      false,
+      false,
+      false,
+      false
+    ],
+    "TestDebugSimpleTypoWithoutGuidance": [false, false, false],
+    "TestReadFile": [true, true, true, true],
+    "TestRememberMultipleIds": [true, true, true],
+    "TestRememberMultipleIdsWithNoise": [true, true, true],
+    "TestRememberMultiplePhrasesWithNoise": [true, true, true],
+    "TestRetrieval2": [true, true, true],
+    "TestRetrieval3": [true, true, true],
+    "TestSearch": [true, true, true, true],
+    "TestWriteFile": [
+      true,
+      true,
+      true,
+      false,
+      false,
+      false,
+      false,
+      true,
+      false,
+      true,
+      false,
+      false,
+      false,
+      false,
+      true
+    ]
+  }
+}
diff --git a/agbenchmark/utils.py b/agbenchmark/utils.py
index c69509c7..e99a1fa0 100644
--- a/agbenchmark/utils.py
+++ b/agbenchmark/utils.py
@@ -17,7 +17,6 @@ HOME_ENV = os.getenv("HOME_ENV")
 
 
 def calculate_info_test_path(reports_path: Path) -> str:
-    print("reports_pathreports_pathreports_pathreports_path", reports_path)
     if not reports_path.exists():
         reports_path.mkdir(parents=True, exist_ok=True)
         return str(
@@ -129,6 +128,7 @@ def calculate_dynamic_paths() -> tuple[Path, str, str, str]:
         CONFIG_PATH, REGRESSION_TESTS_PATH, INFO_TESTS_PATH = assign_paths(
             benchmarks_folder_path
         )
+
     else:
         # otherwise the default is when home is an agent (running agbenchmark from agent/agent_repo)
         # used when its just a pip install
@@ -139,4 +139,9 @@ def calculate_dynamic_paths() -> tuple[Path, str, str, str]:
     if not benchmarks_folder_path.exists():
         benchmarks_folder_path.mkdir(exist_ok=True)
 
-    return HOME_DIRECTORY, CONFIG_PATH, REGRESSION_TESTS_PATH, INFO_TESTS_PATH
+    return (
+        HOME_DIRECTORY,
+        CONFIG_PATH,
+        REGRESSION_TESTS_PATH,
+        INFO_TESTS_PATH,
+    )