diff --git a/benchmark/agbenchmark/reports/processing/report_types.py b/benchmark/agbenchmark/reports/processing/report_types.py
index e2fb1bc6..d2fc8dea 100644
--- a/benchmark/agbenchmark/reports/processing/report_types.py
+++ b/benchmark/agbenchmark/reports/processing/report_types.py
@@ -51,6 +51,8 @@ class Test(BaseModelBenchmark):
     category: List[str]
     task: str
     reached_cutoff: bool
+    metadata: Any
+
 
 
 class ReportBase(BaseModelBenchmark):
@@ -68,6 +70,7 @@ class Report(ReportBase):
     tests: Dict[str, Test]
 
 
+
 class ReportV2(Test, ReportBase):
     test_name: str
     run_id: str | None
diff --git a/benchmark/agbenchmark/utils/data_types.py b/benchmark/agbenchmark/utils/data_types.py
index 74b50932..955b1d6a 100644
--- a/benchmark/agbenchmark/utils/data_types.py
+++ b/benchmark/agbenchmark/utils/data_types.py
@@ -174,6 +174,9 @@ class Category(str, Enum):
     GENERALIST = "general"
     CODING = "coding"
     SCRAPE_SYNTHESIZE = "scrape_synthesize"
+    GAIA_1 = "GAIA_1"
+    GAIA_2 = "GAIA_2"
+    GAIA_3 = "GAIA_3"
 
 
 class ChallengeData(BaseModel):