Refactor Benchmarking Workflow and Introduce New Data Models (#5264)

* New benchmark data models
* Update _benchmarkBaseUrl
* Remove ReportRequestBody
* Update benchmark service methods for proxy approach
* Add eval id to SkillNodeData
* Refactor runBenchmark method for proxy approach
2025-12-17 22:14:28 +01:00 · 2023-09-19 17:01:15 -07:00
parent 2098e192da
commit 5afab461ee
8 changed files with 113 additions and 70 deletions
--- a/frontend/lib/services/benchmark_service.dart
+++ b/frontend/lib/services/benchmark_service.dart
@@ -1,5 +1,6 @@
 import 'dart:async';
-import 'package:auto_gpt_flutter_client/models/benchmark_service/report_request_body.dart';
+import 'package:auto_gpt_flutter_client/models/benchmark_service/benchmark_step_request_body.dart';
+import 'package:auto_gpt_flutter_client/models/benchmark_service/benchmark_task_request_body.dart';
 import 'package:auto_gpt_flutter_client/utils/rest_api_utility.dart';
 import 'package:auto_gpt_flutter_client/models/benchmark_service/api_type.dart';

@@ -8,30 +9,43 @@ class BenchmarkService {

  BenchmarkService(this.api);

-  /// Generates a single report using POST REST API at the /reports URL.
+  /// Creates a new benchmark task.
  ///
-  /// [reportRequestBody] is a Map representing the request body for generating a single report.
-  Future<Map<String, dynamic>> generateSingleReport(
-      ReportRequestBody reportRequestBody) async {
+  /// [benchmarkTaskRequestBody] is the request body for creating a task; it is converted with `toJson()` before being posted.
+  Future<Map<String, dynamic>> createBenchmarkTask(
+      BenchmarkTaskRequestBody benchmarkTaskRequestBody) async {
    try {
-      return await api.post('reports', reportRequestBody.toJson(),
+      return await api.post('agent/tasks', benchmarkTaskRequestBody.toJson(),
          apiType: ApiType.benchmark);
    } catch (e) {
-      throw Exception('Failed to generate single report: $e');
+      throw Exception('Failed to create a new task: $e');
    }
  }

-  /// Generates a combined report using POST REST API at the /reports/query URL.
+  /// Executes a step in a specific benchmark task.
  ///
-  /// [testRunIds] is a list of strings representing the test run IDs to be combined into a single report.
-  Future<Map<String, dynamic>> generateCombinedReport(
-      List<String> testRunIds) async {
+  /// [taskId] is the ID of the task.
+  /// [benchmarkStepRequestBody] is the request body for executing a step; it is converted with `toJson()` before being posted.
+  Future<Map<String, dynamic>> executeBenchmarkStep(
+      String taskId, BenchmarkStepRequestBody benchmarkStepRequestBody) async {
    try {
-      final Map<String, dynamic> requestBody = {'test_run_ids': testRunIds};
-      return await api.post('reports/query', requestBody,
+      return await api.post(
+          'agent/tasks/$taskId/steps', benchmarkStepRequestBody.toJson(),
          apiType: ApiType.benchmark);
    } catch (e) {
-      throw Exception('Failed to generate combined report: $e');
+      throw Exception('Failed to execute step: $e');
+    }
+  }
+
+  /// Triggers an evaluation for a specific benchmark task.
+  ///
+  /// [taskId] is the ID of the task.
+  Future<Map<String, dynamic>> triggerEvaluation(String taskId) async {
+    try {
+      return await api.post('agent/tasks/$taskId/evaluation', {},
+          apiType: ApiType.benchmark);
+    } catch (e) {
+      throw Exception('Failed to trigger evaluation: $e');
    }
  }
 }