mirror of
https://github.com/aljazceru/Auto-GPT.git
synced 2025-12-17 22:14:28 +01:00
Refactor Benchmarking Workflow and Introduce New Data Models (#5264)
* New benchmark data models * Update _benchmarkBaseUrl * Remove ReportRequestBody * Update benchmark service methods for proxy approach * Add eval id to SkillNodeData * Refactor runBenchmark Method for proxy approach
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
import 'dart:async';
|
||||
import 'package:auto_gpt_flutter_client/models/benchmark_service/report_request_body.dart';
|
||||
import 'package:auto_gpt_flutter_client/models/benchmark_service/benchmark_step_request_body.dart';
|
||||
import 'package:auto_gpt_flutter_client/models/benchmark_service/benchmark_task_request_body.dart';
|
||||
import 'package:auto_gpt_flutter_client/utils/rest_api_utility.dart';
|
||||
import 'package:auto_gpt_flutter_client/models/benchmark_service/api_type.dart';
|
||||
|
||||
@@ -8,30 +9,43 @@ class BenchmarkService {
|
||||
|
||||
/// Creates a [BenchmarkService] that sends its requests through [api].
BenchmarkService(this.api);
|
||||
|
||||
/// Generates a single report using POST REST API at the /reports URL.
|
||||
/// Creates a new benchmark task.
|
||||
///
|
||||
/// [reportRequestBody] is a Map representing the request body for generating a single report.
|
||||
Future<Map<String, dynamic>> generateSingleReport(
|
||||
ReportRequestBody reportRequestBody) async {
|
||||
/// [benchmarkTaskRequestBody] is the request body for creating a task; it is serialized with `toJson()` before being sent.
|
||||
Future<Map<String, dynamic>> createBenchmarkTask(
|
||||
BenchmarkTaskRequestBody benchmarkTaskRequestBody) async {
|
||||
try {
|
||||
return await api.post('reports', reportRequestBody.toJson(),
|
||||
return await api.post('agent/tasks', benchmarkTaskRequestBody.toJson(),
|
||||
apiType: ApiType.benchmark);
|
||||
} catch (e) {
|
||||
throw Exception('Failed to generate single report: $e');
|
||||
throw Exception('Failed to create a new task: $e');
|
||||
}
|
||||
}
|
||||
|
||||
/// Generates a combined report using POST REST API at the /reports/query URL.
|
||||
/// Executes a step in a specific benchmark task.
|
||||
///
|
||||
/// [testRunIds] is a list of strings representing the test run IDs to be combined into a single report.
|
||||
Future<Map<String, dynamic>> generateCombinedReport(
|
||||
List<String> testRunIds) async {
|
||||
/// [taskId] is the ID of the task.
|
||||
/// [benchmarkStepRequestBody] is the request body for executing a step; it is serialized with `toJson()` before being sent.
|
||||
Future<Map<String, dynamic>> executeBenchmarkStep(
|
||||
String taskId, BenchmarkStepRequestBody benchmarkStepRequestBody) async {
|
||||
try {
|
||||
final Map<String, dynamic> requestBody = {'test_run_ids': testRunIds};
|
||||
return await api.post('reports/query', requestBody,
|
||||
return await api.post(
|
||||
'agent/tasks/$taskId/steps', benchmarkStepRequestBody.toJson(),
|
||||
apiType: ApiType.benchmark);
|
||||
} catch (e) {
|
||||
throw Exception('Failed to generate combined report: $e');
|
||||
throw Exception('Failed to execute step: $e');
|
||||
}
|
||||
}
|
||||
|
||||
/// Triggers an evaluation for a specific benchmark task.
|
||||
///
|
||||
/// Sends a POST with an empty body to `agent/tasks/$taskId/evaluation`
/// using [ApiType.benchmark].
///
/// [taskId] is the ID of the task; it is interpolated into the request path.
///
/// Returns whatever the `post` call on `api` resolves to. Anything thrown by
/// that call is caught and rethrown as an [Exception] whose message embeds
/// the original error.
|
||||
Future<Map<String, dynamic>> triggerEvaluation(String taskId) async {
|
||||
try {
|
||||
return await api.post('agent/tasks/$taskId/evaluation', {},
|
||||
apiType: ApiType.benchmark);
|
||||
} catch (e) {
|
||||
throw Exception('Failed to trigger evaluation: $e');
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user