From e4d84dad0a335c1e4d581a31963145e56bfe72cb Mon Sep 17 00:00:00 2001 From: hunteraraujo Date: Wed, 27 Sep 2023 15:14:48 -0700 Subject: [PATCH] Enhance SkillTreeViewModel with Benchmark Runs Tracking and Leaderboard Submission This commit incorporates significant enhancements to the SkillTreeViewModel, introducing the ability to track current benchmark runs and submit results to the leaderboard. A new list, `currentBenchmarkRuns`, is introduced to store each benchmark run object during a specific benchmark session. This list is reset to an empty state when a new benchmark starts and is cleared again after a leaderboard submission. Changes made: - Introduced `currentBenchmarkRuns` to track ongoing benchmark runs, ensuring real-time data availability. - Enhanced `runBenchmark` method to populate `currentBenchmarkRuns` with benchmark run objects as the benchmark progresses. - Implemented `submitToLeaderboard` method, accepting parameters `teamName`, `repoUrl`, and `agentGitCommitSha`, updating each run object with this information, and submitting each run through `leaderboardService.submitReport`. All runs in a submission share a common v4 UUID, generated at the beginning of the submission process and stored as each run's `runId`. These enhancements ensure that benchmark run data is readily available and organized, facilitating a streamlined process for submitting well-structured data to the leaderboard. They also foster a more interactive and informative user experience, offering insights into each benchmark run's progress and outcomes. 
--- .../lib/viewmodels/skill_tree_viewmodel.dart | 29 ++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/frontend/lib/viewmodels/skill_tree_viewmodel.dart b/frontend/lib/viewmodels/skill_tree_viewmodel.dart index f5b091d6..1b9b1efd 100644 --- a/frontend/lib/viewmodels/skill_tree_viewmodel.dart +++ b/frontend/lib/viewmodels/skill_tree_viewmodel.dart @@ -17,6 +17,7 @@ import 'package:collection/collection.dart'; import 'package:flutter/foundation.dart'; import 'package:flutter/services.dart'; import 'package:graphview/GraphView.dart'; +import 'package:uuid/uuid.dart'; class SkillTreeViewModel extends ChangeNotifier { // TODO: Potentially move to task queue view model when we create one @@ -26,8 +27,11 @@ class SkillTreeViewModel extends ChangeNotifier { // TODO: Potentially move to task queue view model when we create one bool isBenchmarkRunning = false; // TODO: Potentially move to task queue view model when we create one + // TODO: clear when clicking a new node Map benchmarkStatusMap = {}; + List currentBenchmarkRuns = []; + List _skillTreeNodes = []; List _skillTreeEdges = []; SkillTreeNode? 
_selectedNode; @@ -156,6 +160,9 @@ class SkillTreeViewModel extends ChangeNotifier { // Clear the benchmarkStatusList benchmarkStatusMap.clear(); + // Reset the current benchmark runs list to be empty at the start of a new benchmark + currentBenchmarkRuns = []; + // Create a new TestSuite object with the current timestamp final testSuite = TestSuite(timestamp: DateTime.now().toIso8601String(), tests: []); @@ -215,11 +222,15 @@ class SkillTreeViewModel extends ChangeNotifier { // Decode the evaluationResponse into a BenchmarkRun object BenchmarkRun benchmarkRun = BenchmarkRun.fromJson(evaluationResponse); + // Add the benchmark run object to the list of current benchmark runs + currentBenchmarkRuns.add(benchmarkRun); + // Update the benchmarkStatusList based on the evaluation response bool successStatus = benchmarkRun.metrics.success; benchmarkStatusMap[node] = successStatus ? BenchmarkTaskStatus.success : BenchmarkTaskStatus.failure; + // await Future.delayed(Duration(seconds: 2)); notifyListeners(); // If successStatus is false, break out of the loop @@ -243,5 +254,21 @@ class SkillTreeViewModel extends ChangeNotifier { } // TODO: Move to task queue view model - Future submitToLeaderboard() async {} + Future submitToLeaderboard( + String teamName, String repoUrl, String agentGitCommitSha) async { + // Create a UUID.v4 for our unique run ID + String uuid = const Uuid().v4(); + + for (var run in currentBenchmarkRuns) { + run.repositoryInfo.teamName = teamName; + run.repositoryInfo.repoUrl = repoUrl; + run.repositoryInfo.agentGitCommitSha = agentGitCommitSha; + run.runDetails.runId = uuid; + + await leaderboardService.submitReport(run); + } + + // Clear the currentBenchmarkRuns list after submitting to the leaderboard + currentBenchmarkRuns.clear(); + } }