diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2aaf1df1..3e494970 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -7,7 +7,7 @@ on: agents: description: 'Agents to run (comma-separated)' required: false - default: 'gpt-engineer,smol-developer,Auto-GPT,mini-agi,beebot,BabyAGI,PolyGPT' # Default agents if none are specified + default: 'gpt-engineer,smol-developer,Auto-GPT,mini-agi,beebot,BabyAGI,PolyGPT' # Default agents if none are specified schedule: - cron: '0 8 * * *' push: @@ -110,7 +110,6 @@ jobs: run: | echo "Matrix Setup Environment Name: ${{ needs.matrix-setup.outputs.env-name }}" - - name: Checkout repository uses: actions/checkout@v3 with: @@ -126,7 +125,7 @@ jobs: cd agbenchmark/challenges git submodule update --init --remote --recursive cd ../.. - + sudo apt-get update sudo apt-get install -y unzip xvfb wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | sudo apt-key add - @@ -206,7 +205,7 @@ jobs: fi pip install ../../dist/*.whl - + bash -c "$(curl -fsSL https://raw.githubusercontent.com/merwanehamadi/helicone/b7ab4bc53e51d8ab29fff19ce5986ab7720970c6/mitmproxy.sh)" -s start if [ "${GITHUB_EVENT_NAME}" == "pull_request" ] || [ "${{ github.event_name }}" == "push" ]; then @@ -214,12 +213,12 @@ jobs: ${prefix}agbenchmark start --maintain --mock EXIT_CODE=$? set -e # Stop ignoring non-zero exit codes - + # Check if the exit code was 5, and if so, exit with 0 instead if [ $EXIT_CODE -eq 5 ]; then echo "regression_tests.json is empty." fi - + ${prefix}agbenchmark start --mock ${prefix}agbenchmark start --mock --category=retrieval ${prefix}agbenchmark start --mock --category=interface @@ -279,11 +278,9 @@ jobs: poetry run python send_to_googledrive.py || echo "Failed to upload to Google Drive" echo "Adding skill_tree submodule to update website" poetry run agbenchmark start --mock - cd agbenchmark/challenges/skill_tree + cd agbenchmark/challenges/frontend git add . git commit -m "Update website with new skill tree" - git remote set-url origin https://x-access-token:${{ env.GH_TOKEN }}@github.com/agbenchmark/skill-tree.github.io.git - git push origin HEAD:refs/heads/main || echo "Already pushed the website, no need to push again." cd ../../.. exit 0 else diff --git a/.gitmodules b/.gitmodules index 5039aa18..268a2385 100644 --- a/.gitmodules +++ b/.gitmodules @@ -12,8 +12,8 @@ branch = benchmark-integration [submodule "agent/smol-developer"] path = agent/smol-developer - url = https://github.com/e2b-dev/smol-developer.git - branch = benchmarks + url = https://github.com/e2b-dev/smol-developer.git + branch = benchmarks [submodule "agent/SuperAGI"] path = agent/SuperAGI url = https://github.com/SilenNaihin/SuperAGI.git @@ -30,10 +30,6 @@ path = agent/PolyGPT url = https://github.com/polywrap/PolyGPT.git branch = nerfzael-use-local-wrap-library -[submodule "agbenchmark/challenges"] - path = agbenchmark/challenges - url = https://github.com/agbenchmark/agent-evals.git -[submodule "skill-tree"] - path = skill-tree - url = https://github.com/agbenchmark/skill-tree.github.io - branch = main +[submodule "agbenchmark/challenges"] + path = agbenchmark/challenges + url = https://github.com/agbenchmark/agent-evals.git diff --git a/agbenchmark/challenges b/agbenchmark/challenges index b1945bb0..9c9a35e0 160000 --- a/agbenchmark/challenges +++ b/agbenchmark/challenges @@ -1 +1 @@ -Subproject commit b1945bb0a95b6184bbbc0af1b260c1cde838eaac +Subproject commit 9c9a35e0de60ca09d826ef7c7abd6c32edca4b99 diff --git a/agbenchmark/utils/dependencies/graphs.py b/agbenchmark/utils/dependencies/graphs.py index 57bda0f7..af866296 100644 --- a/agbenchmark/utils/dependencies/graphs.py +++ b/agbenchmark/utils/dependencies/graphs.py @@ -1,6 +1,6 @@ +import json import math from pathlib import Path -import json from typing import Any, Dict, List, Tuple import matplotlib.patches as patches @@ -183,7 +183,9 @@ def get_category_colors(categories: Dict[Any, str]) -> Dict[str, str]: def graph_interactive_network( - dag: nx.DiGraph, labels: Dict[Any, Dict[str, Any]], show: bool = False + dag: nx.DiGraph, + labels: Dict[Any, Dict[str, Any]], + html_graph_path: str = "", ) -> None: nt = Network(notebook=True, width="100%", height="800px", directed=True) @@ -275,103 +277,12 @@ def graph_interactive_network( json_graph = json.dumps(graph_data) # Optionally, save to a file - with open(Path("agbenchmark/challenges/skill-tree/graph.json").resolve(), "w") as f: + with open( + Path("agbenchmark/challenges/frontend/public/graph.json").resolve(), "w" + ) as f: f.write(json_graph) - relative_path = "agbenchmark/challenges/skill-tree/index.html" - file_path = str(Path(relative_path).resolve()) + if html_graph_path: + file_path = str(Path(html_graph_path).resolve()) - if show: - nt.show(file_path, notebook=False) - nt.write_html(file_path) - - # Example usage - table_data = [ - ["Task: ", "Click on a skill to to see the task"], - ] - - iframe_path = "index.html" - combined_file_path = "agbenchmark/challenges/skill-tree/combined_view.html" - - create_combined_html(combined_file_path, iframe_path, table_data) - # JavaScript code snippet to be inserted - iframe_js_code = """ - network.on("click", function(params) { - if (params.nodes.length > 0) { - var clickedNodeId = params.nodes[0]; - var clickedNode = nodes.get(clickedNodeId); - var clickedNodeLabel = clickedNode.task; - window.parent.updateLabel(clickedNodeLabel); - } - }); - """ - - # Path to the iframe HTML file - iframe_path = "agbenchmark/challenges/skill-tree/index.html" - - # Insert the JS code snippet into the iframe HTML file - insert_js_into_iframe(iframe_path, iframe_js_code) - - -def create_combined_html( - file_path: str, iframe_path: str, table_data: List[List[Any]] -) -> None: - table_html = "" - for row in table_data: - table_html += "" - for cell in row: - table_html += f"" - table_html += "" - table_html += "
{cell}
" - table_html = table_html.replace( - "Click on a skill to to see the task", - 'Click on a skill to to see the task', - 1, - ) - - # JavaScript function to update the table - js_function = """ - - """ - - iframe_html = f'' - - full_html = f""" - - - - Graph with Table - - - {js_function} - {table_html} - {iframe_html} - - - """ - - with open(file_path, "w", encoding="utf-8") as file: - file.write(full_html) - - -def insert_js_into_iframe(iframe_path: str, js_code: str) -> None: - with open(iframe_path, "r", encoding="utf-8") as file: - content = file.readlines() - - # Locate the line number where "drawGraph();" is called - line_number = -1 - for index, line in enumerate(content): - if "drawGraph();" in line: - line_number = index - break - - # Insert the JS code snippet just after "drawGraph();" - if line_number != -1: - content.insert(line_number + 1, js_code) - - with open(iframe_path, "w", encoding="utf-8") as file: - file.writelines(content) + nt.write_html(file_path) diff --git a/agbenchmark/utils/dependencies/main.py b/agbenchmark/utils/dependencies/main.py index 8ec88fb0..95102b28 100644 --- a/agbenchmark/utils/dependencies/main.py +++ b/agbenchmark/utils/dependencies/main.py @@ -8,7 +8,7 @@ __init__.py. import collections import json import os -from typing import Any, Generator, Optional +from typing import Any, Generator import colorama import networkx @@ -193,15 +193,13 @@ class DependencyManager(object): colorama.deinit() @property - def sorted_items(self, show_graph: Optional[bool] = False) -> Generator: + def sorted_items(self) -> Generator: """Get a sorted list of tests where all tests are sorted after their dependencies.""" # Build a directed graph for sorting build_skill_tree = os.getenv("BUILD_SKILL_TREE") BUILD_SKILL_TREE = ( build_skill_tree.lower() == "true" if build_skill_tree else False ) - if BUILD_SKILL_TREE: - show_graph = True dag = networkx.DiGraph() # Insert all items as nodes, to prevent items that have no dependencies and are not dependencies themselves from @@ -226,9 +224,9 @@ class DependencyManager(object): data["name"] = node_name labels[item] = data - if show_graph: + if BUILD_SKILL_TREE: # graph_spring_layout(dag, labels) - graph_interactive_network(dag, labels, show=False) + graph_interactive_network(dag, labels, html_graph_path="") # Sort based on the dependencies return networkx.topological_sort(dag)