diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2aaf1df1..3e494970 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -7,7 +7,7 @@ on: agents: description: 'Agents to run (comma-separated)' required: false - default: 'gpt-engineer,smol-developer,Auto-GPT,mini-agi,beebot,BabyAGI,PolyGPT' # Default agents if none are specified + default: 'gpt-engineer,smol-developer,Auto-GPT,mini-agi,beebot,BabyAGI,PolyGPT' # Default agents if none are specified schedule: - cron: '0 8 * * *' push: @@ -110,7 +110,6 @@ jobs: run: | echo "Matrix Setup Environment Name: ${{ needs.matrix-setup.outputs.env-name }}" - - name: Checkout repository uses: actions/checkout@v3 with: @@ -126,7 +125,7 @@ jobs: cd agbenchmark/challenges git submodule update --init --remote --recursive cd ../.. - + sudo apt-get update sudo apt-get install -y unzip xvfb wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | sudo apt-key add - @@ -206,7 +205,7 @@ jobs: fi pip install ../../dist/*.whl - + bash -c "$(curl -fsSL https://raw.githubusercontent.com/merwanehamadi/helicone/b7ab4bc53e51d8ab29fff19ce5986ab7720970c6/mitmproxy.sh)" -s start if [ "${GITHUB_EVENT_NAME}" == "pull_request" ] || [ "${{ github.event_name }}" == "push" ]; then @@ -214,12 +213,12 @@ jobs: ${prefix}agbenchmark start --maintain --mock EXIT_CODE=$? set -e # Stop ignoring non-zero exit codes - + # Check if the exit code was 5, and if so, exit with 0 instead if [ $EXIT_CODE -eq 5 ]; then echo "regression_tests.json is empty." fi - + ${prefix}agbenchmark start --mock ${prefix}agbenchmark start --mock --category=retrieval ${prefix}agbenchmark start --mock --category=interface @@ -279,11 +278,9 @@ jobs: poetry run python send_to_googledrive.py || echo "Failed to upload to Google Drive" echo "Adding skill_tree submodule to update website" poetry run agbenchmark start --mock - cd agbenchmark/challenges/skill_tree + cd agbenchmark/challenges/frontend git add . git commit -m "Update website with new skill tree" - git remote set-url origin https://x-access-token:${{ env.GH_TOKEN }}@github.com/agbenchmark/skill-tree.github.io.git - git push origin HEAD:refs/heads/main || echo "Already pushed the website, no need to push again." cd ../../.. exit 0 else diff --git a/.gitmodules b/.gitmodules index 5039aa18..268a2385 100644 --- a/.gitmodules +++ b/.gitmodules @@ -12,8 +12,8 @@ branch = benchmark-integration [submodule "agent/smol-developer"] path = agent/smol-developer - url = https://github.com/e2b-dev/smol-developer.git - branch = benchmarks + url = https://github.com/e2b-dev/smol-developer.git + branch = benchmarks [submodule "agent/SuperAGI"] path = agent/SuperAGI url = https://github.com/SilenNaihin/SuperAGI.git @@ -30,10 +30,6 @@ path = agent/PolyGPT url = https://github.com/polywrap/PolyGPT.git branch = nerfzael-use-local-wrap-library -[submodule "agbenchmark/challenges"] - path = agbenchmark/challenges - url = https://github.com/agbenchmark/agent-evals.git -[submodule "skill-tree"] - path = skill-tree - url = https://github.com/agbenchmark/skill-tree.github.io - branch = main +[submodule "agbenchmark/challenges"] + path = agbenchmark/challenges + url = https://github.com/agbenchmark/agent-evals.git diff --git a/agbenchmark/challenges b/agbenchmark/challenges index b1945bb0..9c9a35e0 160000 --- a/agbenchmark/challenges +++ b/agbenchmark/challenges @@ -1 +1 @@ -Subproject commit b1945bb0a95b6184bbbc0af1b260c1cde838eaac +Subproject commit 9c9a35e0de60ca09d826ef7c7abd6c32edca4b99 diff --git a/agbenchmark/utils/dependencies/graphs.py b/agbenchmark/utils/dependencies/graphs.py index 57bda0f7..af866296 100644 --- a/agbenchmark/utils/dependencies/graphs.py +++ b/agbenchmark/utils/dependencies/graphs.py @@ -1,6 +1,6 @@ +import json import math from pathlib import Path -import json from typing import Any, Dict, List, Tuple import matplotlib.patches as patches @@ -183,7 +183,9 @@ def get_category_colors(categories: Dict[Any, str]) -> Dict[str, str]: def graph_interactive_network( - dag: nx.DiGraph, labels: Dict[Any, Dict[str, Any]], show: bool = False + dag: nx.DiGraph, + labels: Dict[Any, Dict[str, Any]], + html_graph_path: str = "", ) -> None: nt = Network(notebook=True, width="100%", height="800px", directed=True) @@ -275,103 +277,12 @@ def graph_interactive_network( json_graph = json.dumps(graph_data) # Optionally, save to a file - with open(Path("agbenchmark/challenges/skill-tree/graph.json").resolve(), "w") as f: + with open( + Path("agbenchmark/challenges/frontend/public/graph.json").resolve(), "w" + ) as f: f.write(json_graph) - relative_path = "agbenchmark/challenges/skill-tree/index.html" - file_path = str(Path(relative_path).resolve()) + if html_graph_path: + file_path = str(Path(html_graph_path).resolve()) - if show: - nt.show(file_path, notebook=False) - nt.write_html(file_path) - - # Example usage - table_data = [ - ["Task: ", "Click on a skill to to see the task"], - ] - - iframe_path = "index.html" - combined_file_path = "agbenchmark/challenges/skill-tree/combined_view.html" - - create_combined_html(combined_file_path, iframe_path, table_data) - # JavaScript code snippet to be inserted - iframe_js_code = """ - network.on("click", function(params) { - if (params.nodes.length > 0) { - var clickedNodeId = params.nodes[0]; - var clickedNode = nodes.get(clickedNodeId); - var clickedNodeLabel = clickedNode.task; - window.parent.updateLabel(clickedNodeLabel); - } - }); - """ - - # Path to the iframe HTML file - iframe_path = "agbenchmark/challenges/skill-tree/index.html" - - # Insert the JS code snippet into the iframe HTML file - insert_js_into_iframe(iframe_path, iframe_js_code) - - -def create_combined_html( - file_path: str, iframe_path: str, table_data: List[List[Any]] -) -> None: - table_html = "
| {cell} | " - table_html += "