Auto-GPT/reports/mini-agi/folder1_maintain/report.json

{
    "command": "agbenchmark start --maintain --mock",
    "completion_time": "2023-08-01-23:53",
    "benchmark_start_time": "2023-08-01-23:53",
    "metrics": {
        "run_time": "0.59 seconds",
        "highest_difficulty": "novice: 3"
    },
    "tests": {
        "TestDebugSimpleTypoWithGuidance": {
            "data_path": "agbenchmark/challenges/code/c2_debug_suite/d2_debug/data.json",
            "is_regression": true,
            "category": [
                "code",
                "iterate"
            ],
            "task": "1- Run test.py.\n2- Read code.py.\n3- Modify code.py.\nRepeat step 1, 2 and 3 until test.py runs without errors.\n",
            "answer": "[0, 1] [2, 5] [0, 3]",
            "description": "Tests ability for the agent to debug python code with a simple typo in it.",
            "metrics": {
                "difficulty": "novice",
                "success": true,
                "success_%": 100.0,
                "cost": null,
                "run_time": "0.044 seconds"
            },
            "reached_cutoff": false
        },
        "TestSearch": {
            "data_path": "agbenchmark/challenges/interface/search/data.json",
            "is_regression": true,
            "category": [
                "interface"
            ],
            "task": "Open 'https://silennaihin.com/random/plain.html' and paste the text on the page in a .txt file",
            "answer": "This is a Heading\nThis is a paragraph.",
            "description": "Tests if an llm can search",
            "metrics": {
                "difficulty": "interface",
                "success": true,
                "success_%": 100.0,
                "cost": null,
                "run_time": "0.002 seconds"
            },
            "reached_cutoff": false
        },
        "TestWriteFile": {
            "data_path": "agbenchmark/challenges/interface/write_file/data.json",
            "is_regression": true,
            "category": [
                "interface"
            ],
            "task": "Print the capital of America to a .txt file",
            "answer": "Washington",
            "description": "Tests the writing to file",
            "metrics": {
                "difficulty": "interface",
                "success": true,
                "success_%": 100.0,
                "cost": null,
                "run_time": "0.002 seconds"
            },
            "reached_cutoff": false
        },
        "TestReadFile": {
            "data_path": "agbenchmark/challenges/interface/read_file/data.json",
            "is_regression": true,
            "category": [
                "interface"
            ],
            "task": "Write the string 'random string' before any existing text to the file called file_to_check.txt",
            "answer": "random string Hello World!",
            "description": "This reads the file quickly",
            "metrics": {
                "difficulty": "interface",
                "success": true,
                "success_%": 100.0,
                "cost": null,
                "run_time": "0.002 seconds"
            },
            "reached_cutoff": false
        },
        "TestBasicRetrieval": {
            "data_path": "agbenchmark/challenges/retrieval/r1_book_price/data.json",
            "is_regression": true,
            "category": [
                "retrieval"
            ],
            "task": "Write the price of the book in this url 'http://books.toscrape.com/catalogue/meditations_33/index.html' to a .txt file.",
            "answer": "\u00a325.89",
            "description": "Specifies a specific website to retrieve information from.",
            "metrics": {
                "difficulty": "basic",
                "success": true,
                "success_%": 100.0,
                "cost": null,
                "run_time": "0.002 seconds"
            },
            "reached_cutoff": false
        },
        "TestRevenueRetrieval_1.0": {
            "data_path": "agbenchmark/challenges/retrieval/r2_search_suite_1/1_tesla_revenue/data.json",
            "is_regression": true,
            "category": [
                "retrieval",
                "retrieval"
            ],
            "task": "Write Tesla's exact revenue in 2022 into a .txt file. Use the US notation, with a precision rounded to the nearest million dollars (for instance, $31.578 billion).",
            "answer": "It was $81.462 billion in 2022.",
            "description": "A no guardrails search for info",
            "metrics": {
                "difficulty": "novice",
                "success": true,
                "success_%": 100.0,
                "cost": null,
                "run_time": "0.002 seconds"
            },
            "reached_cutoff": false
        }
    },
    "config": {
        "workspace": "${os.path.join(Path.home(), 'miniagi')}"
    }
}