mirror of
https://github.com/aljazceru/Auto-GPT.git
synced 2026-02-23 15:14:44 +01:00
Make agbenchmark a proxy of the evaluated agent (#5279)
Make agbenchmark a Proxy of the evaluated agent

Signed-off-by: Merwane Hamadi <merwanehamadi@gmail.com>
This commit is contained in:
@@ -1,21 +1,33 @@
|
||||
{
|
||||
"name": "ReturnCodeModify",
|
||||
"category": ["code", "iterate"],
|
||||
"task": "Modify the multiply_int function in sample_code.py to be able to pass in a 'multiplier' argument to multiply the 'num' by 'multiplier'. Both arguments are integers. You can make sure you have correctly done this by running test.py",
|
||||
"dependencies": ["ReturnCodeWrite"],
|
||||
"cutoff": 120,
|
||||
"ground": {
|
||||
"answer": "def multiply_int(num, multiplier):\n return num * multiplier\n",
|
||||
"should_contain": ["8", "49", "-12"],
|
||||
"should_not_contain": [],
|
||||
"files": ["test.py"],
|
||||
"eval": {
|
||||
"type": "python"
|
||||
}
|
||||
},
|
||||
"info": {
|
||||
"difficulty": "intermediate",
|
||||
"description": "Builds on the previous function also take a multiplier .",
|
||||
"side_effects": []
|
||||
}
|
||||
}
|
||||
"name": "ReturnCodeModify",
|
||||
"category": [
|
||||
"code",
|
||||
"iterate"
|
||||
],
|
||||
"task": "Modify the multiply_int function in sample_code.py to be able to pass in a 'multiplier' argument to multiply the 'num' by 'multiplier'. Both arguments are integers. You can make sure you have correctly done this by running test.py",
|
||||
"dependencies": [
|
||||
"ReturnCodeWrite"
|
||||
],
|
||||
"cutoff": 120,
|
||||
"ground": {
|
||||
"answer": "def multiply_int(num, multiplier):\n return num * multiplier\n",
|
||||
"should_contain": [
|
||||
"8",
|
||||
"49",
|
||||
"-12"
|
||||
],
|
||||
"should_not_contain": [],
|
||||
"files": [
|
||||
"test.py"
|
||||
],
|
||||
"eval": {
|
||||
"type": "python"
|
||||
}
|
||||
},
|
||||
"info": {
|
||||
"difficulty": "intermediate",
|
||||
"description": "Builds on the previous function also take a multiplier .",
|
||||
"side_effects": []
|
||||
},
|
||||
"eval_id": "cffd9e7f-a1d5-478b-a04d-9504f372639a"
|
||||
}
|
||||
Reference in New Issue
Block a user