mirror of
https://github.com/aljazceru/Auto-GPT.git
synced 2025-12-19 15:04:26 +01:00
Fixing benchmarks
This commit is contained in:
@@ -1,68 +0,0 @@
|
||||
SCORING_MAP = {
|
||||
"percentage": "assign a float score that will represent a percentage out of 100. Use decimal points to be even more accurate. 0 represents the worst possible generation, while 100 represents the ideal generation",
|
||||
"scale": "assign an integer score from a scale of 1-10. 1 represents a really bad generation, while 10 represents an ideal generation",
|
||||
"binary": "assign a binary score of either 0 or 1. 0 represents a failure, while 1 represents a success",
|
||||
}
|
||||
|
||||
|
||||
REFERENCE_PROMPT = """Ignore previous directions. You are now an expert at evaluating how close machine generated responses are to human answers. You essentially act as a hyper advanced BLEU score.
|
||||
In order to score the machine generated response you will {scoring}. Make sure to factor in the distance to the ideal response into your thinking, deliberation, and final result regarding scoring. Return nothing but a float score.
|
||||
|
||||
Here is the given task for you to evaluate:
|
||||
{task}
|
||||
|
||||
Here is the ideal response you're comparing to based on the task:
|
||||
{answer}
|
||||
|
||||
Here is the current machine generated response to the task that you need to evaluate:
|
||||
{response}
|
||||
|
||||
"""
|
||||
|
||||
RUBRIC_PROMPT = """Ignore previous directions. You are now an expert at evaluating machine generated responses to given tasks.
|
||||
In order to score the generated texts you will {scoring}. Make sure to factor in rubric into your thinking, deliberation, and final result regarding scoring. Return nothing but a float score.
|
||||
|
||||
Here is the given task for you to evaluate:
|
||||
{task}
|
||||
|
||||
Use the below rubric to guide your thinking about scoring:
|
||||
{answer}
|
||||
|
||||
Here is the current machine generated response to the task that you need to evaluate:
|
||||
{response}
|
||||
|
||||
"""
|
||||
|
||||
QUESTION_PROMPT = """Ignore previous directions. You are now an expert at evaluating machine generated responses to given tasks.
|
||||
In order to score the generated texts you will {scoring}. Make sure to think about whether the generated response answers the question well in order to score accurately. Return nothing but a float score.
|
||||
|
||||
Here is the given task:
|
||||
{task}
|
||||
|
||||
Here is a question that checks if the task was completed correctly:
|
||||
{answer}
|
||||
|
||||
Here is the current machine generated response to the task that you need to evaluate:
|
||||
{response}
|
||||
|
||||
"""
|
||||
|
||||
FEW_SHOT_EXAMPLES = """Here are some examples of how to score a machine generated response based on the above:
|
||||
{examples}
|
||||
|
||||
"""
|
||||
|
||||
CUSTOM_PROMPT = """{custom}
|
||||
{scoring}
|
||||
|
||||
"""
|
||||
|
||||
PROMPT_MAP = {
|
||||
"rubric": RUBRIC_PROMPT,
|
||||
"reference": REFERENCE_PROMPT,
|
||||
"question": QUESTION_PROMPT,
|
||||
"custom": CUSTOM_PROMPT,
|
||||
}
|
||||
|
||||
END_PROMPT = """Remember to always end your response with nothing but a float score.
|
||||
Float score:"""
|
||||
Reference in New Issue
Block a user