#!/bin/bash

# Wrapper script for LLM judge post-processing and final score calculation
# This script is called by the benchmark runner with the eval results file as an argument
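#
# Example invocation (illustrative only; the wrapper's actual filename depends on
# the benchmark setup, so "judge_wrapper.sh" is just a placeholder):
#   ./judge_wrapper.sh /path/to/run_dir/eval-results.json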

# Get the directory where this script is located
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

# Get the eval results file path from the first argument
EVAL_RESULTS_FILE="$1"
if [ -z "$EVAL_RESULTS_FILE" ]; then
    echo "Error: No eval results file provided"
    exit 1
fi

# Extract the working directory from the eval results file path
WORKING_DIR="$(dirname "$EVAL_RESULTS_FILE")"

# Change to the working directory (fail fast if it does not exist)
cd "$WORKING_DIR" || { echo "Error: Could not cd to $WORKING_DIR"; exit 1; }

# Determine the evaluation name from the eval-results.json
EVAL_NAME=$(python3 -c "import json; print(json.load(open('eval-results.json'))['name'])")
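# The command above assumes eval-results.json contains at least a top-level
# "name" field, e.g. {"name": "blog_summary", ...}; nothing else in the file
# is read here.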

# Set the output file name and prompt file based on the evaluation
if [ "$EVAL_NAME" = "blog_summary" ]; then
    OUTPUT_FILE="blog_summary_output.txt"
    PROMPT_FILE="$SCRIPT_DIR/blog_summary_prompt.txt"
elif [ "$EVAL_NAME" = "restaurant_research" ]; then
    OUTPUT_FILE="restaurant_research_output.txt"
    PROMPT_FILE="$SCRIPT_DIR/restaurant_research_prompt.txt"
else
    echo "Error: Unknown evaluation name: $EVAL_NAME"
    exit 1
fi
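# Note: judging by the pattern above, supporting a new evaluation would presumably
# mean adding another branch with its own <eval_name>_output.txt and a matching
# <eval_name>_prompt.txt next to this script (an inference, not documented behavior).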

# Run the LLM judge script with the appropriate arguments
python3 "$SCRIPT_DIR/llm_judge.py" "$OUTPUT_FILE" --prompt-file "$PROMPT_FILE"
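# llm_judge.py is assumed to report failure via a non-zero exit status; the
# check below relies on that.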

# Check if LLM judge succeeded
if [ $? -ne 0 ]; then
    echo "Error: LLM judge failed"
    exit 1
fi

# Calculate the final score
python3 "$SCRIPT_DIR/calculate_final_scores_vibes.py" "$EVAL_NAME"

# Check if score calculation succeeded
if [ $? -ne 0 ]; then
    echo "Error: Final score calculation failed"
    exit 1
fi

echo "Successfully completed post-processing for $EVAL_NAME"