From 41909f0de7aa4221a3077e76f4ffc17303fbf371 Mon Sep 17 00:00:00 2001
From: merwanehamadi
Date: Thu, 31 Aug 2023 20:45:31 -0700
Subject: [PATCH] Tic tac toe challenge (#345)

Signed-off-by: Merwane Hamadi
---
 .github/workflows/ci.yml                      |  18 ++-
 .../4_tic_tac_toe/artifacts_out/__init__.py   |   0
 .../artifacts_out/tic_tac_toe.py              | 123 ++++++++++++++++++
 .../code/4_tic_tac_toe/custom_python/test.py  |  35 +++++
 .../code/4_tic_tac_toe/data_draft.json        |  21 +++
 server.py                                     |   1 +
 6 files changed, 196 insertions(+), 2 deletions(-)
 create mode 100644 agbenchmark/challenges/verticals/code/4_tic_tac_toe/artifacts_out/__init__.py
 create mode 100644 agbenchmark/challenges/verticals/code/4_tic_tac_toe/artifacts_out/tic_tac_toe.py
 create mode 100644 agbenchmark/challenges/verticals/code/4_tic_tac_toe/custom_python/test.py
 create mode 100644 agbenchmark/challenges/verticals/code/4_tic_tac_toe/data_draft.json

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index d637af31..4161f1f0 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -201,6 +201,7 @@ jobs:
 
           if [ "${GITHUB_EVENT_NAME}" == "pull_request" ] || [ "${{ github.event_name }}" == "push" ]; then
             set +e # Ignore non-zero exit codes and continue execution
+            echo "Running the following command: ${prefix}agbenchmark start --maintain --mock"
             ${prefix}agbenchmark start --maintain --mock
             EXIT_CODE=$?
             set -e # Stop ignoring non-zero exit codes
@@ -208,14 +209,25 @@ jobs:
             if [ $EXIT_CODE -eq 5 ]; then
               echo "regression_tests.json is empty."
             fi
+            echo "Running the following command: ${prefix}agbenchmark start --mock"
             ${prefix}agbenchmark start --mock
+
+            echo "Running the following command: ${prefix}agbenchmark start --mock --category=retrieval"
             ${prefix}agbenchmark start --mock --category=retrieval
+
+            echo "Running the following command: ${prefix}agbenchmark start --mock --category=interface"
             ${prefix}agbenchmark start --mock --category=interface
+
+            echo "Running the following command: ${prefix}agbenchmark start --mock --category=code"
             ${prefix}agbenchmark start --mock --category=code
+
+            echo "Running the following command: ${prefix}agbenchmark start --mock --category=memory"
             ${prefix}agbenchmark start --mock --category=memory
-            ${prefix}agbenchmark start --mock --category=iterate
-            ${prefix}agbenchmark start --mock --suite TestReturnCode
+
+            echo "Running the following command: ${prefix}agbenchmark start --mock --suite TestRevenueRetrieval"
             ${prefix}agbenchmark start --mock --suite TestRevenueRetrieval
+
+            echo "Running the following command: ${prefix}agbenchmark start --test=TestWriteFile"
             ${prefix}agbenchmark start --test=TestWriteFile
 
             cd ../..
@@ -223,8 +235,10 @@ jobs:
             poetry run uvicorn server:app --reload &
             sleep 5
             export AGENT_NAME=mini-agi
+            echo "poetry run agbenchmark start --mock --api_mode --host=http://localhost:8000"
             poetry run agbenchmark start --mock --api_mode --host=http://localhost:8000
           else
+            echo "${prefix}agbenchmark start"
             ${prefix}agbenchmark start || echo "This command will always return a non zero exit code unless all the challenges are solved."
           fi
 
diff --git a/agbenchmark/challenges/verticals/code/4_tic_tac_toe/artifacts_out/__init__.py b/agbenchmark/challenges/verticals/code/4_tic_tac_toe/artifacts_out/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/agbenchmark/challenges/verticals/code/4_tic_tac_toe/artifacts_out/tic_tac_toe.py b/agbenchmark/challenges/verticals/code/4_tic_tac_toe/artifacts_out/tic_tac_toe.py
new file mode 100644
index 00000000..e42044a7
--- /dev/null
+++ b/agbenchmark/challenges/verticals/code/4_tic_tac_toe/artifacts_out/tic_tac_toe.py
@@ -0,0 +1,123 @@
+"""Two-player command-line Tic-Tac-Toe; squares hold 0 (empty), 1 or 2."""
+import pprint
+
+PROMPT = "Choose where to play. Enter two numbers separated by a comma, for example: 1,1 "
+
+
+def column(matrix, i):
+    """Return column ``i`` of ``matrix`` as a list."""
+    return [row[i] for row in matrix]
+
+
+def check(list):
+    """Return the mark shared by every cell of ``list``, or None.
+
+    None is returned for an empty sequence, for mixed cells, and for a
+    line made only of empty squares (0). The parameter name shadows the
+    builtin and is kept only for backward compatibility.
+    """
+    if list and len(set(list)) == 1 and list[0] != 0:
+        return list[0]
+    return None
+
+
+def checkDiagLeft(board):
+    """Return the mark owning the top-left to bottom-right diagonal, or None."""
+    size = len(board)
+    return check([board[i][i] for i in range(size)])
+
+
+def checkDiagRight(board):
+    """Return the mark owning the bottom-left to top-right diagonal, or None."""
+    size = len(board)
+    return check([board[size - 1 - i][i] for i in range(size)])
+
+
+def placeItem(row, column, board, current_player):
+    """Write ``current_player`` into an empty square; leave a filled one alone.
+
+    Always returns None, so callers that must know whether the move is
+    legal have to inspect the square before calling.
+    """
+    if board[row][column] == 0:
+        board[row][column] = current_player
+
+
+def swapPlayers(player):
+    """Return the other player's mark (2 -> 1, anything else -> 2)."""
+    return 1 if player == 2 else 2
+
+
+def winner(board):
+    """Return the winning mark (1 or 2), or 0 when nobody has won.
+
+    Rows are examined first, then columns, then the two diagonals.
+    """
+    lines = list(board)
+    lines += [column(board, i) for i in range(len(board[0]))]
+    results = [check(line) for line in lines]
+    results += [checkDiagLeft(board), checkDiagRight(board)]
+    return next((mark for mark in results if mark is not None), 0)
+
+
+def _parseLocation(location):
+    """Return ``[row, column]`` parsed from ``location``, or None if invalid.
+
+    Valid input is exactly two comma-separated integers, each in 0..2.
+    """
+    try:
+        coordinates = [int(x) for x in location.split(',')]
+    except ValueError:
+        # Non-numeric or empty fields must be rejected, not crash the game.
+        return None
+    if len(coordinates) != 2 or not all(0 <= c <= 2 for c in coordinates):
+        return None
+    return coordinates
+
+
+def getLocation():
+    """Prompt on stdin until a well-formed, in-range location is entered.
+
+    Returns the location as ``[row, column]``. EOFError raised by ``input``
+    is not caught, so exhausted input still ends the program.
+    """
+    location = input(PROMPT)
+    print(f"\nYou picked {location}")
+    coordinates = _parseLocation(location)
+    while coordinates is None:
+        print("You inputted a location in an invalid format")
+        location = input(PROMPT)
+        coordinates = _parseLocation(location)
+    return coordinates
+
+
+def gamePlay():
+    """Run one game, printing "Player N won!" or "Draw" when it ends."""
+    num_moves = 0
+    pp = pprint.PrettyPrinter(width=20)
+    current_player = 1
+    board = [[0 for _ in range(3)] for _ in range(3)]
+    result = 0
+
+    while num_moves < 9 and result == 0:
+        print("This is the current board: ")
+        pp.pprint(board)
+        row, col = getLocation()
+        if board[row][col] != 0:
+            # A filled square counts as doing nothing: the same player is
+            # prompted again and the move counter is left untouched.
+            print("That square is already filled")
+            continue
+        placeItem(row, col, board, current_player)
+        current_player = swapPlayers(current_player)
+        num_moves += 1
+        result = winner(board)
+
+    if result == 0:
+        print("Draw")
+    else:
+        print(f"Player {result} won!")
+
+
+if __name__ == "__main__":
+    gamePlay()
diff --git a/agbenchmark/challenges/verticals/code/4_tic_tac_toe/custom_python/test.py b/agbenchmark/challenges/verticals/code/4_tic_tac_toe/custom_python/test.py
new file mode 100644
index 00000000..6fa52251
--- /dev/null
+++ b/agbenchmark/challenges/verticals/code/4_tic_tac_toe/custom_python/test.py
@@ -0,0 +1,35 @@
+import subprocess
+import pytest
+
+def run_game_with_inputs(inputs):
+    # Start the game process
+    process = subprocess.Popen(
+        ['python', 'tic_tac_toe.py'],
+        stdin=subprocess.PIPE,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+        text=True
+    )
+
+    # Send the input moves one by one
+    output, errors = process.communicate('\n'.join(inputs))
+
+    # Print the inputs and outputs
+    print("Inputs:\n", "\n".join(inputs))
+    print("Output:\n", output)
+    print("Errors:\n", errors)
+
+    return output
+
+
+@pytest.mark.parametrize("inputs, expected_output", [
+    (["0,0", "1,0", "0,1", "1,1", "0,2"], "Player 1 won!"),
+    (["1,0", "0,0", "1,1", "0,1", "2,0", "0,2"], "Player 2 won!"),
+    (["0,0", "0,1", "0,2", "1,1", "1,0", "1,2", "2,1", "2,0", "2,2"], "Draw")
+])
+def test_game(inputs, expected_output):
+    output = run_game_with_inputs(inputs)
+    assert expected_output in output
+
+if __name__ == '__main__':
+    pytest.main()
diff --git a/agbenchmark/challenges/verticals/code/4_tic_tac_toe/data_draft.json b/agbenchmark/challenges/verticals/code/4_tic_tac_toe/data_draft.json
new file mode 100644
index 00000000..1fb8193e
--- /dev/null
+++ b/agbenchmark/challenges/verticals/code/4_tic_tac_toe/data_draft.json
@@ -0,0 +1,21 @@
+{
+  "name": "TestTicTacToe",
+  "category": ["code"],
+  "task": "Build a Tic-Tac-Toe game using a python CLI. Here are the specifications.\n\nThe Grid: The game board is a 3x3 grid, consisting of 3 rows and 3 columns, creating a total of 9 squares.\n\nPlayers: There are two players. One player uses the number \"1\", and the other player uses the number \"2\".\n\nTaking Turns: Players take turns to put their respective numbers (\"1\" or \"2\") in an empty square of the grid. Once a player has placed their number in a square, it cannot be changed or removed.\n\nObjective: The goal is to get three of your numbers in a row, either horizontally, vertically, or diagonally.\n\nEnd of the Game: The game concludes in one of two ways: One player gets three of their numbers in a row (horizontally, vertically, or diagonally) and is declared the winner.\nAll squares on the grid are filled, and no player has three in a row. This situation is a \"draw\" or a \"tie\".\n\nTechnical specifications:\nBuild a file called tic_tac_toe.py. This file will be called through command lines. You will have to prompt users for their move. Player 1 will always start.\nPlayers will input their move in the following format: \"x,y\" where x and y represent the location in the grid (0,0 is top left, 2,2 is bottom right).\n\nYour primary requirement is to halt the game when appropriate and to print only one of these three exact sentences:\n\n\"Player 1 won!\"\n\"Player 2 won!\"\n\"Draw\"\n\nEdge cases: A player can send an incorrect location. Either the location is incorrect or the square is already filled. In this case, this counts as doing nothing, and the player gets prompted for new locations again.\n\n\nYou will be expected to create a python file called tic_tac_toe.py that will run through command lines by using ```python tic_tac_toe.py```.\n\nHere is an example of how your tic_tac_toe.py game will be tested.\n```\nprocess = subprocess.Popen(\n ['python', 'tic_tac_toe.py'],\n stdin=subprocess.PIPE,\n stdout=subprocess.PIPE,\n text=True\n)\n\noutput, _ = process.communicate('\\n'.join([\"0,0\", \"1,0\", \"0,1\", \"1,1\", \"0,2\"]))\n\nassert \"Player 1 won!\" in output\n```",
+  "dependencies": ["TestWriteFile"],
+  "cutoff": 150,
+  "ground": {
+    "answer": "The correct python file for a TicTacToe game is written",
+    "should_contain": [],
+    "should_not_contain": [],
+    "files": ["test.py"],
+    "eval": {
+      "type": "python"
+    }
+  },
+  "info": {
+    "difficulty": "basic",
+    "description": "Tests ability for the agent to create Tic-Tac-Toe game",
+    "side_effects": []
+  }
+}
diff --git a/server.py b/server.py
index 93a990dd..5537a6ef 100644
--- a/server.py
+++ b/server.py
@@ -106,6 +106,7 @@ async def create_steps(task_id: str):
 
 @app.post("/agent/tasks")
 async def create_tasks(task: Task):
+    artifacts.clear()
     return {
         "input": "random",
         "additional_input": {},