diff --git a/requirements.txt b/requirements.txt index 063931a9..7961106b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,6 +7,7 @@ pyyaml==6.0 readability-lxml==0.8.1 requests tiktoken==0.3.3 +gTTS==2.3.1 docker googlesearch-python google-api-python-client #(https://developers.google.com/custom-search/v1/overview) diff --git a/scripts/data.py b/scripts/data.py index bb8936c1..7694329a 100644 --- a/scripts/data.py +++ b/scripts/data.py @@ -1,6 +1,6 @@ from os import path from pathlib import Path - +SRC_DIR = Path(__file__).parent def load_prompt(): """Load the prompt from data/prompt.txt""" @@ -10,7 +10,7 @@ def load_prompt(): data_dir = file_dir / "data" prompt_file = data_dir / "prompt.txt" # Load the promt from data/prompt.txt - with open(prompt_file, "r") as prompt_file: + with open(SRC_DIR/ "data/prompt.txt", "r") as prompt_file: prompt = prompt_file.read() return prompt diff --git a/scripts/data/prompt.txt b/scripts/data/prompt.txt index d17fa27a..7069bfff 100644 --- a/scripts/data/prompt.txt +++ b/scripts/data/prompt.txt @@ -1,7 +1,8 @@ CONSTRAINTS: -1. ~4000 word limit for memory. Your memory is short, so immidiately save important information to long term memory and code to files. +1. ~4000 word limit for memory. Your memory is short, so immediately save important information to long term memory and code to files. 2. No user assistance +3. Exclusively use the commands listed in double quotes e.g. "command name" COMMANDS: @@ -34,9 +35,9 @@ RESOURCES: PERFORMANCE EVALUATION: 1. Continuously review and analyze your actions to ensure you are performing to the best of your abilities. -2. Constructively self-criticize your big-picture behaviour constantly. +2. Constructively self-criticize your big-picture behavior constantly. 3. Reflect on past decisions and strategies to refine your approach. -4. Every command has a cost, so be smart and efficent. Aim to complete tasks in the least number of steps. +4. Every command has a cost, so be smart and efficient. Aim to complete tasks in the least number of steps. You should only respond in JSON format as described below @@ -58,4 +59,4 @@ RESPONSE FORMAT: } } -Ensure the response can be parsed by Python json.loads \ No newline at end of file +Ensure the response can be parsed by Python json.loads diff --git a/scripts/main.py b/scripts/main.py index 8f0400ae..e89e57dc 100644 --- a/scripts/main.py +++ b/scripts/main.py @@ -61,6 +61,14 @@ def print_assistant_thoughts(assistant_reply): # Parse and print Assistant response assistant_reply_json = fix_and_parse_json(assistant_reply) + # Check if assistant_reply_json is a string and attempt to parse it into a JSON object + if isinstance(assistant_reply_json, str): + try: + assistant_reply_json = json.loads(assistant_reply_json) + except json.JSONDecodeError as e: + print_to_console("Error: Invalid JSON\n", Fore.RED, assistant_reply) + assistant_reply_json = {} + assistant_thoughts_reasoning = None assistant_thoughts_plan = None assistant_thoughts_speak = None @@ -334,8 +342,9 @@ while True: "NEXT ACTION: ", Fore.CYAN, f"COMMAND = {Fore.CYAN}{command_name}{Style.RESET_ALL} ARGUMENTS = {Fore.CYAN}{arguments}{Style.RESET_ALL}") - print("Enter 'y' to authorise command or 'n' to exit program...", flush=True) - + print( + f"Enter 'y' to authorise command or 'n' to exit program, or enter feedback for {ai_name}...", + flush=True) while True: console_input = input(Fore.MAGENTA + "Input:" + Style.RESET_ALL) @@ -346,16 +355,18 @@ while True: user_input = "EXIT" break else: - continue + user_input = console_input + command_name = "human_feedback" + break - if user_input != "GENERATE NEXT COMMAND JSON": - print("Exiting...", flush=True) - break - - print_to_console( + if user_input == "GENERATE NEXT COMMAND JSON": + print_to_console( "-=-=-=-=-=-=-= COMMAND AUTHORISED BY USER -=-=-=-=-=-=-=", Fore.MAGENTA, "") + elif user_input == "EXIT": + print("Exiting...", flush=True) + break else: # Print command print_to_console( @@ -364,10 +375,12 @@ while True: f"COMMAND = {Fore.CYAN}{command_name}{Style.RESET_ALL} ARGUMENTS = {Fore.CYAN}{arguments}{Style.RESET_ALL}") # Execute command - if command_name.lower() != "error": - result = f"Command {command_name} returned: {cmd.execute_command(command_name, arguments)}" - else: + if command_name.lower() == "error": result = f"Command {command_name} threw the following error: " + arguments + elif command_name == "human_feedback": + result = f"Human feedback: {user_input}" + else: + result = f"Command {command_name} returned: {cmd.execute_command(command_name, arguments)}" # Check if there's a result from the command append it to the message # history @@ -379,3 +392,4 @@ while True: chat.create_chat_message( "system", "Unable to execute command")) print_to_console("SYSTEM: ", Fore.YELLOW, "Unable to execute command") + diff --git a/scripts/speak.py b/scripts/speak.py index 5a0728a2..9f32fac2 100644 --- a/scripts/speak.py +++ b/scripts/speak.py @@ -4,6 +4,8 @@ import requests from config import Config cfg = Config() +import gtts + # TODO: Nicer names for these ids voices = ["ErXwobaYiN019PkySvjV", "EXAVITQu4vr4xnSDxMaL"] @@ -13,11 +15,10 @@ tts_headers = { "xi-api-key": cfg.elevenlabs_api_key } -def say_text(text, voice_index=0): - """Say text using ElevenLabs API""" +def eleven_labs_speech(text, voice_index=0): + """Return the results of a google search""" tts_url = "https://api.elevenlabs.io/v1/text-to-speech/{voice_id}".format( voice_id=voices[voice_index]) - formatted_message = {"text": text} response = requests.post(tts_url, headers=tts_headers, json=formatted_message) @@ -25,8 +26,24 @@ def say_text(text, voice_index=0): with open("speech.mpeg", "wb") as f: f.write(response.content) playsound("speech.mpeg") - # Delete audio file os.remove("speech.mpeg") + return True else: print("Request failed with status code:", response.status_code) print("Response content:", response.content) + return False + +def gtts_speech(text): + tts = gtts.gTTS(text) + tts.save("speech.mp3") + playsound("speech.mp3") + os.remove("speech.mp3") + +def say_text(text, voice_index=0): + if not cfg.elevenlabs_api_key: + gtts_speech(text) + else: + success = eleven_labs_speech() + if not success: + gtts_speech(text) +