diff --git a/.env.template b/.env.template index e9ccda5e..525cd61c 100644 --- a/.env.template +++ b/.env.template @@ -9,4 +9,6 @@ CUSTOM_SEARCH_ENGINE_ID= USE_AZURE=False OPENAI_API_BASE=your-base-url-for-azure OPENAI_API_VERSION=api-version-for-azure -OPENAI_DEPLOYMENT_ID=deployment-id-for-azure \ No newline at end of file +OPENAI_DEPLOYMENT_ID=deployment-id-for-azure +IMAGE_PROVIDER=dalle +HUGGINGFACE_API_TOKEN= \ No newline at end of file diff --git a/README.md b/README.md index a89c5d03..f6cf6093 100644 --- a/README.md +++ b/README.md @@ -43,6 +43,7 @@ Your support is greatly appreciated - [Setting up environment variables](#setting-up-environment-variables) - [💀 Continuous Mode ⚠️](#-continuous-mode-️) - [GPT3.5 ONLY Mode](#gpt35-only-mode) + - [🖼 Image Generation](#-image-generation) - [⚠️ Limitations](#️-limitations) - [🛡 Disclaimer](#-disclaimer) - [🐦 Connect with Us on Twitter](#-connect-with-us-on-twitter) @@ -169,6 +170,7 @@ Or you can set them in the `.env` file. 1. View memory usage by using the `--debug` flag :) + ## 💀 Continuous Mode ⚠️ Run the AI **without** user authorisation, 100% automated. Continuous mode is not recommended. @@ -187,6 +189,15 @@ If you don't have access to the GPT4 api, this mode will allow you to use Auto-G python scripts/main.py --gpt3only ``` +## 🖼 Image Generation +By default, Auto-GPT uses DALL-E for image generation. To use Stable Diffusion, a [HuggingFace API Token](https://huggingface.co/settings/tokens) is required. 
+ +Once you have a token, set these variables in your `.env`: +``` +IMAGE_PROVIDER=sd +HUGGINGFACE_API_TOKEN="YOUR_HUGGINGFACE_API_TOKEN" +``` + ## ⚠️ Limitations This experiment aims to showcase the potential of GPT-4 but comes with some limitations: diff --git a/scripts/image_gen.py b/scripts/image_gen.py index 92cda290..deda7ed5 100644 --- a/scripts/image_gen.py +++ b/scripts/image_gen.py @@ -11,40 +11,49 @@ cfg = Config() working_directory = "auto_gpt_workspace" -API_URL = "https://api-inference.huggingface.co/models/CompVis/stable-diffusion-v1-4" -headers = {"Authorization": "Bearer " + cfg.huggingface_api_token} - def generate_image(prompt): filename = str(uuid.uuid4()) + ".jpg" - + # DALL-E - openai.api_key = cfg.openai_api_key + if cfg.image_provider == 'dalle': + + openai.api_key = cfg.openai_api_key - response = openai.Image.create( - prompt=prompt, - n=1, - size="256x256", - response_format="b64_json", - ) + response = openai.Image.create( + prompt=prompt, + n=1, + size="256x256", + response_format="b64_json", + ) - print("Image Generated for prompt:" + prompt) - print(response["data"][0]["b64_json"][:50]) + print("Image Generated for prompt:" + prompt) + print(response["data"][0]["b64_json"][:50]) - image_data = b64decode(response["data"][0]["b64_json"]) - with open(working_directory + "/" + filename, mode="wb") as png: - png.write(image_data) + image_data = b64decode(response["data"][0]["b64_json"]) - return "Saved to disk:" + filename + with open(working_directory + "/" + filename, mode="wb") as png: + png.write(image_data) + + return "Saved to disk:" + filename # STABLE DIFFUSION - response = requests.post(API_URL, headers=headers, json={ - "inputs": prompt, - }) - image = Image.open(io.BytesIO(response.content)) - print("Image Generated for prompt:" + prompt) + elif cfg.image_provider == 'sd': - image.save(os.path.join(working_directory, filename)) - print("Saved to disk:" + filename) + API_URL = 
"https://api-inference.huggingface.co/models/CompVis/stable-diffusion-v1-4" + headers = {"Authorization": "Bearer " + cfg.huggingface_api_token} - return str("Image " + filename + " saved to disk for prompt: " + prompt) + response = requests.post(API_URL, headers=headers, json={ + "inputs": prompt, + }) + + image = Image.open(io.BytesIO(response.content)) + print("Image Generated for prompt:" + prompt) + + image.save(os.path.join(working_directory, filename)) + print("Saved to disk:" + filename) + + return str("Image " + filename + " saved to disk for prompt: " + prompt) + + else: + return "No Image Provider Set" \ No newline at end of file