From 57412bcf4e85c7edff4f021c34390658ee80eb06 Mon Sep 17 00:00:00 2001 From: blankey1337 <42594751+blankey1337@users.noreply.github.com> Date: Thu, 6 Apr 2023 21:16:05 -0700 Subject: [PATCH 1/7] add kandinsky support - cuda issue --- scripts/image_gen.py | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 scripts/image_gen.py diff --git a/scripts/image_gen.py b/scripts/image_gen.py new file mode 100644 index 00000000..cdc4fc4d --- /dev/null +++ b/scripts/image_gen.py @@ -0,0 +1,44 @@ +from kandinsky2 import get_kandinsky2 +from config import Config + +cfg = Config() + +def generate_image(prompt): + + model = get_kandinsky2('cuda', task_type='text2img', model_version='2.1', use_flash_attention=False) + images = model.generate_text2img( + "red cat, 4k photo", # prompt + num_steps=100, + batch_size=1, + guidance_scale=4, + h=768, w=768, + sampler='p_sampler', + prior_cf_scale=4, + prior_steps="5" + ) + return images + + # base_url = 'http://export.arxiv.org/api/query?' 
+ # query = f'search_query=all:{search_query}&start=0&max_results={max_results}' + # url = base_url + query + # response = requests.get(url) + + # if response.status_code == 200: + # soup = BeautifulSoup(response.content, 'xml') + # entries = soup.find_all('entry') + + # articles = [] + # for entry in entries: + # title = entry.title.text.strip() + # url = entry.id.text.strip() + # published = entry.published.text.strip() + + # articles.append({ + # 'title': title, + # 'url': url, + # 'published': published + # }) + + # return articles + # else: + # return None From 28cc9865e487dec005ef98cffca059cf81c1c806 Mon Sep 17 00:00:00 2001 From: blankey1337 <42594751+blankey1337@users.noreply.github.com> Date: Fri, 7 Apr 2023 08:02:48 -0700 Subject: [PATCH 2/7] feat(ImageGen): add stable diffusion support --- scripts/commands.py | 3 +++ scripts/config.py | 2 ++ scripts/data/prompt.txt | 1 + scripts/image_gen.py | 54 +++++++++++++++-------------------------- 4 files changed, 25 insertions(+), 35 deletions(-) diff --git a/scripts/commands.py b/scripts/commands.py index fc10d1d0..bf8d7983 100644 --- a/scripts/commands.py +++ b/scripts/commands.py @@ -9,6 +9,7 @@ import ai_functions as ai from file_operations import read_file, write_to_file, append_to_file, delete_file, search_files from execute_code import execute_python_file from json_parser import fix_and_parse_json +from image_gen import generate_image from duckduckgo_search import ddg from googleapiclient.discovery import build from googleapiclient.errors import HttpError @@ -102,6 +103,8 @@ def execute_command(command_name, arguments): return ai.write_tests(arguments["code"], arguments.get("focus")) elif command_name == "execute_python_file": # Add this command return execute_python_file(arguments["file"]) + elif command_name == "generate_image": # Add this command + return generate_image(arguments["prompt"]) elif command_name == "task_complete": shutdown() else: diff --git a/scripts/config.py b/scripts/config.py index 
fe48d298..2eca1675 100644 --- a/scripts/config.py +++ b/scripts/config.py @@ -53,6 +53,8 @@ class Config(metaclass=Singleton): self.pinecone_api_key = os.getenv("PINECONE_API_KEY") self.pinecone_region = os.getenv("PINECONE_ENV") + self.huggingface_api_token = os.getenv("HUGGINGFACE_API_TOKEN") + # User agent headers to use when browsing web # Some websites might just completely deny request with an error code if no user agent was found. self.user_agent_header = {"User-Agent":"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36"} diff --git a/scripts/data/prompt.txt b/scripts/data/prompt.txt index 28797d9e..363342c0 100644 --- a/scripts/data/prompt.txt +++ b/scripts/data/prompt.txt @@ -23,6 +23,7 @@ COMMANDS: 17. Write Tests: "write_tests", args: "code": "", "focus": "" 18. Execute Python File: "execute_python_file", args: "file": "" 19. Task Complete (Shutdown): "task_complete", args: "reason": "" +20. Generate Image: "generate_image", args: "prompt": "" RESOURCES: diff --git a/scripts/image_gen.py b/scripts/image_gen.py index cdc4fc4d..bb3e7686 100644 --- a/scripts/image_gen.py +++ b/scripts/image_gen.py @@ -1,44 +1,28 @@ -from kandinsky2 import get_kandinsky2 +import requests +import io +import os.path +from PIL import Image from config import Config +import uuid cfg = Config() +working_directory = "auto_gpt_workspace" + +API_URL = "https://api-inference.huggingface.co/models/CompVis/stable-diffusion-v1-4" +headers = {"Authorization": "Bearer " + cfg.huggingface_api_token} + def generate_image(prompt): - - model = get_kandinsky2('cuda', task_type='text2img', model_version='2.1', use_flash_attention=False) - images = model.generate_text2img( - "red cat, 4k photo", # prompt - num_steps=100, - batch_size=1, - guidance_scale=4, - h=768, w=768, - sampler='p_sampler', - prior_cf_scale=4, - prior_steps="5" - ) - return images - - # base_url = 'http://export.arxiv.org/api/query?' 
- # query = f'search_query=all:{search_query}&start=0&max_results={max_results}' - # url = base_url + query - # response = requests.get(url) + response = requests.post(API_URL, headers=headers, json={ + "inputs": prompt, + }) + image = Image.open(io.BytesIO(response.content)) + print("Image Generated for prompt:" + prompt) - # if response.status_code == 200: - # soup = BeautifulSoup(response.content, 'xml') - # entries = soup.find_all('entry') + filename = str(uuid.uuid4()) + ".jpg" - # articles = [] - # for entry in entries: - # title = entry.title.text.strip() - # url = entry.id.text.strip() - # published = entry.published.text.strip() + image.save(os.path.join(working_directory, filename)) - # articles.append({ - # 'title': title, - # 'url': url, - # 'published': published - # }) + print("Saved to disk:" + filename) - # return articles - # else: - # return None + return str("Image " + filename + " saved to disk for prompt: " + prompt) From b56b04e86f4bcd93297cbe48efb8d9117be2566e Mon Sep 17 00:00:00 2001 From: blankey1337 <42594751+blankey1337@users.noreply.github.com> Date: Fri, 7 Apr 2023 11:03:23 -0700 Subject: [PATCH 3/7] feat(ImageGen): add DALL-E support --- scripts/image_gen.py | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/scripts/image_gen.py b/scripts/image_gen.py index bb3e7686..92cda290 100644 --- a/scripts/image_gen.py +++ b/scripts/image_gen.py @@ -4,6 +4,8 @@ import os.path from PIL import Image from config import Config import uuid +import openai +from base64 import b64decode cfg = Config() @@ -13,16 +15,36 @@ API_URL = "https://api-inference.huggingface.co/models/CompVis/stable-diffusion- headers = {"Authorization": "Bearer " + cfg.huggingface_api_token} def generate_image(prompt): + + filename = str(uuid.uuid4()) + ".jpg" + + # DALL-E + openai.api_key = cfg.openai_api_key + + response = openai.Image.create( + prompt=prompt, + n=1, + size="256x256", + response_format="b64_json", + ) + + 
print("Image Generated for prompt:" + prompt) + print(response["data"][0]["b64_json"][:50]) + + image_data = b64decode(response["data"][0]["b64_json"]) + with open(working_directory + "/" + filename, mode="wb") as png: + png.write(image_data) + + return "Saved to disk:" + filename + + # STABLE DIFFUSION response = requests.post(API_URL, headers=headers, json={ "inputs": prompt, }) image = Image.open(io.BytesIO(response.content)) print("Image Generated for prompt:" + prompt) - filename = str(uuid.uuid4()) + ".jpg" - image.save(os.path.join(working_directory, filename)) - print("Saved to disk:" + filename) return str("Image " + filename + " saved to disk for prompt: " + prompt) From f3e64ec4e9128d4757bf5ffadbb73a9b144b2ecb Mon Sep 17 00:00:00 2001 From: blankey1337 <42594751+blankey1337@users.noreply.github.com> Date: Fri, 7 Apr 2023 11:29:43 -0700 Subject: [PATCH 4/7] feat(ImageGen): support env vars, update readme --- .env.template | 4 ++- README.md | 11 +++++++++ scripts/image_gen.py | 59 +++++++++++++++++++++++++------------------- 3 files changed, 48 insertions(+), 26 deletions(-) diff --git a/.env.template b/.env.template index e9ccda5e..525cd61c 100644 --- a/.env.template +++ b/.env.template @@ -9,4 +9,6 @@ CUSTOM_SEARCH_ENGINE_ID= USE_AZURE=False OPENAI_API_BASE=your-base-url-for-azure OPENAI_API_VERSION=api-version-for-azure -OPENAI_DEPLOYMENT_ID=deployment-id-for-azure \ No newline at end of file +OPENAI_DEPLOYMENT_ID=deployment-id-for-azure +IMAGE_PROVIDER=dalle +HUGGINGFACE_API_TOKEN= \ No newline at end of file diff --git a/README.md b/README.md index a89c5d03..f6cf6093 100644 --- a/README.md +++ b/README.md @@ -43,6 +43,7 @@ Your support is greatly appreciated - [Setting up environment variables](#setting-up-environment-variables) - [💀 Continuous Mode ⚠️](#-continuous-mode-️) - [GPT3.5 ONLY Mode](#gpt35-only-mode) + - [🖼 Image Generation](#image-generation) - [⚠️ Limitations](#️-limitations) - [🛡 Disclaimer](#-disclaimer) - [🐦 Connect with Us on 
Twitter](#-connect-with-us-on-twitter) @@ -169,6 +170,7 @@ Or you can set them in the `.env` file. 1. View memory usage by using the `--debug` flag :) + ## 💀 Continuous Mode ⚠️ Run the AI **without** user authorisation, 100% automated. Continuous mode is not recommended. @@ -187,6 +189,15 @@ If you don't have access to the GPT4 api, this mode will allow you to use Auto-G python scripts/main.py --gpt3only ``` +## 🖼 Image Generation +By default, Auto-GPT uses DALL-E for image generation. To use Stable Diffusion, a [HuggingFace API Token](https://huggingface.co/settings/tokens) is required. + +Once you have a token, set these variables in your `.env`: +``` +IMAGE_PROVIDER=sd +HUGGINGFACE_API_TOKEN="YOUR_HUGGINGFACE_API_TOKEN" +``` + ## ⚠️ Limitations This experiment aims to showcase the potential of GPT-4 but comes with some limitations: diff --git a/scripts/image_gen.py b/scripts/image_gen.py index 92cda290..deda7ed5 100644 --- a/scripts/image_gen.py +++ b/scripts/image_gen.py @@ -11,40 +11,49 @@ cfg = Config() working_directory = "auto_gpt_workspace" -API_URL = "https://api-inference.huggingface.co/models/CompVis/stable-diffusion-v1-4" -headers = {"Authorization": "Bearer " + cfg.huggingface_api_token} - def generate_image(prompt): filename = str(uuid.uuid4()) + ".jpg" - + # DALL-E - openai.api_key = cfg.openai_api_key + if cfg.image_provider == 'dalle': + + openai.api_key = cfg.openai_api_key - response = openai.Image.create( - prompt=prompt, - n=1, - size="256x256", - response_format="b64_json", - ) + response = openai.Image.create( + prompt=prompt, + n=1, + size="256x256", + response_format="b64_json", + ) - print("Image Generated for prompt:" + prompt) - print(response["data"][0]["b64_json"][:50]) + print("Image Generated for prompt:" + prompt) + print(response["data"][0]["b64_json"][:50]) - image_data = b64decode(response["data"][0]["b64_json"]) - with open(working_directory + "/" + filename, mode="wb") as png: - png.write(image_data) + image_data = 
b64decode(response["data"][0]["b64_json"]) - return "Saved to disk:" + filename + with open(working_directory + "/" + filename, mode="wb") as png: + png.write(image_data) + + return "Saved to disk:" + filename # STABLE DIFFUSION - response = requests.post(API_URL, headers=headers, json={ - "inputs": prompt, - }) - image = Image.open(io.BytesIO(response.content)) - print("Image Generated for prompt:" + prompt) + elif cfg.image_provider == 'sd': - image.save(os.path.join(working_directory, filename)) - print("Saved to disk:" + filename) + API_URL = "https://api-inference.huggingface.co/models/CompVis/stable-diffusion-v1-4" + headers = {"Authorization": "Bearer " + cfg.huggingface_api_token} - return str("Image " + filename + " saved to disk for prompt: " + prompt) + response = requests.post(API_URL, headers=headers, json={ + "inputs": prompt, + }) + + image = Image.open(io.BytesIO(response.content)) + print("Image Generated for prompt:" + prompt) + + image.save(os.path.join(working_directory, filename)) + print("Saved to disk:" + filename) + + return str("Image " + filename + " saved to disk for prompt: " + prompt) + + else: + return "No Image Provider Set" \ No newline at end of file From 091db1d4c3db6a3bf4bc50e882f299719bc65c60 Mon Sep 17 00:00:00 2001 From: blankey1337 <42594751+blankey1337@users.noreply.github.com> Date: Fri, 7 Apr 2023 12:47:49 -0700 Subject: [PATCH 5/7] chore(ImageGen): cleanup --- scripts/config.py | 1 + scripts/image_gen.py | 6 ++---- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/scripts/config.py b/scripts/config.py index 2eca1675..959c3eb2 100644 --- a/scripts/config.py +++ b/scripts/config.py @@ -53,6 +53,7 @@ class Config(metaclass=Singleton): self.pinecone_api_key = os.getenv("PINECONE_API_KEY") self.pinecone_region = os.getenv("PINECONE_ENV") + self.image_provider = os.getenv("IMAGE_PROVIDER") self.huggingface_api_token = os.getenv("HUGGINGFACE_API_TOKEN") # User agent headers to use when browsing web diff --git 
a/scripts/image_gen.py b/scripts/image_gen.py index deda7ed5..185ed427 100644 --- a/scripts/image_gen.py +++ b/scripts/image_gen.py @@ -17,7 +17,7 @@ def generate_image(prompt): # DALL-E if cfg.image_provider == 'dalle': - + openai.api_key = cfg.openai_api_key response = openai.Image.create( @@ -28,7 +28,6 @@ def generate_image(prompt): ) print("Image Generated for prompt:" + prompt) - print(response["data"][0]["b64_json"][:50]) image_data = b64decode(response["data"][0]["b64_json"]) @@ -51,9 +50,8 @@ def generate_image(prompt): print("Image Generated for prompt:" + prompt) image.save(os.path.join(working_directory, filename)) - print("Saved to disk:" + filename) - return str("Image " + filename + " saved to disk for prompt: " + prompt) + return "Saved to disk:" + filename else: return "No Image Provider Set" \ No newline at end of file From 9328c8f7b5b9b7eb76dd131f36ad6109e8b28e32 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Machado=20de=20Oliveira?= Date: Sat, 8 Apr 2023 00:15:14 -0300 Subject: [PATCH 6/7] Settings were being saved and loaded in the wrong directory --- scripts/ai_config.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/ai_config.py b/scripts/ai_config.py index 2f432748..8cfa183a 100644 --- a/scripts/ai_config.py +++ b/scripts/ai_config.py @@ -1,6 +1,6 @@ import yaml import data - +import os class AIConfig: def __init__(self, ai_name="", ai_role="", ai_goals=[]): @@ -9,7 +9,7 @@ class AIConfig: self.ai_goals = ai_goals # Soon this will go in a folder where it remembers more stuff about the run(s) - SAVE_FILE = "../ai_settings.yaml" + SAVE_FILE = os.path.join(os.path.dirname(__file__), '..', 'ai_settings.yaml') @classmethod def load(cls, config_file=SAVE_FILE): From 8b36a5cfd33e38641d594bba10c6fe7356438938 Mon Sep 17 00:00:00 2001 From: Toran Bruce Richards Date: Sat, 8 Apr 2023 12:27:05 +0100 Subject: [PATCH 7/7] Removes comment --- scripts/commands.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) 
diff --git a/scripts/commands.py b/scripts/commands.py index bf8d7983..a45fb896 100644 --- a/scripts/commands.py +++ b/scripts/commands.py @@ -103,7 +103,7 @@ def execute_command(command_name, arguments): return ai.write_tests(arguments["code"], arguments.get("focus")) elif command_name == "execute_python_file": # Add this command return execute_python_file(arguments["file"]) - elif command_name == "generate_image": # Add this command + elif command_name == "generate_image": return generate_image(arguments["prompt"]) elif command_name == "task_complete": shutdown()