feat(ImageGen): support env vars, update readme

This commit is contained in:
blankey1337
2023-04-07 11:29:43 -07:00
parent b56b04e86f
commit f3e64ec4e9
3 changed files with 48 additions and 26 deletions

View File

@@ -9,4 +9,6 @@ CUSTOM_SEARCH_ENGINE_ID=
USE_AZURE=False
OPENAI_API_BASE=your-base-url-for-azure
OPENAI_API_VERSION=api-version-for-azure
OPENAI_DEPLOYMENT_ID=deployment-id-for-azure
IMAGE_PROVIDER=dalle
HUGGINGFACE_API_TOKEN=

View File

@@ -43,6 +43,7 @@ Your support is greatly appreciated
- [Setting up environment variables](#setting-up-environment-variables)
- [💀 Continuous Mode ⚠️](#-continuous-mode-)
- [GPT3.5 ONLY Mode](#gpt35-only-mode)
- [🖼 Image Generation](#-image-generation)
- [⚠️ Limitations](#-limitations)
- [🛡 Disclaimer](#-disclaimer)
- [🐦 Connect with Us on Twitter](#-connect-with-us-on-twitter)
@@ -169,6 +170,7 @@ Or you can set them in the `.env` file.
1. View memory usage by using the `--debug` flag :)
## 💀 Continuous Mode ⚠️
Run the AI **without** user authorisation, 100% automated.
Continuous mode is not recommended.
@@ -187,6 +189,15 @@ If you don't have access to the GPT4 api, this mode will allow you to use Auto-G
python scripts/main.py --gpt3only
```
## 🖼 Image Generation
By default, Auto-GPT uses DALL-E for image generation. To use Stable Diffusion, a [HuggingFace API Token](https://huggingface.co/settings/tokens) is required.
Once you have a token, set these variables in your `.env`:
```
IMAGE_PROVIDER=sd
HUGGINGFACE_API_TOKEN="YOUR_HUGGINGFACE_API_TOKEN"
```
## ⚠️ Limitations
This experiment aims to showcase the potential of GPT-4 but comes with some limitations:

View File

@@ -11,40 +11,49 @@ cfg = Config()
# Folder (relative to the process CWD) where generated images are saved.
working_directory = "auto_gpt_workspace"
# HuggingFace Stable Diffusion text-to-image inference endpoint.
API_URL = "https://api-inference.huggingface.co/models/CompVis/stable-diffusion-v1-4"
# Bearer auth for the HuggingFace API.
# NOTE(review): cfg.huggingface_api_token presumably comes from the
# HUGGINGFACE_API_TOKEN env var added in this commit — confirm in Config.
headers = {"Authorization": "Bearer " + cfg.huggingface_api_token}
def generate_image(prompt):
    """Generate an image for ``prompt`` and save it into the workspace.

    Dispatches on ``cfg.image_provider``:

    * ``'dalle'`` — OpenAI DALL-E Image API; requests one 256x256 image as
      base64 (``response_format="b64_json"``) and writes the decoded bytes.
    * ``'sd'``    — HuggingFace Stable Diffusion inference endpoint; posts
      the prompt and saves the returned image bytes via Pillow.

    Returns a human-readable status string; ``"No Image Provider Set"``
    when the provider is unrecognized (including unset).
    """
    # Random filename so repeated generations never collide.
    filename = str(uuid.uuid4()) + ".jpg"

    # DALL-E
    if cfg.image_provider == 'dalle':
        openai.api_key = cfg.openai_api_key

        response = openai.Image.create(
            prompt=prompt,
            n=1,
            size="256x256",
            response_format="b64_json",
        )

        print("Image Generated for prompt:" + prompt)

        # The API returns the image inline as base64 rather than a URL.
        image_data = b64decode(response["data"][0]["b64_json"])

        with open(working_directory + "/" + filename, mode="wb") as png:
            png.write(image_data)

        return "Saved to disk:" + filename

    # STABLE DIFFUSION
    elif cfg.image_provider == 'sd':
        API_URL = "https://api-inference.huggingface.co/models/CompVis/stable-diffusion-v1-4"
        headers = {"Authorization": "Bearer " + cfg.huggingface_api_token}

        # NOTE(review): no status check — a non-200 response (e.g. model
        # still loading) will make Image.open fail on the error payload.
        response = requests.post(API_URL, headers=headers, json={
            "inputs": prompt,
        })

        image = Image.open(io.BytesIO(response.content))
        print("Image Generated for prompt:" + prompt)

        image.save(os.path.join(working_directory, filename))
        print("Saved to disk:" + filename)

        return str("Image " + filename + " saved to disk for prompt: " + prompt)
    else:
        return "No Image Provider Set"