Merge branch 'master' of https://github.com/BillSchumacher/Auto-GPT into plugin-support

2025-12-23 17:04:21 +01:00 · 2023-04-19 17:28:17 -05:00
parent d5523600c7 fa91bc154c
commit 23c650ca10
44 changed files with 698 additions and 3812 deletions
--- a/autogpt/commands/image_gen.py
+++ b/autogpt/commands/image_gen.py
@@ -15,11 +15,12 @@ CFG = Config()


@command("generate_image", "Generate Image", '"prompt": "<prompt>"', CFG.image_provider)
-def generate_image(prompt: str) -> str:
+def generate_image(prompt: str, size: int = 256) -> str:
    """Generate an image from a prompt.

    Args:
        prompt (str): The prompt to use
+        size (int, optional): The size of the image. Defaults to 256. (Not supported by HuggingFace)

    Returns:
        str: The filename of the image
@@ -28,11 +29,14 @@ def generate_image(prompt: str) -> str:

    # DALL-E
    if CFG.image_provider == "dalle":
-        return generate_image_with_dalle(prompt, filename)
-    elif CFG.image_provider == "sd":
+        return generate_image_with_dalle(prompt, filename, size)
+    # HuggingFace
+    elif CFG.image_provider == "huggingface":
        return generate_image_with_hf(prompt, filename)
-    else:
-        return "No Image Provider Set"
+    # SD WebUI
+    elif CFG.image_provider == "sdwebui":
+        return generate_image_with_sd_webui(prompt, filename, size)
+    return "No Image Provider Set"


 def generate_image_with_hf(prompt: str, filename: str) -> str:
@@ -46,13 +50,16 @@ def generate_image_with_hf(prompt: str, filename: str) -> str:
        str: The filename of the image
    """
    API_URL = (
-        "https://api-inference.huggingface.co/models/CompVis/stable-diffusion-v1-4"
+        f"https://api-inference.huggingface.co/models/{CFG.huggingface_image_model}"
    )
    if CFG.huggingface_api_token is None:
        raise ValueError(
            "You need to set your Hugging Face API token in the config file."
        )
-    headers = {"Authorization": f"Bearer {CFG.huggingface_api_token}"}
+    headers = {
+        "Authorization": f"Bearer {CFG.huggingface_api_token}",
+        "X-Use-Cache": "false",
+    }

    response = requests.post(
        API_URL,
@@ -82,10 +89,18 @@ def generate_image_with_dalle(prompt: str, filename: str) -> str:
    """
    openai.api_key = CFG.openai_api_key

+    # Check for supported image sizes
+    if size not in [256, 512, 1024]:
+        closest = min([256, 512, 1024], key=lambda x: abs(x - size))
+        print(
+            f"DALL-E only supports image sizes of 256x256, 512x512, or 1024x1024. Setting to {closest}, was {size}."
+        )
+        size = closest
+
    response = openai.Image.create(
        prompt=prompt,
        n=1,
-        size="256x256",
+        size=f"{size}x{size}",
        response_format="b64_json",
    )

@@ -97,3 +112,53 @@ def generate_image_with_dalle(prompt: str, filename: str) -> str:
        png.write(image_data)

    return f"Saved to disk:{filename}"
+
+
+def generate_image_with_sd_webui(
+    prompt: str,
+    filename: str,
+    size: int = 512,
+    negative_prompt: str = "",
+    extra: dict = None,
+) -> str:
+    """Generate an image with Stable Diffusion webui.
+    Args:
+        prompt (str): The prompt to use
+        filename (str): The filename to save the image to
+        size (int, optional): The width and height of the image. Defaults to 512.
+        negative_prompt (str, optional): The negative prompt to use. Defaults to "".
+        extra (dict, optional): Extra parameters merged into the request. Defaults to None.
+    Returns:
+        str: A "Saved to disk:<filename>" message
+    """
+    # Session carries optional basic auth; split on the first ":" so passwords may contain ":"
+    s = requests.Session()
+    if CFG.sd_webui_auth:
+        username, _, password = CFG.sd_webui_auth.partition(":")
+        s.auth = (username, password)
+
+    # Generate the images (post through the session so the auth is actually sent)
+    response = s.post(
+        f"{CFG.sd_webui_url}/sdapi/v1/txt2img",
+        json={
+            "prompt": prompt,
+            "negative_prompt": negative_prompt,
+            "sampler_index": "DDIM",
+            "steps": 20,
+            "cfg_scale": 7.0,
+            "width": size,
+            "height": size,
+            "n_iter": 1,
+            **(extra or {}),
+        },
+    )
+
+    print(f"Image Generated for prompt:{prompt}")
+
+    # Save the image to disk
+    response = response.json()
+    b64 = b64decode(response["images"][0].split(",", 1)[0])
+    image = Image.open(io.BytesIO(b64))
+    image.save(path_in_workspace(filename))
+
+    return f"Saved to disk:{filename}"