From c6bfd7fdc8199c864f35894abb7c1a05fda2ddfd Mon Sep 17 00:00:00 2001
From: Phil Wang <lucidrains@gmail.com>
Date: Mon, 18 Apr 2022 12:43:10 -0700
Subject: [PATCH] readme

---
 README.md | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index 68a453f..279e4f0 100644
--- a/README.md
+++ b/README.md
@@ -197,10 +197,10 @@ clip = CLIP(
     dim_image = 512,
     dim_latent = 512,
     num_text_tokens = 49408,
-    text_enc_depth = 1,
+    text_enc_depth = 6,
     text_seq_len = 256,
     text_heads = 8,
-    visual_enc_depth = 1,
+    visual_enc_depth = 6,
     visual_image_size = 256,
     visual_patch_size = 32,
     visual_heads = 8
@@ -209,14 +209,15 @@ clip = CLIP(
 # 2 unets for the decoder (a la cascading DDPM)
 
 unet1 = Unet(
-    dim = 16,
+    dim = 32,
     image_embed_dim = 512,
+    cond_dim = 128,
     channels = 3,
     dim_mults = (1, 2, 4, 8)
 ).cuda()
 
 unet2 = Unet(
-    dim = 16,
+    dim = 32,
     image_embed_dim = 512,
     cond_dim = 128,
     channels = 3,
@@ -228,8 +229,8 @@ unet2 = Unet(
 decoder = Decoder(
     clip = clip,
     unet = (unet1, unet2),            # insert both unets in order of low resolution to highest resolution (you can have as many stages as you want here)
-    image_sizes = (256, 512),         # resolutions, 256 for first unet, 512 for second
-    timesteps = 100,
+    image_sizes = (256, 512),         # resolutions, 256 for first unet, 512 for second. these must be unique and in ascending order, matching the order of the unets passed in
+    timesteps = 1000,
     cond_drop_prob = 0.2
 ).cuda()