From 4010aec0336bd3a836564a449b957546c0582041 Mon Sep 17 00:00:00 2001 From: Phil Wang Date: Sat, 7 May 2022 09:38:17 -0700 Subject: [PATCH] turn off classifier free guidance if predicting x_start for diffusion prior --- dalle2_pytorch/dalle2_pytorch.py | 4 ++-- setup.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dalle2_pytorch/dalle2_pytorch.py b/dalle2_pytorch/dalle2_pytorch.py index 8e6e45a..61aa44e 100644 --- a/dalle2_pytorch/dalle2_pytorch.py +++ b/dalle2_pytorch/dalle2_pytorch.py @@ -800,7 +800,7 @@ class DiffusionPrior(BaseGaussianDiffusion): image_size = None, image_channels = 3, timesteps = 1000, - cond_drop_prob = 0.2, + cond_drop_prob = 0., loss_type = "l1", predict_x_start = True, beta_schedule = "cosine", @@ -834,7 +834,7 @@ class DiffusionPrior(BaseGaussianDiffusion): self.image_embed_dim = default(image_embed_dim, lambda: clip.dim_latent) self.channels = default(image_channels, lambda: clip.image_channels) - self.cond_drop_prob = cond_drop_prob + self.cond_drop_prob = cond_drop_prob if not predict_x_start else 0. self.condition_on_text_encodings = condition_on_text_encodings # in paper, they do not predict the noise, but predict x0 directly for image embedding, claiming empirically better results. I'll just offer both. diff --git a/setup.py b/setup.py index d39797f..72087b4 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ setup( 'dream = dalle2_pytorch.cli:dream' ], }, - version = '0.1.8', + version = '0.1.9', license='MIT', description = 'DALL-E 2', author = 'Phil Wang',