diff --git a/dalle2_pytorch/dalle2_pytorch.py b/dalle2_pytorch/dalle2_pytorch.py index 8e6e45a..61aa44e 100644 --- a/dalle2_pytorch/dalle2_pytorch.py +++ b/dalle2_pytorch/dalle2_pytorch.py @@ -800,7 +800,7 @@ class DiffusionPrior(BaseGaussianDiffusion): image_size = None, image_channels = 3, timesteps = 1000, - cond_drop_prob = 0.2, + cond_drop_prob = 0., loss_type = "l1", predict_x_start = True, beta_schedule = "cosine", @@ -834,7 +834,7 @@ class DiffusionPrior(BaseGaussianDiffusion): self.image_embed_dim = default(image_embed_dim, lambda: clip.dim_latent) self.channels = default(image_channels, lambda: clip.image_channels) - self.cond_drop_prob = cond_drop_prob + self.cond_drop_prob = cond_drop_prob if not predict_x_start else 0. self.condition_on_text_encodings = condition_on_text_encodings # in paper, they do not predict the noise, but predict x0 directly for image embedding, claiming empirically better results. I'll just offer both. diff --git a/setup.py b/setup.py index d39797f..72087b4 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ setup( 'dream = dalle2_pytorch.cli:dream' ], }, - version = '0.1.8', + version = '0.1.9', license='MIT', description = 'DALL-E 2', author = 'Phil Wang',