diff --git a/dalle2_pytorch/dalle2_pytorch.py b/dalle2_pytorch/dalle2_pytorch.py
index 61aa44e..2152f80 100644
--- a/dalle2_pytorch/dalle2_pytorch.py
+++ b/dalle2_pytorch/dalle2_pytorch.py
@@ -834,7 +834,7 @@ class DiffusionPrior(BaseGaussianDiffusion):
         self.image_embed_dim = default(image_embed_dim, lambda: clip.dim_latent)
         self.channels = default(image_channels, lambda: clip.image_channels)

-        self.cond_drop_prob = cond_drop_prob if not predict_x_start else 0.
+        self.cond_drop_prob = cond_drop_prob
         self.condition_on_text_encodings = condition_on_text_encodings

         # in paper, they do not predict the noise, but predict x0 directly for image embedding, claiming empirically better results. I'll just offer both.
diff --git a/setup.py b/setup.py
index 72087b4..b0908cc 100644
--- a/setup.py
+++ b/setup.py
@@ -10,7 +10,7 @@ setup(
       'dream = dalle2_pytorch.cli:dream'
     ],
   },
-  version = '0.1.9',
+  version = '0.1.10',
   license='MIT',
   description = 'DALL-E 2',
   author = 'Phil Wang',