turn off classifier-free guidance when predicting x_start in the diffusion prior

2025-12-19 17:54:20 +01:00 · 2022-05-07 09:38:17 -07:00
parent c87b84a259
commit 4010aec033
2 changed files with 3 additions and 3 deletions
--- a/dalle2_pytorch/dalle2_pytorch.py
+++ b/dalle2_pytorch/dalle2_pytorch.py
@@ -800,7 +800,7 @@ class DiffusionPrior(BaseGaussianDiffusion):
        image_size = None,
        image_channels = 3,
        timesteps = 1000,
-        cond_drop_prob = 0.2,
+        cond_drop_prob = 0.,
        loss_type = "l1",
        predict_x_start = True,
        beta_schedule = "cosine",
@@ -834,7 +834,7 @@ class DiffusionPrior(BaseGaussianDiffusion):
        self.image_embed_dim = default(image_embed_dim, lambda: clip.dim_latent)
        self.channels = default(image_channels, lambda: clip.image_channels)
-        self.cond_drop_prob = cond_drop_prob
+        self.cond_drop_prob = cond_drop_prob if not predict_x_start else 0.
        self.condition_on_text_encodings = condition_on_text_encodings
        # in paper, they do not predict the noise, but predict x0 directly for image embedding, claiming empirically better results. I'll just offer both.
--- a/setup.py
+++ b/setup.py
@@ -10,7 +10,7 @@ setup(
      'dream = dalle2_pytorch.cli:dream'
    ],
  },
-  version = '0.1.8',
+  version = '0.1.9',
  license='MIT',
  description = 'DALL-E 2',
  author = 'Phil Wang',