align the ema model device back after sampling from the cascading ddpm in the decoder

default to l2 loss
fix typo in README.md (#85)
2026-02-12 11:34:29 +01:00 · 2022-05-11 19:56:54 -07:00 · 2022-05-11 19:24:51 -07:00 · 2022-05-11 13:38:16 -07:00
4 changed files with 13 additions and 4 deletions
--- a/README.md
+++ b/README.md
@@ -508,7 +508,7 @@ To use a pretrained OpenAI CLIP, simply import `OpenAIClipAdapter` and pass it i
 import torch
 from dalle2_pytorch import DALLE2, DiffusionPriorNetwork, DiffusionPrior, Unet, Decoder, OpenAIClipAdapter

-# openai pretrained clip - defaults to ViT/B-32
+# openai pretrained clip - defaults to ViT-B/32

 clip = OpenAIClipAdapter()

--- a/dalle2_pytorch/dalle2_pytorch.py
+++ b/dalle2_pytorch/dalle2_pytorch.py
@@ -831,7 +831,7 @@ class DiffusionPrior(BaseGaussianDiffusion):
        image_channels = 3,
        timesteps = 1000,
        cond_drop_prob = 0.,
-        loss_type = "l1",
+        loss_type = "l2",
        predict_x_start = True,
        beta_schedule = "cosine",
        condition_on_text_encodings = True, # the paper suggests this is needed, but you can turn it off for your CLIP preprocessed text embed -> image embed training
@@ -1614,7 +1614,7 @@ class Decoder(BaseGaussianDiffusion):
        timesteps = 1000,
        image_cond_drop_prob = 0.1,
        text_cond_drop_prob = 0.5,
-        loss_type = 'l1',
+        loss_type = 'l2',
        beta_schedule = 'cosine',
        predict_x_start = False,
        predict_x_start_for_latent_diffusion = False,
--- a/dalle2_pytorch/train.py
+++ b/dalle2_pytorch/train.py
@@ -105,6 +105,10 @@ class EMA(nn.Module):
        self.register_buffer('initted', torch.Tensor([False]))
        self.register_buffer('step', torch.tensor([0.]))

+    def restore_ema_model_device(self):
+        device = self.initted.device
+        self.ema_model.to(device)
+
    def update(self):
        self.step += 1

@@ -305,6 +309,11 @@ class DecoderTrainer(nn.Module):

        if self.use_ema:
            self.decoder.unets = trainable_unets             # restore original training unets
+
+        # cast the ema_model unets back to original device
+        for ema in self.ema_unets:
+            ema.restore_ema_model_device()
+
        return output

    def forward(
--- a/setup.py
+++ b/setup.py
@@ -10,7 +10,7 @@ setup(
      'dream = dalle2_pytorch.cli:dream'
    ],
  },
-  version = '0.2.10',
+  version = '0.2.12',
  license='MIT',
  description = 'DALL-E 2',
  author = 'Phil Wang',
Author	SHA1	Message	Date
Phil Wang	924455d97d	align the ema model device back after sampling from the cascading ddpm in the decoder	2022-05-11 19:56:54 -07:00
Phil Wang	6021945fc8	default to l2 loss	2022-05-11 19:24:51 -07:00
Light-V	6f76652d11	fix typo in README.md (#85): the default config for clip from openai should be ViT-B/32	2022-05-11 13:38:16 -07:00