Compare commits

...

3 Commits

Author      SHA1         Message                                                                                                                Date
Phil Wang   cb26187450   vqgan-vae codebook dims should be 256 or smaller                                                                       2022-04-28 08:59:03 -07:00
Phil Wang   625ce23f6b   🐛                                                                                                                     2022-04-28 07:21:18 -07:00
Phil Wang   dbf4a281f1   make sure another CLIP can actually be passed in, as long as it is wrapped in an adapter extended from BaseClipAdapter  2022-04-27 20:45:27 -07:00
3 changed files with 11 additions and 4 deletions

View File

@@ -647,9 +647,12 @@ class DiffusionPrior(BaseGaussianDiffusion):
         )

         if exists(clip):
-            assert isinstance(clip, CLIP)
+            if isinstance(clip, CLIP):
+                clip = XClipAdapter(clip)
+
+            assert isinstance(clip, BaseClipAdapter)
             freeze_model_and_make_eval_(clip)
-            self.clip = XClipAdapter(clip)
+            self.clip = clip
         else:
             assert exists(image_embed_dim), 'latent dimension must be given, if training prior network without CLIP given'
             self.clip = None
@@ -740,7 +743,7 @@ class DiffusionPrior(BaseGaussianDiffusion):
         text_cond = dict(text_embed = text_embed)

         if self.condition_on_text_encodings:
-            text_cond = {**text_cond, 'text_encodings': text_encodings, 'mask': text_mask}
+            text_cond = {**text_cond, 'text_encodings': text_encodings, 'mask': text != 0}

         image_embeds = self.p_sample_loop((batch_size, image_embed_dim), text_cond = text_cond)
         text_embeds = text_cond['text_embed']
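This hunk derives the conditioning mask directly from the token ids instead of a separately passed text_mask. A minimal illustration of what `text != 0` produces, assuming token id 0 is padding (as the expression implies); the token ids below are arbitrary:

import torch

# token ids right-padded with 0 (assumed pad id, as `text != 0` implies)
text = torch.tensor([[49406, 320, 1125, 49407, 0, 0]])

mask = text != 0   # True for real tokens, False for padding
# tensor([[ True,  True,  True,  True, False, False]])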
@@ -1248,6 +1251,8 @@ class Decoder(BaseGaussianDiffusion):
             clip = XClipAdapter(clip)

         freeze_model_and_make_eval_(clip)
+        assert isinstance(clip, BaseClipAdapter)
+
         self.clip = clip
         self.clip_image_size = clip.image_size
         self.channels = clip.image_channels
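With dbf4a281f1, a bare x-clip CLIP is auto-wrapped in XClipAdapter, and any other CLIP can be passed to DiffusionPrior or Decoder as long as it is wrapped in an adapter extended from BaseClipAdapter. A rough sketch of that wiring follows; OtherCLIP and OtherClipAdapter are hypothetical stand-ins, the exact BaseClipAdapter surface (constructor, and which properties/methods the models read off the adapter beyond image_size / image_channels seen above) is assumed and should be checked against dalle2_pytorch.py at this commit, and the constructor arguments are taken from the project README of around this version.

import torch
from torch import nn
from dalle2_pytorch import DiffusionPrior, DiffusionPriorNetwork
from dalle2_pytorch.dalle2_pytorch import BaseClipAdapter

# hypothetical third-party CLIP (stand-in, not a real library)
class OtherCLIP(nn.Module):
    def __init__(self, dim = 512):
        super().__init__()
        self.dim = dim
        self.to_text_encodings = nn.Linear(dim, dim)
        self.to_image_embed = nn.Linear(dim, dim)

    def encode_text(self, text):
        # toy encoder returning per-token encodings of shape (batch, seq, dim)
        return self.to_text_encodings(torch.randn(*text.shape, self.dim, device = text.device))

    def encode_image(self, image):
        # toy encoder returning a pooled image embedding of shape (batch, dim)
        return self.to_image_embed(torch.randn(image.shape[0], self.dim, device = image.device))

class OtherClipAdapter(BaseClipAdapter):
    # assumed adapter surface - verify against BaseClipAdapter / XClipAdapter at this commit
    @property
    def dim_latent(self):
        return self.clip.dim

    @property
    def image_size(self):
        return 256

    @property
    def image_channels(self):
        return 3

    def embed_text(self, text):
        text_encodings = self.clip.encode_text(text)   # (batch, seq, dim)
        text_embed = text_encodings.mean(dim = 1)      # pooled text embedding
        return text_embed, text_encodings

    def embed_image(self, image):
        return self.clip.encode_image(image), None

prior_network = DiffusionPriorNetwork(dim = 512, depth = 6, dim_head = 64, heads = 8)

diffusion_prior = DiffusionPrior(
    net = prior_network,
    clip = OtherClipAdapter(OtherCLIP()),   # satisfies the new isinstance(clip, BaseClipAdapter) check
    timesteps = 100,
    cond_drop_prob = 0.2
)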

View File

@@ -545,6 +545,7 @@ class VQGanVAE(nn.Module):
         l2_recon_loss = False,
         use_hinge_loss = True,
         vgg = None,
+        vq_codebook_dim = 256,
         vq_codebook_size = 512,
         vq_decay = 0.8,
         vq_commitment_weight = 1.,
@@ -579,6 +580,7 @@ class VQGanVAE(nn.Module):
         self.vq = VQ(
             dim = self.enc_dec.encoded_dim,
+            codebook_dim = vq_codebook_dim,
             codebook_size = vq_codebook_size,
             decay = vq_decay,
             commitment_weight = vq_commitment_weight,
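cb26187450 exposes the codebook dimension as its own hyperparameter; per the commit message it should stay at 256 or smaller (the VQ layer projects into and out of the smaller codebook space, which tends to improve codebook usage). A quick sketch of setting it; the module path and the other constructor arguments (dim, image_size) are assumptions based on the project README of around this version:

import torch
from dalle2_pytorch.vqgan_vae import VQGanVAE   # module path is an assumption

vae = VQGanVAE(
    dim = 32,
    image_size = 256,
    vq_codebook_dim = 256,    # new kwarg from this compare; keep at 256 or below
    vq_codebook_size = 512,
    vq_decay = 0.8
)

images = torch.randn(1, 3, 256, 256)
loss = vae(images, return_loss = True)   # assumed training call, mirroring the README of this period
loss.backward()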

View File

@@ -10,7 +10,7 @@ setup(
             'dream = dalle2_pytorch.cli:dream'
         ],
     },
-    version = '0.0.60',
+    version = '0.0.63',
     license='MIT',
     description = 'DALL-E 2',
     author = 'Phil Wang',