complete helper methods for doing condition scaling (classifier free guidance), for decoder unet and prior network

2025-12-23 03:24:20 +01:00 · 2022-04-12 16:11:16 -07:00
parent d4c8373635
commit d546a615c0
1 changed files with 29 additions and 1 deletions
--- a/dalle2_pytorch/dalle2_pytorch.py
+++ b/dalle2_pytorch/dalle2_pytorch.py
@@ -179,6 +179,20 @@ class DiffusionPriorNetwork(nn.Module):
        self.learned_query = nn.Parameter(torch.randn(dim))
        self.causal_transformer = Transformer(**kwargs)
    def forward_with_cond_scale(
        self,
        x,
        *,
        cond_scale = 1.,
        **kwargs
    ):
        """Run `forward` with condition scaling (classifier free guidance).

        Args:
            x: first positional argument, handed to `forward` unchanged.
            cond_scale: guidance strength. At 1 a single `forward` call is
                made and its result is returned as is.
            **kwargs: forwarded to `forward` on every call.

        Returns:
            `forward(x, **kwargs)` when `cond_scale == 1`; otherwise
            `null_logits + (logits - null_logits) * cond_scale`, where
            `null_logits` comes from a second pass with `cond_prob_drop = 1.`
            (presumably the fully unconditioned prediction - confirm against
            how `forward` uses `cond_prob_drop`).
        """
        if cond_scale == 1:
            # no scaling requested, so the second (null) pass is not needed
            return self.forward(x, **kwargs)
        logits = self.forward(x, **kwargs)
        # override `cond_prob_drop` instead of passing it next to **kwargs, so a
        # caller supplied value no longer raises "got multiple values for
        # keyword argument" on the null pass
        null_kwargs = {**kwargs, 'cond_prob_drop': 1.}
        null_logits = self.forward(x, **null_kwargs)
        # extrapolate from the null prediction towards the conditioned one
        return null_logits + (logits - null_logits) * cond_scale
    def forward(
        self,
        image_embed,
@@ -371,6 +385,20 @@ class Unet(nn.Module):
            nn.Conv2d(dim, out_dim, 1)
        )
    def forward_with_cond_scale(
        self,
        x,
        *,
        cond_scale = 1.,
        **kwargs
    ):
        """Run `forward` with condition scaling (classifier free guidance).

        Args:
            x: first positional argument, handed to `forward` unchanged.
            cond_scale: guidance strength. At 1 a single `forward` call is
                made and its result is returned as is.
            **kwargs: forwarded to `forward` on every call.

        Returns:
            `forward(x, **kwargs)` when `cond_scale == 1`; otherwise
            `null_logits + (logits - null_logits) * cond_scale`, where
            `null_logits` comes from a second pass with `cond_prob_drop = 1.`
            (presumably the fully unconditioned prediction - confirm against
            how `forward` uses `cond_prob_drop`).
        """
        if cond_scale == 1:
            # no scaling requested, so the second (null) pass is not needed
            return self.forward(x, **kwargs)
        logits = self.forward(x, **kwargs)
        # override `cond_prob_drop` instead of passing it next to **kwargs, so a
        # caller supplied value no longer raises "got multiple values for
        # keyword argument" on the null pass
        null_kwargs = {**kwargs, 'cond_prob_drop': 1.}
        null_logits = self.forward(x, **null_kwargs)
        # extrapolate from the null prediction towards the conditioned one
        return null_logits + (logits - null_logits) * cond_scale
    def forward(
        self,
        x,
@@ -378,7 +406,7 @@ class Unet(nn.Module):
        image_embed,
        time,
        text_encodings = None,
-        cond_prob_drop = 0.2
+        cond_prob_drop = 0.
    ):
        batch_size, device = image_embed.shape[0], image_embed.device
        t = self.time_mlp(time) if exists(self.time_mlp) else None