Mirror of https://github.com/lucidrains/DALLE2-pytorch.git, synced 2026-02-12 19:44:26 +01:00
Compare commits
2 Commits
| Author | SHA1 | Date |
|---|---|---|
| | 95b018374a | |
| | 8b5c2385b0 | |
```diff
@@ -164,12 +164,21 @@ class MLP(nn.Module):
 
 # feedforward
 
-def FeedForward(dim, mult = 4, dropout = 0.):
+class SwiGLU(nn.Module):
+    """ used successfully in https://arxiv.org/abs/2204.0231 """
+    def forward(self, x):
+        x, gate = x.chunk(2, dim = -1)
+        return x * F.silu(gate)
+
+def FeedForward(dim, mult = 4, dropout = 0., post_activation_norm = False):
+    """ post-activation norm https://arxiv.org/abs/2110.09456 """
+
     inner_dim = int(mult * dim)
     return nn.Sequential(
         RMSNorm(dim),
-        nn.Linear(dim, inner_dim, bias = False),
-        nn.GELU(),
+        nn.Linear(dim, inner_dim * 2, bias = False),
+        SwiGLU(),
+        RMSNorm(inner_dim) if post_activation_norm else nn.Identity(),
         nn.Dropout(dropout),
         nn.Linear(inner_dim, dim, bias = False)
     )
```
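For readers skimming the compare view, here is a minimal, self-contained sketch of the feedforward as it stands after this hunk. `RMSNorm` is reimplemented below as a standard RMS norm only so the snippet runs on its own (the repo defines its own); the SwiGLU docstring's truncated arXiv link appears to point at PaLM, arXiv:2204.02311.

```python
import torch
import torch.nn.functional as F
from torch import nn

class RMSNorm(nn.Module):
    # stand-in for the repo's RMSNorm, included so this sketch is runnable
    def __init__(self, dim):
        super().__init__()
        self.scale = dim ** 0.5
        self.gamma = nn.Parameter(torch.ones(dim))

    def forward(self, x):
        return F.normalize(x, dim = -1) * self.scale * self.gamma

class SwiGLU(nn.Module):
    # splits the projection in half and gates one half with SiLU of the other
    def forward(self, x):
        x, gate = x.chunk(2, dim = -1)
        return x * F.silu(gate)

def FeedForward(dim, mult = 4, dropout = 0., post_activation_norm = False):
    inner_dim = int(mult * dim)
    return nn.Sequential(
        RMSNorm(dim),
        nn.Linear(dim, inner_dim * 2, bias = False),  # doubled width, see note below
        SwiGLU(),                                     # -> back to inner_dim
        RMSNorm(inner_dim) if post_activation_norm else nn.Identity(),
        nn.Dropout(dropout),
        nn.Linear(inner_dim, dim, bias = False)
    )

ff = FeedForward(dim = 64)
out = ff(torch.randn(2, 16, 64))
assert out.shape == (2, 16, 64)  # shape is preserved
```

The inner projection grows to `inner_dim * 2` because SwiGLU splits it into a value half and a gate half, so the effective hidden width stays `inner_dim`, matching the old GELU version.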
The second hunk fixes a misleadingly named variable in `DiffusionPriorNetwork`:

```diff
@@ -320,8 +329,8 @@ class DiffusionPriorNetwork(nn.Module):
         # but let's just do it right
 
         if exists(mask):
-            all_masked_out = mask.any(dim = -1)
-            mask = torch.cat((mask, rearrange(all_masked_out, 'b -> b 1')), dim = 1)
+            not_all_masked_out = mask.any(dim = -1)
+            mask = torch.cat((mask, rearrange(not_all_masked_out, 'b -> b 1')), dim = 1)
 
         if exists(mask):
             mask = F.pad(mask, (0, 2), value = True) # extend mask for text embedding, noised image embedding, time step embedding, and learned query
```
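This hunk is behavior-preserving: `mask.any(dim = -1)` is True exactly when at least one position in a row is unmasked, i.e. when the row is *not* all masked out, so the old name `all_masked_out` stated the opposite of what the value means. A small sketch of the semantics (the tensor values and per-token comments are illustrative, not taken from the repo):

```python
import torch
import torch.nn.functional as F
from einops import rearrange

# boolean text mask for a batch of 3 sequences of length 4
mask = torch.tensor([
    [True,  True,  False, False],  # partially masked
    [False, False, False, False],  # fully masked out
    [True,  True,  True,  True ],  # fully visible
])

# True exactly when the row is NOT entirely masked out,
# hence the rename from the misleading `all_masked_out`
not_all_masked_out = mask.any(dim = -1)   # tensor([ True, False,  True])

# append it as one extra mask column for the pooled text embedding token
mask = torch.cat((mask, rearrange(not_all_masked_out, 'b -> b 1')), dim = 1)

# then extend with always-attended positions for the remaining tokens
# (time/image embeddings, learned query in the prior network)
mask = F.pad(mask, (0, 2), value = True)
print(mask.shape)  # torch.Size([3, 7])
```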