diff --git a/dalle2_pytorch/dalle2_pytorch.py b/dalle2_pytorch/dalle2_pytorch.py index 5de4a90..286c0b1 100644 --- a/dalle2_pytorch/dalle2_pytorch.py +++ b/dalle2_pytorch/dalle2_pytorch.py @@ -1004,9 +1004,9 @@ class DiffusionPriorNetwork(nn.Module): # setup self conditioning - self_cond = None if self.self_cond: - self_cond = default(self_cond, lambda: torch.zeros(batch, 1, self.dim, device = device, dtype = dtype)) + self_cond = default(self_cond, lambda: torch.zeros(batch, self.dim, device = device, dtype = dtype)) + self_cond = rearrange(self_cond, 'b d -> b 1 d') # in section 2.2, last paragraph # "... consisting of encoded text, CLIP text embedding, diffusion timestep embedding, noised CLIP image embedding, final embedding for prediction" diff --git a/dalle2_pytorch/version.py b/dalle2_pytorch/version.py index bb64aa4..4574cc8 100644 --- a/dalle2_pytorch/version.py +++ b/dalle2_pytorch/version.py @@ -1 +1 @@ -__version__ = '1.6.1' +__version__ = '1.6.3'