use some magic just this once to remove the need for researchers to think

Phil Wang
2022-04-18 12:40:38 -07:00
parent 7214df472d
commit 960a79857b
3 changed files with 27 additions and 5 deletions

README.md

@@ -218,7 +218,6 @@ unet1 = Unet(
 unet2 = Unet(
     dim = 16,
     image_embed_dim = 512,
-    lowres_cond = True, # subsequent unets must have this turned on (and first unet must have this turned off)
     cond_dim = 128,
     channels = 3,
     dim_mults = (1, 2, 4, 8, 16)
@@ -349,8 +348,7 @@ unet2 = Unet(
     image_embed_dim = 512,
     cond_dim = 128,
     channels = 3,
-    dim_mults = (1, 2, 4, 8, 16),
-    lowres_cond = True
+    dim_mults = (1, 2, 4, 8, 16)
 ).cuda()
 
 decoder = Decoder(

dalle2_pytorch/dalle2_pytorch.py

@@ -816,6 +816,11 @@ class Unet(nn.Module):
         attend_at_middle = True, # whether to have a layer of attention at the bottleneck (can turn off for higher resolution in cascading DDPM, before bringing in efficient attention)
     ):
         super().__init__()
+        # save locals to take care of some hyperparameters for cascading DDPM
+
+        self._locals = locals()
+        del self._locals['self']
+        del self._locals['__class__']
 
         # for eventual cascading diffusion
@@ -896,6 +901,15 @@ class Unet(nn.Module):
             nn.Conv2d(dim, out_dim, 1)
         )
+    # if the current settings for the unet are not correct
+    # for cascading DDPM, then reinit the unet with the right settings
+
+    def force_lowres_cond(self, lowres_cond):
+        if lowres_cond == self.lowres_cond:
+            return self
+
+        updated_kwargs = {**self._locals, 'lowres_cond': lowres_cond}
+        return self.__class__(**updated_kwargs)
 
     def forward_with_cond_scale(
         self,
         *args,
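Outside the diff, here is a standalone toy sketch of the pattern the two hunks above introduce (ToyUnet and its arguments are made up for illustration, not the library's Unet): locals() captured at the top of __init__ snapshots every constructor argument, so force_lowres_cond can rebuild the module with a single hyperparameter overridden.

import torch.nn as nn

class ToyUnet(nn.Module):
    def __init__(self, dim, lowres_cond = False):
        super().__init__()
        # snapshot the constructor arguments; 'self' and the implicit
        # '__class__' cell created by the zero-argument super() are dropped
        self._locals = locals()
        del self._locals['self']
        del self._locals['__class__']

        self.lowres_cond = lowres_cond
        self.conv = nn.Conv2d(dim, dim, 1)

    def force_lowres_cond(self, lowres_cond):
        # if already configured correctly, hand back the same module
        if lowres_cond == self.lowres_cond:
            return self
        # otherwise rebuild with every saved hyperparameter except the flag
        updated_kwargs = {**self._locals, 'lowres_cond': lowres_cond}
        return self.__class__(**updated_kwargs)

unet = ToyUnet(dim = 16)             # lowres_cond defaults to False
unet = unet.force_lowres_cond(True)  # re-instantiated with lowres_cond = True
assert unet.lowres_cond

Rebuilding rather than mutating keeps every other hyperparameter exactly as it was specified, which is what lets the Decoder change below flip the flag safely.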
@@ -1021,7 +1035,17 @@ class Decoder(nn.Module):
         self.clip_image_size = clip.image_size
         self.channels = clip.image_channels
 
-        self.unets = nn.ModuleList(unet)
+        # automatically take care of ensuring that first unet is unconditional
+        # while the rest of the unets are conditioned on the low resolution image produced by previous unet
+
+        self.unets = nn.ModuleList([])
+
+        for ind, one_unet in enumerate(cast_tuple(unet)):
+            is_first = ind == 0
+            one_unet = one_unet.force_lowres_cond(not is_first)
+            self.unets.append(one_unet)
 
         # unet image sizes
 
         image_sizes = default(image_sizes, (clip.image_size,))
         image_sizes = tuple(sorted(set(image_sizes)))
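And a toy sketch of the Decoder-side loop added above (again a stand-in, not the library's Decoder): the first unet is forced to stay unconditional, while every later unet is forced to condition on the low resolution image produced by the previous one.

import torch.nn as nn

class StubUnet(nn.Module):
    def __init__(self, lowres_cond = False):
        super().__init__()
        self.lowres_cond = lowres_cond

    def force_lowres_cond(self, lowres_cond):
        # same contract as the method added to Unet in this commit
        return self if lowres_cond == self.lowres_cond else StubUnet(lowres_cond)

unets = nn.ModuleList([])

for ind, one_unet in enumerate((StubUnet(), StubUnet(), StubUnet())):
    is_first = ind == 0
    unets.append(one_unet.force_lowres_cond(not is_first))

assert [u.lowres_cond for u in unets] == [False, True, True]

This is the "magic" the commit message refers to: the unets can be constructed without any lowres_cond bookkeeping, and the Decoder quietly reconfigures the cascade.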

setup.py

@@ -10,7 +10,7 @@ setup(
       'dream = dalle2_pytorch.cli:dream'
     ],
   },
-  version = '0.0.21',
+  version = '0.0.22',
   license='MIT',
   description = 'DALL-E 2',
   author = 'Phil Wang',