Mirror of https://github.com/lucidrains/DALLE2-pytorch.git
fix remaining issues with deriving cond_on_text_encodings from child unet settings
@@ -368,7 +368,8 @@ unet1 = Unet(
     image_embed_dim = 512,
     cond_dim = 128,
     channels = 3,
-    dim_mults=(1, 2, 4, 8)
+    dim_mults=(1, 2, 4, 8),
+    cond_on_text_encodings = True # set to True for any unets that need to be conditioned on text encodings
 ).cuda()
 
 unet2 = Unet(
@@ -385,8 +386,7 @@ decoder = Decoder(
     clip = clip,
     timesteps = 100,
     image_cond_drop_prob = 0.1,
-    text_cond_drop_prob = 0.5,
-    condition_on_text_encodings = False # set this to True if you wish to condition on text during training and sampling
+    text_cond_drop_prob = 0.5
 ).cuda()
 
 for unet_number in (1, 2):
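The first two hunks update the usage example: the text-conditioning switch moves off the Decoder and onto the individual unets. A minimal sketch of the resulting usage, assuming the surrounding example's other constructor arguments (dim, image_sizes, OpenAIClipAdapter) stay as documented:

from dalle2_pytorch import Unet, Decoder, OpenAIClipAdapter

clip = OpenAIClipAdapter()

# the flag now lives on each unet; set it only where text conditioning is wanted
unet1 = Unet(dim = 128, image_embed_dim = 512, cond_dim = 128, channels = 3,
             dim_mults = (1, 2, 4, 8), cond_on_text_encodings = True)
unet2 = Unet(dim = 16, image_embed_dim = 512, cond_dim = 128, channels = 3,
             dim_mults = (1, 2, 4, 8))  # upsampler, not conditioned on text

# no condition_on_text_encodings argument here anymore
decoder = Decoder(unet = (unet1, unet2), image_sizes = (128, 256), clip = clip,
                  timesteps = 100, image_cond_drop_prob = 0.1, text_cond_drop_prob = 0.5)

# derived from the children: True because unet1 conditions on text
assert decoder.condition_on_text_encodings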
@@ -1818,8 +1818,6 @@ class Decoder(nn.Module):
         num_unets = len(unets)
 
         self.unconditional = unconditional
-        self.condition_on_text_encodings = unets[0].cond_on_text_encodings
-        assert not (self.condition_on_text_encodings and unconditional), 'unconditional decoder image generation cannot be set to True if conditioning on text is present'
 
         # automatically take care of ensuring that first unet is unconditional
         # while the rest of the unets are conditioned on the low resolution image produced by previous unet
@@ -1859,6 +1857,10 @@ class Decoder(nn.Module):
             self.unets.append(one_unet)
             self.vaes.append(one_vae.copy_for_eval())
 
+        # determine from unets whether conditioning on text encoding is needed
+
+        self.condition_on_text_encodings = any([unet.cond_on_text_encodings for unet in self.unets])
+
         # create noise schedulers per unet
 
         if not exists(beta_schedule):
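The two hunks above relocate the derivation: instead of mirroring only unets[0] at the top of Decoder.__init__, the flag is computed with any() after all child unets are registered. A standalone toy example of why that matters (FakeUnet is a hypothetical stand-in, not library code):

class FakeUnet:
    # stand-in for dalle2_pytorch.Unet, carrying only the relevant flag
    def __init__(self, cond_on_text_encodings = False):
        self.cond_on_text_encodings = cond_on_text_encodings

# first unet not conditioned on text, second one conditioned
unets = [FakeUnet(), FakeUnet(cond_on_text_encodings = True)]

old_behavior = unets[0].cond_on_text_encodings                        # False, misses the second unet
new_behavior = any([unet.cond_on_text_encodings for unet in unets])   # True

print(old_behavior, new_behavior)  # False True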
@@ -284,21 +284,27 @@ class TrainDecoderConfig(BaseModel):
     def check_has_embeddings(cls, values):
         # Makes sure that enough information is provided to get the embeddings specified for training
         data_config, decoder_config = values.get('data'), values.get('decoder')
-        if data_config is None or decoder_config is None:
+
+        if not exists(data_config) or not exists(decoder_config):
             # Then something else errored and we should just pass through
             return values
-        using_text_embeddings = decoder_config.unets[0].cond_on_text_encodings # in dalle2 only the first UNet is text conditioned
+
+        using_text_embeddings = any([unet.cond_on_text_encodings for unet in decoder_config.unets])
         using_clip = exists(decoder_config.clip)
         img_emb_url = data_config.img_embeddings_url
         text_emb_url = data_config.text_embeddings_url
+
         if using_text_embeddings:
             # Then we need some way to get the embeddings
-            assert using_clip or text_emb_url is not None, 'If text conditioning, either clip or text_embeddings_url must be provided'
+            assert using_clip or exists(text_emb_url), 'If text conditioning, either clip or text_embeddings_url must be provided'
+
         if using_clip:
             if using_text_embeddings:
-                assert text_emb_url is None or img_emb_url is None, 'Loaded clip, but also provided text_embeddings_url and img_embeddings_url. This is redundant. Remove the clip model or the text embeddings'
+                assert not exists(text_emb_url) or not exists(img_emb_url), 'Loaded clip, but also provided text_embeddings_url and img_embeddings_url. This is redundant. Remove the clip model or the text embeddings'
             else:
-                assert img_emb_url is None, 'Loaded clip, but also provided img_embeddings_url. This is redundant. Remove the clip model or the embeddings'
+                assert not exists(img_emb_url), 'Loaded clip, but also provided img_embeddings_url. This is redundant. Remove the clip model or the embeddings'
+
         if text_emb_url:
             assert using_text_embeddings, "Text embeddings are being loaded, but text embeddings are not being conditioned on. This will slow down the dataloader for no reason."
+
         return values
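The validator's accept/reject rules, restated as a self-contained sketch (the helper below copies the hunk's logic; it is an illustration, not the library's API):

def exists(val):
    return val is not None

def check(using_text_embeddings, using_clip, img_emb_url, text_emb_url):
    # text conditioning needs a source of text embeddings: clip or a url
    if using_text_embeddings:
        assert using_clip or exists(text_emb_url)
    # a loaded clip model makes precomputed embedding urls redundant
    if using_clip:
        if using_text_embeddings:
            assert not exists(text_emb_url) or not exists(img_emb_url)
        else:
            assert not exists(img_emb_url)
    # loading text embeddings without conditioning on them only slows the dataloader
    if text_emb_url:
        assert using_text_embeddings

check(True, True, None, None)         # ok: clip supplies the embeddings
check(True, False, 'img/', 'txt/')    # ok: both embeddings precomputed
check(False, False, 'img/', None)     # ok: image-embedding-only training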
@@ -1 +1 @@
-__version__ = '0.12.1'
+__version__ = '0.12.2'
@@ -596,7 +596,8 @@ def initialize_training(config, config_path):
 
     has_img_embeddings = config.data.img_embeddings_url is not None
     has_text_embeddings = config.data.text_embeddings_url is not None
-    conditioning_on_text = config.decoder.unets[0].cond_on_text_encodings
+    conditioning_on_text = any([unet.cond_on_text_encodings for unet in config.decoder.unets])
+
     has_clip_model = config.decoder.clip is not None
     data_source_string = ""
 