Compare commits

...

6 Commits

Author SHA1 Message Date
Phil Wang
dde51fd362 revert restriction for classifier free guidance for diffusion prior, given @crowsonkb advice 2022-05-07 20:55:41 -07:00
Nasir Khalid
2eac7996fa Additional image_embed metric (#75)
Added metric to track image_embed vs predicted_image_embed
2022-05-07 14:32:33 -07:00
Phil Wang
4010aec033 turn off classifier free guidance if predicting x_start for diffusion prior 2022-05-07 09:38:17 -07:00
Phil Wang
c87b84a259 todo 2022-05-07 09:21:08 -07:00
Phil Wang
8b05468653 todo 2022-05-07 08:33:45 -07:00
Phil Wang
830afd3c15 sinusoidal embed time embeddings for diffusion prior as well, for continuous version 2022-05-07 08:32:43 -07:00
4 changed files with 9 additions and 3 deletions

View File

@@ -981,6 +981,8 @@ Once built, images will be saved to the same directory the command is invoked
- [ ] make sure FILIP works with DALL-E2 from x-clip https://arxiv.org/abs/2111.07783
- [ ] make sure resnet hyperparameters can be configurable across unet depth (groups and expansion factor)
- [ ] offer save / load methods on the trainer classes to automatically take care of state dicts for scalers / optimizers / saving versions and checking for breaking changes
- [ ] offer setting in diffusion prior to split time and image embeddings into multiple tokens, configurable, for more surface area during attention
- [ ] bring in skip-layer excitations (from lightweight gan paper) to see if it helps for either decoder of unet or vqgan-vae training
## Citations

View File

@@ -706,7 +706,7 @@ class DiffusionPriorNetwork(nn.Module):
**kwargs
):
super().__init__()
self.time_embeddings = nn.Embedding(num_timesteps, dim) if exists(num_timesteps) else nn.Sequential(Rearrange('b -> b 1'), MLP(1, dim)) # also offer a continuous version of timestep embeddings, with a 2 layer MLP
self.time_embeddings = nn.Embedding(num_timesteps, dim) if exists(num_timesteps) else nn.Sequential(SinusoidalPosEmb(dim), MLP(dim, dim)) # also offer a continuous version of timestep embeddings, with a 2 layer MLP
self.learned_query = nn.Parameter(torch.randn(dim))
self.causal_transformer = CausalTransformer(dim = dim, **kwargs)
@@ -800,7 +800,7 @@ class DiffusionPrior(BaseGaussianDiffusion):
image_size = None,
image_channels = 3,
timesteps = 1000,
cond_drop_prob = 0.2,
cond_drop_prob = 0.,
loss_type = "l1",
predict_x_start = True,
beta_schedule = "cosine",

View File

@@ -10,7 +10,7 @@ setup(
'dream = dalle2_pytorch.cli:dream'
],
},
version = '0.1.7',
version = '0.1.10',
license='MIT',
description = 'DALL-E 2',
author = 'Phil Wang',

View File

@@ -93,6 +93,8 @@ def report_cosine_sims(diffusion_prior, image_reader, text_reader, train_set_siz
text_embed, predicted_image_embeddings).cpu().numpy()
unrelated_similarity = cos(
text_embed, predicted_unrelated_embeddings).cpu().numpy()
predicted_img_similarity = cos(
test_image_embeddings, predicted_image_embeddings).cpu().numpy()
wandb.log(
{"CosineSimilarity(text_embed,image_embed)": np.mean(original_similarity)})
@@ -100,6 +102,8 @@ def report_cosine_sims(diffusion_prior, image_reader, text_reader, train_set_siz
predicted_similarity)})
wandb.log({"CosineSimilarity(text_embed,predicted_unrelated_embed)": np.mean(
unrelated_similarity)})
wandb.log({"CosineSimilarity(image_embed,predicted_image_embed)": np.mean(
predicted_img_similarity)})
return np.mean(predicted_similarity - original_similarity)