mirror of
https://github.com/lucidrains/DALLE2-pytorch.git
synced 2025-12-19 17:54:20 +01:00
be transparent
This commit is contained in:
@@ -39,7 +39,7 @@ Todo
|
|||||||
## Todo
|
## Todo
|
||||||
|
|
||||||
- [x] finish off gaussian diffusion class for latent embedding - allow for prediction of epsilon
|
- [x] finish off gaussian diffusion class for latent embedding - allow for prediction of epsilon
|
||||||
- [ ] add what was proposed in the paper, where DDPM objective for image latent embedding predicts x0 directly (reread vq-diffusion paper and get caught up on that line of work)
|
- [x] add what was proposed in the paper, where DDPM objective for image latent embedding predicts x0 directly (reread vq-diffusion paper and get caught up on that line of work)
|
||||||
- [ ] make sure it works end to end to produce an output tensor, taking a single gradient step
|
- [ ] make sure it works end to end to produce an output tensor, taking a single gradient step
|
||||||
- [ ] augment unet so that it can also be conditioned on text encodings (although in the paper they hinted this didn't make much of a difference)
|
- [ ] augment unet so that it can also be conditioned on text encodings (although in the paper they hinted this didn't make much of a difference)
|
||||||
- [ ] look into Jonathan Ho's cascading DDPM for the decoder, as that seems to be what they are using. get caught up on DDPM literature
|
- [ ] look into Jonathan Ho's cascading DDPM for the decoder, as that seems to be what they are using. get caught up on DDPM literature
|
||||||
|
|||||||
@@ -363,6 +363,11 @@ class DiffusionPrior(nn.Module):
|
|||||||
return posterior_mean, posterior_variance, posterior_log_variance_clipped
|
return posterior_mean, posterior_variance, posterior_log_variance_clipped
|
||||||
|
|
||||||
def p_mean_variance(self, x, t, text_cond, clip_denoised: bool):
|
def p_mean_variance(self, x, t, text_cond, clip_denoised: bool):
|
||||||
|
if self.predict_x0:
|
||||||
|
x_recon = self.net(x, t, **text_cond)
|
||||||
|
# not 100% sure about the line above - for any spectators, let me know in the github issues (or through a pull request) if you know how to do this correctly
|
||||||
|
# i'll be rereading https://arxiv.org/abs/2111.14822, where i think a similar approach is taken
|
||||||
|
else:
|
||||||
x_recon = self.predict_start_from_noise(x, t = t, noise = self.net(x, t, **text_cond))
|
x_recon = self.predict_start_from_noise(x, t = t, noise = self.net(x, t, **text_cond))
|
||||||
|
|
||||||
if clip_denoised:
|
if clip_denoised:
|
||||||
|
|||||||
Reference in New Issue
Block a user