Compare commits

...

5 Commits

3 changed files with 8 additions and 12 deletions

View File

@@ -360,6 +360,7 @@ class OpenAIClipAdapter(BaseClipAdapter):
is_eos_id = (text == self.eos_id)
text_mask_excluding_eos = is_eos_id.cumsum(dim = -1) == 0
text_mask = F.pad(text_mask_excluding_eos, (1, -1), value = True)
+text_mask = text_mask & (text != 0)
assert not self.cleared
text_embed = self.clip.encode_text(text)
@@ -434,6 +435,7 @@ class OpenClipAdapter(BaseClipAdapter):
is_eos_id = (text == self.eos_id)
text_mask_excluding_eos = is_eos_id.cumsum(dim = -1) == 0
text_mask = F.pad(text_mask_excluding_eos, (1, -1), value = True)
+text_mask = text_mask & (text != 0)
assert not self.cleared
text_embed = self.clip.encode_text(text)
@@ -1122,7 +1124,7 @@ class DiffusionPriorNetwork(nn.Module):
learned_queries = repeat(self.learned_query, 'd -> b 1 d', b = batch)
if self.self_cond:
-learned_queries = torch.cat((image_embed, self_cond), dim = -2)
+learned_queries = torch.cat((self_cond, learned_queries), dim = -2)
tokens = torch.cat((
text_encodings,
@@ -1332,10 +1334,7 @@ class DiffusionPrior(nn.Module):
# predict noise
-if self.predict_x_start or self.predict_v:
-pred_noise = self.noise_scheduler.predict_noise_from_start(image_embed, t = time_cond, x0 = x_start)
-else:
-pred_noise = pred
+pred_noise = self.noise_scheduler.predict_noise_from_start(image_embed, t = time_cond, x0 = x_start)
if time_next < 0:
image_embed = x_start
@@ -2494,7 +2493,7 @@ class Decoder(nn.Module):
dynamic_thres_percentile = 0.95,
p2_loss_weight_gamma = 0., # p2 loss weight, from https://arxiv.org/abs/2204.00227 - 0 is equivalent to weight of 1 across time - 1. is recommended
p2_loss_weight_k = 1,
-ddim_sampling_eta = 1. # can be set to 0. for deterministic sampling afaict
+ddim_sampling_eta = 0. # can be set to 0. for deterministic sampling afaict
):
super().__init__()
@@ -2973,10 +2972,7 @@ class Decoder(nn.Module):
# predict noise
-if predict_x_start or predict_v:
-pred_noise = noise_scheduler.predict_noise_from_start(img, t = time_cond, x0 = x_start)
-else:
-pred_noise = pred
+pred_noise = noise_scheduler.predict_noise_from_start(img, t = time_cond, x0 = x_start)
c1 = eta * ((1 - alpha / alpha_next) * (1 - alpha_next) / (1 - alpha)).sqrt()
c2 = ((1 - alpha_next) - torch.square(c1)).sqrt()

View File

@@ -1 +1 @@
-__version__ = '1.11.2'
+__version__ = '1.12.3'

View File

@@ -27,7 +27,7 @@ setup(
'accelerate',
'click',
'open-clip-torch>=2.0.0,<3.0.0',
-'clip-anytorch>=2.4.0',
+'clip-anytorch>=2.5.2',
'coca-pytorch>=0.0.5',
'ema-pytorch>=0.0.7',
'einops>=0.4',