mirror of
https://github.com/lucidrains/DALLE2-pytorch.git
synced 2026-02-12 19:44:26 +01:00
Compare commits
1 Commit
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
1892f1ac1d |
@@ -360,7 +360,6 @@ class OpenAIClipAdapter(BaseClipAdapter):
|
||||
is_eos_id = (text == self.eos_id)
|
||||
text_mask_excluding_eos = is_eos_id.cumsum(dim = -1) == 0
|
||||
text_mask = F.pad(text_mask_excluding_eos, (1, -1), value = True)
|
||||
text_mask = text_mask & (text != 0)
|
||||
assert not self.cleared
|
||||
|
||||
text_embed = self.clip.encode_text(text)
|
||||
@@ -435,7 +434,6 @@ class OpenClipAdapter(BaseClipAdapter):
|
||||
is_eos_id = (text == self.eos_id)
|
||||
text_mask_excluding_eos = is_eos_id.cumsum(dim = -1) == 0
|
||||
text_mask = F.pad(text_mask_excluding_eos, (1, -1), value = True)
|
||||
text_mask = text_mask & (text != 0)
|
||||
assert not self.cleared
|
||||
|
||||
text_embed = self.clip.encode_text(text)
|
||||
@@ -631,7 +629,7 @@ class NoiseScheduler(nn.Module):
|
||||
|
||||
def calculate_v(self, x_start, t, noise = None):
|
||||
return (
|
||||
- extract(self.sqrt_alphas_cumprod, t, x_start.shape) * noise -
|
||||
+ extract(self.sqrt_alphas_cumprod, t, x_start.shape) * noise +
|
||||
extract(self.sqrt_one_minus_alphas_cumprod, t, x_start.shape) * x_start
|
||||
)
|
||||
|
||||
@@ -1124,7 +1122,7 @@ class DiffusionPriorNetwork(nn.Module):
|
||||
learned_queries = repeat(self.learned_query, 'd -> b 1 d', b = batch)
|
||||
|
||||
if self.self_cond:
|
||||
- learned_queries = torch.cat((self_cond, learned_queries), dim = -2)
|
||||
+ learned_queries = torch.cat((image_embed, self_cond), dim = -2)
|
||||
|
||||
tokens = torch.cat((
|
||||
text_encodings,
|
||||
@@ -1322,7 +1320,7 @@ class DiffusionPrior(nn.Module):
|
||||
elif self.predict_x_start:
|
||||
x_start = pred
|
||||
else:
|
||||
- x_start = self.noise_scheduler.predict_start_from_noise(image_embed, t = time_cond, noise = pred)
|
||||
+ x_start = self.noise_scheduler.predict_start_from_noise(image_embed, t = time_cond, noise = pred_noise)
|
||||
|
||||
# clip x0 before maybe predicting noise
|
||||
|
||||
@@ -2496,7 +2494,7 @@ class Decoder(nn.Module):
|
||||
dynamic_thres_percentile = 0.95,
|
||||
p2_loss_weight_gamma = 0., # p2 loss weight, from https://arxiv.org/abs/2204.00227 - 0 is equivalent to weight of 1 across time - 1. is recommended
|
||||
p2_loss_weight_k = 1,
|
||||
- ddim_sampling_eta = 0. # can be set to 0. for deterministic sampling afaict
|
||||
+ ddim_sampling_eta = 1. # can be set to 0. for deterministic sampling afaict
|
||||
):
|
||||
super().__init__()
|
||||
|
||||
|
||||
@@ -4,13 +4,11 @@ from pydantic import BaseModel, validator, root_validator
|
||||
from typing import List, Optional, Union, Tuple, Dict, Any, TypeVar
|
||||
|
||||
from x_clip import CLIP as XCLIP
|
||||
from open_clip import list_pretrained
|
||||
from coca_pytorch import CoCa
|
||||
|
||||
from dalle2_pytorch.dalle2_pytorch import (
|
||||
CoCaAdapter,
|
||||
OpenAIClipAdapter,
|
||||
OpenClipAdapter,
|
||||
Unet,
|
||||
Decoder,
|
||||
DiffusionPrior,
|
||||
@@ -119,10 +117,6 @@ class AdapterConfig(BaseModel):
|
||||
def create(self):
|
||||
if self.make == "openai":
|
||||
return OpenAIClipAdapter(self.model)
|
||||
elif self.make == "open_clip":
|
||||
pretrained = dict(list_pretrained())
|
||||
checkpoint = pretrained[self.model]
|
||||
return OpenClipAdapter(name=self.model, pretrained=checkpoint)
|
||||
elif self.make == "x-clip":
|
||||
return XClipAdapter(XCLIP(**self.base_model_kwargs))
|
||||
elif self.make == "coca":
|
||||
|
||||
@@ -236,7 +236,7 @@ class DiffusionPriorTrainer(nn.Module):
|
||||
)
|
||||
|
||||
if exists(cosine_decay_max_steps):
|
||||
- self.scheduler = CosineAnnealingLR(self.optimizer, T_max = cosine_decay_max_steps)
|
||||
+ self.scheduler = CosineAnnealingLR(optimizer, T_max = cosine_decay_max_steps)
|
||||
else:
|
||||
self.scheduler = LambdaLR(self.optimizer, lr_lambda = lambda _: 1.0)
|
||||
|
||||
|
||||
@@ -1 +1 @@
|
||||
- __version__ = '1.12.1'
|
||||
+ __version__ = '1.11.0'
|
||||
|
||||
Reference in New Issue
Block a user