mirror of
https://github.com/lucidrains/DALLE2-pytorch.git
synced 2026-02-23 20:25:00 +01:00
Compare commits
9 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| | 848e8a480a | | |
| | cc58f75474 | | |
| | 3b2cf7b0bc | | |
| | 984d62a373 | | |
| | 683dd98b96 | | |
| | 067ac323da | | |
| | 91c8d1ca13 | | |
| | 08238a7200 | | |
| | 7166ad6711 | | |
@@ -360,6 +360,7 @@ class OpenAIClipAdapter(BaseClipAdapter):
|
|||||||
is_eos_id = (text == self.eos_id)
|
is_eos_id = (text == self.eos_id)
|
||||||
text_mask_excluding_eos = is_eos_id.cumsum(dim = -1) == 0
|
text_mask_excluding_eos = is_eos_id.cumsum(dim = -1) == 0
|
||||||
text_mask = F.pad(text_mask_excluding_eos, (1, -1), value = True)
|
text_mask = F.pad(text_mask_excluding_eos, (1, -1), value = True)
|
||||||
|
text_mask = text_mask & (text != 0)
|
||||||
assert not self.cleared
|
assert not self.cleared
|
||||||
|
|
||||||
text_embed = self.clip.encode_text(text)
|
text_embed = self.clip.encode_text(text)
|
||||||
@@ -434,6 +435,7 @@ class OpenClipAdapter(BaseClipAdapter):
|
|||||||
is_eos_id = (text == self.eos_id)
|
is_eos_id = (text == self.eos_id)
|
||||||
text_mask_excluding_eos = is_eos_id.cumsum(dim = -1) == 0
|
text_mask_excluding_eos = is_eos_id.cumsum(dim = -1) == 0
|
||||||
text_mask = F.pad(text_mask_excluding_eos, (1, -1), value = True)
|
text_mask = F.pad(text_mask_excluding_eos, (1, -1), value = True)
|
||||||
|
text_mask = text_mask & (text != 0)
|
||||||
assert not self.cleared
|
assert not self.cleared
|
||||||
|
|
||||||
text_embed = self.clip.encode_text(text)
|
text_embed = self.clip.encode_text(text)
|
||||||
@@ -1122,7 +1124,7 @@ class DiffusionPriorNetwork(nn.Module):
|
|||||||
learned_queries = repeat(self.learned_query, 'd -> b 1 d', b = batch)
|
learned_queries = repeat(self.learned_query, 'd -> b 1 d', b = batch)
|
||||||
|
|
||||||
if self.self_cond:
|
if self.self_cond:
|
||||||
learned_queries = torch.cat((image_embed, self_cond), dim = -2)
|
learned_queries = torch.cat((self_cond, learned_queries), dim = -2)
|
||||||
|
|
||||||
tokens = torch.cat((
|
tokens = torch.cat((
|
||||||
text_encodings,
|
text_encodings,
|
||||||
@@ -1320,7 +1322,7 @@ class DiffusionPrior(nn.Module):
|
|||||||
elif self.predict_x_start:
|
elif self.predict_x_start:
|
||||||
x_start = pred
|
x_start = pred
|
||||||
else:
|
else:
|
||||||
x_start = self.noise_scheduler.predict_start_from_noise(image_embed, t = time_cond, noise = pred_noise)
|
x_start = self.noise_scheduler.predict_start_from_noise(image_embed, t = time_cond, noise = pred)
|
||||||
|
|
||||||
# clip x0 before maybe predicting noise
|
# clip x0 before maybe predicting noise
|
||||||
|
|
||||||
@@ -1332,10 +1334,7 @@ class DiffusionPrior(nn.Module):
|
|||||||
|
|
||||||
# predict noise
|
# predict noise
|
||||||
|
|
||||||
if self.predict_x_start or self.predict_v:
|
pred_noise = self.noise_scheduler.predict_noise_from_start(image_embed, t = time_cond, x0 = x_start)
|
||||||
pred_noise = self.noise_scheduler.predict_noise_from_start(image_embed, t = time_cond, x0 = x_start)
|
|
||||||
else:
|
|
||||||
pred_noise = pred
|
|
||||||
|
|
||||||
if time_next < 0:
|
if time_next < 0:
|
||||||
image_embed = x_start
|
image_embed = x_start
|
||||||
@@ -2494,7 +2493,7 @@ class Decoder(nn.Module):
|
|||||||
dynamic_thres_percentile = 0.95,
|
dynamic_thres_percentile = 0.95,
|
||||||
p2_loss_weight_gamma = 0., # p2 loss weight, from https://arxiv.org/abs/2204.00227 - 0 is equivalent to weight of 1 across time - 1. is recommended
|
p2_loss_weight_gamma = 0., # p2 loss weight, from https://arxiv.org/abs/2204.00227 - 0 is equivalent to weight of 1 across time - 1. is recommended
|
||||||
p2_loss_weight_k = 1,
|
p2_loss_weight_k = 1,
|
||||||
ddim_sampling_eta = 1. # can be set to 0. for deterministic sampling afaict
|
ddim_sampling_eta = 0. # can be set to 0. for deterministic sampling afaict
|
||||||
):
|
):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
|
|
||||||
@@ -2973,10 +2972,7 @@ class Decoder(nn.Module):
|
|||||||
|
|
||||||
# predict noise
|
# predict noise
|
||||||
|
|
||||||
if predict_x_start or predict_v:
|
pred_noise = noise_scheduler.predict_noise_from_start(img, t = time_cond, x0 = x_start)
|
||||||
pred_noise = noise_scheduler.predict_noise_from_start(img, t = time_cond, x0 = x_start)
|
|
||||||
else:
|
|
||||||
pred_noise = pred
|
|
||||||
|
|
||||||
c1 = eta * ((1 - alpha / alpha_next) * (1 - alpha_next) / (1 - alpha)).sqrt()
|
c1 = eta * ((1 - alpha / alpha_next) * (1 - alpha_next) / (1 - alpha)).sqrt()
|
||||||
c2 = ((1 - alpha_next) - torch.square(c1)).sqrt()
|
c2 = ((1 - alpha_next) - torch.square(c1)).sqrt()
|
||||||
|
|||||||
@@ -4,11 +4,13 @@ from pydantic import BaseModel, validator, root_validator
|
|||||||
from typing import List, Optional, Union, Tuple, Dict, Any, TypeVar
|
from typing import List, Optional, Union, Tuple, Dict, Any, TypeVar
|
||||||
|
|
||||||
from x_clip import CLIP as XCLIP
|
from x_clip import CLIP as XCLIP
|
||||||
|
from open_clip import list_pretrained
|
||||||
from coca_pytorch import CoCa
|
from coca_pytorch import CoCa
|
||||||
|
|
||||||
from dalle2_pytorch.dalle2_pytorch import (
|
from dalle2_pytorch.dalle2_pytorch import (
|
||||||
CoCaAdapter,
|
CoCaAdapter,
|
||||||
OpenAIClipAdapter,
|
OpenAIClipAdapter,
|
||||||
|
OpenClipAdapter,
|
||||||
Unet,
|
Unet,
|
||||||
Decoder,
|
Decoder,
|
||||||
DiffusionPrior,
|
DiffusionPrior,
|
||||||
@@ -117,6 +119,10 @@ class AdapterConfig(BaseModel):
|
|||||||
def create(self):
|
def create(self):
|
||||||
if self.make == "openai":
|
if self.make == "openai":
|
||||||
return OpenAIClipAdapter(self.model)
|
return OpenAIClipAdapter(self.model)
|
||||||
|
elif self.make == "open_clip":
|
||||||
|
pretrained = dict(list_pretrained())
|
||||||
|
checkpoint = pretrained[self.model]
|
||||||
|
return OpenClipAdapter(name=self.model, pretrained=checkpoint)
|
||||||
elif self.make == "x-clip":
|
elif self.make == "x-clip":
|
||||||
return XClipAdapter(XCLIP(**self.base_model_kwargs))
|
return XClipAdapter(XCLIP(**self.base_model_kwargs))
|
||||||
elif self.make == "coca":
|
elif self.make == "coca":
|
||||||
|
|||||||
@@ -236,7 +236,7 @@ class DiffusionPriorTrainer(nn.Module):
|
|||||||
)
|
)
|
||||||
|
|
||||||
if exists(cosine_decay_max_steps):
|
if exists(cosine_decay_max_steps):
|
||||||
self.scheduler = CosineAnnealingLR(optimizer, T_max = cosine_decay_max_steps)
|
self.scheduler = CosineAnnealingLR(self.optimizer, T_max = cosine_decay_max_steps)
|
||||||
else:
|
else:
|
||||||
self.scheduler = LambdaLR(self.optimizer, lr_lambda = lambda _: 1.0)
|
self.scheduler = LambdaLR(self.optimizer, lr_lambda = lambda _: 1.0)
|
||||||
|
|
||||||
|
|||||||
@@ -1 +1 @@
|
|||||||
__version__ = '1.11.1'
|
__version__ = '1.12.3'
|
||||||
|
|||||||
3
setup.py
3
setup.py
@@ -26,7 +26,8 @@ setup(
|
|||||||
install_requires=[
|
install_requires=[
|
||||||
'accelerate',
|
'accelerate',
|
||||||
'click',
|
'click',
|
||||||
'clip-anytorch>=2.4.0',
|
'open-clip-torch>=2.0.0,<3.0.0',
|
||||||
|
'clip-anytorch>=2.5.2',
|
||||||
'coca-pytorch>=0.0.5',
|
'coca-pytorch>=0.0.5',
|
||||||
'ema-pytorch>=0.0.7',
|
'ema-pytorch>=0.0.7',
|
||||||
'einops>=0.4',
|
'einops>=0.4',
|
||||||
|
|||||||
Reference in New Issue
Block a user