mirror of
https://github.com/lucidrains/DALLE2-pytorch.git
synced 2026-02-12 11:34:29 +01:00
Compare commits
5 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| | 580274be79 | | |
| | 848e8a480a | | |
| | cc58f75474 | | |
| | 3b2cf7b0bc | | |
| | 984d62a373 | | |
@@ -1124,7 +1124,7 @@ class DiffusionPriorNetwork(nn.Module):
|
||||
learned_queries = repeat(self.learned_query, 'd -> b 1 d', b = batch)
|
||||
|
||||
if self.self_cond:
|
||||
learned_queries = torch.cat((image_embed, self_cond), dim = -2)
|
||||
learned_queries = torch.cat((self_cond, learned_queries), dim = -2)
|
||||
|
||||
tokens = torch.cat((
|
||||
text_encodings,
|
||||
@@ -1334,10 +1334,7 @@ class DiffusionPrior(nn.Module):
|
||||
|
||||
# predict noise
|
||||
|
||||
if self.predict_x_start or self.predict_v:
|
||||
pred_noise = self.noise_scheduler.predict_noise_from_start(image_embed, t = time_cond, x0 = x_start)
|
||||
else:
|
||||
pred_noise = pred
|
||||
pred_noise = self.noise_scheduler.predict_noise_from_start(image_embed, t = time_cond, x0 = x_start)
|
||||
|
||||
if time_next < 0:
|
||||
image_embed = x_start
|
||||
@@ -2496,7 +2493,7 @@ class Decoder(nn.Module):
|
||||
dynamic_thres_percentile = 0.95,
|
||||
p2_loss_weight_gamma = 0., # p2 loss weight, from https://arxiv.org/abs/2204.00227 - 0 is equivalent to weight of 1 across time - 1. is recommended
|
||||
p2_loss_weight_k = 1,
|
||||
ddim_sampling_eta = 1. # can be set to 0. for deterministic sampling afaict
|
||||
ddim_sampling_eta = 0. # can be set to 0. for deterministic sampling afaict
|
||||
):
|
||||
super().__init__()
|
||||
|
||||
@@ -2730,11 +2727,16 @@ class Decoder(nn.Module):
|
||||
if exists(unet_number):
|
||||
unet = self.get_unet(unet_number)
|
||||
|
||||
# devices
|
||||
|
||||
cuda, cpu = torch.device('cuda'), torch.device('cpu')
|
||||
|
||||
self.cuda()
|
||||
|
||||
devices = [module_device(unet) for unet in self.unets]
|
||||
self.unets.cpu()
|
||||
unet.cuda()
|
||||
|
||||
self.unets.to(cpu)
|
||||
unet.to(cuda)
|
||||
|
||||
yield
|
||||
|
||||
@@ -2975,10 +2977,7 @@ class Decoder(nn.Module):
|
||||
|
||||
# predict noise
|
||||
|
||||
if predict_x_start or predict_v:
|
||||
pred_noise = noise_scheduler.predict_noise_from_start(img, t = time_cond, x0 = x_start)
|
||||
else:
|
||||
pred_noise = pred
|
||||
pred_noise = noise_scheduler.predict_noise_from_start(img, t = time_cond, x0 = x_start)
|
||||
|
||||
c1 = eta * ((1 - alpha / alpha_next) * (1 - alpha_next) / (1 - alpha)).sqrt()
|
||||
c2 = ((1 - alpha_next) - torch.square(c1)).sqrt()
|
||||
@@ -3120,7 +3119,8 @@ class Decoder(nn.Module):
|
||||
distributed = False,
|
||||
inpaint_image = None,
|
||||
inpaint_mask = None,
|
||||
inpaint_resample_times = 5
|
||||
inpaint_resample_times = 5,
|
||||
one_unet_in_gpu_at_time = True
|
||||
):
|
||||
assert self.unconditional or exists(image_embed), 'image embed must be present on sampling from decoder unless if trained unconditionally'
|
||||
|
||||
@@ -3143,6 +3143,7 @@ class Decoder(nn.Module):
|
||||
assert image.shape[0] == batch_size, 'image must have batch size of {} if starting at unet number > 1'.format(batch_size)
|
||||
prev_unet_output_size = self.image_sizes[start_at_unet_number - 2]
|
||||
img = resize_image_to(image, prev_unet_output_size, nearest = True)
|
||||
|
||||
is_cuda = next(self.parameters()).is_cuda
|
||||
|
||||
num_unets = self.num_unets
|
||||
@@ -3152,7 +3153,7 @@ class Decoder(nn.Module):
|
||||
if unet_number < start_at_unet_number:
|
||||
continue # It's the easiest way to do it
|
||||
|
||||
context = self.one_unet_in_gpu(unet = unet) if is_cuda else null_context()
|
||||
context = self.one_unet_in_gpu(unet = unet) if is_cuda and one_unet_in_gpu_at_time else null_context()
|
||||
|
||||
with context:
|
||||
# prepare low resolution conditioning for upsamplers
|
||||
|
||||
@@ -1 +1 @@
|
||||
__version__ = '1.11.4'
|
||||
__version__ = '1.12.4'
|
||||
|
||||
Reference in New Issue
Block a user