Compare commits


3 Commits

3 changed files with 6 additions and 6 deletions

View File

@@ -337,7 +337,7 @@ def discretized_gaussian_log_likelihood(x, *, means, log_scales, thres = 0.999):
     # attempting to correct nan gradients when learned variance is turned on
     # in the setting of deepspeed fp16
-    eps = 1e-12 if x.dtype == torch.float32 else 1e-5
+    eps = 1e-12 if x.dtype == torch.float32 else 1e-3
     centered_x = x - means
     inv_stdv = torch.exp(-log_scales)
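A plausible reading of this bump (an assumption, not stated in the diff): at the clamp floor, the gradient of log(t) is 1/eps, and 1e5 overflows half precision's maximum of roughly 65504, which would surface as inf/nan gradients under deepspeed fp16; 1e-3 keeps that gradient at about 1e3. A quick check:

# hedged illustration, assuming the nan gradients come from 1/eps
# overflowing fp16 (max ~65504)
import torch

for eps in (1e-5, 1e-3):
    floor = torch.tensor(eps, dtype = torch.float16)
    print(eps, 1.0 / floor)  # 1e-5 -> inf in fp16; 1e-3 -> ~1000, safely representable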
@@ -345,8 +345,8 @@ def discretized_gaussian_log_likelihood(x, *, means, log_scales, thres = 0.999):
     cdf_plus = approx_standard_normal_cdf(plus_in)
     min_in = inv_stdv * (centered_x - 1. / 255.)
     cdf_min = approx_standard_normal_cdf(min_in)
-    log_cdf_plus = log(cdf_plus)
-    log_one_minus_cdf_min = log(1. - cdf_min)
+    log_cdf_plus = log(cdf_plus, eps = eps)
+    log_one_minus_cdf_min = log(1. - cdf_min, eps = eps)
     cdf_delta = cdf_plus - cdf_min
     log_probs = torch.where(x < -thres,
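For context, a minimal sketch of the log helper these lines call, assuming the usual clamp-before-log pattern (the actual helper lives elsewhere in the file and may differ):

# minimal sketch, assuming a clamp-before-log helper; the new eps keyword
# threads the dtype-dependent floor chosen above into the clamp
import torch

def log(t, eps = 1e-12):
    # clamping first means values at or near zero coming out of the CDF
    # approximation produce log(eps) instead of -inf
    return torch.log(t.clamp(min = eps))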

View File

@@ -228,7 +228,7 @@ class DiffusionPriorTrainer(nn.Module):
         # track steps internally
-        self.register_buffer('step', torch.tensor([0], device = device))
+        self.register_buffer('step', torch.tensor([0], device = self.device))
         # accelerator wrappers
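A hedged sketch of the distinction (TrainerSketch and its device property are illustrative names, not from the diff): the raw device argument can be None or stale by the time buffers are created, while a self.device property can resolve the module's actual placement:

import torch
import torch.nn as nn

class TrainerSketch(nn.Module):
    # illustrative stand-in for the trainer's device handling
    def __init__(self, device = None):
        super().__init__()
        self._device = torch.device(device if device is not None else 'cpu')
        # the buffer is created on the trainer's resolved device,
        # not on whatever was passed in (possibly None)
        self.register_buffer('step', torch.tensor([0], device = self.device))

    @property
    def device(self):
        return self._device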
@@ -473,7 +473,7 @@ class DecoderTrainer(nn.Module):
         lr, wd, eps, warmup_steps = map(partial(cast_tuple, length = self.num_unets), (lr, wd, eps, warmup_steps))
-        assert all([unet_lr < 1e-3 for unet_lr in lr]), 'your learning rate is too high, recommend sticking with 1e-4, at most 5e-4'
+        assert all([unet_lr <= 1e-2 for unet_lr in lr]), 'your learning rate is too high, recommend sticking with 1e-4, at most 5e-4'
         optimizers = []
         schedulers = []
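The guard is loosened from a strict bound of 1e-3 to an inclusive bound of 1e-2, while the assertion message still recommends 1e-4 to at most 5e-4. A quick check of what the new bound admits (the lr values are illustrative only):

# illustrative per-unet learning rates; the assert mirrors the updated guard
lr = (1e-4, 5e-4, 1e-2)
assert all([unet_lr <= 1e-2 for unet_lr in lr])  # 1e-2 passes now, failed under < 1e-3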

View File

@@ -1 +1 @@
-__version__ = '0.16.11'
+__version__ = '0.16.14'