lower default initial learning rate to what Jonathan Ho had in his original repo

commit 591d37e266
parent d1f02e8f49
Author: Phil Wang
Date:   2022-05-14 13:22:43 -07:00
3 changed files with 3 additions and 3 deletions


@@ -7,7 +7,7 @@ def separate_weight_decayable_params(params):
 def get_optimizer(
     params,
-    lr = 3e-4,
+    lr = 2e-5,
     wd = 1e-2,
     betas = (0.9, 0.999),
     filter_by_requires_grad = False

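Not part of the diff, but for orientation: a minimal sketch of how a get_optimizer helper with this signature might be assembled, assuming AdamW and an ndim-based weight-decay split (the function body and the split heuristic are assumptions; only the parameters and defaults above come from the diff).

from torch.optim import AdamW

def separate_weight_decayable_params(params):
    # assumed heuristic: biases and norm gains (ndim < 2) are exempt from weight decay
    wd_params = [p for p in params if p.ndim >= 2]
    no_wd_params = [p for p in params if p.ndim < 2]
    return wd_params, no_wd_params

def get_optimizer(
    params,
    lr = 2e-5,  # new default, matching Jonathan Ho's original repo
    wd = 1e-2,
    betas = (0.9, 0.999),
    filter_by_requires_grad = False
):
    if filter_by_requires_grad:
        params = [p for p in params if p.requires_grad]

    wd_params, no_wd_params = separate_weight_decayable_params(params)

    # weight decay is applied only to the first parameter group
    param_groups = [
        {'params': wd_params},
        {'params': no_wd_params, 'weight_decay': 0.}
    ]

    return AdamW(param_groups, lr = lr, weight_decay = wd, betas = betas)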

@@ -221,7 +221,7 @@ class DecoderTrainer(nn.Module):
         self,
         decoder,
         use_ema = True,
-        lr = 3e-4,
+        lr = 2e-5,
         wd = 1e-2,
         max_grad_norm = None,
         amp = False,

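In effect, a DecoderTrainer constructed without an explicit lr now trains at 2e-5 instead of 3e-4; a hypothetical usage sketch (decoder construction elided):

trainer = DecoderTrainer(decoder)             # picks up the new default, lr = 2e-5
trainer = DecoderTrainer(decoder, lr = 3e-4)  # the old default, now opt-in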

@@ -10,7 +10,7 @@ setup(
       'dream = dalle2_pytorch.cli:dream'
     ],
   },
-  version = '0.2.19',
+  version = '0.2.20',
   license='MIT',
   description = 'DALL-E 2',
   author = 'Phil Wang',