diff --git a/dalle2_pytorch/optimizer.py b/dalle2_pytorch/optimizer.py index 5de2bfa..514bed9 100644 --- a/dalle2_pytorch/optimizer.py +++ b/dalle2_pytorch/optimizer.py @@ -7,7 +7,7 @@ def separate_weight_decayable_params(params): def get_optimizer( params, - lr = 3e-4, + lr = 2e-5, wd = 1e-2, betas = (0.9, 0.999), filter_by_requires_grad = False diff --git a/dalle2_pytorch/train.py b/dalle2_pytorch/train.py index 325ee3f..0fddb8a 100644 --- a/dalle2_pytorch/train.py +++ b/dalle2_pytorch/train.py @@ -221,7 +221,7 @@ class DecoderTrainer(nn.Module): self, decoder, use_ema = True, - lr = 3e-4, + lr = 2e-5, wd = 1e-2, max_grad_norm = None, amp = False, diff --git a/setup.py b/setup.py index cfe8f7a..b2327e6 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ setup( 'dream = dalle2_pytorch.cli:dream' ], }, - version = '0.2.19', + version = '0.2.20', license='MIT', description = 'DALL-E 2', author = 'Phil Wang',