Mirror of https://github.com/lucidrains/DALLE2-pytorch.git (last synced 2025-12-19 17:54:20 +01:00).
Lower the default initial learning rate from 3e-4 to 2e-5, matching the value Jonathan Ho used in his original repo.
This commit is contained in:
@@ -7,7 +7,7 @@ def separate_weight_decayable_params(params):
|
|||||||
|
|
||||||
def get_optimizer(
|
def get_optimizer(
|
||||||
params,
|
params,
|
||||||
lr = 3e-4,
|
lr = 2e-5,
|
||||||
wd = 1e-2,
|
wd = 1e-2,
|
||||||
betas = (0.9, 0.999),
|
betas = (0.9, 0.999),
|
||||||
filter_by_requires_grad = False
|
filter_by_requires_grad = False
|
||||||
|
|||||||
@@ -221,7 +221,7 @@ class DecoderTrainer(nn.Module):
|
|||||||
self,
|
self,
|
||||||
decoder,
|
decoder,
|
||||||
use_ema = True,
|
use_ema = True,
|
||||||
lr = 3e-4,
|
lr = 2e-5,
|
||||||
wd = 1e-2,
|
wd = 1e-2,
|
||||||
max_grad_norm = None,
|
max_grad_norm = None,
|
||||||
amp = False,
|
amp = False,
|
||||||
|
|||||||
Reference in New Issue
Block a user