Allow disabling the separate grouping of weight-decayable parameters, to help debug an optimizer state dict problem

This commit is contained in:
Phil Wang
2022-05-24 21:42:32 -07:00
parent 8864fd0aa7
commit 857b9fbf1e
3 changed files with 14 additions and 7 deletions

View File

@@ -12,6 +12,7 @@ def get_optimizer(
betas = (0.9, 0.999),
eps = 1e-8,
filter_by_requires_grad = False,
group_wd_params = True,
**kwargs
):
if filter_by_requires_grad:
@@ -21,11 +22,13 @@ def get_optimizer(
return Adam(params, lr = lr, betas = betas, eps = eps)
params = set(params)
wd_params, no_wd_params = separate_weight_decayable_params(params)
param_groups = [
{'params': list(wd_params)},
{'params': list(no_wd_params), 'weight_decay': 0},
]
if group_wd_params:
wd_params, no_wd_params = separate_weight_decayable_params(params)
return AdamW(param_groups, lr = lr, weight_decay = wd, betas = betas, eps = eps)
params = [
{'params': list(wd_params)},
{'params': list(no_wd_params), 'weight_decay': 0},
]
return AdamW(params, lr = lr, weight_decay = wd, betas = betas, eps = eps)