diff --git a/dalle2_pytorch/trainer.py b/dalle2_pytorch/trainer.py index 73f6108..b02995e 100644 --- a/dalle2_pytorch/trainer.py +++ b/dalle2_pytorch/trainer.py @@ -451,6 +451,8 @@ class DecoderTrainer(nn.Module): lr, wd, eps = map(partial(cast_tuple, length = self.num_unets), (lr, wd, eps)) + assert all([unet_lr < 1e-3 for unet_lr in lr]), 'your learning rate is too high, recommend sticking with 1e-4, at most 5e-4' + optimizers = [] for unet, unet_lr, unet_wd, unet_eps in zip(decoder.unets, lr, wd, eps): diff --git a/dalle2_pytorch/version.py b/dalle2_pytorch/version.py index 8e747cc..eebcbfe 100644 --- a/dalle2_pytorch/version.py +++ b/dalle2_pytorch/version.py @@ -1 +1 @@ -__version__ = '0.11.4' +__version__ = '0.11.5'