Lower the default initial learning rate from 3e-4 to 2e-5, matching what Jonathan Ho had in his original repo

2025-12-19 17:54:20 +01:00 · 2022-05-14 13:22:43 -07:00
parent d1f02e8f49
commit 591d37e266
3 changed files with 3 additions and 3 deletions
--- a/dalle2_pytorch/optimizer.py
+++ b/dalle2_pytorch/optimizer.py
@@ -7,7 +7,7 @@ def separate_weight_decayable_params(params):

 def get_optimizer(
    params,
-    lr = 3e-4,
+    lr = 2e-5,
    wd = 1e-2,
    betas = (0.9, 0.999),
    filter_by_requires_grad = False
--- a/dalle2_pytorch/train.py
+++ b/dalle2_pytorch/train.py
@@ -221,7 +221,7 @@ class DecoderTrainer(nn.Module):
        self,
        decoder,
        use_ema = True,
-        lr = 3e-4,
+        lr = 2e-5,
        wd = 1e-2,
        max_grad_norm = None,
        amp = False,
--- a/setup.py
+++ b/setup.py
@@ -10,7 +10,7 @@ setup(
      'dream = dalle2_pytorch.cli:dream'
    ],
  },
-  version = '0.2.19',
+  version = '0.2.20',
  license='MIT',
  description = 'DALL-E 2',
  author = 'Phil Wang',