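Use the module's train/eval mode (`self.training`) to decide whether the trainers' forward pass calls `.backward()` on the loss, so that calling forward in eval mode only computes and returns the loss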

2025-12-19 09:44:19 +01:00 · 2022-05-15 14:20:59 -07:00
parent 156fe5ed9f
commit 7b7a62044a
2 changed files with 7 additions and 3 deletions
--- a/dalle2_pytorch/train.py
+++ b/dalle2_pytorch/train.py
@@ -279,6 +279,8 @@ class DiffusionPriorTrainer(nn.Module):
                loss = loss * chunk_size_frac

            total_loss += loss.item()
+
+            if self.training:
                self.scaler.scale(loss).backward()

        return total_loss
@@ -406,6 +408,8 @@ class DecoderTrainer(nn.Module):
                loss = loss * chunk_size_frac

            total_loss += loss.item()
+
+            if self.training:
                self.scale(loss, unet_number = unet_number).backward()

        return total_loss
--- a/setup.py
+++ b/setup.py
@@ -10,7 +10,7 @@ setup(
      'dream = dalle2_pytorch.cli:dream'
    ],
  },
-  version = '0.2.31',
+  version = '0.2.32',
  license='MIT',
  description = 'DALL-E 2',
  author = 'Phil Wang',