diff --git a/dalle2_pytorch/train.py b/dalle2_pytorch/train.py index 66124e6..b0b31c7 100644 --- a/dalle2_pytorch/train.py +++ b/dalle2_pytorch/train.py @@ -279,7 +279,9 @@ class DiffusionPriorTrainer(nn.Module): loss = loss * chunk_size_frac total_loss += loss.item() - self.scaler.scale(loss).backward() + + if self.training: + self.scaler.scale(loss).backward() return total_loss @@ -406,6 +408,8 @@ class DecoderTrainer(nn.Module): loss = loss * chunk_size_frac total_loss += loss.item() - self.scale(loss, unet_number = unet_number).backward() + + if self.training: + self.scale(loss, unet_number = unet_number).backward() return total_loss diff --git a/setup.py b/setup.py index 214487d..b6203e4 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ setup( 'dream = dalle2_pytorch.cli:dream' ], }, - version = '0.2.31', + version = '0.2.32', license='MIT', description = 'DALL-E 2', author = 'Phil Wang',