From 124d8577c89becbebedee6d1247a5829f39b11b7 Mon Sep 17 00:00:00 2001 From: Phil Wang Date: Sat, 14 May 2022 00:37:10 -0700 Subject: [PATCH] move the inverse normalization function called before image embeddings are derived from clip to within the diffusion prior and decoder classes --- dalle2_pytorch/dalle2_pytorch.py | 6 +++--- setup.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/dalle2_pytorch/dalle2_pytorch.py b/dalle2_pytorch/dalle2_pytorch.py index 9afb1e0..8ef5f21 100644 --- a/dalle2_pytorch/dalle2_pytorch.py +++ b/dalle2_pytorch/dalle2_pytorch.py @@ -278,7 +278,7 @@ class OpenAIClipAdapter(BaseClipAdapter): def embed_image(self, image): assert not self.cleared image = resize_image_to(image, self.image_size) - image = self.clip_normalize(unnormalize_img(image)) + image = self.clip_normalize(image) image_embed = self.clip.encode_image(image) return EmbeddedImage(l2norm(image_embed.float()), None) @@ -1037,7 +1037,7 @@ class DiffusionPrior(BaseGaussianDiffusion): assert not (self.condition_on_text_encodings and (not exists(text_encodings) and not exists(text))), 'text encodings must be present if you specified you wish to condition on it on initialization' if exists(image): - image_embed, _ = self.clip.embed_image(image) + image_embed, _ = self.clip.embed_image(unnormalize_img(image)) # calculate text conditionings, based on what is passed in @@ -2011,7 +2011,7 @@ class Decoder(BaseGaussianDiffusion): if not exists(image_embed): assert exists(self.clip), 'if you want to derive CLIP image embeddings automatically, you must supply `clip` to the decoder on init' - image_embed, _ = self.clip.embed_image(image) + image_embed, _ = self.clip.embed_image(unnormalize_img(image)) text_encodings = text_mask = None if exists(text) and not exists(text_encodings): diff --git a/setup.py b/setup.py index 49f7a19..0d23cfc 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ setup( 'dream = dalle2_pytorch.cli:dream' ], }, - version = '0.2.14', + version = '0.2.15', license='MIT', description = 'DALL-E 2', author = 'Phil Wang',