use cross attention for conditioning unet based on image embedding tokens (which opens up the door to conditioning on text encodings as well)

2026-02-23 18:04:44 +01:00 · 2022-04-14 10:10:04 -07:00
parent 95b018374a
commit 68e9883f59
3 changed files with 124 additions and 44 deletions
--- a/setup.py
+++ b/setup.py
@@ -10,7 +10,7 @@ setup(
      'dream = dalle2_pytorch.cli:dream'
    ],
  },
-  version = '0.0.10',
+  version = '0.0.11',
  license='MIT',
  description = 'DALL-E 2',
  author = 'Phil Wang',