From my vision transformer experience, an attention head dimension of 32 is sufficient for image feature maps

2026-02-23 08:15:18 +01:00 · 2022-04-20 11:40:24 -07:00
parent b8e8d3c164
commit faebf4c8b8
2 changed files with 15 additions and 9 deletions
--- a/setup.py
+++ b/setup.py
@@ -10,7 +10,7 @@ setup(
      'dream = dalle2_pytorch.cli:dream'
    ],
  },
-  version = '0.0.30',
+  version = '0.0.31',
  license='MIT',
  description = 'DALL-E 2',
  author = 'Phil Wang',