add learned padding tokens, same strategy as dalle1, for diffusion prior, and get rid of masking in causal transformer

This commit is contained in:
Phil Wang
2022-07-12 17:33:14 -07:00
parent cd26c6b17d
commit 3ee3c56d2a
3 changed files with 31 additions and 15 deletions


@@ -129,11 +129,11 @@ class AdapterConfig(BaseModel):
 class DiffusionPriorNetworkConfig(BaseModel):
     dim: int
     depth: int
+    max_text_len: int = None
     num_timesteps: int = None
     num_time_embeds: int = 1
     num_image_embeds: int = 1
     num_text_embeds: int = 1
-    attend_all_text_encodings: bool = True
     dim_head: int = 64
     heads: int = 8
     ff_mult: int = 4
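
For context, a minimal sketch of the strategy the commit message describes: rather than passing an attention mask for padded text positions into the causal transformer, the text encodings are padded (or truncated) to a fixed max_text_len (the new config field above) and the padding slots are filled with learned null embeddings, the same trick DALL-E 1 used for its text padding tokens. This is an illustration under stated assumptions, not the repository's code; the module name NullPaddedText and its signature are hypothetical.

    import torch
    import torch.nn.functional as F
    from torch import nn

    class NullPaddedText(nn.Module):
        # hypothetical helper illustrating learned padding tokens
        def __init__(self, dim, max_text_len):
            super().__init__()
            self.max_text_len = max_text_len
            # one learned embedding per padding slot, trained with the rest of the network
            self.null_text_encodings = nn.Parameter(torch.randn(1, max_text_len, dim))

        def forward(self, text_encodings, text_mask):
            # text_encodings: (batch, seq, dim); text_mask: (batch, seq) bool, True where real tokens are
            batch, seq, dim = text_encodings.shape
            remainder = self.max_text_len - seq
            if remainder > 0:
                # pad sequence dimension up to max_text_len
                text_encodings = F.pad(text_encodings, (0, 0, 0, remainder), value=0.)
                text_mask = F.pad(text_mask, (0, remainder), value=False)
            else:
                text_encodings = text_encodings[:, :self.max_text_len]
                text_mask = text_mask[:, :self.max_text_len]

            # wherever the mask is False, substitute the learned null encoding;
            # the causal transformer can then attend to every position without a mask
            null = self.null_text_encodings.expand(batch, -1, -1)
            return torch.where(text_mask.unsqueeze(-1), text_encodings, null)

Dropping the mask simplifies the transformer forward pass, and the learned padding embeddings give the diffusion prior a trainable representation of "no text" at each position instead of a masked-out hole.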