diff --git a/README.md b/README.md index ad13861..345efea 100644 --- a/README.md +++ b/README.md @@ -577,14 +577,4 @@ Once built, images will be saved to the same directory the command is invoked } ``` -```bibtex -@article{Arar2021LearnedQF, - title = {Learned Queries for Efficient Local Attention}, - author = {Moab Arar and Ariel Shamir and Amit H. Bermano}, - journal = {ArXiv}, - year = {2021}, - volume = {abs/2112.11435} -} -``` - *Creating noise from data is easy; creating data from noise is generative modeling.* - Yang Song's paper diff --git a/dalle2_pytorch/dalle2_pytorch.py b/dalle2_pytorch/dalle2_pytorch.py index bb0a46e..730b99b 100644 --- a/dalle2_pytorch/dalle2_pytorch.py +++ b/dalle2_pytorch/dalle2_pytorch.py @@ -693,7 +693,7 @@ class DiffusionPrior(nn.Module): # decoder def Upsample(dim): - return QueryAttnUpsample(dim) + return nn.ConvTranspose2d(dim, dim, 4, 2, 1) def Downsample(dim): return nn.Conv2d(dim, dim, 4, 2, 1) diff --git a/dalle2_pytorch/vqgan_vae.py b/dalle2_pytorch/vqgan_vae.py index 8fd8153..39fd45c 100644 --- a/dalle2_pytorch/vqgan_vae.py +++ b/dalle2_pytorch/vqgan_vae.py @@ -378,7 +378,7 @@ class VQGanVAE(nn.Module): for layer_index, (dim_in, dim_out), layer_num_resnet_blocks, layer_use_attn in zip(range(layers), dim_pairs, num_resnet_blocks, use_attn): append(self.encoders, nn.Sequential(nn.Conv2d(dim_in, dim_out, 4, stride = 2, padding = 1), leaky_relu())) - prepend(self.decoders, nn.Sequential(QueryAttnUpsample(dim_out), nn.Conv2d(dim_out, dim_in, 3, padding = 1), leaky_relu())) + prepend(self.decoders, nn.Sequential(nn.ConvTranspose2d(dim_out, dim_in, 4, 2, 1), leaky_relu())) if layer_use_attn: prepend(self.decoders, VQGanAttention(dim = dim_out, heads = attn_heads, dim_head = attn_dim_head, dropout = attn_dropout))