diff --git a/README.md b/README.md
index ad13861..345efea 100644
--- a/README.md
+++ b/README.md
@@ -577,14 +577,4 @@ Once built, images will be saved to the same directory the command is invoked
 }
 ```
 
-```bibtex
-@article{Arar2021LearnedQF,
-    title   = {Learned Queries for Efficient Local Attention},
-    author  = {Moab Arar and Ariel Shamir and Amit H. Bermano},
-    journal = {ArXiv},
-    year    = {2021},
-    volume  = {abs/2112.11435}
-}
-```
-
 *Creating noise from data is easy; creating data from noise is generative modeling.* - Yang Song's <a href="https://arxiv.org/abs/2011.13456">paper</a>
diff --git a/dalle2_pytorch/dalle2_pytorch.py b/dalle2_pytorch/dalle2_pytorch.py
index bb0a46e..730b99b 100644
--- a/dalle2_pytorch/dalle2_pytorch.py
+++ b/dalle2_pytorch/dalle2_pytorch.py
@@ -693,7 +693,7 @@ class DiffusionPrior(nn.Module):
 # decoder
 
 def Upsample(dim):
-    return QueryAttnUpsample(dim)
+    return nn.ConvTranspose2d(dim, dim, 4, 2, 1)
 
 def Downsample(dim):
     return nn.Conv2d(dim, dim, 4, 2, 1)
diff --git a/dalle2_pytorch/vqgan_vae.py b/dalle2_pytorch/vqgan_vae.py
index 8fd8153..39fd45c 100644
--- a/dalle2_pytorch/vqgan_vae.py
+++ b/dalle2_pytorch/vqgan_vae.py
@@ -378,7 +378,7 @@ class VQGanVAE(nn.Module):
 
         for layer_index, (dim_in, dim_out), layer_num_resnet_blocks, layer_use_attn in zip(range(layers), dim_pairs, num_resnet_blocks, use_attn):
             append(self.encoders, nn.Sequential(nn.Conv2d(dim_in, dim_out, 4, stride = 2, padding = 1), leaky_relu()))
-            prepend(self.decoders, nn.Sequential(QueryAttnUpsample(dim_out), nn.Conv2d(dim_out, dim_in, 3, padding = 1), leaky_relu()))
+            prepend(self.decoders, nn.Sequential(nn.ConvTranspose2d(dim_out, dim_in, 4, 2, 1), leaky_relu()))
 
             if layer_use_attn:
                 prepend(self.decoders, VQGanAttention(dim = dim_out, heads = attn_heads, dim_head = attn_dim_head, dropout = attn_dropout))