mirror of
https://github.com/lucidrains/DALLE2-pytorch.git
synced 2026-02-13 03:54:35 +01:00
Compare commits
5 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a2ee3fa3cc | ||
|
|
a58a370d75 | ||
|
|
1662bbf226 | ||
|
|
5be1f57448 | ||
|
|
c52ce58e10 |
2
.github/FUNDING.yml
vendored
2
.github/FUNDING.yml
vendored
@@ -1 +1 @@
|
||||
github: [lucidrains]
|
||||
github: [nousr, Veldrovive, lucidrains]
|
||||
|
||||
@@ -1550,6 +1550,7 @@ class Unet(nn.Module):
|
||||
init_conv_kernel_size = 7,
|
||||
resnet_groups = 8,
|
||||
num_resnet_blocks = 2,
|
||||
init_cross_embed = True,
|
||||
init_cross_embed_kernel_sizes = (3, 7, 15),
|
||||
cross_embed_downsample = False,
|
||||
cross_embed_downsample_kernel_sizes = (2, 4),
|
||||
@@ -1578,7 +1579,7 @@ class Unet(nn.Module):
|
||||
init_channels = channels if not lowres_cond else channels * 2 # in cascading diffusion, one concats the low resolution image, blurred, for conditioning the higher resolution synthesis
|
||||
init_dim = default(init_dim, dim)
|
||||
|
||||
self.init_conv = CrossEmbedLayer(init_channels, dim_out = init_dim, kernel_sizes = init_cross_embed_kernel_sizes, stride = 1)
|
||||
self.init_conv = CrossEmbedLayer(init_channels, dim_out = init_dim, kernel_sizes = init_cross_embed_kernel_sizes, stride = 1) if init_cross_embed else nn.Conv2d(init_channels, init_dim, init_conv_kernel_size, padding = init_conv_kernel_size // 2)
|
||||
|
||||
dims = [init_dim, *map(lambda m: dim * m, dim_mults)]
|
||||
in_out = list(zip(dims[:-1], dims[1:]))
|
||||
@@ -1926,7 +1927,7 @@ class Unet(nn.Module):
|
||||
hiddens.append(x)
|
||||
|
||||
x = attn(x)
|
||||
hiddens.append(x)
|
||||
hiddens.append(x.contiguous())
|
||||
|
||||
if exists(post_downsample):
|
||||
x = post_downsample(x)
|
||||
@@ -2172,6 +2173,7 @@ class Decoder(nn.Module):
|
||||
# random crop sizes (for super-resoluting unets at the end of cascade?)
|
||||
|
||||
self.random_crop_sizes = cast_tuple(random_crop_sizes, len(image_sizes))
|
||||
assert not exists(self.random_crop_sizes[0]), 'you would not need to randomly crop the image for the base unet'
|
||||
|
||||
# predict x0 config
|
||||
|
||||
|
||||
@@ -225,6 +225,7 @@ class UnetConfig(BaseModel):
|
||||
self_attn: ListOrTuple(int)
|
||||
attn_dim_head: int = 32
|
||||
attn_heads: int = 16
|
||||
init_cross_embed: bool = True
|
||||
|
||||
class Config:
|
||||
extra = "allow"
|
||||
|
||||
@@ -1 +1 @@
|
||||
__version__ = '0.24.0'
|
||||
__version__ = '0.24.3'
|
||||
|
||||
Reference in New Issue
Block a user