Compare commits


4 Commits
1.0.1 ... 1.0.3

Author           SHA1         Message                                                          Date
Phil Wang        417ff808e6   1.0.3                                                            2022-07-22 13:16:57 -07:00
Aidan Dempster   f3d7e226ba   Changed types to be generic instead of functions (#215)         2022-07-22 13:16:29 -07:00
                              This allows Pylance to do proper type hinting and makes
                              developing extensions to the package much easier.
Phil Wang        48a1302428   1.0.2                                                            2022-07-20 23:01:51 -07:00
Aidan Dempster   ccaa46b81b   Re-introduced change that was accidentally rolled back (#212)   2022-07-20 23:01:19 -07:00
3 changed files with 23 additions and 21 deletions

View File

@@ -528,8 +528,12 @@ class Tracker:
         elif save_type == 'model':
             if isinstance(trainer, DiffusionPriorTrainer):
                 prior = trainer.ema_diffusion_prior.ema_model if trainer.use_ema else trainer.diffusion_prior
-                state_dict = trainer.accelerator.unwrap_model(prior).state_dict()
-                torch.save(state_dict, file_path)
+                prior: DiffusionPrior = trainer.accelerator.unwrap_model(prior)
+                # Remove CLIP if it is part of the model
+                original_clip = prior.clip
+                prior.clip = None
+                model_state_dict = prior.state_dict()
+                prior.clip = original_clip
             elif isinstance(trainer, DecoderTrainer):
                 decoder: Decoder = trainer.accelerator.unwrap_model(trainer.decoder)
                 # Remove CLIP if it is part of the model
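
The hunk above replaces a direct torch.save of the wrapped prior with a save that first detaches the CLIP submodule, so the large, frozen CLIP weights are not duplicated into every checkpoint. A minimal sketch of the same detach/save/restore pattern, with hypothetical Prior and save_model names standing in for the repo's classes:

import torch
import torch.nn as nn

class Prior(nn.Module):
    # Hypothetical stand-in for DiffusionPrior: trained weights plus a frozen CLIP.
    def __init__(self, clip: nn.Module):
        super().__init__()
        self.clip = clip                   # pretrained, distributed separately
        self.net = nn.Linear(512, 512)     # the weights actually worth checkpointing

def save_model(prior: Prior, file_path: str):
    # Remove CLIP if it is part of the model
    original_clip = prior.clip
    prior.clip = None                      # nn.Module permits nulling a registered submodule
    model_state_dict = prior.state_dict()  # now contains no clip.* tensors
    prior.clip = original_clip             # restore; the live model is unchanged
    torch.save(model_state_dict, file_path)

save_model(Prior(clip=nn.Linear(512, 512)), 'prior.pt')

Setting a registered submodule to None is explicitly allowed by nn.Module, and state_dict() skips None entries, so the attribute swap drops the clip.* tensors from the checkpoint without mutating the model that keeps training.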

View File

@@ -1,7 +1,7 @@
 import json
 from torchvision import transforms as T
 from pydantic import BaseModel, validator, root_validator
-from typing import List, Iterable, Optional, Union, Tuple, Dict, Any
+from typing import List, Optional, Union, Tuple, Dict, Any, TypeVar
 
 from x_clip import CLIP as XCLIP
 from coca_pytorch import CoCa
@@ -25,11 +25,9 @@ def exists(val):
 def default(val, d):
     return val if exists(val) else d
 
-def ListOrTuple(inner_type):
-    return Union[List[inner_type], Tuple[inner_type]]
-
-def SingularOrIterable(inner_type):
-    return Union[inner_type, ListOrTuple(inner_type)]
+InnerType = TypeVar('InnerType')
+ListOrTuple = Union[List[InnerType], Tuple[InnerType]]
+SingularOrIterable = Union[InnerType, ListOrTuple[InnerType]]
 
 # general pydantic classes
@@ -222,13 +220,13 @@ class TrainDiffusionPriorConfig(BaseModel):
 
 class UnetConfig(BaseModel):
     dim: int
-    dim_mults: ListOrTuple(int)
+    dim_mults: ListOrTuple[int]
     image_embed_dim: int = None
     text_embed_dim: int = None
     cond_on_text_encodings: bool = None
     cond_dim: int = None
     channels: int = 3
-    self_attn: ListOrTuple(int)
+    self_attn: ListOrTuple[int]
     attn_dim_head: int = 32
     attn_heads: int = 16
     init_cross_embed: bool = True
@@ -237,16 +235,16 @@ class UnetConfig(BaseModel):
     class Config:
         extra = "allow"
 
 class DecoderConfig(BaseModel):
-    unets: ListOrTuple(UnetConfig)
+    unets: ListOrTuple[UnetConfig]
     image_size: int = None
-    image_sizes: ListOrTuple(int) = None
+    image_sizes: ListOrTuple[int] = None
     clip: Optional[AdapterConfig] # The clip model to use if embeddings are not provided
     channels: int = 3
     timesteps: int = 1000
-    sample_timesteps: Optional[SingularOrIterable(int)] = None
+    sample_timesteps: Optional[SingularOrIterable[int]] = None
     loss_type: str = 'l2'
-    beta_schedule: ListOrTuple(str) = 'cosine'
-    learned_variance: bool = True
+    beta_schedule: ListOrTuple[str] = None # None means all cosine
+    learned_variance: SingularOrIterable[bool] = True
     image_cond_drop_prob: float = 0.1
     text_cond_drop_prob: float = 0.5
@@ -305,11 +303,11 @@ class DecoderDataConfig(BaseModel):
 
 class DecoderTrainConfig(BaseModel):
     epochs: int = 20
-    lr: SingularOrIterable(float) = 1e-4
-    wd: SingularOrIterable(float) = 0.01
-    warmup_steps: Optional[SingularOrIterable(int)] = None
+    lr: SingularOrIterable[float] = 1e-4
+    wd: SingularOrIterable[float] = 0.01
+    warmup_steps: Optional[SingularOrIterable[int]] = None
     find_unused_parameters: bool = True
-    max_grad_norm: SingularOrIterable(float) = 0.5
+    max_grad_norm: SingularOrIterable[float] = 0.5
     save_every_n_samples: int = 100000
     n_sample_images: int = 6 # The number of example images to produce when sampling the train and test dataset
     cond_scale: Union[float, List[float]] = 1.0
@@ -320,7 +318,7 @@ class DecoderTrainConfig(BaseModel):
     use_ema: bool = True
     ema_beta: float = 0.999
     amp: bool = False
-    unet_training_mask: ListOrTuple(bool) = None # If None, use all unets
+    unet_training_mask: ListOrTuple[bool] = None # If None, use all unets
 
 class DecoderEvaluateConfig(BaseModel):
     n_evaluation_samples: int = 1000
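
The typing change above is mechanical once the aliases exist: every ListOrTuple(X) call site becomes a ListOrTuple[X] subscript. The point of the commit is that a function call in an annotation is opaque to a static analyzer, while a TypeVar-parameterized alias is an ordinary generic that Pylance can expand. A self-contained sketch (the ConfigSketch model is a hypothetical stand-in for the configs above; pydantic v1 is assumed, matching the validator/root_validator imports):

from typing import List, Optional, Tuple, TypeVar, Union

from pydantic import BaseModel

InnerType = TypeVar('InnerType')

# Generic aliases: subscripting substitutes the TypeVar, so
# ListOrTuple[int] is Union[List[int], Tuple[int]] to the type checker.
ListOrTuple = Union[List[InnerType], Tuple[InnerType]]
SingularOrIterable = Union[InnerType, ListOrTuple[InnerType]]

class ConfigSketch(BaseModel):
    dim_mults: ListOrTuple[int]
    lr: SingularOrIterable[float] = 1e-4
    warmup_steps: Optional[SingularOrIterable[int]] = None

print(ConfigSketch(dim_mults=[1, 2, 4], lr=[1e-4, 1e-5]))

The old ListOrTuple(int) form still worked at runtime, because pydantic evaluates annotations dynamically, but editors could not resolve the call to a concrete type; the subscripted form type-checks and gives completion on config fields, which is what makes extending these configs easier.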

View File

@@ -1 +1 @@
-__version__ = '1.0.1'
+__version__ = '1.0.3'