Compare commits

..

1 Commits

Author SHA1 Message Date
Phil Wang
c56336a104 pydantic 2 2023-07-15 09:08:39 -07:00
6 changed files with 39 additions and 36 deletions

View File

@@ -9,7 +9,7 @@
"dim_mults": [1, 2, 4, 8],
"attn_dim_head": 16,
"attn_heads": 4,
"self_attn": [false, true, true, true]
"self_attn": [false, true, true, true]
}
],
"clip": {

View File

@@ -1,3 +1,10 @@
import torch
from packaging import version
if version.parse(torch.__version__) >= version.parse('2.0.0'):
from einops._torch_specific import allow_ops_in_compiled_graph
allow_ops_in_compiled_graph()
from dalle2_pytorch.version import __version__
from dalle2_pytorch.dalle2_pytorch import DALLE2, DiffusionPriorNetwork, DiffusionPrior, Unet, Decoder
from dalle2_pytorch.dalle2_pytorch import OpenAIClipAdapter, OpenClipAdapter

View File

@@ -39,11 +39,11 @@ class TrainSplitConfig(BaseModel):
test: float = 0.1
@model_validator(mode = 'after')
def validate_all(self, m):
def validate_all(self):
actual_sum = sum([*dict(self).values()])
if actual_sum != 1.:
raise ValueError(f'{dict(self).keys()} must sum to 1.0. Found: {actual_sum}')
return self
raise ValueError(f'{fields.keys()} must sum to 1.0. Found: {actual_sum}')
return fields
class TrackerLogConfig(BaseModel):
log_type: str = 'console'
@@ -90,7 +90,7 @@ class TrackerConfig(BaseModel):
data_path: str = '.tracker_data'
overwrite_data_path: bool = False
log: TrackerLogConfig
load: Optional[TrackerLoadConfig] = None
load: Optional[TrackerLoadConfig]
save: Union[List[TrackerSaveConfig], TrackerSaveConfig]
def create(self, full_config: BaseModel, extra_config: dict, dummy_mode: bool = False) -> Tracker:
@@ -115,7 +115,7 @@ class TrackerConfig(BaseModel):
class AdapterConfig(BaseModel):
make: str = "openai"
model: str = "ViT-L/14"
base_model_kwargs: Optional[Dict[str, Any]] = None
base_model_kwargs: Dict[str, Any] = None
def create(self):
if self.make == "openai":
@@ -134,8 +134,8 @@ class AdapterConfig(BaseModel):
class DiffusionPriorNetworkConfig(BaseModel):
dim: int
depth: int
max_text_len: Optional[int] = None
num_timesteps: Optional[int] = None
max_text_len: int = None
num_timesteps: int = None
num_time_embeds: int = 1
num_image_embeds: int = 1
num_text_embeds: int = 1
@@ -158,7 +158,7 @@ class DiffusionPriorNetworkConfig(BaseModel):
return DiffusionPriorNetwork(**kwargs)
class DiffusionPriorConfig(BaseModel):
clip: Optional[AdapterConfig] = None
clip: AdapterConfig = None
net: DiffusionPriorNetworkConfig
image_embed_dim: int
image_size: int
@@ -195,7 +195,7 @@ class DiffusionPriorTrainConfig(BaseModel):
use_ema: bool = True
ema_beta: float = 0.99
amp: bool = False
warmup_steps: Optional[int] = None # number of warmup steps
warmup_steps: int = None # number of warmup steps
save_every_seconds: int = 3600 # how often to save
eval_timesteps: List[int] = [64] # which sampling timesteps to evaluate with
best_validation_loss: float = 1e9 # the current best valudation loss observed
@@ -228,12 +228,12 @@ class TrainDiffusionPriorConfig(BaseModel):
class UnetConfig(BaseModel):
dim: int
dim_mults: ListOrTuple[int]
image_embed_dim: Optional[int] = None
text_embed_dim: Optional[int] = None
cond_on_text_encodings: Optional[bool] = None
cond_dim: Optional[int] = None
image_embed_dim: int = None
text_embed_dim: int = None
cond_on_text_encodings: bool = None
cond_dim: int = None
channels: int = 3
self_attn: SingularOrIterable[bool] = False
self_attn: ListOrTuple[int]
attn_dim_head: int = 32
attn_heads: int = 16
init_cross_embed: bool = True
@@ -243,14 +243,14 @@ class UnetConfig(BaseModel):
class DecoderConfig(BaseModel):
unets: ListOrTuple[UnetConfig]
image_size: Optional[int] = None
image_size: int = None
image_sizes: ListOrTuple[int] = None
clip: Optional[AdapterConfig] = None # The clip model to use if embeddings are not provided
clip: Optional[AdapterConfig] # The clip model to use if embeddings are not provided
channels: int = 3
timesteps: int = 1000
sample_timesteps: Optional[SingularOrIterable[Optional[int]]] = None
loss_type: str = 'l2'
beta_schedule: Optional[ListOrTuple[str]] = None # None means all cosine
beta_schedule: ListOrTuple[str] = None # None means all cosine
learned_variance: SingularOrIterable[bool] = True
image_cond_drop_prob: float = 0.1
text_cond_drop_prob: float = 0.5
@@ -278,9 +278,9 @@ class DecoderConfig(BaseModel):
extra = "allow"
class DecoderDataConfig(BaseModel):
webdataset_base_url: str # path to a webdataset with jpg images
img_embeddings_url: Optional[str] = None # path to .npy files with embeddings
text_embeddings_url: Optional[str] = None # path to .npy files with embeddings
webdataset_base_url: str # path to a webdataset with jpg images
img_embeddings_url: Optional[str] # path to .npy files with embeddings
text_embeddings_url: Optional[str] # path to .npy files with embeddings
num_workers: int = 4
batch_size: int = 64
start_shard: int = 0
@@ -320,20 +320,20 @@ class DecoderTrainConfig(BaseModel):
n_sample_images: int = 6 # The number of example images to produce when sampling the train and test dataset
cond_scale: Union[float, List[float]] = 1.0
device: str = 'cuda:0'
epoch_samples: Optional[int] = None # Limits the number of samples per epoch. None means no limit. Required if resample_train is true as otherwise the number of samples per epoch is infinite.
validation_samples: Optional[int] = None # Same as above but for validation.
epoch_samples: int = None # Limits the number of samples per epoch. None means no limit. Required if resample_train is true as otherwise the number of samples per epoch is infinite.
validation_samples: int = None # Same as above but for validation.
save_immediately: bool = False
use_ema: bool = True
ema_beta: float = 0.999
amp: bool = False
unet_training_mask: Optional[ListOrTuple[bool]] = None # If None, use all unets
unet_training_mask: ListOrTuple[bool] = None # If None, use all unets
class DecoderEvaluateConfig(BaseModel):
n_evaluation_samples: int = 1000
FID: Optional[Dict[str, Any]] = None
IS: Optional[Dict[str, Any]] = None
KID: Optional[Dict[str, Any]] = None
LPIPS: Optional[Dict[str, Any]] = None
FID: Dict[str, Any] = None
IS: Dict[str, Any] = None
KID: Dict[str, Any] = None
LPIPS: Dict[str, Any] = None
class TrainDecoderConfig(BaseModel):
decoder: DecoderConfig
@@ -347,14 +347,11 @@ class TrainDecoderConfig(BaseModel):
def from_json_path(cls, json_path):
with open(json_path) as f:
config = json.load(f)
print(config)
return cls(**config)
@model_validator(mode = 'after')
def check_has_embeddings(self, m):
def check_has_embeddings(cls, values):
# Makes sure that enough information is provided to get the embeddings specified for training
values = dict(self)
data_config, decoder_config = values.get('data'), values.get('decoder')
if not exists(data_config) or not exists(decoder_config):
@@ -379,4 +376,4 @@ class TrainDecoderConfig(BaseModel):
if text_emb_url:
assert using_text_embeddings, "Text embeddings are being loaded, but text embeddings are not being conditioned on. This will slow down the dataloader for no reason."
return m
return values

View File

@@ -1 +1 @@
__version__ = '1.15.6'
__version__ = '1.15.0'

View File

@@ -30,7 +30,7 @@ setup(
'clip-anytorch>=2.5.2',
'coca-pytorch>=0.0.5',
'ema-pytorch>=0.0.7',
'einops>=0.7.0',
'einops>=0.6.1',
'embedding-reader',
'kornia>=0.5.4',
'numpy',

View File

@@ -577,7 +577,6 @@ def initialize_training(config: TrainDecoderConfig, config_path):
shards_per_process = len(all_shards) // world_size
assert shards_per_process > 0, "Not enough shards to split evenly"
my_shards = all_shards[rank * shards_per_process: (rank + 1) * shards_per_process]
dataloaders = create_dataloaders (
available_shards=my_shards,
img_preproc = config.data.img_preproc,