diff --git a/ldm/models/diffusion/plms.py b/ldm/models/diffusion/plms.py index 7002a36..ddb47f9 100644 --- a/ldm/models/diffusion/plms.py +++ b/ldm/models/diffusion/plms.py @@ -233,7 +233,7 @@ class PLMSSampler(object): # 2nd order Pseudo Linear Multistep (Adams-Bashforth) e_t_prime = (3 * e_t - old_eps[-1]) / 2 elif len(old_eps) == 2: - # 3nd order Pseudo Linear Multistep (Adams-Bashforth) + # 3rd order Pseudo Linear Multistep (Adams-Bashforth) e_t_prime = (23 * e_t - 16 * old_eps[-1] + 5 * old_eps[-2]) / 12 elif len(old_eps) >= 3: - # 4nd order Pseudo Linear Multistep (Adams-Bashforth) + # 4th order Pseudo Linear Multistep (Adams-Bashforth) diff --git a/ldm/modules/attention.py b/ldm/modules/attention.py index 509cd87..461d08b 100644 --- a/ldm/modules/attention.py +++ b/ldm/modules/attention.py @@ -12,9 +12,9 @@ from ldm.modules.diffusionmodules.util import checkpoint try: import xformers import xformers.ops - XFORMERS_IS_AVAILBLE = True + XFORMERS_IS_AVAILABLE = True except: - XFORMERS_IS_AVAILBLE = False + XFORMERS_IS_AVAILABLE = False # CrossAttn precision handling import os @@ -251,7 +251,7 @@ class BasicTransformerBlock(nn.Module): def __init__(self, dim, n_heads, d_head, dropout=0., context_dim=None, gated_ff=True, checkpoint=True, disable_self_attn=False): super().__init__() - attn_mode = "softmax-xformers" if XFORMERS_IS_AVAILBLE else "softmax" + attn_mode = "softmax-xformers" if XFORMERS_IS_AVAILABLE else "softmax" assert attn_mode in self.ATTENTION_MODES attn_cls = self.ATTENTION_MODES[attn_mode] self.disable_self_attn = disable_self_attn diff --git a/ldm/modules/diffusionmodules/model.py b/ldm/modules/diffusionmodules/model.py index b089eeb..46b13e6 100644 --- a/ldm/modules/diffusionmodules/model.py +++ b/ldm/modules/diffusionmodules/model.py @@ -11,9 +11,9 @@ from ldm.modules.attention import MemoryEfficientCrossAttention try: import xformers import xformers.ops - XFORMERS_IS_AVAILBLE = True + XFORMERS_IS_AVAILABLE = True except: - XFORMERS_IS_AVAILBLE = False + XFORMERS_IS_AVAILABLE = False print("No module 
'xformers'. Proceeding without it.") @@ -279,7 +279,7 @@ class MemoryEfficientCrossAttentionWrapper(MemoryEfficientCrossAttention): def make_attn(in_channels, attn_type="vanilla", attn_kwargs=None): assert attn_type in ["vanilla", "vanilla-xformers", "memory-efficient-cross-attn", "linear", "none"], f'attn_type {attn_type} unknown' - if XFORMERS_IS_AVAILBLE and attn_type == "vanilla": + if XFORMERS_IS_AVAILABLE and attn_type == "vanilla": attn_type = "vanilla-xformers" print(f"making attention of type '{attn_type}' with {in_channels} in_channels") if attn_type == "vanilla": diff --git a/ldm/modules/diffusionmodules/openaimodel.py b/ldm/modules/diffusionmodules/openaimodel.py index 7df6b5a..8eb8b7e 100644 --- a/ldm/modules/diffusionmodules/openaimodel.py +++ b/ldm/modules/diffusionmodules/openaimodel.py @@ -345,7 +345,7 @@ def count_flops_attn(model, _x, y): class QKVAttentionLegacy(nn.Module): """ - A module which performs QKV attention. Matches legacy QKVAttention + input/ouput heads shaping + A module which performs QKV attention. 
Matches legacy QKVAttention + input/output heads shaping """ def __init__(self, n_heads): diff --git a/ldm/modules/ema.py b/ldm/modules/ema.py index bded250..29fbd64 100644 --- a/ldm/modules/ema.py +++ b/ldm/modules/ema.py @@ -3,14 +3,14 @@ from torch import nn class LitEma(nn.Module): - def __init__(self, model, decay=0.9999, use_num_upates=True): + def __init__(self, model, decay=0.9999, use_num_updates=True): super().__init__() if decay < 0.0 or decay > 1.0: raise ValueError('Decay must be between 0 and 1') self.m_name2s_name = {} self.register_buffer('decay', torch.tensor(decay, dtype=torch.float32)) - self.register_buffer('num_updates', torch.tensor(0, dtype=torch.int) if use_num_upates + self.register_buffer('num_updates', torch.tensor(0, dtype=torch.int) if use_num_updates else torch.tensor(-1, dtype=torch.int)) for name, p in model.named_parameters(): diff --git a/ldm/modules/image_degradation/bsrgan.py b/ldm/modules/image_degradation/bsrgan.py index 32ef561..f51da0c 100644 --- a/ldm/modules/image_degradation/bsrgan.py +++ b/ldm/modules/image_degradation/bsrgan.py @@ -170,7 +170,7 @@ def gen_kernel(k_size=np.array([15, 15]), scale_factor=np.array([4, 4]), min_var [X, Y] = np.meshgrid(range(k_size[0]), range(k_size[1])) Z = np.stack([X, Y], 2)[:, :, :, None] - # Calcualte Gaussian for every pixel of the kernel + # Calculate Gaussian for every pixel of the kernel ZZ = Z - MU ZZ_t = ZZ.transpose(0, 1, 3, 2) raw_kernel = np.exp(-0.5 * np.squeeze(ZZ_t @ INV_SIGMA @ ZZ)) * (1 + noise) @@ -613,7 +613,7 @@ def degradation_bsrgan_variant(image, sf=4, isp_model=None): return example -# TODO incase there is a pickle error one needs to replace a += x with a = a + x in add_speckle_noise etc... +# TODO in case there is a pickle error one needs to replace a += x with a = a + x in add_speckle_noise etc... 
def degradation_bsrgan_plus(img, sf=4, shuffle_prob=0.5, use_sharp=True, lq_patchsize=64, isp_model=None): """ This is an extended degradation model by combining diff --git a/ldm/modules/image_degradation/bsrgan_light.py b/ldm/modules/image_degradation/bsrgan_light.py index 808c7f8..36ef738 100644 --- a/ldm/modules/image_degradation/bsrgan_light.py +++ b/ldm/modules/image_degradation/bsrgan_light.py @@ -169,7 +169,7 @@ def gen_kernel(k_size=np.array([15, 15]), scale_factor=np.array([4, 4]), min_var [X, Y] = np.meshgrid(range(k_size[0]), range(k_size[1])) Z = np.stack([X, Y], 2)[:, :, :, None] - # Calcualte Gaussian for every pixel of the kernel + # Calculate Gaussian for every pixel of the kernel ZZ = Z - MU ZZ_t = ZZ.transpose(0, 1, 3, 2) raw_kernel = np.exp(-0.5 * np.squeeze(ZZ_t @ INV_SIGMA @ ZZ)) * (1 + noise) diff --git a/ldm/modules/image_degradation/utils_image.py b/ldm/modules/image_degradation/utils_image.py index 0175f15..3178188 100644 --- a/ldm/modules/image_degradation/utils_image.py +++ b/ldm/modules/image_degradation/utils_image.py @@ -59,7 +59,7 @@ def surf(Z, cmap='rainbow', figsize=None): ''' # -------------------------------------------- -# get image pathes +# get image paths # -------------------------------------------- ''' @@ -122,14 +122,14 @@ def imssave(imgs, img_path): cv2.imwrite(new_path, img) -def split_imageset(original_dataroot, taget_dataroot, n_channels=3, p_size=800, p_overlap=96, p_max=1000): +def split_imageset(original_dataroot, target_dataroot, n_channels=3, p_size=800, p_overlap=96, p_max=1000): """ split the large images from original_dataroot into small overlapped images with size (p_size)x(p_size), - and save them into taget_dataroot; only the images with larger size than (p_max)x(p_max) - will be splitted. + and save them into target_dataroot; only the images with larger size than (p_max)x(p_max) + will be split. 
Args: original_dataroot: - taget_dataroot: + target_dataroot: p_size: size of small images p_overlap: patch size in training is a good choice p_max: images with smaller size than (p_max)x(p_max) keep unchanged. @@ -139,8 +139,8 @@ def split_imageset(original_dataroot, taget_dataroot, n_channels=3, p_size=800, # img_name, ext = os.path.splitext(os.path.basename(img_path)) img = imread_uint(img_path, n_channels=n_channels) patches = patches_from_image(img, p_size, p_overlap, p_max) - imssave(patches, os.path.join(taget_dataroot,os.path.basename(img_path))) - #if original_dataroot == taget_dataroot: + imssave(patches, os.path.join(target_dataroot,os.path.basename(img_path))) + #if original_dataroot == target_dataroot: #del img_path ''' @@ -180,7 +180,7 @@ def mkdir_and_rename(path): # -------------------------------------------- -# get uint8 image of size HxWxn_channles (RGB) +# get uint8 image of size HxWxn_channels (RGB) # -------------------------------------------- def imread_uint(path, n_channels=3): # input: path @@ -215,7 +215,7 @@ def imwrite(img, img_path): # -------------------------------------------- -# get single image of size HxWxn_channles (BGR) +# get single image of size HxWxn_channels (BGR) # -------------------------------------------- def read_img(path): # read image by cv2 diff --git a/ldm/modules/midas/midas/transforms.py b/ldm/modules/midas/midas/transforms.py index 350cbc1..716da45 100644 --- a/ldm/modules/midas/midas/transforms.py +++ b/ldm/modules/midas/midas/transforms.py @@ -125,7 +125,7 @@ class Resize(object): # fit height scale_width = scale_height elif self.__resize_method == "minimal": - # scale as least as possbile + # scale as little as possible if abs(1 - scale_width) < abs(1 - scale_height): # fit width scale_height = scale_width diff --git a/ldm/util.py b/ldm/util.py index 8c09ca1..d223fcf 100644 --- a/ldm/util.py +++ b/ldm/util.py @@ -23,7 +23,7 @@ def log_txt_as_img(wh, xc, size=10): try: draw.text((0, 0), lines, fill="black", 
font=font) except UnicodeEncodeError: - print("Cant encode string for logging. Skipping.") + print("Can't encode string for logging. Skipping.") txt = np.array(txt).transpose(2, 0, 1) / 127.5 - 1.0 txts.append(txt) diff --git a/modelcard.md b/modelcard.md index 787f15c..0c8d5e6 100644 --- a/modelcard.md +++ b/modelcard.md @@ -112,7 +112,7 @@ In addition to the textual input, it receives a `noise_level` as an input parame - **Optimizer:** AdamW - **Gradient Accumulations**: 1 - **Batch:** 32 x 8 x 2 x 4 = 2048 -- **Learning rate:** warmup to 0.0001 for 10,000 steps and then kept constant +- **Learning rate:** warm up to 0.0001 for 10,000 steps and then kept constant ## Evaluation Results Evaluations with different classifier-free guidance scales (1.5, 2.0, 3.0, 4.0,