From e04300bb4f21942841a020313397f4977d632850 Mon Sep 17 00:00:00 2001
From: Robin Rombach
Date: Mon, 20 Mar 2023 14:28:06 +0100
Subject: [PATCH] final ckpt links for unclip

---
 README.md                         | 2 +-
 doc/UNCLIP.MD                     | 5 +++--
 scripts/streamlit/stableunclip.py | 4 ++--
 3 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index 411c76c..d7d416d 100644
--- a/README.md
+++ b/README.md
@@ -13,7 +13,7 @@ new checkpoints. The following list provides an overview of all currently availa
 
 *Stable UnCLIP 2.1*
 - New stable diffusion finetune (_Stable unCLIP 2.1_, [HuggingFace](https://huggingface.co/stabilityai/)) at 768x768 resolution, based on SD2.1-768. This model allows for image variations and mixing operations as described in [*Hierarchical Text-Conditional Image Generation with CLIP Latents*](https://arxiv.org/abs/2204.06125), and, thanks to its modularity, can be combined with other models
-such as [KARLO](https://github.com/kakaobrain/karlo). Comes in two variants: [*Stable unCLIP-L*](TODO) and [*Stable unCLIP-H*](TODO), which are conditioned on CLIP
+such as [KARLO](https://github.com/kakaobrain/karlo). Comes in two variants: [*Stable unCLIP-L*](https://huggingface.co/stabilityai/stable-diffusion-2-1-unclip/blob/main/sd21-unclip-l.ckpt) and [*Stable unCLIP-H*](https://huggingface.co/stabilityai/stable-diffusion-2-1-unclip/blob/main/sd21-unclip-h.ckpt), which are conditioned on CLIP
 ViT-L and ViT-H image embeddings, respectively. Instructions are available [here](doc/UNCLIP.MD).
 
 **December 7, 2022**

diff --git a/doc/UNCLIP.MD b/doc/UNCLIP.MD
index d05272d..9a12c0c 100644
--- a/doc/UNCLIP.MD
+++ b/doc/UNCLIP.MD
@@ -5,7 +5,8 @@ trained to invert CLIP image embeddings.
 We finetuned SD 2.1 to accept a CLIP ViT-L/14 image embedding in addition to the text encodings.
 This means that the model can be used to produce image variations, but can also be combined with a text-to-image
 embedding prior to yield a full text-to-image model at 768x768 resolution.
-We provide two models, trained on OpenAI CLIP-L and OpenCLIP-H image embeddings, respectively, available from [https://huggingface.co/stabilityai/](TODO).
+We provide two models, trained on OpenAI CLIP-L and OpenCLIP-H image embeddings, respectively,
+available from [https://huggingface.co/stabilityai/stable-diffusion-2-1-unclip](https://huggingface.co/stabilityai/stable-diffusion-2-1-unclip/tree/main).
 To use them, download from Hugging Face, and put the weights into the `checkpoints` folder.
 #### Image Variations
 ![image-variations-l-1](../assets/stable-samples/stable-unclip/unclip-variations.png)
@@ -37,7 +38,7 @@ wget https://arena.kakaocdn.net/brainrepo/models/karlo-public/v1.0.0.alpha/0b623
 wget https://arena.kakaocdn.net/brainrepo/models/karlo-public/v1.0.0.alpha/85626483eaca9f581e2a78d31ff905ca/prior-ckpt-step%3D01000000-of-01000000.ckpt
 cd ../../
 ```
-and the finetuned SD2.1 unCLIP-L checkpoint from [https://huggingface.co/stabilityai/](https://huggingface.co/stabilityai/TODO), and put the ckpt into the `checkpoints folder`
+and the finetuned SD2.1 unCLIP-L checkpoint from [here](https://huggingface.co/stabilityai/stable-diffusion-2-1-unclip/blob/main/sd21-unclip-l.ckpt), and put the ckpt into the `checkpoints` folder
 
 Then, run
 
diff --git a/scripts/streamlit/stableunclip.py b/scripts/streamlit/stableunclip.py
index 5898b23..122fa9a 100644
--- a/scripts/streamlit/stableunclip.py
+++ b/scripts/streamlit/stableunclip.py
@@ -198,11 +198,11 @@ def init(version="Stable unCLIP-L", load_karlo_prior=False):
     if not "model" in state:
         if version == "Stable unCLIP-L":
             config = "configs/stable-diffusion/v2-1-stable-unclip-l-inference.yaml"
-            ckpt = "checkpoints/v2-1-stable-unclip-l-ft.ckpt"
+            ckpt = "checkpoints/sd21-unclip-l.ckpt"
         elif version == "Stable unOpenCLIP-H":
             config = "configs/stable-diffusion/v2-1-stable-unclip-h-inference.yaml"
-            ckpt = "checkpoints/v2-1-stable-unclip-h-ft.ckpt"
+            ckpt = "checkpoints/sd21-unclip-h.ckpt"
         elif version == "Full Karlo":
             from ldm.modules.karlo.kakao.sampler import T2ISampler
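For readers following the patch, the sketch below shows one way the renamed checkpoints could be loaded against the matching inference configs outside the streamlit demo. It is not part of this patch or the repository: the helper name `load_unclip_model` is hypothetical and simply mirrors the common OmegaConf / `instantiate_from_config` loading pattern used in the codebase; the config and checkpoint paths are the ones referenced above.

```python
# Hypothetical loading sketch (not part of this patch). Assumes the renamed
# checkpoints from this commit (checkpoints/sd21-unclip-{l,h}.ckpt) sit next to
# the corresponding v2-1-stable-unclip-{l,h}-inference.yaml configs.
import torch
from omegaconf import OmegaConf
from ldm.util import instantiate_from_config


def load_unclip_model(config_path, ckpt_path, device="cuda"):
    """Instantiate the unCLIP model from a yaml config and load checkpoint weights."""
    config = OmegaConf.load(config_path)
    pl_sd = torch.load(ckpt_path, map_location="cpu")
    # Lightning-style checkpoints nest the weights under "state_dict".
    state_dict = pl_sd.get("state_dict", pl_sd)
    model = instantiate_from_config(config.model)
    model.load_state_dict(state_dict, strict=False)
    return model.to(device).eval()


if __name__ == "__main__":
    # CLIP ViT-L variant referenced by this patch; swap in sd21-unclip-h.ckpt
    # and the -h- config for the OpenCLIP ViT-H variant.
    model = load_unclip_model(
        "configs/stable-diffusion/v2-1-stable-unclip-l-inference.yaml",
        "checkpoints/sd21-unclip-l.ckpt",
    )
```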