diff --git a/infra/.gitignore b/infra/.gitignore new file mode 100644 index 0000000..69c2fcf --- /dev/null +++ b/infra/.gitignore @@ -0,0 +1,3 @@ +gcp.json +app +Pulumi.dev.yaml diff --git a/infra/.sample.env b/infra/.sample.env new file mode 100644 index 0000000..c539854 --- /dev/null +++ b/infra/.sample.env @@ -0,0 +1,8 @@ +export NAME=stablediffusion +export PROJECT={PROJECT} # <-- replace +export REGION={REGION} # <-- replace +export NODE_COUNT={NODE_COUNT} # <-- replace +export MACHINE_TYPE={MACHINE_TYPE} # <-- replace +export REPLICAS={REPLICAS} # <-- replace +export PULUMI_CONFIG_PASSPHRASE={PULUMI_CONFIG_PASSPHRASE} # <-- replace +export GOOGLE_APPLICATION_CREDENTIALS=./gcp.json diff --git a/infra/Dockerfile b/infra/Dockerfile new file mode 100644 index 0000000..c3164b9 --- /dev/null +++ b/infra/Dockerfile @@ -0,0 +1,39 @@ +FROM pytorch/pytorch:1.12.1-cuda11.3-cudnn8-devel + +RUN apt update && \ + apt install -y \ + git \ + ffmpeg \ + libsm6 \ + libxext6 \ + wget + +# Install dependencies +WORKDIR /app +COPY ./app/requirements.txt /app/requirements.txt +COPY ./app/environment.yaml /app/environment.yaml +COPY ./app/setup.py /app/setup.py +RUN conda env create -f environment.yaml + +# Make RUN commands use the new environment: +SHELL ["conda", "run", "-n", "ldm", "/bin/bash", "-c"] + +# Install xformers for memory efficient flash attention +RUN conda install xformers -c xformers/label/dev + +RUN conda init bash +RUN echo "conda activate ldm" >> $HOME/.bashrc + +# Install server dependencies +RUN pip install \ + flask==2.3.2 \ + triton==2.0.0.post1 + +# Copy files into container +COPY ./app /app +COPY ./server.py /app/server.py +COPY ./cmd.sh /app/cmd.sh + +# Start server +EXPOSE 80 +CMD ["bash", "cmd.sh"] diff --git a/infra/Pulumi.yaml b/infra/Pulumi.yaml new file mode 100644 index 0000000..adb534e --- /dev/null +++ b/infra/Pulumi.yaml @@ -0,0 +1,2 @@ +name: stablediffusion +runtime: python diff --git a/infra/__main__.py b/infra/__main__.py new file mode 100644 index 0000000..4764fca --- /dev/null +++ b/infra/__main__.py @@ -0,0 +1,318 @@ +import pulumi +from pulumi_gcp import projects, container, config +from pulumi_docker import Image +from pulumi_kubernetes import Provider +from pulumi_kubernetes.core.v1 import Service +from pulumi_kubernetes.apps.v1 import Deployment +import google.auth +from google.auth.transport.requests import Request +from pulumi_kubernetes.apps.v1 import DaemonSet + + +config = pulumi.Config() +name = config.require("name") +project = config.require("project") +location = config.require("region") +node_count = config.require_int("node_count") +machine_type = config.require("machine_type") +replicas = config.require_int("replicas") + + +# Fetch access token from credentials +def get_access_token(): + scopes = ["https://www.googleapis.com/auth/cloud-platform"] + creds, _ = google.auth.default(scopes=scopes) + + if not creds.token: + creds.refresh(Request()) + + return creds.token + + +# Enable services +container_api = projects.Service( + "container.googleapis.com", + service="container.googleapis.com", + project=project, +) +cloud_resource_manager_api = projects.Service( + "cloudresourcemanager.googleapis.com", + service="cloudresourcemanager.googleapis.com", + project=project, +) + +# Build and push Docker image to container registry +image = Image( + name, + image_name=f"gcr.io/{project}/{name}", + build={ + "context": ".", + "platform": "linux/amd64", + }, + registry={ + "server": "gcr.io", + "username": "oauth2accesstoken", + "password": 
pulumi.Output.from_input(get_access_token()), + }, + opts=pulumi.ResourceOptions(depends_on=[container_api, cloud_resource_manager_api]), +) + +# Fetch GKE engine versions +def get_engine_versions(digest): + return container.get_engine_versions(project=project, location=location) + + +engine_versions = pulumi.Output.all([image.repo_digest]).apply(get_engine_versions) + +# Create Kubernetes cluster +cluster = container.Cluster( + name, + project=project, + location=location, + initial_node_count=node_count, + min_master_version=engine_versions.latest_master_version, + node_version=engine_versions.latest_master_version, + node_config={ + "machine_type": machine_type, + "oauth_scopes": [ + "https://www.googleapis.com/auth/compute", + "https://www.googleapis.com/auth/devstorage.read_only", + "https://www.googleapis.com/auth/logging.write", + "https://www.googleapis.com/auth/monitoring", + ], + "image_type": "COS_CONTAINERD", + "guest_accelerator": [ + { + "type": "nvidia-tesla-a100", + "count": 1, + } + ], + }, + opts=pulumi.ResourceOptions(depends_on=[image]), +) + + +def generate_kubeconfig(name, endpoint, master_auth): + context = f"{project}_{location}_{name}" + return f"""apiVersion: v1 +clusters: +- cluster: + certificate-authority-data: {master_auth['cluster_ca_certificate']} + server: https://{endpoint} + name: {context} +contexts: +- context: + cluster: {context} + user: {context} + name: {context} +current-context: {context} +kind: Config +preferences: {{}} +users: +- name: {context} + user: + exec: + apiVersion: client.authentication.k8s.io/v1beta1 + command: gke-gcloud-auth-plugin + installHint: Install gke-gcloud-auth-plugin for use with kubectl by following + https://cloud.google.com/blog/products/containers-kubernetes/kubectl-auth-changes-in-gke + provideClusterInfo: true +""" + + +kubeconfig = pulumi.Output.all( + cluster.name, cluster.endpoint, cluster.master_auth +).apply(lambda args: generate_kubeconfig(*args)) + +# Create a Kubernetes provider +cluster_provider = Provider(name, kubeconfig=kubeconfig) + +# Deploy NVIDIA daemon set +nvidia_gpu_device_plugin = DaemonSet( + "nvidia-gpu-device-plugin", + metadata={ + "name": "nvidia-driver-installer", + "namespace": "kube-system", + "labels": {"k8s-app": "nvidia-driver-installer"}, + }, + spec={ + "selector": {"matchLabels": {"k8s-app": "nvidia-driver-installer"}}, + "updateStrategy": {"type": "RollingUpdate"}, + "template": { + "metadata": { + "labels": { + "name": "nvidia-driver-installer", + "k8s-app": "nvidia-driver-installer", + } + }, + "spec": { + "priorityClassName": "system-node-critical", + "affinity": { + "nodeAffinity": { + "requiredDuringSchedulingIgnoredDuringExecution": { + "nodeSelectorTerms": [ + { + "matchExpressions": [ + { + "key": "cloud.google.com/gke-accelerator", + "operator": "Exists", + }, + { + "key": "cloud.google.com/gke-gpu-driver-version", + "operator": "DoesNotExist", + }, + ] + } + ] + } + } + }, + "tolerations": [{"operator": "Exists"}], + "hostNetwork": True, + "hostPID": True, + "volumes": [ + {"name": "dev", "hostPath": {"path": "/dev"}}, + { + "name": "vulkan-icd-mount", + "hostPath": { + "path": "/home/kubernetes/bin/nvidia/vulkan/icd.d" + }, + }, + { + "name": "nvidia-install-dir-host", + "hostPath": {"path": "/home/kubernetes/bin/nvidia"}, + }, + {"name": "root-mount", "hostPath": {"path": "/"}}, + {"name": "cos-tools", "hostPath": {"path": "/var/lib/cos-tools"}}, + {"name": "nvidia-config", "hostPath": {"path": "/etc/nvidia"}}, + ], + "initContainers": [ + { + "image": 
"cos-nvidia-installer:fixed", + "imagePullPolicy": "Never", + "name": "nvidia-driver-installer", + "resources": {"requests": {"cpu": "150m"}}, + "securityContext": {"privileged": True}, + "env": [ + { + "name": "NVIDIA_INSTALL_DIR_HOST", + "value": "/home/kubernetes/bin/nvidia", + }, + { + "name": "NVIDIA_INSTALL_DIR_CONTAINER", + "value": "/usr/local/nvidia", + }, + { + "name": "VULKAN_ICD_DIR_HOST", + "value": "/home/kubernetes/bin/nvidia/vulkan/icd.d", + }, + { + "name": "VULKAN_ICD_DIR_CONTAINER", + "value": "/etc/vulkan/icd.d", + }, + {"name": "ROOT_MOUNT_DIR", "value": "/root"}, + { + "name": "COS_TOOLS_DIR_HOST", + "value": "/var/lib/cos-tools", + }, + { + "name": "COS_TOOLS_DIR_CONTAINER", + "value": "/build/cos-tools", + }, + ], + "volumeMounts": [ + { + "name": "nvidia-install-dir-host", + "mountPath": "/usr/local/nvidia", + }, + { + "name": "vulkan-icd-mount", + "mountPath": "/etc/vulkan/icd.d", + }, + {"name": "dev", "mountPath": "/dev"}, + {"name": "root-mount", "mountPath": "/root"}, + {"name": "cos-tools", "mountPath": "/build/cos-tools"}, + ], + }, + { + "image": "gcr.io/gke-release/nvidia-partition-gpu@sha256:c54fd003948fac687c2a93a55ea6e4d47ffbd641278a9191e75e822fe72471c2", + "name": "partition-gpus", + "env": [ + { + "name": "LD_LIBRARY_PATH", + "value": "/usr/local/nvidia/lib64", + } + ], + "resources": {"requests": {"cpu": "150m"}}, + "securityContext": {"privileged": True}, + "volumeMounts": [ + { + "name": "nvidia-install-dir-host", + "mountPath": "/usr/local/nvidia", + }, + {"name": "dev", "mountPath": "/dev"}, + {"name": "nvidia-config", "mountPath": "/etc/nvidia"}, + ], + }, + ], + "containers": [ + {"image": "gcr.io/google-containers/pause:2.0", "name": "pause"} + ], + }, + }, + }, + opts=pulumi.ResourceOptions(provider=cluster_provider), +) + + +# Create Kubernetes deployment +deployment = Deployment( + name, + metadata={"name": name}, + spec={ + "strategy": { + "type": "Recreate", + }, + "replicas": replicas, + "selector": {"matchLabels": {"app": name}}, + "template": { + "metadata": {"labels": {"app": name}}, + "spec": { + "containers": [ + { + "name": name, + "image": image.repo_digest, + "resources": {"limits": {"nvidia.com/gpu": 1}}, + "ports": [{"containerPort": 80}], + }, + ], + }, + }, + }, + opts=pulumi.ResourceOptions( + provider=cluster_provider, depends_on=[nvidia_gpu_device_plugin] + ), +) + +# Create Kubernetes service to expose port 80 +service = Service( + name, + spec={ + "type": "LoadBalancer", + "selector": {"app": name}, + "ports": [ + { + "protocol": "TCP", + "port": 80, + "targetPort": 80, + }, + ], + }, + opts=pulumi.ResourceOptions(provider=cluster_provider, depends_on=[deployment]), +) + +# Export IP address of the LoadBalancer +pulumi.export( + "load_balancer_ip", + service.status.apply(lambda status: status.load_balancer.ingress[0].ip), +) diff --git a/infra/cmd.sh b/infra/cmd.sh new file mode 100755 index 0000000..c69e992 --- /dev/null +++ b/infra/cmd.sh @@ -0,0 +1,6 @@ +mkdir checkpoints +cd checkpoints +wget https://huggingface.co/stabilityai/stable-diffusion-2-1/resolve/main/v2-1_768-ema-pruned.ckpt +cd .. 
+mkdir static +python server.py diff --git a/infra/destroy.sh b/infra/destroy.sh new file mode 100755 index 0000000..1f7d82a --- /dev/null +++ b/infra/destroy.sh @@ -0,0 +1,2 @@ +source .env +pulumi destroy --yes --stack dev diff --git a/infra/index.html b/infra/index.html new file mode 100644 index 0000000..f26f77e --- /dev/null +++ b/infra/index.html @@ -0,0 +1,11 @@ +<!DOCTYPE html> +<html> +<head> + <title>Gallery</title> +</head> +<body> + {% for image in images %} + <img src="{{ image }}" alt="Image"> + {% endfor %} +</body> +</html> diff --git a/infra/readme.md b/infra/readme.md new file mode 100644 index 0000000..bcc9c1d --- /dev/null +++ b/infra/readme.md @@ -0,0 +1,81 @@ +## Stable Diffusion on Google Cloud Platform using Pulumi + +### Requirements + +- Python 3 +- Pulumi, https://www.pulumi.com/docs/get-started/install/ + +### Instructions + +1. Create a service account in Google Cloud Platform as follows: + + * Log in to the Google Cloud Console (console.cloud.google.com) + * Select the project in which you want to create a service account + * Click on the "IAM & Admin" option in the left-hand menu + * Click on "Service Accounts" in the left-hand menu + * Click the "Create Service Account" button + * Enter a name for the service account + * Select the "Editor" role for the service account + * Select the "Furnish a new private key" option and choose JSON + * Click "Create" to create the service account + * Once you have created the service account, you will be prompted to download the private key file + +2. Rename the service account private key file to `gcp.json` and place it inside the `/infra` directory +3. Rename `.sample.env` to `.env` and edit its contents +4. Execute `./start.sh` in your terminal to: + + * Enable Google Cloud Services + * Build and push a Docker image to Google Container Registry + * Spin up a Kubernetes cluster running an A100 GPU + * Install the NVIDIA driver into the Kubernetes cluster + * Launch the Stable Diffusion Kubernetes deployment + * Expose Stable Diffusion to the public internet using a Kubernetes Service + +### How to use + +Once `./start.sh` finishes running, it will output `load_balancer_ip`, for example: `load_balancer_ip: "34.172.48.137"`. Use the IP provided to query Stable Diffusion. + +Parameters: +``` +prompt=args.get("prompt", "a professional photograph of an astronaut riding a triceratops"), +outdir=args.get("outdir", "static"), +steps=args.get("steps", 50), +plms=args.get("plms", False), +dpm=args.get("dpm", False), +fixed_code=args.get("fixed_code", False), +ddim_eta=args.get("ddim_eta", 0.0), +n_iter=args.get("n_iter", 3), +H=args.get("H", 512), +W=args.get("W", 512), +C=args.get("C", 4), +f=args.get("f", 8), +n_samples=args.get("n_samples", 3), +n_rows=args.get("n_rows", 0), +scale=args.get("scale", 9.0), +from_file=args.get("from_file", None), +config=args.get("config", "configs/stable-diffusion/v2-inference-v.yaml"), +ckpt=args.get("ckpt", "checkpoints/v2-1_768-ema-pruned.ckpt"), +seed=args.get("seed", 42), +precision=args.get("precision", "autocast"), +repeat=args.get("repeat", 1), +device=args.get("device", "cpu"), +torchscript=args.get("torchscript", False), +ipex=args.get("ipex", False), +bf16=args.get("bf16", False) + ``` + +For example: `http://34.172.48.137/?prompt=Your_Query_Here`. Replace `Your_Query_Here` with your desired query text. + +To check the generated images, navigate to `http://34.172.48.137/images`. + +Remember to URL-encode the text parameter if it contains special characters or spaces. For example, you can replace spaces with `%20`.
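As a concrete illustration, a request from the command line could look like the following (a minimal sketch: the IP address is the sample value from above and the prompt text is an arbitrary placeholder; every other parameter falls back to the defaults listed above):

```
# Substitute the load_balancer_ip printed by ./start.sh for the sample IP below.
# Spaces in the prompt are URL-encoded as %20.
curl "http://34.172.48.137/?prompt=a%20watercolor%20painting%20of%20a%20lighthouse"

# Browse the generated images.
curl "http://34.172.48.137/images"
```

The server runs the whole sampling loop before answering with a small JSON confirmation, so expect each request to take a while.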
+ +### Delete cluster and revert all changes + +To delete the cluster and revert all changes, execute `./destroy.sh` in your terminal. + +### Support + +If you like this project and find it useful, please consider giving it a star. Your support is appreciated! :hearts: + +If you have any questions or suggestions, feel free to reach out to Carlos at calufa@gmail.com or connect on LinkedIn: https://www.linkedin.com/in/carloschinchilla/. diff --git a/infra/requirements.txt b/infra/requirements.txt new file mode 100644 index 0000000..a3d79cd --- /dev/null +++ b/infra/requirements.txt @@ -0,0 +1,5 @@ +pulumi==3.64.0 +pulumi-gcp==6.54.0 +pulumi-docker==4.1.2 +pulumi-kubernetes==3.25.0 +google-auth==2.17.3 diff --git a/infra/server.py b/infra/server.py new file mode 100644 index 0000000..29536e9 --- /dev/null +++ b/infra/server.py @@ -0,0 +1,51 @@ +import os +from flask import Flask, request, jsonify, render_template +import argparse +from scripts.txt2img import main + +app = Flask(__name__, template_folder='.') + +@app.route("/", methods=["GET"]) +def index(): + args = request.args + + opt = argparse.Namespace( + prompt=args.get("prompt", "a professional photograph of an astronaut riding a triceratops"), + outdir=args.get("outdir", "static"), + steps=args.get("steps", 50), + plms=args.get("plms", False), + dpm=args.get("dpm", False), + fixed_code=args.get("fixed_code", False), + ddim_eta=args.get("ddim_eta", 0.0), + n_iter=args.get("n_iter", 3), + H=args.get("H", 512), + W=args.get("W", 512), + C=args.get("C", 4), + f=args.get("f", 8), + n_samples=args.get("n_samples", 3), + n_rows=args.get("n_rows", 0), + scale=args.get("scale", 9.0), + from_file=args.get("from_file", None), + config=args.get("config", "configs/stable-diffusion/v2-inference-v.yaml"), + ckpt=args.get("ckpt", "checkpoints/v2-1_768-ema-pruned.ckpt"), + seed=args.get("seed", 42), + precision=args.get("precision", "autocast"), + repeat=args.get("repeat", 1), + device=args.get("device", "cpu"), + torchscript=args.get("torchscript", False), + ipex=args.get("ipex", False), + bf16=args.get("bf16", False) + ) + + main(opt) + + return jsonify({"message": "Image generated successfully"}) + +@app.route('/images') +def images(): + images = os.listdir('/app/static') + images = [f"/app/static/{image}" for image in images] + return render_template('index.html', images=images) + +if __name__ == "__main__": + app.run(host="0.0.0.0", port=80, debug=True) diff --git a/infra/start.sh b/infra/start.sh new file mode 100755 index 0000000..d5e2614 --- /dev/null +++ b/infra/start.sh @@ -0,0 +1,10 @@ +source .env +rm -rf ./app +rsync --exclude='.' --recursive --copy-links ../ ./app +pulumi config set name $NAME --stack dev +pulumi config set project $PROJECT --stack dev +pulumi config set region $REGION --stack dev +pulumi config set node_count $NODE_COUNT --stack dev +pulumi config set machine_type $MACHINE_TYPE --stack dev +pulumi config set replicas $REPLICAS --stack dev +pulumi up --yes --stack dev
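For orientation, an end-to-end session with the files in this directory might look roughly like the following. This is a sketch rather than part of the repository: it assumes a Pulumi backend is already configured, and the `pulumi stack init dev` step and the key-file path are placeholders for your own setup (the `dev` stack is the one `start.sh` and `destroy.sh` reference).

```
cd infra

# One-time setup
cp .sample.env .env                              # then replace the placeholder values
cp /path/to/service-account-key.json ./gcp.json  # service account key from step 1 of the readme
pulumi stack init dev                            # only if the dev stack does not already exist

# Build and push the image, provision the cluster, and deploy; note the load_balancer_ip output
./start.sh

# ...generate images via the HTTP endpoint...

# Tear everything down when finished
./destroy.sh
```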