diff --git a/infra/.gitignore b/infra/.gitignore
new file mode 100644
index 0000000..69c2fcf
--- /dev/null
+++ b/infra/.gitignore
@@ -0,0 +1,3 @@
+gcp.json
+app
+Pulumi.dev.yaml
diff --git a/infra/.sample.env b/infra/.sample.env
new file mode 100644
index 0000000..c539854
--- /dev/null
+++ b/infra/.sample.env
@@ -0,0 +1,8 @@
+export NAME=stablediffusion
+export PROJECT={PROJECT} # <-- replace
+export REGION={REGION} # <-- replace
+export NODE_COUNT={NODE_COUNT} # <-- replace
+export MACHINE_TYPE={MACHINE_TYPE} # <-- replace
+export REPLICAS={REPLICAS} # <-- replace
+export PULUMI_CONFIG_PASSPHRASE={PULUMI_CONFIG_PASSPHRASE} # <-- replace
+export GOOGLE_APPLICATION_CREDENTIALS=./gcp.json
diff --git a/infra/Dockerfile b/infra/Dockerfile
new file mode 100644
index 0000000..c3164b9
--- /dev/null
+++ b/infra/Dockerfile
@@ -0,0 +1,42 @@
+FROM pytorch/pytorch:1.12.1-cuda11.3-cudnn8-devel
+
+# apt-get (not apt) for non-interactive use; --no-install-recommends and
+# clearing the apt lists keep the image layer small.
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+    git \
+    ffmpeg \
+    libsm6 \
+    libxext6 \
+    wget && \
+    rm -rf /var/lib/apt/lists/*
+
+# Install dependencies
+WORKDIR /app
+COPY ./app/requirements.txt /app/requirements.txt
+COPY ./app/environment.yaml /app/environment.yaml
+COPY ./app/setup.py /app/setup.py
+RUN conda env create -f environment.yaml
+
+# Make RUN commands use the new environment:
+SHELL ["conda", "run", "-n", "ldm", "/bin/bash", "-c"]
+
+# Install xformers for memory efficient flash attention
+RUN conda install xformers -c xformers/label/dev
+
+RUN conda init bash
+RUN echo "conda activate ldm" >> $HOME/.bashrc
+
+# Install server dependencies
+RUN pip install \
+    flask==2.3.2 \
+    triton==2.0.0.post1
+
+# Copy files into container
+COPY ./app /app
+COPY ./server.py /app/server.py
+COPY ./cmd.sh /app/cmd.sh
+
+# Start server
+EXPOSE 80
+CMD ["bash", "cmd.sh"]
diff --git a/infra/Pulumi.yaml b/infra/Pulumi.yaml
new file mode 100644
index 0000000..adb534e
--- /dev/null
+++ b/infra/Pulumi.yaml
@@ -0,0 +1,2 @@
+name: stablediffusion
+runtime: python
diff --git a/infra/__main__.py b/infra/__main__.py
new file mode
100644
index 0000000..4764fca
--- /dev/null
+++ b/infra/__main__.py
@@ -0,0 +1,318 @@
+import pulumi
+from pulumi_gcp import projects, container  # NOTE: unused 'config' import removed; it was shadowed below
+from pulumi_docker import Image
+from pulumi_kubernetes import Provider
+from pulumi_kubernetes.core.v1 import Service
+from pulumi_kubernetes.apps.v1 import Deployment
+import google.auth
+from google.auth.transport.requests import Request
+from pulumi_kubernetes.apps.v1 import DaemonSet
+
+
+config = pulumi.Config()  # per-stack settings; see .sample.env / Pulumi.dev.yaml
+name = config.require("name")
+project = config.require("project")
+location = config.require("region")
+node_count = config.require_int("node_count")
+machine_type = config.require("machine_type")
+replicas = config.require_int("replicas")
+
+
+# Fetch a short-lived OAuth2 access token from Application Default Credentials.
+def get_access_token():
+    scopes = ["https://www.googleapis.com/auth/cloud-platform"]
+    creds, _ = google.auth.default(scopes=scopes)
+
+    if not creds.token:  # freshly loaded credentials carry no token until refreshed
+        creds.refresh(Request())
+
+    return creds.token
+
+
+# Enable services
+container_api = projects.Service(
+    "container.googleapis.com",
+    service="container.googleapis.com",
+    project=project,
+)
+cloud_resource_manager_api = projects.Service(
+    "cloudresourcemanager.googleapis.com",
+    service="cloudresourcemanager.googleapis.com",
+    project=project,
+)
+
+# Build and push Docker image to container registry
+image = Image(
+    name,
+    image_name=f"gcr.io/{project}/{name}",
+    build={
+        "context": ".",
+        "platform": "linux/amd64",
+    },
+    registry={
+        "server": "gcr.io",
+        "username": "oauth2accesstoken",
+        "password": pulumi.Output.secret(get_access_token()),  # secret() keeps the token out of plaintext state
+    },
+    opts=pulumi.ResourceOptions(depends_on=[container_api, cloud_resource_manager_api]),
+)
+
+# Fetch GKE engine versions
+def get_engine_versions(digest):  # 'digest' is ignored; it only sequences this lookup after the image push
+    return container.get_engine_versions(project=project, location=location)
+
+
+engine_versions = pulumi.Output.all([image.repo_digest]).apply(get_engine_versions)
+
+# Create Kubernetes cluster
+cluster = container.Cluster(
+    name,
+    project=project,
+    location=location,
+    initial_node_count=node_count,
+    min_master_version=engine_versions.latest_master_version,
+    node_version=engine_versions.latest_master_version,
+    node_config={
+        "machine_type": machine_type,
+        "oauth_scopes": [
+            "https://www.googleapis.com/auth/compute",
+            "https://www.googleapis.com/auth/devstorage.read_only",
+            "https://www.googleapis.com/auth/logging.write",
+            "https://www.googleapis.com/auth/monitoring",
+        ],
+        "image_type": "COS_CONTAINERD",
+        "guest_accelerator": [  # one NVIDIA A100 attached to every node
+            {
+                "type": "nvidia-tesla-a100",
+                "count": 1,
+            }
+        ],
+    },
+    opts=pulumi.ResourceOptions(depends_on=[image]),
+)
+
+
+def generate_kubeconfig(name, endpoint, master_auth):  # render a kubeconfig; auth is delegated to gke-gcloud-auth-plugin
+    context = f"{project}_{location}_{name}"
+    return f"""apiVersion: v1
+clusters:
+- cluster:
+    certificate-authority-data: {master_auth['cluster_ca_certificate']}
+    server: https://{endpoint}
+  name: {context}
+contexts:
+- context:
+    cluster: {context}
+    user: {context}
+  name: {context}
+current-context: {context}
+kind: Config
+preferences: {{}}
+users:
+- name: {context}
+  user:
+    exec:
+      apiVersion: client.authentication.k8s.io/v1beta1
+      command: gke-gcloud-auth-plugin
+      installHint: Install gke-gcloud-auth-plugin for use with kubectl by following
+        https://cloud.google.com/blog/products/containers-kubernetes/kubectl-auth-changes-in-gke
+      provideClusterInfo: true
+"""
+
+
+kubeconfig = pulumi.Output.all(
+    cluster.name, cluster.endpoint, cluster.master_auth
+).apply(lambda args: generate_kubeconfig(*args))  # resolves once the cluster outputs are known
+
+# Create a Kubernetes provider
+cluster_provider = Provider(name, kubeconfig=kubeconfig)
+
+# Deploy NVIDIA daemon set
+nvidia_gpu_device_plugin = DaemonSet(
+    "nvidia-gpu-device-plugin",
+    metadata={
+        "name": "nvidia-driver-installer",
+        "namespace": "kube-system",
+        "labels": {"k8s-app": "nvidia-driver-installer"},
+    },
+    spec={
+        "selector": {"matchLabels": {"k8s-app": "nvidia-driver-installer"}},
+        "updateStrategy": {"type": "RollingUpdate"},
+        "template": {
+            "metadata": {
+                "labels": {
+                    "name": "nvidia-driver-installer",
+                    "k8s-app": "nvidia-driver-installer",
+                }
+            },
+            "spec": {
+                "priorityClassName": "system-node-critical",
+                "affinity": {
+                    "nodeAffinity": {  # only GPU nodes that do not already manage their own driver version
+                        "requiredDuringSchedulingIgnoredDuringExecution": {
+                            "nodeSelectorTerms": [
+                                {
+                                    "matchExpressions": [
+                                        {
+                                            "key": "cloud.google.com/gke-accelerator",
+                                            "operator": "Exists",
+                                        },
+                                        {
+                                            "key": "cloud.google.com/gke-gpu-driver-version",
+                                            "operator": "DoesNotExist",
+                                        },
+                                    ]
+                                }
+                            ]
+                        }
+                    }
+                },
+                "tolerations": [{"operator": "Exists"}],  # keyless Exists matches every taint
+                "hostNetwork": True,
+                "hostPID": True,  # NOTE(review): presumably required to install the driver on the host — confirm
+                "volumes": [
+                    {"name": "dev", "hostPath": {"path": "/dev"}},
+                    {
+                        "name": "vulkan-icd-mount",
+                        "hostPath": {
+                            "path": "/home/kubernetes/bin/nvidia/vulkan/icd.d"
+                        },
+                    },
+                    {
+                        "name": "nvidia-install-dir-host",
+                        "hostPath": {"path": "/home/kubernetes/bin/nvidia"},
+                    },
+                    {"name": "root-mount", "hostPath": {"path": "/"}},
+                    {"name": "cos-tools", "hostPath": {"path": "/var/lib/cos-tools"}},
+                    {"name": "nvidia-config", "hostPath": {"path": "/etc/nvidia"}},
+                ],
+                "initContainers": [
+                    {
+                        "image": "cos-nvidia-installer:fixed",  # resolved locally on the node, hence pull policy Never
+                        "imagePullPolicy": "Never",
+                        "name": "nvidia-driver-installer",
+                        "resources": {"requests": {"cpu": "150m"}},
+                        "securityContext": {"privileged": True},
+                        "env": [
+                            {
+                                "name": "NVIDIA_INSTALL_DIR_HOST",
+                                "value": "/home/kubernetes/bin/nvidia",
+                            },
+                            {
+                                "name": "NVIDIA_INSTALL_DIR_CONTAINER",
+                                "value": "/usr/local/nvidia",
+                            },
+                            {
+                                "name": "VULKAN_ICD_DIR_HOST",
+                                "value": "/home/kubernetes/bin/nvidia/vulkan/icd.d",
+                            },
+                            {
+                                "name": "VULKAN_ICD_DIR_CONTAINER",
+                                "value": "/etc/vulkan/icd.d",
+                            },
+                            {"name": "ROOT_MOUNT_DIR", "value": "/root"},
+                            {
+                                "name": "COS_TOOLS_DIR_HOST",
+                                "value": "/var/lib/cos-tools",
+                            },
+                            {
+                                "name": "COS_TOOLS_DIR_CONTAINER",
+                                "value": "/build/cos-tools",
+                            },
+                        ],
+                        "volumeMounts": [
+                            {
+                                "name": "nvidia-install-dir-host",
+                                "mountPath": "/usr/local/nvidia",
+                            },
+                            {
+                                "name": "vulkan-icd-mount",
+                                "mountPath":
+                                    "/etc/vulkan/icd.d",
+                            },
+                            {"name": "dev", "mountPath": "/dev"},
+                            {"name": "root-mount", "mountPath": "/root"},
+                            {"name": "cos-tools", "mountPath": "/build/cos-tools"},
+                        ],
+                    },
+                    {
+                        "image": "gcr.io/gke-release/nvidia-partition-gpu@sha256:c54fd003948fac687c2a93a55ea6e4d47ffbd641278a9191e75e822fe72471c2",
+                        "name": "partition-gpus",
+                        "env": [
+                            {
+                                "name": "LD_LIBRARY_PATH",
+                                "value": "/usr/local/nvidia/lib64",
+                            }
+                        ],
+                        "resources": {"requests": {"cpu": "150m"}},
+                        "securityContext": {"privileged": True},
+                        "volumeMounts": [
+                            {
+                                "name": "nvidia-install-dir-host",
+                                "mountPath": "/usr/local/nvidia",
+                            },
+                            {"name": "dev", "mountPath": "/dev"},
+                            {"name": "nvidia-config", "mountPath": "/etc/nvidia"},
+                        ],
+                    },
+                ],
+                "containers": [  # pause keeps the pod alive after the init containers finish
+                    {"image": "gcr.io/google-containers/pause:2.0", "name": "pause"}
+                ],
+            },
+        },
+    },
+    opts=pulumi.ResourceOptions(provider=cluster_provider),
+)
+
+
+# Create Kubernetes deployment
+deployment = Deployment(
+    name,
+    metadata={"name": name},
+    spec={
+        "strategy": {
+            "type": "Recreate",  # tear old pods down before new ones schedule
+        },
+        "replicas": replicas,
+        "selector": {"matchLabels": {"app": name}},
+        "template": {
+            "metadata": {"labels": {"app": name}},
+            "spec": {
+                "containers": [
+                    {
+                        "name": name,
+                        "image": image.repo_digest,  # pin by digest so rollouts track the exact build
+                        "resources": {"limits": {"nvidia.com/gpu": 1}},  # one GPU per replica
+                        "ports": [{"containerPort": 80}],
+                    },
+                ],
+            },
+        },
+    },
+    opts=pulumi.ResourceOptions(
+        provider=cluster_provider, depends_on=[nvidia_gpu_device_plugin]
+    ),
+)
+
+# Create Kubernetes service to expose port 80
+service = Service(
+    name,
+    spec={
+        "type": "LoadBalancer",
+        "selector": {"app": name},
+        "ports": [
+            {
+                "protocol": "TCP",
+                "port": 80,
+                "targetPort": 80,
+            },
+        ],
+    },
+    opts=pulumi.ResourceOptions(provider=cluster_provider, depends_on=[deployment]),
+)
+
+# Export IP address of the LoadBalancer
+pulumi.export(
+    "load_balancer_ip",
+    service.status.apply(lambda status: status.load_balancer.ingress[0].ip),
+)
diff --git a/infra/cmd.sh b/infra/cmd.sh
new file
mode 100755
index 0000000..c69e992
--- /dev/null
+++ b/infra/cmd.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+# Download the SD 2.1 checkpoint (skipped if already present) and start the server.
+set -euo pipefail
+mkdir -p checkpoints
+cd checkpoints
+wget -nc https://huggingface.co/stabilityai/stable-diffusion-2-1/resolve/main/v2-1_768-ema-pruned.ckpt
+cd ..
+mkdir -p static
+python server.py
diff --git a/infra/destroy.sh b/infra/destroy.sh
new file mode 100755
index 0000000..1f7d82a
--- /dev/null
+++ b/infra/destroy.sh
@@ -0,0 +1,2 @@
+source .env
+pulumi destroy --yes --stack dev
diff --git a/infra/index.html b/infra/index.html
new file mode 100644
index 0000000..f26f77e
--- /dev/null
+++ b/infra/index.html
@@ -0,0 +1,11 @@
+
+
+