add pulumi

2024-12-22 15:44:58 +00:00 · 2023-05-07 11:59:07 -05:00 · 2023-05-07 11:59:07 -05:00 · 535d370de0
commit 535d370de0
parent cf1d67a6fd
12 changed files with 536 additions and 0 deletions
--- a/infra/.gitignore
+++ b/infra/.gitignore
@ -0,0 +1,3 @@
 gcp.json
 app
 Pulumi.dev.yaml
--- a/infra/.sample.env
+++ b/infra/.sample.env
@ -0,0 +1,8 @@
 export NAME=stablediffusion
 export PROJECT={PROJECT} # <-- replace
 export REGION={REGION} # <-- replace
 export NODE_COUNT={NODE_COUNT} # <-- replace
 export MACHINE_TYPE={MACHINE_TYPE} # <-- replace
 export REPLICAS={REPLICAS} # <-- replace
 export PULUMI_CONFIG_PASSPHRASE={PULUMI_CONFIG_PASSPHRASE} # <-- replace
 export GOOGLE_APPLICATION_CREDENTIALS=./gcp.json
--- a/infra/Dockerfile
+++ b/infra/Dockerfile
@ -0,0 +1,39 @@
 FROM pytorch/pytorch:1.12.1-cuda11.3-cudnn8-devel
 # apt-get has a stable scripting interface (apt does not); drop the package
 # lists afterwards so they are not baked into the image layer.
 RUN apt-get update && \
 	apt-get install -y \
 		git \
 		ffmpeg \
 		libsm6 \
 		libxext6 \
 		wget && \
 	rm -rf /var/lib/apt/lists/*
 # Install dependencies
 WORKDIR /app
 COPY ./app/requirements.txt /app/requirements.txt
 COPY ./app/environment.yaml /app/environment.yaml
 COPY ./app/setup.py /app/setup.py
 RUN conda env create -f environment.yaml
 # Make RUN commands use the new environment:
 SHELL ["conda", "run", "-n", "ldm", "/bin/bash", "-c"]
 # Install xformers for memory efficient flash attention
 # (-y: the build has no terminal to answer the confirmation prompt)
 RUN conda install -y xformers -c xformers/label/dev
 # Only affects interactive shells (e.g. `kubectl exec -it ... bash`)
 RUN conda init bash
 RUN echo "conda activate ldm" >> $HOME/.bashrc
 # Install server dependencies
 RUN pip install \
 	flask==2.3.2 \
 	triton==2.0.0.post1
 # Copy files into container
 COPY ./app /app
 COPY ./server.py /app/server.py
 # server.py renders index.html with template_folder='.', i.e. from /app
 COPY ./index.html /app/index.html
 COPY ./cmd.sh /app/cmd.sh
 # Start server
 EXPOSE 80
 # SHELL only applies to RUN, and a non-interactive bash never sources .bashrc,
 # so the ldm environment has to be selected explicitly for the runtime command.
 CMD ["conda", "run", "--no-capture-output", "-n", "ldm", "bash", "cmd.sh"]
--- a/infra/Pulumi.yaml
+++ b/infra/Pulumi.yaml
@ -0,0 +1,2 @@
 name: stablediffusion
 runtime: python
--- a/infra/main.py
+++ b/infra/main.py
@ -0,0 +1,318 @@
 import pulumi
 from pulumi_gcp import projects, container, config
 from pulumi_docker import Image
 from pulumi_kubernetes import Provider
 from pulumi_kubernetes.core.v1 import Service
 from pulumi_kubernetes.apps.v1 import Deployment
 import google.auth
 from google.auth.transport.requests import Request
 from pulumi_kubernetes.apps.v1 import DaemonSet
 # Stack configuration. Every key below is required; start.sh writes them with
 # `pulumi config set` from the variables in .env.
 # NOTE(review): this rebinds `config`, shadowing the `config` module imported
 # from pulumi_gcp above — confirm that import is not needed.
 config = pulumi.Config()
 name = config.require("name")  # base name reused for the image, cluster, deployment and service
 project = config.require("project")
 location = config.require("region")  # passed as the cluster `location`
 node_count = config.require_int("node_count")
 machine_type = config.require("machine_type")
 replicas = config.require_int("replicas")
 # Fetch access token from credentials
 def get_access_token():
    """Return an OAuth2 access token for the default Google credentials.

    The credentials are requested with the cloud-platform scope. If they do
    not carry a token yet, they are refreshed once before the token is read.
    """
    credentials, _project = google.auth.default(
        scopes=["https://www.googleapis.com/auth/cloud-platform"]
    )
    if credentials.token:
        return credentials.token
    # No token on the credentials yet: fetch one.
    credentials.refresh(Request())
    return credentials.token
 # Enable the Google Cloud APIs the rest of the program depends on
 container_api = projects.Service(
    "container.googleapis.com",
    service="container.googleapis.com",
    project=project,
 )
 cloud_resource_manager_api = projects.Service(
    "cloudresourcemanager.googleapis.com",
    service="cloudresourcemanager.googleapis.com",
    project=project,
 )
 # Build and push Docker image to container registry
 image = Image(
    name,
    image_name=f"gcr.io/{project}/{name}",
    build={
        # The build context is the directory the program runs from (infra/).
        "context": ".",
        "platform": "linux/amd64",
    },
    registry={
        "server": "gcr.io",
        "username": "oauth2accesstoken",
        # The access token is a credential: mark it secret so Pulumi encrypts
        # it in the stack state and masks it in console output.
        "password": pulumi.Output.secret(get_access_token()),
    },
    # Do not start the build/push until both APIs are enabled.
    opts=pulumi.ResourceOptions(depends_on=[container_api, cloud_resource_manager_api]),
 )
 # Fetch GKE engine versions
 def get_engine_versions(digest):
    """Look up the GKE engine versions for the configured project and location.

    `digest` is not used; the parameter exists so the function can be passed
    to `Output.apply` below.
    """
    return container.get_engine_versions(project=project, location=location)
 # Chained off the image's repo_digest, so the lookup runs only once that
 # output has resolved.
 engine_versions = pulumi.Output.all([image.repo_digest]).apply(get_engine_versions)
 # Create Kubernetes cluster
 # The default node pool is sized by `node_count`; master and nodes are both
 # pinned to the latest master version returned by the lookup above.
 cluster = container.Cluster(
    name,
    project=project,
    location=location,
    initial_node_count=node_count,
    min_master_version=engine_versions.latest_master_version,
    node_version=engine_versions.latest_master_version,
    node_config={
        "machine_type": machine_type,
        "oauth_scopes": [
            "https://www.googleapis.com/auth/compute",
            # read-only storage scope; presumably what lets nodes pull the image from gcr.io — confirm
            "https://www.googleapis.com/auth/devstorage.read_only",
            "https://www.googleapis.com/auth/logging.write",
            "https://www.googleapis.com/auth/monitoring",
        ],
        "image_type": "COS_CONTAINERD",
        # One A100 accelerator per node.
        # NOTE(review): `machine_type` must be a type that supports this accelerator — confirm.
        "guest_accelerator": [
            {
                "type": "nvidia-tesla-a100",
                "count": 1,
            }
        ],
    },
    # Cluster creation waits for the image build/push.
    opts=pulumi.ResourceOptions(depends_on=[image]),
 )
 def generate_kubeconfig(name, endpoint, master_auth):
    """Render a kubeconfig document for the cluster as a string.

    Args:
        name: cluster name; combined with the module-level `project` and
            `location` into the single cluster/user/context name.
        endpoint: cluster API endpoint, used as `https://{endpoint}`.
        master_auth: mapping that provides `cluster_ca_certificate`.

    The user entry carries no static credentials; it delegates authentication
    to the `gke-gcloud-auth-plugin` exec plugin, which must be installed on
    the machine running Pulumi.
    """
    context = f"{project}_{location}_{name}"
    return f"""apiVersion: v1
 clusters:
 - cluster:
    certificate-authority-data: {master_auth['cluster_ca_certificate']}
    server: https://{endpoint}
  name: {context}
 contexts:
 - context:
    cluster: {context}
    user: {context}
  name: {context}
 current-context: {context}
 kind: Config
 preferences: {{}}
 users:
 - name: {context}
  user:
    exec:
      apiVersion: client.authentication.k8s.io/v1beta1
      command: gke-gcloud-auth-plugin
      installHint: Install gke-gcloud-auth-plugin for use with kubectl by following
        https://cloud.google.com/blog/products/containers-kubernetes/kubectl-auth-changes-in-gke
      provideClusterInfo: true
 """
 # Build the kubeconfig once the cluster's name, endpoint and master_auth
 # outputs have all resolved.
 kubeconfig = pulumi.Output.all(
    cluster.name, cluster.endpoint, cluster.master_auth
 ).apply(lambda args: generate_kubeconfig(*args))
 # Create a Kubernetes provider
 # Every Kubernetes resource below passes this provider, so it is created in
 # the new cluster.
 cluster_provider = Provider(name, kubeconfig=kubeconfig)
 # Deploy NVIDIA daemon set
 # NOTE(review): despite the variable and resource name, the manifest below is
 # the "nvidia-driver-installer" DaemonSet. It appears to mirror GKE's
 # preloaded driver-installer manifest — confirm against upstream before editing.
 nvidia_gpu_device_plugin = DaemonSet(
    "nvidia-gpu-device-plugin",
    metadata={
        "name": "nvidia-driver-installer",
        "namespace": "kube-system",
        "labels": {"k8s-app": "nvidia-driver-installer"},
    },
    spec={
        "selector": {"matchLabels": {"k8s-app": "nvidia-driver-installer"}},
        "updateStrategy": {"type": "RollingUpdate"},
        "template": {
            "metadata": {
                "labels": {
                    "name": "nvidia-driver-installer",
                    "k8s-app": "nvidia-driver-installer",
                }
            },
            "spec": {
                "priorityClassName": "system-node-critical",
                # Schedule only on nodes that carry the gke-accelerator label
                # and do not carry the gke-gpu-driver-version label.
                "affinity": {
                    "nodeAffinity": {
                        "requiredDuringSchedulingIgnoredDuringExecution": {
                            "nodeSelectorTerms": [
                                {
                                    "matchExpressions": [
                                        {
                                            "key": "cloud.google.com/gke-accelerator",
                                            "operator": "Exists",
                                        },
                                        {
                                            "key": "cloud.google.com/gke-gpu-driver-version",
                                            "operator": "DoesNotExist",
                                        },
                                    ]
                                }
                            ]
                        }
                    }
                },
                # Tolerate every taint.
                "tolerations": [{"operator": "Exists"}],
                "hostNetwork": True,
                "hostPID": True,
                # Host paths shared with the init containers below.
                "volumes": [
                    {"name": "dev", "hostPath": {"path": "/dev"}},
                    {
                        "name": "vulkan-icd-mount",
                        "hostPath": {
                            "path": "/home/kubernetes/bin/nvidia/vulkan/icd.d"
                        },
                    },
                    {
                        "name": "nvidia-install-dir-host",
                        "hostPath": {"path": "/home/kubernetes/bin/nvidia"},
                    },
                    {"name": "root-mount", "hostPath": {"path": "/"}},
                    {"name": "cos-tools", "hostPath": {"path": "/var/lib/cos-tools"}},
                    {"name": "nvidia-config", "hostPath": {"path": "/etc/nvidia"}},
                ],
                # The work happens in two privileged init containers; the
                # only regular container is a pause image.
                "initContainers": [
                    {
                        # imagePullPolicy "Never": this image is never pulled, so it
                        # must already be present on the node.
                        "image": "cos-nvidia-installer:fixed",
                        "imagePullPolicy": "Never",
                        "name": "nvidia-driver-installer",
                        "resources": {"requests": {"cpu": "150m"}},
                        "securityContext": {"privileged": True},
                        # The *_HOST / *_CONTAINER pairs match the hostPath volumes and
                        # their mount paths declared in this pod spec.
                        "env": [
                            {
                                "name": "NVIDIA_INSTALL_DIR_HOST",
                                "value": "/home/kubernetes/bin/nvidia",
                            },
                            {
                                "name": "NVIDIA_INSTALL_DIR_CONTAINER",
                                "value": "/usr/local/nvidia",
                            },
                            {
                                "name": "VULKAN_ICD_DIR_HOST",
                                "value": "/home/kubernetes/bin/nvidia/vulkan/icd.d",
                            },
                            {
                                "name": "VULKAN_ICD_DIR_CONTAINER",
                                "value": "/etc/vulkan/icd.d",
                            },
                            {"name": "ROOT_MOUNT_DIR", "value": "/root"},
                            {
                                "name": "COS_TOOLS_DIR_HOST",
                                "value": "/var/lib/cos-tools",
                            },
                            {
                                "name": "COS_TOOLS_DIR_CONTAINER",
                                "value": "/build/cos-tools",
                            },
                        ],
                        "volumeMounts": [
                            {
                                "name": "nvidia-install-dir-host",
                                "mountPath": "/usr/local/nvidia",
                            },
                            {
                                "name": "vulkan-icd-mount",
                                "mountPath": "/etc/vulkan/icd.d",
                            },
                            {"name": "dev", "mountPath": "/dev"},
                            {"name": "root-mount", "mountPath": "/root"},
                            {"name": "cos-tools", "mountPath": "/build/cos-tools"},
                        ],
                    },
                    {
                        # Second init container, pinned by image digest.
                        "image": "gcr.io/gke-release/nvidia-partition-gpu@sha256:c54fd003948fac687c2a93a55ea6e4d47ffbd641278a9191e75e822fe72471c2",
                        "name": "partition-gpus",
                        "env": [
                            {
                                "name": "LD_LIBRARY_PATH",
                                "value": "/usr/local/nvidia/lib64",
                            }
                        ],
                        "resources": {"requests": {"cpu": "150m"}},
                        "securityContext": {"privileged": True},
                        "volumeMounts": [
                            {
                                "name": "nvidia-install-dir-host",
                                "mountPath": "/usr/local/nvidia",
                            },
                            {"name": "dev", "mountPath": "/dev"},
                            {"name": "nvidia-config", "mountPath": "/etc/nvidia"},
                        ],
                    },
                ],
                "containers": [
                    {"image": "gcr.io/google-containers/pause:2.0", "name": "pause"}
                ],
            },
        },
    },
    opts=pulumi.ResourceOptions(provider=cluster_provider),
 )
 # Create Kubernetes deployment
 # Runs `replicas` pods of the image built above; each pod's single container
 # is limited to one GPU and listens on port 80.
 deployment = Deployment(
    name,
    metadata={"name": name},
    spec={
        "strategy": {
            # Recreate: existing pods are terminated before replacements start.
            "type": "Recreate",
        },
        "replicas": replicas,
        "selector": {"matchLabels": {"app": name}},
        "template": {
            "metadata": {"labels": {"app": name}},
            "spec": {
                "containers": [
                    {
                        "name": name,
                        # Reference the image by digest rather than by tag.
                        "image": image.repo_digest,
                        "resources": {"limits": {"nvidia.com/gpu": 1}},
                        "ports": [{"containerPort": 80}],
                    },
                ],
            },
        },
    },
    # Created only after the driver-installer DaemonSet resource.
    opts=pulumi.ResourceOptions(
        provider=cluster_provider, depends_on=[nvidia_gpu_device_plugin]
    ),
 )
 # Create Kubernetes service to expose port 80
 # A LoadBalancer Service that forwards TCP port 80 to port 80 of the pods
 # labelled app=<name>.
 service = Service(
    name,
    spec={
        "type": "LoadBalancer",
        "selector": {"app": name},
        "ports": [
            {
                "protocol": "TCP",
                "port": 80,
                "targetPort": 80,
            },
        ],
    },
    opts=pulumi.ResourceOptions(provider=cluster_provider, depends_on=[deployment]),
 )
 # Export IP address of the LoadBalancer
 # Reads the `ip` of the first ingress entry in the Service status.
 pulumi.export(
    "load_balancer_ip",
    service.status.apply(lambda status: status.load_balancer.ingress[0].ip),
 )
--- a/infra/cmd.sh
+++ b/infra/cmd.sh
@ -0,0 +1,6 @@
 # Container entry script: fetch the model checkpoint, then start the server.
 # Abort on the first failure so the server never starts without its checkpoint.
 set -e
 CKPT=checkpoints/v2-1_768-ema-pruned.ckpt
 # -p: do not fail when a directory already exists in the image.
 mkdir -p checkpoints static
 if [ ! -f "$CKPT" ]; then
    # Download to a temporary name and rename on success, so an interrupted
    # download is never mistaken for a complete checkpoint.
    wget -O "$CKPT.part" https://huggingface.co/stabilityai/stable-diffusion-2-1/resolve/main/v2-1_768-ema-pruned.ckpt
    mv "$CKPT.part" "$CKPT"
 fi
 python server.py
--- a/infra/destroy.sh
+++ b/infra/destroy.sh
@ -0,0 +1,2 @@
 # Load the settings from .env into this shell before calling Pulumi.
 source .env
 # Tear down every resource in the "dev" stack without prompting for confirmation.
 pulumi destroy --yes --stack dev
--- a/infra/index.html
+++ b/infra/index.html
@ -0,0 +1,11 @@
 <!DOCTYPE html>
 <html>
 <head>
    <title>Gallery</title>
 </head>
 <body>
    {% for image in images %}
        <img src="{{ image }}" alt="Image">
    {% endfor %}
 </body>
 </html>
--- a/infra/readme.md
+++ b/infra/readme.md
@ -0,0 +1,81 @@
 ## Stable Diffusion on Google Cloud Platform using Pulumi
 ### Requirements
 - Python 3
 - Pulumi, https://www.pulumi.com/docs/get-started/install/
 ### Instructions
 1. Create a service account in Google Cloud Platform as follows:
 	* Log in to the Google Cloud Console (console.cloud.google.com)
 	* Select the project in which you want to create a service account
 	* Click on the "IAM & Admin" option in the left-hand menu
 	* Click on "Service Accounts" in the left-hand menu
 	* Click the "Create Service Account" button
 	* Enter a name for the service account
 	* Select "Editor" role for the service account
 	* Select "Furnish a new private key" option and choose JSON
 	* Click "Create" to create the service account
 	* Once you have created the service account, you will be prompted to download the private key file
 2. Rename the service account private key file to `gcp.json` and place it inside the `/infra` directory
 3. Rename `.sample.env` to `.env` and edit its contents
 4. Execute in your terminal `./start.sh` to:
 	* Enable Google Cloud Services
 	* Build and push a Docker image to Google Container Registry
 	* Spin up a Kubernetes cluster running an A100 GPU
 	* Install NVIDIA driver into Kubernetes cluster
 	* Launch the Stable Diffusion Kubernetes deployment
 	* Expose Stable Diffusion to the public internet using a Kubernetes Service
 ### How to use
 Once `./start.sh` finishes running, it will output `load_balancer_ip`, for example: `load_balancer_ip: "34.172.48.137"`. Use the IP provided to query Stable Diffusion.
 Parameters:
 ```
 prompt=args.get("prompt", "a professional photograph of an astronaut riding a triceratops"),
 outdir=args.get("outdir", "static"),
 steps=args.get("steps", 50),
 plms=args.get("plms", False),
 dpm=args.get("dpm", False),
 fixed_code=args.get("fixed_code", False),
 ddim_eta=args.get("ddim_eta", 0.0),
 n_iter=args.get("n_iter", 3),
 H=args.get("H", 512),
 W=args.get("W", 512),
 C=args.get("C", 4),
 f=args.get("f", 8),
 n_samples=args.get("n_samples", 3),
 n_rows=args.get("n_rows", 0),
 scale=args.get("scale", 9.0),
 from_file=args.get("from_file", None),
 config=args.get("config", "configs/stable-diffusion/v2-inference-v.yaml"),
 ckpt=args.get("ckpt", "checkpoints/v2-1_768-ema-pruned.ckpt"),
 seed=args.get("seed", 42),
 precision=args.get("precision", "autocast"),
 repeat=args.get("repeat", 1),
 device=args.get("device", "cpu"),
 torchscript=args.get("torchscript", False),
 ipex=args.get("ipex", False),
 bf16=args.get("bf16", False)
 ```
 For example: `http://34.172.48.137/?prompt=Your_Query_Here`. Replace `Your_Query_Here` with your desired query text.
 To check the generated images, navigate to `http://34.172.48.137/images`.
 Remember to URL-encode the text parameter if it contains special characters or spaces. For example, you can replace spaces with `%20`.
 ### Delete cluster and revert all changes
 To delete the cluster and revert all changes, execute in your terminal: `./destroy.sh`.
 ### Support
 If you like this project and find it useful, please consider giving it a star. Your support is appreciated! :hearts:
 If you have any questions or suggestions, feel free to reach out to Carlos at calufa@gmail.com or connect on LinkedIn: https://www.linkedin.com/in/carloschinchilla/.
--- a/infra/requirements.txt
+++ b/infra/requirements.txt
@ -0,0 +1,5 @@
 pulumi==3.64.0
 pulumi-gcp==6.54.0
 pulumi-docker==4.1.2
 pulumi-kubernetes==3.25.0
 google-auth==2.17.3
--- a/infra/server.py
+++ b/infra/server.py
@ -0,0 +1,51 @@
 import os
 from flask import Flask, request, jsonify, render_template
 import argparse
 from scripts.txt2img import main
 app = Flask(__name__, template_folder='.')
@app.route("/", methods=["GET"])
 def index():
    args = request.args
    opt = argparse.Namespace(
        prompt=args.get("prompt", "a professional photograph of an astronaut riding a triceratops"),
        outdir=args.get("outdir", "static"),
        steps=args.get("steps", 50),
        plms=args.get("plms", False),
        dpm=args.get("dpm", False),
        fixed_code=args.get("fixed_code", False),
        ddim_eta=args.get("ddim_eta", 0.0),
        n_iter=args.get("n_iter", 3),
        H=args.get("H", 512),
        W=args.get("W", 512),
        C=args.get("C", 4),
        f=args.get("f", 8),
        n_samples=args.get("n_samples", 3),
        n_rows=args.get("n_rows", 0),
        scale=args.get("scale", 9.0),
        from_file=args.get("from_file", None),
        config=args.get("config", "configs/stable-diffusion/v2-inference-v.yaml"),
        ckpt=args.get("ckpt", "checkpoints/v2-1_768-ema-pruned.ckpt"),
        seed=args.get("seed", 42),
        precision=args.get("precision", "autocast"),
        repeat=args.get("repeat", 1),
        device=args.get("device", "cpu"),
        torchscript=args.get("torchscript", False),
        ipex=args.get("ipex", False),
        bf16=args.get("bf16", False)
    )
    main(opt)
    return jsonify({"message": "Image generated successfully"})
@app.route('/images')
 def images():
    images = os.listdir('/app/static')
    images = [f"/app/static/{image}" for image in images]
    return render_template('index.html', images=images)
 if __name__ == "__main__":
    # The server binds to every interface and is published through a public
    # load balancer. Debug mode would expose the interactive Werkzeug
    # debugger (arbitrary code execution), so it stays off.
    app.run(host="0.0.0.0", port=80, debug=False)
--- a/infra/start.sh
+++ b/infra/start.sh
@ -0,0 +1,10 @@
 # Load the deployment settings (NAME, PROJECT, REGION, ...) from .env.
 source .env
 # Rebuild ./app from scratch as a copy of the parent directory; the Dockerfile
 # copies its contents into the image.
 rm -rf ./app
 # NOTE(review): confirm the intended pattern for --exclude='.'
 rsync --exclude='.' --recursive --copy-links ../ ./app
 # Store the settings in the "dev" stack; main.py reads them via pulumi.Config().
 # NOTE(review): the variables are unquoted, so values containing spaces would be split.
 pulumi config set name $NAME --stack dev
 pulumi config set project $PROJECT --stack dev
 pulumi config set region $REGION --stack dev
 pulumi config set node_count $NODE_COUNT --stack dev
 pulumi config set machine_type $MACHINE_TYPE --stack dev
 pulumi config set replicas $REPLICAS --stack dev
 # Deploy the "dev" stack without prompting for confirmation.
 pulumi up --yes --stack dev
		`@ -0,0 +1,2 @@`
							`source .env`
							`pulumi destroy --yes --stack dev`