apiVersion: ray.io/v1
kind: RayService
metadata:
  name: stable-diffusion
spec:
  serviceUnhealthySecondThreshold: 900 # Config for the health check threshold for Ray Serve applications. Default value is 900.
  deploymentUnhealthySecondThreshold: 300 # Config for the health check threshold for Ray dashboard agent. Default value is 300.
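  # NOTE: recent KubeRay releases (v1.1+) deprecate the two thresholds above; if you run a
  # newer operator, check the KubeRay docs for your version before relying on them.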
  serveConfigV2: |
    applications:
      - name: stable_diffusion
        import_path: stable_diffusion.stable_diffusion:entrypoint
        runtime_env:
          working_dir: "https://github.com/ray-project/serve_config_examples/archive/d6acf9b99ef076a1848f506670e1290a11654ec2.zip"
          pip: ["diffusers==0.12.1"]
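  # Once the application is healthy, KubeRay exposes it through a Service named
  # "<rayservice-name>-serve-svc" (here: stable-diffusion-serve-svc) on port 8000.
  # A minimal smoke test, assuming the sample app serves a GET /imagine endpoint:
  #   kubectl port-forward svc/stable-diffusion-serve-svc 8000:8000
  #   curl -o image.png "http://localhost:8000/imagine?prompt=a%20cute%20cat"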
  rayClusterConfig:
    rayVersion: '2.7.0' # Should match the Ray version in the container images.
    ###################### headGroupSpec #################################
    # Ray head pod template.
    headGroupSpec:
      # The `rayStartParams` are used to configure the `ray start` command.
      # See https://github.com/ray-project/kuberay/blob/master/docs/guidance/rayStartParams.md for the default settings of `rayStartParams` in KubeRay.
      # See https://docs.ray.io/en/latest/cluster/cli.html#ray-start for all available options in `rayStartParams`.
      rayStartParams:
        dashboard-host: '0.0.0.0'
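        # Binding to 0.0.0.0 makes the dashboard reachable from outside the container,
        # e.g. via port-forwarding (a sketch; substitute your actual head pod name):
        #   kubectl port-forward <head-pod-name> 8265:8265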
      # Pod template
      template:
        spec:
          containers:
          - name: ray-head
            image: rayproject/ray-ml:2.7.0
            ports:
            - containerPort: 6379
              name: gcs
            - containerPort: 8265
              name: dashboard
            - containerPort: 10001
              name: client
            - containerPort: 8000
              name: serve
            volumeMounts:
              - mountPath: /tmp/ray
                name: ray-logs
            resources:
              limits:
                cpu: "2"
                memory: "8G"
              requests:
                cpu: "2"
                memory: "8G"
          volumes:
            - name: ray-logs
              emptyDir: {}
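          # Ray writes its logs under /tmp/ray; with an emptyDir volume they persist for
          # the pod's lifetime. Example inspection command (a sketch; substitute the pod name):
          #   kubectl exec <head-pod-name> -- ls /tmp/ray/session_latest/logs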
    workerGroupSpecs:
    # The pod replicas in this worker group.
    - replicas: 1
      minReplicas: 1
      maxReplicas: 10
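      # minReplicas/maxReplicas are consumed by the Ray autoscaler, which this manifest
      # does not enable (see enableInTreeAutoscaling in the KubeRay docs); without it,
      # the group stays at `replicas`.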
      groupName: gpu-group
      rayStartParams: {}
      # Pod template
      template:
        spec:
          containers:
          - name: ray-worker
            image: rayproject/ray-ml:2.7.0
            resources:
              limits:
                cpu: 4
                memory: "16G"
                nvidia.com/gpu: 1
              requests:
                cpu: 3
                memory: "12G"
                nvidia.com/gpu: 1
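            # KubeRay derives the Ray-level GPU count (num-gpus) from the
            # nvidia.com/gpu limit, so rayStartParams needs no manual GPU setting.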
          # These tolerations let the worker schedule onto GPU nodes tainted with
          # ray.io/node-type=worker:NoSchedule (a matching taint command is sketched below).
          tolerations:
            - key: "ray.io/node-type"
              operator: "Equal"
              value: "worker"
              effect: "NoSchedule"
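            # The taint can be applied with, e.g. (a sketch; substitute the node name):
            #   kubectl taint nodes <gpu-node-name> ray.io/node-type=worker:NoSchedule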