apiVersion: ray.io/v1
kind: RayService
metadata:
  name: stable-diffusion
spec:
  serveConfigV2: |
    applications:
      - name: stable_diffusion
        import_path: stable_diffusion.stable_diffusion:entrypoint
        runtime_env:
          working_dir: "https://github.com/ray-project/serve_config_examples/archive/b8af221bc0e5d6cc7daa0dd071295fd9e456e671.zip"
          pip: ["diffusers==0.32.2", "transformers==4.48.2"]
  rayClusterConfig:
    rayVersion: '2.46.0' # Should match the Ray version in the container images
    ######################headGroupSpecs#################################
    # Ray head pod template.
    headGroupSpec:
      rayStartParams: {}
      # Pod template
      template:
        spec:
          containers:
            - name: ray-head
              image: rayproject/ray-ml:2.46.0.0e19ea-py39-gpu
              volumeMounts:
                - mountPath: /tmp/ray
                  name: ray-logs
              resources:
                limits:
                  cpu: "2"
                  memory: "8G"
                requests:
                  cpu: "2"
                  memory: "8G"
          volumes:
            - name: ray-logs
              emptyDir: {}
    workerGroupSpecs:
      # The number of pod replicas in this worker group
      - replicas: 1
        minReplicas: 1
        maxReplicas: 10
        groupName: gpu-group
        rayStartParams: {}
        # Pod template
        template:
          spec:
            containers:
              - name: ray-worker
                image: rayproject/ray-ml:2.46.0.0e19ea-py39-gpu
                resources:
                  limits:
                    cpu: 4
                    memory: "16G"
                    nvidia.com/gpu: 1
                  requests:
                    cpu: 3
                    memory: "12G"
                    nvidia.com/gpu: 1
            # Add the matching taint (ray.io/node-type=worker:NoSchedule) to the GPU nodes;
            # this toleration lets the Ray worker pods schedule onto them.
            tolerations:
              - key: "ray.io/node-type"
                operator: "Equal"
                value: "worker"
                effect: "NoSchedule"
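# ---------------------------------------------------------------------------
# Usage sketch, under these assumptions: the Serve application in the
# working_dir referenced above exposes GET /imagine with a "prompt" query
# parameter, Serve listens on the default HTTP port 8000, and KubeRay creates
# the usual <rayservice-name>-serve-svc Service (here stable-diffusion-serve-svc)
# once the application is ready. The manifest filename is illustrative.
#
#   kubectl apply -f stable-diffusion.rayservice.yaml
#   kubectl port-forward svc/stable-diffusion-serve-svc 8000:8000
#   curl -o cat.png "http://127.0.0.1:8000/imagine?prompt=a%20cute%20cat"
# ---------------------------------------------------------------------------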