# For most use-cases, it makes sense to schedule one Ray pod per Kubernetes node.
# This is a configuration for an autoscaling RayCluster with 1 Ray head pod and 1 Ray worker pod,
# with capacity to scale up to 10 worker pods.
# Each pod requests 54 Gi memory and 14 CPU.
# Each pod can be scheduled on a virtual machine with roughly 64 Gi memory and 16 CPU.
# (AWS: m5.4xlarge, GCP: e2-standard-16, Azure: Standard_D5_v2)
# Optimal resource allocation will depend on your Kubernetes infrastructure and might
# require some experimentation.
# The Ray autoscaler and KubeRay operator scale Ray pod quantities.
# To achieve Kubernetes node autoscaling with this example, we recommend setting up an autoscaling node group/pool with
# - 2 nodes minimum, for the Ray head pod and the initial Ray worker pod
# - 11 nodes maximum, to accommodate up to 10 Ray worker pods.
apiVersion: ray.io/v1alpha1
kind: RayCluster
metadata:
  labels:
    controller-tools.k8s.io: "1.0"
  # A unique identifier for the head node and workers of this cluster.
  name: raycluster-autoscaler
spec:
  # The version of Ray you are using. Make sure all Ray containers are running this version of Ray.
  rayVersion: '2.0.0'
  # If enableInTreeAutoscaling is true, the autoscaler sidecar will be added to the Ray head pod.
  # Ray autoscaler integration is supported only for Ray versions >= 1.11.0.
  # Ray autoscaler integration is Beta with KubeRay >= 0.3.0 and Ray >= 2.0.0.
  enableInTreeAutoscaling: true
  # autoscalerOptions is an OPTIONAL field specifying configuration overrides for the Ray autoscaler.
  # The example configuration shown below represents the DEFAULT values.
  # (You may delete autoscalerOptions if the defaults are suitable.)
  autoscalerOptions:
    # upscalingMode is "Conservative", "Default", or "Aggressive".
    # Conservative: Upscaling is rate-limited; the number of pending worker pods is at most the size of the Ray cluster.
    # Default: Upscaling is not rate-limited.
    # Aggressive: An alias for Default; upscaling is not rate-limited.
    upscalingMode: Default
    # idleTimeoutSeconds is the number of seconds to wait before scaling down a worker pod which is not using Ray resources.
    idleTimeoutSeconds: 60
    # image optionally overrides the autoscaler's container image.
    # If instance.spec.rayVersion is at least "2.0.0", the autoscaler will default to the same image as
    # the ray container. For older Ray versions, the autoscaler will default to using the Ray 2.0.0 image.
    ## image: "my-repo/my-custom-autoscaler-image:tag"
    # imagePullPolicy optionally overrides the autoscaler container's image pull policy.
    imagePullPolicy: Always
    # resources specifies optional resource request and limit overrides for the autoscaler container.
    # For large Ray clusters, we recommend monitoring container resource usage to determine if overriding the defaults is required.
    resources:
      limits:
        cpu: "500m"
        memory: "512Mi"
      requests:
        cpu: "500m"
        memory: "512Mi"
  ######################headGroupSpec#################################
  # head group template and specs
  headGroupSpec:
    # Kubernetes Service type; valid values are 'ClusterIP', 'NodePort' and 'LoadBalancer'.
    serviceType: ClusterIP
    # logical group name; here it is called head-group, but it can also be a functional name
    # pod type head or worker
    # rayNodeType: head # Not needed since it is under headGroupSpec
    # the following params are used to complete the ray start command: ray start --head --block --port=6379 ...
    rayStartParams:
      # The flag "no-monitor" will be set automatically when autoscaling is enabled.
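      # Each entry below is passed to the `ray start` command as a `--key=value` flag;
      # for example, dashboard-host: '0.0.0.0' becomes --dashboard-host=0.0.0.0.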
      dashboard-host: '0.0.0.0'
      block: 'true'
      # num-cpus: '14' # can be auto-completed from the limits
      # Use `resources` to optionally specify custom resource annotations for the Ray node.
      # The value of `resources` is a string-integer mapping.
      # Currently, `resources` must be provided in the specific format demonstrated below:
      # resources: '"{\"Custom1\": 1, \"Custom2\": 5}"'
    # pod template
    template:
      spec:
        containers:
        # The Ray head container
        - name: ray-head
          image: rayproject/ray:2.0.0
          imagePullPolicy: Always
          # Optimal resource allocation will depend on your Kubernetes infrastructure and might
          # require some experimentation.
          # Setting requests=limits is recommended with Ray. K8s limits are used for Ray-internal
          # resource accounting. K8s requests are not used by Ray.
          resources:
            limits:
              cpu: 14
              memory: 54Gi
            requests:
              cpu: 14
              memory: 54Gi
          ports:
          - containerPort: 6379
            name: gcs
          - containerPort: 8265
            name: dashboard
          - containerPort: 10001
            name: client
          lifecycle:
            preStop:
              exec:
                command: ["/bin/sh","-c","ray stop"]
  workerGroupSpecs:
  # the number of pod replicas in this worker group
  - replicas: 1
    minReplicas: 1
    maxReplicas: 10
    # logical group name; here it is called large-group, but it can also be a functional name
    groupName: large-group
    # If worker pods need to be added, we can simply increment the replicas.
    # If worker pods need to be removed, we decrement the replicas and populate the workersToDelete list.
    # The operator will remove pods from the list until the number of replicas is satisfied.
    # When a pod is confirmed to be deleted, its name will be removed from the list below.
    #scaleStrategy:
    #  workersToDelete:
    #  - raycluster-complete-worker-large-group-bdtwh
    #  - raycluster-complete-worker-large-group-hv457
    #  - raycluster-complete-worker-large-group-k8tj7
    # the following params are used to complete the ray start command: ray start --block --node-ip-address= ...
    rayStartParams:
      block: 'true'
    # pod template
    template:
      metadata:
        labels:
          key: value
        # annotations for pod
        annotations:
          key: value
      spec:
        initContainers:
        # The env var $RAY_IP is set by the operator, if not already set, to the name of the head service.
        - name: init-myservice
          image: busybox:1.28
          command: ['sh', '-c', "until nslookup $RAY_IP.$(cat /var/run/secrets/kubernetes.io/serviceaccount/namespace).svc.cluster.local; do echo waiting for myservice; sleep 2; done"]
        containers:
        # The container name must consist of lower case alphanumeric characters or '-',
        # and must start and end with an alphanumeric character (e.g. 'my-name' or '123-abc').
        - name: machine-learning
          image: rayproject/ray:2.0.0
          # Optimal resource allocation will depend on your Kubernetes infrastructure and might
          # require some experimentation.
          # Setting requests=limits is recommended with Ray. K8s limits are used for Ray-internal
          # resource accounting. K8s requests are not used by Ray.
          resources:
            limits:
              cpu: 14
              memory: 54Gi
            requests:
              cpu: 14
              memory: 54Gi
          lifecycle:
            preStop:
              exec:
                command: ["/bin/sh","-c","ray stop"]
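# A minimal usage sketch, assuming the KubeRay operator is already installed in the cluster and
# this manifest is saved as ray-cluster.autoscaler.yaml (the filename is illustrative).
# The label and Service names below follow KubeRay's conventions (ray.io/cluster=<cluster name>,
# <cluster name>-head-svc) and may differ across KubeRay versions.
#
#   kubectl apply -f ray-cluster.autoscaler.yaml
#   kubectl get pods -l ray.io/cluster=raycluster-autoscaler
#   # Forward the client and dashboard ports defined above, then connect from a local Ray driver
#   # with ray.init("ray://127.0.0.1:10001") or open the dashboard at http://127.0.0.1:8265:
#   kubectl port-forward service/raycluster-autoscaler-head-svc 10001:10001 8265:8265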