---
# RayCluster whose node groups carry Ray node labels, so that workloads can
# target groups with label selectors (see the example scripts in the
# ray-example ConfigMap further down this file).
kind: RayCluster
apiVersion: ray.io/v1
metadata:
  name: ray-label-cluster
spec:
  enableInTreeAutoscaling: true
  # Settings for the autoscaler that enableInTreeAutoscaling turns on.
  autoscalerOptions:
    version: v2
    upscalingMode: Default
    idleTimeoutSeconds: 600
    imagePullPolicy: Always
    # Requests equal limits for both CPU and memory.
    resources:
      requests:
        cpu: "500m"
        memory: "512Mi"
      limits:
        cpu: "500m"
        memory: "512Mi"
    # No extra environment or security settings are applied.
    env: []
    envFrom: []
    securityContext: {}
  # Head group: labelled with its region and advertising zero Ray CPUs
  # (presumably so that CPU work is placed on worker groups - confirm).
  headGroupSpec:
    labels:
      ray.io/region: us-central2
    resources:
      CPU: "0"
    template:
      spec:
        containers:
        - name: ray-head
          image: rayproject/ray:nightly
          ports:
          - containerPort: 6379
            name: gcs
          - containerPort: 8265
            name: dashboard
          - containerPort: 10001
            name: client
          # NOTE(review): the memory request (2Gi) is lower than the limit
          # (5Gi), unlike every other resources stanza in this file where
          # requests equal limits - confirm this is intentional.
          resources:
            limits:
              cpu: "1"
              memory: "5Gi"
            requests:
              cpu: "1"
              memory: "2Gi"
          # Example scripts from the ray-example ConfigMap.
          volumeMounts:
          - mountPath: /home/ray/samples
            name: ray-example-configmap
        volumes:
        - name: ray-example-configmap
          configMap:
            name: ray-example
            # 511 decimal == 0777 octal (rwxrwxrwx). Written in decimal
            # because a leading-zero integer is octal only under YAML 1.1;
            # YAML 1.2 loaders read 0777 as decimal 777, which is outside
            # the 0-511 range Kubernetes accepts for mode bits.
            defaultMode: 511
            items:
            - key: example_task.py
              path: example_task.py
            - key: example_actor.py
              path: example_actor.py
            - key: example_placement_group.py
              path: example_placement_group.py
  workerGroupSpecs:
  # On-demand Intel CPU group; always keeps at least one replica.
  - replicas: 1
    minReplicas: 1
    maxReplicas: 10
    groupName: large-cpu-group
    labels:
      cpu-family: intel
      ray.io/market-type: on-demand
    rayStartParams: {}
    template:
      spec:
        containers:
        - name: ray-worker
          image: rayproject/ray:nightly
          resources:
            limits:
              cpu: "2"
              memory: "4Gi"
            requests:
              cpu: "2"
              memory: "4Gi"
        nodeSelector:
          # GKE publishes machine-series label values in lowercase and
          # nodeSelector matching is case-sensitive, so the value must be
          # "n4" rather than "N4" for the pod to match any node.
          cloud.google.com/machine-family: "n4"
  # A100 accelerator group; scales from zero.
  - replicas: 0
    minReplicas: 0
    maxReplicas: 10
    groupName: accelerator-group
    labels:
      ray.io/accelerator-type: A100
      # Must agree with the gke-spot nodeSelector below: these pods are
      # pinned to Spot nodes, so the Ray label has to say "spot" or
      # label selectors asking for on-demand capacity could land here.
      ray.io/market-type: spot
      ray.io/region: us-central2
    # NOTE(review): the group advertises one GPU to Ray, but the container
    # below requests no nvidia.com/gpu device - confirm whether a device
    # request is needed on the target node pool.
    resources:
      GPU: "1"
    template:
      spec:
        containers:
        - name: ray-worker
          image: rayproject/ray:nightly-gpu
          resources:
            limits:
              cpu: "1"
              memory: "1Gi"
            requests:
              cpu: "1"
              memory: "1Gi"
        nodeSelector:
          cloud.google.com/gke-spot: "true"
          cloud.google.com/gke-accelerator: "nvidia-tesla-a100"
  # Spot AMD CPU group; starts with no replicas and may grow to five.
  - groupName: spot-group
    replicas: 0
    minReplicas: 0
    maxReplicas: 5
    rayStartParams: {}
    # Ray node labels that label selectors can match against.
    labels:
      ray.io/market-type: spot
      cpu-family: amd
    template:
      spec:
        # Pin the pods to GKE Spot nodes, in line with the market-type label.
        nodeSelector:
          cloud.google.com/gke-spot: "true"
        containers:
        - name: ray-worker
          image: rayproject/ray:nightly
          # Requests equal limits for both CPU and memory.
          resources:
            requests:
              cpu: "1"
              memory: "1Gi"
            limits:
              cpu: "1"
              memory: "1Gi"
---
# Example scripts exercising label selectors. The RayCluster head group in
# this file mounts this ConfigMap at /home/ray/samples.
apiVersion: v1
kind: ConfigMap
metadata:
  name: ray-example
data:
  # Task needing 1 CPU on a node labelled market-type "on-demand" whose
  # cpu-family is intel or amd. In this file only large-cpu-group carries
  # both of those labels.
  example_task.py: |
    import ray
    @ray.remote(num_cpus=1, label_selector={"ray.io/market-type": "on-demand", "cpu-family": "in(intel,amd)"})
    def test_task():
      pass
    ray.init()
    ray.get(test_task.remote())
  # Actor needing 1 GPU on a node labelled accelerator-type "A100"
  # (accelerator-group is the only group in this file with that label).
  example_actor.py: |
    import ray
    @ray.remote(num_gpus=1, label_selector={"ray.io/accelerator-type": "A100"})
    class Actor:
      def ready(self):
        return True
    ray.init()
    my_actor = Actor.remote()
    ray.get(my_actor.ready.remote())
  # Placement group of two 1-CPU bundles using the SPREAD strategy; each
  # bundle selects market-type "spot" and a region other than us-central2.
  # spot-group is the only spot-labelled group in this file that does not
  # carry ray.io/region: us-central2.
  example_placement_group.py: |
    import ray
    from ray.util.placement_group import placement_group
    ray.init()
    pg = placement_group(
      [{"CPU": 1}] * 2,
      bundle_label_selector=[{"ray.io/market-type": "spot", "ray.io/region": "!us-central2"},] * 2, strategy="SPREAD"
    )
    ray.get(pg.ready())