# Namespace "kfserving-system". The RBAC bindings later in this manifest
# reference ServiceAccounts that live in this namespace.
apiVersion: v1
kind: Namespace
metadata:
  name: kfserving-system
  labels:
    # NOTE(review): presumably opts the namespace out of Istio sidecar
    # injection — confirm against the Istio injector configuration in use.
    istio-injection: disabled
    control-plane: kfserving-controller-manager
    # Quoted so the value stays the string "1.0" rather than a float.
    controller-tools.k8s.io: "1.0"
---
# CustomResourceDefinition for the namespaced InferenceService resource
# (group serving.kubeflow.org, version v1alpha2).
# NOTE(review): apiextensions.k8s.io/v1beta1 is reportedly no longer served
# from Kubernetes 1.22 onwards — confirm the target cluster versions before
# reusing this manifest.
apiVersion: apiextensions.k8s.io/v1beta1
kind: CustomResourceDefinition
metadata:
  creationTimestamp: null
  labels:
    controller-tools.k8s.io: "1.0"
  name: inferenceservices.serving.kubeflow.org
spec:
  # Extra columns for tabular listings; each one reads a single field of the
  # object through the given JSONPath.
  additionalPrinterColumns:
  - JSONPath: .status.url
    name: URL
    type: string
  - JSONPath: .status.conditions[?(@.type=='Ready')].status
    name: Ready
    type: string
  - JSONPath: .status.traffic
    name: Default Traffic
    type: integer
  - JSONPath: .status.canaryTraffic
    name: Canary Traffic
    type: integer
  - JSONPath: .metadata.creationTimestamp
    name: Age
    type: date
  group: serving.kubeflow.org
  names:
    kind: InferenceService
    plural: inferenceservices
    shortNames:
    - inferenceservice
  scope: Namespaced
  # OpenAPI v3 schema for InferenceService objects. Several leaf fields
  # (container, resources, config) are declared only as "type: object" with
  # no nested properties, so their contents are not described here.
  validation:
    openAPIV3Schema:
      properties:
        apiVersion:
          description: 'APIVersion defines the versioned schema of this representation
            of an object. Servers should convert recognized schemas to the latest
            internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#resources'
          type: string
        kind:
          description: 'Kind is a string value representing the REST resource this
            object represents. Servers may infer this from the endpoint the client
            submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds'
          type: string
        metadata:
          type: object
        spec:
          properties:
            # Optional endpoint set (not listed under spec.required). It
            # declares the same explainer/predictor/transformer properties as
            # "default" further down.
            canary:
              description: Canary defines an alternate endpoints to route a percentage
                of traffic.
              properties:
                explainer:
                  description: Explainer defines the model explanation service spec,
                    explainer service calls to predictor or transformer if it is specified.
                  properties:
                    alibi:
                      description: Spec for alibi explainer
                      properties:
                        config:
                          description: Inline custom parameter settings for explainer
                          type: object
                        resources:
                          description: Defaults to requests and limits of 1CPU, 2Gb
                            MEM.
                          type: object
                        runtimeVersion:
                          description: Defaults to latest Alibi Version
                          type: string
                        storageUri:
                          description: The location of a trained explanation model
                          type: string
                        type:
                          description: The type of Alibi explainer
                          type: string
                      required:
                      - type
                      type: object
                    custom:
                      description: Spec for a custom explainer
                      properties:
                        container:
                          type: object
                      required:
                      - container
                      type: object
                    logger:
                      description: Activate request/response logging
                      properties:
                        mode:
                          description: What payloads to log
                          type: string
                        url:
                          description: URL to send request logging CloudEvents
                          type: string
                      type: object
                    maxReplicas:
                      description: This is the up bound for autoscaler to scale to
                      format: int64
                      type: integer
                    minReplicas:
                      description: Minimum number of replicas, pods won't scale down
                        to 0 in case of no traffic
                      format: int64
                      type: integer
                    parallelism:
                      description: Parallelism, how many requests can be processed
                        concurrently
                      format: int64
                      type: integer
                    serviceAccountName:
                      description: ServiceAccountName is the name of the ServiceAccount
                        to use to run the service
                      type: string
                  type: object
                # Every framework-specific predictor below requires storageUri;
                # the custom predictor requires container instead.
                predictor:
                  description: Predictor defines the model serving spec +required
                  properties:
                    custom:
                      description: Spec for a custom predictor
                      properties:
                        container:
                          type: object
                      required:
                      - container
                      type: object
                    logger:
                      description: Activate request/response logging
                      properties:
                        mode:
                          description: What payloads to log
                          type: string
                        url:
                          description: URL to send request logging CloudEvents
                          type: string
                      type: object
                    maxReplicas:
                      description: This is the up bound for autoscaler to scale to
                      format: int64
                      type: integer
                    minReplicas:
                      description: Minimum number of replicas, pods won't scale down
                        to 0 in case of no traffic
                      format: int64
                      type: integer
                    onnx:
                      description: Spec for ONNX runtime (https://github.com/microsoft/onnxruntime)
                      properties:
                        resources:
                          description: Defaults to requests and limits of 1CPU, 2Gb
                            MEM.
                          type: object
                        runtimeVersion:
                          description: Allowed runtime versions are specified in the
                            inferenceservice config map
                          type: string
                        storageUri:
                          description: The location of the trained model
                          type: string
                      required:
                      - storageUri
                      type: object
                    parallelism:
                      description: Parallelism, how many requests can be processed
                        concurrently
                      format: int64
                      type: integer
                    pytorch:
                      description: Spec for PyTorch predictor
                      properties:
                        modelClassName:
                          description: Defaults PyTorch model class name to 'PyTorchModel'
                          type: string
                        resources:
                          description: Defaults to requests and limits of 1CPU, 2Gb
                            MEM.
                          type: object
                        runtimeVersion:
                          description: Allowed runtime versions are specified in the
                            inferenceservice config map
                          type: string
                        storageUri:
                          description: The location of the trained model
                          type: string
                      required:
                      - storageUri
                      type: object
                    serviceAccountName:
                      description: ServiceAccountName is the name of the ServiceAccount
                        to use to run the service
                      type: string
                    sklearn:
                      description: Spec for SKLearn predictor
                      properties:
                        resources:
                          description: Defaults to requests and limits of 1CPU, 2Gb
                            MEM.
                          type: object
                        runtimeVersion:
                          description: Allowed runtime versions are specified in the
                            inferenceservice config map
                          type: string
                        storageUri:
                          description: The location of the trained model
                          type: string
                      required:
                      - storageUri
                      type: object
                    tensorflow:
                      description: Spec for Tensorflow Serving (https://github.com/tensorflow/serving)
                      properties:
                        resources:
                          description: Defaults to requests and limits of 1CPU, 2Gb
                            MEM.
                          type: object
                        runtimeVersion:
                          description: Allowed runtime versions are specified in the
                            inferenceservice config map.
                          type: string
                        storageUri:
                          description: The location of the trained model
                          type: string
                      required:
                      - storageUri
                      type: object
                    tensorrt:
                      description: Spec for TensorRT Inference Server (https://github.com/NVIDIA/triton-inference-server)
                      properties:
                        resources:
                          description: Defaults to requests and limits of 1CPU, 2Gb
                            MEM.
                          type: object
                        runtimeVersion:
                          description: Allowed runtime versions are specified in the
                            inferenceservice config map
                          type: string
                        storageUri:
                          description: The location of the trained model
                          type: string
                      required:
                      - storageUri
                      type: object
                    xgboost:
                      description: Spec for XGBoost predictor
                      properties:
                        nthread:
                          description: Number of thread to be used by XGBoost
                          format: int64
                          type: integer
                        resources:
                          description: Defaults to requests and limits of 1CPU, 2Gb
                            MEM.
                          type: object
                        runtimeVersion:
                          description: Allowed runtime versions are specified in the
                            inferenceservice config map
                          type: string
                        storageUri:
                          description: The location of the trained model
                          type: string
                      required:
                      - storageUri
                      type: object
                  type: object
                transformer:
                  description: Transformer defines the pre/post processing before
                    and after the predictor call, transformer service calls to predictor
                    service.
                  properties:
                    custom:
                      description: Spec for a custom transformer
                      properties:
                        container:
                          type: object
                      required:
                      - container
                      type: object
                    logger:
                      description: Activate request/response logging
                      properties:
                        mode:
                          description: What payloads to log
                          type: string
                        url:
                          description: URL to send request logging CloudEvents
                          type: string
                      type: object
                    maxReplicas:
                      description: This is the up bound for autoscaler to scale to
                      format: int64
                      type: integer
                    minReplicas:
                      description: Minimum number of replicas, pods won't scale down
                        to 0 in case of no traffic
                      format: int64
                      type: integer
                    parallelism:
                      description: Parallelism, how many requests can be processed
                        concurrently
                      format: int64
                      type: integer
                    serviceAccountName:
                      description: ServiceAccountName is the name of the ServiceAccount
                        to use to run the service
                      type: string
                  type: object
              required:
              - predictor
              type: object
            canaryTrafficPercent:
              description: CanaryTrafficPercent defines the percentage of traffic
                going to canary InferenceService endpoints
              format: int64
              type: integer
            # Required endpoint set (listed under spec.required below); only
            # its predictor is mandatory.
            default:
              description: Default defines default InferenceService endpoints +required
              properties:
                explainer:
                  description: Explainer defines the model explanation service spec,
                    explainer service calls to predictor or transformer if it is specified.
                  properties:
                    alibi:
                      description: Spec for alibi explainer
                      properties:
                        config:
                          description: Inline custom parameter settings for explainer
                          type: object
                        resources:
                          description: Defaults to requests and limits of 1CPU, 2Gb
                            MEM.
                          type: object
                        runtimeVersion:
                          description: Defaults to latest Alibi Version
                          type: string
                        storageUri:
                          description: The location of a trained explanation model
                          type: string
                        type:
                          description: The type of Alibi explainer
                          type: string
                      required:
                      - type
                      type: object
                    custom:
                      description: Spec for a custom explainer
                      properties:
                        container:
                          type: object
                      required:
                      - container
                      type: object
                    logger:
                      description: Activate request/response logging
                      properties:
                        mode:
                          description: What payloads to log
                          type: string
                        url:
                          description: URL to send request logging CloudEvents
                          type: string
                      type: object
                    maxReplicas:
                      description: This is the up bound for autoscaler to scale to
                      format: int64
                      type: integer
                    minReplicas:
                      description: Minimum number of replicas, pods won't scale down
                        to 0 in case of no traffic
                      format: int64
                      type: integer
                    parallelism:
                      description: Parallelism, how many requests can be processed
                        concurrently
                      format: int64
                      type: integer
                    serviceAccountName:
                      description: ServiceAccountName is the name of the ServiceAccount
                        to use to run the service
                      type: string
                  type: object
                # Every framework-specific predictor below requires storageUri;
                # the custom predictor requires container instead.
                predictor:
                  description: Predictor defines the model serving spec +required
                  properties:
                    custom:
                      description: Spec for a custom predictor
                      properties:
                        container:
                          type: object
                      required:
                      - container
                      type: object
                    logger:
                      description: Activate request/response logging
                      properties:
                        mode:
                          description: What payloads to log
                          type: string
                        url:
                          description: URL to send request logging CloudEvents
                          type: string
                      type: object
                    maxReplicas:
                      description: This is the up bound for autoscaler to scale to
                      format: int64
                      type: integer
                    minReplicas:
                      description: Minimum number of replicas, pods won't scale down
                        to 0 in case of no traffic
                      format: int64
                      type: integer
                    onnx:
                      description: Spec for ONNX runtime (https://github.com/microsoft/onnxruntime)
                      properties:
                        resources:
                          description: Defaults to requests and limits of 1CPU, 2Gb
                            MEM.
                          type: object
                        runtimeVersion:
                          description: Allowed runtime versions are specified in the
                            inferenceservice config map
                          type: string
                        storageUri:
                          description: The location of the trained model
                          type: string
                      required:
                      - storageUri
                      type: object
                    parallelism:
                      description: Parallelism, how many requests can be processed
                        concurrently
                      format: int64
                      type: integer
                    pytorch:
                      description: Spec for PyTorch predictor
                      properties:
                        modelClassName:
                          description: Defaults PyTorch model class name to 'PyTorchModel'
                          type: string
                        resources:
                          description: Defaults to requests and limits of 1CPU, 2Gb
                            MEM.
                          type: object
                        runtimeVersion:
                          description: Allowed runtime versions are specified in the
                            inferenceservice config map
                          type: string
                        storageUri:
                          description: The location of the trained model
                          type: string
                      required:
                      - storageUri
                      type: object
                    serviceAccountName:
                      description: ServiceAccountName is the name of the ServiceAccount
                        to use to run the service
                      type: string
                    sklearn:
                      description: Spec for SKLearn predictor
                      properties:
                        resources:
                          description: Defaults to requests and limits of 1CPU, 2Gb
                            MEM.
                          type: object
                        runtimeVersion:
                          description: Allowed runtime versions are specified in the
                            inferenceservice config map
                          type: string
                        storageUri:
                          description: The location of the trained model
                          type: string
                      required:
                      - storageUri
                      type: object
                    tensorflow:
                      description: Spec for Tensorflow Serving (https://github.com/tensorflow/serving)
                      properties:
                        resources:
                          description: Defaults to requests and limits of 1CPU, 2Gb
                            MEM.
                          type: object
                        runtimeVersion:
                          description: Allowed runtime versions are specified in the
                            inferenceservice config map.
                          type: string
                        storageUri:
                          description: The location of the trained model
                          type: string
                      required:
                      - storageUri
                      type: object
                    tensorrt:
                      description: Spec for TensorRT Inference Server (https://github.com/NVIDIA/triton-inference-server)
                      properties:
                        resources:
                          description: Defaults to requests and limits of 1CPU, 2Gb
                            MEM.
                          type: object
                        runtimeVersion:
                          description: Allowed runtime versions are specified in the
                            inferenceservice config map
                          type: string
                        storageUri:
                          description: The location of the trained model
                          type: string
                      required:
                      - storageUri
                      type: object
                    xgboost:
                      description: Spec for XGBoost predictor
                      properties:
                        nthread:
                          description: Number of thread to be used by XGBoost
                          format: int64
                          type: integer
                        resources:
                          description: Defaults to requests and limits of 1CPU, 2Gb
                            MEM.
                          type: object
                        runtimeVersion:
                          description: Allowed runtime versions are specified in the
                            inferenceservice config map
                          type: string
                        storageUri:
                          description: The location of the trained model
                          type: string
                      required:
                      - storageUri
                      type: object
                  type: object
                transformer:
                  description: Transformer defines the pre/post processing before
                    and after the predictor call, transformer service calls to predictor
                    service.
                  properties:
                    custom:
                      description: Spec for a custom transformer
                      properties:
                        container:
                          type: object
                      required:
                      - container
                      type: object
                    logger:
                      description: Activate request/response logging
                      properties:
                        mode:
                          description: What payloads to log
                          type: string
                        url:
                          description: URL to send request logging CloudEvents
                          type: string
                      type: object
                    maxReplicas:
                      description: This is the up bound for autoscaler to scale to
                      format: int64
                      type: integer
                    minReplicas:
                      description: Minimum number of replicas, pods won't scale down
                        to 0 in case of no traffic
                      format: int64
                      type: integer
                    parallelism:
                      description: Parallelism, how many requests can be processed
                        concurrently
                      format: int64
                      type: integer
                    serviceAccountName:
                      description: ServiceAccountName is the name of the ServiceAccount
                        to use to run the service
                      type: string
                  type: object
              required:
              - predictor
              type: object
          required:
          - default
          type: object
        # Schema of the status block; url, traffic, canaryTraffic and
        # conditions are the fields read by the printer columns above.
        status:
          properties:
            canary:
              description: Statuses for the canary endpoints of the InferenceService
              type: object
            canaryTraffic:
              description: Traffic percentage that goes to canary services
              format: int64
              type: integer
            conditions:
              description: Conditions the latest available observations of a resource's
                current state. +patchMergeKey=type +patchStrategy=merge
              items:
                properties:
                  lastTransitionTime:
                    description: LastTransitionTime is the last time the condition
                      transitioned from one status to another. We use VolatileTime
                      in place of metav1.Time to exclude this from creating equality.Semantic
                      differences (all other things held constant).
                    type: string
                  message:
                    description: A human readable message indicating details about
                      the transition.
                    type: string
                  reason:
                    description: The reason for the condition's last transition.
                    type: string
                  severity:
                    description: Severity with which to treat failures of this type
                      of condition. When this is not specified, it defaults to Error.
                    type: string
                  status:
                    description: Status of the condition, one of True, False, Unknown.
                      +required
                    type: string
                  type:
                    description: Type of condition. +required
                    type: string
                required:
                - type
                - status
                type: object
              type: array
            default:
              description: Statuses for the default endpoints of the InferenceService
              type: object
            observedGeneration:
              description: ObservedGeneration is the 'Generation' of the Service that
                was last processed by the controller.
              format: int64
              type: integer
            traffic:
              description: Traffic percentage that goes to default services
              format: int64
              type: integer
            url:
              description: URL of the InferenceService
              type: string
          type: object
  version: v1alpha2
# NOTE(review): this empty status stanza (like "creationTimestamp: null"
# above) looks like generator output; presumably the API server fills in the
# real status — confirm before relying on or removing it.
status:
  acceptedNames:
    kind: ""
    plural: ""
  conditions: []
  storedVersions: []
---
# ClusterRole "kfserving-proxy-role": permits creating SubjectAccessReview and
# TokenReview objects, and nothing else.
# NOTE(review): presumably used by an auth proxy that delegates authentication
# and authorization checks to the API server — confirm against the workload
# that runs with this role.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: kfserving-proxy-role
rules:
# Authorization checks.
- apiGroups:
  - authorization.k8s.io
  resources:
  - subjectaccessreviews
  verbs:
  - create
# Token validation.
- apiGroups:
  - authentication.k8s.io
  resources:
  - tokenreviews
  verbs:
  - create
---
# ClusterRole "manager-role": the cluster-wide permissions bound to the
# default ServiceAccount of kfserving-system by "manager-rolebinding".
#
# Rules are additive, so each (apiGroup, resource) pair is listed exactly once
# with its complete verb set. Core-group resources that share the same verbs
# are grouped into a single rule.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  creationTimestamp: null
  name: manager-role
rules:
# Knative Services: full management, plus status updates.
- apiGroups:
  - serving.knative.dev
  resources:
  - services
  verbs:
  - get
  - list
  - watch
  - create
  - update
  - patch
  - delete
- apiGroups:
  - serving.knative.dev
  resources:
  - services/status
  verbs:
  - get
  - update
  - patch
# Istio VirtualServices: full management, plus status updates.
- apiGroups:
  - networking.istio.io
  resources:
  - virtualservices
  verbs:
  - get
  - list
  - watch
  - create
  - update
  - patch
  - delete
- apiGroups:
  - networking.istio.io
  resources:
  - virtualservices/status
  verbs:
  - get
  - update
  - patch
# InferenceService custom resources: full management, plus status updates.
- apiGroups:
  - serving.kubeflow.org
  resources:
  - inferenceservices
  verbs:
  - get
  - list
  - watch
  - create
  - update
  - patch
  - delete
- apiGroups:
  - serving.kubeflow.org
  resources:
  - inferenceservices/status
  verbs:
  - get
  - update
  - patch
# Admission webhook configurations: full management.
- apiGroups:
  - admissionregistration.k8s.io
  resources:
  - mutatingwebhookconfigurations
  - validatingwebhookconfigurations
  verbs:
  - get
  - list
  - watch
  - create
  - update
  - patch
  - delete
# Core resources that are only read.
- apiGroups:
  - ""
  resources:
  - serviceaccounts
  - configmaps
  - namespaces
  verbs:
  - get
  - list
  - watch
# Core resources that are fully managed. Secrets need write access, which
# already includes get/list/watch, so no separate read-only rule is listed
# for them.
- apiGroups:
  - ""
  resources:
  - secrets
  - services
  verbs:
  - get
  - list
  - watch
  - create
  - update
  - patch
  - delete
---
# Grants ClusterRole "kfserving-proxy-role" cluster-wide to the "default"
# ServiceAccount of the kfserving-system namespace.
# NOTE(review): the subject is the namespace's default ServiceAccount, so any
# pod there that does not set its own serviceAccountName presumably receives
# this role as well — confirm, and consider a dedicated ServiceAccount.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: kfserving-proxy-rolebinding
subjects:
- kind: ServiceAccount
  namespace: kfserving-system
  name: default
roleRef:
  kind: ClusterRole
  name: kfserving-proxy-role
  apiGroup: rbac.authorization.k8s.io
---
# Grants ClusterRole "manager-role" cluster-wide to the "default"
# ServiceAccount of the kfserving-system namespace.
# NOTE(review): "manager-role" includes write access to secrets in every
# namespace; binding it to the default ServiceAccount presumably hands that
# access to any pod in kfserving-system that does not set its own
# serviceAccountName — confirm, and consider a dedicated ServiceAccount.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: manager-rolebinding
  creationTimestamp: null
subjects:
- kind: ServiceAccount
  namespace: kfserving-system
  name: default
roleRef:
  kind: ClusterRole
  name: manager-role
  apiGroup: rbac.authorization.k8s.io
---
# ConfigMap "inferenceservice-config" in the kfserving-system namespace.
# Every data value is a JSON document stored as a literal block scalar (|-),
# so the text below each key is string content: do not add YAML comments
# inside the JSON bodies.
apiVersion: v1
data:
  # Names used to look up storage credentials: the GCS credential file name
  # and the S3 access-key-id / secret-access-key field names.
  credentials: |-
    {
       "gcs": {
           "gcsCredentialFileName": "gcloud-application-credentials.json"
       },
       "s3": {
           "s3AccessKeyIDName": "awsAccessKeyID",
           "s3SecretAccessKeyName": "awsSecretAccessKey"
       }
    }
  # Explainer images: image repository, default version and allowed versions
  # (only "alibi" is configured).
  explainers: |-
    {
        "alibi": {
            "image" : "gcr.io/kfserving/alibi-explainer",
            "defaultImageVersion": "0.2.2",
            "allowedImageVersions": [
               "0.2.2"
            ]
        }
    }
  # Ingress gateway and ingress service names.
  ingress: |-
    {
        "ingressGateway" : "knative-ingress-gateway.knative-serving",
        "ingressService" : "istio-ingressgateway.istio-system.svc.cluster.local"
    }
  # Logger image plus its CPU/memory request and limit values.
  logger: |-
    {
        "image" : "gcr.io/kfserving/logger:0.2.2",
        "memoryRequest": "100Mi",
        "memoryLimit": "1Gi",
        "cpuRequest": "100m",
        "cpuLimit": "1"
    }
  # Predictor images keyed by framework: image repository, default version
  # (tensorflow also has a default GPU version) and the allowed versions.
  predictors: |-
    {
        "tensorflow": {
            "image": "tensorflow/serving",
            "defaultImageVersion": "1.14.0",
            "defaultGpuImageVersion": "1.14.0-gpu",
            "allowedImageVersions": [
               "1.11.0",
               "1.11.0-gpu",
               "1.12.0",
               "1.12.0-gpu",
               "1.13.0",
               "1.13.0-gpu",
               "1.14.0",
               "1.14.0-gpu"
            ]
        },
        "onnx": {
            "image": "mcr.microsoft.com/onnxruntime/server",
            "defaultImageVersion": "v0.5.1",
            "allowedImageVersions": [
               "v0.5.1"
            ]
        },
        "sklearn": {
            "image": "gcr.io/kfserving/sklearnserver",
            "defaultImageVersion": "0.2.2",
            "allowedImageVersions": [
               "0.2.2"
            ]
        },
        "xgboost": {
            "image": "gcr.io/kfserving/xgbserver",
            "defaultImageVersion": "0.2.2",
            "allowedImageVersions": [
               "0.2.2"
            ]
        },
        "pytorch": {
            "image": "gcr.io/kfserving/pytorchserver",
            "defaultImageVersion": "0.2.2",
            "allowedImageVersions": [
               "0.2.2"
            ]
        },
        "tensorrt": {
            "image": "nvcr.io/nvidia/tensorrtserver",
            "defaultImageVersion": "19.05-py3",
            "allowedImageVersions": [
               "19.05-py3"
            ]
        }
    }
  # Storage-initializer image plus its CPU/memory request and limit values.
  storageInitializer: |-
    {
        "image" : "gcr.io/kfserving/storage-initializer:0.2.2",
        "memoryRequest": "100Mi",
        "memoryLimit": "1Gi",
        "cpuRequest": "100m",
        "cpuLimit": "1"
    }
  # Empty JSON object: no transformer images are configured.
  transformers: |-
    {
    }
kind: ConfigMap
metadata:
  name: inferenceservice-config
  namespace: kfserving-system
---
# Secret declared without any data. The controller StatefulSet in this
# manifest mounts it as the "cert" volume at /tmp/cert and passes its name to
# the manager through the SECRET_NAME environment variable.
# NOTE(review): presumably filled in at runtime with the webhook serving
# certificate — confirm against the controller code.
apiVersion: v1
kind: Secret
metadata:
  namespace: kfserving-system
  name: kfserving-webhook-server-secret
---
# Metrics Service for the controller manager. Port 8443 forwards to the
# container port named "https" (the kube-rbac-proxy sidecar of the
# kfserving-controller-manager StatefulSet); the prometheus.io/* annotations
# advertise that port and scheme to scrapers.
apiVersion: v1
kind: Service
metadata:
  annotations:
    prometheus.io/port: "8443"
    prometheus.io/scheme: https
    prometheus.io/scrape: "true"
  labels:
    # Labels on the Service object itself; intentionally left unchanged so
    # anything that selects this Service by label keeps working.
    control-plane: controller-manager
    controller-tools.k8s.io: "1.0"
  name: kfserving-controller-manager-metrics-service
  namespace: kfserving-system
spec:
  ports:
  - name: https
    port: 8443
    targetPort: https
  selector:
    # Must equal the pod template labels of the kfserving-controller-manager
    # StatefulSet. The previous value "controller-manager" matched no pod in
    # this manifest, leaving the Service without endpoints.
    control-plane: kfserving-controller-manager
    controller-tools.k8s.io: "1.0"
---
# Service in front of the controller manager pods, exposed on port 443.
apiVersion: v1
kind: Service
metadata:
  labels:
    control-plane: kfserving-controller-manager
    controller-tools.k8s.io: "1.0"
  name: kfserving-controller-manager-service
  namespace: kfserving-system
spec:
  ports:
  - port: 443
    # Without an explicit targetPort Kubernetes forwards to the same number
    # (443), which no container in the selected pods declares. Route to the
    # manager container's named port "webhook-server" (containerPort 9876).
    # NOTE(review): the manager has RBAC to update Services and may rewrite
    # this object at startup — confirm the two definitions agree.
    targetPort: webhook-server
  selector:
    control-plane: kfserving-controller-manager
    controller-tools.k8s.io: "1.0"
---
# Controller manager workload: a StatefulSet whose pod runs two containers,
# the kube-rbac-proxy sidecar and the KFServing manager.
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: kfserving-controller-manager
  namespace: kfserving-system
  labels:
    control-plane: kfserving-controller-manager
    controller-tools.k8s.io: "1.0"
spec:
  # NOTE(review): no Service named "controller-manager-service" is declared in
  # the visible part of this manifest (the closest is
  # "kfserving-controller-manager-service"). serviceName cannot be changed on
  # an existing StatefulSet, so the value is kept as-is — confirm intent.
  serviceName: controller-manager-service
  selector:
    matchLabels:
      control-plane: kfserving-controller-manager
      controller-tools.k8s.io: "1.0"
  template:
    metadata:
      labels:
        control-plane: kfserving-controller-manager
        controller-tools.k8s.io: "1.0"
    spec:
      terminationGracePeriodSeconds: 10
      containers:
      # Sidecar listening on 0.0.0.0:8443 and forwarding to
      # http://127.0.0.1:8080/, the address the manager is given for metrics.
      - name: kube-rbac-proxy
        image: gcr.io/kubebuilder/kube-rbac-proxy:v0.4.0
        args:
        - --secure-listen-address=0.0.0.0:8443
        - --upstream=http://127.0.0.1:8080/
        - --logtostderr=true
        - --v=10
        ports:
        - name: https
          containerPort: 8443
      # The controller itself; its metrics address is loopback-only, so
      # metrics are reachable from outside the pod only through the sidecar.
      - name: manager
        image: gcr.io/kfserving/kfserving-controller:0.2.2
        imagePullPolicy: Always
        command:
        - /manager
        args:
        - --metrics-addr=127.0.0.1:8080
        env:
        # Namespace the pod runs in, injected via the downward API.
        - name: POD_NAMESPACE
          valueFrom:
            fieldRef:
              fieldPath: metadata.namespace
        # Same Secret that backs the "cert" volume below.
        - name: SECRET_NAME
          value: kfserving-webhook-server-secret
        ports:
        - name: webhook-server
          containerPort: 9876
          protocol: TCP
        resources:
          requests:
            cpu: 100m
            memory: 200Mi
          limits:
            cpu: 100m
            memory: 300Mi
        volumeMounts:
        - name: cert
          mountPath: /tmp/cert
          readOnly: true
      volumes:
      - name: cert
        secret:
          # 420 decimal is 0644 octal.
          defaultMode: 420
          secretName: kfserving-webhook-server-secret