# Default values for the zymtrace profiler chart.
#
# NOTE(review): the nesting below was reconstructed from a whitespace-collapsed
# copy of this file. Confirm against the chart templates that `rbac` and
# `serviceAccount` are top-level keys, that `fieldRefs` is a sibling of
# `profiler.env`, and that `hostMountPath` belongs under `profiler.cudaProfiler`.

global:
  namePrefix: "zymtrace"
  imageRegistry: "ghcr.io/zystem-io"
  registry:
    requirePullSecret: false
    username: ""  # --set global.registry.username=
    password: ""  # --set global.registry.password=
  imagePullSecrets:
    - name: null  # Will be overridden by the template --> zymtrace-registry-cred
  imagePullPolicy: IfNotPresent
  # ClusterMetadata is typically provided by the parent chart when this chart
  # is used as a subchart.
  # Set cluster_id to automatically add it as a tag to the profiler agent.
  ClusterMetadata:
    cluster_id: ""  # Override with --set global.ClusterMetadata.cluster_id=

rbac:
  create: true
  rules:
    - apiGroups: [""]
      resources: ["nodes", "pods"]
      verbs: ["get", "list", "watch"]
    - apiGroups: ["apps"]
      resources: ["deployments", "daemonsets"]
      verbs: ["get", "list", "watch"]

serviceAccount:
  annotations: {}
    # eks.amazonaws.com/role-arn: arn:aws:iam::123456789012:role/operations/monitoring/zymtrace-profiler-role

profiler:
  image:
    repository: zymtrace-pub-profiler
    tag: ""  # Override with --set profiler.image.tag=

  securityContext:
    capabilities:
      add:
        - SYS_ADMIN  # CAP_SYS_ADMIN is required

  cudaProfiler:
    # Set to true to enable the CUDA profiler.
    # Override with --set profiler.cudaProfiler.enabled=true
    enabled: false
    # The standard /opt/zymtrace/profiler path can't be used because GKE
    # mounts /opt as read-only for security reasons.
    hostMountPath: "/var/lib/zymtrace/profiler"

  # For all available arguments, see: https://docs.zymtrace.com/profiler-cli-args
  # Override the collection agent with:
  #   --set profiler.args[0]="-collection-agent=your-service.namespace.svc.cluster.local:80"
  args:
    - "-collection-agent=zymtrace-gateway.zymtrace.svc.cluster.local:80"
    - "-disable-tls"
    # - "-project=project-name"
    # - "-zymtrace-protocol"
    # - "-tags=cloud_region:us-central1;env:staging"
    # - "-v"
    # - "-dwarf"
    # - "-auth-token=mycustomauthtoken"
    #
    # GPU Metrics: https://docs.zymtrace.com/gpu-metrics#available-gpu-metrics
    # To enable GPU metrics, add:
    # - "--enable-gpu-metrics"
    # - "-nvml-path=/path/to/nvml"  # Optional: profiler auto-detects if not specified
    # - "-nvml-auto-scan"           # Optional: profiler will scan host system for NVML.
    #                               # Not recommended for production: if lib isn't at standard path,
    #                               # it's best to just run with that flag once, then specify the
    #                               # path found and printed by the profiler explicitly in future runs.

  # Note: CLI args take precedence over environment variables.
  # For all env variables, see: https://docs.zymtrace.com/profiler-env-variables
  # Override environment variables with:
  #   --set profiler.env.ZYMTRACE_COLLECTION_AGENT="your-service:80"
  # To set variables here, replace `{}` with a mapping of NAME: "value" pairs.
  env: {}
    # ZYMTRACE_COLLECTION_AGENT: "zymtrace-ui.zymtrace.svc.cluster.local:80"
    # ZYMTRACE_DISABLE_TLS: "true"
    # ZYMTRACE_TAGS: "region:us-west;env:prod"
    #
    # GPU Metrics: To enable GPU metrics via env vars instead of args:
    # ZYMTRACE_ENABLE_GPU_METRICS: "true"
    # ZYMTRACE_NVML_PATH: "/path/to/nvml"  # Optional: profiler auto-detects if not specified
    # ZYMTRACE_NVML_AUTO_SCAN: "1"         # Optional: profiler will scan host system for NVML.
    #                                      # Not recommended for production: if lib isn't at standard path,
    #                                      # it's best to just run with that flag once, then specify the
    #                                      # path found and printed by the profiler explicitly in future runs.
    #
    # Proxy settings
    # HTTPS_PROXY: ""  # e.g. http://username:password@proxy:port

  # Kubernetes downward API field references
  fieldRefs:
    # Default node name variables (set to false to disable)
    NODE_NAME: true             # maps to spec.nodeName
    KUBERNETES_NODE_NAME: true  # maps to spec.nodeName
    # POD_NAME: metadata.name
    # POD_NAMESPACE: metadata.namespace

  # Set a priority class to ensure profiler pods are scheduled even on busy nodes.
  # This allows the DaemonSet to preempt lower-priority pods when resources are scarce.
  # To enable, replace the null value with one of the commented options below.
  priorityClassName: null
  # priorityClassName: "system-node-critical"     # Highest: preempts all non-critical pods
  # priorityClassName: "system-cluster-critical"  # High: preempts regular pods but not node-critical ones

  resources:
    requests:
      cpu: "200m"
      memory: "256Mi"
    limits:
      cpu: "1000m"
      memory: "1Gi"

  # Override with: --set profiler.nodeSelector."kubernetes\.io/arch"=amd64
  nodeSelector: {}
    # kubernetes.io/arch: amd64
    # nvidia.com/gpu: "true"            # Any node with NVIDIA GPU
    # nvidia.com/gpu.product: "A100"    # Specific GPU model
    # nvidia.com/gpu.memory: "40GB"     # Specific GPU memory
    # accelerator: nvidia-tesla-k80     # For Google GKE GPU nodes

  tolerations:
    # Allow deploying to GPU nodes.
    - key: "nvidia.com/gpu"
      operator: "Exists"
      effect: "NoSchedule"

  affinity: {}
    # nodeAffinity:
    #   requiredDuringSchedulingIgnoredDuringExecution:
    #     nodeSelectorTerms:
    #       - matchExpressions:
    #           - key: nvidia.com/gpu
    #             operator: In
    #             values:
    #               - "true"
    #   preferredDuringSchedulingIgnoredDuringExecution:
    #     - weight: 1
    #       preference:
    #         matchExpressions:
    #           - key: gpu.memory
    #             operator: Gt
    #             values:
    #               - "32Gi"