## Lines starting with "##" are descriptions; lines starting with "# " are
## commented-out example settings. Remove the leading "# " to enable a setting.
## Values written in <angle brackets> are placeholders that must be replaced.

## Configure the task container defaults. Tasks include trials, commands,
## tensorboards and more. For all task containers, shm_size_bytes and
## network_mode are configurable. For trials, the network interface used by
## distributed (multi-machine) training is configurable.
# task_container_defaults:
#   shm_size_bytes: 4294967296
#   network_mode: bridge
#   dtrain_network_interface: <network interface name>

## Resource manager configuration. Defaults to using the agent resource
## manager.
# resource_manager:
#   type: agent
#   scheduler:
#     type: fair_share
#     fitting_policy: best
#   default_cpu_resource_pool: default
#   default_gpu_resource_pool: default

## Resource pools configuration.
# resource_pools:
#   - pool_name: default
#     ## The maximum CPU containers for the agent. Defaults to 100.
#     max_cpu_containers_per_agent: 100
#     ## The provider of dynamic agents.
#     provider:
#       ## The URL of the Determined master.
#       master_url: <scheme://host:port>
#       ## The startup script for the agent. This runs on the node the agent
#       ## runs on.
#       startup_script: <startup script>
#       ## The startup script for the agent's container. This runs in the
#       ## container determined-agent runs in.
#       container_startup_script: <container startup script>
#       ## The Docker network to use for the agent when using dynamic agents.
#       ## If this is set to "host", Docker host-mode networking will be used
#       ## instead. Defaults to "default".
#       agent_docker_network: default
#       ## The Docker runtime to use for the agent when using dynamic agents.
#       ## Defaults to "runc".
#       agent_docker_runtime: runc
#       ## The Docker image to use for the agent when using dynamic agents.
#       ## This value must be configured.
#       agent_docker_image: determinedai/hpe-mlde-agent:<version>
#       ## The maximum idle period of agents. The master waits for this period
#       ## of time before shutting down idle agents. Defaults to 5 min.
#       max_idle_agent_period: 5m
#       ## The maximum starting period of agents. The master waits for this
#       ## period of time for starting agents before retrying. Defaults to
#       ## 5 min.
#       max_agent_starting_period: 5m
#
#       ## NOTE: the AWS and GCP sections below are alternatives. Both set
#       ## `type` (and `network_interface`, `instance_type`, `max_instances`)
#       ## under the same `provider` mapping, so uncomment only one of them.
#
#       ## Configure AWS dynamic agents. The `provider`, `image_id`,
#       ## `security_group_id`, and `ssh_key_name` are required to be set to
#       ## use AWS dynamic agents.
#       type: aws
#       region: <region>
#       root_volume_size: 200
#       image_id: <AMI id>
#       tag_key: <tag key>
#       tag_value: <tag value>
#       instance_name: determined-ai-agent
#       ssh_key_name: <ssh key name>
#       iam_instance_profile_arn: <IAM instance profile ARN>
#       network_interface:
#         public_ip: true
#         security_group_id: <security group id>
#         subnet_id: <subnet id>
#       instance_type: g4dn.metal
#       max_instances: 5
#
#       ## Configure GCP dynamic agents. The `provider`,
#       ## `boot_disk_source_image`, and `network_interface` are required to
#       ## be set to use GCP dynamic agents.
#       type: gcp
#       base_config: <instance resource base configuration>
#       project: <project id>
#       zone: <zone>
#       boot_disk_size: 200
#       boot_disk_source_image: projects/<project id>/global/images/<image name>
#       labels: {<label1 key>: <label1 value>, <label2 key>: <label2 value>, ...}
#       label_key: <label key>
#       label_value: <label value>
#       name_prefix: <name prefix>
#       network_interface:
#         network: projects/<project id>/global/networks/<network name>
#         subnetwork: projects/<project id>/regions/<region>/subnetworks/<subnetwork name>
#         external_ip: false
#       network_tags: ["<tag1>", "<tag2>"]
#       service_account:
#         email: "<service account email>"
#         scopes: ["https://www.googleapis.com/auth/cloud-platform"]
#       instance_type:
#         machine_type: n1-standard-32
#         gpu_type: nvidia-tesla-t4
#         gpu_num: 4
#       max_instances: 5
#       operation_timeout_period: 5m

## Configure default checkpoint storage configuration.
checkpoint_storage:
  type: shared_fs
  host_path: /tmp
  storage_path: determined-checkpoint
  save_experiment_best: 0
  save_trial_best: 1
  save_trial_latest: 1

## Configure the location where the master looks for static files.
# root: /usr/share/determined/master

## Configure the connection to the Postgres database.
db:
  user: postgres
  # host: determined-db
  port: 5432
  name: determined
  # password: database_password

observability:
  enable_prometheus: false

## A user-friendly name to identify this cluster by.
# cluster_name: Dev

## Configure whether we collect anonymous information about the usage of
## Determined.
# telemetry:
#   enabled: true

## Configure the credentials for using the SCIM server to provision users.
# scim:
#   enabled: false
#   auth:
#     # "oauth" can be used instead of "basic"; then there should be no
#     # username or password fields.
#     type: basic
#     username: scim_username
#     password: scim_password

## Configure the settings for using the SAML server to sign in.
# saml:
#   enabled: false
#   provider: okta
#   idp_sso_url: sso_url
#   idp_sso_descriptor_url: descriptor_url
#   idp_recipient_url: recipient_url
#   idp_cert_path: cert_path