// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto2";

package mesos;

option cc_enable_arenas = true;

option java_package = "org.apache.mesos";
option java_outer_classname = "Protos";


/**
 * Status indicates the state of the scheduler and executor
 * driver after function calls.
 */
enum Status {
  DRIVER_NOT_STARTED = 1;
  DRIVER_RUNNING = 2;
  DRIVER_ABORTED = 3;
  DRIVER_STOPPED = 4;
}


/**
 * A unique ID assigned to a framework. A framework can reuse this ID
 * in order to do failover (see MesosSchedulerDriver).
 */
message FrameworkID {
  required string value = 1;
}


/**
 * A unique ID assigned to an offer.
 */
message OfferID {
  required string value = 1;
}


/**
 * A unique ID assigned to a slave. Currently, a slave gets a new ID
 * whenever it (re)registers with Mesos. Framework writers shouldn't
 * assume any binding between a slave ID and a hostname.
 */
message SlaveID {
  required string value = 1;
}


/**
 * A framework-generated ID to distinguish a task. The ID must remain
 * unique while the task is active. A framework can reuse an ID _only_
 * if the previous task with the same ID has reached a terminal state
 * (e.g., TASK_FINISHED, TASK_KILLED, etc.). However, reusing task IDs
 * is strongly discouraged (MESOS-2198).
*/
message TaskID {
  required string value = 1;
}


/**
 * A framework-generated ID to distinguish an executor. Only one
 * executor with the same ID can be active on the same slave at a
 * time. However, reusing executor IDs is discouraged.
 */
message ExecutorID {
  required string value = 1;
}


/**
 * ID used to uniquely identify a container. If the `parent` is not
 * specified, the ID is a UUID generated by the agent to uniquely
 * identify the container of an executor run. If the `parent` field is
 * specified, it represents a nested container.
 */
message ContainerID {
  required string value = 1;
  optional ContainerID parent = 2;
}


/**
 * A unique ID assigned to a resource provider. Currently, a resource
 * provider gets a new ID whenever it (re)registers with Mesos.
 */
message ResourceProviderID {
  required string value = 1;
}


/**
 * A framework-generated ID to distinguish an operation. The ID
 * must be unique within the framework.
 */
message OperationID {
  required string value = 1;
}


/**
 * Represents time since the epoch, in nanoseconds.
 */
message TimeInfo {
  required int64 nanoseconds = 1;
}


/**
 * Represents duration in nanoseconds.
 */
message DurationInfo {
  required int64 nanoseconds = 1;
}


/**
 * A network address.
 *
 * TODO(bmahler): Use this more widely.
 */
message Address {
  // May contain a hostname, IP address, or both.
  optional string hostname = 1;
  optional string ip = 2;

  required int32 port = 3;
}


/**
 * Represents a URL.
 */
message URL {
  required string scheme = 1;
  required Address address = 2;
  optional string path = 3;
  repeated Parameter query = 4;
  optional string fragment = 5;
}


/**
 * Represents an interval, from a given start time over a given duration.
 * This interval pertains to an unavailability event, such as maintenance,
 * and is not a generic interval.
 */
message Unavailability {
  required TimeInfo start = 1;

  // When added to `start`, this represents the end of the interval.
  // If unspecified, the duration is assumed to be infinite.
  optional DurationInfo duration = 2;

  // TODO(josephw): Add additional fields for expressing the purpose and
  // urgency of the unavailability event.
}


/**
 * Represents a single machine, which may hold one or more slaves.
 *
 * NOTE: In order to match a slave to a machine, both the `hostname` and
 * `ip` must match the values advertised by the slave to the master.
 * Hostname is not case-sensitive.
 */
message MachineID {
  optional string hostname = 1;
  optional string ip = 2;
}


/**
 * Holds information about a single machine, its `mode`, and any other
 * relevant information which may affect the behavior of the machine.
 */
message MachineInfo {
  // Describes the several states that a machine can be in.  A `Mode`
  // applies to a machine and to all associated slaves on the machine.
  enum Mode {
    // In this mode, a machine is behaving normally;
    // offering resources, executing tasks, etc.
    UP = 1;

    // In this mode, all slaves on the machine are expected to cooperate with
    // frameworks to drain resources.  In general, draining is done ahead of
    // a pending `unavailability`.  The resources should be drained so as to
    // maximize utilization prior to the maintenance but without knowingly
    // violating the frameworks' requirements.
    DRAINING = 2;

    // In this mode, a machine is not running any tasks and will not offer
    // any of its resources.  Slaves on the machine will not be allowed to
    // register with the master.
    DOWN = 3;
  }

  required MachineID id = 1;
  optional Mode mode = 2;

  // Signifies that the machine may be unavailable during the given interval.
  // See comments in `Unavailability` and for the `unavailability` fields
  // in `Offer` and `InverseOffer` for more information.
  optional Unavailability unavailability = 3;
}


/**
 * Describes a framework.
 */
message FrameworkInfo {
  // Used to determine the Unix user that an executor or task should be
  // launched as.
  //
  // When using the MesosSchedulerDriver, if the field is set to an
  // empty string, it will automagically set it to the current user.
  //
  // When using the HTTP Scheduler API, the user has to be set
  // explicitly.
  required string user = 1;

  // Name of the framework that shows up in the Mesos Web UI.
  required string name = 2;

  // Used to uniquely identify the framework.
  //
  // This field must be unset when the framework subscribes for the
  // first time upon which the master will assign a new ID. To
  // resubscribe after scheduler failover the framework should set
  // 'id' to the ID assigned by the master. Setting 'id' to values
  // not assigned by Mesos masters is unsupported.
  optional FrameworkID id = 3;

  // The amount of time (in seconds) that the master will wait for the
  // scheduler to failover before it tears down the framework by
  // killing all its tasks/executors. This should be non-zero if a
  // framework expects to reconnect after a failure and not lose its
  // tasks/executors.
  //
  // NOTE: To avoid accidental destruction of tasks, production
  // frameworks typically set this to a large value (e.g., 1 week).
  optional double failover_timeout = 4 [default = 0.0];

  // If set, agents running tasks started by this framework will write
  // the framework pid, executor pids and status updates to disk. If
  // the agent exits (e.g., due to a crash or as part of upgrading
  // Mesos), this checkpointed data allows the restarted agent to
  // reconnect to executors that were started by the old instance of
  // the agent. Enabling checkpointing improves fault tolerance, at
  // the cost of a (usually small) increase in disk I/O.
  optional bool checkpoint = 5 [default = false];

  // Roles are the entities to which allocations are made.
  // The framework must have at least one role in order to
  // be offered resources. Note that `role` is deprecated
  // in favor of `roles` and only one of these fields must
  // be used. Since we cannot distinguish between empty
  // `roles` and the default unset `role`, we require that
  // frameworks set the `MULTI_ROLE` capability if
  // setting the `roles` field.
  optional string role = 6 [default = "*", deprecated = true];
  repeated string roles = 12;

  // Used to indicate the current host from which the scheduler is
  // registered in the Mesos Web UI. If set to an empty string Mesos
  // will automagically set it to the current hostname if one is
  // available.
  optional string hostname = 7;

  // This field should match the credential's principal the framework
  // uses for authentication. This field is used for framework API
  // rate limiting and dynamic reservations. It should be set even
  // if authentication is not enabled if these features are desired.
  optional string principal = 8;

  // This field allows a framework to advertise its web UI, so that
  // the Mesos web UI can link to it. It is expected to be a full URL,
  // for example http://my-scheduler.example.com:8080/.
  optional string webui_url = 9;

  message Capability {
    enum Type {
      // This must be the first enum value in this list, to
      // ensure that if 'type' is not set, the default value
      // is UNKNOWN. This enables enum values to be added
      // in a backwards-compatible way. See: MESOS-4997.
      UNKNOWN = 0;

      // Receive offers with revocable resources. See 'Resource'
      // message for details.
      REVOCABLE_RESOURCES = 1;

      // Receive the TASK_KILLING TaskState when a task is being
      // killed by an executor. The executor will examine this
      // capability to determine whether it can send TASK_KILLING.
      TASK_KILLING_STATE = 2;

      // Indicates whether the framework is aware of GPU resources.
      // Frameworks that are aware of GPU resources are expected to
      // avoid placing non-GPU workloads on GPU agents, in order
      // to avoid occupying a GPU agent and preventing GPU workloads
      // from running! Currently, if a framework is unaware of GPU
      // resources, it will not be offered *any* of the resources on
      // an agent with GPUs. This restriction is in place because we
      // do not have a revocation mechanism that ensures GPU workloads
      // can evict GPU agent occupants if necessary.
      //
      // TODO(bmahler): As we add revocation we can relax the
      // restriction here. See MESOS-5634 for more information.
      GPU_RESOURCES = 3;

      // Receive offers with resources that are shared.
      SHARED_RESOURCES = 4;

      // Indicates that (1) the framework is prepared to handle the
      // following TaskStates: TASK_UNREACHABLE, TASK_DROPPED,
      // TASK_GONE, TASK_GONE_BY_OPERATOR, and TASK_UNKNOWN, and (2)
      // the framework will assume responsibility for managing
      // partitioned tasks that reregister with the master.
      //
      // Frameworks that enable this capability can define how they
      // would like to handle partitioned tasks. Frameworks will
      // receive TASK_UNREACHABLE for tasks on agents that are
      // partitioned from the master.
      //
      // Without this capability, frameworks will receive TASK_LOST
      // for tasks on partitioned agents.
      // NOTE: Prior to Mesos 1.5, such tasks will be killed by Mesos
      // when the agent reregisters (unless the master has failed over).
      // However due to the lack of benefit in maintaining different
      // behaviors depending on whether the master has failed over
      // (see MESOS-7215), as of 1.5, Mesos will not kill these
      // tasks in either case.
      PARTITION_AWARE = 5;

      // This expresses the ability for the framework to be
      // "multi-tenant" via using the newly introduced `roles`
      // field, and examining `Offer.allocation_info` to determine
      // which role the offers are being made to. We also
      // expect that "single-tenant" schedulers eventually
      // provide this and move away from the deprecated
      // `role` field.
      MULTI_ROLE = 6;

      // This capability has two effects for a framework.
      //
      // (1) The framework is offered resources in a new format.
      //
      //     The offered resources have the `Resource.reservations` field set
      //     rather than `Resource.role` and `Resource.reservation`. In short,
      //     an empty `reservations` field denotes unreserved resources, and
      //     each `ReservationInfo` in the `reservations` field denotes a
      //     reservation that refines the previous one.
      //
      //     See the 'Resource Format' section for more details.
      //
      // (2) The framework can create refined reservations.
      //
      //     A framework can refine an existing reservation via the
      //     `Resource.reservations` field. For example, a reservation for role
      //     `eng` can be refined to `eng/front_end`.
      //
      //     See `ReservationInfo.reservations` for more details.
      //
      // NOTE: Without this capability, a framework is not offered resources
      // that have refined reservations. A resource is said to have refined
      // reservations if it uses the `Resource.reservations` field, and
      // `Resource.reservations_size() > 1`.
      RESERVATION_REFINEMENT = 7;

      // EXPERIMENTAL.
      //
      // Indicates that the framework is prepared to receive offers
      // for agents whose region is different from the master's
      // region. Network links between hosts in different regions
      // typically have higher latency and lower bandwidth than
      // network links within a region, so frameworks should be
      // careful to only place suitable workloads in remote regions.
      // Frameworks that are not region-aware will never receive
      // offers for remote agents; region-aware frameworks are assumed
      // to implement their own logic to decide which workloads (if
      // any) are suitable for placement on remote agents.
      REGION_AWARE = 8;
    }

    // Enum fields should be optional, see: MESOS-4997.
    optional Type type = 1;
  }

  // This field allows a framework to advertise its set of
  // capabilities (e.g., ability to receive offers for revocable
  // resources).
  repeated Capability capabilities = 10;

  // Labels are free-form key value pairs supplied by the framework
  // scheduler (e.g., to describe additional functionality offered by
  // the framework). These labels are not interpreted by Mesos itself.
  // Labels should not contain duplicate key-value pairs.
  optional Labels labels = 11;

  // Specific resource requirements for each of the framework's roles. This
  // field is used by e.g., the default allocator to decide whether a
  // framework is interested in seeing a resource of a certain shape.
  //
  // NOTE(review): the map type parameters were missing from this declaration
  // (a bare `map` is not valid protobuf). They are restored here as
  // role name -> `OfferFilters`; confirm that `OfferFilters` is declared
  // elsewhere in this file.
  map<string, OfferFilters> offer_filters = 13;
}


/**
 * Describes a general non-interpreting non-killing check for a task or
 * executor (or any arbitrary process/command). A type is picked by
 * specifying one of the optional fields. Specifying more than one type
 * is an error.
 *
 * NOTE: This API is subject to change and the related feature is experimental.
 */
message CheckInfo {
  enum Type {
    UNKNOWN = 0;
    COMMAND = 1;
    HTTP = 2;
    TCP = 3;

    // TODO(alexr): Consider supporting custom user checks. They should
    // probably be paired with a `data` field and complemented by a
    // `data` response in `CheckStatusInfo`.
  }

  // Describes a command check. If applicable, enters mount and/or network
  // namespaces of the task.
  message Command {
    required CommandInfo command = 1;
  }

  // Describes an HTTP check. Sends a GET request to
  // http://<host>:port/path. Note that <host> is not configurable and is
  // resolved automatically to 127.0.0.1.
  message Http {
    // Port to send the HTTP request.
    required uint32 port = 1;

    // HTTP request path.
    optional string path = 2;

    // TODO(alexr): Add support for HTTP method. While adding POST
    // and PUT is simple, supporting payload is more involved.

    // TODO(alexr): Add support for custom HTTP headers.

    // TODO(alexr): Consider adding an optional message to describe TLS
    // options and thus enabling https. Such message might contain certificate
    // validation, TLS version.
  }

  // Describes a TCP check, i.e. based on establishing a TCP connection to
  // the specified port. Note that <host> is not configurable and is resolved
  // automatically to 127.0.0.1.
  message Tcp {
    required uint32 port = 1;
  }

  // The type of the check.
  optional Type type = 1;

  // Command check.
  optional Command command = 2;

  // HTTP check.
  optional Http http = 3;

  // TCP check.
  optional Tcp tcp = 7;

  // Amount of time to wait to start checking the task after it
  // transitions to `TASK_RUNNING` or `TASK_STARTING` if the latter
  // is used by the executor.
  optional double delay_seconds = 4 [default = 15.0];

  // Interval between check attempts, i.e., amount of time to wait after
  // the previous check finished or timed out to start the next check.
  optional double interval_seconds = 5 [default = 10.0];

  // Amount of time to wait for the check to complete. Zero means infinite
  // timeout.
  //
  // After this timeout, the check attempt is aborted and no result is
  // reported. Note that this may be considered a state change and hence
  // may trigger a check status change delivery to the corresponding
  // scheduler. See `CheckStatusInfo` for more details.
  optional double timeout_seconds = 6 [default = 20.0];
}


/**
 * Describes a health check for a task or executor (or any arbitrary
 * process/command). A type is picked by specifying one of the
 * optional fields. Specifying more than one type is an error.
 */
message HealthCheck {
  enum Type {
    UNKNOWN = 0;
    COMMAND = 1;
    HTTP = 2;
    TCP = 3;
  }

  // Describes an HTTP health check. Sends a GET request to
  // scheme://<host>:port/path. Note that <host> is not configurable and is
  // resolved automatically, in most cases to 127.0.0.1. Default executors
  // treat return codes between 200 and 399 as success; custom executors
  // may employ a different strategy, e.g. leveraging the `statuses` field.
  message HTTPCheckInfo {
    optional NetworkInfo.Protocol protocol = 5 [default = IPv4];

    // Currently "http" and "https" are supported.
    optional string scheme = 3;

    // Port to send the HTTP request.
    required uint32 port = 1;

    // HTTP request path.
    optional string path = 2;

    // TODO(alexr): Add support for HTTP method. While adding POST
    // and PUT is simple, supporting payload is more involved.

    // TODO(alexr): Add support for custom HTTP headers.

    // TODO(alexr): Add support for success and possibly failure
    // statuses.

    // NOTE: It is up to the custom executor to interpret and act on this
    // field. Setting this field has no effect on the default executors.
    //
    // TODO(haosdent): Deprecate this field when we add better support for
    // success and possibly failure statuses, e.g. ranges of success and
    // failure statuses.
    repeated uint32 statuses = 4;

    // TODO(haosdent): Consider adding a flag to enable task's certificate
    // validation for HTTPS health checks, see MESOS-5997.

    // TODO(benh): Include an 'optional bytes data' field for checking
    // for specific data in the response.
  }

  // Describes a TCP health check, i.e. based on establishing
  // a TCP connection to the specified port.
  message TCPCheckInfo {
    optional NetworkInfo.Protocol protocol = 2 [default = IPv4];

    // Port expected to be open.
    required uint32 port = 1;
  }

  // TODO(benh): Consider adding a URL health check strategy which
  // allows doing something similar to the HTTP strategy but
  // encapsulates all the details in a single string field.

  // Amount of time to wait to start health checking the task after it
  // transitions to `TASK_RUNNING` or `TASK_STARTING` if the latter is
  // used by the executor.
  optional double delay_seconds = 2 [default = 15.0];

  // Interval between health checks, i.e., amount of time to wait after
  // the previous health check finished or timed out to start the next
  // health check.
  optional double interval_seconds = 3 [default = 10.0];

  // Amount of time to wait for the health check to complete. After this
  // timeout, the health check is aborted and treated as a failure. Zero
  // means infinite timeout.
  optional double timeout_seconds = 4 [default = 20.0];

  // Number of consecutive failures until the task is killed by the executor.
  optional uint32 consecutive_failures = 5 [default = 3];

  // Amount of time after the task is launched during which health check
  // failures are ignored. Once a check succeeds for the first time,
  // the grace period does not apply anymore. Note that it includes
  // `delay_seconds`, i.e., setting `grace_period_seconds` < `delay_seconds`
  // has no effect.
  optional double grace_period_seconds = 6 [default = 10.0];

  // TODO(alexr): Add an optional `KillPolicy` that should be used
  // if the task is killed because of a health check failure.

  // The type of health check.
  optional Type type = 8;

  // Command health check.
  optional CommandInfo command = 7;

  // HTTP health check.
  optional HTTPCheckInfo http = 1;

  // TCP health check.
  optional TCPCheckInfo tcp = 9;
}


/**
 * Describes a kill policy for a task. Currently does not express
 * different policies (e.g. hitting HTTP endpoints), only controls
 * how long to wait between graceful and forcible task kill:
 *
 *     graceful kill --------------> forcible kill
 *                    grace_period
 *
 * Kill policies are best-effort, because machine failures / forcible
 * terminations may occur.
 *
 * NOTE: For executor-less command-based tasks, the kill is performed
 * via sending a signal to the task process: SIGTERM for the graceful
 * kill and SIGKILL for the forcible kill. For the docker executor-less
 * tasks the grace period is passed to 'docker stop --time'.
 */
message KillPolicy {
  // The grace period specifies how long to wait before forcibly
  // killing the task. It is recommended to attempt to gracefully
  // kill the task (and send TASK_KILLING) to indicate that the
  // graceful kill is in progress. Once the grace period elapses,
  // if the task has not terminated, a forcible kill should occur.
  // The task should not assume that it will always be allotted
  // the full grace period. For example, the executor may be
  // shutdown more quickly by the agent, or failures / forcible
  // terminations may occur.
  optional DurationInfo grace_period = 1;
}


/**
 * Describes a command, executed via: '/bin/sh -c value'. Any URIs specified
 * are fetched before executing the command.  If the executable field for an
 * uri is set, executable file permission is set on the downloaded file.
* Otherwise, if the downloaded file has a recognized archive extension
 * (currently [compressed] tar and zip) it is extracted into the executor's
 * working directory. This extraction can be disabled by setting `extract` to
 * false. In addition, any environment variables are set before executing
 * the command (so they can be used to "parameterize" your command).
 */
message CommandInfo {
  message URI {
    required string value = 1;
    optional bool executable = 2;

    // In case the fetched file is recognized as an archive, extract
    // its contents into the sandbox. Note that a cached archive is
    // not copied from the cache to the sandbox in case extraction
    // originates from an archive in the cache.
    optional bool extract = 3 [default = true];

    // If this field is "true", the fetcher cache will be used. If not,
    // fetching bypasses the cache and downloads directly into the
    // sandbox directory, no matter whether a suitable cache file is
    // available or not. The former directs the fetcher to download to
    // the file cache, then copy from there to the sandbox. Subsequent
    // fetch attempts with the same URI will omit downloading and copy
    // from the cache as long as the file is resident there. Cache files
    // may get evicted at any time, which then leads to renewed
    // downloading. See also "docs/fetcher.md" and
    // "docs/fetcher-cache-internals.md".
    optional bool cache = 4;

    // The fetcher's default behavior is to use the URI string's basename to
    // name the local copy. If this field is provided, the local copy will be
    // named with its value instead. If there is a directory component (which
    // must be a relative path), the local copy will be stored in that
    // subdirectory inside the sandbox.
    optional string output_file = 5;
  }

  repeated URI uris = 1;

  optional Environment environment = 2;

  // There are two ways to specify the command:
  // 1) If 'shell == true', the command will be launched via shell
  //    (i.e., /bin/sh -c 'value'). The 'value' specified will be
  //    treated as the shell command. The 'arguments' will be ignored.
  // 2) If 'shell == false', the command will be launched by passing
  //    arguments to an executable. The 'value' specified will be
  //    treated as the filename of the executable. The 'arguments'
  //    will be treated as the arguments to the executable. This is
  //    similar to how POSIX exec families launch processes (i.e.,
  //    execlp(value, arguments(0), arguments(1), ...)).
  // NOTE: The field 'value' is changed from 'required' to 'optional'
  // in 0.20.0. It will only cause issues if a new framework is
  // connecting to an old master.
  optional bool shell = 6 [default = true];
  optional string value = 3;
  repeated string arguments = 7;

  // Enables executor and tasks to run as a specific user. If the user
  // field is present both in FrameworkInfo and here, the CommandInfo
  // user value takes precedence.
  optional string user = 5;
}


/**
 * Describes information about an executor.
 */
message ExecutorInfo {
  enum Type {
    UNKNOWN = 0;

    // Mesos provides a simple built-in default executor that frameworks can
    // leverage to run shell commands and containers.
    //
    // NOTES:
    //
    // 1) `command` must not be set when using a default executor.
    //
    // 2) Default executor only accepts a *single* `LAUNCH` or `LAUNCH_GROUP`
    //    operation.
    //
    // 3) If `container` is set, `container.type` must be `MESOS`
    //    and `container.mesos.image` must not be set.
    DEFAULT = 1;

    // For frameworks that need custom functionality to run tasks, a `CUSTOM`
    // executor can be used. Note that `command` must be set when using a
    // `CUSTOM` executor.
    CUSTOM = 2;
  }

  // For backwards compatibility, if this field is not set when using `LAUNCH`
  // operation, Mesos will infer the type by checking if `command` is set
  // (`CUSTOM`) or unset (`DEFAULT`). `type` must be set when using
  // `LAUNCH_GROUP` operation.
  //
  // TODO(vinod): Add support for explicitly setting `type` to `DEFAULT` in
  // `LAUNCH` operation.
  optional Type type = 15;

  required ExecutorID executor_id = 1;
  optional FrameworkID framework_id = 8; // TODO(benh): Make this required.
  optional CommandInfo command = 7;

  // Executor provided with a container will launch the container
  // with the executor's CommandInfo and we expect the container to
  // act as a Mesos executor.
  optional ContainerInfo container = 11;

  repeated Resource resources = 5;
  optional string name = 9;

  // 'source' is an identifier style string used by frameworks to
  // track the source of an executor. This is useful when it's
  // possible for different executor ids to be related semantically.
  //
  // NOTE: 'source' is exposed alongside the resource usage of the
  // executor via JSON on the slave. This allows users to import usage
  // information into a time series database for monitoring.
  //
  // This field is deprecated since 1.0. Please use labels for
  // free-form metadata instead.
  optional string source = 10 [deprecated = true]; // Since 1.0.

  // This field can be used to pass arbitrary bytes to an executor.
  optional bytes data = 4;

  // Service discovery information for the executor. It is not
  // interpreted or acted upon by Mesos. It is up to a service
  // discovery system to use this information as needed and to handle
  // executors without service discovery information.
  optional DiscoveryInfo discovery = 12;

  // When shutting down an executor the agent will wait in a
  // best-effort manner for the grace period specified here
  // before forcibly destroying the container. The executor
  // must not assume that it will always be allotted the full
  // grace period, as the agent may decide to allot a shorter
  // period and failures / forcible terminations may occur.
  optional DurationInfo shutdown_grace_period = 13;

  // Labels are free-form key value pairs which are exposed through
  // master and slave endpoints. Labels will not be interpreted or
  // acted upon by Mesos itself. As opposed to the data field, labels
  // will be kept in memory on master and slave processes. Therefore,
  // labels should be used to tag executors with lightweight metadata.
  // Labels should not contain duplicate key-value pairs.
  optional Labels labels = 14;
}


/**
 * Describes a domain. A domain is a collection of hosts that have
 * similar characteristics. Mesos currently only supports "fault
 * domains", which identify groups of hosts with similar failure
 * characteristics.
 *
 * Frameworks can generally assume that network links between hosts in
 * the same fault domain have lower latency, higher bandwidth, and better
 * availability than network links between hosts in different domains.
 * Schedulers may prefer to place network-intensive workloads in the
 * same domain, as this may improve performance. Conversely, a single
 * failure that affects a host in a domain may be more likely to
 * affect other hosts in the same domain; hence, schedulers may prefer
 * to place workloads that require high availability in multiple
 * domains. (For example, all the hosts in a single rack might lose
 * power or network connectivity simultaneously.)
 *
 * There are two kinds of fault domains: regions and zones. Regions
 * offer the highest degree of fault isolation, but network latency
 * between regions is typically high (typically >50 ms). Zones offer a
 * modest degree of fault isolation along with reasonably low network
 * latency (typically <10 ms).
 *
 * The mapping from fault domains to physical infrastructure is up to
 * the operator to configure. In cloud environments, regions and zones
 * can be mapped to the "region" and "availability zone" concepts
 * exposed by most cloud providers, respectively. In on-premise
 * deployments, regions and zones can be mapped to data centers and
 * racks, respectively.
 *
 * Both masters and agents can be configured with domains.
* Frameworks can compare the domains of two hosts to determine if the
 * hosts are in the same zone, in different zones in the same region, or
 * in different regions. Note that all masters in a given Mesos cluster
 * must be in the same region.
 *
 * Complex deployments may have additional levels of hierarchy: for example,
 * multiple racks might be grouped together into "halls" and multiple DCs in
 * the same geographical vicinity might be grouped together. As a convention,
 * the recommended way to represent additional levels of hierarchy is via dot-
 * separated labels in the existing zone and region fields. For example, the
 * fact that racks "abc" and "def" are in the same hall might be represented
 * using the zone names "rack-abc.hall-1" and "rack-def.hall-1", for example.
 * Software that is not aware of this additional structure will compare the
 * zone names for equality- hence, the two zones will be treated as different
 * (unrelated) zones. Software that is "hall-aware" can inspect the zone names
 * and make use of the additional hierarchy.
 */
message DomainInfo {
  message FaultDomain {
    message RegionInfo {
      required string name = 1;
    }

    message ZoneInfo {
      required string name = 1;
    }

    required RegionInfo region = 1;
    required ZoneInfo zone = 2;
  }

  optional FaultDomain fault_domain = 1;
}


/**
 * Describes a master. This will probably have more fields in the
 * future which might be used, for example, to link a framework webui
 * to a master webui.
 */
message MasterInfo {
  required string id = 1;

  // The IP address (only IPv4) as a packed 4-bytes integer,
  // stored in network order.  Deprecated, use `address.ip` instead.
  required uint32 ip = 2;

  // The TCP port the Master is listening on for incoming
  // HTTP requests; deprecated, use `address.port` instead.
  required uint32 port = 3 [default = 5050];

  // In the default implementation, this will contain information
  // about both the IP address, port and Master name; it should really
  // not be relied upon by external tooling/frameworks and be
  // considered an "internal" implementation field.
  optional string pid = 4;

  // The server's hostname, if available; it may be unreliable
  // in environments where the DNS configuration does not resolve
  // internal hostnames (eg, some public cloud providers).
  // Deprecated, use `address.hostname` instead.
  optional string hostname = 5;

  // The running Master version, as a string; taken from the
  // generated "master/version.hpp".
  optional string version = 6;

  // The full IP address (supports both IPv4 and IPv6 formats)
  // and supersedes the use of `ip`, `port` and `hostname`.
  // Since Mesos 0.24.
  optional Address address = 7;

  // The domain that this master belongs to. All masters in a Mesos
  // cluster should belong to the same region.
  optional DomainInfo domain = 8;

  message Capability {
    enum Type {
      UNKNOWN = 0;

      // NOTE: When the master starts to use a new capability that
      // may prevent compatible downgrade, remember to add the
      // capability to `Registry::MinimumCapability`. Conversely,
      // the added minimum capability should be removed if the capability
      // is deemed to be no longer required for compatible downgrade.
      // See MESOS-8878 for more details.

      // The master can handle slaves whose state
      // changes after reregistering.
      AGENT_UPDATE = 1;

      // The master can drain or deactivate agents when requested
      // via operator APIs.
      AGENT_DRAINING = 2;

      // The master can handle the new quota API, which supports setting
      // limits separately from guarantees (introduced in Mesos 1.9).
      QUOTA_V2 = 3;
    }

    optional Type type = 1;
  }

  repeated Capability capabilities = 9;
}


/**
 * Describes a slave. Note that the 'id' field is only available after
 * a slave is registered with the master, and is made available here
 * to facilitate re-registration.
*/
message SlaveInfo {
  required string hostname = 1;
  optional int32 port = 8 [default = 5051];

  // The configured resources at the agent. This does not include any
  // dynamic reservations or persistent volumes that may currently
  // exist at the agent.
  repeated Resource resources = 3;

  repeated Attribute attributes = 5;
  optional SlaveID id = 6;

  // The domain that this slave belongs to. If the slave's region
  // differs from the master's region, it will not appear in resource
  // offers to frameworks that have not enabled the REGION_AWARE
  // capability.
  optional DomainInfo domain = 10;

  // Slave checkpointing is always enabled in recent Mesos versions;
  // the value of this field is ignored.
  // TODO(joerg84): Remove checkpoint field after deprecation cycle starting
  // with 0.27 (MESOS-2317).
  optional bool checkpoint = 7 [default = false];

  message Capability {
    enum Type {
      // This must be the first enum value in this list, to
      // ensure that if 'type' is not set, the default value
      // is UNKNOWN. This enables enum values to be added
      // in a backwards-compatible way. See: MESOS-4997.
      UNKNOWN = 0;

      // This expresses the ability for the agent to be able
      // to launch tasks of a 'multi-role' framework.
      MULTI_ROLE = 1;

      // This expresses the ability for the agent to be able to launch
      // tasks, reserve resources, and create volumes using resources
      // allocated to a 'hierarchical-role'.
      // NOTE: This capability is required specifically for creating
      // volumes because a hierarchical role includes '/' (slashes) in them.
      // Agents with this capability know to transform the '/' (slashes)
      // into ' ' (spaces).
      HIERARCHICAL_ROLE = 2;

      // This capability has three effects for an agent.
      //
      // (1) The format of the checkpointed resources, and
      //     the resources reported to master.
      //
      //     These resources are reported in the "pre-reservation-refinement"
      //     format if none of the resources have refined reservations. If any
      //     of the resources have refined reservations, they are reported in
      //     the "post-reservation-refinement" format. The purpose is to allow
      //     downgrading of an agent as well as communication with a pre-1.4.0
      //     master until the reservation refinement feature is actually used.
      //
      //     See the 'Resource Format' section for more details.
      //
      // (2) The format of the resources reported by the HTTP endpoints.
      //
      //     For resources reported by agent endpoints, the
      //     "pre-reservation-refinement" format is "injected" if possible.
      //     That is, resources without refined reservations will have the
      //     `Resource.role` and `Resource.reservation` set, whereas
      //     resources with refined reservations will not.
      //
      //     See the 'Resource Format' section for more details.
      //
      // (3) The ability for the agent to launch tasks, reserve resources, and
      //     create volumes using resources that have refined reservations.
      //
      //     See `ReservationInfo.reservations` section for more details.
      //
      // NOTE: Resources are said to have refined reservations if it uses the
      // `Resource.reservations` field, and `Resource.reservations_size() > 1`.
      RESERVATION_REFINEMENT = 3;

      // This expresses the ability for the agent to handle resource
      // provider related operations. This includes the following:
      //
      // (1) The ability to report resources that are provided by some
      //     local resource providers through the resource provider API.
      //
      // (2) The ability to provide operation feedback. This also means
      //     that this capability is a prerequisite for full support of
      //     feedback for operations on agent default resources. If an
      //     agent has the mandatory AGENT_OPERATION_FEEDBACK capability
      //     set but not the RESOURCE_PROVIDER capability, then
      //     operations on agent default resources which request feedback
      //     will not be allowed.
      RESOURCE_PROVIDER = 4;

      // This expresses the capability for the agent to handle persistent
      // volume resize operations safely. This capability is turned on by
      // default.
      RESIZE_VOLUME = 5;

      // This expresses the ability of the agent to handle operation feedback
      // for operations on agent default resources.
      //
      // Note that full support for this feature also requires the
      // RESOURCE_PROVIDER capability; if you would like the agent to
      // handle feedback for operations on agent default resources, the
      // RESOURCE_PROVIDER capability should be set as well.
      AGENT_OPERATION_FEEDBACK = 6;

      // This expresses the ability for the agent to automatically drain tasks
      // in preparation for operator maintenance. This capability is required.
      AGENT_DRAINING = 7;

      // This expresses the ability for the agent to launch tasks which specify
      // resource limits for CPU and/or memory.
      TASK_RESOURCE_LIMITS = 8;
    }

    // Enum fields should be optional, see: MESOS-4997.
    optional Type type = 1;
  }
}


/**
 * Describes the container configuration to run a managed CSI plugin.
 */
message CSIPluginContainerInfo {
  enum Service {
    UNKNOWN = 0;
    CONTROLLER_SERVICE = 1;
    NODE_SERVICE = 2;
  }

  repeated Service services = 1;
  optional CommandInfo command = 2;
  repeated Resource resources = 3;
  optional ContainerInfo container = 4;
}


/**
 * Describes the endpoint of an unmanaged CSI plugin service.
 */
message CSIPluginEndpoint {
  required CSIPluginContainerInfo.Service csi_service = 1;
  required string endpoint = 2;
}


/**
 * Describes a CSI plugin.
 */
message CSIPluginInfo {
  // The type of the CSI plugin. This uniquely identifies a CSI
  // implementation. For instance:
  //     org.apache.mesos.csi.test
  //
  // Please follow the Java package naming convention
  // (https://en.wikipedia.org/wiki/Java_package#Package_naming_conventions)
  // to avoid conflicts on type names.
  required string type = 1;

  // The name of the CSI plugin. There could be multiple instances of a
  // type of CSI plugin within a Mesos cluster. The name field is used to
  // distinguish these instances. It should be a legal Java identifier
  // (https://docs.oracle.com/javase/tutorial/java/nutsandbolts/variables.html)
  // to avoid conflicts on concatenation of type and name.
  //
  // The type and name together provide the means to uniquely identify a
  // storage backend and its resources in the cluster, so the operator should
  // ensure that the concatenation of type and name is unique in the cluster,
  // and it remains the same if the instance is migrated to another agent
  // (e.g., there is a change in the agent ID).
  optional string name = 2 [default = "default"];

  // We support two kinds of CSI plugins:
  //   1. Managed CSI plugins: This is the plugin which will be launched by
  //      Mesos as standalone container, and Mesos will internally determine
  //      its endpoint when launching it and manage its whole lifecycle. For
  //      this kind of plugins, the `containers` field below must be specified.
  //   2. Unmanaged CSI plugins: This is the plugin which is launched out of
  //      Mesos (e.g., manually launched by the operator). For this kind of
  //      plugins, the `endpoints` field below must be specified because Mesos
  //      needs it to call CSI gRPC methods.
  // Please note that only one of the `containers` and `endpoints` fields
  // should be specified.

  // A list of container configurations to run managed CSI plugin.
  // The controller service will be served by the first configuration
  // that contains `CONTROLLER_SERVICE`, and the node service will be
  // served by the first configuration that contains `NODE_SERVICE`.
  repeated CSIPluginContainerInfo containers = 3;

  // The service endpoints of the unmanaged CSI plugin. An endpoint is usually
  // a path to a Unix domain socket.
  repeated CSIPluginEndpoint endpoints = 4;

  // The root directory of all the target paths managed by the CSI plugin.
  // Each volume will be published by the CSI plugin at a sub-directory
  // under this path.
  optional string target_path_root = 5;

  // For some CSI plugins which implement CSI v1 spec, they expect the target
  // path is an existing path which is actually not CSI v1 spec compliant. In
  // such case this field should be set to `true` as a work around for those
  // plugins. For the CSI plugins which implement CSI v0 spec, this field will
  // be just ignored.
  optional bool target_path_exists = 6;
}


/**
 * Describes a resource provider. Note that the 'id' field is only available
 * after a resource provider is registered with the master, and is made
 * available here to facilitate re-registration.
 */
message ResourceProviderInfo {
  optional ResourceProviderID id = 1;
  repeated Attribute attributes = 2;

  // The type of the resource provider. This uniquely identifies a
  // resource provider implementation. For instance:
  //     org.apache.mesos.rp.local.storage
  //
  // Please follow the Java package naming convention
  // (https://en.wikipedia.org/wiki/Java_package#Package_naming_conventions)
  // to avoid conflicts on type names.
  required string type = 3;

  // The name of the resource provider. There could be multiple
  // instances of a type of resource provider. The name field is used
  // to distinguish these instances. It should be a legal Java identifier
  // (https://docs.oracle.com/javase/tutorial/java/nutsandbolts/variables.html)
  // to avoid conflicts on concatenation of type and name.
  required string name = 4;

  // The stack of default reservations. If this field is not empty, it
  // indicates that resources from this resource provider are reserved
  // by default, except for the resources that have been reserved or
  // unreserved through operations. The first `ReservationInfo`
  // may have type `STATIC` or `DYNAMIC`, but the rest must have
  // `DYNAMIC`. One can create a new reservation on top of an existing
  // one by pushing a new `ReservationInfo` to the back. The last
  // `ReservationInfo` in this stack is the "current" reservation. The
  // new reservation's role must be a child of the current one.
  repeated Resource.ReservationInfo default_reservations = 5; // EXPERIMENTAL.

  // Storage resource provider related information.
  message Storage {
    required CSIPluginInfo plugin = 1;

    // Amount of time to wait after the resource provider finishes reconciling
    // existing volumes and storage pools against the CSI plugin to start the
    // next reconciliation. A non-positive value means that no reconciliation
    // will happen after startup.
    optional double reconciliation_interval_seconds = 2;
  }

  optional Storage storage = 6; // EXPERIMENTAL.
}


/**
 * Describes an Attribute or Resource "value". A value is described
 * using the standard protocol buffer "union" trick.
 */
message Value {
  // Discriminator for the "union": names which of the optional
  // `scalar` / `ranges` / `set` / `text` fields below is meaningful.
  enum Type {
    SCALAR = 0;
    RANGES = 1;
    SET = 2;
    TEXT = 3;
  }

  message Scalar {
    // Scalar values are represented using floating point. To reduce
    // the chance of unpredictable floating point behavior due to
    // roundoff error, Mesos only supports three decimal digits of
    // precision for scalar resource values. That is, floating point
    // values are converted to a fixed point format that supports
    // three decimal digits of precision, and then converted back to
    // floating point on output. Any additional precision in scalar
    // resource values is discarded (via rounding).
    required double value = 1;
  }

  message Range {
    required uint64 begin = 1;
    required uint64 end = 2;
  }

  message Ranges {
    repeated Range range = 1;
  }

  message Set {
    repeated string item = 1;
  }

  message Text {
    required string value = 1;
  }

  required Type type = 1;
  optional Scalar scalar = 2;
  optional Ranges ranges = 3;
  optional Set set = 4;
  optional Text text = 5;
}


/**
 * Describes an attribute that can be set on a machine. For now,
 * attributes and resources share the same "value" type, but this may
 * change in the future and attributes may only be string based.
*/
message Attribute {
  required string name = 1;
  required Value.Type type = 2;
  optional Value.Scalar scalar = 3;
  optional Value.Ranges ranges = 4;
  optional Value.Set set = 6;
  optional Value.Text text = 5;
}


/**
 * Describes a resource from a resource provider. The `name` field is
 * a string like "cpus" or "mem" that indicates which kind of resource
 * this is; the rest of the fields describe the properties of the
 * resource. A resource can take on one of three types: scalar
 * (double), a list of finite and discrete ranges (e.g., [1-10,
 * 20-30]), or a set of items. A resource is described using the
 * standard protocol buffer "union" trick.
 *
 * Note that "disk" and "mem" resources are scalar values expressed in
 * megabytes. Fractional "cpus" values are allowed (e.g., "0.5"),
 * which correspond to partial shares of a CPU.
 */
message Resource {
  // Specified if the resource comes from a particular resource provider.
  optional ResourceProviderID provider_id = 12;

  required string name = 1;
  required Value.Type type = 2;
  optional Value.Scalar scalar = 3;
  optional Value.Ranges ranges = 4;
  optional Value.Set set = 5;

  // The role that this resource is reserved for. If "*", this indicates
  // that the resource is unreserved. Otherwise, the resource will only
  // be offered to frameworks that belong to this role.
  //
  // NOTE: Frameworks must not set this field if `reservations` is set.
  //       See the 'Resource Format' section for more details.
  //
  // TODO(mpark): Deprecate once `reservations` is no longer experimental.
  optional string role = 6 [default = "*", deprecated=true];

  // This was initially introduced to support MULTI_ROLE capable
  // frameworks. Frameworks that are not MULTI_ROLE capable can
  // continue to assume that the offered resources are allocated
  // to their role.
  message AllocationInfo {
    // If set, this resource is allocated to a role. Note that in the
    // future, this may be unset and the scheduler may be responsible
    // for allocating to one of its roles.
    optional string role = 1;

    // In the future, we may add additional fields here, e.g. priority
    // tier, type of allocation (quota / fair share).
  }

  optional AllocationInfo allocation_info = 11;

  // Resource Format:
  //
  // Frameworks receive resource offers in one of two formats, depending on
  // whether the RESERVATION_REFINEMENT capability is enabled.
  //
  // __WITHOUT__ the RESERVATION_REFINEMENT capability, the framework is offered
  // resources in the "pre-reservation-refinement" format. In this format, the
  // `Resource.role` and `Resource.reservation` fields are used in conjunction
  // to describe the reservation state of a `Resource` message.
  //
  // The following is an overview of the possible reservation states:
  //
  // +------------+------------------------------------------------------------+
  // | unreserved | {                                                          |
  // |            |   role: "*",                                               |
  // |            |   reservation: <unset>,                                    |
  // |            |   reservations: <unset>                                    |
  // |            | }                                                          |
  // +------------+------------------------------------------------------------+
  // | static     | {                                                          |
  // |            |   role: "eng",                                             |
  // |            |   reservation: <unset>,                                    |
  // |            |   reservations: <unset>                                    |
  // |            | }                                                          |
  // +------------+------------------------------------------------------------+
  // | dynamic    | {                                                          |
  // |            |   role: "eng",                                             |
  // |            |   reservation: {                                           |
  // |            |     type: <unset>,                                         |
  // |            |     role: <unset>,                                         |
  // |            |     principal: <optional>,                                 |
  // |            |     labels: <optional>                                     |
  // |            |   },                                                       |
  // |            |   reservations: <unset>                                    |
  // |            | }                                                          |
  // +------------+------------------------------------------------------------+
  //
  // __WITH__ the RESERVATION_REFINEMENT capability, the framework is offered
  // resources in the "post-reservation-refinement" format. In this format, the
  // reservation state of a `Resource` message is expressed solely in
  // `Resource.reservations` field.
  //
  // The following is an overview of the possible reservation states:
  //
  // +------------+------------------------------------------------------------+
  // | unreserved | {                                                          |
  // |            |   role: <unset>,                                           |
  // |            |   reservation: <unset>,                                    |
  // |            |   reservations: []                                         |
  // |            | }                                                          |
  // +------------+------------------------------------------------------------+
  // | static     | {                                                          |
  // |            |   role: <unset>,                                           |
  // |            |   reservation: <unset>,                                    |
  // |            |   reservations: [                                          |
  // |            |     {                                                      |
  // |            |       type: STATIC,                                        |
  // |            |       role: "eng",                                         |
  // |            |       principal: <optional>,                               |
  // |            |       labels: <optional>                                   |
  // |            |     }                                                      |
  // |            |   ]                                                        |
  // |            | }                                                          |
  // +------------+------------------------------------------------------------+
  // | dynamic    | {                                                          |
  // |            |   role: <unset>,                                           |
  // |            |   reservation: <unset>,                                    |
  // |            |   reservations: [                                          |
  // |            |     {                                                      |
  // |            |       type: DYNAMIC,                                       |
  // |            |       role: "eng",                                         |
  // |            |       principal: <optional>,                               |
  // |            |       labels: <optional>                                   |
  // |            |     }                                                      |
  // |            |   ]                                                        |
  // |            | }                                                          |
  // +------------+------------------------------------------------------------+
  //
  // We can also __refine__ reservations with this capability like so:
  //
  // +------------+------------------------------------------------------------+
  // | refined    | {                                                          |
  // |            |   role: <unset>,                                           |
  // |            |   reservation: <unset>,                                    |
  // |            |   reservations: [                                          |
  // |            |     {                                                      |
  // |            |       type: STATIC or DYNAMIC,                             |
  // |            |       role: "eng",                                         |
  // |            |       principal: <optional>,                               |
  // |            |       labels: <optional>                                   |
  // |            |     },                                                     |
  // |            |     {                                                      |
  // |            |       type: DYNAMIC,                                       |
  // |            |       role: "eng/front_end",                               |
  // |            |       principal: <optional>,                               |
  // |            |       labels: <optional>                                   |
  // |            |     }                                                      |
  // |            |   ]                                                        |
  // |            | }                                                          |
  // +------------+------------------------------------------------------------+
  //
  // NOTE: Each `ReservationInfo` in the `reservations` field denotes
  //       a reservation that refines the previous `ReservationInfo`.

  message ReservationInfo {
    // Describes a reservation. A static reservation is set by the operator on
    // the command-line and they are immutable without agent restart. A dynamic
    // reservation is made by an operator via the '/reserve' HTTP endpoint
    // or by a framework via the offer cycle by sending back an
    // 'Offer::Operation::Reserve' message.
    //
    // NOTE: We currently do not allow frameworks with role "*" to make dynamic
    // reservations.

    enum Type {
      UNKNOWN = 0;
      STATIC = 1;
      DYNAMIC = 2;
    }

    // The type of this reservation.
    //
    // NOTE: This field must not be set for `Resource.reservation`.
    //       See the 'Resource Format' section for more details.
    optional Type type = 4;

    // The role to which this reservation is made for.
    //
    // NOTE: This field must not be set for `Resource.reservation`.
    //       See the 'Resource Format' section for more details.
    optional string role = 3;

    // Indicates the principal, if any, of the framework or operator
    // that reserved this resource. If reserved by a framework, the
    // field should match the `FrameworkInfo.principal`. It is used in
    // conjunction with the `UnreserveResources` ACL to determine
    // whether the entity attempting to unreserve this resource is
    // permitted to do so.
    optional string principal = 1;

    // Labels are free-form key value pairs that can be used to
    // associate arbitrary metadata with a reserved resource. For
    // example, frameworks can use labels to identify the intended
    // purpose for a portion of the resources the framework has
    // reserved at a given slave. Labels should not contain duplicate
    // key-value pairs.
    optional Labels labels = 2;
  }

  // If this is set, this resource was dynamically reserved by an
  // operator or a framework. Otherwise, this resource is either unreserved
  // or statically reserved by an operator via the --resources flag.
  //
  // NOTE: Frameworks must not set this field if `reservations` is set.
  //       See the 'Resource Format' section for more details.
  //
  // TODO(mpark): Deprecate once `reservations` is no longer experimental.
  optional ReservationInfo reservation = 8;

  // The stack of reservations. If this field is empty, it indicates that this
  // resource is unreserved. Otherwise, the resource is reserved. The first
  // `ReservationInfo` may have type `STATIC` or `DYNAMIC`, but the rest must
  // have `DYNAMIC`. One can create a new reservation on top of an existing
  // one by pushing a new `ReservationInfo` to the back. The last
  // `ReservationInfo` in this stack is the "current" reservation. The new
  // reservation's role must be a child of the current reservation's role.
  //
  // NOTE: Frameworks must not set this field if `reservation` is set.
  //       See the 'Resource Format' section for more details.
  //
  // TODO(mpark): Deprecate `role` and `reservation` once this is stable.
  repeated ReservationInfo reservations = 13; // EXPERIMENTAL.

  message DiskInfo {
    // Describes a persistent disk volume.
    //
    // A persistent disk volume will not be automatically garbage
    // collected if the task/executor/slave terminates, but will be
    // re-offered to the framework(s) belonging to the 'role'.
    //
    // NOTE: Currently, we do not allow persistent disk volumes
    // without a reservation (i.e., 'role' cannot be '*').
    message Persistence {
      // A unique ID for the persistent disk volume. This ID must be
      // unique per role on each slave. Although it is possible to use
      // the same ID on different slaves in the cluster and to reuse
      // IDs after a volume with that ID has been destroyed, both
      // practices are discouraged.
      required string id = 1;

      // This field indicates the principal of the operator or
      // framework that created this volume. It is used in conjunction
      // with the "destroy" ACL to determine whether an entity
      // attempting to destroy the volume is permitted to do so.
      //
      // NOTE: This field should match the FrameworkInfo.principal of
      // the framework that created the volume.
      optional string principal = 2;
    }

    optional Persistence persistence = 1;

    // Describes how this disk resource will be mounted in the
    // container. If not set, the disk resource will be used as the
    // sandbox. Otherwise, it will be mounted according to the
    // 'container_path' inside 'volume'. The 'host_path' inside
    // 'volume' is ignored.
    // NOTE: If 'volume' is set but 'persistence' is not set, the
    // volume will be automatically garbage collected after
    // task/executor terminates. Currently, if 'persistence' is set,
    // 'volume' must be set.
    optional Volume volume = 2;

    // Describes where a disk originates from.
    message Source {
      enum Type {
        UNKNOWN = 0;
        PATH = 1;
        MOUNT = 2;
        BLOCK = 3;
        RAW = 4;
      }

      // A folder that can be located on a separate disk device. This
      // can be shared and carved up as necessary between frameworks.
      message Path {
        // Path to the folder (e.g., /mnt/raid/disk0). If the path is a
        // relative path, it is relative to the agent work directory.
        optional string root = 1;
      }

      // A mounted file-system set up by the Agent administrator. This
      // can only be used exclusively: a framework cannot accept a
      // partial amount of this disk.
      message Mount {
        // Path to mount point (e.g., /mnt/raid/disk0). If the path is a
        // relative path, it is relative to the agent work directory.
        optional string root = 1;
      }

      required Type type = 1;
      optional Path path = 2;
      optional Mount mount = 3;

      // The vendor of this source. If present, this field provides the means
      // to uniquely identify the storage backend of this source in the
      // cluster.
      optional string vendor = 7; // EXPERIMENTAL.

      // The identifier of this source. This field maps onto CSI volume IDs and
      // is not expected to be set by frameworks. If both `vendor` and `id` are
      // present, these two fields together provide the means to uniquely
      // identify this source in the cluster.
      optional string id = 4; // EXPERIMENTAL.

      // Additional metadata for this source. This field maps onto CSI volume
      // context. Frameworks should neither alter this field, nor expect this
      // field to remain unchanged.
      optional Labels metadata = 5; // EXPERIMENTAL.

      // This field serves as an indirection to a set of storage
      // vendor specific disk parameters which describe the properties
      // of the disk. The operator will setup mappings between a
      // profile name to a set of vendor specific disk parameters. And
      // the framework will do disk selection based on profile names,
      // instead of vendor specific disk parameters.
      //
      // Also see the DiskProfileAdaptor module.
      optional string profile = 6; // EXPERIMENTAL.
    }

    optional Source source = 3;
  }

  optional DiskInfo disk = 7;

  message RevocableInfo {}

  // If this is set, the resources are revocable, i.e., any tasks or
  // executors launched using these resources could get preempted or
  // throttled at any time. This could be used by frameworks to run
  // best effort tasks that do not need strict uptime or performance
  // guarantees. Note that if this is set, 'disk' or 'reservation'
  // cannot be set.
  optional RevocableInfo revocable = 9;

  // Allow the resource to be shared across tasks.
  message SharedInfo {}

  // If this is set, the resources are shared, i.e. multiple tasks
  // can be launched using this resource and all of them shall refer
  // to the same physical resource on the cluster. Note that only
  // persistent volumes can be shared currently.
  //
  // NOTE: Different shared resources must be uniquely identifiable.
  // This currently holds as persistent volume should have unique `id`
  // (this is not validated or enforced though).
  optional SharedInfo shared = 10;
}


/**
 * Represents filters that allow a framework to control the shape of
 * offers that will be sent to its role(s). These filters apply
 * globally to any agent (unlike the existing `DECLINE` filter which
 * is a time-based resource subset filter that only applies to the
 * agent that was declined).
 *
 * NOTE: Custom allocators might interpret these fields in a different
 * way, or not at all.
 */
message OfferFilters {
  message ResourceQuantities {
    // Quantities are pairs of identifiers of scalar resources and
    // an associated value, e.g., `{"disk": Scalar {"value": 30}}`.
    map<string, Value.Scalar> quantities = 1;
  }

  message MinAllocatableResources {
    // A set of resources is considered allocatable if contained in any of
    // the following quantities. If no quantities are specified any resource
    // is considered allocatable.
    repeated ResourceQuantities quantities = 1;
  }

  optional MinAllocatableResources min_allocatable_resources = 1;
}


/**
 * When the network bandwidth caps are enabled and the container
 * is over its limit, outbound packets may be either delayed or
 * dropped completely either because it exceeds the maximum bandwidth
 * allocation for a single container (the cap) or because the combined
 * network traffic of multiple containers on the host exceeds the
 * transmit capacity of the host (the share). We can report the
 * following statistics for each of these conditions exported directly
 * from the Linux Traffic Control Queueing Discipline.
 *
 *   id         : name of the limiter, e.g. 'tx_bw_cap'
 *   backlog    : number of packets currently delayed
 *   bytes      : total bytes seen
 *   drops      : number of packets dropped in total
 *   overlimits : number of packets which exceeded allocation
 *   packets    : total packets seen
 *   qlen       : number of packets currently queued
 *   ratebps    : throughput in bytes/sec
 *   ratepps    : throughput in packets/sec
 *   requeues   : number of times a packet has been delayed due to
 *                locking or device contention issues
 *
 * More information on the operation of Linux Traffic Control can be
 * found at http://www.lartc.org/lartc.html.
*/
message TrafficControlStatistics {
  required string id = 1;
  optional uint64 backlog = 2;
  optional uint64 bytes = 3;
  optional uint64 drops = 4;
  optional uint64 overlimits = 5;
  optional uint64 packets = 6;
  optional uint64 qlen = 7;
  optional uint64 ratebps = 8;
  optional uint64 ratepps = 9;
  optional uint64 requeues = 10;
}


/**
 * IP counters reported through `SNMPStatistics.ip_stats`.
 */
message IpStatistics {
  optional int64 Forwarding = 1;
  optional int64 DefaultTTL = 2;
  optional int64 InReceives = 3;
  optional int64 InHdrErrors = 4;
  optional int64 InAddrErrors = 5;
  optional int64 ForwDatagrams = 6;
  optional int64 InUnknownProtos = 7;
  optional int64 InDiscards = 8;
  optional int64 InDelivers = 9;
  optional int64 OutRequests = 10;
  optional int64 OutDiscards = 11;
  optional int64 OutNoRoutes = 12;
  optional int64 ReasmTimeout = 13;
  optional int64 ReasmReqds = 14;
  optional int64 ReasmOKs = 15;
  optional int64 ReasmFails = 16;
  optional int64 FragOKs = 17;
  optional int64 FragFails = 18;
  optional int64 FragCreates = 19;
}


/**
 * ICMP counters reported through `SNMPStatistics.icmp_stats`.
 */
message IcmpStatistics {
  optional int64 InMsgs = 1;
  optional int64 InErrors = 2;
  optional int64 InCsumErrors = 3;
  optional int64 InDestUnreachs = 4;
  optional int64 InTimeExcds = 5;
  optional int64 InParmProbs = 6;
  optional int64 InSrcQuenchs = 7;
  optional int64 InRedirects = 8;
  optional int64 InEchos = 9;
  optional int64 InEchoReps = 10;
  optional int64 InTimestamps = 11;
  optional int64 InTimestampReps = 12;
  optional int64 InAddrMasks = 13;
  optional int64 InAddrMaskReps = 14;
  optional int64 OutMsgs = 15;
  optional int64 OutErrors = 16;
  optional int64 OutDestUnreachs = 17;
  optional int64 OutTimeExcds = 18;
  optional int64 OutParmProbs = 19;
  optional int64 OutSrcQuenchs = 20;
  optional int64 OutRedirects = 21;
  optional int64 OutEchos = 22;
  optional int64 OutEchoReps = 23;
  optional int64 OutTimestamps = 24;
  optional int64 OutTimestampReps = 25;
  optional int64 OutAddrMasks = 26;
  optional int64 OutAddrMaskReps = 27;
}


/**
 * TCP counters reported through `SNMPStatistics.tcp_stats`.
 */
message TcpStatistics {
  optional int64 RtoAlgorithm = 1;
  optional int64 RtoMin = 2;
  optional int64 RtoMax = 3;
  optional int64 MaxConn = 4;
  optional int64 ActiveOpens = 5;
  optional int64 PassiveOpens = 6;
  optional int64 AttemptFails = 7;
  optional int64 EstabResets = 8;
  optional int64 CurrEstab = 9;
  optional int64 InSegs = 10;
  optional int64 OutSegs = 11;
  optional int64 RetransSegs = 12;
  optional int64 InErrs = 13;
  optional int64 OutRsts = 14;
  optional int64 InCsumErrors = 15;
}


/**
 * UDP counters reported through `SNMPStatistics.udp_stats`.
 */
message UdpStatistics {
  optional int64 InDatagrams = 1;
  optional int64 NoPorts = 2;
  optional int64 InErrors = 3;
  optional int64 OutDatagrams = 4;
  optional int64 RcvbufErrors = 5;
  optional int64 SndbufErrors = 6;
  optional int64 InCsumErrors = 7;
  optional int64 IgnoredMulti = 8;
}


/**
 * Groups the per-protocol counter messages; carried in
 * `ResourceStatistics.net_snmp_statistics`.
 */
message SNMPStatistics {
  optional IpStatistics ip_stats = 1;
  optional IcmpStatistics icmp_stats = 2;
  optional TcpStatistics tcp_stats = 3;
  optional UdpStatistics udp_stats = 4;
}


/**
 * Limit and usage of a single disk resource; carried in
 * `ResourceStatistics.disk_statistics`.
 */
message DiskStatistics {
  optional Resource.DiskInfo.Source source = 1;
  optional Resource.DiskInfo.Persistence persistence = 2;
  optional uint64 limit_bytes = 3;
  optional uint64 used_bytes = 4;
}


/**
 * A snapshot of resource usage statistics.
 */
message ResourceStatistics {
  required double timestamp = 1; // Snapshot time, in seconds since the Epoch.

  optional uint32 processes = 30;
  optional uint32 threads = 31;

  // CPU Usage Information:
  // Total CPU time spent in user mode, and kernel mode.
  optional double cpus_user_time_secs = 2;
  optional double cpus_system_time_secs = 3;

  // Hard CPU limit.
  optional double cpus_limit = 4;

  // Soft CPU limit.
  optional double cpus_soft_limit = 45;

  // cpu.stat on process throttling (for contention issues).
  optional uint32 cpus_nr_periods = 7;
  optional uint32 cpus_nr_throttled = 8;
  optional double cpus_throttled_time_secs = 9;

  // Memory Usage Information:

  // mem_total_bytes was added in 0.23.0 to represent the total memory
  // of a process in RAM (as opposed to in Swap). This was previously
  // reported as mem_rss_bytes, which was also changed in 0.23.0 to
  // represent only the anonymous memory usage, to keep in sync with
  // Linux kernel's (arguably erroneous) use of terminology.
  optional uint64 mem_total_bytes = 36;

  // Total memory + swap usage. This is set if swap is enabled.
  optional uint64 mem_total_memsw_bytes = 37;

  // Current kernel memory allocation.
  optional uint64 mem_kmem_usage_bytes = 52;

  // Current TCP buf memory allocation.
  optional uint64 mem_kmem_tcp_usage_bytes = 53;

  // Hard memory limit.
  optional uint64 mem_limit_bytes = 6;

  // Soft memory limit.
  optional uint64 mem_soft_limit_bytes = 38;

  // Broken out memory usage information: pagecache, rss (anonymous),
  // mmaped files and swap.

  // TODO(chzhcn) mem_file_bytes and mem_anon_bytes are deprecated in
  // 0.23.0 and will be removed in 0.24.0.
  optional uint64 mem_file_bytes = 10;
  optional uint64 mem_anon_bytes = 11;

  // mem_cache_bytes is added in 0.23.0 to represent page cache usage.
  optional uint64 mem_cache_bytes = 39;

  // Since 0.23.0, mem_rss_bytes is changed to represent only
  // anonymous memory usage. Note that neither its requiredness, type,
  // name nor numeric tag has been changed.
  optional uint64 mem_rss_bytes = 5;

  optional uint64 mem_mapped_file_bytes = 12;

  // This is only set if swap is enabled.
  optional uint64 mem_swap_bytes = 40;
  optional uint64 mem_unevictable_bytes = 41;

  // Number of occurrences of different levels of memory pressure
  // events reported by memory cgroup. Pressure listening (re)starts
  // with these values set to 0 when slave (re)starts. See
  // https://www.kernel.org/doc/Documentation/cgroups/memory.txt for
  // more details.
  optional uint64 mem_low_pressure_counter = 32;
  optional uint64 mem_medium_pressure_counter = 33;
  optional uint64 mem_critical_pressure_counter = 34;

  // Disk Usage Information for executor working directory.
  optional uint64 disk_limit_bytes = 26;
  optional uint64 disk_used_bytes = 27;

  // Per disk (resource) statistics.
  repeated DiskStatistics disk_statistics = 43;

  // Cgroups blkio statistics.
  optional CgroupInfo.Blkio.Statistics blkio_statistics = 44;

  // Perf statistics.
  optional PerfStatistics perf = 13;

  // Network Usage Information:
  optional uint64 net_rx_packets = 14;
  optional uint64 net_rx_bytes = 15;
  optional uint64 net_rx_errors = 16;
  optional uint64 net_rx_dropped = 17;
  optional uint64 net_tx_packets = 18;
  optional uint64 net_tx_bytes = 19;
  optional uint64 net_tx_errors = 20;
  optional uint64 net_tx_dropped = 21;
  optional uint64 net_tx_rate_limit = 46;
  optional uint64 net_tx_burst_rate_limit = 47;
  optional uint64 net_tx_burst_size = 48;
  optional uint64 net_rx_rate_limit = 49;
  optional uint64 net_rx_burst_rate_limit = 50;
  optional uint64 net_rx_burst_size = 51;

  message RatePercentiles {
    optional uint64 min = 1;
    optional uint64 max = 2;
    optional uint64 p50 = 3;
    optional uint64 p90 = 4;
    optional uint64 p95 = 5;
    optional uint64 p99 = 6;
    optional uint64 p999 = 7;
    optional uint64 p9999 = 8;
    optional uint64 samples = 9;
  }

  // Network rate statistics measured in bytes per second
  // or packets per second.
  //
  // Rates are sampled every {sampling_interval_secs}. A
  // time series is created out of the samples taken over
  // a moving sampling window of {sampling_window_secs}.
  // Percentiles for each time series are exposed through
  // RatePercentiles.
  //
  // Linux documentation for more information:
  // https://docs.kernel.org/networking/statistics.html#c.rtnl_link_stats64
  message RateStatistics {
    // Bytes received per second.
    optional RatePercentiles rx_rate = 1;

    // Packets received per second.
    optional RatePercentiles rx_packet_rate = 2;

    // Received packets dropped per second.
    optional RatePercentiles rx_drop_rate = 3;

    // Receiving packet errors per second.
    optional RatePercentiles rx_error_rate = 4;

    // Bytes sent per second.
    optional RatePercentiles tx_rate = 5;

    // Packets sent per second.
    optional RatePercentiles tx_packet_rate = 6;

    // Send packets dropped per second.
    optional RatePercentiles tx_drop_rate = 7;

    // Sending packet errors per second.
    optional RatePercentiles tx_error_rate = 8;

    // Duration of the sliding time series window.
    optional double sampling_window_secs = 9;

    // The delay between rate samples.
    optional double sampling_interval_secs = 10;
  }

  optional RateStatistics net_rate_statistics = 54;

  // Inclusive ephemeral ports range of the container.
  optional Value.Range net_ephemeral_ports = 55;

  // The kernel keeps track of RTT (round-trip time) for its TCP
  // sockets. RTT is a way to tell the latency of a container.
  optional double net_tcp_rtt_microsecs_p50 = 22;
  optional double net_tcp_rtt_microsecs_p90 = 23;
  optional double net_tcp_rtt_microsecs_p95 = 24;
  optional double net_tcp_rtt_microsecs_p99 = 25;

  optional double net_tcp_active_connections = 28;
  optional double net_tcp_time_wait_connections = 29;

  // Network traffic flowing into or out of a container can be delayed
  // or dropped due to congestion or policy inside and outside the
  // container.
  repeated TrafficControlStatistics net_traffic_control_statistics = 35;

  // Network SNMP statistics for each container.
  optional SNMPStatistics net_snmp_statistics = 42;
}


/**
 * Describes a snapshot of the resource usage for executors.
 */
message ResourceUsage {
  message Executor {
    required ExecutorInfo executor_info = 1;

    // This includes resources used by the executor itself
    // as well as its active tasks.
    repeated Resource allocated = 2;

    // Current resource usage. If absent, the containerizer
    // cannot provide resource usage.
    optional ResourceStatistics statistics = 3;

    // The container id for the executor specified in the executor_info field.
    required ContainerID container_id = 4;

    message Task {
      required string name = 1;
      required TaskID id = 2;
      repeated Resource resources = 3;
      optional Labels labels = 4;
    }

    // Non-terminal tasks.
repeated Task tasks = 5;
  }

  repeated Executor executors = 1;

  // Slave's total resources including checkpointed dynamic
  // reservations and persistent volumes.
  repeated Resource total = 2;
}


/**
 * Describes a sample of events from "perf stat". Only available on
 * Linux.
 *
 * NOTE: Each optional field matches the name of a perf event (see
 * "perf list") with the following changes:
 * 1. Names are downcased.
 * 2. Hyphens ('-') are replaced with underscores ('_').
 * 3. Events with alternate names use the name "perf stat" returns,
 *    e.g., for the event "cycles OR cpu-cycles" perf always returns
 *    cycles.
 */
message PerfStatistics {
  required double timestamp = 1; // Start of sample interval, in seconds since the Epoch.
  required double duration = 2; // Duration of sample interval, in seconds.

  // Hardware event.
  optional uint64 cycles = 3;
  optional uint64 stalled_cycles_frontend = 4;
  optional uint64 stalled_cycles_backend = 5;
  optional uint64 instructions = 6;
  optional uint64 cache_references = 7;
  optional uint64 cache_misses = 8;
  optional uint64 branches = 9;
  optional uint64 branch_misses = 10;
  optional uint64 bus_cycles = 11;
  optional uint64 ref_cycles = 12;

  // Software event.
  optional double cpu_clock = 13;
  optional double task_clock = 14;
  optional uint64 page_faults = 15;
  optional uint64 minor_faults = 16;
  optional uint64 major_faults = 17;
  optional uint64 context_switches = 18;
  optional uint64 cpu_migrations = 19;
  optional uint64 alignment_faults = 20;
  optional uint64 emulation_faults = 21;

  // Hardware cache event.
  optional uint64 l1_dcache_loads = 22;
  optional uint64 l1_dcache_load_misses = 23;
  optional uint64 l1_dcache_stores = 24;
  optional uint64 l1_dcache_store_misses = 25;
  optional uint64 l1_dcache_prefetches = 26;
  optional uint64 l1_dcache_prefetch_misses = 27;
  optional uint64 l1_icache_loads = 28;
  optional uint64 l1_icache_load_misses = 29;
  optional uint64 l1_icache_prefetches = 30;
  optional uint64 l1_icache_prefetch_misses = 31;
  optional uint64 llc_loads = 32;
  optional uint64 llc_load_misses = 33;
  optional uint64 llc_stores = 34;
  optional uint64 llc_store_misses = 35;
  optional uint64 llc_prefetches = 36;
  optional uint64 llc_prefetch_misses = 37;
  optional uint64 dtlb_loads = 38;
  optional uint64 dtlb_load_misses = 39;
  optional uint64 dtlb_stores = 40;
  optional uint64 dtlb_store_misses = 41;
  optional uint64 dtlb_prefetches = 42;
  optional uint64 dtlb_prefetch_misses = 43;
  optional uint64 itlb_loads = 44;
  optional uint64 itlb_load_misses = 45;
  optional uint64 branch_loads = 46;
  optional uint64 branch_load_misses = 47;
  optional uint64 node_loads = 48;
  optional uint64 node_load_misses = 49;
  optional uint64 node_stores = 50;
  optional uint64 node_store_misses = 51;
  optional uint64 node_prefetches = 52;
  optional uint64 node_prefetch_misses = 53;
}


/**
 * Describes a request for resources that can be used by a framework
 * to proactively influence the allocator. If 'slave_id' is provided
 * then this request is assumed to only apply to resources on that
 * slave.
 */
message Request {
  optional SlaveID slave_id = 1;
  repeated Resource resources = 2;
}


/**
 * Describes some resources available on a slave. An offer only
 * contains resources from a single slave.
 */
message Offer {
  required OfferID id = 1;
  required FrameworkID framework_id = 2;
  required SlaveID slave_id = 3;
  required string hostname = 4;

  // URL for reaching the slave running on the host.
  optional URL url = 8;

  // The domain of the slave.
optional DomainInfo domain = 11;

  repeated Resource resources = 5;
  repeated Attribute attributes = 7;

  // Executors of the same framework running on this agent.
  repeated ExecutorID executor_ids = 6;

  // Signifies that the resources in this Offer may be unavailable during
  // the given interval. Any tasks launched using these resources may be
  // killed when the interval arrives. For example, these resources may be
  // part of a planned maintenance schedule.
  //
  // This field only provides information about a planned unavailability.
  // The unavailability interval may not necessarily start at exactly this
  // interval, nor last for exactly the duration of this interval.
  // The unavailability may also be forever! See comments in
  // `Unavailability` for more details.
  optional Unavailability unavailability = 9;

  // An offer represents resources allocated to *one* of the
  // roles managed by the scheduler. (Therefore, each
  // `Offer.resources[i].allocation_info` will match the
  // top level `Offer.allocation_info`).
  optional Resource.AllocationInfo allocation_info = 10;

  // Defines an operation that can be performed against offers.
  message Operation {
    enum Type {
      UNKNOWN = 0;
      LAUNCH = 1;
      LAUNCH_GROUP = 6;
      RESERVE = 2;
      UNRESERVE = 3;
      CREATE = 4;
      DESTROY = 5;
      GROW_VOLUME = 11; // EXPERIMENTAL.
      SHRINK_VOLUME = 12; // EXPERIMENTAL.
      CREATE_DISK = 13; // EXPERIMENTAL.
      DESTROY_DISK = 14; // EXPERIMENTAL.
    }

    // TODO(vinod): Deprecate this in favor of `LaunchGroup` below.
    message Launch {
      repeated TaskInfo task_infos = 1;
    }

    // Unlike `Launch` above, all the tasks in a `task_group` are
    // atomically delivered to an executor.
    //
    // `NetworkInfo` set on executor will be shared by all tasks in
    // the task group.
    //
    // TODO(vinod): Any volumes set on executor could be used by a
    // task by explicitly setting `Volume.source` in its resources.
    message LaunchGroup {
      required ExecutorInfo executor = 1;
      required TaskGroupInfo task_group = 2;
    }

    message Reserve {
      repeated Resource source = 2;
      repeated Resource resources = 1;
    }

    message Unreserve {
      repeated Resource resources = 1;
    }

    message Create {
      repeated Resource volumes = 1;
    }

    message Destroy {
      repeated Resource volumes = 1;
    }

    // Grow a volume by an additional disk resource.
    // NOTE: This is currently experimental and only for persistent volumes
    // created on ROOT/PATH disk.
    message GrowVolume {
      required Resource volume = 1;
      required Resource addition = 2;
    }

    // Shrink a volume by the size specified in the `subtract` field.
    // NOTE: This is currently experimental and only for persistent volumes
    // created on ROOT/PATH disk.
    message ShrinkVolume {
      required Resource volume = 1;

      // See comments in `Value.Scalar` for maximum precision supported.
      required Value.Scalar subtract = 2;
    }

    // Create a `MOUNT` or `BLOCK` disk resource backed by a CSI volume from a
    // `RAW` disk resource.
    //
    // In the typical case where the `RAW` disk resource has a profile and no
    // source ID, a new CSI volume will be provisioned by Mesos to back the
    // returned `MOUNT` or `BLOCK` disk resource. However, the `RAW` disk
    // resource can instead have no profile but a source ID, indicating that
    // it is already backed by a CSI volume in one of the following scenarios:
    //
    // (1) The CSI volume is preprovisioned out-of-band.
    //
    // (2) The CSI volume is provisioned by Mesos, but Mesos has lost the
    //     corresponding `MOUNT` or `BLOCK` resource metadata. This could
    //     happen if there has been a change in the agent ID or resource
    //     provider ID where the volume belongs.
    //
    // In the above cases, Mesos won't provision a new CSI volume, but instead
    // will simply return a `MOUNT` or `BLOCK` disk resource backed by the same
    // CSI volume, with the profile specified in this call.
    //
    // NOTE: For the time being, this API is subject to change and the related
    // feature is experimental.
    message CreateDisk {
      required Resource source = 1;

      // NOTE: Only `MOUNT` or `BLOCK` is allowed in this field.
      required Resource.DiskInfo.Source.Type target_type = 2;

      // Apply the specified profile to the created disk. This field must be set
      // if `source` does not have a profile, and must not be set if it has one.
      //
      // NOTE: The operation will fail if the specified profile is unknown to
      // Mesos, i.e., not reported by the disk profile adaptor.
      optional string target_profile = 3;
    }

    // Destroy a disk resource backed by a CSI volume.
    //
    // In the typical case where the CSI plugin of the volume supports volume
    // deprovisioning and the disk resource is a `MOUNT` or `BLOCK` disk with a
    // profile known to Mesos, the volume will be deprovisioned and a `RAW` disk
    // resource with the same profile but no source ID will be returned.
    // However, the following scenarios could lead to different outcomes:
    //
    // (1) If the CSI plugin supports volume deprovisioning but the profile of
    //     the disk resource is unknown to the disk profile adaptor, or the disk
    //     resource is a `RAW` disk with no profile but a source ID (see above
    //     for possible scenarios), the volume will be deprovisioned but no
    //     resource will be returned.
    //
    // (2) If the CSI plugin does not support volume deprovisioning, the volume
    //     won't be deprovisioned and a `RAW` disk resource with no profile but
    //     the same source ID will be returned.
    //
    // NOTE: For the time being, this API is subject to change and the related
    // feature is experimental.
    message DestroyDisk {
      // NOTE: Only a `MOUNT`, `BLOCK` or `RAW` disk is allowed in this field.
      required Resource source = 1;
    }

    optional Type type = 1;

    // The `id` field allows frameworks to indicate that they wish to receive
    // feedback about an operation via the UPDATE_OPERATION_STATUS event in the
    // v1 scheduler API.
    optional OperationID id = 12; // EXPERIMENTAL.

    optional Launch launch = 2;
    optional LaunchGroup launch_group = 7;
    optional Reserve reserve = 3;
    optional Unreserve unreserve = 4;
    optional Create create = 5;
    optional Destroy destroy = 6;
    optional GrowVolume grow_volume = 13; // EXPERIMENTAL.
    optional ShrinkVolume shrink_volume = 14; // EXPERIMENTAL.
    optional CreateDisk create_disk = 15; // EXPERIMENTAL.
    optional DestroyDisk destroy_disk = 16; // EXPERIMENTAL.
  }
}


/**
 * A request to return some resources occupied by a framework.
 */
message InverseOffer {
  // This is the same OfferID as found in normal offers, which allows
  // re-use of some of the OfferID-only messages.
  required OfferID id = 1;

  // URL for reaching the slave running on the host. This enables some
  // optimizations as described in MESOS-3012, such as allowing the
  // scheduler driver to bypass the master and talk directly with a slave.
  optional URL url = 2;

  // The framework that should release its resources.
  // If no specifics are provided (i.e. which slave), all the framework's
  // resources are requested back.
  required FrameworkID framework_id = 3;

  // Specified if the resources need to be released from a particular slave.
  // All the framework's resources on this slave are requested back,
  // unless further qualified by the `resources` field.
  optional SlaveID slave_id = 4;

  // This InverseOffer represents a planned unavailability event in the
  // specified interval. Any tasks running on the given framework or slave
  // may be killed when the interval arrives. Therefore, frameworks should
  // aim to gracefully terminate tasks prior to the arrival of the interval.
  //
  // For reserved resources, the resources are expected to be returned to the
  // framework after the unavailability interval. This is an expectation,
  // not a guarantee. For example, if the unavailability duration is not set,
  // the resources may be removed permanently.
  //
  // For other resources, there is no guarantee that requested resources will
  // be returned after the unavailability interval.
// The allocator has no
  // obligation to re-offer these resources to the prior framework after
  // the unavailability.
  required Unavailability unavailability = 5;

  // A list of resources being requested back from the framework,
  // on the slave identified by `slave_id`. If no resources are specified
  // then all resources are being requested back. For the purpose of
  // maintenance, this field is always empty (maintenance always requests
  // all resources back).
  repeated Resource resources = 6;

  // TODO(josephw): Add additional options for narrowing down the resources
  // being requested back. Such as specific executors, tasks, etc.
}


/**
 * Describes a task. Passed from the scheduler all the way to an
 * executor (see SchedulerDriver::launchTasks and
 * Executor::launchTask). Either ExecutorInfo or CommandInfo should be set.
 * A different executor can be used to launch this task, and subsequent tasks
 * meant for the same executor can reuse the same ExecutorInfo struct.
 */
message TaskInfo {
  required string name = 1;
  required TaskID task_id = 2;
  required SlaveID slave_id = 3;
  repeated Resource resources = 4;
  optional ExecutorInfo executor = 5;
  optional CommandInfo command = 7;

  // Task provided with a container will launch the container as part
  // of this task paired with the task's CommandInfo.
  optional ContainerInfo container = 9;

  // A health check for the task. Implemented for executor-less
  // command-based tasks. For tasks that specify an executor, it is
  // the executor's responsibility to implement the health checking.
  optional HealthCheck health_check = 8;

  // A general check for the task. Implemented for all built-in executors.
  // For tasks that specify an executor, it is the executor's responsibility
  // to implement checking support. Executors should (all built-in executors
  // will) neither interpret nor act on the check's result.
  //
  // NOTE: Check support in built-in executors is experimental.
  //
  // TODO(alexr): Consider supporting multiple checks per task.
  optional CheckInfo check = 13;

  // A kill policy for the task. Implemented for executor-less
  // command-based and docker tasks. For tasks that specify an
  // executor, it is the executor's responsibility to implement
  // the kill policy.
  optional KillPolicy kill_policy = 12;

  optional bytes data = 6;

  // Labels are free-form key value pairs which are exposed through
  // master and slave endpoints. Labels will not be interpreted or
  // acted upon by Mesos itself. As opposed to the data field, labels
  // will be kept in memory on master and slave processes. Therefore,
  // labels should be used to tag tasks with light-weight meta-data.
  // Labels should not contain duplicate key-value pairs.
  optional Labels labels = 10;

  // Service discovery information for the task. It is not interpreted
  // or acted upon by Mesos. It is up to a service discovery system
  // to use this information as needed and to handle tasks without
  // service discovery information.
  optional DiscoveryInfo discovery = 11;

  // Maximum duration for task completion. If the task is non-terminal at the
  // end of this duration, it will fail with the reason
  // `REASON_MAX_COMPLETION_TIME_REACHED`. Mesos supports this field for
  // executor-less tasks, and tasks that use Docker or default executors.
  // It is the executor's responsibility to implement this, so it might not be
  // supported by all custom executors.
  optional DurationInfo max_completion_time = 14;

  // Resource limits associated with the task.
  //
  // NOTE(review): the key/value types were missing from the declaration
  // (`map limits = 15;`), which does not parse. They are restored here as
  // resource name -> scalar limit; confirm against the upstream schema.
  map<string, Value.Scalar> limits = 15;
}


/**
 * Describes a group of tasks that belong to an executor. The
 * executor will receive the task group in a single message to
 * allow the group to be launched "atomically".
 *
 * NOTES:
 * 1) `NetworkInfo` must not be set inside task's `ContainerInfo`.
 * 2) `TaskInfo.executor` doesn't need to be set. If set, it should match
 *    `LaunchGroup.executor`.
 */
message TaskGroupInfo {
  repeated TaskInfo tasks = 1;
}


// TODO(bmahler): Add executor_uuid here, and send it to the master.
// This will
// allow us to expose executor work directories for tasks in the webui when
// looking from the master level. Currently only the slave knows which run the
// task belongs to.
/**
 * Describes a task, similar to `TaskInfo`.
 *
 * `Task` is used in some of the Mesos messages found below.
 * `Task` is used instead of `TaskInfo` if:
 *   1) we need additional IDs, such as a specific
 *      framework, executor, or agent; or
 *   2) we do not need the additional data, such as the command run by the
 *      task. These additional fields may be large and unnecessary for some
 *      Mesos messages.
 *
 * `Task` is generally constructed from a `TaskInfo`. See protobuf::createTask.
 */
message Task {
  required string name = 1;
  required TaskID task_id = 2;
  required FrameworkID framework_id = 3;
  optional ExecutorID executor_id = 4;
  required SlaveID slave_id = 5;
  required TaskState state = 6; // Latest state of the task.
  repeated Resource resources = 7;
  repeated TaskStatus statuses = 8;

  // These fields correspond to the state and uuid of the latest
  // status update forwarded to the master.
  // NOTE: Either both the fields must be set or both must be unset.
  optional TaskState status_update_state = 9;
  optional bytes status_update_uuid = 10;

  optional Labels labels = 11;

  // Service discovery information for the task. It is not interpreted
  // or acted upon by Mesos. It is up to a service discovery system
  // to use this information as needed and to handle tasks without
  // service discovery information.
  optional DiscoveryInfo discovery = 12;

  // Container information for the task.
  optional ContainerInfo container = 13;

  optional HealthCheck health_check = 15;

  // TODO(greggomann): Add the task's `CheckInfo`. See MESOS-8780.

  // The kill policy used for this task when it is killed. It's possible for
  // this policy to be overridden by the scheduler when killing the task.
  optional KillPolicy kill_policy = 16;

  // Specific user under which task is running.
  optional string user = 14;

  // Resource limits associated with the task.
  //
  // NOTE(review): the key/value types were missing from the declaration
  // (`map limits = 17;`), which does not parse. They are restored here as
  // resource name -> scalar limit; confirm against the upstream schema.
  map<string, Value.Scalar> limits = 17;
}


/**
 * Describes possible task states. IMPORTANT: Mesos assumes tasks that
 * enter terminal states (see below) imply the task is no longer
 * running and thus clean up anything associated with the task
 * (ultimately offering any resources being consumed by that task to
 * another task).
 */
enum TaskState {
  TASK_STAGING = 6; // Initial state. Framework status updates should not use.
  TASK_STARTING = 0; // The task is being launched by the executor.
  TASK_RUNNING = 1;

  // NOTE: This should only be sent when the framework has
  // the TASK_KILLING_STATE capability.
  TASK_KILLING = 8; // The task is being killed by the executor.

  // The task finished successfully on its own without external interference.
  TASK_FINISHED = 2; // TERMINAL.

  TASK_FAILED = 3; // TERMINAL: The task failed to finish successfully.
  TASK_KILLED = 4; // TERMINAL: The task was killed by the executor.
  TASK_ERROR = 7; // TERMINAL: The task description contains an error.

  // In Mesos 1.3, this will only be sent when the framework does NOT
  // opt-in to the PARTITION_AWARE capability.
  //
  // NOTE: This state is not always terminal. For example, tasks might
  // transition from TASK_LOST to TASK_RUNNING or other states when a
  // partitioned agent reregisters.
  TASK_LOST = 5; // The task failed but can be rescheduled.

  // The following task states are only sent when the framework
  // opts-in to the PARTITION_AWARE capability.

  // The task failed to launch because of a transient error. The
  // task's executor never started running. Unlike TASK_ERROR, the
  // task description is valid -- attempting to launch the task again
  // may be successful.
  TASK_DROPPED = 9; // TERMINAL.

  // The task was running on an agent that has lost contact with the
  // master, typically due to a network failure or partition. The task
  // may or may not still be running.
  TASK_UNREACHABLE = 10;

  // The task is no longer running.
// This can occur if the agent has
  // been terminated along with all of its tasks (e.g., the host that
  // was running the agent was rebooted). It might also occur if the
  // task was terminated due to an agent or containerizer error, or if
  // the task was preempted by the QoS controller in an
  // oversubscription scenario.
  TASK_GONE = 11; // TERMINAL.

  // The task was running on an agent that the master cannot contact;
  // the operator has asserted that the agent has been shutdown, but
  // this has not been directly confirmed by the master. If the
  // operator is correct, the task is not running and this is a
  // terminal state; if the operator is mistaken, the task may still
  // be running and might return to RUNNING in the future.
  TASK_GONE_BY_OPERATOR = 12;

  // The master has no knowledge of the task. This is typically
  // because either (a) the master never had knowledge of the task, or
  // (b) the master forgot about the task because it garbage collected
  // its metadata about the task. The task may or may not still be
  // running.
  TASK_UNKNOWN = 13;
}


/**
 * Describes a resource limitation that caused a task failure.
 */
message TaskResourceLimitation {
  // This field contains the resource whose limits were violated.
  //
  // NOTE: 'Resources' is used here because the resource may span
  // multiple roles (e.g. `"mem(*):1;mem(role):2"`).
  repeated Resource resources = 1;
}


/**
 * A 128 bit (16 byte) UUID, see RFC 4122.
 */
message UUID {
  required bytes value = 1;
}


/**
 * Describes an operation, similar to `Offer.Operation`, with
 * some additional information.
 */
message Operation {
  optional FrameworkID framework_id = 1;
  optional SlaveID slave_id = 2;
  required Offer.Operation info = 3;
  required OperationStatus latest_status = 4;

  // All the statuses known to this operation. Some of the statuses in this
  // list might not have been acknowledged yet. The statuses are ordered.
  repeated OperationStatus statuses = 5;

  // This is the internal UUID for the operation, which is kept independently
  // from the framework-specified operation ID, which is optional.
  required UUID uuid = 6;
}


/**
 * Describes possible operation states.
 */
enum OperationState {
  // Default value if the enum is not set. See MESOS-4997.
  OPERATION_UNSUPPORTED = 0;

  // Initial state.
  OPERATION_PENDING = 1;

  // TERMINAL: The operation was successfully applied.
  OPERATION_FINISHED = 2;

  // TERMINAL: The operation failed to apply.
  OPERATION_FAILED = 3;

  // TERMINAL: The operation description contains an error.
  OPERATION_ERROR = 4;

  // TERMINAL: The operation was dropped due to a transient error.
  OPERATION_DROPPED = 5;

  // The operation affects an agent that has lost contact with the master,
  // typically due to a network failure or partition. The operation may or may
  // not still be pending.
  OPERATION_UNREACHABLE = 6;

  // The operation affected an agent that the master cannot contact;
  // the operator has asserted that the agent has been shutdown, but this has
  // not been directly confirmed by the master.
  //
  // If the operator is correct, the operation is not pending and this is a
  // terminal state; if the operator is mistaken, the operation may still be
  // pending and might return to a different state in the future.
  OPERATION_GONE_BY_OPERATOR = 7;

  // The operation affects an agent that the master recovered from its
  // state, but that agent has not yet re-registered.
  //
  // The operation can transition to `OPERATION_UNREACHABLE` if the
  // corresponding agent is marked as unreachable, and will transition to
  // another status if the agent re-registers.
  OPERATION_RECOVERING = 8;

  // The master has no knowledge of the operation. This is typically
  // because either (a) the master never had knowledge of the operation, or
  // (b) the master forgot about the operation because it garbage collected
  // its metadata about the operation. The operation may or may not still be
  // pending.
  OPERATION_UNKNOWN = 9;
}


/**
 * Describes the current status of an operation.
 */
message OperationStatus {
  // While frameworks will only receive status updates for operations on which
  // they have set an ID, this field is optional because this message is also
  // used internally by Mesos components when the operation's ID has not been
  // set.
  optional OperationID operation_id = 1;

  required OperationState state = 2;
  optional string message = 3;

  // Converted resources after applying the operation. This only
  // applies if the `state` is `OPERATION_FINISHED`.
  repeated Resource converted_resources = 4;

  // Statuses that are delivered reliably to the scheduler will
  // include a `uuid`. The status is considered delivered once
  // it is acknowledged by the scheduler.
  optional UUID uuid = 5;

  // If the operation affects resources from a local resource provider,
  // both `slave_id` and `resource_provider_id` will be set.
  //
  // If the operation affects resources that belong to an external
  // resource provider, only `resource_provider_id` will be set.
  //
  // In certain cases, e.g., invalid operations, neither `uuid`,
  // `slave_id` nor `resource_provider_id` will be set, and the
  // scheduler does not need to acknowledge this status update.
  optional SlaveID slave_id = 6;
  optional ResourceProviderID resource_provider_id = 7;
}


/**
 * Describes the status of a check. Type and the corresponding field, i.e.,
 * `command` or `http` must be set. If the result of the check is not available
 * (e.g., the check timed out), these fields must contain empty messages, i.e.,
 * `exit_code` or `status_code` will be unset.
 *
 * NOTE: This API is subject to change and the related feature is experimental.
 */
message CheckStatusInfo {
  message Command {
    // Exit code of a command check. It is the result of calling
    // `WEXITSTATUS()` on `waitpid()` termination information on
    // Posix and calling `GetExitCodeProcess()` on Windows.
optional int32 exit_code = 1; } message Http { // HTTP status code of an HTTP check. optional uint32 status_code = 1; } message Tcp { // Whether a TCP connection succeeded. optional bool succeeded = 1; } // TODO(alexr): Consider adding a `data` field, which can contain, e.g., // truncated stdout/stderr output for command checks or HTTP response body // for HTTP checks. Alternatively, it can be an even shorter `message` field // containing the last line of stdout or Reason-Phrase of the status line of // the HTTP response. // The type of the check this status corresponds to. optional CheckInfo.Type type = 1; // Status of a command check. optional Command command = 2; // Status of an HTTP check. optional Http http = 3; // Status of a TCP check. optional Tcp tcp = 4; // TODO(alexr): Consider introducing a "last changed at" timestamp, since // task status update's timestamp may not correspond to the last check's // state, e.g., for reconciliation. // TODO(alexr): Consider introducing a `reason` enum here to explicitly // distinguish between completed, delayed, and timed out checks. } /** * Describes the current status of a task. */ message TaskStatus { // Describes the source of the task status update. enum Source { SOURCE_MASTER = 0; SOURCE_SLAVE = 1; SOURCE_EXECUTOR = 2; } // Detailed reason for the task status update. // Refer to docs/task-state-reasons.md for additional explanation. enum Reason { // TODO(jieyu): The default value when a caller doesn't check for // presence is 0 and so ideally the 0 reason is not a valid one. // Since this is not used anywhere, consider removing this reason. 
REASON_COMMAND_EXECUTOR_FAILED = 0; REASON_CONTAINER_LAUNCH_FAILED = 21; REASON_CONTAINER_LIMITATION = 19; REASON_CONTAINER_LIMITATION_DISK = 20; REASON_CONTAINER_LIMITATION_MEMORY = 8; REASON_CONTAINER_PREEMPTED = 17; REASON_CONTAINER_UPDATE_FAILED = 22; REASON_MAX_COMPLETION_TIME_REACHED = 33; REASON_EXECUTOR_REGISTRATION_TIMEOUT = 23; REASON_EXECUTOR_REREGISTRATION_TIMEOUT = 24; REASON_EXECUTOR_TERMINATED = 1; REASON_EXECUTOR_UNREGISTERED = 2; // No longer used. REASON_FRAMEWORK_REMOVED = 3; REASON_GC_ERROR = 4; REASON_INVALID_FRAMEWORKID = 5; REASON_INVALID_OFFERS = 6; REASON_IO_SWITCHBOARD_EXITED = 27; REASON_MASTER_DISCONNECTED = 7; REASON_RECONCILIATION = 9; REASON_RESOURCES_UNKNOWN = 18; REASON_SLAVE_DISCONNECTED = 10; REASON_SLAVE_DRAINING = 34; REASON_SLAVE_REMOVED = 11; REASON_SLAVE_REMOVED_BY_OPERATOR = 31; REASON_SLAVE_REREGISTERED = 32; REASON_SLAVE_RESTARTED = 12; REASON_SLAVE_UNKNOWN = 13; REASON_TASK_KILLED_DURING_LAUNCH = 30; REASON_TASK_CHECK_STATUS_UPDATED = 28; REASON_TASK_HEALTH_CHECK_STATUS_UPDATED = 29; REASON_TASK_GROUP_INVALID = 25; REASON_TASK_GROUP_UNAUTHORIZED = 26; REASON_TASK_INVALID = 14; REASON_TASK_UNAUTHORIZED = 15; REASON_TASK_UNKNOWN = 16; } required TaskID task_id = 1; required TaskState state = 2; optional string message = 4; // Possible message explaining state. optional Source source = 9; optional Reason reason = 10; optional bytes data = 3; optional SlaveID slave_id = 5; optional ExecutorID executor_id = 7; // TODO(benh): Use in master/slave. optional double timestamp = 6; // Statuses that are delivered reliably to the scheduler will // include a 'uuid'. The status is considered delivered once // it is acknowledged by the scheduler. Schedulers can choose // to either explicitly acknowledge statuses or let the scheduler // driver implicitly acknowledge (default). 
//
  // TODO(bmahler): This is currently overwritten in the scheduler
  // driver and executor driver, but executors will need to set this
  // to a valid RFC-4122 UUID if using the HTTP API.
  optional bytes uuid = 11;

  // Describes whether the task has been determined to be healthy (true) or
  // unhealthy (false) according to the `health_check` field in `TaskInfo`.
  optional bool healthy = 8;

  // Contains check status for the check specified in the corresponding
  // `TaskInfo`. If no check has been specified, this field must be
  // absent, otherwise it must be present even if the check status is
  // not available yet. If the status update is triggered for a different
  // reason than `REASON_TASK_CHECK_STATUS_UPDATED`, this field will contain
  // the last known value.
  //
  // NOTE: A check-related task status update is triggered if and only if
  // the value or presence of any field in `CheckStatusInfo` changes.
  //
  // NOTE: Check support in built-in executors is experimental.
  optional CheckStatusInfo check_status = 15;

  // Labels are free-form key-value pairs which are exposed through
  // master and slave endpoints. Labels will not be interpreted or
  // acted upon by Mesos itself. As opposed to the `data` field, labels
  // will be kept in memory on master and slave processes. Therefore,
  // labels should be used to tag TaskStatus messages with lightweight
  // metadata. Labels should not contain duplicate key-value pairs.
  optional Labels labels = 12;

  // Container related information that is resolved dynamically such as
  // network address.
  optional ContainerStatus container_status = 13;

  // The time (according to the master's clock) when the agent where
  // this task was running became unreachable. This is only set on
  // status updates for tasks running on agents that are unreachable
  // (e.g., partitioned away from the master).
  optional TimeInfo unreachable_time = 14;

  // If the reason field indicates a container resource limitation,
  // this field optionally contains additional information.
optional TaskResourceLimitation limitation = 16;
}

/**
 * Describes possible filters that can be applied to unused resources
 * (see SchedulerDriver::launchTasks) to influence the allocator.
 */
message Filters {
  // Time to consider unused resources refused. Note that all unused
  // resources will be considered refused and use the default value
  // (below) regardless of whether Filters was passed to
  // SchedulerDriver::launchTasks. You MUST pass Filters with this
  // field set to change this behavior (i.e., get another offer which
  // includes unused resources sooner or later than the default).
  //
  // If this field is set to a number of seconds greater than 31536000
  // (365 days), then the resources will be considered refused for 365
  // days. If it is set to a negative number, then the default value
  // will be used.
  optional double refuse_seconds = 1 [default = 5.0];
}

/**
 * Describes a collection of environment variables. This is used with
 * CommandInfo in order to set environment variables before running a
 * command. The contents of each variable may be specified as a string
 * or a Secret; exactly one of `value` and `secret` must be set.
 */
message Environment {
  message Variable {
    required string name = 1;

    enum Type {
      UNKNOWN = 0;
      VALUE = 1;
      SECRET = 2;
    }

    // In Mesos 1.2, the `Environment.variables.value` field was made
    // optional. The default type for `Environment.variables.type` is now VALUE,
    // which requires `value` to be set, maintaining backward compatibility.
    //
    // TODO(greggomann): The default can be removed in Mesos 2.1 (MESOS-7134).
    optional Type type = 3 [default = VALUE];

    // Exactly one of `value` and `secret` must be set.
    optional string value = 2;
    optional Secret secret = 4;
  }

  repeated Variable variables = 1;
}

/**
 * A generic (key, value) pair used in various places for parameters.
 */
message Parameter {
  required string key = 1;
  required string value = 2;
}

/**
 * Collection of Parameter.
*/
message Parameters {
  repeated Parameter parameter = 1;
}

/**
 * Credential used in various places for authentication and
 * authorization.
 *
 * NOTE: A 'principal' is different from 'FrameworkInfo.user'. The
 * former is used for authentication and authorization while the
 * latter is used to determine the default user under which the
 * framework's executors/tasks are run.
 */
message Credential {
  required string principal = 1;
  optional string secret = 2;
}

/**
 * Credentials used for framework authentication, HTTP authentication
 * (where the common 'username' and 'password' are captured as
 * 'principal' and 'secret' respectively), etc.
 */
message Credentials {
  repeated Credential credentials = 1;
}

/**
 * Secret used to pass privileged information. It is designed to provide
 * pass-by-value or pass-by-reference semantics, where the REFERENCE type can be
 * used by custom modules which interact with a secure back-end.
 */
message Secret {
  enum Type {
    UNKNOWN = 0;
    REFERENCE = 1;
    VALUE = 2;
  }

  // Can be used by modules to refer to a secret stored in a secure back-end.
  // The `key` field is provided to permit reference to a single value within a
  // secret containing arbitrary key-value pairs.
  //
  // For example, given a back-end secret store with a secret named
  // "my-secret" containing the following key-value pairs:
  //
  //   {
  //     "username": "my-user",
  //     "password": "my-password"
  //   }
  //
  // the username could be referred to in a `Secret` by specifying
  // "my-secret" for the `name` and "username" for the `key`.
  message Reference {
    required string name = 1;
    optional string key = 2;
  }

  // Used to pass the value of a secret.
  message Value {
    required bytes data = 1;
  }

  optional Type type = 1;

  // Exactly one of `reference` and `value` must be set.
  optional Reference reference = 2;
  optional Value value = 3;
}

/**
 * Rate (queries per second, QPS) limit for messages from a framework to master.
 * Strictly speaking they are the combined rate from all frameworks of the same
 * principal.
*/
message RateLimit {
  // Leaving QPS unset gives it unlimited rate (i.e., not throttled),
  // which also implies unlimited capacity.
  optional double qps = 1;

  // Principal of framework(s) to be throttled. Should match
  // FrameworkInfo.principal and Credential.principal (if using authentication).
  required string principal = 2;

  // Max number of outstanding messages from frameworks of this principal
  // allowed by master before the next message is dropped and an error is sent
  // back to the sender. Messages received before the capacity is reached are
  // still going to be processed after the error is sent.
  // If unspecified, this principal is assigned unlimited capacity.
  // NOTE: This value is ignored if 'qps' is not set.
  optional uint64 capacity = 3;
}

/**
 * Collection of RateLimit.
 * Frameworks without rate limits defined here are not throttled unless
 * 'aggregate_default_qps' is specified.
 */
message RateLimits {
  // Items should have unique principals.
  repeated RateLimit limits = 1;

  // All the frameworks not specified in 'limits' get this default rate.
  // This rate is an aggregate rate for all of them, i.e., their combined
  // traffic is throttled together at this rate.
  optional double aggregate_default_qps = 2;

  // All the frameworks not specified in 'limits' get this default capacity.
  // This is an aggregate value similar to 'aggregate_default_qps'.
  optional uint64 aggregate_default_capacity = 3;
}

/**
 * Describes an image used by tasks or executors. Note that it's only
 * for tasks or executors launched by MesosContainerizer currently.
 */
message Image {
  enum Type {
    APPC = 1;
    DOCKER = 2;
  }

  // Protobuf for specifying an Appc container image. See:
  // https://github.com/appc/spec/blob/master/spec/aci.md
  message Appc {
    // The name of the image.
    required string name = 1;

    // An image ID is a string of the format "hash-value", where
    // "hash" is the hash algorithm used and "value" is the hex
    // encoded string of the digest. Currently the only permitted
    // hash algorithm is sha512.
    optional string id = 2;

    // Optional labels. Suggested labels: "version", "os", and "arch".
    optional Labels labels = 3;
  }

  message Docker {
    // The name of the image. Expected format:
    //   [REGISTRY_HOST[:REGISTRY_PORT]/]REPOSITORY[:TAG|@TYPE:DIGEST]
    //
    // See: https://docs.docker.com/reference/commandline/pull/
    required string name = 1;

    // Credential to authenticate with docker registry.
    // NOTE: This is not encrypted, therefore frameworks and operators
    // should enable SSL when passing this information.
    //
    // This field has never been used in Mesos before and is
    // deprecated since Mesos 1.3. Please use `config` below
    // (see MESOS-7088 for details).
    optional Credential credential = 2 [deprecated = true]; // Since 1.3.

    // Docker config containing credentials to authenticate with
    // docker registry. The secret is expected to be a docker
    // config file in JSON format with UTF-8 character encoding.
    optional Secret config = 3;
  }

  required Type type = 1;

  // Only one of the following image messages should be set to match
  // the type.
  optional Appc appc = 2;
  optional Docker docker = 3;

  // With this flag set to false, the mesos containerizer will pull
  // the docker/appc image from the registry even if the image is
  // already downloaded on the agent.
  optional bool cached = 4 [default = true];
}

/**
 * Describes how the mount will be propagated for a volume. See the
 * following doc for more details about mount propagation:
 * https://www.kernel.org/doc/Documentation/filesystems/sharedsubtree.txt
 */
message MountPropagation {
  enum Mode {
    UNKNOWN = 0;

    // The volume in a container will receive new mounts from the host
    // or other containers, but filesystems mounted inside the
    // container won't be propagated to the host or other containers.
    // This is currently the default behavior for all volumes.
HOST_TO_CONTAINER = 1;

    // The volume in a container will receive new mounts from the host
    // or other containers, and its own mounts will be propagated from
    // the container to the host or other containers.
    BIDIRECTIONAL = 2;
  }

  optional Mode mode = 1;
}

/**
 * Describes a volume mapping either from host to container or vice
 * versa. Both paths can either refer to a directory or a file.
 */
message Volume {
  enum Mode {
    RW = 1; // read-write.
    RO = 2; // read-only.
  }

  // TODO(gyliu513): Make this `optional` after deprecation cycle of 1.0.
  required Mode mode = 3;

  // Path pointing to a directory or file in the container. If the path
  // is a relative path, it is relative to the container work directory.
  // If the path is an absolute path and the container does not have its
  // own rootfs, that path must already exist in the agent host rootfs.
  required string container_path = 1;

  // The following specifies the source of this volume. At most one of
  // the following should be set.

  // Absolute path pointing to a directory or file on the host or a
  // path relative to the container work directory.
  optional string host_path = 2;

  // The source of the volume is an Image which describes a root
  // filesystem which will be provisioned by Mesos.
  optional Image image = 4;

  // Describes where a volume originates from.
  message Source {
    enum Type {
      // This must be the first enum value in this list, to
      // ensure that if 'type' is not set, the default value
      // is UNKNOWN. This enables enum values to be added
      // in a backwards-compatible way. See: MESOS-4997.
      UNKNOWN = 0;

      // TODO(gyliu513): Add IMAGE as volume source type.
      DOCKER_VOLUME = 1;
      HOST_PATH = 4;
      SANDBOX_PATH = 2;
      SECRET = 3;
      CSI_VOLUME = 5;
    }

    message DockerVolume {
      // Driver of the volume, e.g., flocker, convoy, rexray, etc.
      optional string driver = 1;

      // Name of the volume.
      required string name = 2;

      // Volume driver specific options.
optional Parameters driver_options = 3;
    }

    // Absolute path pointing to a directory or file on the host.
    message HostPath {
      required string path = 1;
      optional MountPropagation mount_propagation = 2;
    }

    // Describes a path from a container's sandbox. The container can
    // be the current container (SELF), or its parent container
    // (PARENT). PARENT allows all child containers to share a volume
    // from their parent container's sandbox. Using PARENT is an error
    // if the current container is a top level container.
    message SandboxPath {
      enum Type {
        UNKNOWN = 0;
        SELF = 1;
        PARENT = 2;
      }

      optional Type type = 1;

      // A path relative to the corresponding container's sandbox.
      // Note that upwards traversal (e.g., ../../abc) is not allowed.
      required string path = 2;
    }

    // A volume which will be handled by the `volume/csi` isolator.
    message CSIVolume {
      // The name of the CSI plugin.
      required string plugin_name = 1;

      // Specifies a capability of a volume.
      // https://github.com/container-storage-interface/spec/blob/v1.3.0/csi.proto#L379:L438
      message VolumeCapability {
        // Indicates that the volume will be accessed via the block device API.
        message BlockVolume {
          // Intentionally empty, for now.
        }

        // Indicates that the volume will be accessed via the filesystem API.
        message MountVolume {
          // The filesystem type. An empty string is equal to an unspecified
          // field value.
          optional string fs_type = 1;

          // The mount options that can be used for the volume. This field is
          // OPTIONAL. `mount_flags` MAY contain sensitive information.
          // Therefore, Mesos and the Plugin MUST NOT leak this information
          // to untrusted entities. The total size of this repeated field
          // SHALL NOT exceed 4 KiB.
          repeated string mount_flags = 2;
        }

        // Specifies how a volume can be accessed.
        message AccessMode {
          enum Mode {
            UNKNOWN = 0;

            // Can only be published once as read/write on a single node, at
            // any given time.
            SINGLE_NODE_WRITER = 1;

            // Can only be published once as readonly on a single node, at
            // any given time.
SINGLE_NODE_READER_ONLY = 2;

            // Can be published as readonly at multiple nodes simultaneously.
            MULTI_NODE_READER_ONLY = 3;

            // Can be published at multiple nodes simultaneously. Only one of
            // the nodes can be used as read/write. The rest will be readonly.
            MULTI_NODE_SINGLE_WRITER = 4;

            // Can be published as read/write at multiple nodes
            // simultaneously.
            MULTI_NODE_MULTI_WRITER = 5;
          }

          required Mode mode = 1;
        }

        // Specifies what API the volume will be accessed using. One of the
        // following fields MUST be specified.
        oneof access_type {
          BlockVolume block = 1;
          MountVolume mount = 2;
        }

        required AccessMode access_mode = 3;
      }

      // Specifies the parameters used to stage/publish a pre-provisioned volume
      // on an agent host. The fields are merged from `NodeStageVolumeRequest`
      // and `NodePublishVolumeRequest` protobuf messages defined in CSI spec
      // except two fields `staging_target_path` and `target_path` which will be
      // internally determined by Mesos when staging/publishing the volume.
      message StaticProvisioning {
        required string volume_id = 1;
        required VolumeCapability volume_capability = 2;

        // The secrets needed for staging/publishing the volume, e.g.:
        // {
        //   "username": {"type": REFERENCE, "reference": {"name": "U_SECRET"}},
        //   "password": {"type": REFERENCE, "reference": {"name": "P_SECRET"}}
        // }
        //
        // Each map is keyed by the secret's name; the value is the `Secret`
        // message describing how to obtain it, as in the example above.
        map<string, Secret> node_stage_secrets = 3;
        map<string, Secret> node_publish_secrets = 4;

        // Opaque, plugin-specific attributes of the volume, carried over
        // from the `volume_context` field of the CSI requests named above.
        map<string, string> volume_context = 5;
      }

      optional StaticProvisioning static_provisioning = 2;
    }

    // Enum fields should be optional, see: MESOS-4997.
    optional Type type = 1;

    // The following specifies the source of this volume. At most one of
    // the following should be set.

    // The source of the volume created by docker volume driver.
    optional DockerVolume docker_volume = 2;

    optional HostPath host_path = 5;
    optional SandboxPath sandbox_path = 3;

    // The volume/secret isolator uses the secret-fetcher module (third-party or
    // internal) to download the secret and make it available at container_path.
optional Secret secret = 4;

    optional CSIVolume csi_volume = 6;
  }

  optional Source source = 5;
}

/**
 * Describes a network request from a framework as well as network resolution
 * provided by Mesos.
 *
 * A framework may request the network isolator on the Agent to isolate the
 * container in a network namespace and create a virtual network interface.
 * The `NetworkInfo` message describes the properties of that virtual
 * interface, including the IP addresses and network isolation policy
 * (network group membership).
 *
 * The NetworkInfo message is not interpreted by the Master or Agent and is
 * intended to be used by Agent and Master modules implementing network
 * isolation. If the modules are missing, the message is simply ignored. In
 * the future, the task launch will fail if there is no module providing the
 * network isolation capabilities (MESOS-3390).
 *
 * An executor, Agent, or an Agent module may append NetworkInfos inside
 * TaskStatus::container_status to provide information such as the container IP
 * address and isolation groups.
 */
message NetworkInfo {
  enum Protocol {
    IPv4 = 1;
    IPv6 = 2;
  }

  // Specifies a request for an IP address, or reports the assigned container
  // IP address.
  //
  // Users can request an automatically assigned IP (for example, via an
  // IPAM service) or a specific IP by adding a NetworkInfo to the
  // ContainerInfo for a task. On a request, specifying neither `protocol`
  // nor `ip_address` means that any available address may be assigned.
  message IPAddress {
    // Specify IP address requirement. Set protocol to the desired value to
    // request the network isolator on the Agent to assign an IP address to the
    // container being launched. If a specific IP address is specified in
    // ip_address, this field should not be set.
    optional Protocol protocol = 1 [default = IPv4];

    // Statically assigned IP provided by the Framework. This IP will be
    // assigned to the container by the network isolator module on the Agent.
// This field should not be used with the protocol field above.
    //
    // If an explicit address is requested but is unavailable, the network
    // isolator should fail the task.
    optional string ip_address = 2;
  }

  // When included in a ContainerInfo, each of these represents a
  // request for an IP address. Each request can specify an explicit address
  // or the IP protocol to use.
  //
  // When included in a TaskStatus message, these inform the framework
  // scheduler about the IP addresses that are bound to the container
  // interface. When there are no custom network isolator modules installed,
  // this field is filled in automatically with the Agent IP address.
  repeated IPAddress ip_addresses = 5;

  // Name of the network which will be used by network isolator to determine
  // the network that the container joins. It's up to the network isolator
  // to decide how to interpret this field.
  optional string name = 6;

  // A group is the name given to a set of logically-related interfaces that
  // are allowed to communicate among themselves. Network traffic is allowed
  // between two container interfaces that share at least one network group.
  // For example, one might want to create separate groups for isolating dev,
  // testing, qa and prod deployment environments.
  repeated string groups = 3;

  // To tag certain metadata to be used by Isolator/IPAM, e.g., rack, etc.
  optional Labels labels = 4;

  // Specifies a port mapping request for the task on this network.
  message PortMapping {
    required uint32 host_port = 1;
    required uint32 container_port = 2;
    // Protocol to expose as (e.g., tcp, udp).
    optional string protocol = 3;
  }

  repeated PortMapping port_mappings = 7;
};

/**
 * Encapsulation of `Capabilities` supported by Linux.
 * Reference: http://linux.die.net/man/7/capabilities.
 */
message CapabilityInfo {
  // We start the actual values at an offset (1000) because Protobuf 2
  // uses the first value as the default one. Separating the default
  // value from the real first value helps to disambiguate them. This
  // is especially valuable for backward compatibility.
  // See: MESOS-4997.
  enum Capability {
    UNKNOWN = 0;
    CHOWN = 1000;
    DAC_OVERRIDE = 1001;
    DAC_READ_SEARCH = 1002;
    FOWNER = 1003;
    FSETID = 1004;
    KILL = 1005;
    SETGID = 1006;
    SETUID = 1007;
    SETPCAP = 1008;
    LINUX_IMMUTABLE = 1009;
    NET_BIND_SERVICE = 1010;
    NET_BROADCAST = 1011;
    NET_ADMIN = 1012;
    NET_RAW = 1013;
    IPC_LOCK = 1014;
    IPC_OWNER = 1015;
    SYS_MODULE = 1016;
    SYS_RAWIO = 1017;
    SYS_CHROOT = 1018;
    SYS_PTRACE = 1019;
    SYS_PACCT = 1020;
    SYS_ADMIN = 1021;
    SYS_BOOT = 1022;
    SYS_NICE = 1023;
    SYS_RESOURCE = 1024;
    SYS_TIME = 1025;
    SYS_TTY_CONFIG = 1026;
    MKNOD = 1027;
    LEASE = 1028;
    AUDIT_WRITE = 1029;
    AUDIT_CONTROL = 1030;
    SETFCAP = 1031;
    MAC_OVERRIDE = 1032;
    MAC_ADMIN = 1033;
    SYSLOG = 1034;
    WAKE_ALARM = 1035;
    BLOCK_SUSPEND = 1036;
    AUDIT_READ = 1037;
    PERFMON = 1038;
    BPF = 1039;
    CHECKPOINT_RESTORE = 1040;
  }

  repeated Capability capabilities = 1;
}

/**
 * Encapsulation for Seccomp configuration, which is Linux specific.
 */
message SeccompInfo {
  // A filename of the Seccomp profile. This should be a path
  // relative to the directory containing Seccomp profiles,
  // which is specified on the agent via the `--seccomp_config_dir` flag.
  optional string profile_name = 1;

  // If set to `true`, Seccomp is not applied to the container.
  // If not set or set to `false`, the container is launched with
  // the profile specified in the `profile_name` field.
  //
  // NOTE: `profile_name` must not be specified if `unconfined` is set to
  // `true`. `profile_name` must be specified if `unconfined` is not set
  // or is set to `false`.
  optional bool unconfined = 2;
}

/**
 * Encapsulation for Linux specific configuration.
 * E.g., capabilities, limits, etc.
 */
message LinuxInfo {
  // Since 1.4.0, deprecated in favor of `effective_capabilities`.
  optional CapabilityInfo capability_info = 1 [deprecated = true];

  // The set of capabilities that are allowed but not initially
  // granted to tasks.
optional CapabilityInfo bounding_capabilities = 2;

  // Represents the set of capabilities that the task will
  // be executed with.
  optional CapabilityInfo effective_capabilities = 3;

  // If set as 'true', the container shares the pid namespace with
  // its parent. If the container is a top level container, it will
  // share the pid namespace with the agent. If the container is a
  // nested container, it will share the pid namespace with its
  // parent container. This field will be ignored if 'namespaces/pid'
  // isolator is not enabled.
  optional bool share_pid_namespace = 4;

  // Represents Seccomp configuration, which is used for syscall filtering.
  // This field is used to override the agent's default Seccomp configuration.
  optional SeccompInfo seccomp = 5;

  enum IpcMode {
    UNKNOWN = 0;

    // The container will have its own IPC namespace and /dev/shm, with a
    // possibility to share them with its child containers.
    PRIVATE = 1;

    // The container will share the IPC namespace and /dev/shm from its
    // parent. If the container is a top level container, it will share
    // the IPC namespace and /dev/shm from the agent host, if the container
    // is a nested container, it will share the IPC namespace and /dev/shm
    // from its parent container. The implication is if a nested container
    // wants to share the IPC namespace and /dev/shm from the agent host,
    // its parent container has to do it first.
    SHARE_PARENT = 2;
  }

  // There are two special cases that we need to handle for this field:
  // 1. This field is not set: For backward compatibility we will keep the
  //    previous behavior: Top level container will have its own IPC namespace
  //    and nested container will share the IPC namespace from its parent
  //    container. If the container does not have its own rootfs, it will share
  //    agent's /dev/shm, otherwise it will have its own /dev/shm.
  // 2. The `namespaces/ipc` isolator is not enabled: This field will be ignored
  //    in this case. For backward compatibility, in the `filesystem/linux`
  //    isolator we will keep the previous behavior: Any containers will share
  //    IPC namespace from agent, and if the container does not have its own
  //    rootfs, it will also share agent's /dev/shm, otherwise it will have its
  //    own /dev/shm.
  //
  // TODO(qianzhang): Remove the support for the above two cases after the
  // deprecation cycle (started in 1.9). Eventually we want a single isolator
  // (`namespaces/ipc`) to handle both IPC namespace and /dev/shm, and decouple
  // /dev/shm from container's rootfs (i.e., whether a container will have its
  // own /dev/shm depends on its `ipc_mode` instead of whether the container
  // has its own rootfs).
  optional IpcMode ipc_mode = 6;

  // Size of /dev/shm in MB. If not set, the size of the /dev/shm for container
  // will be value of the `--default_container_shm_size` agent flag, if that
  // flag is not set either, the size of the /dev/shm will be half of the host
  // RAM which is the default behavior of Linux. This field will be ignored for
  // the container which shares /dev/shm from its parent and it will be also
  // ignored for any containers if the `namespaces/ipc` isolator is not enabled.
  // Please note that we only support setting this field when the `ipc_mode`
  // field is set to `PRIVATE` otherwise the container launch will be rejected.
  optional uint32 shm_size = 7;

  // If set as 'true', the container will share the cgroups from its parent
  // container, otherwise it will have its own cgroups created. Please note:
  // 1. For tasks in a task group launched via the LAUNCH_GROUP operation,
  //    this field may be set to 'true' or 'false'. Resource limits may only be
  //    set for tasks in a task group when this field is set to 'false'.
  // 2. For tasks launched via the LAUNCH operation, this field may only be set
  //    to 'true', and in this case resource limits may be set on these tasks.
  // 3. For containers launched via the agent's LAUNCH_NESTED_CONTAINER_SESSION
  //    call, this field must be set to 'true'.
  // 4. For executor containers, this field may only be set to 'false'.
  // 5. All tasks under a single executor must share the same value of this
  //    field, if it is set. Note that this means that all tasks within a single
  //    task group must set this field to the same value.
  optional bool share_cgroups = 8 [default = true];
}

/**
 * Encapsulation for POSIX rlimits, see
 * http://pubs.opengroup.org/onlinepubs/009695399/functions/getrlimit.html.
 * Note that some types might only be defined for Linux.
 * We use a custom prefix to avoid conflict with existing system macros
 * (e.g., `RLIMIT_CPU` or `NOFILE`).
 */
message RLimitInfo {
  message RLimit {
    enum Type {
      UNKNOWN = 0;
      RLMT_AS = 1;
      RLMT_CORE = 2;
      RLMT_CPU = 3;
      RLMT_DATA = 4;
      RLMT_FSIZE = 5;
      RLMT_LOCKS = 6;
      RLMT_MEMLOCK = 7;
      RLMT_MSGQUEUE = 8;
      RLMT_NICE = 9;
      RLMT_NOFILE = 10;
      RLMT_NPROC = 11;
      RLMT_RSS = 12;
      RLMT_RTPRIO = 13;
      RLMT_RTTIME = 14;
      RLMT_SIGPENDING = 15;
      RLMT_STACK = 16;
    }
    optional Type type = 1;

    // Either both are set or both are not set.
    // If both are not set, it represents unlimited.
    // If both are set, we require `soft` <= `hard`.
    optional uint64 hard = 2;
    optional uint64 soft = 3;
  }

  repeated RLimit rlimits = 1;
}

/**
 * Describes the information about (pseudo) TTY that can
 * be attached to a process running in a container.
 */
message TTYInfo {
  message WindowSize {
    required uint32 rows = 1;
    required uint32 columns = 2;
  }

  optional WindowSize window_size = 1;
}

/**
 * Describes a container configuration and allows extensible
 * configurations for different container implementations.
 *
 * NOTE: `ContainerInfo` may be specified, e.g., by a task, even if no
 * container image is provided. In this case neither `MesosInfo` nor
 * `DockerInfo` is set, the required `type` must be `MESOS`.
This is to
 * address a case when a task without an image, e.g., a shell script
 * with URIs, wants to use features originally designed for containers,
 * for example custom network isolation via `NetworkInfo`.
 */
message ContainerInfo {
  // All container implementation types.
  // For each type there should be a field in the ContainerInfo itself
  // with exactly matching name in lowercase.
  enum Type {
    DOCKER = 1;
    MESOS = 2;
  }

  message DockerInfo {
    // The docker image that is going to be passed to the registry.
    required string image = 1;

    // Network options.
    enum Network {
      HOST = 1;
      BRIDGE = 2;
      NONE = 3;
      USER = 4;
    }

    optional Network network = 2 [default = HOST];

    message PortMapping {
      required uint32 host_port = 1;
      required uint32 container_port = 2;
      // Protocol to expose as (e.g., tcp, udp).
      optional string protocol = 3;
    }

    repeated PortMapping port_mappings = 3;

    optional bool privileged = 4 [default = false];

    // Allowing arbitrary parameters to be passed to docker CLI.
    // Note that anything passed to this field is not guaranteed
    // to be supported moving forward, as we might move away from
    // the docker CLI.
    repeated Parameter parameters = 5;

    // With this flag set to true, the docker containerizer will
    // pull the docker image from the registry even if the image
    // is already downloaded on the slave.
    optional bool force_pull_image = 6;

    // The name of volume driver plugin.
    optional string volume_driver = 7 [deprecated = true]; // Since 1.0
  }

  message MesosInfo {
    optional Image image = 1;
  }

  required Type type = 1;
  repeated Volume volumes = 2;
  optional string hostname = 4;

  // At most one of the following *Info messages should be set to match
  // the type, i.e., the "protobuf union" in ContainerInfo should be valid.
  optional DockerInfo docker = 3;
  optional MesosInfo mesos = 5;

  // A list of network requests. A framework can request multiple IP addresses
  // for the container.
  repeated NetworkInfo network_infos = 7;

  // Linux specific information for the container.
optional LinuxInfo linux_info = 8;

  // (POSIX only) rlimits of the container.
  optional RLimitInfo rlimit_info = 9;

  // If specified, a tty will be attached to the container entrypoint.
  optional TTYInfo tty_info = 10;
}

/**
 * Container related information that is resolved during container
 * setup. The information is sent back to the framework as part of the
 * TaskStatus message.
 */
message ContainerStatus {
  optional ContainerID container_id = 4;

  // This field can be reliably used to identify the container IP address.
  repeated NetworkInfo network_infos = 1;

  // Information about Linux control group (cgroup).
  optional CgroupInfo cgroup_info = 2;

  // Information about Executor PID.
  optional uint32 executor_pid = 3;
}

/**
 * Linux control group (cgroup) information.
 */
message CgroupInfo {
  // Configuration of a blkio cgroup subsystem.
  message Blkio {
    enum Operation {
      UNKNOWN = 0;
      TOTAL = 1;
      READ = 2;
      WRITE = 3;
      SYNC = 4;
      ASYNC = 5;
      DISCARD = 6;
    }

    // Describes a stat value without the device descriptor part.
    // Both fields are declared `optional` but are marked as required
    // by their trailing comments.
    message Value {
      optional Operation op = 1; // Required.
      optional uint64 value = 2; // Required.
    }

    message CFQ {
      message Statistics {
        // Stats are grouped by block devices. If `device` is not
        // set, it represents `Total`.
        optional Device.Number device = 1;
        // blkio.sectors
        optional uint64 sectors = 2;
        // blkio.time
        optional uint64 time = 3;
        // blkio.io_serviced
        repeated Value io_serviced = 4;
        // blkio.io_service_bytes
        repeated Value io_service_bytes = 5;
        // blkio.io_service_time
        repeated Value io_service_time = 6;
        // blkio.io_wait_time
        repeated Value io_wait_time = 7;
        // blkio.io_merged
        repeated Value io_merged = 8;
        // blkio.io_queued
        repeated Value io_queued = 9;
      }

      // TODO(jasonlai): Add fields for blkio weight and weight
      // device.
    }

    message Throttling {
      message Statistics {
        // Stats are grouped by block devices. If `device` is not
        // set, it represents `Total`.
optional Device.Number device = 1;
        // blkio.throttle.io_serviced
        repeated Value io_serviced = 2;
        // blkio.throttle.io_service_bytes
        repeated Value io_service_bytes = 3;
      }

      // TODO(jasonlai): Add fields for blkio.throttle.*_device.
    }

    // Groups the per-device statistics of the CFQ and throttling
    // message types declared above.
    message Statistics {
      repeated CFQ.Statistics cfq = 1;
      repeated CFQ.Statistics cfq_recursive = 2;
      repeated Throttling.Statistics throttling = 3;
    }
  }

  // Configuration of a net_cls cgroup subsystem.
  message NetCls {
    // The 32-bit classid consists of two parts, a 16-bit major handle
    // and a 16-bit minor handle. The major and minor handle are
    // represented using the format 0xAAAABBBB, where 0xAAAA is the
    // 16-bit major handle and 0xBBBB is the 16-bit minor handle.
    optional uint32 classid = 1;
  }

  optional NetCls net_cls = 1;
}

/**
 * Collection of labels. Labels should not contain duplicate key-value
 * pairs.
 */
message Labels {
  repeated Label labels = 1;
}

/**
 * Key, value pair used to store free form user-data.
 */
message Label {
  required string key = 1;
  optional string value = 2;
}

/**
 * Named port used for service discovery.
 */
message Port {
  // Port number on which the framework exposes a service.
  required uint32 number = 1;

  // Name of the service hosted on this port.
  optional string name = 2;

  // Layer 4-7 protocol on which the framework exposes its services.
  optional string protocol = 3;

  // This field restricts discovery within a framework (FRAMEWORK),
  // within a Mesos cluster (CLUSTER), or places no restrictions (EXTERNAL).
  // The visibility setting for a Port overrides the general visibility setting
  // in the DiscoveryInfo.
  optional DiscoveryInfo.Visibility visibility = 4;

  // This can be used to decorate the message with metadata to be
  // interpreted by external applications such as firewalls.
  optional Labels labels = 5;
}

/**
 * Collection of ports.
 */
message Ports {
  repeated Port ports = 1;
}

/**
 * Service discovery information.
* The visibility field restricts discovery to within a framework
 * (FRAMEWORK) or within a Mesos cluster (CLUSTER), or places no
 * restrictions (EXTERNAL).
 * Each port in the ports field also has an optional visibility field.
 * If visibility is specified for a port, it overrides the default
 * service-wide DiscoveryInfo.visibility for that port.
 * The environment, location, and version fields provide first-class
 * support for common attributes used to differentiate between similar
 * services. The environment may receive values such as PROD/QA/DEV,
 * the location field may receive values like
 * EAST-US/WEST-US/EUROPE/AMEA, and the version field may receive
 * values like v2.0/v0.9. The exact use of these fields is up to each
 * service discovery system.
 */
message DiscoveryInfo {
  enum Visibility {
    FRAMEWORK = 0;
    CLUSTER = 1;
    EXTERNAL = 2;
  }

  required Visibility visibility = 1;
  optional string name = 2;
  optional string environment = 3;
  optional string location = 4;
  optional string version = 5;
  optional Ports ports = 6;
  optional Labels labels = 7;
}


/**
 * Named WeightInfo to indicate resource allocation
 * priority between the different roles.
 */
message WeightInfo {
  required double weight = 1;

  // Name of the role the weight relates to.
  optional string role = 2;
}


/**
 * Version information of a component.
 */
message VersionInfo {
  required string version = 1;
  optional string build_date = 2;
  optional double build_time = 3;
  optional string build_user = 4;
  optional string git_sha = 5;
  optional string git_branch = 6;
  optional string git_tag = 7;
}


/**
 * A flag, given as a name with an optional value.
 */
message Flag {
  required string name = 1;
  optional string value = 2;
}


/**
 * Describes a Role. Roles can be used to specify that certain resources are
 * reserved for the use of one or more frameworks.
*/
message Role {
  required string name = 1;
  required double weight = 2;
  repeated FrameworkID frameworks = 3;

  // TODO(bmahler): Deprecate `resources` and introduce quota,
  // consumed quota, allocated, offered, and reserved resource
  // quantity fields. This is blocked by MESOS-9497 since the
  // computation of these quantities is currently expensive.
  repeated Resource resources = 4;
}


/**
 * A metric, given as a name with an optional value.
 */
message Metric {
  required string name = 1;
  optional double value = 2;
}


/**
 * Describes a File.
 */
message FileInfo {
  // Absolute path to the file.
  required string path = 1;

  // Number of hard links.
  optional int32 nlink = 2;

  // Total size in bytes.
  optional uint64 size = 3;

  // Last modification time.
  optional TimeInfo mtime = 4;

  // The file's mode and permission bits. The bits have the same
  // definition on all systems and are portable.
  optional uint32 mode = 5;

  // User ID of owner.
  optional string uid = 6;

  // Group ID of owner.
  optional string gid = 7;
}


/**
 * Describes information about a device.
 */
message Device {
  message Number {
    required uint64 major_number = 1;
    required uint64 minor_number = 2;
  }

  optional string path = 1;
  optional Number number = 2;
}


/**
 * Describes a device whitelist entry: a device that is exposed from
 * the host to a container, together with its access settings.
 */
message DeviceAccess {
  message Access {
    optional bool read = 1;
    optional bool write = 2;
    optional bool mknod = 3;
  }

  required Device device = 1;
  required Access access = 2;
}


/**
 * Collection of device whitelist entries.
 */
message DeviceWhitelist {
  repeated DeviceAccess allowed_devices = 1;
}


/**
 * Drain state of an agent.
 */
enum DrainState {
  UNKNOWN = 0;

  // The agent is currently draining.
  DRAINING = 1;

  // The agent has been drained: all tasks have terminated, all terminal
  // task status updates have been acknowledged by the frameworks, and all
  // operations have finished and had their terminal updates acknowledged.
  DRAINED = 2;
}


/**
 * Configuration used to drain an agent.
 */
message DrainConfig {
  // An upper bound for tasks with a KillPolicy.
  // A task whose KillPolicy grace period exceeds this value gets this
  // value instead, which lets the operator limit the maximum time it
  // will take the agent to drain. If this field is unset, the task's
  // KillPolicy or the executor's default grace period is used.
  //
  // NOTE: Grace periods start when the executor receives the associated
  // kill. If, for example, the agent is unreachable when this call is
  // made, tasks will still receive their full grace period to kill
  // gracefully.
  optional DurationInfo max_grace_period = 1;

  // Whether or not this agent will be removed permanently from the
  // cluster when draining is complete. This transition is automatic and
  // does **NOT** require a separate call to `MarkAgentGone`. If this
  // field is unset, then the default value of `false` is used.
  //
  // Compared to `MarkAgentGone`, which is used for unreachable agents,
  // marking agents gone after draining will respect kill policies.
  // To notify frameworks, tasks terminated during draining will return
  // a `TASK_GONE_BY_OPERATOR` status update instead of any other
  // terminal status. Executors will not need to account for this case,
  // because the terminal status update will be intercepted and modified
  // by the agent.
  optional bool mark_gone = 2 [default = false];
}


/**
 * Drain state of an agent together with the configuration used to
 * drain it.
 */
message DrainInfo {
  // The drain state of the agent.
  required DrainState state = 1;

  // The configuration used to drain the agent.
  required DrainConfig config = 2;
}