# This is an example config showing all the possible options.
# Required options are filled with default values.
# Non-required options are commented out, with their default values shown.

# Verbosity of console messages, use "off" to disable
verbosity-console = "warn"

# Verbosity of syslog messages, use "off" to disable
#verbosity-syslog = "off"

# Become a unix daemon; daemonizing is done in the standard unix way.
# Console verbosity will be turned off in this mode.
# NOTE: syslog verbosity will stay the same, so there will be no logging at all
# if it is turned off
#daemon = false

# Number of network worker threads in any mode,
# use 0 (not recommended) to use all CPU cores
n-threads = 4

# Number of parsing "fast" threads,
# use 0 (not recommended) to use all CPU cores
p-threads = 4

# Number of "slow" worker threads,
# use 0 (not recommended) to use all CPU cores
w-threads = 4

# Number of asynchronous threads,
# use 0 (not recommended) to use all CPU cores.
# Setting this value lower than 4 may lead to random lockups and timer delays
a-threads = 4

# Queue size for fast and slow threads
task-queue-size = 1024

# Whether the server should become leader from its very start
start-as-leader = true

# How often to gather own stats, in ms.
# Use 0 to disable (stats are still gathered and printed to log,
# but not included in metric dump)
stats-interval = 10000

# Prefix for sending own stats
stats-prefix = "resources.monitoring.bioyino"

# What consensus to use: "internal" or "none"
consensus = "none"

[carbon]
# IP and port of the carbon-protocol backend to send aggregated data to
address = "127.0.0.1:2003"

# Address to bind carbon client to when connecting
# default: not specified, so no bind happens
#bind-address = "127.0.0.1:2003"

# How often to send metrics to carbon backend, ms
interval = 30000

# How long to sleep when connection to backend fails, ms
connect-delay = 250

# Multiply delay by this value for each consecutive connection failure, float
connect-delay-multiplier = 2

# Maximum retry delay, ms
connect-delay-max = 10000

# How many times to retry sending data to backend before giving up
# and dropping all metrics.
# Note that 0 means 1 try
send-retries = 30

# The resulting aggregated set can be split into chunks to be sent to, e.g., a cluster
# of receiving nodes in parallel over `chunks` parallel TCP connections.
# This may speed up delivery and allow postprocessing to run in parallel
chunks = 1

# Network settings
[network]
# Address and UDP port to listen for statsd metrics at
listen = "127.0.0.1:8125"

# Address and port for peer protocol TCP server to listen on
peer-listen = "127.0.0.1:8136"

# Address for peer client to bind to.
# Format is a string with ip:port, as with other addresses.
# Only binds if specified, doesn't do binding if not
#peer-client-bind =

# Address and port for management server to listen on
mgmt-listen = "127.0.0.1:8137"

# UDP buffer size for single packet. Needs to be around MTU.
# Packet's bytes after that value may be lost
bufsize = 1500

# Enable multimessage (recvmmsg) mode
multimessage = false

# Number of multimessage packets to receive at once if in multimessage mode.
# Note that this setting is per thread, so in reality one can only see metrics
# after receiving at least mm-packets*n_threads datagrams
mm-packets = 100

# Do multimessage operations in async mode.
# This means recvmmsg will receive 0..mm-packets datagrams instead of waiting
# for mm-packets
mm-async = false

# Multimessage mode assumes early return by timeout, but ONLY when a packet
# is received after timeout expiration.
# Basically this should be changed only in very rare and specific cases.
# 0 means this value will be equal to buffer-flush-time
#mm-timeout = 0

# To avoid packets staying in queue forever, this option can be used to flush
# the incoming data buffer, forcing it to be sent even if it's not full
buffer-flush-time = 0

# Same as buffer-flush-time, but triggers on buffer length. Please notice that
# multimessage mode can only consider timer, so this check is only possible
# every mm-packets packets.
# Zero value means automatic management depending on memory allocator internal logic,
# which on tests was found to reach 30Mb.
# If in multimessage mode this value is lower than mm-packets*bufsize,
# it will be set to this value
buffer-flush-length = 65536

# Number of green threads for single-message mode
greens = 4

# Socket pool size for single-message mode
async-sockets = 4

# List of nodes to replicate metrics to
nodes = []

# Interval (milliseconds) to take snapshots for aggregation and remote replication
snapshot-interval = 1000

[metrics]
# Process buffers from different hosts separately; this gives more guarantee to parse
# metrics from different hosts correctly. Can perform badly if lots of metrics are
# received from a single host; set it to false if you have such a use case
#consistent-parsing = true

# Log all buffers being dropped due to parsing errors.
# Can be very spammy if you have a lot of incorrect data coming in.
#log-parse-errors = false

# Size of buffer that parser considers invalid. Used to avoid DoS attacks on parser.
# Increase this if you have metrics taking more than 1000 bytes
#max-unparsed-buffer = 10000

# Size of tags part of a metric (after semicolon character, not including
# the leading semicolon itself)
#max-tags-len = 9000

# In some systems a tagged metric becomes a totally different metric. This means
# the timeseries for such metrics can be broken. To avoid this while the metric
# is being adopted, this option allows creating a copy of such tagged metric,
# but without tags.
# Please note that this effectively doubles the amount of memory and processing time
#create-untagged-copy = false

[aggregation]
# Round timestamp to interval of aggregation
# possible values:
# "up" - round to upper bound of interval
# "down" - round to lower bound
# "no" - use exact timestamp, i.e. no rounding
#round-timestamp = "no"

# The `updates` aggregate is usually more of a debug value than a real one.
# Usually one wants it to see which metrics come too fast, taking too much resources.
# Since this is the only one used for every type of metric, not only timers,
# it is reasonable to avoid receiving a doubled amount of all metrics.
#
# This option allows receiving it only for metrics updated too often.
# In this example it is more than 200 per interval.
update-count-threshold = 200

# A list of aggregates gathered per type of metric.
# Setting this value will define the exact set, so, e.g., ["mean"] will gather
# ONLY ONE aggregate.
#
# To add new percentiles, an integer value with "percentile-" prefix should be used.
# It will then be prefixed (in reality this is not a string based calculation) with "0."
# For example, "percentile-9999" will become 0.9999th percentile
#
# This is the full list of default values for timer
#aggregates.timer = [ "count", "rate", "last", "min", "max", "sum", "median", "mean", "updates", "percentile-75", "percentile-95", "percentile-98", "percentile-99", "percentile-999" ]

# An example for gauge with "updates" aggregate turned off
#aggregates.gauge = [ "value" ]

# Naming section covers all aspects of changing metric name according to different settings.
# There is nothing to set in the [naming] section itself, only in subsections.
# Keys of map in this section are metric types: gauge, counter, diff-counter, timer, set
# All sections have the same set of parameters.
# More examples can be seen in docs/aggregation.md file.
[naming.default]
# These settings will be applied for all unspecified naming parts.
#
# For example, if naming.set does not exist or naming.set.tag-values is not specified,
# the tag-values from this section will be used instead, excluding the impossible aggregates

## Available values: smart (explained below), name, tag, both
# smart means detection based on existence of tags in the metric,
# so, for example, sum for `some.metric` will be aggregated as `some.metric.sum`
# but for `some.metric;tag=value` it will automatically become
# `some.metric;tag=value;aggregate=sum`
destination = "smart"

## This will be a global prefix for all metrics
prefix = "resources.monitoring.bioyino"

## This allows overriding the default prefix for specific metric types
# Example:
# This will make all medians and update counters have a specific namespace
#prefix-overrides = { updates = "metrics.updates", median = "all.medians" }

## Tag name to specify aggregation type in tags
#tag = "aggregate"

## This allows overriding postfix names for specific aggregates.
# By default the names are the same as keys (see aggregation section for a full list)
# The percentiles are named like in previous versions of bioyino: with a dot,
# to get a tree-based structure.
# I.e. the percentile-99 aggregate for some.metric becomes some.metric.percentile.99
postfixes = { min = "lower", max = "upper", percentile-50 = "percentile.50", updates = "" }

## This is the same as for postfixes, but for tag values
# Example:
# This setting will aggregate max for some.metric;tag=value
# into some.metric;aggregate=upper;tag=value
# Percentile metrics are by default exactly the same as in postfixes: i.e. percentile.99
#tag-values = { max = "upper" }

## Example of overriding some naming options for timer
#[naming.timer]
#prefix-overrides = { updates = "metrics.timer.update-counts" }

# Settings for internal Raft
[raft]
# Defer start of raft consensus to avoid node becoming leader too early.
# Such a situation is very likely when restarting the current leader node
# and means losing metrics in most cases
#start-delay = 0

# Timeouts tuned according to the Raft paper and typical network latency.
# Better not to change if unsure
#heartbeat-timeout = 250
#election-timeout-min = 500
#election-timeout-max = 750

# The name of the current node is taken from hostname by default.
# After that all hostnames are resolved using DNS. If the node name cannot
# be resolved through DNS for some reason, it can be specified in the this-node
# parameter in a format similar to the one in the node list.
#this-node =

# A map of other raft nodes. Keys are in form of hostname:port or IP:port,
# values are integer IDs.
# It is crucial for this map to have the same address <-> ID mappings on all nodes
nodes = {}

# Allow binding raft outgoing connections to a specific IP
# default: not specified, so no bind happens
#client-bind = "127.0.0.1:8138"