/*
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 */

/*
 * Copyright (c) 2019, Joyent, Inc.
 */

/*
 * node-triton-metrics
 */

var v8 = require('v8');

var artedi = require('artedi');
var assert = require('assert-plus');
// NOTE: the gc-stats is also used in this file, but conditionally. It is
// require()d when createNodejsMetrics() is called to avoid overhead otherwise.
var gc_stats;
var vasync = require('vasync');
var VError = require('verror');

var EventloopMetrics = require('./eventloop-metrics');

var MS_PER_SECOND = 1e3;
var US_PER_SECOND = 1e6;
var NS_PER_SECOND = 1e9;

function getMetricsHandler(collector, preCollectFuncs) {
    function getMetrics(req, res, next) {
        /*
         * Restify GET requests will keep socket open until entire request
         * body is read. req.resume() is used to prevent connection leaks.
         *
         * More information at:
         * https://jira.joyent.us/browse/MANTA-3338
         * https://cr.joyent.us/#/c/2823/1/lib/other.js
         */
        req.on('end', function collectMetrics() {
            vasync.parallel({funcs: preCollectFuncs}, function _onPreCollected(
                preCollectErr
            ) {
                assert.ifError(
                    preCollectErr,
                    'preCollectFuncs should not callback with error'
                );

                collector.collect(artedi.FMT_PROM, function sendMetrics(
                    err,
                    metrics
                ) {
                    if (err) {
                        next(new VError(err, 'error retrieving metrics'));
                        return;
                    }
                    /*
                     * Content-Type header is set to indicate the Prometheus
                     * exposition format version
                     *
                     * More information at:
                     * https://github.com/prometheus/docs/blob/master/content/docs/instrumenting/exposition_formats.md#format-version-004
                     */
                    res.setHeader('Content-Type', 'text/plain; version=0.0.4');
                    res.send(metrics);
                    next();
                });
            });
        });

        req.resume();
    }
    var chain = [getMetrics];
    return chain;
}

function MetricsManager(config) {
    assert.object(config, 'config');
    assert.optionalBool(
        config.handleUncaughtExceptions,
        'config.handleUncaughtExceptions'
    );
    assert.object(config.log, 'config.log');
    assert.object(config.restify, 'config.restify');
    assert.object(config.staticLabels, 'config.staticLabels');
    assert.string(
        config.staticLabels.datacenter,
        'config.staticLabels.datacenter'
    );
    assert.string(config.staticLabels.instance, 'config.staticLabels.instance');
    assert.string(config.staticLabels.server, 'config.staticLabels.server');
    assert.string(config.staticLabels.service, 'config.staticLabels.service');
    assert.optionalObject(config.metricOpts, 'config.metricOpts');
    if (config.metricOpts !== undefined) {
        assert.optionalArray(
            config.metricOpts.buckets,
            'config.metricOpts.buckets'
        );
        assert.optionalObject(
            config.metricOpts.labels,
            'config.metricOpts.labels'
        );
    }

    if (config.port) {
        assert.string(config.address, 'config.address');
        assert.number(config.port, 'config.port');
    } else {
        assert.equal(undefined, config.address, 'config.address');
        assert.string(config.path, 'config.path');
    }

    var collector = artedi.createCollector({labels: config.staticLabels});
    var createOpts = {serverName: 'Metrics'};

    if (config.handleUncaughtExceptions !== undefined) {
        createOpts.handleUncaughtExceptions = config.handleUncaughtExceptions;
    }

    this.address = config.address;
    this.collector = collector;
    this.eventloopMetrics = null;
    this.gcCounts = {};
    this.gcPauses = {};
    this.log = config.log;
    this.metricOpts = config.metricOpts;
    this.metricsGroups = {};
    this.path = config.path;
    this.port = config.port;
    this.preCollectFuncs = [];
    this.previousCpuUsage = {user: 0, system: 0};
    this.server = config.restify.createServer(createOpts);
    this.server.get(
        '/metrics',
        getMetricsHandler(collector, this.preCollectFuncs)
    );
}

MetricsManager.prototype.listen = function startMetricsServer(callback) {
    var self = this;
    var port = self.port || self.path;

    self.server.listen(port, self.address, function serverStarted() {
        if (self.port) {
            self.log.info('metrics server started on %s', self.server.url);
        } else {
            self.log.info('metrics server started on %s', self.path);
        }
        callback();
    });
};

MetricsManager.prototype.close = function stopMetricsServer(callback) {
    var self = this;

    if (self.eventloopMetrics !== null) {
        self.eventloopMetrics.stop();
        self.eventloopMetrics = null;
    }

    self.server.close(function serverStopped() {
        self.log.info('metrics server has shutdown');
        callback();
    });
};

MetricsManager.prototype.addPreCollectFunc = function addPreCollectFunc(func) {
    assert.func(func, 'func');

    this.preCollectFuncs.push(func);
};

MetricsManager.prototype.createMetrics = function createMetrics(
    name,
    collectMetrics
) {
    assert.string(name, 'name');
    assert.func(collectMetrics, 'collectMetrics');

    this.metricsGroups[name] = collectMetrics;
};

MetricsManager.prototype.collectMetrics = function updateMetrics(name) {
    assert.string(name, 'name');

    var collectArgs = Array.prototype.slice.call(arguments, 1);
    this.metricsGroups[name].apply(this, collectArgs);
};

function gcType(typeNum) {
    // See: https://github.com/nodejs/node/blob/554fa24916c5c6d052b51c5cee9556b76489b3f7/deps/v8/include/v8.h#L6137-L6144
    switch (Number(typeNum)) {
        case 1:
            return 'scavenge';
        case 2:
            return 'mark/sweep/compact';
        case 4:
            return 'incremental';
        case 8:
            return 'weak/phantom';
        case 15:
            return 'all';
        default:
            return 'unknown/' + typeNum;
    }
}

//
// Metrics here were inspired by (among other sources):
//
// https://github.com/Netflix-Skunkworks/atlas-node-client
// https://github.com/RuntimeTools/appmetrics
// https://github.com/siimon/prom-client
// https://www.oreilly.com/ideas/top-nodejs-metrics-to-watch
//
MetricsManager.prototype.createNodejsMetrics = function createNodejsMetrics() {
    var self = this;

    var collectors = {
        counter: {
            nodejs_eventloop_lag_seconds_total: {
                source: 'eventloopMetrics',
                key: 'lagSum',
                help:
                    'Represents the total number of seconds setTimeout() ' +
                    'has taken beyond the timeout when sampling the eventloop.'
            },
            nodejs_eventloop_samples_total: {
                source: 'eventloopMetrics',
                key: 'sampleCount',
                help: 'Number of times eventloop data has been collected.'
            },
            nodejs_eventloop_tick_count_total: {
                source: 'eventloopMetrics',
                key: 'loopCountSum',
                help:
                    'Number of times eventloop has ticked since start of ' +
                    'observation.'
            },
            nodejs_eventloop_time_seconds_total: {
                source: 'eventloopMetrics',
                key: 'loopTimeSum',
                help:
                    'Total amount of time spent in the eventloop since ' +
                    'start of observation.'
            },
            nodejs_gc_execution_count_total: {
                source: 'special',
                help: 'Number of times node GC has run.'
            },
            nodejs_gc_pause_seconds_total: {
                source: 'special',
                help: 'Total number of seconds node has spent paused for GC.'
            },
            nodejs_system_cpu_seconds_total: {
                source: 'cpuUsage',
                key: 'system',
                help: 'Number of CPU seconds spent in system code.'
            },
            nodejs_user_cpu_seconds_total: {
                source: 'cpuUsage',
                key: 'user',
                help: 'Number of CPU seconds spent in user code.'
            }
        },
        gauge: {
            nodejs_active_handles_count: {
                source: 'special',
                help:
                    'Number of active handles reported by ' +
                    'process._getActiveHandles().'
            },
            nodejs_active_requests_count: {
                source: 'special',
                help:
                    'Total number of active requests reported by ' +
                    'process._getActiveRequests().'
            },
            nodejs_eventloop_max_lag_seconds: {
                source: 'eventloopMetrics',
                key: 'lagMax',
                help: 'Maximum number of seconds setTimeout() was delayed.'
            },
            nodejs_eventloop_max_tick_count: {
                source: 'eventloopMetrics',
                key: 'loopCountMax',
                help: 'Maximum number of ticks in a single setTimeout() loop.'
            },
            nodejs_eventloop_max_time_seconds: {
                source: 'eventloopMetrics',
                key: 'loopTimeMax',
                help:
                    'Maximum number of seconds spent on a single eventloop ' +
                    'tick.'
            },
            nodejs_eventloop_min_lag_seconds: {
                source: 'eventloopMetrics',
                key: 'lagMin',
                help: 'Minimum number of seconds setTimeout() was delayed.'
            },
            nodejs_eventloop_min_tick_count: {
                source: 'eventloopMetrics',
                key: 'loopCountMin',
                help: 'Minimum number of ticks in a single setTimeout() loop.'
            },
            nodejs_eventloop_min_time_seconds: {
                source: 'eventloopMetrics',
                key: 'loopTimeMin',
                help:
                    'Minimum number of seconds spent on a single eventloop ' +
                    'tick.'
            },
            nodejs_memory_external_bytes: {
                source: 'memoryUsage',
                key: 'external',
                help:
                    'Number of bytes of memory used by C++ objects bound to ' +
                    'JavaScript objects managed by V8.'
            },
            nodejs_memory_heapTotal_bytes: {
                source: 'memoryUsage',
                key: 'heapTotal',
                help: "Number of total bytes in V8's heap."
            },
            nodejs_memory_heapUsed_bytes: {
                source: 'memoryUsage',
                key: 'heapUsed',
                help: "Number of used bytes in V8's heap"
            },
            nodejs_memory_rss_bytes: {
                source: 'memoryUsage',
                key: 'rss',
                help: 'Number of bytes in main memory for the process'
            },
            nodejs_process_start_time_seconds: {
                source: 'special',
                help:
                    'Start time of this process in seconds since ' +
                    '1970-01-01T00:00:00Z'
            },
            nodejs_V8_does_zap_garbage_boolean: {
                source: 'v8HeapStats',
                key: 'does_zap_garbage',
                help:
                    'A 0/1 boolean, which signifies whether the ' +
                    '--zap_code_space option is enabled or not.'
            },
            nodejs_V8_heap_size_executable_bytes: {
                source: 'v8HeapStats',
                key: 'total_heap_size_executable',
                help: 'Number of bytes for compiled bytecode and JITed code.'
            },
            nodejs_V8_heap_size_limit_bytes: {
                source: 'v8HeapStats',
                key: 'heap_size_limit',
                help: 'Limit on the size of the V8 heap.'
            },
            nodejs_V8_malloced_memory_bytes: {
                source: 'v8HeapStats',
                key: 'malloced_memory',
                help: 'Current bytes of memory that has been malloc()ed.'
            },
            nodejs_V8_peak_malloced_memory_bytes: {
                source: 'v8HeapStats',
                key: 'peak_malloced_memory',
                help: 'Maximum bytes of memory that has been malloc()ed.'
            },
            nodejs_V8_total_available_size_bytes: {
                source: 'v8HeapStats',
                key: 'total_available_size',
                help: 'Number of bytes available in the V8 heap.'
            },
            nodejs_V8_total_heap_size_bytes: {
                source: 'v8HeapStats',
                key: 'total_heap_size',
                help: 'Total bytes V8 has allocated for its heap.'
            },
            nodejs_V8_total_physical_size_bytes: {
                source: 'v8HeapStats',
                key: 'total_physical_size',
                help: 'Total committed size of V8.'
            },
            nodejs_V8_used_heap_size_bytes: {
                source: 'v8HeapStats',
                key: 'used_heap_size',
                help: 'Number of bytes allocated in the V8 heap.'
            }
        }
    };

    function onEachCollector(fn, callback) {
        var idx;
        var keys;
        var name;
        var type;
        var typeIdx;
        var types = ['counter', 'gauge'];

        for (typeIdx = 0; typeIdx < types.length; typeIdx++) {
            type = types[typeIdx];

            keys = Object.keys(collectors[type]);
            for (idx = 0; idx < keys.length; idx++) {
                name = keys[idx];

                fn(type, name);
            }
        }

        if (callback !== undefined) {
            callback();
        }
    }

    function collectMetrics(cb) {
        var cpuUsage;
        var data = {};
        var gcTypeNum;
        var idx;
        var keys;

        // Fill in "special" metrics that don't come from one of the structures
        // we populate below (cpuUsage, memoryUsage, etc.)
        collectors.gauge['nodejs_active_handles_count']._handle.set(
            process._getActiveHandles().length
        );
        collectors.gauge['nodejs_active_requests_count']._handle.set(
            process._getActiveRequests().length
        );
        collectors.gauge['nodejs_process_start_time_seconds']._handle.set(
            Math.floor(Date.now() / MS_PER_SECOND - process.uptime())
        );

        // Passing in the previous value gives us just the delta, so we can just
        // add that. We set to 0's initially so that the first value gets
        // everything so far.
        cpuUsage = process.cpuUsage(self.previousCpuUsage);
        data.cpuUsage = {
            system: cpuUsage.system / US_PER_SECOND,
            user: cpuUsage.user / US_PER_SECOND
        };
        // Add the previous values back in since new values are a delta.
        cpuUsage.system += self.previousCpuUsage.system;
        cpuUsage.user += self.previousCpuUsage.user;
        self.previousCpuUsage = cpuUsage;

        data.eventloopMetrics = self.eventloopMetrics.get();
        // Times are in nanoseconds, convert to seconds
        data.eventloopMetrics.lagMax /= NS_PER_SECOND;
        data.eventloopMetrics.lagMin /= NS_PER_SECOND;
        data.eventloopMetrics.lagSum /= NS_PER_SECOND;
        data.eventloopMetrics.loopTimeMax /= NS_PER_SECOND;
        data.eventloopMetrics.loopTimeMin /= NS_PER_SECOND;
        data.eventloopMetrics.loopTimeSum /= NS_PER_SECOND;

        data.memoryUsage = process.memoryUsage();
        data.v8HeapStats = v8.getHeapStatistics();

        keys = Object.keys(self.gcCounts);
        for (idx = 0; idx < keys.length; idx++) {
            gcTypeNum = keys[idx];

            collectors.counter['nodejs_gc_execution_count_total']._handle.add(
                self.gcCounts[gcTypeNum],
                {gcType: gcType(gcTypeNum)}
            );
            collectors.counter['nodejs_gc_pause_seconds_total']._handle.add(
                self.gcPauses[gcTypeNum] / NS_PER_SECOND,
                {gcType: gcType(gcTypeNum)}
            );

            // Since we've added these to the counters, we'll delete so they're
            // reset next time. We don't just set to 0, since then we'd still be
            // adding 0 to the collector every time, even if this type never
            // happens again.
            delete self.gcCounts[gcTypeNum];
            delete self.gcPauses[gcTypeNum];
        }

        // FUTURE: Open FDs: readdir /proc/self/fd and count .length?

        // Now actually collect the remaining data into the artedi metrics.
        onEachCollector(function _updateArtediCollector(type, name) {
            var obj = collectors[type][name];
            var value;

            if (obj.source === 'special') {
                // we should have collected these above.
                return;
            }

            // Special case for eventloop metrics: if we haven't collected any
            // samples, don't update values (since that would break *Max).
            if (obj.source === 'eventloopMetrics') {
                if (
                    data.eventloopMetrics.sampleCount === 0 &&
                    ['lagMax', 'lagMin', 'lagSum'].indexOf(name) !== -1
                ) {
                    return;
                }
                if (
                    data.eventloopMetrics.loopCountSum === 0 &&
                    [
                        'loopTimeMax',
                        'loopTimeMin',
                        'loopTimeSum',
                        'loopCountMax',
                        'loopCountMin',
                        'loopCountSum'
                    ].indexOf(name) !== -1
                ) {
                    return;
                }
            }

            // This will blow up if obj.source is not in data, but that's what
            // we want since that's a programmer error.
            value = data[obj.source][obj.key];

            // These 3 are only available on node >= 7.2.0, so if we don't have
            // them, we'll just not collect them.
            if (
                obj.source === 'v8HeapStats' &&
                [
                    'does_zap_garbage',
                    'peak_malloced_memory',
                    'malloced_memory'
                ].indexOf(obj.key) !== -1 &&
                value === undefined
            ) {
                return;
            }

            switch (type) {
                case 'counter':
                    // cannot add negative values, so use 0 if negative
                    obj._handle.add(Math.max(0, value));
                    break;
                case 'gauge':
                    obj._handle.set(value);
                    break;
                default:
                    assert.fail('Unexpected type: ' + type);
                    break;
            }
        }, cb);
    }

    // Start the eventloop metric collector (will loop itself via setTimeout)
    self.eventloopMetrics = new EventloopMetrics();
    self.eventloopMetrics.start();

    // Start the GC collector. It will collect the values on every GC event.
    gc_stats = require('gc-stats')();
    gc_stats.on('stats', function(gcStats) {
        if (self.gcCounts[gcStats.gctype] === undefined) {
            // These are always updated together, so if one is undefined they
            // both will be.
            self.gcCounts[gcStats.gctype] = 0;
            self.gcPauses[gcStats.gctype] = 0;
        }
        self.gcCounts[gcStats.gctype]++;
        self.gcPauses[gcStats.gctype] += gcStats.pause; // nanoseconds

        // NOTE: There are more fields in this output that we're not
        // currently tracking. Including numbers of bytes before/after/diff
        // for different types of memory.
        self.log.trace({stats: gcStats}, 'nodejs GC detected');
    });

    // Create Artedi Collectors
    onEachCollector(function _createArtediCollector(type, name) {
        var obj = collectors[type][name];

        obj._handle = self.collector[type]({
            name: name,
            help: obj.help,
            labels: self._getLabels(name)
        });
    });

    // collect the metrics when someone calls `/metrics`
    self.addPreCollectFunc(collectMetrics);

    self.createMetrics('nodejs', collectMetrics);
};

MetricsManager.prototype._getBuckets = function _getBuckets(metricName) {
    var self = this;

    var buckets = [];

    if (
        self.metricOpts !== undefined &&
        self.metricOpts.hasOwnProperty(metricName) &&
        self.metricOpts[metricName].hasOwnProperty('buckets')
    ) {
        // We have specific buckets for this metric, use those.
        buckets = self.metricOpts[metricName].buckets;
    } else {
        // Default buckets look like:
        //
        //      0.001
        //      [0.001] 0.002 0.004 0.006 0.008 0.01
        //      [0.01] 0.02 0.04 0.06 0.08 0.1
        //      [0.1] 0.2 0.4 0.6 0.8 1
        //      [1] 2 4 6 8 10
        //      [10] 20 40 60 80 100
        //      [100] 200 400 600 800 1000
        //      [1000] 2000 4000
        //
        buckets = artedi
            .logLinearBuckets(10, -5, 3, 5)
            .filter(function _trimBuckets(v) {
                // Limit to >= 0.001 and <= 4000 as this is the range of
                // observed values for all Triton APIs we surveyed across
                // a month of production data.
                if (v > 4000 || v < 0.001) {
                    return false;
                }
                return true;
            });
    }

    return buckets;
};

MetricsManager.prototype._getLabels = function _getLabels(metricName) {
    var self = this;

    var labels = {};

    if (
        self.metricOpts !== undefined &&
        self.metricOpts.hasOwnProperty(metricName) &&
        self.metricOpts[metricName].hasOwnProperty('labels')
    ) {
        // We have specific labels for this metric, use those.
        labels = self.metricOpts[metricName].labels;
    }

    if (!labels.hasOwnProperty('buckets_version')) {
        labels.buckets_version = '1';
    }

    return labels;
};

MetricsManager.prototype.createRestifyMetrics = function createRestifyMetrics() {
    var self = this;

    self.collector.counter({
        name: 'http_requests_completed',
        help: 'count of requests completed',
        labels: self._getLabels('http_requests_completed')
    });

    self.collector.histogram({
        name: 'http_request_duration_seconds',
        help: 'total time to process requests',
        buckets: self._getBuckets('http_request_duration_seconds'),
        labels: self._getLabels('http_request_duration_seconds')
    });

    function collectMetrics(req, res, route) {
        var latency;
        var latencySeconds;
        var routeName = route ? route.name || route : 'unknown';
        var userAgent = req.userAgent();

        // Only the first token is added to the label to prevent cardinality
        // issues
        var shortUserAgent = userAgent ? userAgent.split(' ')[0] : 'unknown';

        var labels = {
            route: routeName,
            method: req.method,
            user_agent: shortUserAgent,
            status_code: res.statusCode
        };

        if (typeof req._time === 'number') {
            latency = Date.now() - req._time;
            latencySeconds = latency / 1000;
        } else {
            latency = process.hrtime(req._time);
            latencySeconds =
                (latency[0] * NS_PER_SECOND + latency[1]) / NS_PER_SECOND;
        }

        self.collector
            .getCollector('http_requests_completed')
            .increment(labels);
        self.collector
            .getCollector('http_request_duration_seconds')
            .observe(latencySeconds, labels);
    }

    self.createMetrics('restify', collectMetrics);
};

MetricsManager.prototype.collectRestifyMetrics = function collectRestifyMetrics(
    req,
    res,
    route
) {
    this.collectMetrics('restify', req, res, route);
};

function createMetricsManager(options) {
    return new MetricsManager(options);
}

module.exports = {
    createMetricsManager: createMetricsManager
};