#!/bin/sh

## -------------------------------------------------------------------
##
## rabbitmq-collect-env: Collect info for RabbitMQ troubleshooting
##
## Based on `riak-debug' as developed by Basho Technologies, Inc
##
## Copyright (c) 2017 Basho Technologies, Inc.  All Rights Reserved.
## Copyright (c) 2017 Pivotal Software, Inc.  All rights reserved.
##
## This file is provided to you under the Apache License,
## Version 2.0 (the "License"); you may not use this file
## except in compliance with the License.  You may obtain
## a copy of the License at
##
##   https://www.apache.org/licenses/LICENSE-2.0
##
## Unless required by applicable law or agreed to in writing,
## software distributed under the License is distributed on an
## "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
## KIND, either express or implied.  See the License for the
## specific language governing permissions and limitations
## under the License.
##
## -------------------------------------------------------------------

# Explicitly disable errexit: the script keeps going when an individual
# command fails, and failures are reported per command instead.
set +e

# Verbosity level; incremented once for every -v/--verbose option.
# value greater than 2 is debug verbosity
verbose=0

# Output of hostname(1); used to name the per-host output directory and the
# final archive.
hostname="$(hostname)"
readonly hostname

# Placeholder values. The real paths are assigned by create_output_directories.
output_dir='output_dir_init'
system_output_dir='system_output_dir_init'
rabbitmq_output_dir='rabbitmq_output_dir_init'

# Option strings passed to cp, rm and mv. build_command_args replaces them
# with verbose variants when debug verbosity is active.
cp_args='-f'
rm_args='-rf'
mv_args='-f'

# EXIT handler: remove the temporary output directory.
# Globals: output_dir, verbose, rm_args (all read).
# At debug verbosity (verbose > 2) the directory is kept for inspection and
# its location is printed instead.
onexit()
{
    # Nothing was created (or it is already gone): nothing to clean up.
    [ -d "$output_dir" ] || return 0

    if ! [ "$verbose" -gt 2 ]
    then
        rm "$rm_args" "$output_dir"
        return
    fi

    printf "[DEBUG] preserving output directory '%s'\\n" "$output_dir"
}

trap onexit EXIT

# Ensure that directory "$1" exists, creating it (and any missing parents)
# when necessary.
# Globals: verbose (read).
# Exits the script with status 1 when the directory cannot be created.
mkdir_or_die()
{
    # Already present: nothing to do.
    [ -d "$1" ] && return 0

    if ! mkdir -p "$1"
    then
        printf "[ERROR] could not create directory '%s'\\n" "$1" 1>&2
        exit 1
    fi

    if [ "$verbose" -gt 2 ]
    then
        printf "[DEBUG] created directory: '%s'\\n" "$1"
    fi
}

# Return the exit status of `command -v "$1"`, i.e. success when the shell can
# resolve "$1" as a command. Both stdout and stderr are discarded.
command_exists()
{
    command -v "$1" > /dev/null 2>&1
}

# Report which user the script runs as (according to `id -un`).
# Globals: verbose (read), name (written).
# 'root' is only mentioned at debug verbosity, 'rabbitmq' produces an
# informational note and any other user produces a warning.
user_check()
{
    name="$(id -un)"

    case "$name" in
        root)
            if [ "$verbose" -gt 2 ]
            then
                printf "[DEBUG] running %s as root\\n" "$0"
            fi
            ;;
        rabbitmq)
            printf "[INFO] running as 'rabbitmq' user, some commands may fail. Consider running as 'root' user.\\n"
            ;;
        *)
            printf "[WARN] script is not running as the 'root' or 'rabbitmq' user, many commands may fail.\\n"
            ;;
    esac
}

# Print the command-line help text to stdout, one argument per output line.
usage()
{
    printf '%s\n' \
        'rabbitmq-collect-env: Collect info from a RabbitMQ node for troubleshooting' \
        '' \
        'Usage: rabbitmq-collect-env [-h] [-v [-v [-v]]]' \
        '' \
        '-h, --help       Print this usage statement.' \
        '-v, --verbose    Print verbose messages to stdout. More instances of' \
        '                 this option will result in more verbose output.'
}

# Parse the script's command-line arguments.
# Arguments: the arguments given to the script ("$@").
# Globals:   verbose (incremented once per -v/--verbose).
# -h/--help prints the usage text and exits with status 0.
parse_options()
{
    # Loop on the argument count rather than on "$1" being non-empty, so that
    # an empty argument does not end parsing and hide the options after it.
    while [ "$#" -gt 0 ]
    do
        case "$1" in
            -h|--help)
                usage
                exit 0
                ;;
            -v|--verbose)
                verbose="$((verbose + 1))"
                ;;
            *)
                # Unrecognised arguments are deliberately ignored.
                ;;
        esac
        shift
    done
}

# Create the temporary output tree:
#   <tmpdir>/<hostname>/system    - output of system commands
#   <tmpdir>/<hostname>/rabbitmq  - output of RabbitMQ commands
# Globals: hostname, verbose (read); output_dir, top_output_dir,
#          system_output_dir, rabbitmq_output_dir (written).
# Exits the script with status 1 when the temporary directory (or one of its
# sub-directories, via mkdir_or_die) cannot be created.
create_output_directories()
{
    # Without this check a failed mktemp leaves output_dir empty and the
    # sub-directories would be created directly under "/<hostname>".
    if ! output_dir="$(mktemp -d)" || [ ! -d "$output_dir" ]
    then
        printf "[ERROR] could not create temporary output directory\\n" 1>&2
        exit 1
    fi

    top_output_dir="$output_dir/$hostname"
    system_output_dir="$top_output_dir/system"
    rabbitmq_output_dir="$top_output_dir/rabbitmq"

    mkdir_or_die "$system_output_dir"
    mkdir_or_die "$rabbitmq_output_dir"

    if [ "$verbose" -gt 2 ]
    then
        printf "[DEBUG] top-level output dir: '%s'\\n" "$output_dir"
    fi
}

# Switch the cp/rm/mv option strings to their verbose variants when debug
# verbosity (verbose > 2) is active; otherwise leave them untouched.
# Globals: verbose (read); cp_args, rm_args, mv_args (written).
build_command_args()
{
    [ "$verbose" -gt 2 ] || return 0

    cp_args='-vf'
    rm_args='-vrf'
    mv_args='-vf'
}

# Locate and source the RabbitMQ environment and work out where the
# configuration and log directories live.
# Globals written: PATH (BOSH package directories are prepended when present),
#                  rabbitmq_etc_dir, RABBITMQ_LOG_BASE, plus whatever the
#                  sourced files define.
# RABBITMQ_ENV and RABBITMQ_SCRIPTS_DIR are only used while sourcing
# rabbitmq-env and are unset again before returning.
# Missing files or directories produce warnings; the function never exits.
init_environment()
{
    # Start PCF / BOSH init

    if [ -s /var/vcap/jobs/rabbitmq-server/env ]
    then
        # shellcheck source=/dev/null
        . /var/vcap/jobs/rabbitmq-server/env
    fi

    if [ -d /var/vcap/packages/rabbitmq-server/bin ]
    then
        export PATH="/var/vcap/packages/rabbitmq-server/bin:$PATH"
    fi

    if [ -d /var/vcap/packages/rabbitmq-server/sbin ]
    then
        export PATH="/var/vcap/packages/rabbitmq-server/sbin:$PATH"
    fi

    if [ -d /var/vcap/packages/erlang/bin ]
    then
        export PATH="/var/vcap/packages/erlang/bin:$PATH"
    fi

    # The first rabbitmq-env that is found wins: privbin is tried before sbin.
    if [ -z "$RABBITMQ_ENV" ] && \
        [ -f /var/vcap/jobs/rabbitmq-server/packages/rabbitmq-server/privbin/rabbitmq-env ]
    then
        RABBITMQ_ENV='/var/vcap/jobs/rabbitmq-server/packages/rabbitmq-server/privbin/rabbitmq-env'
    fi

    if [ -z "$RABBITMQ_ENV" ] && \
        [ -f /var/vcap/jobs/rabbitmq-server/packages/rabbitmq-server/sbin/rabbitmq-env ]
    then
        RABBITMQ_ENV='/var/vcap/jobs/rabbitmq-server/packages/rabbitmq-server/sbin/rabbitmq-env'
    fi

    # The job-level etc directory, when present, overrides the package one.
    if [ -d /var/vcap/jobs/rabbitmq-server/packages/rabbitmq-server/etc ]
    then
        rabbitmq_etc_dir='/var/vcap/jobs/rabbitmq-server/packages/rabbitmq-server/etc'
    fi

    if [ -d /var/vcap/jobs/rabbitmq-server/etc ]
    then
        rabbitmq_etc_dir='/var/vcap/jobs/rabbitmq-server/etc'
    fi

    # End PCF / BOSH init

    if [ -z "$RABBITMQ_ENV" ]
    then
        # `command -v` is specified by POSIX and prints nothing when the
        # command is missing. `which` is not standardised and some
        # implementations print a "no ... in ..." diagnostic on stdout, which
        # would end up in RABBITMQ_ENV.
        RABBITMQ_ENV="$(command -v rabbitmq-env 2> /dev/null)"
    fi

    if [ -z "$RABBITMQ_ENV" ]
    then
        RABBITMQ_ENV='/usr/lib/rabbitmq/bin/rabbitmq-env'
    fi
    if [ -z "$RABBITMQ_SCRIPTS_DIR" ]
    then
        RABBITMQ_SCRIPTS_DIR="$(dirname "$RABBITMQ_ENV")"
    fi
    if [ -s "$RABBITMQ_ENV" ]
    then
        # shellcheck source=/dev/null
        . "$RABBITMQ_ENV"
    else
        printf "[WARN] expected to find rabbitmq-env at '%s', but file does not exist.\\n" "$RABBITMQ_ENV"
    fi

    # NB: it is very important to *not* export these as they will screw up
    # execution of rabbitmqctl later on
    unset RABBITMQ_ENV
    unset RABBITMQ_SCRIPTS_DIR

    if [ -z "$rabbitmq_etc_dir" ]
    then
        rabbitmq_etc_dir='/etc/rabbitmq'
    fi
    if [ ! -d "$rabbitmq_etc_dir" ]
    then
        printf "[WARN] expected to find RabbitMQ 'etc' directory at '%s', but directory does not exist.\\n" "$rabbitmq_etc_dir"
    fi

    # Only guess a log directory when neither the caller nor the sourced
    # environment provided one.
    if [ -z "$RABBITMQ_LOG_BASE" ]
    then
        if [ -d /var/vcap/sys/log/rabbitmq-server ]
        then
            RABBITMQ_LOG_BASE=/var/vcap/sys/log/rabbitmq-server
        elif [ -d /var/log/rabbitmq ]
        then
            RABBITMQ_LOG_BASE=/var/log/rabbitmq
        fi
    fi
    if [ ! -d "$RABBITMQ_LOG_BASE" ]
    then
        printf "[WARN] expected to find log directory at '%s', but directory does not exist.\\n" "$RABBITMQ_LOG_BASE"
    fi
}

### collect output_dir output_file command [cmd_args...]
# Runs a command and appends its stdout and stderr to output_dir/output_file.
# The command text and its exit status are recorded in
# output_dir/.info/output_file.
# Relies on the ${verbose} global.
# When ${verbose} is 0, this script will output
#  - `.` for a successfully captured command
#  - `_` for a command that could not be found
#  - `E` for a command that returned a non-zero retval
# When $verbose is 1, failed commands will be logged.
# When $verbose is 2, missing commands will be logged as well.
# When $verbose is 3, successful commands will be logged as well.
# Returns the exit status of the command, or 0 when the command is missing.
# Exits the script with status 1 when output_dir does not exist.
collect()
{
    # Capture and shift away the output directory.
    collect_output_dir="$1"
    shift

    if [ ! -d "$collect_output_dir" ]
    then
        printf "[ERROR] collect - expected directory '%s' to exist!\\n" "$collect_output_dir" 1>&2
        exit 1
    fi

    collect_output_info_dir="$collect_output_dir/.info"
    mkdir_or_die "$collect_output_info_dir"

    # Capture and shift away the output file name so that only the target
    # command and its arguments remain in "$@".
    collect_outfile="$1"
    shift

    # If the executable can't be found, log the miss and return.
    if ! command_exists "$1"
    then
        if [ "$verbose" -gt 1 ]
        then
            printf "[WARN] command '%s' not found.\\n" "$1"
        else
            printf '_'
        fi
        return
    fi

    if [ "$verbose" -gt 2 ]
    then
        printf "[DEBUG] running '%s'\\n" "$*"
    fi

    # Run the command with its arguments exactly as they were passed in.
    # "$@" keeps every argument as one word; an unquoted $* would split
    # arguments on whitespace and expand glob characters in them.
    "$@" < /dev/null >> "$collect_output_dir/$collect_outfile" 2>&1
    collect_rv=$?

    # Record the text of the command and the returned value in the .info/$out
    # file to aid automation.
    # Note: the recorded text is the arguments joined by spaces, so the
    # original quoting is not reproduced, but it's not critical.
    # The command that was run can be fetched with  `head -1 .info/$out`.
    # The returned value can be fetched with        `tail -1 .info/$out`.
    printf '%s\n' "$*" > "$collect_output_info_dir/$collect_outfile"
    printf '%s\n' "$collect_rv" >> "$collect_output_info_dir/$collect_outfile"

    if [ "$collect_rv" -eq 0 ]
    then
        if [ "$verbose" -gt 2 ]
        then
            printf "[DEBUG] '%s' exit code: %d\\n" "$*" "$collect_rv"
        else
            printf '.'
        fi
    else
        if [ "$verbose" -gt 0 ]
        then
            printf "[ERROR] command '%s' failed, exit code: %d\\n" "$*" "$collect_rv" 1>&2
        else
            printf 'E'
        fi
    fi

    return $collect_rv
}

# collect_system output_file command [cmd_args...]
# Same as collect, with the system output directory as the destination.
collect_system()
{
    # Put the destination directory in front of the caller's arguments.
    set -- "$system_output_dir" "$@"
    collect "$@"
}

# Gather operating-system level diagnostics into $system_output_dir.
# Every `collect_system NAME CMD...` call runs CMD through collect, which
# appends the output to the file NAME and reports a missing command instead of
# failing, so the list below can name tools that are not installed everywhere.
# Globals: system_output_dir, cp_args (read); mount_point, flat_point,
#          collect_limits_dir (written).
collect_system_info()
{
    collect_system blkid        blkid
    collect_system date         date
    collect_system df_h         df -h
    collect_system df_i         df -i
    collect_system dmesg        dmesg
    collect_system dmesg_t      dmesg -T # NB: This will fail on most systems.
    collect_system dmidecode    dmidecode
    collect_system dpkg         dpkg -l
    collect_system printenv     printenv
    collect_system fdisk        fdisk -l
    collect_system free         free -h
    collect_system hostname     hostname
    collect_system ifconfig     ifconfig -a
    collect_system java_version java -version
    collect_system last         last
    collect_system proc_uptime  cat /proc/uptime
    collect_system lsb_release  lsb_release -a
    collect_system lsblk        lsblk -a
    collect_system lscpu        lscpu
    collect_system lsof_tcp     lsof -nPi TCP
    collect_system mount        mount
    collect_system netstat_an   netstat -an
    collect_system netstat_i    netstat -i
    collect_system netstat_rn   netstat -rn
    collect_system netstat_s    netstat -s
    collect_system pfctl_nat    pfctl -s nat
    collect_system pfctl_rules  pfctl -s rules
    collect_system pkg_info     pkg_info
    collect_system ps           ps aux
    collect_system pstree       pstree -panl
    collect_system rpm          rpm -qa
    collect_system sestatus     sestatus -v
    collect_system sysctl       sysctl -a
    collect_system uname        uname -a
    collect_system uptime       uptime
    collect_system vmstat       vmstat -S M 1 10
    collect_system w            w
    collect_system zfs_list     zfs list
    collect_system zpool_list   zpool list

    # If swapctl exists, prefer it over swapon
    if command_exists swapctl
    then
        collect_system swapctl swapctl -s
    else
        collect_system swapon swapon -s
    fi

    # Get device readahead
    if command_exists blockdev
    then
        # Iterate over the first column of every `mount` line that starts with
        # a slash. The output file name is "blockdev" followed by that value
        # with every '/' replaced by '_'.
        for mount_point in $(mount | awk '/^\// { print $1 }')
        do
            flat_point="$(echo "$mount_point" | tr '/' '_')"
            collect_system "blockdev$flat_point" blockdev --getra "$mount_point"
        done
    else
        collect_system blockdev_not_available echo 'blockdev command is not available'
    fi

    if command_exists systemctl
    then
        collect_system systemctl_show_rabbitmq systemctl show rabbitmq-server
        collect_system systemctl_status_rabbitmq systemctl status rabbitmq-server
    else
        collect_system systemctl_not_available echo 'systemctl command is not available'
    fi

    # Running iptables commands if the module is not loaded can automatically
    # load them. This is rarely desired and can even cause connectivity
    # problems if, e.g., nf_conntrack gets autoloaded and autoenabled.
    # Hence iptables is only invoked when /proc/modules already lists the
    # relevant module; otherwise a note is written to the output file instead.
    if [ -f /proc/modules ]
    then
        if grep '^iptable_filter' /proc/modules > /dev/null 2>&1
        then
            collect_system iptables_rules iptables -n -L
        else
            collect_system iptables_rules echo "iptables module not loaded"
        fi

        if grep '^nf_conntrack' /proc/modules > /dev/null 2>&1
        then
            collect_system iptables_nat iptables -t nat -n -L
        else
            collect_system iptables_nat echo "nf_conntrack module not loaded"
        fi
    fi

    # Capture iostat, based on (a guess of) which distribution is running.
    case "$(uname)" in
        Linux)
            collect_system iostat_linux iostat -txm 1 10;;
        *)
            collect_system iostat_bsd iostat -dIw 1 -c 5;;
    esac

    ## Collect Files
    [ -f /etc/release ]              && collect_system release cat /etc/release
    [ -f /etc/redhat-release ]       && collect_system redhat_release cat /etc/redhat-release
    [ -f /etc/debian_version ]       && collect_system debian_version cat /etc/debian_version
    [ -f /etc/security/limits.conf ] && collect_system limits_conf cat /etc/security/limits.conf

    # All three logs share the output file `messages`: collect appends the
    # command output, so their contents are concatenated, while the matching
    # .info file is overwritten and only describes the last command that ran.
    [ -f /var/log/messages ] && collect_system messages cat /var/log/messages
    [ -f /var/log/syslog ]   && collect_system messages cat /var/log/syslog
    [ -f /var/log/kern.log ] && collect_system messages cat /var/log/kern.log

    [ -f /proc/version ]   && collect_system proc_version cat /proc/version
    [ -f /proc/cpuinfo ]   && collect_system proc_cpuinfo cat /proc/cpuinfo
    [ -f /proc/meminfo ]   && collect_system proc_meminfo cat /proc/meminfo
    [ -f /proc/diskstats ] && collect_system proc_diskstats cat /proc/diskstats
    [ -f /proc/vmstat ]    && collect_system proc_vmstat cat /proc/vmstat

    ## Collect Directories and Finds
    # Each find prints the path of every match followed by the contents of the
    # file named in its -exec clause.
    [ -d /dev/disk/by-id ] && collect_system disk_by_id \
                                  ls -l /dev/disk/by-id
    [ -d /sys/block ]      && collect_system schedulers \
                                  find /sys/block/ -type l -print \
                                      -exec cat {}/queue/scheduler \;
    [ -d /proc/net/bonding ] && collect_system bonding \
                                    find /proc/net/bonding/ -type f -print \
                                        -exec cat {} \;
    [ -d /sys/class/net ] && collect_system rx_crc_errors \
                                 find /sys/class/net/ -type l -print \
                                     -exec cat {}/statistics/rx_crc_errors \;

    # Copy the limits.d snippets only when at least one *.conf file exists;
    # the `ls` call is used purely as an existence test for the glob.
    if [ -d /etc/security/limits.d ] && ls -1 /etc/security/limits.d/*.conf > /dev/null 2>&1
    then
        collect_limits_dir="$system_output_dir/limits.d"
        mkdir_or_die "$collect_limits_dir"
        cp "$cp_args" /etc/security/limits.d/*.conf "$collect_limits_dir"
    fi
}

# collect_rabbitmq output_file command [cmd_args...]
# Same as collect, with the RabbitMQ output directory as the destination.
collect_rabbitmq()
{
    # Put the destination directory in front of the caller's arguments.
    set -- "$rabbitmq_output_dir" "$@"
    collect "$@"
}

# Gather RabbitMQ-specific diagnostics into $rabbitmq_output_dir: a dump of the
# current shell variables, selected configuration files, the log directory and
# the output of several rabbitmqctl commands.
# Globals: rabbitmq_output_dir, rabbitmq_etc_dir, RABBITMQ_LOG_BASE, cp_args,
#          verbose (read); pid, rabbitmq_output_logs_dir and several readonly
#          variables (written).
# NOTE(review): the `readonly` assignments below presumably make a second call
# in the same shell fail - confirm before calling this more than once.
collect_rabbitmq_info()
{
    # Dump every variable currently set in this shell, including whatever the
    # sourced environment files defined.
    set > "$rabbitmq_output_dir/shell_variables"

    if [ -d "$rabbitmq_etc_dir" ]
    then
        readonly rabbitmq_output_etc_dir="$rabbitmq_output_dir/etc"
        mkdir_or_die "$rabbitmq_output_etc_dir"
        # gh-31
        # Rather than copying everything in /etc/rabbitmq, only copy *.conf* files and enabled_plugins
        # The `ls` call is only an existence test for the glob.
        if [ -d "$rabbitmq_etc_dir" ] && ls -1 "$rabbitmq_etc_dir"/*.conf* > /dev/null 2>&1
        then
            if [ $verbose -gt 2 ]
            then
                echo "[DEBUG] running command 'cp $cp_args \"$rabbitmq_etc_dir/*.conf*\" \"$rabbitmq_output_etc_dir\""
            else
                printf '.'
            fi
            cp "$cp_args" "$rabbitmq_etc_dir"/*.conf* "$rabbitmq_output_etc_dir"
        fi

        readonly rabbitmq_enabled_plugins_file="$rabbitmq_etc_dir/enabled_plugins"
        if [ -f "$rabbitmq_enabled_plugins_file" ]
        then
            if [ $verbose -gt 2 ]
            then
                echo "[DEBUG] running command 'cp $cp_args \"$rabbitmq_enabled_plugins_file\" \"$rabbitmq_output_etc_dir\""
            else
                printf '.'
            fi
            cp "$cp_args" "$rabbitmq_enabled_plugins_file" "$rabbitmq_output_etc_dir"
        fi

        # Copy the entries of conf.d, when there are any, into etc/conf.d.
        readonly rabbitmq_etc_confd_dir="$rabbitmq_etc_dir/conf.d"
        if [ -d "$rabbitmq_etc_confd_dir" ] && ls -1 "$rabbitmq_etc_confd_dir"/* > /dev/null 2>&1
        then
            readonly rabbitmq_output_etc_confd_dir="$rabbitmq_output_etc_dir/conf.d"
            mkdir_or_die "$rabbitmq_output_etc_confd_dir"
            if [ $verbose -gt 2 ]
            then
                echo "[DEBUG] running command 'cp $cp_args \"$rabbitmq_etc_confd_dir/*\" \"$rabbitmq_output_etc_confd_dir\""
            else
                printf '.'
            fi
            cp "$cp_args" "$rabbitmq_etc_confd_dir"/* "$rabbitmq_output_etc_confd_dir"
        fi
    else
        printf "[WARN] '%s' directory not present.\\n" "$rabbitmq_etc_dir"
    fi

    if [ -d "$RABBITMQ_LOG_BASE" ]
    then
        rabbitmq_output_logs_dir="$rabbitmq_output_dir/logs"
        mkdir_or_die "$rabbitmq_output_logs_dir"
        if [ $verbose -gt 2 ]
        then
            echo "[DEBUG] running command 'tar -C \"$RABBITMQ_LOG_BASE\" -cf - | tar -C \"$rabbitmq_output_logs_dir\" -xf -'"
        else
            printf '.'
        fi
        # Copy the whole log directory by streaming a tar archive of it into a
        # second tar that unpacks into the output directory.
        tar -C "$RABBITMQ_LOG_BASE" -cf - . | tar -C "$rabbitmq_output_logs_dir" -xf -
    else
        printf "[WARN] '%s' directory not present.\\n" "$RABBITMQ_LOG_BASE"
    fi

    # Prefix for the rabbitmqctl calls below. It is expanded unquoted on
    # purpose so that it splits into the `timeout` command and its arguments.
    readonly with_timeout="timeout --kill-after 30 120"

    # shellcheck disable=SC2086
    collect_rabbitmq rabbitmqctl_vhosts $with_timeout rabbitmqctl list_vhosts name default_queue_type cluster_state

    # shellcheck disable=SC2086
    collect_rabbitmq rabbitmqctl_status $with_timeout rabbitmqctl status

    # shellcheck disable=SC2086
    collect_rabbitmq rabbitmqctl_cluster_status $with_timeout rabbitmqctl cluster_status

    # shellcheck disable=SC2086
    collect_rabbitmq rabbitmqctl_maybe_stuck $with_timeout rabbitmqctl eval 'rabbit_diagnostics:maybe_stuck().'

    # Ask the node for its OS process id; `tr` removes the double quotes from
    # the printed value. The limits file is only collected when it exists.
    pid=$($with_timeout rabbitmqctl eval 'os:getpid().' | tr -d '""')
    [ -f "/proc/$pid/limits" ] && collect_rabbitmq rabbitmq_pid_limits cat "/proc/$pid/limits"

    # shellcheck disable=SC2086
    collect_rabbitmq rabbitmqctl_process_limit $with_timeout rabbitmqctl eval 'erlang:system_info(process_limit).'

    # shellcheck disable=SC2086
    collect_rabbitmq rabbitmqctl_report $with_timeout rabbitmqctl report

    # shellcheck disable=SC2086
    collect_rabbitmq rabbitmqctl_inet_i $with_timeout rabbitmqctl eval 'inet:i().'
}

# Write a short summary (host name, IPv4 addresses, RabbitMQ and Erlang
# versions) to "$top_output_dir/overview".
# Globals: top_output_dir, hostname (read); overview_file, ip_addresses,
#          rabbitmq_version, erlang_version (written).
collect_overview_info()
{
    overview_file="$top_output_dir/overview"

    if command_exists ifconfig
    then
        # Accept both ifconfig output styles: the legacy net-tools form
        # ("inet addr:10.0.0.5 ...") and the form without the "addr:" prefix
        # ("inet 10.0.0.5 ..."). Lines starting with "inet6" are skipped.
        ip_addresses="$(ifconfig | awk '$1 == "inet" { sub(/^addr:/, "", $2); print $2 }')"
    elif command_exists ip
    then
        # Fallback for hosts without ifconfig. The fourth field of
        # `ip -o -4 addr show` is "address/prefix"; keep the address only.
        ip_addresses="$(ip -o -4 addr show | awk '{ sub(/\/.*$/, "", $4); print $4 }')"
    else
        ip_addresses=''
    fi

    # `tr` strips the double quotes around the values printed by rabbitmqctl.
    rabbitmq_version="$(rabbitmqctl eval '{_,_,V}=proplists:lookup(rabbit,application:which_applications()),V.' | tr -d '"')"
    erlang_version="$(rabbitmqctl eval 'erlang:system_info(otp_release).' | tr -d '"')"
    {
        echo "Hostname: $hostname"
        echo "IP Addresses:"
        echo "$ip_addresses"
        echo "RabbitMQ: $rabbitmq_version"
        echo "Erlang: $erlang_version"
    } > "$overview_file" 2>&1
}


# Pack the collected output tree into a gzip-compressed tar archive stored in
# a fresh temporary directory and print the archive's location.
# Globals: output_dir, hostname, verbose, mv_args (read);
#          old_output_archive_dir, output_archive_dir, output_archive
#          (written, readonly).
# Returns: 0 when the archive was written, 1 when the archive directory or
#          the archive itself could not be created.
build_output_archive()
{
    old_output_archive_dir="$(mktemp -d)"
    readonly old_output_archive_dir

    # Without this check a failed mktemp would place the archive directly
    # under "/".
    if ! output_archive_dir="$(mktemp -d)" || [ ! -d "$output_archive_dir" ]
    then
        printf "[ERROR] could not create temporary directory for the output archive\\n" 1>&2
        return 1
    fi
    readonly output_archive_dir

    output_archive="$output_archive_dir/rabbitmq-env-$hostname-$(date '+%Y%m%d-%H%M%S').tgz"
    readonly output_archive

    # Log the command that is actually run below.
    if [ "$verbose" -gt 2 ]
    then
        echo "[DEBUG] running command 'tar -C \"$output_dir\" -zcf \"$output_archive\" .'"
    else
        printf '.'
    fi

    printf "[INFO] moving old tgz archives (if any) to : '%s'\\n" "$old_output_archive_dir"
    find "$output_dir" -type f -name 'rabbitmq-env-*.tgz' -exec mv "$mv_args" {} "$old_output_archive_dir" \;

    # rmdir only succeeds on an empty directory, so this removes the holding
    # directory when nothing was moved into it and keeps it otherwise.
    rmdir "$old_output_archive_dir" 2> /dev/null

    # Only announce the archive when tar really produced it.
    if tar -C "$output_dir" -zcf "$output_archive" .
    then
        printf "[INFO] output archive: '%s'\\n" "$output_archive"
    else
        printf "[ERROR] could not create output archive '%s'\\n" "$output_archive" 1>&2
        return 1
    fi
}

# Prepare a collection run.
# Arguments: the script's command-line arguments.
# Options are parsed first because the later steps read the resulting
# `verbose` level.
init()
{
    parse_options "$@"
    user_check
    create_output_directories
    build_command_args
    init_environment
}

# Entry point: initialise, collect system, RabbitMQ and overview information
# into the temporary output tree, then pack that tree into an archive.
# Arguments: the script's command-line arguments.
main()
{
    init "$@"
    collect_system_info
    collect_rabbitmq_info
    collect_overview_info
    build_output_archive
}

# Run the script with the arguments it was invoked with.
main "$@"