#!/usr/bin/env bash

# logbt: launch a program and, if it exits in a non-zero way, look for core
# files in the directory named by the system core pattern and print a
# backtrace for each one using gdb (Linux) or lldb (Darwin).
#
# See usage() below for the supported command line flags.

set -eu
set -o pipefail

# This setting enables the desired behavior in the "for corefile in ..." loops:
# a glob that matches nothing expands to nothing instead of to itself.
# more details at https://gist.github.com/springmeyer/6dd234ff89ba306a73608a6f45cb5506
shopt -s nullglob

# Print a "[logbt]"-prefixed message on stderr and exit with status 1.
# Defined before the platform check below because that check calls it.
function error() {
  >&2 echo "[logbt] $*"
  exit 1
}

PLATFORM_UNAME=$(uname -s)
REQUIRED_FILENAME="core"
LOGBT_VERSION="v2.0.3"
BASE_CORE_DIRECTORY=/tmp/logbt-coredumps

if [[ ${PLATFORM_UNAME} == "Linux" ]]; then
  REQUIRED_PATTERN="${REQUIRED_FILENAME}.%p.%E"
  DEBUGGER="gdb"
elif [[ ${PLATFORM_UNAME} == "Darwin" ]]; then
  # Recommend running with the following setting to only show crashes
  # in the notification center
  # defaults write com.apple.CrashReporter UseUNC 1
  REQUIRED_PATTERN="${REQUIRED_FILENAME}.%P"
  DEBUGGER="lldb"
else
  error "Unsupported platform: ${PLATFORM_UNAME}"
fi

# Print a backtrace of all threads for one core file, then delete the core.
# Arguments: $1 - program that produced the core (only passed to gdb)
#            $2 - path to the core file
#            $3 - debugger name; anything containing "lldb" selects lldb
function process_core() {
  local program=${1}
  local corefile=${2}
  local debugger=${3}
  if [[ ${debugger} =~ "lldb" ]]; then
    lldb --core "${corefile}" --batch -o "thread backtrace all" -o "quit"
  else
    gdb "${program}" --core "${corefile}" -ex "set pagination 0" -ex "thread apply all bt" --batch
  fi
  # note: on OS X the -f avoids a hang on prompt "remove write-protected regular file?"
  rm -f "${corefile}"
}

# Derive the crashed program's path from a core file name.
# Everything after the last ".!" is taken and each "!" is turned back into "/".
# A name without ".!" is used whole, with a leading "/" added.
# Arguments: $1 - core file path
# Outputs:   the derived program path on stdout
function _program_from_corefile() {
  local filename
  filename=$(basename "${1}")
  local encoded=${filename##*.\!}
  echo "/${encoded//\!//}"
}

# Process the core file(s) that belong to a specific child pid.
# Arguments: $1 - program, $2 - core directory, $3 - debugger, $4 - child pid
function find_core_by_pid() {
  local program=${1}
  local core_directory=${2}
  local debugger=${3}
  local child_pid=${4}
  local corefile
  if [[ ${PLATFORM_UNAME} == "Darwin" ]]; then
    local single_corefile="${core_directory}/${REQUIRED_FILENAME}.${child_pid}"
    if [[ -e "${single_corefile}" ]]; then
      echo "[logbt] Found corefile at ${single_corefile}"
      process_core "${program}" "${single_corefile}" "${debugger}"
    fi
  else
    local binary_program
    # note: this for loop depends on the `shopt -s nullglob` above
    for corefile in "${core_directory}/${REQUIRED_FILENAME}.${child_pid}".*; do
      echo "[logbt] Found corefile at ${corefile}"
      # extract program name from corefile
      binary_program=$(_program_from_corefile "${corefile}")
      process_core "${binary_program}" "${corefile}" "${debugger}"
    done
  fi
}

# Report and then process every remaining core file in the core directory,
# i.e. cores that were not matched to the tracked child pid.
# Arguments: $1 - program, $2 - core directory, $3 - debugger
function find_remaining_cores() {
  local program=${1}
  local core_directory=${2}
  local debugger=${3}
  local corefile
  local binary_program
  local hit=false
  for corefile in "${core_directory}/${REQUIRED_FILENAME}".*; do
    echo "[logbt] Found corefile (non-tracked) at ${corefile}"
    hit=true
  done
  if [[ ${hit} == true ]]; then
    echo "[logbt] Processing cores..."
  fi
  for corefile in "${core_directory}/${REQUIRED_FILENAME}".*; do
    # the name decoding is linux specific, but harmless to run on osx
    binary_program=$(_program_from_corefile "${corefile}")
    process_core "${binary_program}" "${corefile}" "${debugger}"
  done
}

# Attach the debugger to a running process and print all thread backtraces.
# Arguments: $1 - program (for the log line), $2 - debugger, $3 - pid
function snapshot {
  local program=${1}
  local debugger=${2}
  local child_pid=${3}
  echo "[logbt] snapshotting ${program} (${child_pid})"
  if [[ ${debugger} =~ "lldb" ]]; then
    lldb -p "${child_pid}" --batch -o "thread backtrace all" -o "quit"
  else
    gdb --pid "${child_pid}" -ex "set pagination 0" -ex "thread apply all bt" --batch
  fi
}

# Process the cores of the tracked child first, then any remaining ones.
# Arguments: $1 - program, $2 - core directory, $3 - debugger,
#            $4 - child pid, $5 - child exit status (accepted but unused)
function backtrace {
  local program=${1}
  local core_directory=${2}
  local debugger=${3}
  local child_pid=${4}
  find_core_by_pid "${program}" "${core_directory}" "${debugger}" "${child_pid}"
  find_remaining_cores "${program}" "${core_directory}" "${debugger}"
}

# Print a warning for each core file already present in the directory.
# Arguments: $1 - core directory
function warn_on_existing_cores() {
  local core_directory=${1}
  local corefile
  for corefile in "${core_directory}/${REQUIRED_FILENAME}".*; do
    echo "[logbt] WARNING: Found corefile (existing) at ${corefile}"
  done
}

# Exit with an error at the first core file already present in the directory.
# Arguments: $1 - core directory
function error_on_existing_cores() {
  local core_directory=${1}
  local corefile
  for corefile in "${core_directory}/${REQUIRED_FILENAME}".*; do
    error "Error: Found corefile (unexpected) at ${corefile}"
  done
}

# Ensure we can write to the directory, otherwise
# core files might not be able to be written.
# Arguments: $1 - core directory
function ensure_directory_is_writeable() {
  local core_directory=${1}
  local write_return=0
  touch "${core_directory}/test.txt" || write_return=$?
  if [[ ${write_return} != 0 ]]; then
    error "Permissions problem: unable to write to ${core_directory} (exited with ${write_return})"
  else
    # cleanup from test
    rm "${core_directory}/test.txt"
  fi
}

# Print the core pattern that `--setup` installs.
function get_target_core_pattern() {
  echo "${BASE_CORE_DIRECTORY}/${REQUIRED_PATTERN}"
}

# Print the core pattern currently configured on this system.
function get_core_pattern() {
  local core_pattern=""
  if [[ ${PLATFORM_UNAME} == "Linux" ]]; then
    core_pattern=$(cat /proc/sys/kernel/core_pattern)
  elif [[ ${PLATFORM_UNAME} == "Darwin" ]]; then
    # Recommend running with the following setting to only show crashes
    # in the notification center
    # defaults write com.apple.CrashReporter UseUNC 1
    core_pattern=$(sysctl -n kern.corefile)
  fi
  echo "${core_pattern}"
}

# Exit with an error unless the given core pattern contains REQUIRED_PATTERN.
# The comparison is a literal substring match: the "." characters in the
# required pattern must not act as regex wildcards, because the core file
# searches above look for literal "core.<pid>" names.
# Arguments: $1 - core pattern to check
function validate_core_pattern() {
  local core_pattern=${1:-}
  if [[ ${core_pattern} != *"${REQUIRED_PATTERN}"* ]]; then
    error "unexpected core_pattern: ${core_pattern}"
  fi
}

# Handler for TERM and HUP: forward SIGTERM to the child, wait for it, and
# exit logbt with 128 + the number of the received signal.
# Arguments: $1 - program, $2 - child pid, $3 - signal name (e.g. TERM)
function generic_signal_handler() {
  local program=${1}
  local child_pid=${2}
  local sig=${3}
  # Bug note: On darwin $? will be incorrectly 0 after snapshot here
  # so we ignore $? and instead get the code from the signal
  local code
  code=$(($(kill -l "${sig}") + 128))
  echo "[logbt] received signal:${code} (${sig})"
  echo "[logbt] sending SIGTERM to ${program} (${child_pid})"
  # sleep here to help the stdout show in the right
  # order (accounts for an intermittent case where the above lines print after
  # the child outputs stdout during shutdown)
  sleep 1
  local kill_return=0
  kill -TERM "${child_pid}" || kill_return=$?
  if [[ ${kill_return} != 0 ]]; then
    echo "[logbt] could not terminate child process (kill returned ${kill_return})"
  else
    local child_exit=0
    wait "${child_pid}" || child_exit=$?
    # 143 is 128 + SIGTERM(15): the status of a child ended by our SIGTERM
    if [[ ${child_exit} != 143 ]]; then
      error "child process exited abnormally: ${child_exit}"
    fi
  fi
  echo "[logbt] exiting with ${code}"
  exit "${code}"
}

# NOTE: SIGINT (aka ctrl-c) is special. First of all SIGINT is sent to the
# whole process group automatically in bash (http://stackoverflow.com/a/6804155).
# This means we do not need to reap the child process manually. And if logbt
# is a "foreground" process then SIGINT is ignored if sent directly with
# ./bin/logbt & kill -INT $! (http://stackoverflow.com/a/14697034).
# So the only way to send SIGINT is with ctrl-c or via another terminal that
# has a different process group.

# Handler for INT: wait for the child (it is not signalled from here) and exit
# with the status that was current when the trap fired.
# Arguments: $1 - program, $2 - child pid, $3 - signal name
function sigint_handler() {
  local code=$?
  local program=${1}
  local child_pid=${2}
  local sig=${3}
  echo "[logbt] received signal:${code} (${sig})"
  local child_exit=0
  wait "${child_pid}" || child_exit=$?
  # 130 is 128 + SIGINT(2)
  if [[ ${child_exit} != 130 ]]; then
    echo "[logbt] child process exited with:${child_exit}"
  fi
  exit "${code}"
}

# Validate the environment, start the given command in the background,
# install the signal handlers and wait for the command to finish.
# Does not return: wait_for_child exits the script.
# Arguments: the command to run followed by its arguments
function launch_and_wait() {
  local program=${1}
  local core_pattern
  core_pattern=$(get_core_pattern)
  validate_core_pattern "${core_pattern}"
  local core_directory
  core_directory=$(dirname "${core_pattern}")
  echo "[logbt] using corefile location: ${core_directory}"
  echo "[logbt] using core_pattern: $(basename "${core_pattern}")"

  # ensure we have a debugger installed
  if ! command -v "${DEBUGGER}" > /dev/null; then
    error "Could not find required command '${DEBUGGER}'"
  fi

  if [[ ! -d ${core_directory} ]]; then
    echo "[logbt] creating directory for core files at '${core_directory}'"
    mkdir -p -m a+w "${core_directory}"
  fi

  ensure_directory_is_writeable "${core_directory}"
  warn_on_existing_cores "${core_directory}"

  # Enable corefile generation
  ulimit -c unlimited

  # Run the child process in a background process
  # in order to get the PID
  if [[ ${LD_PRELOAD:-} ]] && [[ ${PLATFORM_UNAME} == 'Darwin' ]]; then
    # on os x DYLD_INSERT_LIBRARIES is blocked from being inherited
    # so we accept LD_PRELOAD and forward it along
    DYLD_INSERT_LIBRARIES=${LD_PRELOAD} LOGBT_PID=$$ "$@" &
    CHILD_PID=$!
  else
    LOGBT_PID=$$ "$@" &
    CHILD_PID=$!
  fi

  # Hook up function to run when logbt received signal.
  # The trap text is re-parsed when the signal arrives, so the program name
  # is shell-quoted to survive spaces and other special characters.
  local quoted_program
  quoted_program=$(printf '%q' "${program}")
  trap "snapshot ${quoted_program} ${DEBUGGER} ${CHILD_PID}" USR1
  trap "generic_signal_handler ${quoted_program} ${CHILD_PID} TERM" TERM
  trap "generic_signal_handler ${quoted_program} ${CHILD_PID} HUP" HUP
  trap "sigint_handler ${quoted_program} ${CHILD_PID} INT" INT

  # Wait for child and attempt to generate a backtrace if child exits in non-zero way
  wait_for_child "${program}" "${core_directory}" "${DEBUGGER}" "${CHILD_PID}"
}

# Wait for the child to exit, print backtraces if it exited in a non-zero way,
# then exit logbt with the same status as the child.
# Arguments: $1 - program, $2 - core directory, $3 - debugger, $4 - child pid
function wait_for_child() {
  local program=${1}
  local core_directory=${2}
  local debugger=${3}
  local child_pid=${4}
  # status `wait` reports when it is interrupted by the trapped USR1 signal
  local usr1_status
  usr1_status=$(($(kill -l USR1) + 128))
  local child_return
  while true; do
    child_return=0
    wait "${child_pid}" || child_return=$?
    # Bug note: on linux USR1 will interrupt `wait`, which makes it look like
    # the child has returned when it has not. So we keep watching the child
    # when the status is 128+USR1 *and* the child is still alive. A child that
    # really exited (including with plain code 10, which `kill -l` would also
    # name USR1) falls through to the reporting below.
    if [[ ${PLATFORM_UNAME} == "Linux" ]] \
      && [[ ${child_return} == "${usr1_status}" ]] \
      && kill -0 "${child_pid}" 2> /dev/null; then
      continue
    fi
    break
  done
  if [[ ${child_return} == 127 ]]; then
    # command not found : http://www.tldp.org/LDP/abs/html/exitcodes.html
    echo "[logbt] command not found: ${program}"
  elif [[ ${child_return} != 0 ]]; then
    local exit_msg="saw '${program}' exit with code:${child_return}"
    # Not every exit code maps to a signal name (e.g. 255). `kill -l` fails
    # for those, which must not abort logbt under `set -e` and hide the
    # child's real exit code.
    local signal_name=""
    signal_name=$(kill -l "${child_return}" 2> /dev/null) || signal_name=""
    if [[ ${signal_name} ]]; then
      exit_msg="${exit_msg} (${signal_name})"
    fi
    echo "[logbt] ${exit_msg}"
    backtrace "${program}" "${core_directory}" "${debugger}" "${child_pid}" "${child_return}"
  fi
  # exit logbt with the same code as the child
  exit "${child_return}"
}

# Install the target core pattern system-wide (requires root privileges),
# create the core directory if needed and fail if it already holds cores.
function setup_logbt() {
  local settable_core_pattern
  settable_core_pattern=$(get_target_core_pattern)
  if [[ ${PLATFORM_UNAME} == "Linux" ]]; then
    echo "[logbt] setting $(cat /proc/sys/kernel/core_pattern) -> ${settable_core_pattern}"
    # write new value to /proc/sys/kernel/core_pattern
    echo "${settable_core_pattern}" > /proc/sys/kernel/core_pattern
  elif [[ ${PLATFORM_UNAME} == "Darwin" ]]; then
    echo "[logbt] setting $(sysctl -n kern.corefile) -> ${settable_core_pattern}"
    sysctl kern.corefile="${settable_core_pattern}"
  fi
  local core_pattern
  core_pattern=$(get_core_pattern)
  validate_core_pattern "${core_pattern}"
  local core_directory
  core_directory=$(dirname "${core_pattern}")
  if [[ ! -d ${core_directory} ]]; then
    echo "[logbt] creating directory for core files at '${core_directory}'"
    mkdir -p -m a+w "${core_directory}"
  fi
  error_on_existing_cores "${core_directory}"
}

# Self test: run a script that kills itself with SIGSEGV under logbt and
# check that logbt exits with 139 and reports a core file.
function test_logbt() {
  ulimit -c unlimited
  # First we create a program that crashes itself
  # We use bash to avoid needing an external dep on some runtime
  # Due to https://github.com/mapbox/logbt/issues/29 we need to copy the bash
  # exe on OS X to a new location since coredumps are disabled for /bin/bash for
  # reasons I don't understand
  if [[ ${PLATFORM_UNAME} == "Darwin" ]]; then
    # copy the system bash to a location owned by this user so that we can
    # cleanup after
    # NOTE(review): --no-preserve is a GNU cp option; confirm the cp found on
    # Darwin hosts accepts it.
    cp --no-preserve=all "$(command -v bash)" /tmp/tmp-bash
    chmod +x /tmp/tmp-bash
    echo '#!/tmp/tmp-bash' > /tmp/crasher.sh
  else
    # on linux the default bash is okay
    echo '#!/usr/bin/env bash' > /tmp/crasher.sh
  fi
  echo 'kill -SIGSEGV $$' >> /tmp/crasher.sh
  chmod +x /tmp/crasher.sh

  # run it in logbt
  local run_status=0
  "${BASH_SOURCE[0]}" -- /tmp/crasher.sh > /tmp/logbt-stdout 2> /tmp/logbt-stderr || run_status=$?

  # A missing corefile takes precedence over a wrong return code.
  local err_message=""
  if [[ ! $(cat /tmp/logbt-stdout) =~ "Found corefile at" ]]; then
    err_message="Expected a corefile to be generated"
  elif [[ ${run_status} != 139 ]]; then
    err_message="Expected return code of 139 and a corefile to be generated"
  fi
  if [[ ${err_message} ]]; then
    # show what the run printed (once) before the files are removed
    cat /tmp/logbt-stdout
    cat /tmp/logbt-stderr
  fi

  # cleanup
  rm -f /tmp/logbt-stderr
  rm -f /tmp/logbt-stdout
  rm -f /tmp/crasher.sh
  rm -f /tmp/tmp-bash

  if [[ ${err_message} ]]; then
    error "${err_message}"
  else
    echo "[logbt] test success (coredumps are working with core pattern: '$(get_core_pattern)')"
  fi
}

# Print the help text on stderr and exit with status 1.
function usage() {
  {
    echo "Usage for logbt:"
    echo ""
    echo "Setup logbt (requires root privileges):"
    echo ""
    echo "$ sudo logbt --setup"
    echo ""
    echo "Test logbt is setup correctly"
    echo ""
    echo "$ logbt --test"
    echo ""
    echo "Launch a program with logbt:"
    echo ""
    echo "$ logbt -- ./program"
    echo ""
    echo "Other commands are:"
    echo ""
    echo " --current-pattern"
    echo " --target-pattern"
    echo " --version"
  } >&2
  exit 1
}

# Print the logbt version.
function get_version() {
  echo "${LOGBT_VERSION}"
}

if [[ ! ${1:-} ]]; then
  usage
fi

# Consume the flags one at a time. Every branch shifts exactly once so that
# "$@" always starts at the argument being examined; this keeps `--` working
# no matter which flags precede it.
while [[ $# -gt 0 ]]; do
  case "${1}" in
    --)
      if [[ ! ${2:-} ]]; then
        usage
      fi
      shift
      # runs the remaining arguments as the command; exits the script
      launch_and_wait "$@"
      ;;
    --setup)
      setup_logbt
      shift
      ;;
    --test)
      test_logbt
      shift
      ;;
    --current-pattern)
      get_core_pattern
      shift
      ;;
    --target-pattern)
      get_target_core_pattern
      shift
      ;;
    -v | --version)
      get_version
      shift
      ;;
    -h | --help)
      usage
      ;;
    *)
      usage
      ;;
  esac
done