#!/usr/bin/python3 # Collect information about a crash and create a report in the directory # specified by apport.fileutils.report_dir. # See https://wiki.ubuntu.com/Apport for details. # # Copyright (c) 2006 - 2016 Canonical Ltd. # Author: Martin Pitt # # This program is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by the # Free Software Foundation; either version 2 of the License, or (at your # option) any later version. See http://www.gnu.org/copyleft/gpl.html for # the full text of the license. import sys, os, os.path, subprocess, time, traceback, pwd, io import signal, inspect, grp, fcntl, socket, atexit, array, struct import errno import apport, apport.fileutils ################################################################# # # functions # ################################################################# def check_lock(): '''Abort if another instance of apport is already running. This avoids bringing down the system to its knees if there is a series of crashes.''' # create a lock file try: fd = os.open("/var/run/apport.lock", os.O_WRONLY | os.O_CREAT | os.O_NOFOLLOW, mode=0o600) except OSError as e: error_log('cannot create lock file (uid %i): %s' % (os.getuid(), str(e))) sys.exit(1) try: fcntl.lockf(fd, fcntl.LOCK_EX | fcntl.LOCK_NB) except IOError: error_log('another apport instance is already running, aborting') sys.exit(1) (pidstat, real_uid, real_gid, cwd, proc_pid_fd) = (None, None, None, None, None) def proc_pid_opener(path, flags): return os.open(path, flags, dir_fd=proc_pid_fd) def get_pid_info(pid): '''Read /proc information about pid''' global pidstat, real_uid, real_gid, cwd, proc_pid_fd proc_pid_fd = os.open('/proc/%s' % pid, os.O_RDONLY | os.O_PATH | os.O_DIRECTORY) # unhandled exceptions on missing or invalidly formatted files are okay # here -- we want to know in the log file pidstat = os.stat('stat', dir_fd=proc_pid_fd) # determine real UID of the target process; do *not* use the owner of # /proc/pid/stat, as that will be root for setuid or unreadable programs! # (this matters when suid_dumpable is enabled) with open('status', opener=proc_pid_opener) as f: for line in f: if line.startswith('Uid:'): real_uid = int(line.split()[1]) elif line.startswith('Gid:'): real_gid = int(line.split()[1]) break assert real_uid is not None, 'failed to parse Uid' assert real_gid is not None, 'failed to parse Gid' cwd = os.open('cwd', os.O_RDONLY | os.O_PATH | os.O_DIRECTORY, dir_fd=proc_pid_fd) def drop_privileges(real_only=False): '''Change user and group to real_[ug]id Normally that irrevocably drops privileges to the real user/group of the target process. With real_only=True only the real IDs are changed, but the effective IDs remain. ''' if real_only: os.setregid(real_gid, -1) os.setreuid(real_uid, -1) else: os.setgid(real_gid) os.setuid(real_uid) assert os.getegid() == real_gid assert os.geteuid() == real_uid assert os.getgid() == real_gid assert os.getuid() == real_uid def init_error_log(): '''Open a suitable error log if sys.stderr is not a tty.''' if not os.isatty(2): log = os.environ.get('APPORT_LOG_FILE', '/var/log/apport.log') try: f = os.open(log, os.O_WRONLY | os.O_CREAT | os.O_APPEND, 0o600) try: admgid = grp.getgrnam('adm')[2] os.chown(log, -1, admgid) os.chmod(log, 0o640) except KeyError: pass # if group adm doesn't exist, just leave it as root except OSError: # on a permission error, don't touch stderr return os.dup2(f, 1) os.dup2(f, 2) sys.stderr = os.fdopen(2, 'wb') if sys.version_info.major >= 3: sys.stderr = io.TextIOWrapper(sys.stderr) sys.stdout = sys.stderr def error_log(msg): '''Output something to the error log.''' apport.error('apport (pid %s) %s: %s', os.getpid(), time.asctime(), msg) def _log_signal_handler(sgn, frame): '''Internal apport signal handler. Just log the signal handler and exit.''' # reset handler so that we do not get stuck in loops signal.signal(sgn, signal.SIG_IGN) try: error_log('Got signal %i, aborting; frame:' % sgn) for s in inspect.stack(): error_log(str(s)) except Exception: pass sys.exit(1) def setup_signals(): '''Install a signal handler for all crash-like signals, so that apport is not called on itself when apport crashed.''' signal.signal(signal.SIGILL, _log_signal_handler) signal.signal(signal.SIGABRT, _log_signal_handler) signal.signal(signal.SIGFPE, _log_signal_handler) signal.signal(signal.SIGSEGV, _log_signal_handler) signal.signal(signal.SIGPIPE, _log_signal_handler) signal.signal(signal.SIGBUS, _log_signal_handler) def write_user_coredump(pid, cwd, limit, from_report=None): '''Write the core into the current directory if ulimit requests it.''' # three cases: # limit == 0: do not write anything # limit < 0: unlimited, write out everything # limit nonzero: crashed process' core size ulimit in bytes if limit == 0: return # don't write a core dump for suid/sgid/unreadable or otherwise # protected executables, in accordance with core(5) # (suid_dumpable==2 and core_pattern restrictions); when this happens, # /proc/pid/stat is owned by root (or the user suid'ed to), but we already # changed to the crashed process' real uid assert pidstat, 'pidstat not initialized' if pidstat.st_uid != os.getuid() or pidstat.st_gid != os.getgid(): error_log('disabling core dump for suid/sgid/unreadable executable') return try: with open('/proc/sys/kernel/core_uses_pid') as f: if f.read().strip() == '0': core_path = 'core' else: core_path = 'core.%s' % (str(pid)) core_file = os.open(core_path, os.O_WRONLY | os.O_CREAT | os.O_EXCL, mode=0o600, dir_fd=cwd) except (OSError, IOError): return error_log('writing core dump to %s (limit: %s)' % (core_path, str(limit))) written = 0 # Priming read if from_report: r = apport.Report() r.load(from_report) core_size = len(r['CoreDump']) if limit > 0 and core_size > limit: error_log('aborting core dump writing, size %i exceeds current limit' % core_size) os.close(core_file) os.unlink(core_path, dir_fd=cwd) return error_log('writing core dump %s of size %i' % (core_path, core_size)) os.write(core_file, r['CoreDump']) else: # read from stdin block = os.read(0, 1048576) while True: size = len(block) if size == 0: break written += size if limit > 0 and written > limit: error_log('aborting core dump writing, size exceeds current limit %i' % limit) os.close(core_file) os.unlink(core_path, dir_fd=cwd) return if os.write(core_file, block) != size: error_log('aborting core dump writing, could not write') os.close(core_file) os.unlink(core_path, dir_fd=cwd) return block = os.read(0, 1048576) os.close(core_file) def usable_ram(): '''Return how many bytes of RAM is currently available that can be allocated without causing major thrashing.''' # abuse our excellent RFC822 parser to parse /proc/meminfo r = apport.Report() with open('/proc/meminfo', 'rb') as f: r.load(f) memfree = int(r['MemFree'].split()[0]) cached = int(r['Cached'].split()[0]) writeback = int(r['Writeback'].split()[0]) return (memfree + cached - writeback) * 1024 def is_closing_session(uid): '''Check if pid is in a closing user session. During that, crashes are common as the session D-BUS and X.org are going away, etc. These crash reports are mostly noise, so should be ignored. ''' with open('environ', 'rb', opener=proc_pid_opener) as e: env = e.read().split(b'\0') for e in env: if e.startswith(b'DBUS_SESSION_BUS_ADDRESS='): dbus_addr = e.split(b'=', 1)[1].decode() break else: error_log('is_closing_session(): no DBUS_SESSION_BUS_ADDRESS in environment') return False orig_uid = os.geteuid() os.setresuid(-1, os.getuid(), -1) try: gdbus = subprocess.Popen(['/usr/bin/gdbus', 'call', '-e', '-d', 'org.gnome.SessionManager', '-o', '/org/gnome/SessionManager', '-m', 'org.gnome.SessionManager.IsSessionRunning'], stdout=subprocess.PIPE, stderr=subprocess.PIPE, env={'DBUS_SESSION_BUS_ADDRESS': dbus_addr}) (out, err) = gdbus.communicate() if err: error_log('gdbus call error: ' + err.decode('UTF-8')) except OSError as e: error_log('gdbus call failed, cannot determine running session: ' + str(e)) return False finally: os.setresuid(-1, orig_uid, -1) error_log('debug: session gdbus call: ' + out.decode('UTF-8')) if out.startswith(b'(false,'): return True return False def is_systemd_watchdog_restart(signum): '''Check if this is a restart by systemd's watchdog''' if signum != str(signal.SIGABRT) or not os.path.isdir('/run/systemd/system'): return False try: with open('cgroup', opener=proc_pid_opener) as f: for line in f: if 'name=systemd:' in line: unit = line.split('/')[-1].strip() break else: return False journalctl = subprocess.Popen(['/bin/journalctl', '--output=cat', '--since=-5min', '--priority=warning', '--unit', unit], stdout=subprocess.PIPE) out = journalctl.communicate()[0] return b'Watchdog timeout' in out except (IOError, OSError) as e: error_log('cannot determine if this crash is from systemd watchdog: %s' % e) return False def is_same_ns(pid, ns): if not os.path.exists('/proc/self/ns/%s' % ns) or \ not os.path.exists('/proc/%s/ns/%s' % (pid, ns)): # If the namespace doesn't exist, then it's obviously shared return True try: if os.readlink('/proc/%s/ns/%s' % (pid, ns)) == os.readlink('/proc/self/ns/%s' % ns): # Check that the inode for both namespaces is the same return True except OSError as e: if e.errno == errno.ENOENT: return True else: raise return False ################################################################# # # main # ################################################################# # systemd socket activation if 'LISTEN_FDS' in os.environ: try: from systemd.daemon import listen_fds except ImportError: error_log('Received a crash via apport-forward.socket, but systemd python module is not installed') sys.exit(0) # Extract and validate the fd fds = listen_fds() if len(fds) < 1: error_log('Invalid socket activation, no fd provided') sys.exit(1) # Open the socket sock = socket.fromfd(int(fds[0]), socket.AF_UNIX, socket.SOCK_STREAM) atexit.register(sock.shutdown, socket.SHUT_RDWR) # Replace stdin by the socket activation fd sys.stdin.close() fds = array.array('i') ucreds = array.array('i') msg, ancdata, flags, addr = sock.recvmsg(4096, 4096) for cmsg_level, cmsg_type, cmsg_data in ancdata: if (cmsg_level == socket.SOL_SOCKET and cmsg_type == socket.SCM_RIGHTS): fds.fromstring(cmsg_data[:len(cmsg_data) - (len(cmsg_data) % fds.itemsize)]) elif (cmsg_level == socket.SOL_SOCKET and cmsg_type == socket.SCM_CREDENTIALS): ucreds.fromstring(cmsg_data[:len(cmsg_data) - (len(cmsg_data) % ucreds.itemsize)]) sys.stdin = os.fdopen(int(fds[0]), 'r') # Replace argv by the arguments received over the socket sys.argv = [sys.argv[0]] sys.argv += msg.decode().split() if len(ucreds) >= 3: sys.argv[1] = "%d" % ucreds[0] if len(sys.argv) != 5: error_log('Received a bad number of arguments from forwarder, received %d, expected 5, aborting.' % len(sys.argv)) sys.exit(1) # Normal startup if len(sys.argv) not in (5, 6, 7): try: print('Usage: %s [global pid] [exe path]' % sys.argv[0]) print('The core dump is read from stdin.') except IOError: # sys.stderr might not actually exist, expecially not when being called # from the kernel pass sys.exit(1) init_error_log() # Check if we received a valid global PID (kernel >= 3.12). If we do, # then compare it with the local PID. If they don't match, it's an # indication that the crash originated from another PID namespace. # Simply log an entry in the host error log and exit 0. if len(sys.argv) >= 6: host_pid = int(sys.argv[5]) if not is_same_ns(host_pid, "pid") and not is_same_ns(host_pid, "mnt"): # If the crash came from a container, don't attempt to handle # locally as that would just result in wrong system information. # Instead, attempt to find apport inside the container and # forward the process information there. if not os.path.exists('/proc/%d/root/run/apport.socket' % host_pid): error_log('host pid %s crashed in a container without apport support' % host_pid) sys.exit(0) proc_host_pid_fd = os.open('/proc/%d' % host_pid, os.O_RDONLY | os.O_PATH | os.O_DIRECTORY) def proc_host_pid_opener(path, flags): return os.open(path, flags, dir_fd=proc_host_pid_fd) # Validate that the crashed binary is owned by the user namespace of the process task_uid = os.stat("exe", dir_fd=proc_host_pid_fd).st_uid try: with open("uid_map", "r", opener=proc_host_pid_opener) as fd: for line in fd: fields = line.split() if len(fields) != 3: continue host_start = int(fields[1]) host_end = host_start + int(fields[2]) if task_uid >= host_start and task_uid <= host_end: break else: error_log("host pid %s crashed in a container with no access to the binary" % host_pid) sys.exit(0) except OSError as e: if e.errno == errno.ENOENT: pass else: raise task_gid = os.stat("exe", dir_fd=proc_host_pid_fd).st_gid try: with open("gid_map", "r", opener=proc_host_pid_opener) as fd: for line in fd: fields = line.split() if len(fields) != 3: continue host_start = int(fields[1]) host_end = host_start + int(fields[2]) if task_gid >= host_start and task_gid <= host_end: break else: error_log("host pid %s crashed in a container with no access to the binary" % host_pid) sys.exit(0) except OSError as e: if e.errno == errno.ENOENT: pass else: raise # Chdir and chroot to the task # WARNING: After this point, all "import" calls are security issues __builtins__.__dict__['__import__'] = None host_cwd = os.open('cwd', os.O_RDONLY | os.O_PATH | os.O_DIRECTORY, dir_fd=proc_host_pid_fd) os.chdir(host_cwd) # WARNING: we really should be using a file descriptor here # but os.chroot won't take one. os.chroot('/proc/%d/root' % host_pid) sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) try: sock.connect('/run/apport.socket') except Exception as e: error_log('host pid %s crashed in a container with a broken apport' % sys.argv[5]) sys.exit(0) # Send all arguments except for the first (exec path) and last (global pid) args = ' '.join(sys.argv[1:5]) try: sock.sendmsg([args.encode()], [ # Send a ucred containing the global pid (socket.SOL_SOCKET, socket.SCM_CREDENTIALS, struct.pack("3i", host_pid, 0, 0)), # Send fd 0 (the coredump) (socket.SOL_SOCKET, socket.SCM_RIGHTS, array.array('i', [0]))]) sock.shutdown(socket.SHUT_RDWR) except socket.timeout: error_log('Container apport failed to process crash within 30s') sys.exit(0) sys.exit(0) elif not is_same_ns(host_pid, "mnt"): error_log('host pid %s crashed in a separate mount namespace, ignoring' % host_pid) sys.exit(0) else: # If it doesn't look like the crash originated from within a # full container or if the is_same_ns() function fails open (returning # True), then take the global pid and replace the local pid with it, # then move on to normal handling. # This bit is needed because some software like the chrome # sandbox will use container namespaces as a security measure but are # still otherwise host processes. When that's the case, we need to keep # handling those crashes locally using the global pid. sys.argv[1] = str(host_pid) check_lock() try: setup_signals() (pid, signum, core_ulimit, dump_mode) = sys.argv[1:5] get_pid_info(pid) # Partially drop privs to gain proper os.access() checks drop_privileges(True) error_log('called for pid %s, signal %s, core limit %s, dump mode %s' % (pid, signum, core_ulimit, dump_mode)) try: core_ulimit = int(core_ulimit) except ValueError: error_log('core limit is invalid, disabling core files') core_ulimit = 0 # clamp core_ulimit to a sensible size, for -1 the kernel reports something # absurdly big if core_ulimit > 9223372036854775807: error_log('ignoring implausibly big core limit, treating as unlimited') core_ulimit = -1 if dump_mode == '2': error_log('not creating core for pid with dump mode of %s' % (dump_mode)) # a report should be created but not a core file core_ulimit = 0 # ignore SIGQUIT (it's usually deliberately generated by users) if signum == str(int(signal.SIGQUIT)): drop_privileges() write_user_coredump(pid, cwd, core_ulimit) sys.exit(0) # check if the executable was modified after the process started (e. g. # package got upgraded in between) exe_mtime = os.stat('exe', dir_fd=proc_pid_fd).st_mtime process_start = os.lstat('cmdline', dir_fd=proc_pid_fd).st_mtime if not os.path.exists(os.readlink('exe', dir_fd=proc_pid_fd)) or exe_mtime > process_start: error_log('executable was modified after program start, ignoring') sys.exit(0) info = apport.Report('Crash') info['Signal'] = signum core_size_limit = usable_ram() * 3 / 4 if sys.version_info.major < 3: info['CoreDump'] = (sys.stdin, True, core_size_limit, True) else: # read binary data from stdio info['CoreDump'] = (sys.stdin.detach(), True, core_size_limit, True) # We already need this here to figure out the ExecutableName (for scripts, # etc). if len(sys.argv) >= 7: exec_path = sys.argv[6].replace('!', '/') if os.path.exists(exec_path): info['ExecutablePath'] = exec_path euid = os.geteuid() egid = os.getegid() try: # Drop permissions temporarily to make sure that we don't # include information in the crash report that the user should # not be allowed to access. os.seteuid(os.getuid()) os.setegid(os.getgid()) info.add_proc_info(proc_pid_fd=proc_pid_fd) finally: os.seteuid(euid) os.setegid(egid) if 'ExecutablePath' not in info: error_log('could not determine ExecutablePath, aborting') sys.exit(1) subject = info['ExecutablePath'].replace('/', '_') base = '%s.%s.%s.hanging' % (subject, str(pidstat.st_uid), pid) hanging = os.path.join(apport.fileutils.report_dir, base) if os.path.exists(hanging): if (os.stat('/proc/uptime').st_ctime < os.stat(hanging).st_mtime): info['ProblemType'] = 'Hang' os.unlink(hanging) if 'InterpreterPath' in info: error_log('script: %s, interpreted by %s (command line "%s")' % (info['ExecutablePath'], info['InterpreterPath'], info['ProcCmdline'])) else: error_log('executable: %s (command line "%s")' % (info['ExecutablePath'], info['ProcCmdline'])) # ignore non-package binaries (unless configured otherwise) if not apport.fileutils.likely_packaged(info['ExecutablePath']): if not apport.fileutils.get_config('main', 'unpackaged', False, bool=True): error_log('executable does not belong to a package, ignoring') # check if the user wants a core dump drop_privileges() write_user_coredump(pid, cwd, core_ulimit) sys.exit(0) # ignore SIGXCPU and SIGXFSZ since this indicates some external # influence changing soft RLIMIT values when running programs. if signum in [str(signal.SIGXCPU), str(signal.SIGXFSZ)]: error_log('Ignoring signal %s (caused by exceeding soft RLIMIT)' % signum) drop_privileges() write_user_coredump(pid, cwd, core_ulimit) sys.exit(0) # ignore blacklisted binaries if info.check_ignored(): error_log('executable version is blacklisted, ignoring') sys.exit(0) if is_closing_session(pidstat.st_uid): error_log('happens for shutting down session, ignoring') sys.exit(0) # ignore systemd watchdog kills; most often they don't tell us the actual # reason (kernel hang, etc.), LP #1433320 if is_systemd_watchdog_restart(signum): error_log('Ignoring systemd watchdog restart') sys.exit(0) crash_counter = 0 # Create crash report file descriptor for writing the report into # report_dir try: report = '%s/%s.%i.crash' % (apport.fileutils.report_dir, info['ExecutablePath'].replace('/', '_'), pidstat.st_uid) if os.path.exists(report): if apport.fileutils.seen_report(report): # do not flood the logs and the user with repeated crashes with open(report, 'rb') as f: crash_counter = apport.fileutils.get_recent_crashes(f) crash_counter += 1 if crash_counter > 1: drop_privileges() write_user_coredump(pid, cwd, core_ulimit) error_log('this executable already crashed %i times, ignoring' % crash_counter) sys.exit(0) # remove the old file, so that we can create the new one with # os.O_CREAT|os.O_EXCL os.unlink(report) else: error_log('apport: report %s already exists and unseen, doing nothing to avoid disk usage DoS' % report) drop_privileges() write_user_coredump(pid, cwd, core_ulimit) sys.exit(0) # we prefer having a file mode of 0 while writing; this doesn't work # for suid binaries as we completely drop privs and thus can't chmod # them later on if pidstat.st_uid != os.getuid(): mode = 0o640 else: mode = 0 fd = os.open(report, os.O_RDWR | os.O_CREAT | os.O_EXCL, mode) reportfile = os.fdopen(fd, 'w+b') assert reportfile.fileno() > sys.stderr.fileno() # Make sure the crash reporting daemon can read this report try: gid = pwd.getpwnam('whoopsie').pw_gid os.fchown(fd, pidstat.st_uid, gid) except (OSError, KeyError): os.fchown(fd, pidstat.st_uid, pidstat.st_gid) except (OSError, IOError) as e: error_log('Could not create report file: %s' % str(e)) sys.exit(1) # Totally drop privs before writing out the reportfile. drop_privileges() info.add_user_info() info.add_os_info() if crash_counter > 0: info['CrashCounter'] = '%i' % crash_counter try: info.write(reportfile) if reportfile != sys.stderr: # Ensure that the file gets written to disk in the event of an # Upstart crash. if info.get('ExecutablePath', '') == '/sbin/init': reportfile.flush() os.fsync(reportfile.fileno()) parent_directory = os.path.dirname(report) try: fd = os.open(parent_directory, os.O_RDONLY) os.fsync(fd) finally: os.close(fd) except IOError: if reportfile != sys.stderr: os.unlink(report) raise if 'CoreDump' not in info: error_log('core dump exceeded %i MiB, dropped from %s to avoid memory overflow' % (core_size_limit / 1048576, report)) if report and mode == 0: # for non-suid programs, make the report writable now, when it's # completely written os.chmod(report, 0o640) if reportfile != sys.stderr: error_log('wrote report %s' % report) # Check if the user wants a core file. We need to create that from the # written report, as we can only read stdin once and write_user_coredump() # might abort reading from stdin and remove the written core file when # core_ulimit is > 0 and smaller than the core size. reportfile.seek(0) write_user_coredump(pid, cwd, core_ulimit, from_report=reportfile) except (SystemExit, KeyboardInterrupt): raise except Exception as e: error_log('Unhandled exception:') traceback.print_exc() error_log('pid: %i, uid: %i, gid: %i, euid: %i, egid: %i' % ( os.getpid(), os.getuid(), os.getgid(), os.geteuid(), os.getegid())) error_log('environment: %s' % str(os.environ))