Caffe2 - C++ API
A deep learning, cross platform ML framework
signal_handler.cc
1 #include "caffe2/utils/signal_handler.h"
2 #include "caffe2/core/logging.h"
3 
4 #if defined(CAFFE2_SUPPORTS_SIGNAL_HANDLER)
5 
6 // Normal signal handler implementation.
7 #include <cxxabi.h>
8 #include <dirent.h>
9 #include <dlfcn.h>
10 #include <pthread.h>
11 #include <sys/syscall.h>
12 #include <sys/types.h>
13 #include <unistd.h>
14 #include <unwind.h>
15 
16 #include <atomic>
17 #include <csignal>
18 #include <cstdio>
19 #include <cstdlib>
20 #include <mutex>
21 #include <unordered_set>
22 
23 #include "caffe2/core/init.h"
24 
25 #if CAFFE2_ANDROID
26 #ifndef SYS_gettid
27 #define SYS_gettid __NR_gettid
28 #endif
29 #ifndef SYS_tgkill
30 #define SYS_tgkill __NR_tgkill
31 #endif
32 #endif
33 
34 namespace {
35 
// Dispositions that were installed for SIGHUP / SIGINT before hookupHandler()
// replaced them. handleSignal() chains to them and unhookHandler() restores
// them.
struct sigaction previousSighup;
struct sigaction previousSigint;
// Number of SIGINT / SIGHUP deliveries seen by handleSignal().
std::atomic<int> sigintCount(0);
std::atomic<int> sighupCount(0);
// Number of outstanding hookupHandler() calls not yet matched by
// unhookHandler().
std::atomic<int> hookedUpCount(0);

// Calls the handler recorded in `previous` with `signal`, unless the recorded
// disposition is SIG_DFL or SIG_IGN. Those two are sentinel values rather than
// callable functions, so invoking them would crash (SIG_IGN is what a process
// started with the signal ignored inherits).
void forwardToPreviousHandler(const struct sigaction& previous, int signal) {
  const auto handler = previous.sa_handler;
  if (handler == SIG_DFL || handler == SIG_IGN) {
    return;
  }
  // TODO: what if the previous handler uses sa_sigaction? We are installed
  // without SA_SIGINFO, so there is no siginfo/context to pass along.
  handler(signal);
}

// Signal handler shared by SIGHUP and SIGINT: bumps the matching counter and
// then chains to whatever handler was installed before ours. Any other signal
// number is ignored.
void handleSignal(int signal) {
  switch (signal) {
    case SIGHUP:
      sighupCount += 1;
      forwardToPreviousHandler(previousSighup, signal);
      break;
    case SIGINT:
      sigintCount += 1;
      forwardToPreviousHandler(previousSigint, signal);
      break;
  }
}
59 
60 void hookupHandler() {
61  if (hookedUpCount++) {
62  return;
63  }
64  struct sigaction sa;
65  // Setup the handler
66  sa.sa_handler = &handleSignal;
67  // Restart the system call, if at all possible
68  sa.sa_flags = SA_RESTART;
69  // Block every signal during the handler
70  sigfillset(&sa.sa_mask);
71  // Intercept SIGHUP and SIGINT
72  if (sigaction(SIGHUP, &sa, &previousSighup) == -1) {
73  LOG(FATAL) << "Cannot install SIGHUP handler.";
74  }
75  if (sigaction(SIGINT, &sa, &previousSigint) == -1) {
76  LOG(FATAL) << "Cannot install SIGINT handler.";
77  }
78 }
79 
80 // Set the signal handlers to the default.
81 void unhookHandler() {
82  if (--hookedUpCount > 0) {
83  return;
84  }
85  struct sigaction sa;
86  // Setup the sighub handler
87  sa.sa_handler = SIG_DFL;
88  // Restart the system call, if at all possible
89  sa.sa_flags = SA_RESTART;
90  // Block every signal during the handler
91  sigfillset(&sa.sa_mask);
92  // Intercept SIGHUP and SIGINT
93  if (sigaction(SIGHUP, &previousSighup, nullptr) == -1) {
94  LOG(FATAL) << "Cannot uninstall SIGHUP handler.";
95  }
96  if (sigaction(SIGINT, &previousSigint, nullptr) == -1) {
97  LOG(FATAL) << "Cannot uninstall SIGINT handler.";
98  }
99 }
100 
101 #if defined(CAFFE2_SUPPORTS_FATAL_SIGNAL_HANDLERS)
// Guards fatalSignalHandlersInstalled.
std::mutex fatalSignalHandlersInstallationMutex;
bool fatalSignalHandlersInstalled = false;
// The SIGUSR2 disposition that was active before ours; kept so it can still
// be invoked when a SIGUSR2 arrives that we did not send ourselves.
struct sigaction previousSigusr2;
// True once a fatal signal is being handled. Tells the SIGUSR2 handler to
// print a stack trace instead of falling back to the previous handler.
std::atomic<bool> fatalSignalReceived{false};
// Name and number of the fatal signal, published for the threads that print
// their backtraces so they can say why.
const char* fatalSignalName = "<UNKNOWN>";
int fatalSignum = -1;
// Condition/mutex pair the fatal handler uses to wait for each other thread
// to finish writing its stack trace (pthread_join is not an option because a
// tid cannot be converted to a pthread_t).
pthread_cond_t writingCond = PTHREAD_COND_INITIALIZER;
pthread_mutex_t writingMutex = PTHREAD_MUTEX_INITIALIZER;
120 
// One row per fatal signal we intercept: its printable name, its number, and
// the disposition that was active before our handler was installed.
struct FatalSignalEntry {
  const char* name;
  int signum;
  struct sigaction previous;
};

// Table of intercepted fatal signals. The final row with a null name is the
// end-of-table sentinel that the lookup loops stop on.
FatalSignalEntry kSignalHandlers[] = {
    {"SIGABRT", SIGABRT, {}},
    {"SIGINT", SIGINT, {}},
    {"SIGILL", SIGILL, {}},
    {"SIGFPE", SIGFPE, {}},
    {"SIGBUS", SIGBUS, {}},
    {"SIGSEGV", SIGSEGV, {}},
    {nullptr, 0, {}},
};
134 
135 struct sigaction* getPreviousSigaction(int signum) {
136  for (auto handler = kSignalHandlers; handler->name != nullptr; handler++) {
137  if (handler->signum == signum) {
138  return &handler->previous;
139  }
140  }
141  return nullptr;
142 }
143 
144 const char* getSignalName(int signum) {
145  for (auto handler = kSignalHandlers; handler->name != nullptr; handler++) {
146  if (handler->signum == signum) {
147  return handler->name;
148  }
149  }
150  return nullptr;
151 }
152 
// Callback for _Unwind_Backtrace: appends the instruction pointer of the
// current frame to the std::vector<uintptr_t> passed through `userInfo`.
// Always returns _URC_NO_REASON.
_Unwind_Reason_Code unwinder(struct _Unwind_Context* context, void* userInfo) {
  auto* frames = static_cast<std::vector<uintptr_t>*>(userInfo);
  frames->push_back(_Unwind_GetIP(context));
  return _URC_NO_REASON;
}
158 
159 std::vector<uintptr_t> getBacktrace() {
160  std::vector<uintptr_t> pcs;
161  _Unwind_Backtrace(unwinder, &pcs);
162  return pcs;
163 }
164 
165 void printStacktrace() {
166  std::vector<uintptr_t> pcs = getBacktrace();
167  Dl_info info;
168  size_t i = 0;
169  for (uintptr_t pcAddr : pcs) {
170  const void* pc = reinterpret_cast<const void*>(pcAddr);
171  const char* path = nullptr;
172  const char* name = "???";
173  char* demangled = nullptr;
174  int offset = -1;
175 
176  std::cerr << "[" << i << "] ";
177  if (dladdr(pc, &info)) {
178  path = info.dli_fname;
179  name = info.dli_sname ?: "???";
180  offset = reinterpret_cast<uintptr_t>(pc) -
181  reinterpret_cast<uintptr_t>(info.dli_saddr);
182 
183  int status;
184  demangled = abi::__cxa_demangle(name, nullptr, nullptr, &status);
185  if (status == 0) {
186  name = demangled;
187  }
188  }
189  std::cerr << name;
190  if (offset >= 0) {
191  std::cerr << "+" << reinterpret_cast<void*>(offset);
192  }
193  std::cerr << "(" << pc << ")";
194  if (path) {
195  std::cerr << " in " << path;
196  }
197  std::cerr << std::endl;
198  if (demangled) {
199  free(demangled);
200  }
201  i += 1;
202  }
203 }
204 
// Invokes the handler described by `action` for `signum`.
//  - With SA_SIGINFO set, the three-argument sa_sigaction form is called with
//    `info` and `ctx`.
//  - Otherwise the one-argument sa_handler form is called.
// Nothing is called when `action` is null, when the handler pointer is null,
// or when the disposition is SIG_DFL or SIG_IGN: those are sentinel values,
// not functions, and calling them would crash.
void callPreviousSignalHandler(
    struct sigaction* action,
    int signum,
    siginfo_t* info,
    void* ctx) {
  if (action == nullptr) {
    return;
  }
  if ((action->sa_flags & SA_SIGINFO) == SA_SIGINFO) {
    if (action->sa_sigaction != nullptr) {
      action->sa_sigaction(signum, info, ctx);
    }
    return;
  }
  if (action->sa_handler == SIG_DFL || action->sa_handler == SIG_IGN) {
    return;
  }
  action->sa_handler(signum);
}
219 
220 // needsLock signals whether we need to lock our writing mutex.
221 void stacktraceSignalHandler(bool needsLock) {
222  if (needsLock) {
223  pthread_mutex_lock(&writingMutex);
224  }
225  pid_t tid = syscall(SYS_gettid);
226  std::cerr << fatalSignalName << "(" << fatalSignum << "), Thread " << tid
227  << ": " << std::endl;
228  printStacktrace();
229  std::cerr << std::endl;
230  if (needsLock) {
231  pthread_mutex_unlock(&writingMutex);
232  pthread_cond_signal(&writingCond);
233  }
234 }
235 
236 // Our fatal signal entry point
237 void fatalSignalHandler(int signum) {
238  // Check if this is a proper signal that we declared above.
239  const char* name = getSignalName(signum);
240  if (!name) {
241  return;
242  }
243  if (fatalSignalReceived) {
244  return;
245  }
246  // Set the flag so that our SIGUSR2 handler knows that we're aborting and
247  // that it should intercept any SIGUSR2 signal.
248  fatalSignalReceived = true;
249  // Set state for other threads.
250  fatalSignum = signum;
251  fatalSignalName = name;
252  // Linux doesn't have a nice userland API for enumerating threads so we
253  // need to use the proc pseudo-filesystem.
254  DIR* procDir = opendir("/proc/self/task");
255  if (procDir) {
256  pid_t pid = getpid();
257  pid_t currentTid = syscall(SYS_gettid);
258  struct dirent* entry;
259  pthread_mutex_lock(&writingMutex);
260  while ((entry = readdir(procDir)) != nullptr) {
261  if (entry->d_name[0] == '.') {
262  continue;
263  }
264  pid_t tid = atoi(entry->d_name);
265  // If we've found the current thread then we'll jump into the SIGUSR2
266  // handler before calling pthread_cond_wait thus deadlocking, so branch
267  // our directly to the backtrace handler instead of signaling it.
268  if (tid != currentTid) {
269  syscall(SYS_tgkill, pid, tid, SIGUSR2);
270  pthread_cond_wait(&writingCond, &writingMutex);
271  } else {
272  stacktraceSignalHandler(false);
273  }
274  }
275  pthread_mutex_unlock(&writingMutex);
276  } else {
277  perror("Failed to open /proc/self/task");
278  }
279  sigaction(signum, getPreviousSigaction(signum), nullptr);
280  raise(signum);
281 }
282 
283 // Our SIGUSR2 entry point
284 void stacktraceSignalHandler(int signum, siginfo_t* info, void* ctx) {
285  if (fatalSignalReceived) {
286  stacktraceSignalHandler(true);
287  } else {
288  // We don't want to actually change the signal handler as we want to
289  // remain the signal handler so that we may get the usr2 signal later.
290  callPreviousSignalHandler(&previousSigusr2, signum, info, ctx);
291  }
292 }
293 
294 // Installs SIGABRT signal handler so that we get stack traces
295 // from every thread on SIGABRT caused exit. Also installs SIGUSR2 handler
296 // so that threads can communicate with each other (be sure if you use SIGUSR2)
297 // to install your handler before initing caffe2 (we properly fall back to
298 // the previous handler if we didn't initiate the SIGUSR2).
299 void installFatalSignalHandlers() {
300  std::lock_guard<std::mutex> locker(fatalSignalHandlersInstallationMutex);
301  if (fatalSignalHandlersInstalled) {
302  return;
303  }
304  fatalSignalHandlersInstalled = true;
305  struct sigaction sa;
306  sigemptyset(&sa.sa_mask);
307  // Since we'll be in an exiting situation it's possible there's memory
308  // corruption, so make our own stack just in case.
309  sa.sa_flags = SA_ONSTACK | SA_SIGINFO;
310  sa.sa_handler = ::fatalSignalHandler;
311  for (auto* handler = kSignalHandlers; handler->name != nullptr; handler++) {
312  if (sigaction(handler->signum, &sa, &handler->previous)) {
313  std::string str("Failed to add ");
314  str += handler->name;
315  str += " handler!";
316  perror(str.c_str());
317  }
318  }
319  sa.sa_sigaction = ::stacktraceSignalHandler;
320  if (sigaction(SIGUSR2, &sa, &::previousSigusr2)) {
321  perror("Failed to add SIGUSR2 handler!");
322  }
323 }
324 
325 void uninstallFatalSignalHandlers() {
326  std::lock_guard<std::mutex> locker(fatalSignalHandlersInstallationMutex);
327  if (!fatalSignalHandlersInstalled) {
328  return;
329  }
330  fatalSignalHandlersInstalled = false;
331  for (auto* handler = kSignalHandlers; handler->name != nullptr; handler++) {
332  if (sigaction(handler->signum, &handler->previous, nullptr)) {
333  std::string str("Failed to remove ");
334  str += handler->name;
335  str += " handler!";
336  perror(str.c_str());
337  } else {
338  handler->previous = {};
339  }
340  }
341  if (sigaction(SIGUSR2, &::previousSigusr2, nullptr)) {
342  perror("Failed to add SIGUSR2 handler!");
343  } else {
344  ::previousSigusr2 = {};
345  }
346 }
347 #endif // defined(CAFFE2_SUPPORTS_FATAL_SIGNAL_HANDLERS)
348 
349 } // namespace
350 
351 #if defined(CAFFE2_SUPPORTS_FATAL_SIGNAL_HANDLERS)
352 CAFFE2_DEFINE_bool(
353  caffe2_print_stacktraces,
354  false,
355  "If set, prints stacktraces when a fatal signal is raised.");
356 #endif
357 
358 namespace caffe2 {
359 
360 SignalHandler::SignalHandler(
361  SignalHandler::Action SIGINT_action,
362  SignalHandler::Action SIGHUP_action)
363  : SIGINT_action_(SIGINT_action),
364  SIGHUP_action_(SIGHUP_action),
365  my_sigint_count_(sigintCount),
366  my_sighup_count_(sighupCount) {
367  hookupHandler();
368 }
369 
370 SignalHandler::~SignalHandler() {
371  unhookHandler();
372 }
373 
374 // Return true iff a SIGINT has been received since the last time this
375 // function was called.
376 bool SignalHandler::GotSIGINT() {
377  uint64_t count = sigintCount;
378  bool result = (count != my_sigint_count_);
379  my_sigint_count_ = count;
380  return result;
381 }
382 
383 // Return true iff a SIGHUP has been received since the last time this
384 // function was called.
385 bool SignalHandler::GotSIGHUP() {
386  uint64_t count = sighupCount;
387  bool result = (count != my_sighup_count_);
388  my_sighup_count_ = count;
389  return result;
390 }
391 
392 SignalHandler::Action SignalHandler::CheckForSignals() {
393  if (GotSIGHUP()) {
394  return SIGHUP_action_;
395  }
396  if (GotSIGINT()) {
397  return SIGINT_action_;
398  }
399  return SignalHandler::Action::NONE;
400 }
401 
402 #if defined(CAFFE2_SUPPORTS_FATAL_SIGNAL_HANDLERS)
403 void setPrintStackTracesOnFatalSignal(bool print) {
404  if (print) {
405  installFatalSignalHandlers();
406  } else {
407  uninstallFatalSignalHandlers();
408  }
409 }
410 bool printStackTracesOnFatalSignal() {
411  std::lock_guard<std::mutex> locker(fatalSignalHandlersInstallationMutex);
412  return fatalSignalHandlersInstalled;
413 }
414 
415 namespace internal {
416 bool Caffe2InitFatalSignalHandler(int*, char***) {
417  if (caffe2::FLAGS_caffe2_print_stacktraces) {
418  setPrintStackTracesOnFatalSignal(true);
419  }
420  return true;
421 }
422 
423 REGISTER_CAFFE2_INIT_FUNCTION(
424  Caffe2InitFatalSignalHandler,
425  &Caffe2InitFatalSignalHandler,
426  "Inits signal handlers for fatal signals so we can see what if"
427  " caffe2_print_stacktraces is set.");
428 
429 } // namepsace internal
430 #endif // defined(CAFFE2_SUPPORTS_FATAL_SIGNAL_HANDLERS)
431 } // namespace caffe2
432 
433 #else // defined(CAFFE2_SUPPORTS_SIGNAL_HANDLER)
434 
435 // TODO: Currently we do not support signal handling in non-Linux yet - below is
436 // a minimal implementation that makes things compile.
437 namespace caffe2 {
438 SignalHandler::SignalHandler(
439  SignalHandler::Action SIGINT_action,
440  SignalHandler::Action SIGHUP_action) {}
441 SignalHandler::~SignalHandler() {}
442 bool SignalHandler::GotSIGINT() {
443  return false;
444 }
445 bool SignalHandler::GotSIGHUP() {
446  return false;
447 }
448 SignalHandler::Action SignalHandler::CheckForSignals() {
449  return SignalHandler::Action::NONE;
450 }
451 } // namespace caffe2
452 
453 #endif // defined(CAFFE2_SUPPORTS_SIGNAL_HANDLER)
A global dictionary that holds information about what Caffe2 modules have been loaded in the current ...