Fawkes API Fawkes Development Version
ffwatchdog.cpp
1
2/***************************************************************************
3 * ffwatchdog.cpp - Fawkes process watchdog
4 *
5 * Created: Thu Mar 31 09:53:53 2011 (RoboCup German Open 2011)
6 * Copyright 2011 Tim Niemueller [www.niemueller.de]
7 *
8 ****************************************************************************/
9
10/* This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU Library General Public License for more details.
19 *
20 * Read the full text in the LICENSE.GPL file in the doc directory.
21 */
22
23#include <core/exception.h>
24#include <libdaemon/dfork.h>
25#include <libdaemon/dlog.h>
26#include <libdaemon/dpid.h>
27#include <sys/stat.h>
28#include <sys/wait.h>
29
30#include <cerrno>
31#include <csignal>
32#include <cstdio>
33#include <cstdlib>
34#include <cstring>
35#include <unistd.h>
36
/** Shutdown escalation level. Written by handle_signal() and polled by main():
 * 0 means keep running, 1 and 2 stop the child with SIGINT, 3 with SIGTERM and
 * any higher value with SIGKILL. Declared volatile sig_atomic_t because it is
 * shared between the signal handler and the main flow. */
volatile sig_atomic_t g_quit = 0;
bool                  g_force_quit = false;
/** Number of the most recently received signal. */
volatile sig_atomic_t g_signum = SIGINT;

namespace {

/** Write a string literal to stdout using only async-signal-safe calls.
 * @param msg string literal, the terminating NUL is not written
 */
template <size_t N>
void
write_stdout_literal(const char (&msg)[N])
{
	// Best effort only, nothing sensible can be done about a failed write here.
	const ssize_t written = write(STDOUT_FILENO, msg, N - 1);
	(void)written;
}

} // namespace

/** Signal handler recording the received signal and raising the quit level.
 * Only async-signal-safe operations are used: printf() and strsignal() must
 * not be called from a handler because the interrupted code may itself be
 * inside stdio. The reported names are the usual glibc strsignal() texts.
 * @param signum number of the received signal
 */
void
handle_signal(int signum)
{
	// write() may modify errno, the interrupted code must not observe that.
	const int saved_errno = errno;

	g_signum = signum;
	switch (signum) {
	case SIGINT:
		write_stdout_literal("Received Interrupt signal\n");
		g_quit = g_quit + 1; // sigint escalates with every repetition
		break;
	case SIGTERM:
		write_stdout_literal("Received Terminated signal\n");
		g_quit = 3;
		break;
	case SIGKILL:
		// SIGKILL cannot be caught, so this is only reachable by a direct call.
		write_stdout_literal("Received Killed signal\n");
		g_quit = 4;
		break;
	case SIGUSR1: write_stdout_literal("Received User defined signal 1 signal\n"); break;
	case SIGUSR2: write_stdout_literal("Received User defined signal 2 signal\n"); break;
	default: write_stdout_literal("Received unknown signal\n"); break;
	}

	errno = saved_errno;
}
53
/** Print usage instructions to stdout.
 * The first line names the program, the remainder is a fixed help text
 * describing the program file, its arguments and the watchdog options.
 * @param progname program name shown in the usage line
 */
void
usage(const char *progname)
{
	static const char option_help[] =
	  "progfile full absolute path to executable\n"
	  "args any number of arguments, passed to program as-is\n\n"
	  "where [options] passed in before <progfile> are one or more of:\n"
	  " -D[pid file] Run daemonized in the background, pid file is optional,\n"
	  " defaults to /var/run/ffwatchdog_basename.pid, must be absolute path.\n"
	  " -D[pid file] -k Kill a daemonized process running in the background,\n"
	  " pid file is optional as above.\n"
	  " -D[pid file] -s Check status of daemon.\n"
	  " -h Show help instructions.\n\n";

	printf("Usage: %s [options] <progfile> [args...]\n", progname);
	fputs(option_help, stdout);
}
72
73pid_t
74fork_and_exec(int argc, char **argv, int prog_start)
75{
76 pid_t pid = fork();
77 if (pid == -1) {
78 // error
79 printf("Forking for new process failed: %s\n", strerror(errno));
80 throw fawkes::Exception(errno, "Forking for new process failed: %s");
81 } else if (pid == 0) {
82 // child
83 setsid();
84 signal(SIGINT, SIG_IGN);
85 if (execve(argv[prog_start], &argv[prog_start], environ) == -1) {
86 printf("Failed to execute %s, exited with %i: %s\n",
87 argv[prog_start],
88 errno,
89 strerror(errno));
90 exit(-1);
91 }
92 }
93
94 return pid;
95}
96
97void
98daemonize_cleanup()
99{
100 daemon_retval_send(-1);
101 daemon_retval_done();
102 daemon_pid_file_remove();
103}
104
105pid_t
106daemonize(int argc, char **argv)
107{
108 pid_t pid;
109 mode_t old_umask = umask(0);
110
111 // Prepare for return value passing
112 daemon_retval_init();
113
114 // Do the fork
115 if ((pid = daemon_fork()) < 0) {
116 return -1;
117
118 } else if (pid) { // the parent
119 int ret;
120
121 // Wait for 20 seconds for the return value passed from the daemon process
122 if ((ret = daemon_retval_wait(20)) < 0) {
123 daemon_log(LOG_ERR, "Could not recieve return value from daemon process.");
124 return -1;
125 }
126
127 if (ret != 0) {
128 daemon_log(LOG_ERR, "*** Daemon startup failed, see syslog for details. ***");
129 switch (ret) {
130 case 1: daemon_log(LOG_ERR, "Daemon failed to close file descriptors"); break;
131 case 2: daemon_log(LOG_ERR, "Daemon failed to create PID file"); break;
132 }
133 return -1;
134 } else {
135 return pid;
136 }
137
138 } else { // the daemon
139#ifdef DAEMON_CLOSE_ALL_AVAILABLE
140 if (daemon_close_all(-1) < 0) {
141 daemon_log(LOG_ERR, "Failed to close all file descriptors: %s", strerror(errno));
142 // Send the error condition to the parent process
143 daemon_retval_send(1);
144 return -1;
145 }
146#endif
147
148 // Create the PID file
149 if (daemon_pid_file_create() < 0) {
150 printf("Could not create PID file (%s).", strerror(errno));
151 daemon_log(LOG_ERR, "Could not create PID file (%s).", strerror(errno));
152
153 // Send the error condition to the parent process
154 daemon_retval_send(2);
155 return -1;
156 }
157
158 // Send OK to parent process
159 daemon_retval_send(0);
160
161 daemon_log(LOG_INFO, "Sucessfully started");
162
163 umask(old_umask);
164 return 0;
165 }
166}
167
/** Path of the PID file handed to libdaemon.
 * libdaemon asks for the PID file name through a callback that takes no
 * arguments, hence the path has to live in a global variable. */
const char *ffwatchdog_pid_file = nullptr;

/** Callback returning the PID file name.
 * @return current value of ffwatchdog_pid_file
 */
const char *
ffwatchdog_daemon_pid_file_proc()
{
	const char *const pid_file_path = ffwatchdog_pid_file;
	return pid_file_path;
}
180
181/** Watchdog main.
182 * @param argc argument count
183 * @param argv arguments
184 */
185int
186main(int argc, char **argv)
187{
188 if (argc < 2) {
189 usage(argv[0]);
190 exit(1);
191 }
192
193 bool arg_verbose = false;
194 bool arg_daemonize = false;
195 bool arg_daemon_kill = false;
196 bool arg_daemon_status = false;
197 const char *daemon_pid_file = NULL;
198
199 int prog_start;
200 for (prog_start = 1; prog_start < argc; ++prog_start) {
201 if (argv[prog_start][0] == '-') {
202 // argument starts
203 char param = argv[prog_start][1];
204 if (param == '-') {
205 ++prog_start;
206 break;
207 } else {
208 if (param == 'D') {
209 arg_daemonize = true;
210 daemon_pid_file = NULL;
211 if (strlen(&argv[prog_start][1]) > 1) {
212 daemon_pid_file = &argv[prog_start][2];
213 }
214 } else if (param == 'k') {
215 arg_daemon_kill = true;
216 } else if (param == 's') {
217 arg_daemon_status = true;
218 } else if (param == 'v') {
219 arg_verbose = true;
220 } else if (param == 'h') {
221 usage(argv[0]);
222 exit(0);
223 } else {
224 printf("Unknown argument '%c'\n", param);
225 usage(argv[0]);
226 exit(3);
227 }
228 }
229 } else {
230 break;
231 }
232 }
233
234 if (prog_start >= argc) {
235 usage(argv[0]);
236 exit(1);
237 }
238
239 if (access(argv[prog_start], X_OK) != 0) {
240 printf("Cannot execute '%s': %s\n\n", argv[1], strerror(errno));
241 usage(argv[0]);
242 exit(2);
243 }
244
245 pid_t dpid;
246
247 char *daemon_ident = NULL;
248
249 if (arg_daemonize) {
250 // Set identification string for the daemon for both syslog and PID file
251
252 char *argv_copy = strdup(argv[prog_start]);
253 if (asprintf(&daemon_ident, "ffwatchdog_%s", basename(argv_copy)) == -1) {
254 free(argv_copy);
255 printf("Failed to create daemon ident, not enough memory\n");
256 exit(5);
257 }
258 free(argv_copy);
259 daemon_pid_file_ident = daemon_log_ident = daemon_ident;
260 if (daemon_pid_file != NULL) {
261 ffwatchdog_pid_file = daemon_pid_file;
262 daemon_pid_file_proc = ffwatchdog_daemon_pid_file_proc;
263 }
264
265 // We should daemonize, check if we were called to kill a daemonized copy
266 if (arg_daemon_kill) {
267 // Check that the daemon is not run twice a the same time
268 if ((dpid = daemon_pid_file_is_running()) < 0) {
269 daemon_log(LOG_ERR, "Watchdog daemon for %s not running.", argv[prog_start]);
270 return 1;
271 }
272
273 // Kill daemon with SIGINT
274 int ret;
275 if ((ret = daemon_pid_file_kill_wait(SIGINT, 5)) < 0) {
276 daemon_log(LOG_WARNING, "Failed to kill watchdog daemon for %s", argv[prog_start]);
277 }
278 return (ret < 0) ? 1 : 0;
279 }
280
281 if (arg_daemon_status) {
282 // Check daemon status
283 if (daemon_pid_file_is_running() < 0) {
284 if (arg_verbose) {
285 printf("Watchdog daemon for %s is not running\n", argv[prog_start]);
286 }
287 return 1;
288 } else {
289 if (arg_verbose) {
290 printf("Watchdog daemon for %s is running\n", argv[prog_start]);
291 }
292 return 0;
293 }
294 }
295
296 // Check that the daemon is not run twice a the same time
297 if ((dpid = daemon_pid_file_is_running()) >= 0) {
298 daemon_log(LOG_ERR,
299 "Watchdog daemon for %s already running on (PID %u)",
300 argv[prog_start],
301 dpid);
302 return 201;
303 }
304
305 dpid = daemonize(argc, argv);
306 if (dpid < 0) {
307 daemonize_cleanup();
308 return 201;
309 } else if (dpid) {
310 // parent
311 return 0;
312 } // else child, continue as usual
313 }
314
315 struct sigaction sa;
316 sa.sa_handler = handle_signal;
317 sigemptyset(&sa.sa_mask);
318 sa.sa_flags = 0;
319 sigaction(SIGINT, &sa, NULL);
320 sigaction(SIGKILL, &sa, NULL);
321 sigaction(SIGTERM, &sa, NULL);
322 sigaction(SIGUSR1, &sa, NULL);
323 sigaction(SIGUSR2, &sa, NULL);
324
325 pid_t pid = -1;
326 while (!g_quit) {
327 pid = fork_and_exec(argc, argv, prog_start);
328
329 while (pid != -1 && !g_quit) {
330 int status = 0;
331 pid_t cpid = waitpid(pid, &status, WUNTRACED | WCONTINUED);
332 printf("Wait returned\n");
333
334 if (cpid == -1) {
335 printf("Failed to wait for child: %s\n", strerror(errno));
336 } else if (WIFEXITED(status)) {
337 printf("%i|%s exited, status=%d\n", cpid, argv[prog_start], WEXITSTATUS(status));
338 pid = -1;
339 } else if (WIFSIGNALED(status)) {
340 printf("%i|%s killed by signal %s\n", cpid, argv[prog_start], strsignal(WTERMSIG(status)));
341 pid = -1;
342 } else if (WIFSTOPPED(status)) {
343 printf("%i|%s stopped by signal %s\n", cpid, argv[prog_start], strsignal(WSTOPSIG(status)));
344 pid = -1;
345 } else if (WIFCONTINUED(status)) {
346 printf("%i|%s continued\n", cpid, argv[prog_start]);
347 }
348 }
349 }
350
351 if (pid != -1) {
352 int last_quit = 0;
353 printf("Stopping child. Press Ctrl-C again to escalate.\n");
354
355 for (unsigned int i = 0; i < 600; ++i) {
356 if (last_quit != g_quit) {
357 int signum;
358 if (g_quit <= 2) {
359 signum = SIGINT;
360 } else if (g_quit == 3) {
361 signum = SIGTERM;
362 } else {
363 signum = SIGKILL;
364 }
365
366 printf("Killing %s with signal %s\n", argv[prog_start], strsignal(signum));
367 if (kill(pid, signum) == -1) {
368 printf("Failed to kill %s: %s\n", argv[prog_start], strerror(errno));
369 }
370 }
371 last_quit = g_quit;
372
373 usleep(10000);
374 int status;
375 int rv = waitpid(pid, &status, WNOHANG);
376 if (rv == -1) {
377 if (errno == EINTR)
378 continue;
379 if (errno == ECHILD) {
380 pid = -1;
381 break;
382 }
383 } else if (rv > 0) {
384 pid = -1;
385 break;
386 }
387 if (i >= 300)
388 g_quit = 2;
389 if (i >= 500)
390 g_quit = 3;
391 }
392 }
393
394 if (arg_daemonize) {
395 daemonize_cleanup();
396 }
397
398 return 0;
399}
Base class for exceptions in Fawkes.
Definition: exception.h:36