#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * TODO: * - Don't start procs instantly after they die, wait some seconds first. */ const char supdir[] = "/home/marius/r/svc"; char *super_path[] = { "/home/marius/r/supervise", NULL, NULL }; volatile sig_atomic_t wait_for_child = 0; /* Do we have to call wait in the main loop? */ volatile sig_atomic_t terminate = 0; /* Exit the main-loop */ struct svc { char dir[MAXNAMLEN + 1]; pid_t supervisor; TAILQ_ENTRY(svc) entries; }; TAILQ_HEAD(svclist, svc) services = TAILQ_HEAD_INITIALIZER(services); struct svc * find_svc_sup(pid_t p) { struct svc *np; TAILQ_FOREACH(np, &services, entries) { if (np->supervisor == p) return np; } return NULL; } struct svc * find_svc_dir(const char *dir) { struct svc *np; TAILQ_FOREACH(np, &services, entries) { if (strcmp(np->dir, dir) == 0) return np; } return NULL; } void remove_svc(struct svc *service) { TAILQ_REMOVE(&services, service, entries); if (service->supervisor != -1) fprintf(stderr, "Removed service (%s) with pid (%d) for supervisor\n", service->dir, service->supervisor); free(service); } int direxists(const char *dir) { int r = 0; struct stat sb; char *path; if ((asprintf(&path, "%s/%s", supdir, dir)) == -1 || path == NULL) err(1, "asprintf()"); if (stat(path, &sb) != -1) { if (S_ISDIR(sb.st_mode)) r = 1; } else { if (errno != ENOENT) perror("stat()"); } free(path); return r; } void start_supervisor(struct svc *service) { char *path; if ((asprintf(&path, "%s/%s", supdir, service->dir)) == -1 || path == NULL) err(1, "asprintf()"); pid_t p = fork(); if (p == 0) { /* Child */ super_path[1] = path; if (execv(super_path[0], super_path)) err(1, "execv()"); } else if (p > 0) { /* Parent */ service->supervisor = p; } else { err(1, "fork()"); } free(path); } void add_missing_svc(const char *name) { struct svc *tmp; tmp = find_svc_dir(name); if (tmp != NULL) return; /* We already have it */ tmp = malloc(sizeof(struct svc)); if (tmp == NULL) err(1, "malloc()"); if (strlcpy(tmp->dir, name, (MAXNAMLEN + 1)) >= (MAXNAMLEN + 1)) errx(1, "strlcpy()"); tmp->supervisor = -1; TAILQ_INSERT_TAIL(&services, tmp, entries); start_supervisor(tmp); } void scan_svcdir(int dir_fd) { DIR *dir; struct dirent *dp; if ((dir = fdopendir(dir_fd)) == NULL) err(1, "opendir()"); while ((dp = readdir(dir)) != NULL) { if (dp->d_type != DT_DIR) continue; if (dp->d_name[0] == '.') continue; add_missing_svc(dp->d_name); } rewinddir(dir); if (fdclosedir(dir) == -1) err(1, "closedir()"); } void handle_sigchild(int sig, siginfo_t *siginfo, void *ucontext) { wait_for_child = 1; } void handle_fatal(int sig, siginfo_t *siginfo, void *ucontext) { terminate = 1; } void setup_signals() { struct sigaction act; memset(&act, 0, sizeof(act)); act.sa_flags = SA_SIGINFO; act.sa_sigaction = &handle_sigchild; if (sigaction(SIGCHLD, &act, NULL) == -1) err(1, "sigaction()"); act.sa_sigaction = &handle_fatal; if (sigaction(SIGHUP, &act, NULL) == -1) err(1, "sigaction()"); if (sigaction(SIGINT, &act, NULL) == -1) err(1, "sigaction()"); if (sigaction(SIGTERM, &act, NULL) == -1) err(1, "sigaction()"); } void reset_signals() { struct sigaction act; memset(&act, 0, sizeof(act)); act.sa_handler = SIG_DFL; if (sigaction(SIGCHLD, &act, NULL) == -1) err(1, "sigaction()"); if (sigaction(SIGHUP, &act, NULL) == -1) err(1, "sigaction()"); if (sigaction(SIGINT, &act, NULL) == -1) err(1, "sigaction()"); if (sigaction(SIGTERM, &act, NULL) == -1) err(1, "sigaction()"); } void reap_all() { int r; for (;;) { r = wait(NULL); if (r != -1) continue; if (errno == EINTR) { continue; } else if (errno == ECHILD) { break; } else { perror("wait()"); break; } } } void signal_services(int sig) { struct svc *np; TAILQ_FOREACH(np, &services, entries) { if (np->supervisor != -1) { if (kill(np->supervisor, sig) == -1) /* XXX: Handle failure better? */ perror("kill()"); } } } void try_wait() { int p, s; for (;;) { struct svc *tmp; p = waitpid(-1, &s, WNOHANG); if (p == 0) break; if (p == -1) { if (errno == EINTR) /* Impossible? */ continue; else if (errno == ECHILD) /* In case there are no children */ break; else err(1, "waitpid()"); } tmp = find_svc_sup(p); if (tmp == NULL) continue; /* XXX: Log something here? */ tmp->supervisor = -1; if (direxists(tmp->dir)) start_supervisor(tmp); else remove_svc(tmp); } } int acquire_lock() { int lock_fd; char *lock_path; if (asprintf(&lock_path, "%s/lock", supdir) == -1 || lock_path == NULL) err(1, "asprintf()"); if ((lock_fd = open(lock_path, O_CREAT | O_TRUNC | O_WRONLY | O_CLOEXEC, 0644)) == -1) err(1, "open()"); if (flock(lock_fd, LOCK_EX | LOCK_NB) == -1) { if (errno == EWOULDBLOCK) { fprintf(stderr, "%s is locked\n", lock_path); exit(1); } else { err(1, "flock()"); } } free(lock_path); return lock_fd; } void signal_existing(int dir_fd) { DIR *dir; struct dirent *dp; char *ctrl_path, *lock_path; int ctrl_fd, lock_fd; sigset_t mask; sigemptyset(&mask); sigaddset(&mask, SIGPIPE); dir = fdopendir(dir_fd); while ((dp = readdir(dir)) != NULL) { if (dp->d_type != DT_DIR) continue; if (dp->d_name[0] == '.') continue; if (asprintf(&ctrl_path, "%s/supervise/control", dp->d_name) == -1 || ctrl_path == NULL) err(1, "asprintf()"); if (asprintf(&lock_path, "%s/supervise/lock", dp->d_name) == -1 || lock_path == NULL) err(1, "asprintf()"); if ((lock_fd = open(lock_path, O_RDONLY | O_NONBLOCK | O_CLOEXEC)) == -1) err(1, "open()"); if (flock(lock_fd, LOCK_EX | LOCK_NB) == -1) { if (errno == EWOULDBLOCK) { if ((ctrl_fd = open(ctrl_path, O_WRONLY | O_CLOEXEC)) == -1) err(1, "open()"); if (sigprocmask(SIG_BLOCK, &mask, NULL) == -1) err(1, "setprocmask()"); if (write(ctrl_fd, "x", 1) != 1) { if (errno != EPIPE) err(1, "write()"); } if (sigprocmask(SIG_UNBLOCK, &mask, NULL) == -1) err(1, "setprocmask()"); if (close(ctrl_fd) == -1) err(1, "close()"); } else { err(1, "flock()"); } } if (close(lock_fd) == -1) err(1, "flock()"); free(lock_path); free(ctrl_path); } rewinddir(dir); if (fdclosedir(dir) == -1) err(1, "fdclosedir()"); } int main(int argc, char **argv) { int kq, dir_fd, lock_fd; struct kevent evt; pid_t mypid = getpid(); lock_fd = acquire_lock(); if (procctl(P_PID, mypid, PROC_REAP_ACQUIRE, NULL) == -1) err(1, "procctl()"); dir_fd = open(supdir, O_RDONLY | O_DIRECTORY | O_CLOEXEC); if (dir_fd == -1) err(1, "open()"); setup_signals(); scan_svcdir(dir_fd); kq = kqueue(); if (kq == -1) err(1, "kqueue()"); EV_SET(&evt, dir_fd, EVFILT_VNODE, EV_ADD | EV_ENABLE, NOTE_WRITE | NOTE_EXTEND, 0, 0); for (;;) { try_wait(); if (terminate) { signal_services(SIGTERM); /* XXX: Forward the received signal? */ break; } struct kevent revt; int e = kevent(kq, &evt, 1, &revt, 1, NULL); if (e == -1) { if (errno != EINTR) err(1, "kevent()"); } else if (e > 0) /* XXX: Check revt instead of blindly scanning? */ scan_svcdir(dir_fd); } if (close(kq) == -1) perror("close()"); if (close(dir_fd) == -1) perror("close()"); if (close(lock_fd) == -1) perror("close()"); reset_signals(); /* Make SIGTERM/INT work again in case reap_all uses a long time. */ reap_all(); return 0; }