blob: e230b005397f9712f31dd1a938907d6a35a272c5 [file] [log] [blame]
/* upstart
*
* job_process.c - job process handling
*
* Copyright © 2009 Canonical Ltd.
* Author: Scott James Remnant <scott@netsplit.com>.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2, as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#ifdef HAVE_CONFIG_H
# include <config.h>
#endif /* HAVE_CONFIG_H */
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/ptrace.h>
#include <sys/resource.h>
#include <time.h>
#include <errno.h>
#include <stdio.h>
#include <limits.h>
#include <signal.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <nih/macros.h>
#include <nih/alloc.h>
#include <nih/string.h>
#include <nih/signal.h>
#include <nih/io.h>
#include <nih/logging.h>
#include <nih/error.h>
#include "paths.h"
#include "system.h"
#include "environ.h"
#include "process.h"
#include "job_process.h"
#include "job_class.h"
#include "job.h"
#include "errors.h"
/**
* SHELL_CHARS:
*
* This is the list of characters that, if encountered in the command
* string of a process, cause it to always be run with a shell.
**/
#define SHELL_CHARS "~`!$^&*()=|\\{}[];\"'<>?"
/**
* JobProcessWireError:
* @type: step of the child setup that failed,
* @arg: argument to @type (for example the index of the resource limit),
* @errnum: number of the error that was raised in the child.
*
* This structure is used to pass an error from the child process back to the
* parent. It contains the same basic particulars as a JobProcessError but
* without the message.
*
* It is written to and read from a pipe as a single fixed-size object, so
* it must only contain plain values and no pointers.
**/
typedef struct job_process_wire_error {
JobProcessErrorType type;
int arg;
int errnum;
} JobProcessWireError;
/* Prototypes for static functions.
*
* job_process_error_abort() is marked noreturn because it always ends by
* exiting the calling process; the result of job_process_error_read() must
* not be ignored because a negative value means an error has been raised.
*/
static void job_process_error_abort (int fd, JobProcessErrorType type,
int arg)
__attribute__ ((noreturn));
static int job_process_error_read (int fd)
__attribute__ ((warn_unused_result));
static void job_process_kill_timer (Job *job, NihTimer *timer);
static void job_process_terminated (Job *job, ProcessType process,
int status);
static int job_process_catch_runaway (Job *job);
static void job_process_stopped (Job *job, ProcessType process);
static void job_process_trace_new (Job *job, ProcessType process);
static void job_process_trace_new_child (Job *job, ProcessType process);
static void job_process_trace_signal (Job *job, ProcessType process,
int signum);
static void job_process_trace_fork (Job *job, ProcessType process);
static void job_process_trace_exec (Job *job, ProcessType process);
/**
* job_process_run:
* @job: job context for process to be run in,
* @process: job process to run.
*
* This function looks up @process in the process table of the job's class
* and uses the information there to spawn a new process for the @job,
* storing the pid in the matching entry of the job's pid table.
*
* The process is normally executed using the system shell, unless the
* script member of @process is FALSE and there are no typical shell
* characters within the command member, in which case it is executed
* directly using exec after splitting on whitespace.
*
* When executed with the shell, the command (which may be an entire
* script) is passed to the shell using the POSIX-specified -c option if it
* consists of a single line (trailing newlines are stripped), or if DEV_FD
* cannot be used because it does not exist or is not a directory.
* Otherwise the shell is told to read commands from one of the special
* DEV_FD/NN devices and NihIo is used to feed the script into that device.
* A pointer to the NihIo object is not kept or stored because it will
* automatically clean itself up should the script go away as the other end
* of the pipe will be closed.
*
* In either case the shell is run with the -e option so that commands will
* fail if their exit status is not checked.
*
* This function will block until the job_process_spawn() call succeeds or
* a non-temporary error occurs (such as file not found). It is up to the
* caller to decide whether non-temporary errors are a reason to change the
* job state or not.
*
* Returns: zero on success, negative value on non-temporary error.
**/
int
job_process_run (Job *job,
ProcessType process)
{
Process *proc;
nih_local char **argv = NULL;
nih_local char **env = NULL;
nih_local char *script = NULL;
char **e;
size_t argc, envc;
/* error: a temporary spawn failure has already been warned about;
* fds: pipe used to feed the script to the shell, only valid if shell;
* trace: the child should be traced with ptrace;
* shell: the script is being fed to the shell over the fds pipe.
*/
int error = FALSE, fds[2], trace = FALSE, shell = FALSE;
nih_assert (job != NULL);
proc = job->class->process[process];
nih_assert (proc != NULL);
nih_assert (proc->command != NULL);
/* We run the process using a shell if it says it wants to be run
* as such, or if it contains any shell-like characters; since that's
* the best way to deal with things like variables.
*/
if ((proc->script) || strpbrk (proc->command, SHELL_CHARS)) {
struct stat statbuf;
char *nl, *p;
argc = 0;
argv = NIH_MUST (nih_str_array_new (NULL));
NIH_MUST (nih_str_array_add (&argv, NULL, &argc, SHELL));
NIH_MUST (nih_str_array_add (&argv, NULL, &argc, "-e"));
/* If the process wasn't originally marked to be run through
* a shell, prepend exec to the script so that the shell
* gets out of the way after parsing.
*/
if (proc->script) {
script = NIH_MUST (nih_strdup (NULL, proc->command));
} else {
script = NIH_MUST (nih_sprintf (NULL, "exec %s",
proc->command));
}
/* We can pass scripts over the command-line instead of
* piping using /dev/fd/NNN. Do it for single line scripts
* and when /dev/fd doesn't exist.
*
* nl is left pointing at the first newline (if any) while p is
* advanced past the run of newlines that follows it, so *p is
* the terminator only when nothing but newlines follow the
* first line.
*/
p = nl = strchr (script, '\n');
while (p && (*p == '\n'))
p++;
if ((! nl) || (! *p)
|| (stat (DEV_FD, &statbuf) < 0)
|| (! S_ISDIR (statbuf.st_mode))) {
/* Strip off the newline(s) */
if (nl && (! *p))
*nl = '\0';
NIH_MUST (nih_str_array_add (&argv, NULL,
&argc, "-c"));
NIH_MUST (nih_str_array_addp (&argv, NULL,
&argc, script));
/* Next argument is argv[0]; just pass the shell */
NIH_MUST (nih_str_array_add (&argv, NULL,
&argc, SHELL));
} else {
nih_local char *cmd = NULL;
/* Close the writing end when the child is exec'd */
NIH_ZERO (pipe (fds));
nih_io_set_cloexec (fds[1]);
shell = TRUE;
/* FIXME actually always want it to be /dev/fd/3 and
* dup2() in the child to make it that way ... no way
* of passing that yet
*/
cmd = NIH_MUST (nih_sprintf (argv, "%s/%d",
DEV_FD, fds[0]));
NIH_MUST (nih_str_array_addp (&argv, NULL,
&argc, cmd));
}
} else {
/* Split the command on whitespace to produce a list of
* arguments that we can exec directly.
*/
argv = NIH_MUST (nih_str_split (NULL, proc->command,
" \t\r\n", TRUE));
}
/* We provide the standard job environment to all of its processes,
* except for pre-stop and post-stop which also have the stop event
* environment, adding special variables that indicate which job it
* was -- mostly so that initctl can have clever behaviour when called
* within them.
*/
envc = 0;
if (job->env) {
env = NIH_MUST (nih_str_array_copy (NULL, &envc, job->env));
} else {
env = NIH_MUST (nih_str_array_new (NULL));
}
if (job->stop_env
&& ((process == PROCESS_PRE_STOP)
|| (process == PROCESS_POST_STOP)))
for (e = job->stop_env; *e; e++)
NIH_MUST (environ_set (&env, NULL, &envc, TRUE, *e));
NIH_MUST (environ_set (&env, NULL, &envc, TRUE,
"UPSTART_JOB=%s", job->class->name));
NIH_MUST (environ_set (&env, NULL, &envc, TRUE,
"UPSTART_INSTANCE=%s", job->name));
/* If we're about to spawn the main job and we expect it to become
* a daemon or fork before we can move out of spawned, we need to
* set a trace on it.
*/
if ((process == PROCESS_MAIN)
&& ((job->class->expect == EXPECT_DAEMON)
|| (job->class->expect == EXPECT_FORK)))
trace = TRUE;
/* Spawn the process, repeat until fork() works */
while ((job->pid[process] = job_process_spawn (job->class, argv,
env, trace)) < 0) {
NihError *err;
err = nih_error_get ();
if (err->number == JOB_PROCESS_ERROR) {
/* Non-temporary error condition, we're not going
* to be able to spawn this process. Clean up after
* ourselves before returning.
*/
if (shell) {
close (fds[0]);
close (fds[1]);
}
job->pid[process] = 0;
/* Return non-temporary error condition */
nih_warn (_("Failed to spawn %s %s process: %s"),
job_name (job), process_name (process),
err->message);
nih_free (err);
return -1;
} else if (! error)
/* Only warn about the first temporary failure so that the
* retry loop does not flood the log.
*/
nih_warn ("%s: %s", _("Temporary process spawn error"),
err->message);
nih_free (err);
error = TRUE;
}
nih_info (_("%s %s process (%d)"),
job_name (job), process_name (process), job->pid[process]);
/* Reset the trace bookkeeping for the newly spawned process */
job->trace_forks = 0;
job->trace_state = trace ? TRACE_NEW : TRACE_NONE;
/* Feed the script to the child process */
if (shell) {
NihIo *io;
/* Clean up and close the reading end (we don't need it) */
close (fds[0]);
/* Put the entire script into an NihIo send buffer and
* then mark it for closure so that the shell gets EOF
* and the structure gets cleaned up automatically.
*
* Running out of memory is the only failure we retry on; any
* other error is treated as a programming error.
*/
while (! (io = nih_io_reopen (job, fds[1], NIH_IO_STREAM,
NULL, NULL, NULL, NULL))) {
NihError *err;
err = nih_error_get ();
if (err->number != ENOMEM)
nih_assert_not_reached ();
nih_free (err);
}
NIH_ZERO (nih_io_write (io, script, strlen (script)));
nih_io_shutdown (io);
}
return 0;
}
/**
* job_process_spawn:
* @class: job class of process to be spawned,
* @argv: NULL-terminated list of arguments for the process,
* @env: NULL-terminated list of environment variables for the process,
* @trace: whether to trace this process.
*
* This function spawns a new process using the @class details to set up the
* environment for it; the process is always a session and process group
* leader as we never want anything in our own group.
*
* The process to be executed is given in the @argv array which is passed
* directly to execvp(), so should be in the same NULL-terminated form with
* the first argument containing the path or filename of the binary. The
* PATH environment in @class will be searched.
*
* If @trace is TRUE, the process will be traced with ptrace and this will
* cause the process to be stopped when the exec() call is made. You must
* wait for this and then may use it to set options before continuing the
* process.
*
* This function only spawns the process, it is up to the caller to ensure
* that the information is saved into the job and that the process is watched,
* etc.
*
* Spawning a process may fail for temporary reasons, usually due to a failure
* of the fork() syscall or communication with the child; or more permanent
* reasons such as a failure to setup the child environment. These latter
* are always represented by a JOB_PROCESS_ERROR error.
*
* Returns: process id of new process on success, -1 on raised error
**/
pid_t
job_process_spawn (JobClass *class,
char * const argv[],
char * const *env,
int trace)
{
sigset_t child_set, orig_set;
pid_t pid;
int i, fds[2];
char filename[PATH_MAX];
FILE *fd;
nih_assert (class != NULL);
/* Create a pipe to communicate with the child process until it
* execs so we know whether that was successful or an error occurred.
*/
if (pipe (fds) < 0)
nih_return_system_error (-1);
/* Block all signals while we fork to avoid the child process running
* our own signal handlers before we've reset them all back to the
* default.
*/
sigfillset (&child_set);
sigprocmask (SIG_BLOCK, &child_set, &orig_set);
/* Fork the child process, handling success and failure by resetting
* the signal mask and returning the new process id or a raised error.
*/
pid = fork ();
if (pid > 0) {
/* Parent: restore our signal mask and drop the writing end so
* that the read below sees end-of-file once the child has closed
* its copy.
*/
sigprocmask (SIG_SETMASK, &orig_set, NULL);
close (fds[1]);
/* Read error from the pipe, return if one is raised */
if (job_process_error_read (fds[0]) < 0) {
close (fds[0]);
return -1;
}
close (fds[0]);
return pid;
} else if (pid < 0) {
/* Raise the error first, before the calls below have a chance to
* change errno.
*/
nih_error_raise_system ();
sigprocmask (SIG_SETMASK, &orig_set, NULL);
close (fds[0]);
close (fds[1]);
return -1;
}
/* We're now in the child process.
*
* The rest of this function sets the child up and ends by executing
* the new binary. Failures are handled by terminating the child
* and writing an error back to the parent.
*/
/* Close the reading end of the pipe with our parent and mark the
* writing end to be closed-on-exec so the parent knows we got that
* far because read() returned zero.
*/
close (fds[0]);
nih_io_set_cloexec (fds[1]);
/* Become the leader of a new session and process group, shedding
* any controlling tty (which we shouldn't have had anyway).
*/
setsid ();
/* Set the standard file descriptors to an output of our choosing;
* any other open descriptor must be intended for the child, or have
* the FD_CLOEXEC flag so it's automatically closed when we exec()
* later.
*/
if (system_setup_console (class->console, FALSE) < 0)
job_process_error_abort (fds[1], JOB_PROCESS_ERROR_CONSOLE, 0);
/* Set resource limits for the process, skipping over any that
* aren't set in the job class such that they inherit from
* ourselves (and we inherit from kernel defaults).
*/
for (i = 0; i < RLIMIT_NLIMITS; i++) {
if (! class->limits[i])
continue;
if (setrlimit (i, class->limits[i]) < 0) {
nih_error_raise_system ();
job_process_error_abort (fds[1],
JOB_PROCESS_ERROR_RLIMIT, i);
}
}
/* Set the process environment from the function parameters. */
environ = (char **)env;
/* Set the file mode creation mask; this is one of the few operations
* that can never fail.
*/
umask (class->umask);
/* Adjust the process priority ("nice level").
*/
if (setpriority (PRIO_PROCESS, 0, class->nice) < 0) {
nih_error_raise_system ();
job_process_error_abort (fds[1],
JOB_PROCESS_ERROR_PRIORITY, 0);
}
/* Adjust the process OOM killer priority by writing the value to our
* own oom_adj file under /proc; skipped when the class value is zero.
*/
if (class->oom_adj) {
snprintf (filename, sizeof (filename),
"/proc/%d/oom_adj", getpid ());
fd = fopen (filename, "w");
if (! fd) {
nih_error_raise_system ();
job_process_error_abort (fds[1], JOB_PROCESS_ERROR_OOM_ADJ, 0);
} else {
fprintf (fd, "%d\n", class->oom_adj);
/* The result of fclose() is what is checked for a failed
* write here, not that of fprintf().
*/
if (fclose (fd)) {
nih_error_raise_system ();
job_process_error_abort (fds[1], JOB_PROCESS_ERROR_OOM_ADJ, 0);
}
}
}
/* Change the root directory, confining path resolution within it;
* we do this before the working directory call so that is always
* relative to the new root.
*/
if (class->chroot) {
if (chroot (class->chroot) < 0) {
nih_error_raise_system ();
job_process_error_abort (fds[1],
JOB_PROCESS_ERROR_CHROOT, 0);
}
}
/* Change the working directory of the process, either to the one
* configured in the job, or to the root directory of the filesystem
* (or at least relative to the chroot).
*/
if (chdir (class->chdir ? class->chdir : "/") < 0) {
nih_error_raise_system ();
job_process_error_abort (fds[1], JOB_PROCESS_ERROR_CHDIR, 0);
}
/* Reset all the signal handlers back to their default handling so
* the child isn't unexpectedly ignoring any, and so we won't
* surprisingly handle them before we've exec()d the new process.
* Only then is the signal mask from before the fork restored.
*/
nih_signal_reset ();
sigprocmask (SIG_SETMASK, &orig_set, NULL);
/* Set up a process trace if we need to trace forks */
if (trace) {
if (ptrace (PTRACE_TRACEME, 0, NULL, 0) < 0) {
nih_error_raise_system();
job_process_error_abort (fds[1],
JOB_PROCESS_ERROR_PTRACE, 0);
}
}
/* Execute the process, if we escape from here it failed */
if (execvp (argv[0], argv) < 0) {
nih_error_raise_system ();
job_process_error_abort (fds[1], JOB_PROCESS_ERROR_EXEC, 0);
}
nih_assert_not_reached ();
}
/**
 * job_process_error_abort:
 * @fd: writing end of pipe,
 * @type: step that failed,
 * @arg: argument to @type.
 *
 * Abort the child process, first writing the error details in @type, @arg
 * and the currently raised NihError to the writing end of the pipe specified
 * by @fd.
 *
 * An error must have been raised before this function is called, since its
 * number is what gets sent to the parent.
 *
 * This function calls exit() with a status of 255, so never returns.
 **/
static void
job_process_error_abort (int                 fd,
			 JobProcessErrorType type,
			 int                 arg)
{
	NihError            *err;
	JobProcessWireError  wire_err;

	/* Get the currently raised system error */
	err = nih_error_get ();

	/* Fill in the structure we send over the pipe */
	wire_err.type = type;
	wire_err.arg = arg;
	wire_err.errnum = err->number;

	/* Write structure to the pipe; in theory this should never fail, but
	 * if it does, we abort anyway.  Only an interrupted write is worth
	 * retrying, any other failure would simply repeat forever.
	 */
	while ((write (fd, &wire_err, sizeof (wire_err)) < 0)
	       && (errno == EINTR))
		;

	/* We're done with the error; release it as every other caller of
	 * nih_error_get() in this file does, so that it is not left
	 * outstanding when the process exits.
	 */
	nih_free (err);

	exit (255);
}
/**
* job_process_error_read:
* @fd: reading end of pipe.
*
* Read from the reading end of the pipe specified by @fd, if we receive
* data then the child raised a process error which we reconstruct and raise
* again; otherwise no problem was found and no action is taken.
*
* The reconstructed error will be of JOB_PROCESS_ERROR type, the human-
* readable message is generated according to the type of process error
* and argument passed along with it.
*
* Returns: zero if no error was found, or negative value on raised error.
**/
static int
job_process_error_read (int fd)
{
JobProcessWireError wire_err;
ssize_t len;
JobProcessError *err;
const char *res;
/* Read the error from the pipe; a zero read indicates that the
* exec succeeded so we return success, otherwise if we don't receive
* a JobProcessWireError structure, we return a temporary error so we
* try again.
*/
len = read (fd, &wire_err, sizeof (wire_err));
if (len == 0) {
return 0;
} else if (len < 0) {
nih_return_system_error (-1);
} else if (len != sizeof (wire_err)) {
errno = EILSEQ;
nih_return_system_error (-1);
}
/* Construct a JobProcessError to be raised containing information
* from the wire, augmented with human-readable information we
* generate here.
*/
err = NIH_MUST (nih_new (NULL, JobProcessError));
err->type = wire_err.type;
err->arg = wire_err.arg;
err->errnum = wire_err.errnum;
err->error.number = JOB_PROCESS_ERROR;
switch (err->type) {
case JOB_PROCESS_ERROR_CONSOLE:
err->error.message = NIH_MUST (nih_sprintf (
err, _("unable to open console: %s"),
strerror (err->errnum)));
break;
case JOB_PROCESS_ERROR_RLIMIT:
switch (err->arg) {
case RLIMIT_CPU:
res = "cpu";
break;
case RLIMIT_FSIZE:
res = "fsize";
break;
case RLIMIT_DATA:
res = "data";
break;
case RLIMIT_STACK:
res = "stack";
break;
case RLIMIT_CORE:
res = "core";
break;
case RLIMIT_RSS:
res = "rss";
break;
case RLIMIT_NPROC:
res = "nproc";
break;
case RLIMIT_NOFILE:
res = "nofile";
break;
case RLIMIT_MEMLOCK:
res = "memlock";
break;
case RLIMIT_AS:
res = "as";
break;
case RLIMIT_LOCKS:
res = "locks";
break;
case RLIMIT_SIGPENDING:
res = "sigpending";
break;
case RLIMIT_MSGQUEUE:
res = "msgqueue";
break;
case RLIMIT_NICE:
res = "nice";
break;
case RLIMIT_RTPRIO:
res = "rtprio";
break;
default:
nih_assert_not_reached ();
}
err->error.message = NIH_MUST (nih_sprintf (
err, _("unable to set \"%s\" resource limit: %s"),
res, strerror (err->errnum)));
break;
case JOB_PROCESS_ERROR_PRIORITY:
err->error.message = NIH_MUST (nih_sprintf (
err, _("unable to set priority: %s"),
strerror (err->errnum)));
break;
case JOB_PROCESS_ERROR_OOM_ADJ:
err->error.message = NIH_MUST (nih_sprintf (
err, _("unable to set oom adjustment: %s"),
strerror (err->errnum)));
break;
case JOB_PROCESS_ERROR_CHROOT:
err->error.message = NIH_MUST (nih_sprintf (
err, _("unable to change root directory: %s"),
strerror (err->errnum)));
break;
case JOB_PROCESS_ERROR_CHDIR:
err->error.message = NIH_MUST (nih_sprintf (
err, _("unable to change working directory: %s"),
strerror (err->errnum)));
break;
case JOB_PROCESS_ERROR_PTRACE:
err->error.message = NIH_MUST (nih_sprintf (
err, _("unable to set trace: %s"),
strerror (err->errnum)));
break;
case JOB_PROCESS_ERROR_EXEC:
err->error.message = NIH_MUST (nih_sprintf (
err, _("unable to execute: %s"),
strerror (err->errnum)));
break;
default:
nih_assert_not_reached ();
}
nih_error_raise_error (&err->error);
return -1;
}
/**
 * job_process_kill:
 * @job: job to kill process of,
 * @process: process to be killed.
 *
 * This function forces a @job to leave its current state by sending
 * @process the TERM signal, and maybe later the KILL signal.  The actual
 * state changes are performed by job_process_handler() when the process
 * has actually terminated.
 *
 * The @process must be running, and no other kill may be pending for
 * @job.  If the TERM signal cannot be sent, no timer is set up; a warning
 * is logged unless the reason was that the process no longer exists.
 **/
void
job_process_kill (Job         *job,
		  ProcessType  process)
{
	nih_assert (job != NULL);
	nih_assert (job->pid[process] > 0);
	nih_assert (job->kill_timer == NULL);
	/* This must be a comparison; an assignment here would always pass
	 * and overwrite the field instead of checking it.
	 */
	nih_assert (job->kill_process == (ProcessType)-1);

	nih_info (_("Sending TERM signal to %s %s process (%d)"),
		  job_name (job), process_name (process), job->pid[process]);

	if (system_kill (job->pid[process], FALSE) < 0) {
		NihError *err;

		err = nih_error_get ();
		if (err->number != ESRCH)
			nih_warn (_("Failed to send TERM signal to %s %s process (%d): %s"),
				  job_name (job), process_name (process),
				  job->pid[process], err->message);
		nih_free (err);

		return;
	}

	/* Remember which process we're waiting on, and follow up with the
	 * KILL signal if it hasn't gone away within the class timeout.
	 */
	job->kill_process = process;
	job->kill_timer = NIH_MUST (nih_timer_add_timeout (
			job, job->class->kill_timeout,
			(NihTimerCb)job_process_kill_timer, job));
}
/**
 * job_process_kill_timer:
 * @job: job to kill process of,
 * @timer: timer that caused us to be called.
 *
 * Timer callback set up by job_process_kill(); it runs when the process
 * that was sent the TERM signal is still around once the kill timeout
 * has passed.  The pending kill is cleared from @job and the process is
 * sent the KILL signal instead.
 **/
static void
job_process_kill_timer (Job      *job,
			NihTimer *timer)
{
	ProcessType  process;
	NihError    *failure;

	nih_assert (job != NULL);
	nih_assert (timer != NULL);
	nih_assert (job->kill_timer == timer);
	nih_assert (job->kill_process != (ProcessType)-1);

	process = job->kill_process;
	nih_assert (job->pid[process] > 0);

	/* The kill is no longer pending, whatever happens next */
	job->kill_process = -1;
	job->kill_timer = NULL;

	nih_info (_("Sending KILL signal to %s %s process (%d)"),
		  job_name (job), process_name (process), job->pid[process]);

	if (system_kill (job->pid[process], TRUE) >= 0)
		return;

	/* A process that has already gone away isn't worth a warning */
	failure = nih_error_get ();
	if (failure->number != ESRCH) {
		nih_warn (_("Failed to send KILL signal to %s %s process (%d): %s"),
			  job_name (job), process_name (process),
			  job->pid[process], failure->message);
	}
	nih_free (failure);
}
/**
 * job_process_handler:
 * @data: unused,
 * @pid: process that changed,
 * @event: event that occurred on the child,
 * @status: exit status, signal raised or ptrace event.
 *
 * This callback should be registered with nih_child_add_watch() so that
 * when processes associated with jobs die, stop, receive signals or other
 * ptrace events, the appropriate action is taken.
 *
 * Normally this is registered so it is called for all processes, and is
 * safe to do as it only acts if the process is linked to a job; events
 * for any other process are noted in the debug log and otherwise ignored.
 **/
void
job_process_handler (void           *data,
		     pid_t           pid,
		     NihChildEvents  event,
		     int             status)
{
	Job         *job;
	ProcessType  process;
	const char  *sig;

	nih_assert (pid > 0);

	/* Find the job that an event occurred for, and identify which of the
	 * job's process it was.  If we don't know about it, then we simply
	 * ignore the event; only in that case is it logged as ignored.
	 */
	job = job_process_find (pid, &process);
	if (! job) {
		nih_debug ("Ignored event %x (%d) for process %d",
			   event, status, pid);
		return;
	}

	switch (event) {
	case NIH_CHILD_EXITED:
		/* Child exited; check status to see whether it exited
		 * normally (zero) or with a non-zero status.
		 */
		if (status) {
			nih_warn (_("%s %s process (%d) "
				    "terminated with status %d"),
				  job_name (job), process_name (process),
				  pid, status);
		} else {
			nih_info (_("%s %s process (%d) exited normally"),
				  job_name (job), process_name (process), pid);
		}

		job_process_terminated (job, process, status);
		break;
	case NIH_CHILD_KILLED:
	case NIH_CHILD_DUMPED:
		/* Child was killed by a signal, and maybe dumped core.  We
		 * store the signal value in the higher byte of status (it's
		 * safe to do that) to distinguish it from a normal exit
		 * status.
		 */
		sig = nih_signal_to_name (status);
		if (sig) {
			nih_warn (_("%s %s process (%d) killed by %s signal"),
				  job_name (job), process_name (process),
				  pid, sig);
		} else {
			nih_warn (_("%s %s process (%d) killed by signal %d"),
				  job_name (job), process_name (process),
				  pid, status);
		}

		status <<= 8;
		job_process_terminated (job, process, status);
		break;
	case NIH_CHILD_STOPPED:
		/* Child was stopped by a signal, make sure it was SIGSTOP
		 * and not a tty-related signal.
		 */
		sig = nih_signal_to_name (status);
		if (sig) {
			nih_info (_("%s %s process (%d) stopped by %s signal"),
				  job_name (job), process_name (process),
				  pid, sig);
		} else {
			nih_info (_("%s %s process (%d) stopped by signal %d"),
				  job_name (job), process_name (process),
				  pid, status);
		}

		if (status == SIGSTOP)
			job_process_stopped (job, process);

		break;
	case NIH_CHILD_CONTINUED:
		/* Child was continued by a signal; this is only logged.
		 */
		sig = nih_signal_to_name (status);
		if (sig) {
			nih_info (_("%s %s process (%d) continued by %s signal"),
				  job_name (job), process_name (process),
				  pid, sig);
		} else {
			nih_info (_("%s %s process (%d) continued by signal %d"),
				  job_name (job), process_name (process),
				  pid, status);
		}

		break;
	case NIH_CHILD_TRAPPED:
		/* Child received a signal while we were tracing it.  This
		 * can be a signal raised inside the kernel as a side-effect
		 * of the trace because the child called fork() or exec();
		 * we only know that from our own state tracking.
		 */
		if ((job->trace_state == TRACE_NEW)
		    && (status == SIGTRAP)) {
			job_process_trace_new (job, process);
		} else if ((job->trace_state == TRACE_NEW_CHILD)
			   && (status == SIGSTOP)) {
			job_process_trace_new_child (job, process);
		} else {
			job_process_trace_signal (job, process, status);
		}

		break;
	case NIH_CHILD_PTRACE:
		/* Child called an important syscall that can modify the
		 * state of the process trace we hold.
		 */
		switch (status) {
		case PTRACE_EVENT_FORK:
			job_process_trace_fork (job, process);
			break;
		case PTRACE_EVENT_EXEC:
			job_process_trace_exec (job, process);
			break;
		default:
			nih_assert_not_reached ();
		}

		break;
	default:
		nih_assert_not_reached ();
	}
}
/**
* job_process_terminated:
* @job: job that changed,
* @process: specific process,
* @status: exit status or signal in higher byte.
*
* This function is called whenever a @process attached to @job terminates,
* @status should contain the exit status in the lower byte or signal in
* the higher byte.
*
* The job structure is updated and the next appropriate state for the job
* is chosen, which may involve changing the goal to stop first.
**/
static void
job_process_terminated (Job *job,
ProcessType process,
int status)
{
/* failed: report the termination through job_failed();
* stop: change the goal of the job to stop;
* state: move the job on to its next state before returning.
*/
int failed = FALSE, stop = FALSE, state = TRUE;
nih_assert (job != NULL);
switch (process) {
case PROCESS_MAIN:
nih_assert ((job->state == JOB_RUNNING)
|| (job->state == JOB_SPAWNED)
|| (job->state == JOB_KILLED)
|| (job->state == JOB_STOPPING)
|| (job->state == JOB_POST_START)
|| (job->state == JOB_PRE_STOP));
/* We don't change the state if we're in post-start and there's
* a post-start process running, or if we're in pre-stop and
* there's a pre-stop process running; we wait for those to
* finish instead.
*/
if ((job->state == JOB_POST_START)
&& job->class->process[PROCESS_POST_START]
&& (job->pid[PROCESS_POST_START] > 0)) {
state = FALSE;
} else if ((job->state == JOB_PRE_STOP)
&& job->class->process[PROCESS_PRE_STOP]
&& (job->pid[PROCESS_PRE_STOP] > 0)) {
state = FALSE;
}
/* Dying when we killed it is perfectly normal and never
* considered a failure. We also don't want to tamper with
* the goal since we might be restarting the job anyway.
* The state flag is left alone, so the job still moves on to
* its next state below.
*/
if (job->state == JOB_KILLED)
break;
/* Yet another corner case is terminating when we were
* already stopping, we don't want to tamper with the goal or
* state because we're still waiting for the stopping
* event to finish and that might restart it anyway.
* We also don't want to consider it a failure, because
* we want the stopping and stopped events to match.
*/
if (job->state == JOB_STOPPING) {
state = FALSE;
break;
}
/* We don't assume that because the primary process was
* killed or exited with a non-zero status, it failed.
* Instead we check the normalexit list to see whether
* the exit signal or status is in that list, and only
* if not, do we consider it failed.
*
* For services that can be respawned, a zero exit status is
* also a failure unless listed.
*/
if (status || (job->class->respawn && (! job->class->task)))
{
failed = TRUE;
for (size_t i = 0; i < job->class->normalexit_len; i++) {
if (job->class->normalexit[i] == status) {
failed = FALSE;
break;
}
}
/* We might be able to respawn the failed job;
* that's a simple matter of doing nothing. Check
* the job isn't running away first though.
*/
if (failed && job->class->respawn) {
if (job_process_catch_runaway (job)) {
nih_warn (_("%s respawning too fast, stopped"),
job_name (job));
/* The failure is reported here directly, so the flag is
* cleared to avoid reporting it a second time below.
* NOTE(review): the -1 presumably means the failure is
* not attributed to a particular process -- confirm
* against job_failed().
*/
failed = FALSE;
job_failed (job, -1, 0);
} else {
nih_warn (_("%s %s process ended, respawning"),
job_name (job),
process_name (process));
failed = FALSE;
/* If we're not going to change the
* state because there's a post-start
* or pre-stop script running, we need
* to remember to do it when that
* finishes.
*/
if (! state)
job_change_goal (job, JOB_RESPAWN);
break;
}
}
}
/* Otherwise whether it's failed or not, we should
* stop the job now.
*/
stop = TRUE;
break;
case PROCESS_PRE_START:
nih_assert (job->state == JOB_PRE_START);
/* If the pre-start script is killed or exits with a status
* other than zero, it's always considered a failure since
* we don't know what state the job might be in.
*/
if (status) {
failed = TRUE;
stop = TRUE;
}
break;
case PROCESS_POST_START:
nih_assert (job->state == JOB_POST_START);
/* We always want to change the state when the post-start
* script terminates; if the main process is running, we'll
* stay in that state, otherwise we'll skip through.
*
* Failure is ignored since there's not much we can do
* about it at this point.
*/
break;
case PROCESS_PRE_STOP:
nih_assert (job->state == JOB_PRE_STOP);
/* We always want to change the state when the pre-stop
* script terminates, we either want to go back into running
* or head towards killing the main process.
*
* Failure is ignored since there's not much we can do
* about it at this point.
*/
break;
case PROCESS_POST_STOP:
nih_assert (job->state == JOB_POST_STOP);
/* If the post-stop script is killed or exits with a status
* other than zero, it's always considered a failure since
* we don't know what state the job might be in.
*/
if (status) {
failed = TRUE;
stop = TRUE;
}
break;
default:
nih_assert_not_reached ();
}
/* Cancel any timer trying to kill the job, since it's just
* died. We could do this inside the main process block above, but
* leaving it here for now means we can use the timer for any
* additional process later.
*/
if (job->kill_timer) {
nih_unref (job->kill_timer, job);
job->kill_timer = NULL;
job->kill_process = -1;
}
/* Clear the process pid field */
job->pid[process] = 0;
/* Mark the job as failed */
if (failed)
job_failed (job, process, status);
/* Change the goal to stop; normally this doesn't have any
* side-effects, except when we're in the RUNNING state when it'll
* change the state as well. We obviously don't want to change the
* state twice.
*/
if (stop) {
if (job->state == JOB_RUNNING)
state = FALSE;
job_change_goal (job, JOB_STOP);
}
if (state)
job_change_state (job, job_next_state (job));
}
/**
 * job_process_catch_runaway:
 * @job: job about to be respawned.
 *
 * This function is called by job_process_terminated() when the main
 * process of a respawning job has ended in failure, before the job is
 * allowed to respawn.  It ensures that a job doesn't end up in a restart
 * loop by limiting the number of respawns within a particular interval.
 *
 * The respawn count and time stored in @job are updated as a side-effect.
 * No limit is applied unless both the respawn limit and the respawn
 * interval of the job's class are non-zero.
 *
 * Returns: TRUE if the job is respawning too fast, FALSE if not.
 **/
static int
job_process_catch_runaway (Job *job)
{
	struct timespec now;
	time_t          interval;
	int             ret;

	nih_assert (job != NULL);

	if ((! job->class->respawn_limit) || (! job->class->respawn_interval))
		return FALSE;

	/* Read the clock outside of the assertion, so that the call is
	 * made no matter how the assertion is evaluated.
	 */
	ret = clock_gettime (CLOCK_MONOTONIC, &now);
	nih_assert (ret == 0);

	/* Time since last respawn ... this goes very large if we
	 * haven't done one, which is fine
	 */
	interval = now.tv_sec - job->respawn_time;
	if (interval < job->class->respawn_interval) {
		/* Still inside the current window; one more respawn */
		job->respawn_count++;
		if (job->respawn_count > job->class->respawn_limit)
			return TRUE;
	} else {
		/* The window has passed, start a new one from now */
		job->respawn_time = now.tv_sec;
		job->respawn_count = 1;
	}

	return FALSE;
}
/**
 * job_process_stopped:
 * @job: job that changed,
 * @process: specific process.
 *
 * Handles a @process of @job having been stopped by the SIGSTOP signal
 * (and not by a tty-related one).
 *
 * Jobs whose class expects it raise this signal to say that they have
 * finished starting up; for those the process is sent SIGCONT and the
 * job is moved out of the spawned state.  Nothing is done for any other
 * process, state or kind of job.
 **/
static void
job_process_stopped (Job         *job,
		     ProcessType  process)
{
	nih_assert (job != NULL);

	/* Only the main process of a job that is still in the spawned
	 * state, and that signals readiness this way, is of interest.
	 */
	if ((process == PROCESS_MAIN)
	    && (job->state == JOB_SPAWNED)
	    && (job->class->expect == EXPECT_STOP)) {
		/* Let it carry on running, then move to the next state */
		kill (job->pid[process], SIGCONT);
		job_change_state (job, job_next_state (job));
	}
}
/**
 * job_process_trace_new:
 * @job: job that changed,
 * @process: specific process.
 *
 * This function is called when the traced @process attached to @job calls
 * its first exec(), still within the Upstart code before passing control
 * to the new executable.
 *
 * It sets the options for the trace so that forks and execs are reported,
 * marks the trace as being in the normal state and then lets the process
 * continue.  If the options cannot be set a warning is logged and the
 * trace state is left unchanged.
 *
 * The trace state of @job must be TRACE_NEW or TRACE_NEW_CHILD.
 **/
static void
job_process_trace_new (Job         *job,
		       ProcessType  process)
{
	int saved_errno;

	nih_assert (job != NULL);
	nih_assert ((job->trace_state == TRACE_NEW)
		    || (job->trace_state == TRACE_NEW_CHILD));

	/* Any process can get us to trace them, but we only care about the
	 * main process when the state is still spawned.
	 */
	if ((process != PROCESS_MAIN) || (job->state != JOB_SPAWNED))
		return;

	/* Set options so that we are notified when the process forks, and
	 * get a different kind of notification when it execs to a plain
	 * SIGTRAP.
	 */
	if (ptrace (PTRACE_SETOPTIONS, job->pid[process], NULL,
		    PTRACE_O_TRACEFORK | PTRACE_O_TRACEEXEC) < 0) {
		/* Capture errno straight away: the order in which the
		 * arguments below are evaluated is unspecified and the
		 * helper calls may modify errno before it is read.
		 */
		saved_errno = errno;

		nih_warn (_("Failed to set ptrace options for "
			    "%s %s process (%d): %s"),
			  job_name (job), process_name (process),
			  job->pid[process], strerror (saved_errno));
		return;
	}

	job->trace_state = TRACE_NORMAL;

	/* Allow the process to continue without delivering the
	 * kernel-generated signal that was for our eyes not theirs.
	 */
	if (ptrace (PTRACE_CONT, job->pid[process], NULL, 0) < 0) {
		saved_errno = errno;

		nih_warn (_("Failed to continue traced %s %s process (%d): %s"),
			  job_name (job), process_name (process),
			  job->pid[process], strerror (saved_errno));
	}
}
/**
 * job_process_trace_new_child:
 * @job: job that changed,
 * @process: specific process.
 *
 * This function is called whenever a traced @process attached to @job stops
 * after the fork() so that we can set the options before continuing it.
 *
 * Check to see whether we've reached the number of forks we expected, if
 * so detach the process and move towards the running state; otherwise we
 * set our important ptrace options, update the trace state so that
 * further SIGSTOP or SIGTRAP signals are treated as just that and allow
 * the process to continue.
 *
 * The trace state of @job must be TRACE_NEW_CHILD.
 **/
static void
job_process_trace_new_child (Job         *job,
			     ProcessType  process)
{
	nih_assert (job != NULL);
	nih_assert (job->trace_state == TRACE_NEW_CHILD);

	/* Any process can get us to trace them, but we only care about the
	 * main process when the state is still spawned.
	 */
	if ((process != PROCESS_MAIN) || (job->state != JOB_SPAWNED))
		return;

	/* We need to fork at least twice unless we're expecting a
	 * single fork when we only need to fork once; once that limit
	 * has been reached, end the trace.
	 */
	job->trace_forks++;
	if ((job->trace_forks > 1) || (job->class->expect == EXPECT_FORK))
	{
		if (ptrace (PTRACE_DETACH, job->pid[process], NULL, 0) < 0) {
			/* Capture errno straight away: the order in which
			 * the arguments below are evaluated is unspecified
			 * and the helper calls may modify errno before it
			 * is read.
			 */
			int saved_errno = errno;

			nih_warn (_("Failed to detach traced "
				    "%s %s process (%d): %s"),
				  job_name (job), process_name (process),
				  job->pid[process], strerror (saved_errno));
		}

		/* The trace is over whether or not the detach worked */
		job->trace_state = TRACE_NONE;
		job_change_state (job, job_next_state (job));
		return;
	}

	/* More forks expected, treat the child like a newly traced process */
	job_process_trace_new (job, process);
}
/**
 * job_process_trace_signal:
 * @job: job that changed,
 * @process: specific process,
 * @signum: signal that was sent to the process.
 *
 * This function is called whenever a traced @process attached to @job has
 * a signal sent to it.
 *
 * We don't care about these, they're a side effect of ptrace that we can't
 * turn off, so we just deliver them untampered with by continuing the
 * process with @signum.
 **/
static void
job_process_trace_signal (Job         *job,
			  ProcessType  process,
			  int          signum)
{
	nih_assert (job != NULL);

	/* Any process can get us to trace them, but we only care about the
	 * main process when the state is still spawned.
	 */
	if ((process != PROCESS_MAIN) || (job->state != JOB_SPAWNED)
	    || (job->trace_state != TRACE_NORMAL))
		return;

	/* Deliver the signal */
	if (ptrace (PTRACE_CONT, job->pid[process], NULL, signum) < 0) {
		/* Capture errno straight away: the order in which the
		 * arguments below are evaluated is unspecified and the
		 * helper calls may modify errno before it is read.
		 */
		int saved_errno = errno;

		nih_warn (_("Failed to deliver signal to traced "
			    "%s %s process (%d): %s"),
			  job_name (job), process_name (process),
			  job->pid[process], strerror (saved_errno));
	}
}
/**
 * job_process_trace_fork:
 * @job: job that changed,
 * @process: specific process.
 *
 * This function is called whenever a traced @process attached to @job calls
 * the fork() system call.
 *
 * We obtain the new child process id from the message and update the
 * structure so that we follow that instead, detaching from the process that
 * called fork.  If the child id cannot be obtained a warning is logged and
 * @job is left following the original process.
 **/
static void
job_process_trace_fork (Job         *job,
			ProcessType  process)
{
	unsigned long data;
	int           saved_errno;

	nih_assert (job != NULL);

	/* Any process can get us to trace them, but we only care about the
	 * main process when the state is still spawned.
	 */
	if ((process != PROCESS_MAIN) || (job->state != JOB_SPAWNED)
	    || (job->trace_state != TRACE_NORMAL))
		return;

	/* Obtain the child process id from the ptrace event. */
	if (ptrace (PTRACE_GETEVENTMSG, job->pid[process], NULL, &data) < 0) {
		/* Capture errno straight away: the order in which the
		 * arguments below are evaluated is unspecified and the
		 * helper calls may modify errno before it is read.  The
		 * same applies to the other failure paths below.
		 */
		saved_errno = errno;

		nih_warn (_("Failed to obtain child process id "
			    "for %s %s process (%d): %s"),
			  job_name (job), process_name (process),
			  job->pid[process], strerror (saved_errno));
		return;
	}

	nih_info (_("%s %s process (%d) became new process (%d)"),
		  job_name (job), process_name (process),
		  job->pid[process], (pid_t)data);

	/* We no longer care about this process, it's the child that we're
	 * interested in from now on, so detach it and allow it to go about
	 * its business unhindered.
	 */
	if (ptrace (PTRACE_DETACH, job->pid[process], NULL, 0) < 0) {
		saved_errno = errno;

		nih_warn (_("Failed to detach traced %s %s process (%d): %s"),
			  job_name (job), process_name (process),
			  job->pid[process], strerror (saved_errno));
	}

	/* Update the process we're supervising which is about to get SIGSTOP
	 * so set the trace options to capture it.
	 */
	job->pid[process] = (pid_t)data;
	job->trace_state = TRACE_NEW_CHILD;

	/* We may have already had the wait notification for the new child
	 * waiting at SIGSTOP, in which case a ptrace() call will succeed
	 * for it.
	 */
	if (ptrace (PTRACE_SETOPTIONS, job->pid[process], NULL, 0) < 0) {
		saved_errno = errno;

		nih_debug ("Failed to set options for new %s %s process (%d), "
			   "probably not yet forked: %s",
			   job_name (job), process_name (process),
			   job->pid[process], strerror (saved_errno));
		return;
	}

	job_process_trace_new_child (job, process);
}
/**
 * job_process_trace_exec:
 * @job: job that changed,
 * @process: specific process.
 *
 * This function is called whenever a traced @process attached to @job calls
 * the exec() system call after we've set options on it to distinguish them
 * from ordinary SIGTRAPs.
 *
 * If at least one fork has already been counted for @job, we assume that a
 * call to exec means it has finished forking so we can drop the trace
 * entirely and move the job on; we have no interest in tracing the new
 * child.  If no fork has been counted yet, the process is simply allowed
 * to continue under trace.
 **/
static void
job_process_trace_exec (Job         *job,
			ProcessType  process)
{
	int saved_errno;

	nih_assert (job != NULL);

	/* Any process can get us to trace them, but we only care about the
	 * main process when the state is still spawned and we're tracing it.
	 */
	if ((process != PROCESS_MAIN) || (job->state != JOB_SPAWNED)
	    || (job->trace_state != TRACE_NORMAL))
		return;

	nih_info (_("%s %s process (%d) executable changed"),
		  job_name (job), process_name (process), job->pid[process]);

	if (job->trace_forks) {
		if (ptrace (PTRACE_DETACH, job->pid[process], NULL, 0) < 0) {
			/* Capture errno straight away: the order in which
			 * the arguments below are evaluated is unspecified
			 * and the helper calls may modify errno before it
			 * is read.
			 */
			saved_errno = errno;

			nih_warn (_("Failed to detach traced "
				    "%s %s process (%d): %s"),
				  job_name (job), process_name (process),
				  job->pid[process], strerror (saved_errno));
		}

		/* The trace is over whether or not the detach worked */
		job->trace_state = TRACE_NONE;
		job_change_state (job, job_next_state (job));
	} else {
		if (ptrace (PTRACE_CONT, job->pid[process], NULL, 0) < 0) {
			saved_errno = errno;

			nih_warn (_("Failed to continue traced %s %s process (%d): %s"),
				  job_name (job), process_name (process),
				  job->pid[process], strerror (saved_errno));
		}
	}
}
/**
 * job_process_find:
 * @pid: process id to find,
 * @process: pointer to place process which is running @pid.
 *
 * Searches every instance of every class in the job classes hash table
 * for one that has a process with the given @pid.  If @process is not
 * NULL, it is set to the index of the matching entry in that job's pid
 * table; it is left unmodified when no job is found.
 *
 * Returns: job found or NULL if not known.
 **/
Job *
job_process_find (pid_t        pid,
		  ProcessType *process)
{
	nih_assert (pid > 0);

	job_class_init ();

	NIH_HASH_FOREACH (job_classes, class_iter) {
		JobClass *class = (JobClass *)class_iter;

		NIH_HASH_FOREACH (class->instances, instance_iter) {
			Job *candidate = (Job *)instance_iter;
			int  slot;

			for (slot = 0; slot < PROCESS_LAST; slot++) {
				if (candidate->pid[slot] != pid)
					continue;

				/* Report which process matched if asked to */
				if (process)
					*process = slot;

				return candidate;
			}
		}
	}

	return NULL;
}