Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for waiting using netlink #10

Merged
merged 6 commits into from
Sep 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 18 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,19 +27,29 @@ Of course, pwait returns its own error codes to indicate whether something goes
wrong. To tell whether a nonzero exit code came from pwait itself or the
program being waited for, look at the log output.

pwait uses the ptrace system call, which means there are some prerequisites
for you to use it:
pwait uses one of two methods to collect the exit code of its target process:

- the ptrace system call, which attaches to the target process like a debugger
- the netlink connector, which registers with the kernel to be notified every
time a process exits (or forks, execs, etc.) and ignores all such notifications
except for the process you ask it to watch

Needless to say, this is fairly low-level system stuff, so it comes with a few
caveats:

- It probably only works on Linux, not on other UNIX-like OSes. (Support for
other OSes could be implemented, in principle.)
- You have to have capability support enabled in the kernel and the capability
manipulation shell utility `setcap` installed on the system
manipulation shell utility `setcap` installed on the system (unless you run
pwait as root)
- It must be installed on a filesystem which supports extended attributes, so
that you can add cap_sys_ptrace to the permitted capabilities list with setcap
- It only works on processes that you can send signals to. In particular, you
can't pwait for a process running as root (or setuid/setgid) unless you run
pwait as root. You also can't pwait for a process that is already being traced
(such as one being run in a debugger).
that you can add cap_sys_ptrace and/or cap_net_admin to the permitted
capabilities list with setcap (again, unless you run it as root)
- The ptrace method only works on processes that you can send signals to. In
particular, you can't pwait for a process running as root (or setuid/setgid)
unless you run pwait as root. You also can't pwait for a process that is
already being traced (such as one being run in a debugger). The netlink method
doesn't suffer from these particular limitations, so it's the default.

If this utility turns out to be useful, a future addition might be a
polling mode which allows one to get around these difficulties.
Expand Down
4 changes: 2 additions & 2 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ include(CheckSymbolExists)
check_symbol_exists(__GLIBC__ features.h _GNU_SOURCE)
configure_file(config.h.in config.h)

add_executable(pwait pwait.c ptrace.c capabilities.c)
add_executable(pwait pwait.c ptrace.c netlink.c capabilities.c)
target_link_libraries(pwait cap)

install(TARGETS pwait RUNTIME DESTINATION bin)
install(CODE "execute_process(COMMAND sudo setcap cap_sys_ptrace+ep pwait WORKING_DIRECTORY ${CMAKE_INSTALL_PREFIX}/bin)")
install(CODE "execute_process(COMMAND sudo setcap 'cap_sys_ptrace+p cap_net_admin+p' pwait WORKING_DIRECTORY ${CMAKE_INSTALL_PREFIX}/bin)")
205 changes: 205 additions & 0 deletions src/netlink.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,205 @@
/*
* Inspired by http://bewareofgeek.livejournal.com/2945.html
*/

#include <errno.h>
#include <signal.h>
#include <stdlib.h>
#include <string.h>
#include <syslog.h>
#include <sysexits.h>
#include <unistd.h>
#include <asm/types.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/connector.h>
#include <linux/cn_proc.h>
#include <linux/rtnetlink.h>
#include "pwait.h"

/**
 * Open a raw netlink socket that uses the kernel connector protocol.
 *
 * A failure to create the socket is reported through syslog at critical
 * priority.
 *
 * @return The descriptor of the new socket, or -1 if it could not be created.
 */
static int create_netlink_socket() {
    const int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_CONNECTOR);
    if (fd != -1) {
        return fd;
    }

    syslog(LOG_CRIT, "Unable to create netlink socket");
    return -1;
}

/**
 * Bind a connector netlink socket to the process-event multicast group
 * (@c CN_IDX_PROC), using this process's ID as the netlink port ID.
 *
 * A failure to bind is reported through syslog at critical priority.
 *
 * @param netlink_socket The netlink socket to bind.
 * @return @c TRUE if the bind succeeded or @c FALSE if it did not.
 */
static int bind_netlink_socket(int netlink_socket) {
    struct sockaddr_nl local_address;
    memset(&local_address, 0, sizeof local_address);
    local_address.nl_family = AF_NETLINK;
    local_address.nl_pid = getpid();
    local_address.nl_groups = CN_IDX_PROC;

    if (bind(netlink_socket, (struct sockaddr*)&local_address, sizeof local_address) != -1) {
        return TRUE;
    }

    syslog(LOG_CRIT, "Unable to bind netlink socket");
    return FALSE;
}

/**
 * Ask the kernel to start delivering process events on the given socket by
 * sending a @c PROC_CN_MCAST_LISTEN request addressed to the process connector.
 *
 * A failure to send is reported through syslog at critical priority.
 *
 * @param netlink_socket The bound connector netlink socket to send on.
 * @return @c TRUE if the request was sent or @c FALSE if sending failed.
 */
static int subscribe_to_process_events(int netlink_socket) {
    // netlink header, followed by a connector message whose payload is the multicast operation
    struct __attribute__ ((aligned(NLMSG_ALIGNTO))) {
        struct nlmsghdr header;
        struct __attribute__ ((__packed__)) {
            struct cn_msg message;
            enum proc_cn_mcast_op desired_status;
        };
    } request;
    memset(&request, 0, sizeof request);

    // payload: start listening
    request.desired_status = PROC_CN_MCAST_LISTEN;

    // connector header: address the process connector and give the payload size
    request.message.len = sizeof(enum proc_cn_mcast_op);
    request.message.id.val = CN_VAL_PROC;
    request.message.id.idx = CN_IDX_PROC;

    // netlink header
    request.header.nlmsg_type = NLMSG_DONE; // indicates the last message in a series
    request.header.nlmsg_pid = getpid();
    request.header.nlmsg_len = sizeof request;

    if (send(netlink_socket, &request, sizeof request, 0) != -1) {
        return TRUE;
    }

    syslog(LOG_CRIT, "Unable to send message to netlink socket");
    return FALSE;
}


/*
 * Flag polled by the receive loop and set from the SIGTERM/SIGINT handler.
 * It is a volatile sig_atomic_t because that is the object type a signal
 * handler is permitted to write to.
 */
static volatile sig_atomic_t terminate = FALSE;

/**
 * Listen on the given netlink socket for a message indicating that the given
 * process ID has terminated.
 *
 * The loop keeps receiving process events until it sees an exit event for
 * @p target_pid, an error occurs, or the @c terminate flag is set.
 *
 * @param nl_sock The socket to listen on.
 * @param target_pid The process ID whose termination should be listened for.
 * @return The @c exit_code value reported in the exit event for the process,
 *         or -1 if an error occurred or listening was stopped by a signal.
 */
static int listen_for_process_termination(int nl_sock, pid_t target_pid) {
    struct __attribute__ ((aligned(NLMSG_ALIGNTO))) {
        struct nlmsghdr nl_hdr;
        struct __attribute__ ((__packed__)) {
            struct cn_msg cn_msg;
            struct proc_event proc_ev;
        };
    } netlink_message;

    while (!terminate) {
        // Clear the buffer before every receive: a datagram shorter than the
        // full structure must not be decoded using uninitialized bytes or
        // bytes left over from a previous event. Zeroed fields can never
        // match PROC_EVENT_EXIT.
        memset(&netlink_message, 0, sizeof netlink_message);

        ssize_t receive_result = recv(nl_sock, &netlink_message, sizeof netlink_message, 0);
        if (receive_result == 0) {
            // probably means the socket was shut down from the other end
            syslog(LOG_CRIT, "Socket appears to have been shut down");
            return -1;
        }
        if (receive_result == -1) {
            if (errno == EINTR) {
                // interrupted by a signal; the loop condition re-checks the terminate flag
                continue;
            }
            syslog(LOG_CRIT, "Error receiving from netlink socket");
            return -1;
        }

        if (netlink_message.proc_ev.what == PROC_EVENT_EXIT
                && netlink_message.proc_ev.event_data.exit.process_pid == target_pid) {
            return netlink_message.proc_ev.event_data.exit.exit_code;
        }
    }
    return -1;
}

/**
 * Signal handler that requests the end of the exit-message receive loop by
 * raising the @c terminate flag.
 *
 * @param signal The number of the delivered signal (not used).
 */
static void stop_listening(const int signal) {
    (void)signal;
    terminate = TRUE;
}


/**
 * A pair of signal dispositions, one for @c SIGTERM and one for @c SIGINT,
 * so that both can be installed or saved together.
 */
struct signal_handlers {
    // disposition for SIGTERM
    struct sigaction term_action;
    // disposition for SIGINT
    struct sigaction int_action;
};

/**
 * Replace the current @c SIGTERM and @c SIGINT dispositions with the given
 * ones, saving the dispositions that were previously in effect.
 *
 * The results of the underlying sigaction() calls are not checked.
 *
 * @param new_handlers The dispositions to install.
 * @param[out] old_handlers Receives the dispositions that were replaced.
 */
static void swap_signal_handlers(const struct signal_handlers* new_handlers, struct signal_handlers* old_handlers) {
    const struct sigaction* const incoming_term = &new_handlers->term_action;
    const struct sigaction* const incoming_int = &new_handlers->int_action;
    struct sigaction* const saved_term = &old_handlers->term_action;
    struct sigaction* const saved_int = &old_handlers->int_action;

    sigaction(SIGTERM, incoming_term, saved_term);
    sigaction(SIGINT, incoming_int, saved_int);
}


/**
 * Wait for a process to exit and capture its exit code using netlink.
 *
 * Unless running with an effective UID of 0, the function first acquires
 * @c CAP_NET_ADMIN. It then opens a connector netlink socket, subscribes to
 * process events, and blocks until the exit event for @p pid arrives. While
 * it is blocked, @c SIGTERM and @c SIGINT stop the wait instead of taking
 * their previous dispositions; those dispositions are restored afterwards.
 *
 * @param pid The process ID of the process to wait for.
 * @return The exit status of the process; @c EX_SOFTWARE if the required
 *         capability could not be acquired; or @c EX_OSERR if the netlink
 *         socket could not be set up, receiving failed, or the wait was
 *         stopped by a signal.
 */
int wait_using_netlink(pid_t pid) {
    if (geteuid() != 0) {
#if defined(CAP_NET_ADMIN)
        cap_value_t capability_to_acquire[1] = {CAP_NET_ADMIN};
        if (!acquire_capabilities(1, capability_to_acquire)) {
            return EX_SOFTWARE;
        }
#else
        syslog(LOG_CRIT, "CAP_NET_ADMIN not available");
        return EX_SOFTWARE;
#endif
    }

    int nl_socket = create_netlink_socket();
    if (nl_socket == -1) {
        // creation already logged the failure; there is no descriptor to bind or close
        return EX_OSERR;
    }
    if (!bind_netlink_socket(nl_socket)) {
        close(nl_socket);
        return EX_OSERR;
    }
    if (!subscribe_to_process_events(nl_socket)) {
        close(nl_socket);
        return EX_OSERR;
    }

    int process_result;
    {
        struct signal_handlers new_handlers = {
            .term_action = {.sa_handler = stop_listening},
            .int_action = {.sa_handler = stop_listening},
        };
        struct signal_handlers old_handlers;

        swap_signal_handlers(&new_handlers, &old_handlers);
        process_result = listen_for_process_termination(nl_socket, pid);
        // Reset the signal handler (hopefully TERM or INT doesn't come right here)
        swap_signal_handlers(&old_handlers, &new_handlers);
    }

    // The socket is no longer needed on any path, so release it before reporting
    close(nl_socket);

    if (process_result == -1) {
        return EX_OSERR;
    }

    const int exit_status = WEXITSTATUS(process_result);
    syslog(LOG_INFO, "Process %d exited with status %d", pid, exit_status);

    return exit_status;
}
81 changes: 67 additions & 14 deletions src/pwait.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,56 @@
#include <syslog.h>
#include <getopt.h>
#include <unistd.h>
#include <string.h>
#include "config.h"
#include "pwait.h"

#define TRUE 1
#define FALSE 0

#ifdef _GNU_SOURCE
#define HAVE_GETOPT_LONG
#endif

#if _POSIX_C_SOURCE >= 2 || defined(_XOPEN_SOURCE)
#define HAVE_GETOPT
#endif

static void usage(const char* name) {
fprintf(stderr, "Usage: %s pid\n", name);
#if defined(HAVE_GETOPT_LONG) || defined(HAVE_GETOPT)
fprintf(stderr, "Usage: %s [OPTION]... PID\n", name);
#else
fprintf(stderr, "Usage: %s PID\n", name);
#endif
}

static const char* options = "v";
#ifdef _GNU_SOURCE
/**
 * Print the help text to standard output: a usage line, a one-line
 * description and, when option parsing is compiled in, the list of options.
 *
 * @param name The program name to show in the usage line.
 */
static void help(const char* name) {
#if defined(HAVE_GETOPT_LONG) || defined(HAVE_GETOPT)
    printf("Usage: %s [OPTION]... PID\n", name);
#else
    printf("Usage: %s PID\n", name);
#endif
    printf("Wait for a process to finish and return its exit code\n");
    // The option list is only printed when some form of getopt is available
#if defined(HAVE_GETOPT_LONG)
    printf("\n");
    printf(" -h, --help print this help message and exit\n");
    printf(" -m, --method=METHOD use METHOD to wait for the process, either ptrace or netlink\n");
    printf(" -v, --verbose print diagnostic output to stderr\n");
#elif defined(HAVE_GETOPT)
    printf("\n");
    printf(" -h print this help message and exit\n");
    // -m takes a required argument, so name it here just as the long form does
    printf(" -m METHOD use METHOD to wait for the process, either ptrace or netlink\n");
    printf(" -v print diagnostic output to stderr\n");
#endif
}

static const char* options = "hm:v";
#ifdef HAVE_GETOPT_LONG
static struct option long_options[] = {
{"help", no_argument, NULL, 'h'},
{"method", required_argument, NULL, 'm'},
{"verbose", no_argument, NULL, 'v'},
{0, 0, 0, 0}
};
Expand All @@ -32,16 +70,32 @@ int main(const int argc, char* const* argv) {
int verbose = 0;
int c;

int (*wait_function)(pid_t) = wait_using_netlink;

// some trickery to be able to use either getopt_long, if it's available, or getopt, if not
#ifdef _GNU_SOURCE
int option_index;
#if defined(HAVE_GETOPT_LONG)
while ((c = getopt_long(argc, argv, options, long_options, &option_index)) != -1) {
#elif _POSIX_C_SOURCE >= 2 || defined(_XOPEN_SOURCE)
#elif defined(HAVE_GETOPT)
while ((c = getopt(argc, argv, options)) != -1) {
#else
while (FALSE) {
#endif
switch (c) {
case 'h':
help(argv[0]);
return EX_OK;
case 'm':
if (strncmp(optarg, "ptrace", 7) == 0) {
wait_function = wait_using_ptrace;
}
else if (strncmp(optarg, "netlink", 8) == 0) {
wait_function = wait_using_netlink;
}
else {
wait_function = NULL;
}
break;
case 'v':
verbose = 1;
break;
Expand All @@ -56,23 +110,22 @@ int main(const int argc, char* const* argv) {

openlog("pwait", verbose > 0 ? LOG_PERROR : LOG_CONS, LOG_USER);

if (wait_function == NULL) {
fprintf(stderr, "Invalid method (use \"ptrace\" or \"netlink\")");
return EX_USAGE;
}

pid = strtol(pidarg, &endptr, 0);
if (pidarg == endptr) {
syslog(LOG_CRIT, "First non-option argument \"%s\" must be a numeric PID", argv[optind]);
if (!verbose) {
fprintf(stderr, "First non-option argument \"%s\" must be a numeric PID", argv[optind]);
}
fprintf(stderr, "First non-option argument \"%s\" must be a numeric PID", argv[optind]);
return EX_USAGE;
}
if (pid < 1) {
syslog(LOG_CRIT, "Invalid process ID %d passed as first argument", pid);
if (!verbose) {
fprintf(stderr, "Invalid process ID %d passed as first argument", pid);
}
fprintf(stderr, "Invalid process ID %d passed as first argument", pid);
return EX_NOINPUT;
}

status = wait_using_ptrace(pid);
status = wait_function(pid);

closelog();
return status;
Expand Down
1 change: 1 addition & 0 deletions src/pwait.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,4 @@
int acquire_capabilities(size_t n, const cap_value_t* capabilities_to_acquire);

int wait_using_ptrace(pid_t pid);
int wait_using_netlink(pid_t pid);
Loading