mirror of
https://github.com/checkpoint-restore/criu.git
synced 2026-01-23 02:14:37 +00:00
The "man 2 close":"Dealing with error returns from close()" says:
"Retrying the close() after a failure return is the wrong thing to do"
We should not leave the fd there, attempting to close it again on next
close()/close_safe() may lead to accidentally closing something else.
It confirms with the kernel code where sys_close() removes fd from
fdtable in this stack:
+-> sys_close
+-> file_close_fd
+-> file_close_fd_locked
+-> rcu_assign_pointer(fdt->fd[fd], NULL)
If there was an fd this stack is always reached and fd is always
removed.
Let's replace the fd with -1 after close no matter what.
Signed-off-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
2242 lines
45 KiB
C
2242 lines
45 KiB
C
#define _XOPEN_SOURCE 500
|
|
|
|
#include <stdio.h>
|
|
#include <stdarg.h>
|
|
#include <string.h>
|
|
#include <errno.h>
|
|
#include <stdbool.h>
|
|
#include <signal.h>
|
|
#include <unistd.h>
|
|
#include <dirent.h>
|
|
#include <sys/sendfile.h>
|
|
#include <fcntl.h>
|
|
#include <poll.h>
|
|
#include <arpa/inet.h>
|
|
#include <sys/mount.h>
|
|
#include <sys/param.h>
|
|
#include <sys/types.h>
|
|
#include <sys/ptrace.h>
|
|
#include <sys/stat.h>
|
|
#include <sys/mman.h>
|
|
#include <sys/wait.h>
|
|
#include <sys/socket.h>
|
|
#include <sys/time.h>
|
|
#include <sys/resource.h>
|
|
#include <netdb.h>
|
|
#include <netinet/in.h>
|
|
#include <sched.h>
|
|
#include <ftw.h>
|
|
#include <time.h>
|
|
#include <libgen.h>
|
|
#include <uuid/uuid.h>
|
|
|
|
#include "linux/mount.h"
|
|
|
|
#include "kerndat.h"
|
|
#include "page.h"
|
|
#include "util.h"
|
|
#include "image.h"
|
|
#include "vma.h"
|
|
#include "mem.h"
|
|
#include "namespaces.h"
|
|
#include "criu-log.h"
|
|
#include "util-caps.h"
|
|
|
|
#include "clone-noasan.h"
|
|
#include "cr_options.h"
|
|
#include "cr-service.h"
|
|
#include "files.h"
|
|
#include "pstree.h"
|
|
#include "sched.h"
|
|
#include "mount-v2.h"
|
|
|
|
#include "cr-errno.h"
|
|
#include "action-scripts.h"
|
|
|
|
#include "compel/infect-util.h"
|
|
#include <compel/plugins/std/syscall-codes.h>
|
|
|
|
#define VMA_OPT_LEN 128
|
|
|
|
static int xatol_base(const char *string, long *number, int base)
|
|
{
|
|
char *endptr;
|
|
long nr;
|
|
|
|
errno = 0;
|
|
nr = strtol(string, &endptr, base);
|
|
if ((errno == ERANGE && (nr == LONG_MAX || nr == LONG_MIN)) || (errno != 0 && nr == 0)) {
|
|
pr_perror("failed to convert string '%s'", string);
|
|
return -EINVAL;
|
|
}
|
|
|
|
if ((endptr == string) || (*endptr != '\0')) {
|
|
pr_err("String is not a number: '%s'\n", string);
|
|
return -EINVAL;
|
|
}
|
|
*number = nr;
|
|
return 0;
|
|
}
|
|
|
|
int xatol(const char *string, long *number)
|
|
{
|
|
return xatol_base(string, number, 10);
|
|
}
|
|
|
|
int xatoi(const char *string, int *number)
|
|
{
|
|
long tmp;
|
|
int err;
|
|
|
|
err = xatol(string, &tmp);
|
|
if (err)
|
|
return err;
|
|
|
|
if (tmp > INT_MAX || tmp < INT_MIN) {
|
|
pr_err("value %#lx (%ld) is out of int range\n", tmp, tmp);
|
|
return -ERANGE;
|
|
}
|
|
|
|
*number = (int)tmp;
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* This function reallocates passed str pointer.
|
|
* It means:
|
|
* 1) passed pointer can be either NULL, or previously allocated by malloc.
|
|
* 2) Passed pointer can' be reused. It's either freed in case of error or can
|
|
* be changed.
|
|
*/
|
|
static char *xvstrcat(char *str, const char *fmt, va_list args)
|
|
{
|
|
size_t offset = 0, delta;
|
|
int ret;
|
|
char *new;
|
|
va_list tmp;
|
|
|
|
if (str)
|
|
offset = strlen(str);
|
|
delta = strlen(fmt) * 2;
|
|
|
|
do {
|
|
new = xrealloc(str, offset + delta);
|
|
if (!new) {
|
|
/* realloc failed. We must release former string */
|
|
xfree(str);
|
|
pr_err("Failed to allocate string\n");
|
|
return new;
|
|
}
|
|
|
|
va_copy(tmp, args);
|
|
ret = vsnprintf(new + offset, delta, fmt, tmp);
|
|
va_end(tmp);
|
|
if (ret < delta) /* an error, or all was written */
|
|
break;
|
|
|
|
/* NOTE: vsnprintf returns the amount of bytes
|
|
* to allocate. */
|
|
delta = ret + 1;
|
|
str = new;
|
|
} while (1);
|
|
|
|
if (ret < 0) {
|
|
/* vsnprintf failed */
|
|
pr_err("Failed to print string\n");
|
|
xfree(new);
|
|
new = NULL;
|
|
}
|
|
return new;
|
|
}
|
|
|
|
char *xstrcat(char *str, const char *fmt, ...)
|
|
{
|
|
va_list args;
|
|
|
|
va_start(args, fmt);
|
|
str = xvstrcat(str, fmt, args);
|
|
va_end(args);
|
|
|
|
return str;
|
|
}
|
|
|
|
char *xsprintf(const char *fmt, ...)
|
|
{
|
|
va_list args;
|
|
char *str;
|
|
|
|
va_start(args, fmt);
|
|
str = xvstrcat(NULL, fmt, args);
|
|
va_end(args);
|
|
|
|
return str;
|
|
}
|
|
|
|
static void vma_opt_str(const struct vma_area *v, char *opt)
|
|
{
|
|
int p = 0;
|
|
|
|
#define opt2s(_o, _s) \
|
|
do { \
|
|
if (v->e->status & _o) \
|
|
p += sprintf(opt + p, _s " "); \
|
|
} while (0)
|
|
|
|
opt[p] = '\0';
|
|
opt2s(VMA_AREA_REGULAR, "reg");
|
|
opt2s(VMA_AREA_STACK, "stk");
|
|
opt2s(VMA_AREA_VSYSCALL, "vsys");
|
|
opt2s(VMA_AREA_VDSO, "vdso");
|
|
opt2s(VMA_AREA_VVAR, "vvar");
|
|
opt2s(VMA_AREA_HEAP, "heap");
|
|
opt2s(VMA_FILE_PRIVATE, "fp");
|
|
opt2s(VMA_FILE_SHARED, "fs");
|
|
opt2s(VMA_ANON_SHARED, "as");
|
|
opt2s(VMA_ANON_PRIVATE, "ap");
|
|
opt2s(VMA_AREA_SYSVIPC, "sysv");
|
|
opt2s(VMA_AREA_SOCKET, "sk");
|
|
opt2s(VMA_AREA_UPROBES, "uprobes");
|
|
|
|
#undef opt2s
|
|
}
|
|
|
|
void pr_vma(const struct vma_area *vma_area)
|
|
{
|
|
char opt[VMA_OPT_LEN];
|
|
memset(opt, 0, VMA_OPT_LEN);
|
|
|
|
if (!vma_area)
|
|
return;
|
|
|
|
vma_opt_str(vma_area, opt);
|
|
pr_info("%#" PRIx64 "-%#" PRIx64 " (%" PRIi64 "K) prot %#x flags %#x fdflags %#o st %#x off %#" PRIx64 " "
|
|
"%s shmid: %#" PRIx64 "\n",
|
|
vma_area->e->start, vma_area->e->end, KBYTES(vma_area_len(vma_area)), vma_area->e->prot,
|
|
vma_area->e->flags, vma_area->e->fdflags, vma_area->e->status, vma_area->e->pgoff, opt,
|
|
vma_area->e->shmid);
|
|
}
|
|
|
|
int close_safe(int *fd)
|
|
{
|
|
int ret = 0;
|
|
|
|
if (*fd > -1) {
|
|
ret = close(*fd);
|
|
if (ret)
|
|
pr_perror("Failed closing fd %d", *fd);
|
|
*fd = -1;
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
int reopen_fd_as_safe(char *file, int line, int new_fd, int old_fd, bool allow_reuse_fd)
|
|
{
|
|
int tmp;
|
|
|
|
if (old_fd != new_fd) {
|
|
if (!allow_reuse_fd)
|
|
tmp = fcntl(old_fd, F_DUPFD, new_fd);
|
|
else
|
|
tmp = dup2(old_fd, new_fd);
|
|
if (tmp < 0) {
|
|
pr_perror("Dup %d -> %d failed (called at %s:%d)", old_fd, new_fd, file, line);
|
|
return tmp;
|
|
} else if (tmp != new_fd) {
|
|
close(tmp);
|
|
pr_err("fd %d already in use (called at %s:%d)\n", new_fd, file, line);
|
|
return -1;
|
|
}
|
|
|
|
/* Just to have error message if failed */
|
|
close_safe(&old_fd);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
int move_fd_from(int *img_fd, int want_fd)
|
|
{
|
|
if (*img_fd == want_fd) {
|
|
int tmp;
|
|
|
|
tmp = dup(*img_fd);
|
|
if (tmp < 0) {
|
|
pr_perror("Can't dup file");
|
|
return -1;
|
|
}
|
|
|
|
close(*img_fd);
|
|
|
|
*img_fd = tmp;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Cached opened /proc/$pid and /proc/self files.
|
|
* Used for faster access to /proc/.../foo files
|
|
* by using openat()-s
|
|
*/
|
|
|
|
static pid_t open_proc_pid = PROC_NONE;
|
|
static pid_t open_proc_self_pid;
|
|
|
|
int set_proc_self_fd(int fd)
|
|
{
|
|
int ret;
|
|
|
|
if (fd < 0)
|
|
return close_service_fd(PROC_SELF_FD_OFF);
|
|
|
|
open_proc_self_pid = getpid();
|
|
ret = install_service_fd(PROC_SELF_FD_OFF, fd);
|
|
|
|
return ret;
|
|
}
|
|
|
|
static inline int set_proc_pid_fd(int pid, int fd)
|
|
{
|
|
int ret;
|
|
|
|
if (fd < 0)
|
|
return close_service_fd(PROC_PID_FD_OFF);
|
|
|
|
open_proc_pid = pid;
|
|
ret = install_service_fd(PROC_PID_FD_OFF, fd);
|
|
|
|
return ret;
|
|
}
|
|
|
|
static inline int get_proc_fd(int pid)
|
|
{
|
|
if (pid == PROC_SELF) {
|
|
int open_proc_self_fd;
|
|
|
|
open_proc_self_fd = get_service_fd(PROC_SELF_FD_OFF);
|
|
/**
|
|
* FIXME in case two processes from different pidnses have the
|
|
* same pid from getpid() and one inherited service fds from
|
|
* another or they share them by shared fdt - this check will
|
|
* not detect that one of them reuses /proc/self of another.
|
|
* Everything proc related may break in this case.
|
|
*/
|
|
if (open_proc_self_fd >= 0 && open_proc_self_pid != getpid())
|
|
open_proc_self_fd = -1;
|
|
return open_proc_self_fd;
|
|
} else if (pid == open_proc_pid)
|
|
return get_service_fd(PROC_PID_FD_OFF);
|
|
else
|
|
return -1;
|
|
}
|
|
|
|
int close_pid_proc(void)
|
|
{
|
|
set_proc_self_fd(-1);
|
|
set_proc_pid_fd(PROC_NONE, -1);
|
|
return 0;
|
|
}
|
|
|
|
void close_proc(void)
|
|
{
|
|
close_pid_proc();
|
|
close_service_fd(PROC_FD_OFF);
|
|
}
|
|
|
|
int set_proc_fd(int fd)
|
|
{
|
|
int _fd;
|
|
|
|
_fd = dup(fd);
|
|
if (_fd < 0) {
|
|
pr_perror("dup() failed");
|
|
return -1;
|
|
}
|
|
if (install_service_fd(PROC_FD_OFF, _fd) < 0)
|
|
return -1;
|
|
return 0;
|
|
}
|
|
|
|
static int open_proc_sfd(char *path)
|
|
{
|
|
int fd, ret;
|
|
|
|
close_proc();
|
|
fd = open(path, O_DIRECTORY | O_PATH);
|
|
if (fd == -1) {
|
|
pr_perror("Can't open %s", path);
|
|
return -1;
|
|
}
|
|
|
|
ret = install_service_fd(PROC_FD_OFF, fd);
|
|
if (ret < 0)
|
|
return -1;
|
|
|
|
return 0;
|
|
}
|
|
|
|
inline int open_pid_proc(pid_t pid)
|
|
{
|
|
char path[18];
|
|
int fd;
|
|
int dfd;
|
|
|
|
fd = get_proc_fd(pid);
|
|
if (fd >= 0)
|
|
return fd;
|
|
|
|
dfd = get_service_fd(PROC_FD_OFF);
|
|
if (dfd < 0) {
|
|
if (open_proc_sfd("/proc") < 0)
|
|
return -1;
|
|
|
|
dfd = get_service_fd(PROC_FD_OFF);
|
|
}
|
|
|
|
if (pid == PROC_GEN)
|
|
/*
|
|
* Don't cache it, close_pid_proc() would
|
|
* close service descriptor otherwise.
|
|
*/
|
|
return dfd;
|
|
|
|
if (pid == PROC_SELF)
|
|
snprintf(path, sizeof(path), "self");
|
|
else
|
|
snprintf(path, sizeof(path), "%d", pid);
|
|
|
|
fd = openat(dfd, path, O_PATH);
|
|
if (fd < 0) {
|
|
pr_perror("Can't open %s", path);
|
|
set_cr_errno(ESRCH);
|
|
return -1;
|
|
}
|
|
|
|
if (pid == PROC_SELF)
|
|
fd = set_proc_self_fd(fd);
|
|
else
|
|
fd = set_proc_pid_fd(pid, fd);
|
|
|
|
return fd;
|
|
}
|
|
|
|
int do_open_proc(pid_t pid, int flags, const char *fmt, ...)
|
|
{
|
|
char path[128];
|
|
va_list args;
|
|
int dirfd;
|
|
|
|
dirfd = open_pid_proc(pid);
|
|
if (dirfd < 0)
|
|
return -1;
|
|
|
|
va_start(args, fmt);
|
|
vsnprintf(path, sizeof(path), fmt, args);
|
|
va_end(args);
|
|
|
|
return openat(dirfd, path, flags);
|
|
}
|
|
|
|
int copy_file(int fd_in, int fd_out, size_t bytes)
|
|
{
|
|
ssize_t written = 0;
|
|
size_t chunk = bytes ? bytes : 4096;
|
|
ssize_t ret;
|
|
|
|
while (1) {
|
|
/*
|
|
* When fd_out is a pipe, sendfile() returns -EINVAL, so we
|
|
* fallback to splice(). Not sure why.
|
|
*/
|
|
if (opts.stream)
|
|
ret = splice(fd_in, NULL, fd_out, NULL, chunk, SPLICE_F_MOVE);
|
|
else
|
|
ret = sendfile(fd_out, fd_in, NULL, chunk);
|
|
if (ret < 0) {
|
|
pr_perror("Can't transfer data to ghost file from image");
|
|
return -1;
|
|
}
|
|
|
|
if (ret == 0) {
|
|
if (bytes && (written != bytes)) {
|
|
pr_err("Ghost file size mismatch %zu/%zu\n", written, bytes);
|
|
return -1;
|
|
}
|
|
break;
|
|
}
|
|
|
|
written += ret;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
int read_fd_link(int lfd, char *buf, size_t size)
|
|
{
|
|
char t[32];
|
|
ssize_t ret;
|
|
|
|
snprintf(t, sizeof(t), "/proc/self/fd/%d", lfd);
|
|
ret = readlink(t, buf, size);
|
|
if (ret < 0) {
|
|
pr_perror("Can't read link of fd %d", lfd);
|
|
return -1;
|
|
} else if ((size_t)ret >= size) {
|
|
pr_err("Buffer for read link of fd %d is too small\n", lfd);
|
|
return -1;
|
|
}
|
|
buf[ret] = 0;
|
|
|
|
return ret;
|
|
}
|
|
|
|
int is_anon_link_type(char *link, char *type)
|
|
{
|
|
char aux[32];
|
|
|
|
snprintf(aux, sizeof(aux), "anon_inode:%s", type);
|
|
return !strcmp(link, aux);
|
|
}
|
|
|
|
#define DUP_SAFE(fd, out) \
|
|
({ \
|
|
int ret__; \
|
|
ret__ = dup(fd); \
|
|
if (ret__ == -1) { \
|
|
pr_perror("dup(%d) failed", fd); \
|
|
goto out; \
|
|
} \
|
|
ret__; \
|
|
})
|
|
|
|
/*
|
|
* If "in" is negative, stdin will be closed.
|
|
* If "out" or "err" are negative, a log file descriptor will be used.
|
|
*/
|
|
int cr_system(int in, int out, int err, char *cmd, char *const argv[], unsigned flags)
|
|
{
|
|
return cr_system_userns(in, out, err, cmd, argv, flags, -1);
|
|
}
|
|
|
|
int cr_close_range(unsigned int fd, unsigned int max_fd, unsigned int flags)
|
|
{
|
|
return syscall(__NR_close_range, fd, max_fd, flags);
|
|
}
|
|
|
|
int close_fds(int minfd)
|
|
{
|
|
DIR *dir;
|
|
struct dirent *de;
|
|
int fd, ret, dfd;
|
|
|
|
if (kdat.has_close_range) {
|
|
if (cr_close_range(minfd, ~0, 0)) {
|
|
pr_perror("close_range failed");
|
|
return -1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
dir = opendir("/proc/self/fd");
|
|
if (dir == NULL) {
|
|
pr_perror("Can't open /proc/self/fd");
|
|
return -1;
|
|
}
|
|
dfd = dirfd(dir);
|
|
|
|
while ((de = readdir(dir))) {
|
|
if (dir_dots(de))
|
|
continue;
|
|
|
|
ret = sscanf(de->d_name, "%d", &fd);
|
|
if (ret != 1) {
|
|
pr_err("Can't parse %s\n", de->d_name);
|
|
return -1;
|
|
}
|
|
if (dfd == fd)
|
|
continue;
|
|
if (fd < minfd)
|
|
continue;
|
|
/* coverity[double_close] */
|
|
close(fd);
|
|
}
|
|
closedir(dir);
|
|
|
|
return 0;
|
|
}
|
|
|
|
int cr_system_userns(int in, int out, int err, char *cmd, char *const argv[], unsigned flags, int userns_pid)
|
|
{
|
|
sigset_t blockmask, oldmask;
|
|
int ret = -1, status;
|
|
pid_t pid;
|
|
|
|
sigemptyset(&blockmask);
|
|
sigaddset(&blockmask, SIGCHLD);
|
|
if (sigprocmask(SIG_BLOCK, &blockmask, &oldmask) == -1) {
|
|
pr_perror("Cannot set mask of blocked signals");
|
|
return -1;
|
|
}
|
|
|
|
pid = fork();
|
|
if (pid == -1) {
|
|
pr_perror("fork() failed");
|
|
goto out;
|
|
} else if (pid == 0) {
|
|
sigemptyset(&blockmask);
|
|
if (sigprocmask(SIG_SETMASK, &blockmask, NULL) == -1) {
|
|
pr_perror("Cannot clear blocked signals");
|
|
goto out_chld;
|
|
}
|
|
|
|
if (userns_pid > 0) {
|
|
if (switch_ns(userns_pid, &user_ns_desc, NULL))
|
|
goto out_chld;
|
|
if (setuid(0) || setgid(0)) {
|
|
pr_perror("Unable to set uid or gid");
|
|
goto out_chld;
|
|
}
|
|
}
|
|
|
|
if (out < 0)
|
|
out = DUP_SAFE(log_get_fd(), out_chld);
|
|
if (err < 0)
|
|
err = DUP_SAFE(log_get_fd(), out_chld);
|
|
|
|
/*
|
|
* out, err, in should be a separate fds,
|
|
* because reopen_fd_as() closes an old fd
|
|
*/
|
|
if (err == out || err == in)
|
|
err = DUP_SAFE(err, out_chld);
|
|
|
|
if (out == in)
|
|
out = DUP_SAFE(out, out_chld);
|
|
|
|
if (move_fd_from(&out, STDIN_FILENO) || move_fd_from(&err, STDIN_FILENO))
|
|
goto out_chld;
|
|
|
|
if (in < 0) {
|
|
close(STDIN_FILENO);
|
|
} else {
|
|
if (reopen_fd_as_nocheck(STDIN_FILENO, in))
|
|
goto out_chld;
|
|
}
|
|
|
|
if (move_fd_from(&err, STDOUT_FILENO))
|
|
goto out_chld;
|
|
|
|
if (reopen_fd_as_nocheck(STDOUT_FILENO, out))
|
|
goto out_chld;
|
|
|
|
if (reopen_fd_as_nocheck(STDERR_FILENO, err))
|
|
goto out_chld;
|
|
|
|
close_fds(STDERR_FILENO + 1);
|
|
|
|
execvp(cmd, argv);
|
|
|
|
/* We can't use pr_error() as log file fd is closed. */
|
|
fprintf(stderr, "Error (%s:%d): " LOG_PREFIX "execvp(\"%s\", ...) failed: %s\n", __FILE__, __LINE__,
|
|
cmd, strerror(errno));
|
|
out_chld:
|
|
_exit(1);
|
|
}
|
|
|
|
while (1) {
|
|
ret = waitpid(pid, &status, 0);
|
|
if (ret == -1) {
|
|
pr_perror("waitpid() failed");
|
|
goto out;
|
|
}
|
|
|
|
if (WIFEXITED(status)) {
|
|
if (!(flags & CRS_CAN_FAIL) && WEXITSTATUS(status))
|
|
pr_err("exited, status=%d\n", WEXITSTATUS(status));
|
|
break;
|
|
} else if (WIFSIGNALED(status)) {
|
|
pr_err("killed by signal %d: %s\n", WTERMSIG(status), strsignal(WTERMSIG(status)));
|
|
break;
|
|
} else if (WIFSTOPPED(status)) {
|
|
pr_err("stopped by signal %d\n", WSTOPSIG(status));
|
|
} else if (WIFCONTINUED(status)) {
|
|
pr_err("continued\n");
|
|
}
|
|
}
|
|
|
|
ret = status ? -1 : 0;
|
|
out:
|
|
if (sigprocmask(SIG_SETMASK, &oldmask, NULL) == -1) {
|
|
pr_perror("Can not unset mask of blocked signals");
|
|
BUG();
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
struct child_args {
|
|
int *sk_pair;
|
|
int (*child_setup)(void);
|
|
};
|
|
|
|
static int child_func(void *_args)
|
|
{
|
|
struct child_args *args = _args;
|
|
int sk, *sk_pair = args->sk_pair;
|
|
char c = 0;
|
|
|
|
sk = sk_pair[1];
|
|
close(sk_pair[0]);
|
|
|
|
if (args->child_setup && args->child_setup() != 0)
|
|
exit(1);
|
|
|
|
if (write(sk, &c, 1) != 1) {
|
|
pr_perror("write");
|
|
exit(1);
|
|
}
|
|
|
|
while (1)
|
|
sleep(1000);
|
|
exit(1);
|
|
}
|
|
|
|
pid_t fork_and_ptrace_attach(int (*child_setup)(void))
|
|
{
|
|
pid_t pid;
|
|
int sk_pair[2], sk;
|
|
char c = 0;
|
|
struct child_args cargs = {
|
|
.sk_pair = sk_pair,
|
|
.child_setup = child_setup,
|
|
};
|
|
|
|
if (socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair)) {
|
|
pr_perror("socketpair");
|
|
return -1;
|
|
}
|
|
|
|
pid = clone_noasan(child_func, CLONE_UNTRACED | SIGCHLD, &cargs);
|
|
if (pid < 0) {
|
|
pr_perror("fork");
|
|
return -1;
|
|
}
|
|
|
|
sk = sk_pair[0];
|
|
close(sk_pair[1]);
|
|
|
|
if (read(sk, &c, 1) != 1) {
|
|
close(sk);
|
|
kill(pid, SIGKILL);
|
|
waitpid(pid, NULL, 0);
|
|
pr_perror("read");
|
|
return -1;
|
|
}
|
|
|
|
close(sk);
|
|
|
|
if (ptrace(PTRACE_ATTACH, pid, NULL, NULL) == -1) {
|
|
pr_perror("Unable to ptrace the child");
|
|
kill(pid, SIGKILL);
|
|
waitpid(pid, NULL, 0);
|
|
return -1;
|
|
}
|
|
|
|
waitpid(pid, NULL, 0);
|
|
|
|
return pid;
|
|
}
|
|
|
|
int status_ready(void)
|
|
{
|
|
char c = 0;
|
|
|
|
if (run_scripts(ACT_STATUS_READY))
|
|
return -1;
|
|
|
|
if (opts.status_fd < 0)
|
|
return 0;
|
|
|
|
if (write(opts.status_fd, &c, 1) != 1) {
|
|
pr_perror("Unable to write into the status fd");
|
|
return -1;
|
|
}
|
|
|
|
return close_safe(&opts.status_fd);
|
|
}
|
|
|
|
int cr_daemon(int nochdir, int noclose, int close_fd)
|
|
{
|
|
int pid;
|
|
|
|
pid = fork();
|
|
if (pid < 0) {
|
|
pr_perror("Can't fork");
|
|
return -1;
|
|
}
|
|
|
|
if (pid > 0)
|
|
return pid;
|
|
|
|
setsid();
|
|
if (!nochdir)
|
|
if (chdir("/") == -1)
|
|
pr_perror("Can't change directory");
|
|
if (!noclose) {
|
|
int fd;
|
|
|
|
if (close_fd != -1)
|
|
close(close_fd);
|
|
|
|
fd = open("/dev/null", O_RDWR);
|
|
if (fd < 0) {
|
|
pr_perror("Can't open /dev/null");
|
|
return -1;
|
|
}
|
|
dup2(fd, 0);
|
|
dup2(fd, 1);
|
|
dup2(fd, 2);
|
|
close(fd);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
int is_root_user(void)
|
|
{
|
|
if (geteuid() != 0) {
|
|
pr_err("You need to be root to run this command\n");
|
|
return 0;
|
|
}
|
|
|
|
return 1;
|
|
}
|
|
|
|
/*
|
|
* is_empty_dir will always close the FD dirfd: either implicitly
|
|
* via closedir or explicitly in case fdopendir had failed
|
|
*
|
|
* return values:
|
|
* < 0 : open directory stream failed
|
|
* 0 : directory is not empty
|
|
* 1 : directory is empty
|
|
*/
|
|
|
|
int is_empty_dir(int dirfd)
|
|
{
|
|
int ret = 0;
|
|
DIR *fdir = NULL;
|
|
struct dirent *de;
|
|
|
|
fdir = fdopendir(dirfd);
|
|
if (!fdir) {
|
|
pr_perror("open directory stream by fd %d failed", dirfd);
|
|
close_safe(&dirfd);
|
|
return -1;
|
|
}
|
|
|
|
while ((de = readdir(fdir))) {
|
|
if (dir_dots(de))
|
|
continue;
|
|
|
|
goto out;
|
|
}
|
|
|
|
ret = 1;
|
|
out:
|
|
closedir(fdir);
|
|
return ret;
|
|
}
|
|
|
|
/*
|
|
* Get PFN from pagemap file for virtual address vaddr.
|
|
* Optionally if fd >= 0, it's used as pagemap file descriptor
|
|
* (may be other task's pagemap)
|
|
*/
|
|
int vaddr_to_pfn(int fd, unsigned long vaddr, u64 *pfn)
|
|
{
|
|
int ret = -1;
|
|
off_t off;
|
|
bool close_fd = false;
|
|
|
|
if (fd < 0) {
|
|
fd = open_proc(PROC_SELF, "pagemap");
|
|
if (fd < 0)
|
|
return -1;
|
|
close_fd = true;
|
|
}
|
|
|
|
off = (vaddr / page_size()) * sizeof(u64);
|
|
ret = pread(fd, pfn, sizeof(*pfn), off);
|
|
if (ret != sizeof(*pfn)) {
|
|
pr_perror("Can't read pme for pid %d", getpid());
|
|
ret = -1;
|
|
} else {
|
|
*pfn &= PME_PFRAME_MASK;
|
|
ret = 0;
|
|
}
|
|
|
|
if (close_fd)
|
|
close(fd);
|
|
|
|
return ret;
|
|
}
|
|
|
|
/*
|
|
* Note since VMA_AREA_NONE = 0 we can skip assignment
|
|
* here and simply rely on xzalloc
|
|
*/
|
|
struct vma_area *alloc_vma_area(void)
|
|
{
|
|
struct vma_area *p;
|
|
|
|
p = xzalloc(sizeof(*p) + sizeof(VmaEntry));
|
|
if (p) {
|
|
p->e = (VmaEntry *)(p + 1);
|
|
vma_entry__init(p->e);
|
|
p->e->fd = -1;
|
|
}
|
|
|
|
return p;
|
|
}
|
|
|
|
int mkdirpat(int fd, const char *path, int mode)
|
|
{
|
|
size_t i;
|
|
char made_path[PATH_MAX], *pos;
|
|
|
|
if (strlen(path) >= PATH_MAX) {
|
|
pr_err("path %s is longer than PATH_MAX\n", path);
|
|
return -ENOSPC;
|
|
}
|
|
|
|
strcpy(made_path, path);
|
|
|
|
i = 0;
|
|
if (made_path[0] == '/')
|
|
i++;
|
|
|
|
for (; i < strlen(made_path); i++) {
|
|
pos = strchr(made_path + i, '/');
|
|
if (pos)
|
|
*pos = '\0';
|
|
if (mkdirat(fd, made_path, mode) < 0 && errno != EEXIST) {
|
|
int ret = -errno;
|
|
pr_perror("couldn't mkdirpat directory %s", made_path);
|
|
return ret;
|
|
}
|
|
if (pos) {
|
|
*pos = '/';
|
|
i = pos - made_path;
|
|
} else
|
|
break;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
bool is_path_prefix(const char *path, const char *prefix)
|
|
{
|
|
if (strstartswith(path, prefix)) {
|
|
size_t len = strlen(prefix);
|
|
switch (path[len]) {
|
|
case '\0':
|
|
case '/':
|
|
return true;
|
|
}
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
FILE *fopenat(int dirfd, char *path, char *cflags)
|
|
{
|
|
int tmp, flags = 0;
|
|
char *iter;
|
|
|
|
for (iter = cflags; *iter; iter++) {
|
|
switch (*iter) {
|
|
case 'r':
|
|
flags |= O_RDONLY;
|
|
break;
|
|
case 'a':
|
|
flags |= O_APPEND;
|
|
break;
|
|
case 'w':
|
|
flags |= O_WRONLY | O_CREAT;
|
|
break;
|
|
case '+':
|
|
flags = O_RDWR | O_CREAT;
|
|
break;
|
|
}
|
|
}
|
|
|
|
tmp = openat(dirfd, path, flags, S_IRUSR | S_IWUSR);
|
|
if (tmp < 0)
|
|
return NULL;
|
|
|
|
return fdopen(tmp, cflags);
|
|
}
|
|
|
|
int cr_fchown(int fd, uid_t new_uid, gid_t new_gid)
|
|
{
|
|
struct stat st;
|
|
|
|
if (!fchown(fd, new_uid, new_gid))
|
|
return 0;
|
|
if (errno != EPERM)
|
|
return -1;
|
|
|
|
if (fstat(fd, &st) < 0) {
|
|
pr_perror("fstat() after fchown() for fd %d", fd);
|
|
goto out_eperm;
|
|
}
|
|
pr_debug("fstat(%d): uid %u gid %u\n", fd, st.st_uid, st.st_gid);
|
|
|
|
if (new_uid != st.st_uid || new_gid != st.st_gid)
|
|
goto out_eperm;
|
|
|
|
return 0;
|
|
out_eperm:
|
|
errno = EPERM;
|
|
return -1;
|
|
}
|
|
|
|
int cr_fchpermat(int dirfd, const char *path, uid_t new_uid, gid_t new_gid, mode_t new_mode, int flags)
|
|
{
|
|
struct stat st;
|
|
int ret;
|
|
|
|
if (fchownat(dirfd, path, new_uid, new_gid, flags) < 0 && errno != EPERM) {
|
|
int errno_cpy = errno;
|
|
pr_perror("Unable to change [%d]/%s ownership to (%d, %d)",
|
|
dirfd, path, new_uid, new_gid);
|
|
errno = errno_cpy;
|
|
return -1;
|
|
}
|
|
|
|
if (fstatat(dirfd, path, &st, flags) < 0) {
|
|
int errno_cpy = errno;
|
|
pr_perror("Unable to stat [%d]/%s", dirfd, path);
|
|
errno = errno_cpy;
|
|
return -1;
|
|
}
|
|
|
|
if (new_uid != st.st_uid || new_gid != st.st_gid) {
|
|
errno = EPERM;
|
|
pr_perror("Unable to change [%d]/%s ownership (%d, %d) to (%d, %d)",
|
|
dirfd, path, st.st_uid, st.st_gid, new_uid, new_gid);
|
|
errno = EPERM;
|
|
return -1;
|
|
}
|
|
|
|
if (new_mode == st.st_mode)
|
|
return 0;
|
|
|
|
if (S_ISLNK(st.st_mode)) {
|
|
/*
|
|
* We have no lchmod() function, and fchmod() will fail on
|
|
* O_PATH | O_NOFOLLOW fd. Yes, we have fchmodat()
|
|
* function and flag AT_SYMLINK_NOFOLLOW described in
|
|
* man 2 fchmodat, but it is not currently implemented. %)
|
|
*/
|
|
return 0;
|
|
}
|
|
|
|
if (!*path && flags & AT_EMPTY_PATH)
|
|
ret = fchmod(dirfd, new_mode);
|
|
else
|
|
ret = fchmodat(dirfd, path, new_mode, flags & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH));
|
|
if (ret < 0) {
|
|
int errno_cpy = errno;
|
|
pr_perror("Unable to set perms %o on [%d]/%s", new_mode, dirfd, path);
|
|
errno = errno_cpy;
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
int cr_fchperm(int fd, uid_t new_uid, gid_t new_gid, mode_t new_mode)
|
|
{
|
|
return cr_fchpermat(fd, "", new_uid, new_gid, new_mode, AT_EMPTY_PATH);
|
|
}
|
|
|
|
void split(char *str, char token, char ***out, int *n)
|
|
{
|
|
int i;
|
|
char *cur;
|
|
|
|
*n = 0;
|
|
for (cur = str; cur != NULL; cur = strchr(cur, token)) {
|
|
(*n)++;
|
|
cur++;
|
|
}
|
|
|
|
if (*n == 0) {
|
|
/* This can only happen if str == NULL */
|
|
*out = NULL;
|
|
*n = -1;
|
|
return;
|
|
}
|
|
|
|
*out = xmalloc((*n) * sizeof(char *));
|
|
if (!*out) {
|
|
*n = -1;
|
|
return;
|
|
}
|
|
|
|
cur = str;
|
|
i = 0;
|
|
do {
|
|
char *prev = cur;
|
|
cur = strchr(cur, token);
|
|
|
|
if (cur)
|
|
*cur = '\0';
|
|
(*out)[i] = xstrdup(prev);
|
|
if (cur) {
|
|
*cur = token;
|
|
cur++;
|
|
}
|
|
|
|
if (!(*out)[i]) {
|
|
int j;
|
|
for (j = 0; j < i; j++)
|
|
xfree((*out)[j]);
|
|
xfree(*out);
|
|
*out = NULL;
|
|
*n = -1;
|
|
return;
|
|
}
|
|
|
|
i++;
|
|
} while (cur);
|
|
}
|
|
|
|
int fd_has_data(int lfd)
|
|
{
|
|
struct pollfd pfd = { lfd, POLLIN, 0 };
|
|
int ret;
|
|
|
|
ret = poll(&pfd, 1, 0);
|
|
if (ret < 0) {
|
|
pr_perror("poll() failed");
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
void fd_set_nonblocking(int fd, bool on)
|
|
{
|
|
int flags = fcntl(fd, F_GETFL, NULL);
|
|
|
|
if (flags < 0) {
|
|
pr_perror("Failed to obtain flags from fd %d", fd);
|
|
return;
|
|
}
|
|
|
|
if (on)
|
|
flags |= O_NONBLOCK;
|
|
else
|
|
flags &= (~O_NONBLOCK);
|
|
|
|
if (fcntl(fd, F_SETFL, flags) < 0)
|
|
pr_perror("Failed to set flags for fd %d", fd);
|
|
}
|
|
|
|
int make_yard(char *path)
|
|
{
|
|
if (mount("none", path, "tmpfs", 0, NULL)) {
|
|
pr_perror("Unable to mount tmpfs in %s", path);
|
|
return -1;
|
|
}
|
|
|
|
if (mount("none", path, NULL, MS_PRIVATE, NULL)) {
|
|
pr_perror("Unable to mark yard as private");
|
|
return -1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
const char *ns_to_string(unsigned int ns)
|
|
{
|
|
switch (ns) {
|
|
case CLONE_NEWIPC:
|
|
return "ipc";
|
|
case CLONE_NEWNS:
|
|
return "mnt";
|
|
case CLONE_NEWNET:
|
|
return "net";
|
|
case CLONE_NEWPID:
|
|
return "pid";
|
|
case CLONE_NEWUSER:
|
|
return "user";
|
|
case CLONE_NEWUTS:
|
|
return "uts";
|
|
case CLONE_NEWTIME:
|
|
return "time";
|
|
default:
|
|
return NULL;
|
|
}
|
|
}
|
|
|
|
static int get_sockaddr_in(struct sockaddr_storage *addr, char *host, unsigned short port)
|
|
{
|
|
memset(addr, 0, sizeof(*addr));
|
|
|
|
if (!host) {
|
|
((struct sockaddr_in *)addr)->sin_addr.s_addr = INADDR_ANY;
|
|
addr->ss_family = AF_INET;
|
|
} else if (inet_pton(AF_INET, host, &((struct sockaddr_in *)addr)->sin_addr)) {
|
|
addr->ss_family = AF_INET;
|
|
} else if (inet_pton(AF_INET6, host, &((struct sockaddr_in6 *)addr)->sin6_addr)) {
|
|
addr->ss_family = AF_INET6;
|
|
} else {
|
|
pr_err("Invalid server address \"%s\". "
|
|
"The address must be in IPv4 or IPv6 format.\n",
|
|
host);
|
|
return -1;
|
|
}
|
|
|
|
if (addr->ss_family == AF_INET6) {
|
|
((struct sockaddr_in6 *)addr)->sin6_port = htons(port);
|
|
} else if (addr->ss_family == AF_INET) {
|
|
((struct sockaddr_in *)addr)->sin_port = htons(port);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
int setup_tcp_server(char *type, char *addr, unsigned short *port)
|
|
{
|
|
int sk = -1;
|
|
int sockopt = 1;
|
|
struct sockaddr_storage saddr;
|
|
socklen_t slen = sizeof(saddr);
|
|
|
|
if (get_sockaddr_in(&saddr, addr, (*port))) {
|
|
return -1;
|
|
}
|
|
|
|
pr_info("Starting %s server on port %u\n", type, *port);
|
|
|
|
sk = socket(saddr.ss_family, SOCK_STREAM, IPPROTO_TCP);
|
|
|
|
if (sk < 0) {
|
|
pr_perror("Can't init %s server", type);
|
|
return -1;
|
|
}
|
|
|
|
if (setsockopt(sk, SOL_SOCKET, SO_REUSEADDR, &sockopt, sizeof(sockopt)) == -1) {
|
|
pr_perror("Unable to set SO_REUSEADDR");
|
|
goto out;
|
|
}
|
|
|
|
if (bind(sk, (struct sockaddr *)&saddr, slen)) {
|
|
pr_perror("Can't bind %s server", type);
|
|
goto out;
|
|
}
|
|
|
|
if (listen(sk, 1)) {
|
|
pr_perror("Can't listen on %s server socket", type);
|
|
goto out;
|
|
}
|
|
|
|
/* Get socket port in case of autobind */
|
|
if ((*port) == 0) {
|
|
if (getsockname(sk, (struct sockaddr *)&saddr, &slen)) {
|
|
pr_perror("Can't get %s server name", type);
|
|
goto out;
|
|
}
|
|
|
|
if (saddr.ss_family == AF_INET6) {
|
|
(*port) = ntohs(((struct sockaddr_in6 *)&saddr)->sin6_port);
|
|
} else if (saddr.ss_family == AF_INET) {
|
|
(*port) = ntohs(((struct sockaddr_in *)&saddr)->sin_port);
|
|
}
|
|
|
|
pr_info("Using %u port\n", (*port));
|
|
}
|
|
|
|
return sk;
|
|
out:
|
|
close(sk);
|
|
return -1;
|
|
}
|
|
|
|
int run_tcp_server(bool daemon_mode, int *ask, int cfd, int sk)
|
|
{
|
|
int ret;
|
|
struct sockaddr_storage caddr;
|
|
socklen_t clen = sizeof(caddr);
|
|
|
|
if (daemon_mode) {
|
|
ret = cr_daemon(1, 0, cfd);
|
|
if (ret == -1) {
|
|
pr_err("Can't run in the background\n");
|
|
goto err;
|
|
}
|
|
if (ret > 0) { /* parent task, daemon started */
|
|
close_safe(&sk);
|
|
if (opts.pidfile) {
|
|
if (write_pidfile(ret) == -1) {
|
|
pr_perror("Can't write pidfile");
|
|
kill(ret, SIGKILL);
|
|
waitpid(ret, NULL, 0);
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
}
|
|
|
|
if (status_ready())
|
|
return -1;
|
|
|
|
if (sk >= 0) {
|
|
char port[6];
|
|
char address[INET6_ADDRSTRLEN];
|
|
*ask = accept(sk, (struct sockaddr *)&caddr, &clen);
|
|
if (*ask < 0) {
|
|
pr_perror("Can't accept connection to server");
|
|
goto err;
|
|
}
|
|
ret = getnameinfo((struct sockaddr *)&caddr, clen, address, sizeof(address), port, sizeof(port),
|
|
NI_NUMERICHOST | NI_NUMERICSERV);
|
|
if (ret) {
|
|
pr_err("Failed converting address: %s\n", gai_strerror(ret));
|
|
goto err;
|
|
}
|
|
pr_info("Accepted connection from %s:%s\n", address, port);
|
|
close(sk);
|
|
}
|
|
|
|
return 0;
|
|
err:
|
|
close_safe(&sk);
|
|
return -1;
|
|
}
|
|
|
|
int setup_tcp_client(char *hostname)
|
|
{
|
|
struct sockaddr_storage saddr;
|
|
struct addrinfo addr_criteria, *addr_list, *p;
|
|
char ipstr[INET6_ADDRSTRLEN];
|
|
int sk = -1;
|
|
void *ip;
|
|
|
|
memset(&addr_criteria, 0, sizeof(addr_criteria));
|
|
addr_criteria.ai_family = AF_UNSPEC;
|
|
addr_criteria.ai_socktype = SOCK_STREAM;
|
|
addr_criteria.ai_protocol = IPPROTO_TCP;
|
|
|
|
/*
|
|
* addr_list contains a list of addrinfo structures that corresponding
|
|
* to the criteria specified in hostname and addr_criteria.
|
|
*/
|
|
if (getaddrinfo(hostname, NULL, &addr_criteria, &addr_list)) {
|
|
pr_perror("Failed to resolve hostname: %s", hostname);
|
|
goto out;
|
|
}
|
|
|
|
/*
|
|
* Iterate through addr_list and try to connect. The loop stops if the
|
|
* connection is successful or we reach the end of the list.
|
|
*/
|
|
for (p = addr_list; p != NULL; p = p->ai_next) {
|
|
if (p->ai_family == AF_INET) {
|
|
struct sockaddr_in *ipv4 = (struct sockaddr_in *)p->ai_addr;
|
|
ip = &(ipv4->sin_addr);
|
|
} else {
|
|
struct sockaddr_in6 *ipv6 = (struct sockaddr_in6 *)p->ai_addr;
|
|
ip = &(ipv6->sin6_addr);
|
|
}
|
|
|
|
inet_ntop(p->ai_family, ip, ipstr, sizeof(ipstr));
|
|
pr_info("Connecting to server %s:%u\n", ipstr, opts.port);
|
|
|
|
if (get_sockaddr_in(&saddr, ipstr, opts.port))
|
|
goto out;
|
|
|
|
sk = socket(saddr.ss_family, SOCK_STREAM, IPPROTO_TCP);
|
|
if (sk < 0) {
|
|
pr_perror("Can't create socket");
|
|
goto out;
|
|
}
|
|
|
|
if (connect(sk, (struct sockaddr *)&saddr, sizeof(saddr)) < 0) {
|
|
pr_info("Can't connect to server %s:%u\n", ipstr, opts.port);
|
|
close(sk);
|
|
sk = -1;
|
|
} else {
|
|
/* Connected successfully */
|
|
break;
|
|
}
|
|
}
|
|
|
|
out:
|
|
freeaddrinfo(addr_list);
|
|
return sk;
|
|
}
|
|
|
|
int epoll_add_rfd(int epfd, struct epoll_rfd *rfd)
|
|
{
|
|
struct epoll_event ev;
|
|
|
|
ev.events = EPOLLIN | EPOLLRDHUP;
|
|
ev.data.ptr = rfd;
|
|
if (epoll_ctl(epfd, EPOLL_CTL_ADD, rfd->fd, &ev) == -1) {
|
|
pr_perror("epoll_ctl failed");
|
|
return -1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
int epoll_del_rfd(int epfd, struct epoll_rfd *rfd)
|
|
{
|
|
if (epoll_ctl(epfd, EPOLL_CTL_DEL, rfd->fd, NULL) == -1) {
|
|
pr_perror("epoll_ctl failed");
|
|
return -1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int epoll_hangup_event(int epollfd, struct epoll_rfd *rfd)
|
|
{
|
|
int ret = 0;
|
|
|
|
if (rfd->hangup_event) {
|
|
ret = rfd->hangup_event(rfd);
|
|
if (ret < 0)
|
|
return ret;
|
|
}
|
|
|
|
if (epoll_del_rfd(epollfd, rfd))
|
|
return -1;
|
|
|
|
close_safe(&rfd->fd);
|
|
|
|
return ret;
|
|
}
|
|
|
|
int epoll_run_rfds(int epollfd, struct epoll_event *evs, int nr_fds, int timeout)
|
|
{
|
|
int ret, i, nr_events;
|
|
bool have_a_break = false;
|
|
|
|
while (1) {
|
|
ret = epoll_wait(epollfd, evs, nr_fds, timeout);
|
|
if (ret <= 0) {
|
|
if (ret < 0)
|
|
pr_perror("polling failed");
|
|
break;
|
|
}
|
|
|
|
nr_events = ret;
|
|
for (i = 0; i < nr_events; i++) {
|
|
struct epoll_rfd *rfd;
|
|
uint32_t events;
|
|
|
|
rfd = (struct epoll_rfd *)evs[i].data.ptr;
|
|
events = evs[i].events;
|
|
|
|
if (events & EPOLLIN) {
|
|
ret = rfd->read_event(rfd);
|
|
if (ret < 0)
|
|
goto out;
|
|
if (ret > 0)
|
|
have_a_break = true;
|
|
}
|
|
|
|
if (events & (EPOLLHUP | EPOLLRDHUP)) {
|
|
ret = epoll_hangup_event(epollfd, rfd);
|
|
if (ret < 0)
|
|
goto out;
|
|
if (ret > 0)
|
|
have_a_break = true;
|
|
}
|
|
}
|
|
|
|
if (have_a_break)
|
|
return 1;
|
|
}
|
|
out:
|
|
return ret;
|
|
}
|
|
|
|
int epoll_prepare(int nr_fds, struct epoll_event **events)
|
|
{
|
|
int epollfd;
|
|
|
|
*events = xmalloc(sizeof(struct epoll_event) * nr_fds);
|
|
if (!*events)
|
|
return -1;
|
|
|
|
epollfd = epoll_create(nr_fds);
|
|
if (epollfd < 0) {
|
|
pr_perror("epoll_create failed");
|
|
goto free_events;
|
|
}
|
|
|
|
return epollfd;
|
|
|
|
free_events:
|
|
xfree(*events);
|
|
*events = NULL;
|
|
return -1;
|
|
}
|
|
|
|
int call_in_child_process(int (*fn)(void *), void *arg)
|
|
{
|
|
int status, ret = -1;
|
|
pid_t pid;
|
|
/*
|
|
* Parent freezes till child exit, so child may use the same stack.
|
|
* No SIGCHLD flag, so it's not need to block signal.
|
|
*/
|
|
pid = clone_noasan(fn, CLONE_VFORK | CLONE_VM | CLONE_FILES | CLONE_IO | CLONE_SIGHAND | CLONE_SYSVSEM, arg);
|
|
if (pid == -1) {
|
|
pr_perror("Can't clone");
|
|
return -1;
|
|
}
|
|
errno = 0;
|
|
if (waitpid(pid, &status, __WALL) != pid || !WIFEXITED(status) || WEXITSTATUS(status)) {
|
|
pr_err("Can't wait or bad status: errno=%d, status=%d\n", errno, status);
|
|
goto out;
|
|
}
|
|
ret = 0;
|
|
/*
|
|
* Child opened PROC_SELF for pid. If we create one more child
|
|
* with the same pid later, it will try to reuse this /proc/self.
|
|
*/
|
|
out:
|
|
close_pid_proc();
|
|
return ret;
|
|
}
|
|
|
|
void rlimit_unlimit_nofile(void)
|
|
{
|
|
struct rlimit new;
|
|
|
|
if (opts.unprivileged && !has_cap_sys_resource(opts.cap_eff))
|
|
return;
|
|
|
|
new.rlim_cur = kdat.sysctl_nr_open;
|
|
new.rlim_max = kdat.sysctl_nr_open;
|
|
|
|
if (prlimit(getpid(), RLIMIT_NOFILE, &new, NULL)) {
|
|
pr_perror("rlimit: Can't setup RLIMIT_NOFILE for self");
|
|
return;
|
|
} else
|
|
pr_debug("rlimit: RLIMIT_NOFILE unlimited for self\n");
|
|
|
|
service_fd_rlim_cur = kdat.sysctl_nr_open;
|
|
}
|
|
|
|
#ifdef __GLIBC__
|
|
#include <execinfo.h>
|
|
void print_stack_trace(pid_t pid)
|
|
{
|
|
void *array[10];
|
|
char **strings;
|
|
size_t size, i;
|
|
|
|
size = backtrace(array, 10);
|
|
strings = backtrace_symbols(array, size);
|
|
|
|
for (i = 0; i < size; i++)
|
|
pr_err("stack %d#%zu: %s\n", pid, i, strings[i]);
|
|
|
|
free(strings);
|
|
}
|
|
#endif
|
|
|
|
int cr_fsopen(const char *fsname, unsigned int flags)
|
|
{
|
|
return syscall(__NR_fsopen, fsname, flags);
|
|
}
|
|
|
|
int cr_fsconfig(int fd, unsigned int cmd, const char *key, const char *value, int aux)
|
|
{
|
|
int ret = syscall(__NR_fsconfig, fd, cmd, key, value, aux);
|
|
if (ret)
|
|
fsfd_dump_messages(fd);
|
|
return ret;
|
|
}
|
|
|
|
int cr_fsmount(int fd, unsigned int flags, unsigned int attr_flags)
|
|
{
|
|
int ret = syscall(__NR_fsmount, fd, flags, attr_flags);
|
|
if (ret)
|
|
fsfd_dump_messages(fd);
|
|
return ret;
|
|
}
|
|
|
|
void fsfd_dump_messages(int fd)
|
|
{
|
|
char buf[4096];
|
|
int err, n;
|
|
|
|
err = errno;
|
|
|
|
for (;;) {
|
|
n = read(fd, buf, sizeof(buf) - 1);
|
|
if (n < 0) {
|
|
if (errno != ENODATA)
|
|
pr_perror("Unable to read from fs descriptor");
|
|
break;
|
|
}
|
|
buf[n] = 0;
|
|
|
|
switch (buf[0]) {
|
|
case 'w':
|
|
pr_warn("%s\n", buf);
|
|
break;
|
|
case 'i':
|
|
pr_info("%s\n", buf);
|
|
break;
|
|
case 'e':
|
|
/* fallthrough */
|
|
default:
|
|
pr_err("%s\n", buf);
|
|
break;
|
|
}
|
|
}
|
|
|
|
errno = err;
|
|
}
|
|
|
|
int mount_detached_fs(const char *fsname)
|
|
{
|
|
int fsfd, fd;
|
|
|
|
fsfd = cr_fsopen(fsname, 0);
|
|
if (fsfd < 0) {
|
|
pr_perror("Unable to open the %s file system", fsname);
|
|
return -1;
|
|
}
|
|
|
|
if (cr_fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0) < 0) {
|
|
pr_perror("Unable to create the %s file system", fsname);
|
|
close(fsfd);
|
|
return -1;
|
|
}
|
|
|
|
fd = cr_fsmount(fsfd, 0, 0);
|
|
if (fd < 0)
|
|
pr_perror("Unable to mount the %s file system", fsname);
|
|
close(fsfd);
|
|
return fd;
|
|
}
|
|
|
|
int strip_deleted(char *name, int len)
|
|
{
|
|
struct dcache_prepends {
|
|
const char *str;
|
|
size_t len;
|
|
} static const prepends[] = { {
|
|
.str = " (deleted)",
|
|
.len = 10,
|
|
},
|
|
{
|
|
.str = "//deleted",
|
|
.len = 9,
|
|
} };
|
|
size_t i;
|
|
|
|
for (i = 0; i < ARRAY_SIZE(prepends); i++) {
|
|
size_t at;
|
|
|
|
if (len <= prepends[i].len)
|
|
continue;
|
|
|
|
at = len - prepends[i].len;
|
|
if (!strcmp(&name[at], prepends[i].str)) {
|
|
pr_debug("Strip '%s' tag from '%s'\n", prepends[i].str, name);
|
|
name[at] = '\0';
|
|
len -= prepends[i].len;
|
|
return 1;
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* This function check if path ends with ending and cuts it from path.
|
|
* Return 0 if path is cut. -1 otherwise, leaving path unchanged.
|
|
* Example:
|
|
* path = "/foo/bar", ending = "bar"
|
|
* cut(path, ending) -> path becomes "/foo"
|
|
*
|
|
* 1. Skip leading "./" in subpath.
|
|
* 2. Respect root: ("/a/b", "b") -> "/a" but ("/a", "a") -> "/"
|
|
* 3. Refuse to cut identical strings, e.g. ("abc", "abc") will result in -1
|
|
* 4. Do not handle "..", "//", "./" (with exception "./" as leading symbols)
|
|
*/
|
|
|
|
int cut_path_ending(char *path, char *ending)
|
|
{
|
|
int ending_pos;
|
|
|
|
if (ending[0] == '.' && ending[1] == '/')
|
|
ending = ending + 2;
|
|
|
|
ending_pos = strlen(path) - strlen(ending);
|
|
|
|
if (ending_pos < 1)
|
|
return -1;
|
|
|
|
if (strcmp(path + ending_pos, ending))
|
|
return -1;
|
|
|
|
if (path[ending_pos - 1] != '/')
|
|
return -1;
|
|
|
|
if (ending_pos == 1) {
|
|
path[ending_pos] = 0;
|
|
return 0;
|
|
}
|
|
|
|
path[ending_pos - 1] = 0;
|
|
return 0;
|
|
}
|
|
|
|
static int is_iptables_nft(char *bin)
|
|
{
|
|
int pfd[2] = { -1, -1 }, ret = -1;
|
|
char *cmd[] = { bin, "-V", NULL };
|
|
char buf[100];
|
|
|
|
if (pipe(pfd) < 0) {
|
|
pr_perror("Unable to create pipe");
|
|
goto err;
|
|
}
|
|
|
|
ret = cr_system(-1, pfd[1], -1, cmd[0], cmd, CRS_CAN_FAIL);
|
|
if (ret) {
|
|
pr_err("%s -V failed\n", cmd[0]);
|
|
goto err;
|
|
}
|
|
|
|
close_safe(&pfd[1]);
|
|
|
|
ret = read(pfd[0], buf, sizeof(buf) - 1);
|
|
if (ret < 0) {
|
|
pr_perror("Unable to read %s -V output", cmd[0]);
|
|
goto err;
|
|
}
|
|
|
|
buf[ret] = '\0';
|
|
ret = 0;
|
|
|
|
if (strstr(buf, "nf_tables")) {
|
|
pr_info("iptables has nft backend: %s\n", buf);
|
|
ret = 1;
|
|
}
|
|
|
|
err:
|
|
close_safe(&pfd[1]);
|
|
close_safe(&pfd[0]);
|
|
return ret;
|
|
}
|
|
|
|
char *get_legacy_iptables_bin(bool ipv6, bool restore)
|
|
{
|
|
static char iptables_bin[2][2][32];
|
|
/* 0 - means we don't know yet,
|
|
* -1 - not present,
|
|
* 1 - present.
|
|
*/
|
|
static int iptables_present[2][2] = { { 0, 0 }, { 0, 0 } };
|
|
char bins[2][2][2][32] = { { { "iptables-save", "iptables-legacy-save" },
|
|
{ "iptables-restore", "iptables-legacy-restore" } },
|
|
{ { "ip6tables-save", "ip6tables-legacy-save" },
|
|
{ "ip6tables-restore", "ip6tables-legacy-restore" } } };
|
|
int ret;
|
|
|
|
if (iptables_present[ipv6][restore] == -1)
|
|
return NULL;
|
|
|
|
if (iptables_present[ipv6][restore] == 1)
|
|
return iptables_bin[ipv6][restore];
|
|
|
|
memcpy(iptables_bin[ipv6][restore], bins[ipv6][restore][0], strlen(bins[ipv6][restore][0]) + 1);
|
|
ret = is_iptables_nft(iptables_bin[ipv6][restore]);
|
|
|
|
/*
|
|
* iptables on host uses nft backend (or not installed),
|
|
* let's try iptables-legacy
|
|
*/
|
|
if (ret < 0 || ret == 1) {
|
|
memcpy(iptables_bin[ipv6][restore], bins[ipv6][restore][1], strlen(bins[ipv6][restore][1]) + 1);
|
|
ret = is_iptables_nft(iptables_bin[ipv6][restore]);
|
|
if (ret < 0 || ret == 1) {
|
|
iptables_present[ipv6][restore] = -1;
|
|
return NULL;
|
|
}
|
|
}
|
|
|
|
/* we can come here with iptables-save or iptables-legacy-save */
|
|
iptables_present[ipv6][restore] = 1;
|
|
|
|
return iptables_bin[ipv6][restore];
|
|
}
|
|
|
|
/*
|
|
* read_all() behaves like read() without the possibility of partial reads.
|
|
* Use only with blocking I/O.
|
|
*/
|
|
ssize_t read_all(int fd, void *buf, size_t size)
|
|
{
|
|
ssize_t n = 0;
|
|
while (size > 0) {
|
|
ssize_t ret = read(fd, buf, size);
|
|
if (ret == -1) {
|
|
if (errno == EINTR)
|
|
continue;
|
|
/*
|
|
* The caller should use standard read() for
|
|
* non-blocking I/O.
|
|
*/
|
|
if (errno == EAGAIN || errno == EWOULDBLOCK)
|
|
errno = EINVAL;
|
|
return ret;
|
|
}
|
|
if (ret == 0)
|
|
break;
|
|
n += ret;
|
|
buf = (char *)buf + ret;
|
|
size -= ret;
|
|
}
|
|
return n;
|
|
}
|
|
|
|
/*
|
|
* write_all() behaves like write() without the possibility of partial writes.
|
|
* Use only with blocking I/O.
|
|
*/
|
|
ssize_t write_all(int fd, const void *buf, size_t size)
|
|
{
|
|
ssize_t n = 0;
|
|
while (size > 0) {
|
|
ssize_t ret = write(fd, buf, size);
|
|
if (ret == -1) {
|
|
if (errno == EINTR)
|
|
continue;
|
|
/*
|
|
* The caller should use standard write() for
|
|
* non-blocking I/O.
|
|
*/
|
|
if (errno == EAGAIN || errno == EWOULDBLOCK)
|
|
errno = EINVAL;
|
|
return ret;
|
|
}
|
|
n += ret;
|
|
buf = (char *)buf + ret;
|
|
size -= ret;
|
|
}
|
|
return n;
|
|
}
|
|
|
|
static int remove_one(const char *fpath, const struct stat *sb, int tflag, struct FTW *ftwbuf)
|
|
{
|
|
int ret;
|
|
|
|
ret = remove(fpath);
|
|
if (ret) {
|
|
pr_perror("rmrf: unable to remove %s", fpath);
|
|
return -1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
#define NFTW_FD_MAX 64
|
|
|
|
int rmrf(char *path)
|
|
{
|
|
pr_debug("rmrf: removing %s\n", path);
|
|
return nftw(path, remove_one, NFTW_FD_MAX, FTW_DEPTH | FTW_PHYS);
|
|
}
|
|
|
|
__attribute__((returns_twice)) static pid_t raw_legacy_clone(unsigned long flags, int *pidfd)
|
|
{
|
|
#if defined(__s390x__) || defined(__s390__) || defined(__CRIS__)
|
|
/* On s390/s390x and cris the order of the first and second arguments
|
|
* of the system call is reversed.
|
|
*/
|
|
return syscall(__NR_clone, NULL, flags | SIGCHLD, pidfd);
|
|
#elif defined(__sparc__) && defined(__arch64__)
|
|
{
|
|
/*
|
|
* sparc64 always returns the other process id in %o0, and a
|
|
* boolean flag whether this is the child or the parent in %o1.
|
|
* Inline assembly is needed to get the flag returned in %o1.
|
|
*/
|
|
register long g1 asm("g1") = __NR_clone;
|
|
register long o0 asm("o0") = flags | SIGCHLD;
|
|
register long o1 asm("o1") = 0; /* is parent/child indicator */
|
|
register long o2 asm("o2") = (unsigned long)pidfd;
|
|
long is_error, retval, in_child;
|
|
pid_t child_pid;
|
|
|
|
asm volatile(
|
|
#if defined(__arch64__)
|
|
"t 0x6d\n\t" /* 64-bit trap */
|
|
#else
|
|
"t 0x10\n\t" /* 32-bit trap */
|
|
#endif
|
|
/*
|
|
* catch errors: On sparc, the carry bit (csr) in the
|
|
* processor status register (psr) is used instead of a
|
|
* full register.
|
|
*/
|
|
"addx %%g0, 0, %%g1"
|
|
: "=r"(g1), "=r"(o0), "=r"(o1), "=r"(o2) /* outputs */
|
|
: "r"(g1), "r"(o0), "r"(o1), "r"(o2) /* inputs */
|
|
: "%cc"); /* clobbers */
|
|
|
|
is_error = g1;
|
|
retval = o0;
|
|
in_child = o1;
|
|
|
|
if (is_error) {
|
|
errno = retval;
|
|
return -1;
|
|
}
|
|
|
|
if (in_child)
|
|
return 0;
|
|
|
|
child_pid = retval;
|
|
return child_pid;
|
|
}
|
|
#elif defined(__ia64__)
|
|
/* On ia64 the stack and stack size are passed as separate arguments. */
|
|
return syscall(__NR_clone, flags | SIGCHLD, NULL, 0, pidfd);
|
|
#else
|
|
return syscall(__NR_clone, flags | SIGCHLD, NULL, pidfd);
|
|
#endif
|
|
}
|
|
|
|
__attribute__((returns_twice)) static pid_t raw_clone(unsigned long flags, int *pidfd)
|
|
{
|
|
pid_t pid;
|
|
struct _clone_args args = {
|
|
.flags = flags,
|
|
.pidfd = ptr_to_u64(pidfd),
|
|
};
|
|
|
|
if (flags & (CLONE_VM | CLONE_PARENT_SETTID | CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID | CLONE_SETTLS))
|
|
return -EINVAL;
|
|
|
|
/* On CLONE_PARENT we inherit the parent's exit signal. */
|
|
if (!(flags & CLONE_PARENT))
|
|
args.exit_signal = SIGCHLD;
|
|
|
|
pid = syscall(__NR_clone3, &args, sizeof(args));
|
|
if (pid < 0 && errno == ENOSYS)
|
|
return raw_legacy_clone(flags, pidfd);
|
|
|
|
return pid;
|
|
}
|
|
|
|
static int wait_for_pid(pid_t pid)
|
|
{
|
|
int status, ret;
|
|
|
|
again:
|
|
ret = waitpid(pid, &status, 0);
|
|
if (ret == -1) {
|
|
if (errno == EINTR)
|
|
goto again;
|
|
|
|
return -1;
|
|
}
|
|
|
|
if (ret != pid)
|
|
goto again;
|
|
|
|
if (!WIFEXITED(status) || WEXITSTATUS(status) != 0)
|
|
return -1;
|
|
|
|
return 0;
|
|
}
|
|
|
|
int run_command(char *buf, size_t buf_size, int (*child_fn)(void *), void *args)
|
|
{
|
|
pid_t child;
|
|
int ret, fret, pipefd[2];
|
|
ssize_t bytes;
|
|
|
|
/* Make sure our callers do not receive uninitialized memory. */
|
|
if (buf_size > 0 && buf)
|
|
buf[0] = '\0';
|
|
|
|
if (pipe(pipefd) < 0)
|
|
return -1;
|
|
|
|
child = raw_clone(0, NULL);
|
|
if (child < 0) {
|
|
close(pipefd[0]);
|
|
close(pipefd[1]);
|
|
return -1;
|
|
}
|
|
|
|
if (child == 0) {
|
|
/* Close the read-end of the pipe. */
|
|
close(pipefd[0]);
|
|
|
|
/* Redirect std{err,out} to write-end of the
|
|
* pipe.
|
|
*/
|
|
ret = dup2(pipefd[1], STDOUT_FILENO);
|
|
if (ret >= 0)
|
|
ret = dup2(pipefd[1], STDERR_FILENO);
|
|
|
|
/* Close the write-end of the pipe. */
|
|
close(pipefd[1]);
|
|
|
|
if (ret < 0)
|
|
_exit(EXIT_FAILURE);
|
|
|
|
/* Does not return. */
|
|
child_fn(args);
|
|
_exit(EXIT_FAILURE);
|
|
}
|
|
|
|
/* close the write-end of the pipe */
|
|
close(pipefd[1]);
|
|
|
|
if (buf && buf_size > 0) {
|
|
bytes = read_all(pipefd[0], buf, buf_size - 1);
|
|
if (bytes > 0)
|
|
buf[bytes - 1] = '\0';
|
|
}
|
|
|
|
fret = wait_for_pid(child);
|
|
|
|
/* close the read-end of the pipe */
|
|
close(pipefd[0]);
|
|
|
|
return fret;
|
|
}
|
|
|
|
char criu_run_id[RUN_ID_HASH_LENGTH];
|
|
|
|
void util_init(void)
|
|
{
|
|
uuid_t uuid;
|
|
|
|
uuid_generate(uuid);
|
|
uuid_unparse(uuid, criu_run_id);
|
|
pr_info("CRIU run id = %s\n", criu_run_id);
|
|
memcpy(compel_run_id, criu_run_id, sizeof(criu_run_id));
|
|
}
|
|
|
|
/*
|
|
* This function cuts sub_path from the path.
|
|
* 1) It assumes all relative paths given are relative to "/":
|
|
* /a/b/c is the same as a/b/c
|
|
* 2) It can handle paths with multiple consequent slashes:
|
|
* ///a///b///c is the same as /a/b/c
|
|
* 3) It always returns relative path, with no leading slash:
|
|
* get_relative_path("/a/b/c", "/") would be "a/b/c"
|
|
* get_relative_path("/a/b/c", "/a/b") would be "c"
|
|
* get_relative_path("/", "/") would be ""
|
|
* 4) It can handle paths with single dots:
|
|
* get_relative_path("./a/b", "a/") would be "b"
|
|
* 5) Note ".." in paths are not supported and handled as normal directory name
|
|
*/
|
|
char *get_relative_path(char *path, char *sub_path)
|
|
{
|
|
bool skip_slashes = true;
|
|
|
|
while (1) {
|
|
if ((*path == '/' || *path == '\0') && (*sub_path == '/' || *sub_path == '\0'))
|
|
skip_slashes = true;
|
|
|
|
if (skip_slashes) {
|
|
while (*path == '/' || (path[0] == '.' && (path[1] == '/' || path[1] == '\0')))
|
|
path++;
|
|
while (*sub_path == '/' || (sub_path[0] == '.' && (sub_path[1] == '/' || sub_path[1] == '\0')))
|
|
sub_path++;
|
|
}
|
|
|
|
if (*sub_path == '\0') {
|
|
if (skip_slashes)
|
|
return path;
|
|
return NULL;
|
|
}
|
|
skip_slashes = false;
|
|
|
|
if (*path == '\0')
|
|
return NULL;
|
|
|
|
if (*path != *sub_path)
|
|
return NULL;
|
|
|
|
path++;
|
|
sub_path++;
|
|
}
|
|
|
|
/* will never get here */
|
|
return NULL;
|
|
}
|
|
|
|
bool is_sub_path(char *path, char *sub_path)
|
|
{
|
|
char *rel_path;
|
|
|
|
rel_path = get_relative_path(path, sub_path);
|
|
if (!rel_path)
|
|
return false;
|
|
|
|
return true;
|
|
}
|
|
|
|
bool is_same_path(char *path1, char *path2)
|
|
{
|
|
char *rel_path;
|
|
|
|
rel_path = get_relative_path(path1, path2);
|
|
if (!rel_path || *rel_path != '\0')
|
|
return false;
|
|
|
|
return true;
|
|
}
|
|
|
|
/*
|
|
* Checks if path is a mountpoint
|
|
* (path should be visible - no overmounts)
|
|
*/
|
|
static int path_is_mountpoint(char *path, bool *is_mountpoint)
|
|
{
|
|
char *dname, *bname, *free_name;
|
|
struct open_how how = {
|
|
.flags = O_PATH,
|
|
.resolve = RESOLVE_NO_XDEV,
|
|
};
|
|
int exit_code = -1;
|
|
int dfd, fd;
|
|
|
|
dname = free_name = xstrdup(path);
|
|
if (!dname)
|
|
return -1;
|
|
dname = dirname(dname);
|
|
|
|
bname = get_relative_path(path, dname);
|
|
if (!bname || *bname == '\0') {
|
|
pr_err("Failed to get bname for %s\n", path);
|
|
goto err_free;
|
|
}
|
|
|
|
dfd = open(dname, O_PATH);
|
|
if (dfd < 0) {
|
|
pr_perror("Failed to open dir %s", dname);
|
|
goto err_free;
|
|
}
|
|
|
|
fd = sys_openat2(dfd, bname, &how, sizeof(how));
|
|
if (fd < 0) {
|
|
if (errno != EXDEV) {
|
|
pr_perror("Failed to open %s at %s", bname, dname);
|
|
goto err_close;
|
|
}
|
|
|
|
/*
|
|
* EXDEV means that dfd and bname are from different
|
|
* mounts, meaning that bname is a mountpoint
|
|
*/
|
|
*is_mountpoint = true;
|
|
} else {
|
|
/*
|
|
* No error means that dfd and bname are from same mount,
|
|
* meaning that bname is not a mountpoint
|
|
*/
|
|
*is_mountpoint = false;
|
|
close(fd);
|
|
}
|
|
|
|
exit_code = 0;
|
|
err_close:
|
|
close(dfd);
|
|
err_free:
|
|
xfree(free_name);
|
|
return exit_code;
|
|
}
|
|
|
|
/*
|
|
* Resolves real mountpoint path by any path on it
|
|
* (path should be visible - no overmountes)
|
|
*/
|
|
char *resolve_mountpoint(char *path)
|
|
{
|
|
char *mp_path, *free_path;
|
|
bool is_mountpoint;
|
|
|
|
/*
|
|
* The dirname() function may modify the contents of given path,
|
|
* so we need a strdup here to preserve path.
|
|
*/
|
|
mp_path = free_path = xstrdup(path);
|
|
if (!mp_path)
|
|
return NULL;
|
|
|
|
while (1) {
|
|
/*
|
|
* If we see "/" or "." we can't check if they are mountpoints
|
|
* by openat2 RESOLVE_NO_XDEV, let's just assume they are.
|
|
*/
|
|
if (is_same_path(mp_path, "/"))
|
|
goto out;
|
|
|
|
if (path_is_mountpoint(mp_path, &is_mountpoint) == -1) {
|
|
xfree(free_path);
|
|
return NULL;
|
|
}
|
|
|
|
if (is_mountpoint)
|
|
goto out;
|
|
|
|
/* Try parent directory */
|
|
mp_path = dirname(mp_path);
|
|
}
|
|
|
|
/* never get here */
|
|
xfree(free_path);
|
|
return NULL;
|
|
out:
|
|
/*
|
|
* The dirname() function may or may not return statically allocated
|
|
* strings, so here mp_path can be either dynamically allocated or
|
|
* statically allocated. Let's strdup to make the return pointer
|
|
* always freeable.
|
|
*/
|
|
mp_path = xstrdup(mp_path);
|
|
xfree(free_path);
|
|
return mp_path;
|
|
}
|
|
|
|
int set_opts_cap_eff(void)
|
|
{
|
|
struct __user_cap_header_struct cap_header;
|
|
struct __user_cap_data_struct cap_data[_LINUX_CAPABILITY_U32S_3];
|
|
int i;
|
|
|
|
cap_header.version = _LINUX_CAPABILITY_VERSION_3;
|
|
cap_header.pid = getpid();
|
|
|
|
if (capget(&cap_header, &cap_data[0]))
|
|
return -1;
|
|
|
|
for (i = 0; i < _LINUX_CAPABILITY_U32S_3; i++)
|
|
memcpy(&opts.cap_eff[i], &cap_data[i].effective, sizeof(u32));
|
|
|
|
return 0;
|
|
}
|