diff --git a/compel/arch/arm/plugins/std/syscalls/syscall.def b/compel/arch/arm/plugins/std/syscalls/syscall.def index 653a7539b..721ff16dc 100644 --- a/compel/arch/arm/plugins/std/syscalls/syscall.def +++ b/compel/arch/arm/plugins/std/syscalls/syscall.def @@ -111,3 +111,4 @@ preadv_raw 69 361 (int fd, struct iovec *iov, unsigned long nr, unsigned long userfaultfd 282 388 (int flags) fallocate 47 352 (int fd, int mode, loff_t offset, loff_t len) cacheflush ! 983042 (void *start, void *end, int flags) +ppoll 73 336 (struct pollfd *fds, unsigned int nfds, const struct timespec *tmo, const sigset_t *sigmask, size_t sigsetsize) diff --git a/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl b/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl index 62e0bc1a0..3b3079040 100644 --- a/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl +++ b/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl @@ -107,3 +107,4 @@ __NR_ipc 117 sys_ipc (unsigned int call, int first, unsigned long second, un __NR_gettimeofday 78 sys_gettimeofday (struct timeval *tv, struct timezone *tz) __NR_preadv 320 sys_preadv_raw (int fd, struct iovec *iov, unsigned long nr, unsigned long pos_l, unsigned long pos_h) __NR_userfaultfd 364 sys_userfaultfd (int flags) +__NR_ppoll 281 sys_ppoll (struct pollfd *fds, unsigned int nfds, const struct timespec *tmo, const sigset_t *sigmask, size_t sigsetsize) diff --git a/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl b/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl index 3521e9150..cc13a63dd 100644 --- a/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl +++ b/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl @@ -107,3 +107,4 @@ __NR_ipc 117 sys_ipc (unsigned int call, int first, unsigned long second, un __NR_userfaultfd 355 sys_userfaultfd (int flags) __NR_preadv 328 sys_preadv_raw (int fd, struct iovec *iov, unsigned long nr, unsigned long pos_l, unsigned long pos_h) __NR_gettimeofday 78 sys_gettimeofday (struct 
timeval *tv, struct timezone *tz) +__NR_ppoll 302 sys_ppoll (struct pollfd *fds, unsigned int nfds, const struct timespec *tmo, const sigset_t *sigmask, size_t sigsetsize) diff --git a/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl b/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl index a6c55b83c..7903ab150 100644 --- a/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl +++ b/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl @@ -95,3 +95,4 @@ __NR_kcmp 349 sys_kcmp (pid_t pid1, pid_t pid2, int type, unsigned long idx1, __NR_seccomp 354 sys_seccomp (unsigned int op, unsigned int flags, const char *uargs) __NR_memfd_create 356 sys_memfd_create (const char *name, unsigned int flags) __NR_userfaultfd 374 sys_userfaultfd (int flags) +__NR_ppoll 309 sys_ppoll (struct pollfd *fds, unsigned int nfds, const struct timespec *tmo, const sigset_t *sigmask, size_t sigsetsize) diff --git a/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl b/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl index 642715147..4ac9164ea 100644 --- a/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl +++ b/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl @@ -106,3 +106,4 @@ __NR_setns 308 sys_setns (int fd, int nstype) __NR_kcmp 312 sys_kcmp (pid_t pid1, pid_t pid2, int type, unsigned long idx1, unsigned long idx2) __NR_memfd_create 319 sys_memfd_create (const char *name, unsigned int flags) __NR_userfaultfd 323 sys_userfaultfd (int flags) +__NR_ppoll 271 sys_ppoll (struct pollfd *fds, unsigned int nfds, const struct timespec *tmo, const sigset_t *sigmask, size_t sigsetsize) diff --git a/compel/plugins/include/uapi/std/syscall-types.h b/compel/plugins/include/uapi/std/syscall-types.h index ddb740c82..57865e741 100644 --- a/compel/plugins/include/uapi/std/syscall-types.h +++ b/compel/plugins/include/uapi/std/syscall-types.h @@ -38,6 +38,7 @@ struct siginfo; struct msghdr; struct rusage; struct iocb; +struct pollfd; typedef unsigned long aio_context_t; diff --git a/criu/cr-restore.c 
b/criu/cr-restore.c index 2ffd9a86c..b4530f8e5 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -726,6 +726,40 @@ static int collect_zombie_pids(struct task_restore_args *ta) return collect_child_pids(TASK_DEAD, &ta->zombies_n); } +static int collect_inotify_fds(struct task_restore_args *ta) /* Record in RM_PRIVATE restorer memory the fd number of every master inotify entry on the current task's fd list; returns 0 on success (also when collection is skipped), -1 if rst_mem_alloc() fails */ +{ + struct list_head *list = &rsti(current)->fds; + struct fdt *fdt = rsti(current)->fdt; + struct fdinfo_list_entry *fle; + + /* Check we are an fdt-restorer: when an fdt exists and its pid is not ours, collect nothing */ + if (fdt && fdt->pid != vpid(current)) + return 0; + + ta->inotify_fds = (int *)rst_mem_align_cpos(RM_PRIVATE); /* NOTE(review): presumably the position where the entries allocated below start; the value is later passed through RST_MEM_FIXUP_PPTR() in sigreturn_restore() - confirm */ + + list_for_each_entry(fle, list, ps_list) { + struct file_desc *d = fle->desc; + int *inotify_fd; + + if (d->ops->type != FD_TYPES__INOTIFY) /* only inotify descriptors are of interest */ + continue; + + if (fle != file_master(d)) /* skip entries that are not the master one for this descriptor */ + continue; + + inotify_fd = rst_mem_alloc(sizeof(*inotify_fd), RM_PRIVATE); + if (!inotify_fd) + return -1; + + ta->inotify_fds_n++; /* one more slot filled; the restorer iterates inotify_fds_n entries */ + *inotify_fd = fle->fe->fd; + + pr_debug("Collect inotify fd %d to cleanup later\n", *inotify_fd); + } + return 0; +} + static int open_core(int pid, CoreEntry **pcore) { int ret; @@ -880,6 +914,9 @@ static int restore_one_alive_task(int pid, CoreEntry *core) if (collect_zombie_pids(ta) < 0) return -1; + if (collect_inotify_fds(ta) < 0) + return -1; + if (prepare_proc_misc(pid, core->tc, ta)) return -1; @@ -3411,6 +3448,7 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns RST_MEM_FIXUP_PPTR(task_args->helpers); RST_MEM_FIXUP_PPTR(task_args->zombies); RST_MEM_FIXUP_PPTR(task_args->vma_ios); + RST_MEM_FIXUP_PPTR(task_args->inotify_fds); task_args->compatible_mode = core_is_compat(core); /* diff --git a/criu/include/restorer.h b/criu/include/restorer.h index f980bfad3..b93807f5f 100644 --- a/criu/include/restorer.h +++ b/criu/include/restorer.h @@ -177,6 +177,9 @@ struct task_restore_args { pid_t *zombies; unsigned int zombies_n; + int *inotify_fds; /* fds to cleanup inotify events at CR_STATE_RESTORE_SIGCHLD stage */ + unsigned int inotify_fds_n; + /* * 
* * * * * * * * * * * * * * * * * * */ unsigned long task_size; diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c index 4fff2c85d..6f8f1ae54 100644 --- a/criu/pie/restorer.c +++ b/criu/pie/restorer.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "linux/userfaultfd.h" @@ -1307,6 +1308,72 @@ static int map_vdso(struct task_restore_args *args, bool compatible) return 0; } +static int fd_poll(int inotify_fd) +{ + struct pollfd pfd = {inotify_fd, POLLIN, 0}; + struct timespec tmo = {0, 0}; + + return sys_ppoll(&pfd, 1, &tmo, NULL, sizeof(sigset_t)); +} + +/* + * note: Actually kernel may want even more space for one event (see + * round_event_name_len), so using buffer of EVENT_BUFF_SIZE size may fail. + * To be on the safe side - take a bigger buffer, and these also allows to + * read more events in one syscall. + */ +#define EVENT_BUFF_SIZE ((sizeof(struct inotify_event) + PATH_MAX)) + +/* + * Read all available events from inotify queue + */ +static int cleanup_inotify_events(int inotify_fd) +{ + char buf[EVENT_BUFF_SIZE * 8]; + int ret; + + while (1) { + ret = fd_poll(inotify_fd); + if (ret < 0) { + pr_err("Failed to poll from inotify fd: %d\n", ret); + return -1; + } else if (ret == 0) { + break; + } + + ret = sys_read(inotify_fd, buf, sizeof(buf)); + if (ret < 0) { + pr_err("Failed to read inotify events\n"); + return -1; + } + } + + return 0; +} + +/* + * When we restore inotifies we can open and close files we create a watch + * for. So wee need to cleanup these auxiliary events which we've generated. + * + * note: For now we don't have a way to c/r events in queue but we need to + * at least leave the queue clean from events generated by our own. 
+ */ +int cleanup_current_inotify_events(struct task_restore_args *task_args) +{ + int i; + + for (i = 0; i < task_args->inotify_fds_n; i++) { + int inotify_fd = task_args->inotify_fds[i]; + + pr_debug("Cleaning inotify events from %d\n", inotify_fd); + + if (cleanup_inotify_events(inotify_fd)) + return -1; + } + + return 0; +} + /* * The main routine to restore task via sigreturn. * This one is very special, we never return there @@ -1767,6 +1834,9 @@ long __export_restore_task(struct task_restore_args *args) restore_finish_stage(task_entries_local, CR_STATE_RESTORE); + if (cleanup_current_inotify_events(args)) + goto core_restore_end; + if (wait_helpers(args) < 0) goto core_restore_end; if (wait_zombies(args) < 0)