diff --git a/Makefile.config b/Makefile.config
index 7851f826f..0e557805c 100644
--- a/Makefile.config
+++ b/Makefile.config
@@ -36,7 +36,7 @@ export DEFINES += $(FEATURE_DEFINES)
 export CFLAGS += $(FEATURE_DEFINES)
 
 FEATURES_LIST := TCP_REPAIR STRLCPY STRLCAT PTRACE_PEEKSIGINFO \
-	SETPROCTITLE_INIT MEMFD TCP_REPAIR_WINDOW UFFD
+	SETPROCTITLE_INIT MEMFD TCP_REPAIR_WINDOW
 
 # $1 - config name
 define gen-feature-test
diff --git a/compel/arch/arm/plugins/std/syscalls/syscall.def b/compel/arch/arm/plugins/std/syscalls/syscall.def
index 91213e0e7..ba541ec14 100644
--- a/compel/arch/arm/plugins/std/syscalls/syscall.def
+++ b/compel/arch/arm/plugins/std/syscalls/syscall.def
@@ -108,3 +108,4 @@ io_getevents 4 245 (aio_context_t ctx, long min_nr, long nr, struct io_event *
 seccomp 277 383 (unsigned int op, unsigned int flags, const char *uargs)
 gettimeofday 169 78 (struct timeval *tv, struct timezone *tz)
 preadv 69 361 (int fd, struct iovec *iov, unsigned long nr, loff_t off)
+userfaultfd 282 388 (int flags)
diff --git a/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl b/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl
index 9cb2fb6ea..68411745a 100644
--- a/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl
+++ b/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl
@@ -105,3 +105,4 @@ __NR_io_submit 230 sys_io_submit (aio_context_t ctx_id, long nr, struct iocb
 __NR_ipc 117 sys_ipc (unsigned int call, int first, unsigned long second, unsigned long third, const void *ptr, long fifth)
 __NR_gettimeofday 78 sys_gettimeofday (struct timeval *tv, struct timezone *tz)
 __NR_preadv 320 sys_preadv (int fd, struct iovec *iov, unsigned long nr, loff_t off)
+__NR_userfaultfd 364 sys_userfaultfd (int flags)
diff --git a/criu/include/linux/userfaultfd.h b/criu/include/linux/userfaultfd.h
new file mode 100644
index 000000000..9057d7af3
--- /dev/null
+++ b/criu/include/linux/userfaultfd.h
@@ -0,0 +1,167 @@
+/*
+ * include/linux/userfaultfd.h
+ *
+ * Copyright (C) 2007 Davide Libenzi
+ * Copyright (C) 2015 Red Hat, Inc.
+ *
+ */
+
+#ifndef _LINUX_USERFAULTFD_H
+#define _LINUX_USERFAULTFD_H
+
+#include
+
+#define UFFD_API ((__u64)0xAA)
+/*
+ * After implementing the respective features it will become:
+ * #define UFFD_API_FEATURES (UFFD_FEATURE_PAGEFAULT_FLAG_WP | \
+ * UFFD_FEATURE_EVENT_FORK)
+ */
+#define UFFD_API_FEATURES (0)
+#define UFFD_API_IOCTLS \
+	((__u64)1 << _UFFDIO_REGISTER | \
+	(__u64)1 << _UFFDIO_UNREGISTER | \
+	(__u64)1 << _UFFDIO_API)
+#define UFFD_API_RANGE_IOCTLS \
+	((__u64)1 << _UFFDIO_WAKE | \
+	(__u64)1 << _UFFDIO_COPY | \
+	(__u64)1 << _UFFDIO_ZEROPAGE)
+
+/*
+ * Valid ioctl command number range with this API is from 0x00 to
+ * 0x3F. UFFDIO_API is the fixed number, everything else can be
+ * changed by implementing a different UFFD_API. If sticking to the
+ * same UFFD_API more ioctl can be added and userland will be aware of
+ * which ioctl the running kernel implements through the ioctl command
+ * bitmask written by the UFFDIO_API.
+ */
+#define _UFFDIO_REGISTER (0x00)
+#define _UFFDIO_UNREGISTER (0x01)
+#define _UFFDIO_WAKE (0x02)
+#define _UFFDIO_COPY (0x03)
+#define _UFFDIO_ZEROPAGE (0x04)
+#define _UFFDIO_API (0x3F)
+
+/* userfaultfd ioctl ids */
+#define UFFDIO 0xAA
+#define UFFDIO_API _IOWR(UFFDIO, _UFFDIO_API, \
+			struct uffdio_api)
+#define UFFDIO_REGISTER _IOWR(UFFDIO, _UFFDIO_REGISTER, \
+			struct uffdio_register)
+#define UFFDIO_UNREGISTER _IOR(UFFDIO, _UFFDIO_UNREGISTER, \
+			struct uffdio_range)
+#define UFFDIO_WAKE _IOR(UFFDIO, _UFFDIO_WAKE, \
+			struct uffdio_range)
+#define UFFDIO_COPY _IOWR(UFFDIO, _UFFDIO_COPY, \
+			struct uffdio_copy)
+#define UFFDIO_ZEROPAGE _IOWR(UFFDIO, _UFFDIO_ZEROPAGE, \
+			struct uffdio_zeropage)
+
+/* read() structure */
+struct uffd_msg {
+	__u8 event;
+
+	__u8 reserved1;
+	__u16 reserved2;
+	__u32 reserved3;
+
+	union {
+		struct {
+			__u64 flags;
+			__u64 address;
+		} pagefault;
+
+		struct {
+			/* unused reserved fields */
+			__u64 reserved1;
+			__u64 reserved2;
+			__u64 reserved3;
+		} reserved;
+	} arg;
+} __packed;
+
+/*
+ * Start at 0x12 and not at 0 to be more strict against bugs.
+ */
+#define UFFD_EVENT_PAGEFAULT 0x12
+#if 0 /* not available yet */
+#define UFFD_EVENT_FORK 0x13
+#endif
+
+/* flags for UFFD_EVENT_PAGEFAULT */
+#define UFFD_PAGEFAULT_FLAG_WRITE (1<<0) /* If this was a write fault */
+#define UFFD_PAGEFAULT_FLAG_WP (1<<1) /* If reason is VM_UFFD_WP */
+
+struct uffdio_api {
+	/* userland asks for an API number and the features to enable */
+	__u64 api;
+	/*
+	 * Kernel answers below with the all available features for
+	 * the API, this notifies userland of which events and/or
+	 * which flags for each event are enabled in the current
+	 * kernel.
+	 *
+	 * Note: UFFD_EVENT_PAGEFAULT and UFFD_PAGEFAULT_FLAG_WRITE
+	 * are to be considered implicitly always enabled in all kernels as
+	 * long as the uffdio_api.api requested matches UFFD_API.
+	 */
+#if 0 /* not available yet */
+#define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0)
+#define UFFD_FEATURE_EVENT_FORK (1<<1)
+#endif
+	__u64 features;
+
+	__u64 ioctls;
+};
+
+struct uffdio_range {
+	__u64 start;
+	__u64 len;
+};
+
+struct uffdio_register {
+	struct uffdio_range range;
+#define UFFDIO_REGISTER_MODE_MISSING ((__u64)1<<0)
+#define UFFDIO_REGISTER_MODE_WP ((__u64)1<<1)
+	__u64 mode;
+
+	/*
+	 * kernel answers which ioctl commands are available for the
+	 * range, keep at the end as the last 8 bytes aren't read.
+	 */
+	__u64 ioctls;
+};
+
+struct uffdio_copy {
+	__u64 dst;
+	__u64 src;
+	__u64 len;
+	/*
+	 * There will be a wrprotection flag later that allows to map
+	 * pages wrprotected on the fly. And such a flag will be
+	 * available if the wrprotection ioctl are implemented for the
+	 * range according to the uffdio_register.ioctls.
+	 */
+#define UFFDIO_COPY_MODE_DONTWAKE ((__u64)1<<0)
+	__u64 mode;
+
+	/*
+	 * "copy" is written by the ioctl and must be at the end: the
+	 * copy_from_user will not read the last 8 bytes.
+	 */
+	__s64 copy;
+};
+
+struct uffdio_zeropage {
+	struct uffdio_range range;
+#define UFFDIO_ZEROPAGE_MODE_DONTWAKE ((__u64)1<<0)
+	__u64 mode;
+
+	/*
+	 * "zeropage" is written by the ioctl and must be at the end:
+	 * the copy_from_user will not read the last 8 bytes.
+	 */
+	__s64 zeropage;
+};
+
+#endif /* _LINUX_USERFAULTFD_H */
diff --git a/criu/include/uffd.h b/criu/include/uffd.h
index 6c931e2b9..4e1ba4ab8 100644
--- a/criu/include/uffd.h
+++ b/criu/include/uffd.h
@@ -1,22 +1,7 @@
 #ifndef __CR_UFFD_H_
 #define __CR_UFFD_H_
 
-#include "config.h"
-#include "restorer.h"
-
-#ifdef CONFIG_HAS_UFFD
-
-#include
-#include
-
-#ifndef __NR_userfaultfd
-#error "missing __NR_userfaultfd definition"
-#endif
-
+struct task_restore_args;
 extern int setup_uffd(struct task_restore_args *task_args, int pid);
 
-#else
-static inline int setup_uffd(struct task_restore_args *task_args, int pid) { return 0; }
-
-#endif /* CONFIG_HAS_UFFD */
 #endif /* __CR_UFFD_H_ */
diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c
index b4ec6cdfe..d478aa5bd 100644
--- a/criu/pie/restorer.c
+++ b/criu/pie/restorer.c
@@ -17,6 +17,8 @@
 #include
 #include
 
+#include "linux/userfaultfd.h"
+
 #include "int.h"
 #include "types.h"
 #include "common/compiler.h"
@@ -789,16 +791,16 @@ static void rst_tcp_socks_all(struct task_restore_args *ta)
 
 static int enable_uffd(int uffd, unsigned long addr, unsigned long len)
 {
+	int rc;
+	struct uffdio_register uffdio_register;
+	unsigned long expected_ioctls;
+
 	/*
 	 * If uffd == -1, this means that userfaultfd is not enabled
 	 * or it is not available.
 	 */
 	if (uffd == -1)
 		return 0;
-#ifdef CONFIG_HAS_UFFD
-	int rc;
-	struct uffdio_register uffdio_register;
-	unsigned long expected_ioctls;
 
 	uffdio_register.range.start = addr;
 	uffdio_register.range.len = len;
@@ -818,7 +820,6 @@ static int enable_uffd(int uffd, unsigned long addr, unsigned long len)
 		pr_err("lazy-pages: unexpected missing uffd ioctl for anon memory\n");
 	}
 
-#endif
 	return 0;
 }
 
diff --git a/criu/uffd.c b/criu/uffd.c
index 4d40f12fc..9a19a8aba 100644
--- a/criu/uffd.c
+++ b/criu/uffd.c
@@ -16,23 +16,25 @@
 #include
 #include
 
+#include "linux/userfaultfd.h"
+
 #include "int.h"
 #include "page.h"
-#include "include/log.h"
-#include "include/criu-plugin.h"
-#include "include/pagemap.h"
-#include "include/files-reg.h"
-#include "include/kerndat.h"
-#include "include/mem.h"
-#include "include/uffd.h"
-#include "include/util-pie.h"
-#include "include/protobuf.h"
-#include "include/pstree.h"
-#include "include/crtools.h"
-#include "include/cr_options.h"
+#include "log.h"
+#include "criu-plugin.h"
+#include "pagemap.h"
+#include "files-reg.h"
+#include "kerndat.h"
+#include "mem.h"
+#include "uffd.h"
+#include "util-pie.h"
+#include "protobuf.h"
+#include "pstree.h"
+#include "crtools.h"
+#include "cr_options.h"
 #include "xmalloc.h"
-
-#ifdef CONFIG_HAS_UFFD
+#include
+#include "restorer.h"
 
 #undef LOG_PREFIX
 #define LOG_PREFIX "lazy-pages: "
@@ -166,7 +168,7 @@ int setup_uffd(struct task_restore_args *task_args, int pid)
 	 * Open userfaulfd FD which is passed to the restorer blob and
 	 * to a second process handling the userfaultfd page faults.
 	 */
-	task_args->uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
+	task_args->uffd = syscall(SYS_userfaultfd, O_CLOEXEC | O_NONBLOCK);
 
 	/*
 	 * Check if the UFFD_API is the one which is expected
@@ -833,13 +835,3 @@ int cr_lazy_pages()
 
 	return ret;
 }
-
-#else /* CONFIG_HAS_UFFD */
-
-int cr_lazy_pages()
-{
-	pr_msg("userfaultfd system call is not supported, cannot start lazy-pages daemon\n");
-	return -1;
-}
-
-#endif /* CONFIG_HAS_UFFD */
diff --git a/scripts/feature-tests.mak b/scripts/feature-tests.mak
index 77f671a90..12bf54181 100644
--- a/scripts/feature-tests.mak
+++ b/scripts/feature-tests.mak
@@ -123,18 +123,3 @@ ENTRY(main)
 	nop
 END(main)
 endef
-
-define FEATURE_TEST_UFFD
-
-#include
-#include
-
-int main(void)
-{
-#ifndef __NR_userfaultfd
-#error "missing __NR_userfaultfd definition"
-#endif
-	return 0;
-}
-
-endef