mirror of
https://github.com/checkpoint-restore/criu.git
synced 2026-01-23 02:14:37 +00:00
kerndat: detect if the system supports clone3() with set_tid
Linux kernel 5.4 extends clone3() with set_tid to allow processes to specify the PID of a newly created process. This commit introduces detection of the clone3() syscall and of whether set_tid is supported. This first implementation is X86_64 only. Signed-off-by: Adrian Reber <areber@redhat.com>
This commit is contained in:
parent
8fea2647b6
commit
ca02c47075
10 changed files with 93 additions and 0 deletions
|
|
@ -115,3 +115,4 @@ ppoll 73 336 (struct pollfd *fds, unsigned int nfds, const struct timespec *t
|
|||
fsopen 430 430 (char *fsname, unsigned int flags)
|
||||
fsconfig 431 431 (int fd, unsigned int cmd, const char *key, const char *value, int aux)
|
||||
fsmount 432 432 (int fd, unsigned int flags, unsigned int attr_flags)
|
||||
clone3 435 435 (struct clone_args *uargs, size_t size)
|
||||
|
|
|
|||
|
|
@ -111,3 +111,4 @@ __NR_ppoll 281 sys_ppoll (struct pollfd *fds, unsigned int nfds, const struct
|
|||
__NR_fsopen 430 sys_fsopen (char *fsname, unsigned int flags)
|
||||
__NR_fsconfig 431 sys_fsconfig (int fd, unsigned int cmd, const char *key, const char *value, int aux)
|
||||
__NR_fsmount 432 sys_fsmount (int fd, unsigned int flags, unsigned int attr_flags)
|
||||
__NR_clone3 435 sys_clone3 (struct clone_args *uargs, size_t size)
|
||||
|
|
|
|||
|
|
@ -111,3 +111,4 @@ __NR_ppoll 302 sys_ppoll (struct pollfd *fds, unsigned int nfds, const struct
|
|||
__NR_fsopen 430 sys_fsopen (char *fsname, unsigned int flags)
|
||||
__NR_fsconfig 431 sys_fsconfig (int fd, unsigned int cmd, const char *key, const char *value, int aux)
|
||||
__NR_fsmount 432 sys_fsmount (int fd, unsigned int flags, unsigned int attr_flags)
|
||||
__NR_clone3 435 sys_clone3 (struct clone_args *uargs, size_t size)
|
||||
|
|
|
|||
|
|
@ -99,3 +99,4 @@ __NR_ppoll 309 sys_ppoll (struct pollfd *fds, unsigned int nfds, const struct
|
|||
__NR_fsopen 430 sys_fsopen (char *fsname, unsigned int flags)
|
||||
__NR_fsconfig 431 sys_fsconfig (int fd, unsigned int cmd, const char *key, const char *value, int aux)
|
||||
__NR_fsmount 432 sys_fsmount (int fd, unsigned int flags, unsigned int attr_flags)
|
||||
__NR_clone3 435 sys_clone3 (struct clone_args *uargs, size_t size)
|
||||
|
|
|
|||
|
|
@ -110,3 +110,4 @@ __NR_ppoll 271 sys_ppoll (struct pollfd *fds, unsigned int nfds, const struc
|
|||
__NR_fsopen 430 sys_fsopen (char *fsname, unsigned int flags)
|
||||
__NR_fsconfig 431 sys_fsconfig (int fd, unsigned int cmd, const char *key, const char *value, int aux)
|
||||
__NR_fsmount 432 sys_fsmount (int fd, unsigned int flags, unsigned int attr_flags)
|
||||
__NR_clone3 435 sys_clone3 (struct clone_args *uargs, size_t size)
|
||||
|
|
|
|||
|
|
@ -39,6 +39,7 @@ struct msghdr;
|
|||
struct rusage;
|
||||
struct iocb;
|
||||
struct pollfd;
|
||||
struct clone_args;
|
||||
|
||||
typedef unsigned long aio_context_t;
|
||||
|
||||
|
|
|
|||
|
|
@ -1224,6 +1224,16 @@ static int check_uffd_noncoop(void)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int check_clone3_set_tid(void)
|
||||
{
|
||||
if (!kdat.has_clone3_set_tid) {
|
||||
pr_warn("clone3() with set_tid not supported\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int check_can_map_vdso(void)
|
||||
{
|
||||
if (kdat_can_map_vdso() == 1)
|
||||
|
|
@ -1373,6 +1383,7 @@ int cr_check(void)
|
|||
ret |= check_sk_netns();
|
||||
ret |= check_kcmp_epoll();
|
||||
ret |= check_net_diag_raw();
|
||||
ret |= check_clone3_set_tid();
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
@ -1476,6 +1487,7 @@ static struct feature_list feature_list[] = {
|
|||
{ "link_nsid", check_link_nsid},
|
||||
{ "kcmp_epoll", check_kcmp_epoll},
|
||||
{ "external_net_ns", check_external_net_ns},
|
||||
{ "clone3_set_tid", check_clone3_set_tid},
|
||||
{ NULL, NULL },
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -66,6 +66,7 @@ struct kerndat_s {
|
|||
bool has_inotify_setnextwd;
|
||||
bool has_kcmp_epoll_tfd;
|
||||
bool has_fsopen;
|
||||
bool has_clone3_set_tid;
|
||||
};
|
||||
|
||||
extern struct kerndat_s kdat;
|
||||
|
|
|
|||
33
criu/include/sched.h
Normal file
33
criu/include/sched.h
Normal file
|
|
@ -0,0 +1,33 @@
|
|||
#ifndef __CR_SCHED_H__
|
||||
#define __CR_SCHED_H__
|
||||
|
||||
#include <linux/types.h>
|
||||
|
||||
/*
 * Helpers for moving pointers through 64-bit integer fields such as
 * the members of struct _clone_args. Both go through uintptr_t so the
 * conversion is well defined regardless of pointer width.
 *
 * NOTE(review): uintptr_t is not provided by <linux/types.h>; users of
 * these macros presumably get it from <stdint.h> elsewhere - confirm.
 */
#ifndef ptr_to_u64
#define ptr_to_u64(ptr) ((__u64)((uintptr_t)(ptr)))
#endif
#ifndef u64_to_ptr
/* The argument is parenthesized so expressions like (a | b) convert as a whole. */
#define u64_to_ptr(x) ((void *)(uintptr_t)(x))
#endif
|
||||
|
||||
/*
 * Argument block passed to clone3(); the kernel's own name for it is
 * 'struct clone_args'.
 *
 * CRIU always needs at least this much of the structure (VER1) to be
 * able to test whether clone3() with set_tid works, so it is defined
 * locally as 'struct _clone_args'.
 *
 * Every member is a 64-bit aligned integer; the member order must not
 * be changed. See clone3(2) for the meaning of the individual fields.
 */
struct _clone_args {
	__aligned_u64 flags;
	__aligned_u64 pidfd;
	__aligned_u64 child_tid;
	__aligned_u64 parent_tid;
	__aligned_u64 exit_signal;
	__aligned_u64 stack;
	__aligned_u64 stack_size;
	__aligned_u64 tls;
	/* Fields used for the set_tid feature. */
	__aligned_u64 set_tid;
	__aligned_u64 set_tid_size;
};
|
||||
#endif /* __CR_SCHED_H__ */
|
||||
|
|
@ -41,6 +41,7 @@
|
|||
#include "uffd.h"
|
||||
#include "vdso.h"
|
||||
#include "kcmp.h"
|
||||
#include "sched.h"
|
||||
|
||||
struct kerndat_s kdat = {
|
||||
};
|
||||
|
|
@ -986,6 +987,44 @@ static int kerndat_tun_netns(void)
|
|||
return check_tun_netns_cr(&kdat.tun_ns);
|
||||
}
|
||||
|
||||
static bool kerndat_has_clone3_set_tid(void)
|
||||
{
|
||||
pid_t pid;
|
||||
struct _clone_args args = {};
|
||||
|
||||
#ifndef CONFIG_X86_64
|
||||
/*
|
||||
* Currently the CRIU PIE assembler clone3() wrapper is
|
||||
* only implemented for X86_64.
|
||||
*/
|
||||
kdat.has_clone3_set_tid = false;
|
||||
return 0;
|
||||
#endif
|
||||
|
||||
args.set_tid = -1;
|
||||
/*
|
||||
* On a system without clone3() this will return ENOSYS.
|
||||
* On a system with clone3() but without set_tid this
|
||||
* will return E2BIG.
|
||||
* On a system with clone3() and set_tid it will return
|
||||
* EINVAL.
|
||||
*/
|
||||
pid = syscall(__NR_clone3, &args, sizeof(args));
|
||||
|
||||
if (pid == -1 && (errno == ENOSYS || errno == E2BIG)) {
|
||||
kdat.has_clone3_set_tid = false;
|
||||
return 0;
|
||||
}
|
||||
if (pid == -1 && errno == EINVAL) {
|
||||
kdat.has_clone3_set_tid = true;
|
||||
} else {
|
||||
pr_perror("Unexpected error from clone3\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int kerndat_init(void)
|
||||
{
|
||||
int ret;
|
||||
|
|
@ -1059,6 +1098,8 @@ int kerndat_init(void)
|
|||
ret = has_kcmp_epoll_tfd();
|
||||
if (!ret)
|
||||
ret = kerndat_has_fsopen();
|
||||
if (!ret)
|
||||
ret = kerndat_has_clone3_set_tid();
|
||||
|
||||
kerndat_lsm();
|
||||
kerndat_mmap_min_addr();
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue