Review notes (text ahead of the first "diff --git" line is not part of the patch):
- Line breaks and indentation were rebuilt from a whitespace-collapsed copy;
  context-line whitespace may need checking against the tree.
- The header names on the "#include" lines of the criu/kerndat.c hunk were
  missing from that copy and are left blank here; restore them before applying.
- "index" lines are carried over unchanged, so their post-image hashes no
  longer correspond to the edited content.

diff --git a/criu/cr-check.c b/criu/cr-check.c
index 7c3dc76dd..aadd28483 100644
--- a/criu/cr-check.c
+++ b/criu/cr-check.c
@@ -1400,6 +1400,21 @@ static int check_timer_cr_ids(void)
 	return 0;
 }
 
+/*
+ * Check that kerndat detected statmount() support for STATMOUNT_BY_FD.
+ * Returns 0 if it did; otherwise logs a warning and returns -1.
+ */
+static int check_statmount_fd(void)
+{
+	if (!kdat.has_statmount_fd) {
+		pr_warn("statmount syscall with STATMOUNT_BY_FD is unavailable,"
+			" files on unmounted mounts will not be supported\n");
+		return -1;
+	}
+
+	return 0;
+}
+
 /* musl doesn't have a statx wrapper... */
 struct staty {
 	__u32 stx_dev_major;
@@ -1731,6 +1746,7 @@ int cr_check(void)
 		ret |= check_overlayfs_maps();
 		ret |= check_timer_cr_ids();
 		ret |= check_pagemap_scan_guard_pages();
+		ret |= check_statmount_fd();
 
 		if (kdat.lsm == LSMTYPE__APPARMOR)
 			ret |= check_apparmor_stacking();
diff --git a/criu/include/kerndat.h b/criu/include/kerndat.h
index e4922f401..a39275c8f 100644
--- a/criu/include/kerndat.h
+++ b/criu/include/kerndat.h
@@ -93,6 +93,7 @@ struct kerndat_s {
 	bool has_breakpoints;
 	bool has_madv_guard;
 	bool has_pagemap_scan_guard_pages;
+	bool has_statmount_fd; /* statmount() works with STATMOUNT_BY_FD */
 };
 
 extern struct kerndat_s kdat;
diff --git a/criu/include/mount-v2.h b/criu/include/mount-v2.h
index 096f08f3b..74d87d4af 100644
--- a/criu/include/mount-v2.h
+++ b/criu/include/mount-v2.h
@@ -52,6 +52,133 @@ static inline int sys_open_tree(int dfd, const char *filename, unsigned int flag
 #define RESOLVE_NO_XDEV 0x01 /* Block mount-point crossings (includes bind-mounts). */
 #endif
 
+/*
+ * Fallback definitions for statmount(), used when the build headers do not
+ * provide them. The STATMOUNT_* values below are request/result mask bits.
+ */
+#ifndef STATMOUNT_SB_BASIC
+#define STATMOUNT_SB_BASIC	0x00000001U	/* Want/got sb_... */
+#endif
+
+#ifndef STATMOUNT_MNT_BASIC
+#define STATMOUNT_MNT_BASIC	0x00000002U	/* Want/got mnt_... */
+#endif
+
+#ifndef STATMOUNT_PROPAGATE_FROM
+#define STATMOUNT_PROPAGATE_FROM	0x00000004U	/* Want/got propagate_from */
+#endif
+
+#ifndef STATMOUNT_MNT_ROOT
+#define STATMOUNT_MNT_ROOT	0x00000008U	/* Want/got mnt_root */
+#endif
+
+#ifndef STATMOUNT_MNT_POINT
+#define STATMOUNT_MNT_POINT	0x00000010U	/* Want/got mnt_point */
+#endif
+
+#ifndef STATMOUNT_FS_TYPE
+#define STATMOUNT_FS_TYPE	0x00000020U	/* Want/got fs_type */
+#endif
+
+#ifndef STATMOUNT_MNT_NS_ID
+#define STATMOUNT_MNT_NS_ID	0x00000040U	/* Want/got mnt_ns_id */
+#endif
+
+#ifndef STATMOUNT_MNT_OPTS
+#define STATMOUNT_MNT_OPTS	0x00000080U	/* Want/got mnt_opts */
+#endif
+
+#ifndef STATMOUNT_FS_SUBTYPE
+#define STATMOUNT_FS_SUBTYPE	0x00000100U	/* Want/got fs_subtype */
+#endif
+
+#ifndef STATMOUNT_SB_SOURCE
+#define STATMOUNT_SB_SOURCE	0x00000200U	/* Want/got sb_source */
+#endif
+
+#ifndef STATMOUNT_OPT_ARRAY
+#define STATMOUNT_OPT_ARRAY	0x00000400U	/* Want/got opt_... */
+#endif
+
+#ifndef STATMOUNT_OPT_SEC_ARRAY
+#define STATMOUNT_OPT_SEC_ARRAY	0x00000800U	/* Want/got opt_sec... */
+#endif
+
+#ifndef STATMOUNT_SUPPORTED_MASK
+#define STATMOUNT_SUPPORTED_MASK	0x00001000U	/* Want/got supported mask flags */
+#endif
+
+#ifndef STATMOUNT_MNT_UIDMAP
+#define STATMOUNT_MNT_UIDMAP	0x00002000U	/* Want/got uidmap... */
+#endif
+
+#ifndef STATMOUNT_MNT_GIDMAP
+#define STATMOUNT_MNT_GIDMAP	0x00004000U	/* Want/got gidmap... */
+#endif
+
+/* Unlike the mask bits above, this goes in the statmount() flags argument. */
+#ifndef STATMOUNT_BY_FD
+#define STATMOUNT_BY_FD		0x00000001U	/* want mountinfo for given fd */
+#endif
+
+#ifndef MNT_ID_REQ_SIZE_VER1
+#define MNT_ID_REQ_SIZE_VER1	32	/* sizeof second published struct */
+#endif
+
+/*
+ * Request passed to statmount(). The five members total 32 bytes, matching
+ * MNT_ID_REQ_SIZE_VER1.
+ * NOTE(review): this struct and struct statmount are defined unconditionally;
+ * confirm no kernel header included alongside also declares them.
+ */
+struct mnt_id_req {
+	__u32 size;
+	__u32 fd;
+	__u64 mnt_id;
+	__u64 param;
+	__u64 mnt_ns_id;
+};
+
+/*
+ * Reply filled in by statmount(). Members tagged [str] relate to strings
+ * kept in the variable-size str[] tail.
+ */
+struct statmount {
+	__u32 size;		/* Total size, including strings */
+	__u32 mnt_opts;		/* [str] Options (comma separated, escaped) */
+	__u64 mask;		/* What results were written */
+	__u32 sb_dev_major;	/* Device ID */
+	__u32 sb_dev_minor;
+	__u64 sb_magic;		/* ..._SUPER_MAGIC */
+	__u32 sb_flags;		/* SB_{RDONLY,SYNCHRONOUS,DIRSYNC,LAZYTIME} */
+	__u32 fs_type;		/* [str] Filesystem type */
+	__u64 mnt_id;		/* Unique ID of mount */
+	__u64 mnt_parent_id;	/* Unique ID of parent (for root == mnt_id) */
+	__u32 mnt_id_old;	/* Reused IDs used in proc/.../mountinfo */
+	__u32 mnt_parent_id_old;
+	__u64 mnt_attr;		/* MOUNT_ATTR_... */
+	__u64 mnt_propagation;	/* MS_{SHARED,SLAVE,PRIVATE,UNBINDABLE} */
+	__u64 mnt_peer_group;	/* ID of shared peer group */
+	__u64 mnt_master;	/* Mount receives propagation from this ID */
+	__u64 propagate_from;	/* Propagation from in current namespace */
+	__u32 mnt_root;		/* [str] Root of mount relative to root of fs */
+	__u32 mnt_point;	/* [str] Mountpoint relative to current root */
+	__u64 mnt_ns_id;	/* ID of the mount namespace */
+	__u32 fs_subtype;	/* [str] Subtype of fs_type (if any) */
+	__u32 sb_source;	/* [str] Source string of the mount */
+	__u32 opt_num;		/* Number of fs options */
+	__u32 opt_array;	/* [str] Array of nul terminated fs options */
+	__u32 opt_sec_num;	/* Number of security options */
+	__u32 opt_sec_array;	/* [str] Array of nul terminated security options */
+	__u64 supported_mask;	/* Mask flags that this kernel supports */
+	__u32 mnt_uidmap_num;	/* Number of uid mappings */
+	__u32 mnt_uidmap;	/* [str] Array of uid mappings (as seen from callers namespace) */
+	__u32 mnt_gidmap_num;	/* Number of gid mappings */
+	__u32 mnt_gidmap;	/* [str] Array of gid mappings (as seen from callers namespace) */
+	__u64 __spare2[43];
+	char str[];		/* Variable size part containing strings */
+};
+
 static inline long sys_openat2(int dirfd, const char *pathname, struct open_how *how, size_t size)
 {
 	return syscall(__NR_openat2, dirfd, pathname, how, size);
diff --git a/criu/include/util.h b/criu/include/util.h
index 55ad5b63c..5e9ff7aac 100644
--- a/criu/include/util.h
+++ b/criu/include/util.h
@@ -67,6 +67,13 @@ extern int set_proc_fd(int fd);
 
 extern pid_t sys_clone_unified(unsigned long flags, void *child_stack, void *parent_tid, void *child_tid,
 			       unsigned long newtls);
+
+/*
+ * Run statmount() with STATMOUNT_BY_FD on @fd, asking for the STATMOUNT_*
+ * fields in @mask. Returns a heap buffer the caller must free(), or NULL
+ * with errno set.
+ */
+extern struct statmount *do_statmount_fd(int fd, u64 mask);
 
 /*
  * Values for pid argument of the proc opening routines below.
diff --git a/criu/kerndat.c b/criu/kerndat.c
index 997181ce7..2c0f7e351 100644
--- a/criu/kerndat.c
+++ b/criu/kerndat.c
@@ -1,3 +1,4 @@
+#include
 #include
 #include
 #include
@@ -1741,6 +1742,29 @@ static int kerndat_has_timer_cr_ids(void)
 	return 0;
 }
 
+/*
+ * Probe statmount() with STATMOUNT_BY_FD, using stdin as the target fd.
+ * Only ENOSYS and EINVAL are taken to mean "unsupported". Always returns 0.
+ * NOTE(review): any other failure (e.g. a closed stdin) is recorded as
+ * "supported"; confirm that is intended.
+ */
+static int kerndat_has_statmount_fd(void)
+{
+	struct statmount *statmnt;
+
+	statmnt = do_statmount_fd(STDIN_FILENO, STATMOUNT_MNT_BASIC);
+	if (!statmnt && (errno == ENOSYS || errno == EINVAL)) {
+		pr_info("statmount with STATMOUNT_BY_FD flag isn't supported\n");
+		kdat.has_statmount_fd = false;
+	} else {
+		kdat.has_statmount_fd = true;
+	}
+
+	/* Only the outcome is needed; free(NULL) is a no-op on failure. */
+	free(statmnt);
+	return 0;
+}
+
 static void breakpoint_func(void)
 {
 	if (raise(SIGSTOP))
@@ -2125,6 +2149,10 @@ int kerndat_init(void)
 		pr_err("kerndat_has_madv_guard has failed when initializing kerndat.\n");
 		ret = -1;
 	}
+	if (!ret && kerndat_has_statmount_fd()) {
+		pr_err("kerndat_has_statmount_fd failed when initializing kerndat.\n");
+		ret = -1;
+	}
 
 	kerndat_lsm();
 	kerndat_mmap_min_addr();
diff --git a/criu/util.c b/criu/util.c
index e2f80e4c6..7d3e59287 100644
--- a/criu/util.c
+++ b/criu/util.c
@@ -1425,6 +1425,66 @@ static int epoll_hangup_event(int epollfd, struct epoll_rfd *rfd)
 	return ret;
 }
 
+/* Thin wrapper around the raw statmount syscall. */
+static int __statmount(struct mnt_id_req *req, struct statmount *stmnt,
+		       size_t bufsize, int flags)
+{
+	return syscall(__NR_statmount, req, stmnt, bufsize, flags);
+}
+
+/*
+ * Query mount information for @fd using statmount() with STATMOUNT_BY_FD.
+ * @mask selects the STATMOUNT_* fields to request.
+ *
+ * The reply buffer starts at 32 KiB and is doubled for as long as the call
+ * fails with EOVERFLOW, giving up once the size reaches UINT_MAX / 2.
+ *
+ * Returns a heap-allocated reply that the caller must free(), or NULL with
+ * errno describing the failure.
+ */
+struct statmount *do_statmount_fd(int fd, u64 mask)
+{
+	size_t bufsize = 1 << 15;
+	struct statmount *stmnt = NULL, *tmp;
+	int saved_errno;
+
+	struct mnt_id_req req = {
+		.size = MNT_ID_REQ_SIZE_VER1,
+		.fd = fd,
+		.param = mask
+	};
+
+	for (;;) {
+		tmp = xrealloc(stmnt, bufsize);
+		if (!tmp) {
+			/* The previous buffer, if any, is released below. */
+			errno = ENOMEM;
+			goto out;
+		}
+
+		stmnt = tmp;
+		if (!__statmount(&req, stmnt, bufsize, STATMOUNT_BY_FD))
+			return stmnt;
+
+		if (errno != EOVERFLOW)
+			goto out;
+
+		bufsize <<= 1;
+		if (bufsize >= UINT_MAX / 2)
+			goto out;
+	}
+
+out:
+	/*
+	 * Callers tell failures apart by errno, and free() is not guaranteed
+	 * to leave it untouched on every libc.
+	 */
+	saved_errno = errno;
+	free(stmnt);
+	errno = saved_errno;
+	return NULL;
+}
+
 int epoll_run_rfds(int epollfd, struct epoll_event *evs, int nr_fds, int timeout)
 {
 	int ret, i, nr_events;