This commit is contained in:
Bhavik Sachdev 2025-11-28 18:13:18 +01:00 committed by GitHub
commit ebe4c11da5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
35 changed files with 1006 additions and 10 deletions

View file

@ -124,3 +124,4 @@ openat2 437 437 (int dirfd, char *pathname, struct open_how *how, size_t size
pidfd_getfd 438 438 (int pidfd, int targetfd, unsigned int flags)
rseq 293 398 (void *rseq, uint32_t rseq_len, int flags, uint32_t sig)
membarrier 283 389 (int cmd, unsigned int flags, int cpu_id)
statmount 457 457 (struct mnt_id_req *req, struct statmount *smbuf, size_t bufsize, unsigned long flags)

View file

@ -113,6 +113,7 @@ __NR_pidfd_open 434 sys_pidfd_open (pid_t pid, unsigned int flags)
__NR_clone3 435 sys_clone3 (struct clone_args *uargs, size_t size)
__NR_openat2 437 sys_openat2 (int dirfd, char *pathname, struct open_how *how, size_t size)
__NR_pidfd_getfd 438 sys_pidfd_getfd (int pidfd, int targetfd, unsigned int flags)
__NR_statmount 457 sys_statmount (struct mnt_id_req *req, struct statmount *smbuf, size_t bufsize, unsigned long flags)
#__NR_dup2 ! sys_dup2 (int oldfd, int newfd)
#__NR_rmdir ! sys_rmdir (const char *name)
#__NR_unlink ! sys_unlink (char *pathname)

View file

@ -121,3 +121,4 @@ __NR_openat2 5437 sys_openat2 (int dirfd, char *pathname, struct open_how *h
__NR_pidfd_getfd 5438 sys_pidfd_getfd (int pidfd, int targetfd, unsigned int flags)
__NR_rseq 5327 sys_rseq (void *rseq, uint32_t rseq_len, int flags, uint32_t sig)
__NR_membarrier 5318 sys_membarrier (int cmd, unsigned int flags, int cpu_id)
__NR_statmount 5457 sys_statmount (struct mnt_id_req *req, struct statmount *smbuf, size_t bufsize, unsigned long flags)

View file

@ -120,3 +120,4 @@ __NR_openat2 437 sys_openat2 (int dirfd, char *pathname, struct open_how *how
__NR_pidfd_getfd 438 sys_pidfd_getfd (int pidfd, int targetfd, unsigned int flags)
__NR_rseq 387 sys_rseq (void *rseq, uint32_t rseq_len, int flags, uint32_t sig)
__NR_membarrier 365 sys_membarrier (int cmd, unsigned int flags, int cpu_id)
__NR_statmount 457 sys_statmount (struct mnt_id_req *req, struct statmount *smbuf, size_t bufsize, unsigned long flags)

View file

@ -123,3 +123,4 @@ move_mount 429 429 (int from_dfd, const char *from_pathname, int to_d
open_tree 428 428 (int dirfd, const char *pathname, unsigned int flags)
openat2 437 437 (int dirfd, char *pathname, struct open_how *how, size_t size)
membarrier 283 283 (int cmd, unsigned int flags, int cpu_id)
statmount 457 457 (struct mnt_id_req *req, struct statmount *smbuf, size_t bufsize, unsigned long flags)

View file

@ -120,3 +120,4 @@ __NR_openat2 437 sys_openat2 (int dirfd, char *pathname, struct open_how *how
__NR_pidfd_getfd 438 sys_pidfd_getfd (int pidfd, int targetfd, unsigned int flags)
__NR_rseq 383 sys_rseq (void *rseq, uint32_t rseq_len, int flags, uint32_t sig)
__NR_membarrier 356 sys_membarrier (int cmd, unsigned int flags, int cpu_id)
__NR_statmount 457 sys_statmount (struct mnt_id_req *req, struct statmount *smbuf, size_t bufsize, unsigned long flags)

View file

@ -108,3 +108,4 @@ __NR_openat2 437 sys_openat2 (int dirfd, char *pathname, struct open_how *how
__NR_pidfd_getfd 438 sys_pidfd_getfd (int pidfd, int targetfd, unsigned int flags)
__NR_rseq 386 sys_rseq (void *rseq, uint32_t rseq_len, int flags, uint32_t sig)
__NR_membarrier 375 sys_membarrier (int cmd, unsigned int flags, int cpu_id)
__NR_statmount 457 sys_statmount (struct mnt_id_req *req, struct statmount *smbuf, size_t bufsize, unsigned long flags)

View file

@ -120,3 +120,4 @@ __NR_pidfd_getfd 438 sys_pidfd_getfd (int pidfd, int targetfd, unsigned int f
__NR_rseq 334 sys_rseq (void *rseq, uint32_t rseq_len, int flags, uint32_t sig)
__NR_membarrier 324 sys_membarrier (int cmd, unsigned int flags, int cpu_id)
__NR_map_shadow_stack 453 sys_map_shadow_stack (unsigned long addr, unsigned long size, unsigned int flags)
__NR_statmount 457 sys_statmount (struct mnt_id_req *req, struct statmount *smbuf, size_t bufsize, unsigned long flags)

View file

@ -41,6 +41,8 @@ struct iocb;
struct pollfd;
struct clone_args;
struct open_how;
struct mnt_id_req;
struct statmount;
typedef unsigned long aio_context_t;

View file

@ -1400,6 +1400,17 @@ static int check_timer_cr_ids(void)
return 0;
}
/*
 * Feature check for statmount() with STATMOUNT_BY_FD.
 *
 * Returns 0 when kdat reports the feature as present; otherwise prints a
 * warning and returns -1.
 */
static int check_statmount_fd(void)
{
	if (kdat.has_statmount_fd)
		return 0;

	pr_warn("statmount syscall with STATMOUNT_BY_FD is unavailable,"
		" files on unmounted mounts will not be supported\n");
	return -1;
}
/* musl doesn't have a statx wrapper... */
struct staty {
__u32 stx_dev_major;
@ -1731,6 +1742,7 @@ int cr_check(void)
ret |= check_overlayfs_maps();
ret |= check_timer_cr_ids();
ret |= check_pagemap_scan_guard_pages();
ret |= check_statmount_fd();
if (kdat.lsm == LSMTYPE__APPARMOR)
ret |= check_apparmor_stacking();

View file

@ -2158,6 +2158,9 @@ skip_ns_bouncing:
if (ret < 0)
goto out_kill;
if (umount_detached_mountpoints())
goto out_kill;
ret = stop_usernsd();
if (ret < 0)
goto out_kill;

View file

@ -1,3 +1,5 @@
#include <linux/limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
@ -15,9 +17,11 @@
#include <elf.h>
#include <linux/fiemap.h>
#include <linux/fs.h>
#include <inttypes.h>
#include "tty.h"
#include "stats.h"
#include "filesystems.h"
#include "common/bug.h"
#ifndef SEEK_DATA
#define SEEK_DATA 3
@ -1787,8 +1791,10 @@ int dump_one_reg_file(int lfd, u32 id, const struct fd_parms *p)
if (opts.shell_job && is_tty(p->stat.st_rdev, p->stat.st_dev)) {
skip_for_shell_job = true;
} else {
pr_err("Can't lookup mount=%d for fd=%d path=%s\n", p->mnt_id, p->fd, link->name + 1);
return -1;
if (!(kdat.has_statmount_fd && (mi = mount_info_from_statmount(lfd)))) {
pr_err("Can't lookup mount=%d for fd=%d path=%s\n", p->mnt_id, p->fd, link->name + 1);
return -1;
}
}
}
@ -1813,7 +1819,8 @@ int dump_one_reg_file(int lfd, u32 id, const struct fd_parms *p)
return -1;
}
if (!skip_for_shell_job && check_path_remap(link, p, lfd, id, mi->nsid))
/* skipping for detached */
if (!skip_for_shell_job && !mi->detached_mnt && check_path_remap(link, p, lfd, id, mi->nsid))
return -1;
rfe.name = &link->name[1];
ext:
@ -2192,10 +2199,12 @@ int open_path(struct file_desc *d, int (*open_cb)(int mntns_root, struct reg_fil
{
int tmp = -1, mntns_root, level = 0;
struct reg_file_info *rfi;
struct mount_info *mi;
char *orig_path = NULL;
char path[PATH_MAX];
int inh_fd = -1;
int ret;
char dot[] = ".";
if (inherited_fd(d, &tmp))
return tmp;
@ -2261,6 +2270,21 @@ int open_path(struct file_desc *d, int (*open_cb)(int mntns_root, struct reg_fil
}
mntns_root = mntns_get_root_by_mnt_id(rfi->rfe->mnt_id);
mi = mnt_is_detached(rfi->rfe->mnt_id);
if (!mi)
goto ext;
if (strncmp(rfi->path, dot, strlen(dot)) == 0) {
rfi->path = mi->ns_mountpoint;
} else {
char* path = xmalloc(PATH_MAX);
if (!path)
goto err;
snprintf(path, PATH_MAX, "%s/%s", mi->ns_mountpoint, rfi->path);
rfi->path = path;
}
ext:
tmp = open_cb(mntns_root, rfi, arg);
if (tmp < 0) {

View file

@ -1,3 +1,4 @@
#include "log.h"
#include <stdio.h>
#include <stdbool.h>
#include <string.h>

View file

@ -93,6 +93,7 @@ struct kerndat_s {
bool has_breakpoints;
bool has_madv_guard;
bool has_pagemap_scan_guard_pages;
bool has_statmount_fd;
};
extern struct kerndat_s kdat;

View file

@ -52,6 +52,118 @@ static inline int sys_open_tree(int dfd, const char *filename, unsigned int flag
#define RESOLVE_NO_XDEV 0x01 /* Block mount-point crossings (includes bind-mounts). */
#endif
/*
 * Fallback definitions of the statmount constants. Each one is guarded by
 * #ifndef so that a definition already provided by the system headers wins.
 *
 * The STATMOUNT_* values below (up to STATMOUNT_MNT_GIDMAP) are request/result
 * mask bits: they select which parts of struct statmount are wanted.
 */
#ifndef STATMOUNT_SB_BASIC
#define STATMOUNT_SB_BASIC 0x00000001U /* Want/got sb_... */
#endif
#ifndef STATMOUNT_MNT_BASIC
#define STATMOUNT_MNT_BASIC 0x00000002U /* Want/got mnt_... */
#endif
#ifndef STATMOUNT_PROPAGATE_FROM
#define STATMOUNT_PROPAGATE_FROM 0x00000004U /* Want/got propagate_from */
#endif
#ifndef STATMOUNT_MNT_ROOT
#define STATMOUNT_MNT_ROOT 0x00000008U /* Want/got mnt_root */
#endif
#ifndef STATMOUNT_MNT_POINT
#define STATMOUNT_MNT_POINT 0x00000010U /* Want/got mnt_point */
#endif
#ifndef STATMOUNT_FS_TYPE
#define STATMOUNT_FS_TYPE 0x00000020U /* Want/got fs_type */
#endif
#ifndef STATMOUNT_MNT_NS_ID
#define STATMOUNT_MNT_NS_ID 0x00000040U /* Want/got mnt_ns_id */
#endif
#ifndef STATMOUNT_MNT_OPTS
#define STATMOUNT_MNT_OPTS 0x00000080U /* Want/got mnt_opts */
#endif
#ifndef STATMOUNT_FS_SUBTYPE
#define STATMOUNT_FS_SUBTYPE 0x00000100U /* Want/got fs_subtype */
#endif
#ifndef STATMOUNT_SB_SOURCE
#define STATMOUNT_SB_SOURCE 0x00000200U /* Want/got sb_source */
#endif
#ifndef STATMOUNT_OPT_ARRAY
#define STATMOUNT_OPT_ARRAY 0x00000400U /* Want/got opt_... */
#endif
#ifndef STATMOUNT_OPT_SEC_ARRAY
#define STATMOUNT_OPT_SEC_ARRAY 0x00000800U /* Want/got opt_sec... */
#endif
#ifndef STATMOUNT_SUPPORTED_MASK
#define STATMOUNT_SUPPORTED_MASK 0x00001000U /* Want/got supported mask flags */
#endif
#ifndef STATMOUNT_MNT_UIDMAP
#define STATMOUNT_MNT_UIDMAP 0x00002000U /* Want/got uidmap... */
#endif
#ifndef STATMOUNT_MNT_GIDMAP
#define STATMOUNT_MNT_GIDMAP 0x00004000U /* Want/got gidmap... */
#endif
/*
 * Unlike the mask bits above, STATMOUNT_BY_FD is a value for the syscall's
 * separate flags argument, so sharing the numeric value 1 with
 * STATMOUNT_SB_BASIC is not a conflict.
 */
#ifndef STATMOUNT_BY_FD
#define STATMOUNT_BY_FD 0x0000001U /* want mountinfo for given fd */
#endif
/* Value stored in mnt_id_req.size by callers of statmount. */
#ifndef MNT_ID_REQ_SIZE_VER1
#define MNT_ID_REQ_SIZE_VER1 32 /* sizeof second published struct */
#endif
/*
 * Request descriptor handed to the statmount syscall as its first argument.
 *
 * NOTE(review): this struct is defined unconditionally, unlike the guarded
 * macros next to it; presumably it can clash with system headers that already
 * declare struct mnt_id_req -- confirm against the supported toolchains.
 */
struct mnt_id_req {
__u32 size; /* size of this request; callers store MNT_ID_REQ_SIZE_VER1 */
__u32 fd; /* file descriptor whose mount is queried with STATMOUNT_BY_FD */
__u64 mnt_id; /* presumably the mount ID for lookups by ID -- TODO confirm */
__u64 param; /* mask of STATMOUNT_* parts being requested */
__u64 mnt_ns_id; /* presumably selects a mount namespace -- TODO confirm */
};
/*
 * Reply buffer filled in by the statmount syscall.
 *
 * Fields tagged [str] are not pointers: they are byte offsets into the
 * trailing str[] area, so a string is read as (sm->str + sm->field).
 * The mask field reports which parts were actually written.
 *
 * NOTE(review): defined unconditionally; presumably this can clash with
 * system headers that already declare struct statmount -- confirm.
 */
struct statmount {
__u32 size; /* Total size, including strings */
__u32 mnt_opts; /* [str] Options (comma separated, escaped) */
__u64 mask; /* What results were written */
__u32 sb_dev_major; /* Device ID */
__u32 sb_dev_minor;
__u64 sb_magic; /* ..._SUPER_MAGIC */
__u32 sb_flags; /* SB_{RDONLY,SYNCHRONOUS,DIRSYNC,LAZYTIME} */
__u32 fs_type; /* [str] Filesystem type */
__u64 mnt_id; /* Unique ID of mount */
__u64 mnt_parent_id; /* Unique ID of parent (for root == mnt_id) */
__u32 mnt_id_old; /* Reused IDs used in proc/.../mountinfo */
__u32 mnt_parent_id_old;
__u64 mnt_attr; /* MOUNT_ATTR_... */
__u64 mnt_propagation; /* MS_{SHARED,SLAVE,PRIVATE,UNBINDABLE} */
__u64 mnt_peer_group; /* ID of shared peer group */
__u64 mnt_master; /* Mount receives propagation from this ID */
__u64 propagate_from; /* Propagation from in current namespace */
__u32 mnt_root; /* [str] Root of mount relative to root of fs */
__u32 mnt_point; /* [str] Mountpoint relative to current root */
__u64 mnt_ns_id; /* ID of the mount namespace */
__u32 fs_subtype; /* [str] Subtype of fs_type (if any) */
__u32 sb_source; /* [str] Source string of the mount */
__u32 opt_num; /* Number of fs options */
__u32 opt_array; /* [str] Array of nul terminated fs options */
__u32 opt_sec_num; /* Number of security options */
__u32 opt_sec_array; /* [str] Array of nul terminated security options */
__u64 supported_mask; /* Mask flags that this kernel supports */
__u32 mnt_uidmap_num; /* Number of uid mappings */
__u32 mnt_uidmap; /* [str] Array of uid mappings (as seen from callers namespace) */
__u32 mnt_gidmap_num; /* Number of gid mappings */
__u32 mnt_gidmap; /* [str] Array of gid mappings (as seen from callers namespace) */
__u64 __spare2[43];
char str[]; /* Variable size part containing strings */
};
static inline long sys_openat2(int dirfd, const char *pathname, struct open_how *how, size_t size)
{
return syscall(__NR_openat2, dirfd, pathname, how, size);

View file

@ -111,10 +111,12 @@ struct mount_info {
struct list_head mnt_propagate; /* circular list of mounts which propagate from each other */
struct list_head mnt_notprop; /* temporary list used in can_mount_now */
struct list_head mnt_unbindable; /* list of mounts with delayed unbindable */
struct list_head mnt_detached_list;
struct list_head postpone;
int is_overmounted;
bool detached_mnt;
struct rst_mount_info *rmi;
@ -124,6 +126,8 @@ struct mount_info {
extern struct mount_info *mntinfo;
extern void mntinfo_add_list_before(struct mount_info **head, struct mount_info *new);
extern void mntinfo_add_list(struct mount_info *new);
extern struct mount_info *mount_info_from_statmount(int lfd);
/*
* Put a : in here since those are invalid on
@ -232,8 +236,10 @@ extern int mount_root(void *args, int fd, pid_t pid);
extern int restore_ext_mount(struct mount_info *mi);
extern int cr_pivot_root(char *root);
extern int print_ns_root(struct ns_id *ns, int remap_id, char *buf, int bs);
extern struct mount_info* mnt_is_detached(int mnt_id);
extern struct mount_info *root_yard_mp;
extern char *mnt_roots;
extern struct list_head detached_mounts;
#endif /* __CR_MOUNT_H__ */

View file

@ -165,7 +165,7 @@ extern int dump_namespaces(struct pstree_item *item, unsigned int ns_flags);
extern int prepare_namespace_before_tasks(void);
extern int prepare_namespace(struct pstree_item *item, unsigned long clone_flags);
extern int prepare_userns_creds(void);
extern int umount_detached_mountpoints(void);
extern int switch_ns(int pid, struct ns_desc *nd, int *rst);
extern int switch_mnt_ns(int pid, int *rst, int *cwd_fd);
extern int switch_ns_by_fd(int nsfd, struct ns_desc *nd, int *rst);

View file

@ -93,6 +93,7 @@ extern int prepare_loginuid(unsigned int value);
extern int parse_pid_status(pid_t pid, struct seize_task_status *, void *data);
extern int parse_file_locks(void);
extern int get_fd_mntid(int fd, int *mnt_id);
extern int parse_sb_opt(char *opt, unsigned *flags, char *uopt);
struct pid;
extern int parse_threads(int pid, struct pid **_t, int *_n);

View file

@ -67,6 +67,7 @@ extern int set_proc_fd(int fd);
extern pid_t sys_clone_unified(unsigned long flags, void *child_stack, void *parent_tid, void *child_tid,
unsigned long newtls);
struct statmount *do_statmount_fd(int fd, u64 mask);
/*
* Values for pid argument of the proc opening routines below.

View file

@ -1,3 +1,4 @@
#include <linux/stat.h>
#include <unistd.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
@ -1741,6 +1742,18 @@ static int kerndat_has_timer_cr_ids(void)
return 0;
}
/*
 * Probe whether statmount() accepts the STATMOUNT_BY_FD flag by querying the
 * mount behind stdin, and record the answer in kdat.has_statmount_fd.
 *
 * Only ENOSYS and EINVAL are treated as "not supported"; a successful call,
 * or a failure with any other errno, marks the feature as available.
 *
 * Always returns 0.
 */
static int kerndat_has_statmount_fd(void)
{
	struct statmount *statmnt = do_statmount_fd(STDIN_FILENO, STATMOUNT_MNT_BASIC);

	if (statmnt) {
		/*
		 * Only the outcome of the call matters here. The reply buffer
		 * is heap-allocated by do_statmount_fd() and owned by us, so
		 * release it instead of leaking it.
		 */
		free(statmnt);
		kdat.has_statmount_fd = true;
		return 0;
	}

	if (errno == ENOSYS || errno == EINVAL) {
		pr_info("statmount with STATMOUNT_BY_FD flag isn't supported\n");
		kdat.has_statmount_fd = false;
	} else {
		kdat.has_statmount_fd = true;
	}

	return 0;
}
static void breakpoint_func(void)
{
if (raise(SIGSTOP))
@ -2125,6 +2138,10 @@ int kerndat_init(void)
pr_err("kerndat_has_madv_guard has failed when initializing kerndat.\n");
ret = -1;
}
if (!ret && kerndat_has_statmount_fd()) {
pr_err("kerndat_has_statmount_fd failed when initializing kerndat.\n");
ret = -1;
}
kerndat_lsm();
kerndat_mmap_min_addr();

View file

@ -1,4 +1,7 @@
#include <linux/limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
@ -1283,6 +1286,117 @@ err:
return exit_code;
}
/*
 * Decide whether a detached mount is rooted at a directory.
 *
 * No real detection is performed: every mount passed in is unconditionally
 * flagged as a directory. Always returns 0.
 */
static int detect_is_detached_dir(struct mount_info *detached)
{
	struct mount_info *mi = detached;

	mi->is_dir = true;

	return 0;
}
/*
 * Create the temporary directory named by detached->ns_mountpoint.
 *
 * Returns 0 once the directory exists, -1 if mkdir() fails or if the mount
 * is not flagged as a directory.
 */
static int create_temporary_mountpoint(struct mount_info *detached)
{
	/* Nothing is created for mounts that are not directories. */
	if (!detached->is_dir)
		return -1;

	/*
	 * A detached mount has no mountpoint of its own,
	 * so a temporary one is created for it.
	 */
	if (mkdir(detached->ns_mountpoint, 0700) != 0) {
		pr_perror("failed to create temporary mountpoint for mnt_id=%d", detached->mnt_id);
		return -1;
	}

	pr_debug("Created temporary mountpoint for mnt_id=%d\n", detached->mnt_id);
	return 0;
}
/*
 * Recreate one detached mount as a bind mount of detached->bind.
 *
 * The work is done inside the mount namespace recorded in detached->nsid:
 * a temporary mountpoint is created, the bind source is bind-mounted onto it
 * and, if the non-propagation flags differ from the source's, the new mount
 * is remounted with the detached mount's own flags. The caller's mount
 * namespace is restored before returning.
 *
 * Returns 0 on success and -1 on any failure.
 */
static int do_one_detached_bind_mount(struct mount_info *detached)
{
	int nsfd, original_nsfd;
	unsigned long mflags;
	int ret = 0;

	BUG_ON(!detached->nsid || !detached->bind);

	/*
	 * NOTE(review): original_nsfd is opened here and then also passed by
	 * address to switch_ns_by_fd() below. If that helper stores its own
	 * descriptor through the pointer, the one opened here is leaked --
	 * confirm against switch_ns_by_fd().
	 */
	original_nsfd = open_proc(PROC_SELF, "ns/mnt");
	if (original_nsfd < 0)
		return -1;

	nsfd = fdstore_get(detached->nsid->mnt.nsfd_id);
	if (nsfd < 0) {
		pr_err("failed to get nsfd for detached mount mnt_id=%d\n", detached->mnt_id);
		close(original_nsfd);
		return -1;
	}

	if (switch_ns_by_fd(nsfd, &mnt_ns_desc, &original_nsfd)) {
		pr_err("failed to mount namespace for detached mount mnt_id=%d\n", detached->mnt_id);
		/* The namespace fd is ours on this path too; do not leak it. */
		close(nsfd);
		close(original_nsfd);
		return -1;
	}
	close(nsfd);

	detached->private = detached->bind->private;

	if (create_temporary_mountpoint(detached)) {
		ret = -1;
		goto out;
	}

	if (__do_bind_mount_v2(detached->bind->ns_mountpoint, detached->ns_mountpoint)) {
		/* This aborts the restore, so report it as an error. */
		pr_err("failed to do bind: %s, detached: %s\n", detached->bind->ns_mountpoint, detached->ns_mountpoint);
		ret = -1;
		goto out;
	}

	/* Propagation is handled elsewhere; compare and apply the rest only. */
	mflags = detached->flags & (~MS_PROPAGATE);
	if (mflags != (detached->bind->flags & (~MS_PROPAGATE))) {
		if (mount(NULL, detached->ns_mountpoint, NULL, MS_BIND | MS_REMOUNT | mflags, NULL)) {
			pr_perror("Can't bind remount 0x%lx at %s", mflags, detached->ns_mountpoint);
			ret = -1;
			goto out;
		}
	}

	detached->mounted = true;
out:
	if (restore_ns(original_nsfd, &mnt_ns_desc)) {
		pr_perror("failed to restore original mount namespace");
		return -1;
	}
	/*
	 * NOTE(review): presumably restore_ns() already closes the descriptor
	 * it is given; if so this close() is redundant -- confirm.
	 */
	close(original_nsfd);
	return ret;
}
/*
 * Mount a single detached mount that has not been mounted yet.
 *
 * Mounts with a bind source are delegated to do_one_detached_bind_mount();
 * mounts without one are left alone and reported as success.
 */
static int do_one_detached_mount(struct mount_info *detached)
{
	BUG_ON(detached->mounted);

	/* detached->bind should be filled up by search_bindmounts() */
	if (!detached->bind)
		return 0;

	return do_one_detached_bind_mount(detached);
}
/*
 * Walk the detached_mounts list and mount every entry.
 *
 * Stops at the first entry that fails and returns -1; returns 0 when all
 * entries were processed.
 */
static int mount_detached_mounts(void)
{
	struct mount_info *mi;

	list_for_each_entry(mi, &detached_mounts, mnt_detached_list) {
		BUG_ON(!mi->detached_mnt);

		/* currently only doing directory mounts */
		if (detect_is_detached_dir(mi) || do_one_detached_mount(mi))
			return -1;
	}

	return 0;
}
/* The main entry point of mount-v2 for creating mounts */
int prepare_mnt_ns_v2(void)
{
@ -1314,6 +1428,9 @@ int prepare_mnt_ns_v2(void)
if (assemble_mount_namespaces())
return -1;
if (mount_detached_mounts())
return -1;
if (restore_mount_sharing_options())
return -1;

View file

@ -1,3 +1,7 @@
#include "common/list.h"
#include <fcntl.h>
#include <linux/stat.h>
#include <signal.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/types.h>
@ -6,6 +10,7 @@
#include <sys/stat.h>
#include <string.h>
#include <stdlib.h>
#include <linux/mount.h>
#include <sys/mount.h>
#include <sys/wait.h>
#include <sched.h>
@ -29,6 +34,7 @@
#include "clone-noasan.h"
#include "fdstore.h"
#include "rst-malloc.h"
#include "proc_parse.h"
#include "images/mnt.pb-c.h"
@ -111,7 +117,7 @@ static char *ext_mount_lookup(char *key)
*/
struct mount_info *mntinfo;
static void mntinfo_add_list(struct mount_info *new)
void mntinfo_add_list(struct mount_info *new)
{
if (!mntinfo)
mntinfo = new;
@ -351,6 +357,19 @@ static bool mounts_equal(struct mount_info *a, struct mount_info *b)
* non-root namespaces.
*/
char *mnt_roots;
LIST_HEAD(detached_mounts);
/*
 * Look up mnt_id among the collected detached mounts.
 *
 * Returns the matching entry of the detached_mounts list, or NULL when the
 * id does not belong to a detached mount.
 */
struct mount_info *mnt_is_detached(int mnt_id)
{
	struct mount_info *mi;

	list_for_each_entry(mi, &detached_mounts, mnt_detached_list) {
		if (mi->mnt_id != mnt_id)
			continue;
		return mi;
	}

	return NULL;
}
static struct mount_info *mnt_build_ids_tree(struct mount_info *list)
{
@ -366,10 +385,16 @@ static struct mount_info *mnt_build_ids_tree(struct mount_info *list)
pr_debug("\t\tWorking on %d->%d\n", m->mnt_id, m->parent_mnt_id);
if (m->mnt_id != m->parent_mnt_id)
if (m->mnt_id != m->parent_mnt_id) {
parent = __lookup_mnt_id(list, m->parent_mnt_id);
else /* a circular mount reference. It's rootfs or smth like it. */
} else /* a circular mount reference. It's rootfs or detached mount or smth like it. */ {
if (m->detached_mnt) {
list_add(&m->mnt_detached_list, &detached_mounts);
continue;
}
parent = NULL;
}
if (!parent) {
/* Only a root mount can be without parent */
@ -1867,6 +1892,11 @@ static int dump_one_mountpoint(struct mount_info *pm, struct cr_img *img)
* for reverse mapping details.
*/
me.ext_key = pm->external;
if (pm->detached_mnt) {
me.has_detached_mnt = true;
me.detached_mnt = pm->detached_mnt;
}
me.root = pm->root;
if (pb_write_one(img, &me, PB_MNT))
@ -3035,6 +3065,7 @@ struct mount_info *mnt_entry_alloc(bool rst)
INIT_LIST_HEAD(&new->mnt_unbindable);
INIT_LIST_HEAD(&new->postpone);
INIT_LIST_HEAD(&new->deleted_list);
INIT_LIST_HEAD(&new->mnt_detached_list);
}
return new;
}
@ -3153,6 +3184,27 @@ static int get_mp_mountpoint(char *mountpoint, struct mount_info *mi, char *root
{
int len;
if (mi->detached_mnt) {
/*
* ns_mountpoint, mountpoint don't really make sense for detached mounts
* since, detached mounts are not really part of the filesystem.
* We just need to create some temporary mountpoints to open fds and
* then MNT_DETACH.
* In the future, we can set them to NULL and just use plain_mountpoint.
*/
mi->ns_mountpoint = xmalloc(PATH_MAX);
if (!mi->ns_mountpoint) {
pr_debug("Could not allocate memory for mountpoint: mnt_id:%d\n", mi->mnt_id);
return -1;
}
snprintf(mi->ns_mountpoint, PATH_MAX, "/.criu.detached.%010d", mi->mnt_id);
mi->mountpoint = mi->ns_mountpoint;
mi->plain_mountpoint = mi->ns_mountpoint;
return 0;
}
len = strlen(mountpoint) + root_len + 1;
mi->mountpoint = xmalloc(len);
if (!mi->mountpoint)
@ -3294,6 +3346,9 @@ static int collect_mnt_from_image(struct mount_info **head, struct mount_info **
if (me->has_internal_sharing)
pm->internal_sharing = me->internal_sharing;
if (me->has_detached_mnt)
pm->detached_mnt = me->detached_mnt;
pm->source = xstrdup(me->source);
if (!pm->source)
goto err;
@ -4053,7 +4108,6 @@ int dump_mnt_namespaces(void)
if (dump_mnt_ns(nsid, nsid->mnt.mntinfo_list))
return -1;
}
return 0;
}
@ -4238,6 +4292,128 @@ int remount_readonly_mounts(void)
return call_helper_process(ns_remount_readonly_mounts, NULL);
}
/*
 * Translate MOUNT_ATTR_* attribute bits (as reported in statmount's mnt_attr)
 * into the equivalent MS_* mount flags.
 *
 * @flags: MOUNT_ATTR_* bit set.
 * Returns the corresponding MS_* bit set.
 */
static unsigned int parse_mnt_flags(unsigned int flags)
{
	unsigned int mount_flags = 0;

	if (flags & MOUNT_ATTR_RDONLY)
		mount_flags |= MS_RDONLY;
	if (flags & MOUNT_ATTR_NOSUID)
		mount_flags |= MS_NOSUID;
	if (flags & MOUNT_ATTR_NODEV)
		mount_flags |= MS_NODEV;
	if (flags & MOUNT_ATTR_NOEXEC)
		mount_flags |= MS_NOEXEC;
	if (flags & MOUNT_ATTR_NODIRATIME)
		mount_flags |= MS_NODIRATIME;

	/*
	 * The atime mode is a multi-bit field, not a set of independent bits:
	 * MOUNT_ATTR_RELATIME is the value 0 within MOUNT_ATTR__ATIME, so it
	 * has to be compared after masking rather than tested with '&'.
	 */
	switch (flags & MOUNT_ATTR__ATIME) {
	case MOUNT_ATTR_RELATIME:
		mount_flags |= MS_RELATIME;
		break;
	case MOUNT_ATTR_NOATIME:
		mount_flags |= MS_NOATIME;
		break;
	default:
		/*
		 * NOTE(review): MOUNT_ATTR_STRICTATIME is deliberately left
		 * untranslated, as in the original code; confirm whether
		 * MS_STRICTATIME should be reported for it.
		 */
		break;
	}

	return mount_flags;
}
struct mount_info* mount_info_from_statmount(int lfd)
{
int ret;
char *options;
struct mount_info *cur;
struct mount_info *mnt = NULL;
cleanup_free struct statmount *statmnt = NULL;
u64 statmount_mask = STATMOUNT_MNT_BASIC | STATMOUNT_FS_TYPE |
STATMOUNT_SB_BASIC | STATMOUNT_PROPAGATE_FROM |
STATMOUNT_MNT_POINT | STATMOUNT_MNT_ROOT | STATMOUNT_SB_SOURCE | STATMOUNT_MNT_OPTS | STATMOUNT_OPT_ARRAY;
statmnt = do_statmount_fd(lfd, statmount_mask);
if (!statmnt)
return NULL;
mnt = mnt_entry_alloc(false);
if (!mnt)
return NULL;
mnt->detached_mnt = true;
mnt->s_dev = MKKDEV(statmnt->sb_dev_major, statmnt->sb_dev_minor);
mnt->mnt_id = statmnt->mnt_id_old;
mnt->parent_mnt_id = statmnt->mnt_parent_id_old;
/* parse flags */
mnt->flags = parse_mnt_flags(statmnt->mnt_attr) | statmnt->mnt_propagation;
mnt->sb_flags = statmnt->sb_flags;
if (mnt->flags & MS_SLAVE)
mnt->shared_id = statmnt->mnt_peer_group;
else if (mnt->flags & MS_SHARED)
mnt->master_id = statmnt->mnt_master;
/* detached mount does not have a mountpoint */
mnt->mountpoint = NULL;
/* needed for mnt_is_overmounted */
mnt->parent = NULL;
mnt->fsname = xstrdup(statmnt->str + statmnt->fs_type);
if (!mnt->fsname)
goto err;
mnt->source = xstrdup(statmnt->str + statmnt->sb_source);
if (!mnt->source)
goto err;
options = xstrdup(statmnt->str + statmnt->mnt_opts);
if (!options)
goto err;
mnt->options = xmalloc(strlen(options));
if (!mnt->options)
goto err;
if (parse_sb_opt(options, &mnt->sb_flags, mnt->options))
goto err;
mnt->fstype = find_fstype_by_name(mnt->fsname);
if (mnt->fstype->parse) {
ret = mnt->fstype->parse(mnt);
if (ret < 0) {
pr_err("Failed to parse FS specific data on %s\n", service_mountpoint(mnt));
goto err;
}
if (ret > 0) {
pr_info("\tskipping fs mounted at %s\n", service_mountpoint(mnt) + 1);
goto err;
}
}
mnt->root = xstrdup(statmnt->str + statmnt->mnt_root);
if (!mnt->root)
goto err;
mnt->ns_mountpoint = xstrdup(statmnt->str + statmnt->mnt_point);
if (!mnt->ns_mountpoint)
goto err;
/* check whether this is bind mount of a normal mount */
for (cur = mntinfo; cur; cur = cur->next) {
if (mounts_sb_equal(mnt, cur)) {
mnt->nsid = cur->nsid;
list_add(&cur->mnt_bind, &mnt->mnt_bind);
cur->mnt_bind_is_populated = true;
}
}
if (!mnt->nsid) {
pr_err("detached mount is not a bind mount\n");
goto err;
}
mnt->mnt_bind_is_populated = true;
mntinfo_add_list(mnt);
return mnt;
err:
mnt_entry_free(mnt);
return NULL;
}
static struct mount_info *mnt_subtree_next(struct mount_info *mi, struct mount_info *root)
{
if (!list_empty(&mi->children))

View file

@ -1,3 +1,7 @@
#include "common/list.h"
#include "common/lock.h"
#include "log.h"
#include <sys/mount.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/wait.h>
@ -1518,6 +1522,57 @@ static int exit_usernsd(void *arg, int fd, pid_t pid)
exit(code);
}
/*
 * Tear down the temporary mountpoints of all detached mounts.
 *
 * For every entry of detached_mounts the function enters the entry's mount
 * namespace, lazily unmounts ns_mountpoint (MNT_DETACH) and removes the
 * temporary directory. The caller's mount namespace is restored at the end.
 *
 * Returns 0 on success, -1 as soon as any step fails.
 */
int umount_detached_mountpoints(void)
{
	int ret = 0;
	int orig_nsfd, nsfd;
	int switched;
	struct mount_info *detached;

	orig_nsfd = open_proc(PROC_SELF, "ns/mnt");
	if (orig_nsfd < 0) {
		pr_err("failed to get original mount namespace fd\n");
		return -1;
	}

	list_for_each_entry(detached, &detached_mounts, mnt_detached_list) {
		BUG_ON(!detached->detached_mnt);

		nsfd = fdstore_get(detached->nsid->mnt.nsfd_id);
		if (nsfd < 0) {
			pr_err("failed to get mount namespace fd\n");
			ret = -1;
			goto out;
		}

		/*
		 * NOTE(review): orig_nsfd is passed by address on every
		 * iteration; if switch_ns_by_fd() stores a fresh descriptor
		 * through it, the previous one is leaked -- confirm.
		 */
		switched = switch_ns_by_fd(nsfd, &mnt_ns_desc, &orig_nsfd);
		/*
		 * The namespace fd is only needed for the switch itself;
		 * close it on both outcomes so it is not leaked per mount.
		 */
		close(nsfd);
		if (switched) {
			pr_err("failed to switch to mount namespace\n");
			ret = -1;
			goto out;
		}

		if (umount2(detached->ns_mountpoint, MNT_DETACH)) {
			pr_perror("failed to umount detached mountpoint: %s", detached->ns_mountpoint);
			ret = -1;
			goto out;
		}

		if (rmdir(detached->ns_mountpoint)) {
			pr_perror("failed to remove temporary directory for detached mount: %s", detached->ns_mountpoint);
			ret = -1;
			goto out;
		}

		pr_debug("successfully detached mount mnt_id=%d\n", detached->mnt_id);
	}

out:
	if (restore_ns(orig_nsfd, &mnt_ns_desc)) {
		pr_perror("failed to restore original mount namespace");
		ret = -1;
	}
	/*
	 * NOTE(review): presumably restore_ns() already closes the descriptor
	 * it is given; if so this close() is redundant -- confirm.
	 */
	close(orig_nsfd);
	return ret;
}
int stop_usernsd(void)
{
int ret = 0;

View file

@ -1367,7 +1367,7 @@ static int parse_mnt_flags(char *opt, unsigned *flags)
return 0;
}
static int parse_sb_opt(char *opt, unsigned *flags, char *uopt)
int parse_sb_opt(char *opt, unsigned *flags, char *uopt)
{
static const struct opt2flag sb_opt2flag[] = {
{
@ -1472,6 +1472,7 @@ static int parse_mountinfo_ent(char *str, struct mount_info *new, char **fsname)
char *sub, *opt = NULL;
char link_path[PATH_MAX];
new->detached_mnt = false;
new->mountpoint = xmalloc(PATH_MAX);
if (new->mountpoint == NULL)
goto err;

View file

@ -1425,6 +1425,48 @@ static int epoll_hangup_event(int epollfd, struct epoll_rfd *rfd)
return ret;
}
/*
 * Thin wrapper issuing the raw statmount syscall.
 *
 * Arguments are forwarded unchanged; the syscall() result is returned as an
 * int.
 */
static int __statmount(struct mnt_id_req *req, struct statmount *stmnt,
		       size_t bufsize, int flags)
{
	long rc;

	rc = syscall(__NR_statmount, req, stmnt, bufsize, flags);

	return rc;
}
/*
 * Query the mount that the open file descriptor fd belongs to.
 *
 * @fd:   descriptor identifying the mount (used with STATMOUNT_BY_FD).
 * @mask: STATMOUNT_* bits selecting the parts to retrieve.
 *
 * The reply buffer starts at 32 KiB and is doubled for as long as the
 * syscall fails with EOVERFLOW, giving up once the size reaches
 * UINT_MAX / 2.
 *
 * Returns a heap-allocated struct statmount that the caller must free(),
 * or NULL on failure. On failure errno still holds the error that caused
 * it, so callers may inspect it.
 */
struct statmount *do_statmount_fd(int fd, u64 mask)
{
	struct mnt_id_req req = {
		.size = MNT_ID_REQ_SIZE_VER1,
		.fd = fd,
		.param = mask,
	};
	struct statmount *stmnt = NULL;
	size_t bufsize = 1 << 15;
	int saved_errno;

	for (;;) {
		/* Keep the old buffer reachable in case the resize fails. */
		struct statmount *tmp = xrealloc(stmnt, bufsize);

		if (!tmp)
			break;
		stmnt = tmp;

		if (!__statmount(&req, stmnt, bufsize, STATMOUNT_BY_FD))
			return stmnt;

		/* Anything but "buffer too small" is final. */
		if (errno != EOVERFLOW)
			break;

		bufsize <<= 1;
		if (bufsize >= UINT_MAX / 2)
			break;
	}

	/*
	 * Callers look at errno after a NULL return, and free() is not
	 * guaranteed to leave it untouched on every libc.
	 */
	saved_errno = errno;
	free(stmnt);
	errno = saved_errno;
	return NULL;
}
int epoll_run_rfds(int epollfd, struct epoll_event *evs, int nr_fds, int timeout)
{
int ret, i, nr_events;

View file

@ -59,4 +59,5 @@ message mnt_entry {
optional uint32 sb_flags = 17 [(criu).hex = true];
/* user defined mapping for external mount */
optional string ext_key = 18;
optional bool detached_mnt = 19;
}

View file

@ -393,6 +393,10 @@ TST_DIR = \
cwd00 \
cwd01 \
cwd02 \
detached_mount \
anonymous_mount \
detached_bind_mount \
detached_file_bind_mount \
overmount_dev \
overmount_file \
overmount_fifo \

View file

@ -0,0 +1,117 @@
#include <bits/types.h>
#include <fcntl.h>
#include <linux/limits.h>
#include <linux/openat2.h>
#include <linux/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <inttypes.h>
#include <sys/mount.h>
#include "zdtmtst.h"
const char *test_doc = "Check whether CRIU can c/r a fd pointing to a anonymous mount created using open_tree";
const char *test_author = "Bhavik Sachdev <b.sachdev1904@gmail.com>";
char *dirname;
TEST_OPTION(dirname, string, "directory name", 1);
#define TEST_FILE "anonymous-mount-file"
/*
 * Direct wrapper for the openat2 syscall.
 * Forwards its arguments unchanged and returns the raw syscall() result.
 */
long sys_openat2(int dirfd, const char *pathname, struct open_how *how, size_t size)
{
	long rc;

	rc = syscall(__NR_openat2, dirfd, pathname, how, size);

	return rc;
}
int main(int argc, char *argv[])
{
int mntfd, fd, ret = 1;
struct open_how how;
char *data = "anonymous_mount.data";
size_t len = strlen(data);
char buf[len + 1];
test_init(argc, argv);
if (mkdir(dirname, 0700)) {
pr_perror("mkdir %s", dirname);
return 1;
}
/* create a mount point at dirname */
if (mount("none", dirname, "tmpfs", 0, NULL)) {
pr_perror("mount %s", dirname);
return 1;
}
/* create a abstract (detached) clone mount of this mount */
mntfd = open_tree(AT_FDCWD, dirname, OPEN_TREE_CLONE);
if (mntfd < 0) {
pr_perror("open_tree");
return 1;
}
how.flags = O_CREAT | O_RDWR;
how.mode = 0600;
how.resolve = 0;
fd = sys_openat2(mntfd, TEST_FILE, &how, sizeof(how));
if (fd < 0) {
pr_perror("openat2");
return 1;
}
if (write(fd, data, len) != len) {
pr_perror("write");
return 1;
}
close(fd);
test_daemon();
test_waitsig();
/* verify the contents of the file inside anonymous mount */
how.flags = O_RDONLY;
how.mode = 0;
how.resolve = 0;
fd = sys_openat2(mntfd, TEST_FILE, &how, sizeof(how));
if (fd < 0) {
pr_perror("open_at");
close(mntfd);
return 1;
}
if (read(fd, buf, len) != len) {
pr_perror("read");
goto out;
}
buf[len] = 0;
/* Should contain the same data */
if (strncmp(data, buf, len) != 0) {
fail();
ret = 0;
goto out;
}
/* we should still be able to create mount using mntfd */
if (move_mount(mntfd, "", AT_FDCWD, "", MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH)) {
pr_perror("move_mount");
goto out;
}
/* we should be able to umount, if mounted correctly */
if (umount(dirname)) {
pr_perror("umount");
goto out;
}
pass();
ret = 0;
out:
close(fd);
close(mntfd);
return ret;
}

View file

@ -0,0 +1 @@
{'flags': 'suid crfail', 'flavor': 'ns uns'}

View file

@ -0,0 +1,106 @@
#include <fcntl.h>
#include <linux/limits.h>
#include <stdio.h>
#include <string.h>
#include <sys/mount.h>
#include <sys/stat.h>
#include "zdtmtst.h"
#include <unistd.h>
const char *test_doc = "Check C/R of a open file on a detached bind mount, when the original mount is still present";
const char *test_author = "Bhavik Sachdev <b.sachdev1904@gmail.com>";
char *dirname;
TEST_OPTION(dirname, string, "directory name", 1);
#define TEST_FILE "detached-bind-mount-file"
/*
 * Test body: bind-mount a tmpfs, open a file through the bind mount, lazily
 * detach the bind mount while the original stays mounted, and after
 * checkpoint/restore verify the open fd still yields the written data.
 */
int main(int argc, char *argv[])
{
	char path[PATH_MAX], mnt[PATH_MAX], bind_mnt[PATH_MAX];
	/* opened to a file on the detached mount point */
	int fd;
	/* some data for testing */
	char *data = "detached_bind_mount.data";
	size_t len = strlen(data);
	char buf[len + 1];

	test_init(argc, argv);

	if (mkdir(dirname, 0700)) {
		pr_perror("mkdir %s", dirname);
		return 1;
	}

	ssprintf(mnt, "%s/mnt", dirname);
	if (mkdir(mnt, 0700)) {
		pr_perror("mkdir %s", mnt);
		return 1;
	}

	/* create a mount point at mnt */
	if (mount("none", mnt, "tmpfs", 0, NULL)) {
		pr_perror("mount %s", mnt);
		return 1;
	}

	ssprintf(bind_mnt, "%s/bind_mnt", dirname);
	if (mkdir(bind_mnt, 0700)) {
		pr_perror("mkdir %s", bind_mnt);
		return 1;
	}

	if (mount(mnt, bind_mnt, NULL, MS_BIND, NULL) == -1) {
		pr_perror("mount %s", bind_mnt);
		return 1;
	}

	ssprintf(path, "%s/bind_mnt/%s", dirname, TEST_FILE);
	/* O_CREAT requires an explicit mode; without it the mode is garbage. */
	fd = open(path, O_CREAT | O_RDWR, 0600);
	if (fd < 0) {
		pr_perror("open %s", path);
		return 1;
	}

	if (write(fd, data, len) != len) {
		pr_perror("write %s", path);
		goto err;
	}

	/* detach the bind mount lazily */
	if (umount2(bind_mnt, MNT_DETACH)) {
		/* Report the path that was actually being unmounted. */
		pr_perror("umount2 %s", bind_mnt);
		goto err;
	}

	test_daemon();
	test_waitsig();

	/* Should still be able to read from the fd */
	if (lseek(fd, 0, SEEK_SET)) {
		pr_perror("lseek %s", path);
		goto err;
	}

	if (read(fd, buf, len) != len) {
		pr_perror("read %s", path);
		goto err;
	}
	buf[len] = 0;

	/* Should contain the same data */
	if (strncmp(data, buf, len) != 0)
		fail();
	else
		pass();

	close(fd);
	return 0;
err:
	close(fd);
	return 1;
}

View file

@ -0,0 +1 @@
{'flags': 'suid', 'flavor': 'ns uns'}

View file

@ -0,0 +1,102 @@
#include <fcntl.h>
#include <linux/limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mount.h>
#include <sys/stat.h>
#include "zdtmtst.h"
#include <unistd.h>
const char *test_doc = "Check C/R of a detached bind file mount, while the original mount is still mounted";
const char *test_author = "Bhavik Sachdev <b.sachdev1904@gmail.com>";
char *dirname;
TEST_OPTION(dirname, string, "directory name", 1);
#define TEST_FILE "detached-bind-file-mount-file"
/*
 * Test body: bind-mount a single file from a tmpfs onto another file, open
 * the bind-mounted file, lazily detach the file bind mount while the tmpfs
 * stays mounted, and after checkpoint/restore verify the fd opened through
 * the bind mount still yields the written data.
 */
int main(int argc, char *argv[])
{
	char mount_path[PATH_MAX], file_mount_path[PATH_MAX], bind_path[PATH_MAX];
	int fd, bind_fd;
	char *data = "detached_file_bind_mount.data";
	size_t len = strlen(data);
	char buf[len + 1];

	test_init(argc, argv);

	if (mkdir(dirname, 0700)) {
		pr_perror("mkdir %s", dirname);
		return 1;
	}

	ssprintf(mount_path, "%s/mnt", dirname);
	if (mkdir(mount_path, 0700)) {
		pr_perror("mkdir %s", mount_path);
		return 1;
	}

	if (mount("none", mount_path, "tmpfs", 0, NULL)) {
		pr_perror("mount %s", mount_path);
		return 1;
	}

	ssprintf(file_mount_path, "%s/mnt/file", dirname);
	/* O_CREAT requires an explicit mode; without it the mode is garbage. */
	fd = open(file_mount_path, O_CREAT | O_RDWR, 0600);
	if (fd < 0) {
		pr_perror("open %s", file_mount_path);
		return 1;
	}

	/* create bind file path */
	ssprintf(bind_path, "%s/bind_file", dirname);
	/* creat() takes permission bits, not open flags, as its second argument. */
	bind_fd = creat(bind_path, 0600);
	if (bind_fd < 0) {
		pr_perror("creat %s", bind_path);
		return 1;
	}
	close(bind_fd);

	if (mount(file_mount_path, bind_path, NULL, MS_BIND, NULL)) {
		pr_perror("bind mount %s", bind_path);
		return 1;
	}

	if (write(fd, data, len) != len) {
		pr_perror("write %s", file_mount_path);
		return 1;
	}

	bind_fd = open(bind_path, O_RDWR);
	if (bind_fd < 0) {
		pr_perror("open %s", bind_path);
		return 1;
	}

	if (umount2(bind_path, MNT_DETACH)) {
		pr_perror("umount2 %s", bind_path);
		return 1;
	}

	test_daemon();
	test_waitsig();

	if (read(bind_fd, buf, len) != len) {
		pr_perror("read %s", bind_path);
		return 1;
	}
	buf[len] = 0;

	/* Should contain the same data */
	if (strncmp(data, buf, len) != 0)
		fail();
	else
		pass();

	close(fd);
	close(bind_fd);
	return 0;
}

View file

@ -0,0 +1 @@
{'flags': 'suid crfail', 'flavor': 'ns uns'}

View file

@ -0,0 +1,83 @@
#include <fcntl.h>
#include <linux/limits.h>
#include <string.h>
#include <sys/mount.h>
#include <sys/stat.h>
#include "zdtmtst.h"
const char *test_doc = "Check that open file on unmounted mount is restored correctly";
const char *test_author = "Bhavik Sachdev <b.sachdev1904@gmail.com>";
char *dirname;
TEST_OPTION(dirname, string, "directory name", 1);
#define TEST_FILE "detached-mount-file"
/*
 * Test body: open a file on a tmpfs mount, lazily detach the mount so the
 * fd is the only thing keeping it alive, and after checkpoint/restore verify
 * the fd still yields the written data.
 */
int main(int argc, char *argv[])
{
	char path[PATH_MAX];
	/* opened to a file on the detached mount point */
	int fd;
	char *data = "detached_mount.data";
	size_t len = strlen(data);
	char buf[len + 1];

	test_init(argc, argv);

	if (mkdir(dirname, 0700)) {
		pr_perror("mkdir %s", dirname);
		return 1;
	}

	/* create a mount point at dirname */
	if (mount("none", dirname, "tmpfs", 0, NULL)) {
		pr_perror("mount %s", dirname);
		return 1;
	}

	ssprintf(path, "%s/%s", dirname, TEST_FILE);
	/* O_CREAT requires an explicit mode; without it the mode is garbage. */
	fd = open(path, O_CREAT | O_RDWR, 0600);
	if (fd < 0) {
		pr_perror("open %s", path);
		return 1;
	}

	if (write(fd, data, len) != len) {
		pr_perror("write %s", path);
		goto err;
	}

	/* detach the mount lazily */
	if (umount2(dirname, MNT_DETACH)) {
		pr_perror("umount2 %s", dirname);
		goto err;
	}

	test_daemon();
	test_waitsig();

	/* Should still be able to read from the fd */
	if (lseek(fd, 0, SEEK_SET)) {
		pr_perror("lseek %s", path);
		goto err;
	}

	if (read(fd, buf, len) != len) {
		pr_perror("read %s", path);
		goto err;
	}
	buf[len] = 0;

	/* Should contain the same data */
	if (strncmp(data, buf, len) != 0)
		fail();
	else
		pass();

	close(fd);
	return 0;
err:
	close(fd);
	return 1;
}

View file

@ -0,0 +1 @@
{'flags': 'suid crfail', 'flavor': 'ns uns'}