kerndat: check for statmount with STATMOUNT_BY_FD

To support files that live on "unmounted" mounts (mounts that were unmounted
with MNT_DETACH), statmount must be able to operate on file descriptors. Add
a kerndat probe that detects whether statmount supports STATMOUNT_BY_FD.

Signed-off-by: Bhavik Sachdev <b.sachdev1904@gmail.com>
This commit is contained in:
Bhavik Sachdev 2025-06-11 01:25:47 +05:30
parent f71837d5dc
commit 3bf4c40672
6 changed files with 185 additions and 0 deletions

View file

@ -1400,6 +1400,17 @@ static int check_timer_cr_ids(void)
return 0;
}
/*
 * Feature check: statmount with STATMOUNT_BY_FD.
 *
 * Relies on the kdat.has_statmount_fd flag. Returns 0 when the flag is set;
 * otherwise emits a warning and returns -1.
 */
static int check_statmount_fd(void)
{
	if (kdat.has_statmount_fd)
		return 0;

	pr_warn("statmount syscall with STATMOUNT_BY_FD is unavailable,"
		" files on unmounted mounts will not be supported\n");
	return -1;
}
/* musl doesn't have a statx wrapper... */
struct staty {
__u32 stx_dev_major;
@ -1731,6 +1742,7 @@ int cr_check(void)
ret |= check_overlayfs_maps();
ret |= check_timer_cr_ids();
ret |= check_pagemap_scan_guard_pages();
ret |= check_statmount_fd();
if (kdat.lsm == LSMTYPE__APPARMOR)
ret |= check_apparmor_stacking();

View file

@ -93,6 +93,7 @@ struct kerndat_s {
bool has_breakpoints;
bool has_madv_guard;
bool has_pagemap_scan_guard_pages;
bool has_statmount_fd;
};
extern struct kerndat_s kdat;

View file

@ -52,6 +52,118 @@ static inline int sys_open_tree(int dfd, const char *filename, unsigned int flag
#define RESOLVE_NO_XDEV 0x01 /* Block mount-point crossings (includes bind-mounts). */
#endif
/*
 * Fallback definitions for the statmount request-mask bits. Every macro is
 * wrapped in #ifndef so that a definition already in scope takes precedence.
 * NOTE(review): the values presumably mirror the kernel UAPI header
 * (linux/mount.h) -- keep them token-identical to it when updating.
 */
#ifndef STATMOUNT_SB_BASIC
#define STATMOUNT_SB_BASIC 0x00000001U /* Want/got sb_... */
#endif
#ifndef STATMOUNT_MNT_BASIC
#define STATMOUNT_MNT_BASIC 0x00000002U /* Want/got mnt_... */
#endif
#ifndef STATMOUNT_PROPAGATE_FROM
#define STATMOUNT_PROPAGATE_FROM 0x00000004U /* Want/got propagate_from */
#endif
#ifndef STATMOUNT_MNT_ROOT
#define STATMOUNT_MNT_ROOT 0x00000008U /* Want/got mnt_root */
#endif
#ifndef STATMOUNT_MNT_POINT
#define STATMOUNT_MNT_POINT 0x00000010U /* Want/got mnt_point */
#endif
#ifndef STATMOUNT_FS_TYPE
#define STATMOUNT_FS_TYPE 0x00000020U /* Want/got fs_type */
#endif
#ifndef STATMOUNT_MNT_NS_ID
#define STATMOUNT_MNT_NS_ID 0x00000040U /* Want/got mnt_ns_id */
#endif
#ifndef STATMOUNT_MNT_OPTS
#define STATMOUNT_MNT_OPTS 0x00000080U /* Want/got mnt_opts */
#endif
#ifndef STATMOUNT_FS_SUBTYPE
#define STATMOUNT_FS_SUBTYPE 0x00000100U /* Want/got fs_subtype */
#endif
#ifndef STATMOUNT_SB_SOURCE
#define STATMOUNT_SB_SOURCE 0x00000200U /* Want/got sb_source */
#endif
#ifndef STATMOUNT_OPT_ARRAY
#define STATMOUNT_OPT_ARRAY 0x00000400U /* Want/got opt_... */
#endif
#ifndef STATMOUNT_OPT_SEC_ARRAY
#define STATMOUNT_OPT_SEC_ARRAY 0x00000800U /* Want/got opt_sec... */
#endif
#ifndef STATMOUNT_SUPPORTED_MASK
#define STATMOUNT_SUPPORTED_MASK 0x00001000U /* Want/got supported mask flags */
#endif
#ifndef STATMOUNT_MNT_UIDMAP
#define STATMOUNT_MNT_UIDMAP 0x00002000U /* Want/got uidmap... */
#endif
#ifndef STATMOUNT_MNT_GIDMAP
#define STATMOUNT_MNT_GIDMAP 0x00004000U /* Want/got gidmap... */
#endif
/*
 * Not a request-mask bit: do_statmount_fd() passes this as the "flags"
 * argument of the statmount syscall, which is why its value may coincide
 * with a mask bit defined above.
 */
#ifndef STATMOUNT_BY_FD
#define STATMOUNT_BY_FD 0x0000001U /* want mountinfo for given fd */
#endif
/*
 * Value stored in mnt_id_req.size by do_statmount_fd(). 32 equals the sum of
 * the field sizes of struct mnt_id_req (4 + 4 + 8 + 8 + 8).
 */
#ifndef MNT_ID_REQ_SIZE_VER1
#define MNT_ID_REQ_SIZE_VER1 32 /* sizeof second published struct */
#endif
/*
 * Request descriptor passed as the first argument of the statmount syscall
 * (see __statmount()). The field order and widths form a binary interface
 * with the kernel and must not be rearranged.
 */
struct mnt_id_req {
__u32 size; /* Size of this structure; do_statmount_fd() sets MNT_ID_REQ_SIZE_VER1 */
__u32 fd; /* File descriptor to query; filled in by do_statmount_fd() */
__u64 mnt_id; /* Left zero by do_statmount_fd(); NOTE(review): presumably the mount ID for non-fd lookups -- confirm */
__u64 param; /* Request mask (STATMOUNT_* bits) as set by do_statmount_fd() */
__u64 mnt_ns_id; /* Left zero by do_statmount_fd(); NOTE(review): presumably a mount namespace ID -- confirm */
};
/*
 * Result buffer filled in by the statmount syscall (see __statmount()).
 *
 * A fixed-size header is followed by the variable-length str[] area, so the
 * structure is always heap-allocated with extra room (do_statmount_fd() starts
 * at 32 KiB and grows on EOVERFLOW). The layout is a binary interface with the
 * kernel and must not be rearranged.
 *
 * NOTE(review): fields tagged "[str]" presumably hold offsets into str[] --
 * confirm against the kernel UAPI documentation before dereferencing them.
 */
struct statmount {
__u32 size; /* Total size, including strings */
__u32 mnt_opts; /* [str] Options (comma separated, escaped) */
__u64 mask; /* What results were written */
__u32 sb_dev_major; /* Device ID */
__u32 sb_dev_minor;
__u64 sb_magic; /* ..._SUPER_MAGIC */
__u32 sb_flags; /* SB_{RDONLY,SYNCHRONOUS,DIRSYNC,LAZYTIME} */
__u32 fs_type; /* [str] Filesystem type */
__u64 mnt_id; /* Unique ID of mount */
__u64 mnt_parent_id; /* Unique ID of parent (for root == mnt_id) */
__u32 mnt_id_old; /* Reused IDs used in proc/.../mountinfo */
__u32 mnt_parent_id_old;
__u64 mnt_attr; /* MOUNT_ATTR_... */
__u64 mnt_propagation; /* MS_{SHARED,SLAVE,PRIVATE,UNBINDABLE} */
__u64 mnt_peer_group; /* ID of shared peer group */
__u64 mnt_master; /* Mount receives propagation from this ID */
__u64 propagate_from; /* Propagation from in current namespace */
__u32 mnt_root; /* [str] Root of mount relative to root of fs */
__u32 mnt_point; /* [str] Mountpoint relative to current root */
__u64 mnt_ns_id; /* ID of the mount namespace */
__u32 fs_subtype; /* [str] Subtype of fs_type (if any) */
__u32 sb_source; /* [str] Source string of the mount */
__u32 opt_num; /* Number of fs options */
__u32 opt_array; /* [str] Array of nul terminated fs options */
__u32 opt_sec_num; /* Number of security options */
__u32 opt_sec_array; /* [str] Array of nul terminated security options */
__u64 supported_mask; /* Mask flags that this kernel supports */
__u32 mnt_uidmap_num; /* Number of uid mappings */
__u32 mnt_uidmap; /* [str] Array of uid mappings (as seen from callers namespace) */
__u32 mnt_gidmap_num; /* Number of gid mappings */
__u32 mnt_gidmap; /* [str] Array of gid mappings (as seen from callers namespace) */
__u64 __spare2[43];
char str[]; /* Variable size part containing strings */
};
static inline long sys_openat2(int dirfd, const char *pathname, struct open_how *how, size_t size)
{
return syscall(__NR_openat2, dirfd, pathname, how, size);

View file

@ -67,6 +67,7 @@ extern int set_proc_fd(int fd);
extern pid_t sys_clone_unified(unsigned long flags, void *child_stack, void *parent_tid, void *child_tid,
unsigned long newtls);
/*
 * Run statmount with STATMOUNT_BY_FD on @fd, requesting the STATMOUNT_* bits
 * given in @mask. Returns a heap-allocated result that the caller must free,
 * or NULL on failure.
 */
extern struct statmount *do_statmount_fd(int fd, u64 mask);
/*
* Values for pid argument of the proc opening routines below.

View file

@ -1,3 +1,4 @@
#include <linux/stat.h>
#include <unistd.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
@ -1741,6 +1742,18 @@ static int kerndat_has_timer_cr_ids(void)
return 0;
}
/*
 * Probe whether statmount accepts the STATMOUNT_BY_FD flag and record the
 * answer in kdat.has_statmount_fd.
 *
 * The probe queries stdin with STATMOUNT_MNT_BASIC. The feature is marked
 * unsupported only when the call fails with ENOSYS or EINVAL; any other
 * outcome, including a failure with a different errno, marks it as supported.
 *
 * Always returns 0.
 */
static int kerndat_has_statmount_fd(void)
{
	struct statmount *statmnt;

	statmnt = do_statmount_fd(STDIN_FILENO, STATMOUNT_MNT_BASIC);
	if (!statmnt && (errno == ENOSYS || errno == EINVAL)) {
		pr_info("statmount with STATMOUNT_BY_FD flag isn't supported\n");
		kdat.has_statmount_fd = false;
	} else {
		kdat.has_statmount_fd = true;
	}

	/*
	 * do_statmount_fd() hands back a heap buffer on success. Only the
	 * success/failure outcome matters here, so release it to avoid a
	 * leak. errno has already been inspected above, and free(NULL) is a
	 * no-op on the failure path.
	 */
	free(statmnt);
	return 0;
}
static void breakpoint_func(void)
{
if (raise(SIGSTOP))
@ -2125,6 +2138,10 @@ int kerndat_init(void)
pr_err("kerndat_has_madv_guard has failed when initializing kerndat.\n");
ret = -1;
}
if (!ret && kerndat_has_statmount_fd()) {
pr_err("kerndat_has_statmount_fd failed when initializing kerndat.\n");
ret = -1;
}
kerndat_lsm();
kerndat_mmap_min_addr();

View file

@ -1425,6 +1425,48 @@ static int epoll_hangup_event(int epollfd, struct epoll_rfd *rfd)
return ret;
}
/*
 * Raw statmount syscall wrapper: forwards all four arguments unchanged to
 * syscall(__NR_statmount, ...) and returns its result narrowed to int.
 */
static int __statmount(struct mnt_id_req *req, struct statmount *stmnt,
		       size_t bufsize, int flags)
{
	long rc;

	rc = syscall(__NR_statmount, req, stmnt, bufsize, flags);
	return (int)rc;
}
/*
 * Run statmount with STATMOUNT_BY_FD on @fd.
 *
 * @fd:   file descriptor stored in the request's fd field
 * @mask: STATMOUNT_* request bits, stored in the request's param field
 *
 * The result buffer starts at 32 KiB and is doubled each time the syscall
 * fails with EOVERFLOW, giving up once the size reaches UINT_MAX / 2.
 *
 * Returns a heap-allocated struct statmount that the caller must free(), or
 * NULL on failure. On failure errno describes the cause (ENOMEM when the
 * buffer could not be allocated, otherwise the errno left by the syscall), so
 * callers can tell "unsupported" apart from other errors.
 */
struct statmount *do_statmount_fd(int fd, u64 mask)
{
	size_t bufsize = 1 << 15;
	struct statmount *stmnt = NULL, *tmp;
	int saved_errno;
	struct mnt_id_req req = {
		.size = MNT_ID_REQ_SIZE_VER1,
		.fd = fd,
		.param = mask,
	};

	for (;;) {
		/* Keep the old pointer until the reallocation has succeeded. */
		tmp = xrealloc(stmnt, bufsize);
		if (!tmp) {
			/*
			 * Make the failure reason explicit rather than relying
			 * on whatever errno the allocator wrapper left behind.
			 */
			errno = ENOMEM;
			break;
		}
		stmnt = tmp;

		if (!__statmount(&req, stmnt, bufsize, STATMOUNT_BY_FD))
			return stmnt;

		/* Only a too-small buffer is worth retrying. */
		if (errno != EOVERFLOW)
			break;

		bufsize <<= 1;
		if (bufsize >= UINT_MAX / 2)
			break;
	}

	/*
	 * Callers inspect errno after a NULL return, and free() is not
	 * guaranteed to preserve it on every libc, so save and restore it
	 * around the cleanup.
	 */
	saved_errno = errno;
	free(stmnt);
	errno = saved_errno;
	return NULL;
}
int epoll_run_rfds(int epollfd, struct epoll_event *evs, int nr_fds, int timeout)
{
int ret, i, nr_events;