From 29a1a88bcebaf9d83591077d2bec424da82c0e71 Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Wed, 18 Dec 2019 23:32:32 +0000 Subject: [PATCH] memfd: add memory mapping support * During checkpoint, we add a VMA flag, VMA_AREA_MEMFD, to denote memfd regions. * Even though memfd is backed by the shmem device, we use the file semantics of memfd (via /proc/map_files/) which we already have support for. Signed-off-by: Nicolas Viennot --- criu/cr-dump.c | 6 +++++- criu/files-reg.c | 11 +++++++++-- criu/include/image.h | 1 + criu/include/memfd.h | 6 ++++++ criu/memfd.c | 19 ++++++++++++++++++- criu/proc_parse.c | 35 +++++++++++++++++++++++++++++++++++ 6 files changed, 74 insertions(+), 4 deletions(-) diff --git a/criu/cr-dump.c b/criu/cr-dump.c index 88323af92..6aa114c2d 100644 --- a/criu/cr-dump.c +++ b/criu/cr-dump.c @@ -80,6 +80,7 @@ #include "fault-injection.h" #include "dump.h" #include "eventpoll.h" +#include "memfd.h" /* * Architectures can overwrite this function to restore register sets that @@ -414,7 +415,10 @@ static int dump_filemap(struct vma_area *vma_area, int fd) /* Flags will be set during restore in open_filmap() */ - ret = dump_one_reg_file_cond(fd, &id, &p); + if (vma->status & VMA_AREA_MEMFD) + ret = dump_one_memfd_cond(fd, &id, &p); + else + ret = dump_one_reg_file_cond(fd, &id, &p); vma->shmid = id; return ret; diff --git a/criu/files-reg.c b/criu/files-reg.c index 90fb7dd7f..b0dad78e6 100644 --- a/criu/files-reg.c +++ b/criu/files-reg.c @@ -35,6 +35,7 @@ #include "pstree.h" #include "fault-injection.h" #include "external.h" +#include "memfd.h" #include "protobuf.h" #include "util.h" @@ -1879,7 +1880,10 @@ static int open_filemap(int pid, struct vma_area *vma) flags = vma->e->fdflags; if (ctx.flags != flags || ctx.desc != vma->vmfd) { - ret = open_path(vma->vmfd, do_open_reg_noseek_flags, &flags); + if (vma->e->status & VMA_AREA_MEMFD) + ret = memfd_open(vma->vmfd, &flags); + else + ret = open_path(vma->vmfd, do_open_reg_noseek_flags, &flags); 
if (ret < 0) return ret; @@ -1909,7 +1913,10 @@ int collect_filemap(struct vma_area *vma) vma->e->fdflags = O_RDONLY; } - fd = collect_special_file(vma->e->shmid); + if (vma->e->status & VMA_AREA_MEMFD) + fd = collect_memfd(vma->e->shmid); + else + fd = collect_special_file(vma->e->shmid); if (!fd) return -1; diff --git a/criu/include/image.h b/criu/include/image.h index 2baa39496..1c7cc5471 100644 --- a/criu/include/image.h +++ b/criu/include/image.h @@ -83,6 +83,7 @@ #define VMA_AREA_SOCKET (1 << 11) #define VMA_AREA_VVAR (1 << 12) #define VMA_AREA_AIORING (1 << 13) +#define VMA_AREA_MEMFD (1 << 14) #define VMA_CLOSE (1 << 28) #define VMA_NO_PROT_WRITE (1 << 29) diff --git a/criu/include/memfd.h b/criu/include/memfd.h index c1d7949cb..0a9aeff2f 100644 --- a/criu/include/memfd.h +++ b/criu/include/memfd.h @@ -5,10 +5,16 @@ #include "int.h" #include "common/config.h" +struct fd_parms; +struct file_desc; + extern int is_memfd(dev_t dev, const char *path); +extern int dump_one_memfd_cond(int lfd, u32 *id, struct fd_parms *parms); extern const struct fdtype_ops memfd_dump_ops; +extern int memfd_open(struct file_desc *d, u32 *fdflags); extern struct collect_image_info memfd_cinfo; +extern struct file_desc *collect_memfd(u32 id); #ifdef CONFIG_HAS_MEMFD_CREATE # include diff --git a/criu/memfd.c b/criu/memfd.c index 36b3be8df..1cca96a32 100644 --- a/criu/memfd.c +++ b/criu/memfd.c @@ -164,6 +164,13 @@ static int dump_one_memfd(int lfd, u32 id, const struct fd_parms *p) return pb_write_one(img_from_set(glob_imgset, CR_FD_FILES), &fe, PB_FILE); } +int dump_one_memfd_cond(int lfd, u32 *id, struct fd_parms *parms) +{ + if (fd_id_generate_special(parms, id)) + return dump_one_memfd(lfd, *id, parms); + return 0; +} + const struct fdtype_ops memfd_dump_ops = { .type = FD_TYPES__MEMFD, .dump = dump_one_memfd, @@ -265,7 +272,7 @@ static int memfd_open_inode(struct memfd_inode *inode) return fd; } -static int memfd_open(struct file_desc *d, u32 *fdflags) +int memfd_open(struct 
file_desc *d, u32 *fdflags) { char lpath[PSFDS]; struct memfd_info *mfi; @@ -384,3 +391,13 @@ struct collect_image_info memfd_cinfo = { .priv_size = sizeof(struct memfd_info), .collect = collect_one_memfd, }; + +struct file_desc *collect_memfd(u32 id) { + struct file_desc *fdesc; + + fdesc = find_file_desc_raw(FD_TYPES__MEMFD, id); + if (fdesc == NULL) + pr_err("No entry for memfd %#x\n", id); + + return fdesc; +} diff --git a/criu/proc_parse.c b/criu/proc_parse.c index fa7644992..468afcdf3 100644 --- a/criu/proc_parse.c +++ b/criu/proc_parse.c @@ -41,6 +41,7 @@ #include "timerfd.h" #include "path.h" #include "fault-injection.h" +#include "memfd.h" #include "protobuf.h" #include "images/fdinfo.pb-c.h" @@ -303,6 +304,26 @@ static int vma_get_mapfile_user(const char *fname, struct vma_area *vma, } vfi_dev = makedev(vfi->dev_maj, vfi->dev_min); + + if (is_memfd(vfi_dev, fname)) { + struct fd_link link; + link.len = strlen(fname); + strlcpy(link.name, fname, sizeof(link.name)); + strip_deleted(&link); + + /* + * The error EPERM will be shown in the following pr_perror(). + * It comes from the previous open() call. + */ + pr_perror("Can't open mapped [%s]", link.name); + + /* + * TODO Perhaps we could do better than failing and dump the + * memory like what is being done in shmem.c + */ + return -1; + } + if (is_anon_shmem_map(vfi_dev)) { if (!(vma->e->flags & MAP_SHARED)) return -1; @@ -578,7 +599,20 @@ static int handle_vma(pid_t pid, struct vma_area *vma_area, /* * /dev/zero stands for anon-shared mapping * otherwise it's some file mapping. + * + * We treat memfd mappings as regular file mappings because + * their backing can be seen as files, which is easy to + * support. So even though memfd is an anonymous shmem, we + * treat it differently. + * Note: maybe we should revisit this as /proc/map_files/ + * may not always be accessible. 
*/ + + if (is_memfd(st_buf->st_dev, file_path)) { + vma_area->e->status |= VMA_AREA_MEMFD; + goto normal_file; + } + if (is_anon_shmem_map(st_buf->st_dev)) { if (!(vma_area->e->flags & MAP_SHARED)) goto err_bogus_mapping; @@ -594,6 +628,7 @@ static int handle_vma(pid_t pid, struct vma_area *vma_area, vma_area->e->shmid += FI_HUGE_ANON_SHMEM_ID_BASE; } } else { +normal_file: if (vma_area->e->flags & MAP_PRIVATE) vma_area->e->status |= VMA_FILE_PRIVATE; else