memfd: add memory mapping support

* During checkpoint, we add a VMA flag, VMA_AREA_MEMFD, to denote memfd
  regions.
* Even though memfd is backed by the shmem device, we use the file
  semantics of memfd (via /proc/<pid>/map_files/<vma>), which we already
  support.

Signed-off-by: Nicolas Viennot <Nicolas.Viennot@twosigma.com>
This commit is contained in:
Nicolas Viennot 2019-12-18 23:32:32 +00:00 committed by Andrei Vagin
parent b25684e24a
commit 29a1a88bce
6 changed files with 74 additions and 4 deletions

View file

@ -80,6 +80,7 @@
#include "fault-injection.h"
#include "dump.h"
#include "eventpoll.h"
#include "memfd.h"
/*
* Architectures can overwrite this function to restore register sets that
@ -414,7 +415,10 @@ static int dump_filemap(struct vma_area *vma_area, int fd)
/* Flags will be set during restore in open_filemap() */
ret = dump_one_reg_file_cond(fd, &id, &p);
if (vma->status & VMA_AREA_MEMFD)
ret = dump_one_memfd_cond(fd, &id, &p);
else
ret = dump_one_reg_file_cond(fd, &id, &p);
vma->shmid = id;
return ret;

View file

@ -35,6 +35,7 @@
#include "pstree.h"
#include "fault-injection.h"
#include "external.h"
#include "memfd.h"
#include "protobuf.h"
#include "util.h"
@ -1879,7 +1880,10 @@ static int open_filemap(int pid, struct vma_area *vma)
flags = vma->e->fdflags;
if (ctx.flags != flags || ctx.desc != vma->vmfd) {
ret = open_path(vma->vmfd, do_open_reg_noseek_flags, &flags);
if (vma->e->status & VMA_AREA_MEMFD)
ret = memfd_open(vma->vmfd, &flags);
else
ret = open_path(vma->vmfd, do_open_reg_noseek_flags, &flags);
if (ret < 0)
return ret;
@ -1909,7 +1913,10 @@ int collect_filemap(struct vma_area *vma)
vma->e->fdflags = O_RDONLY;
}
fd = collect_special_file(vma->e->shmid);
if (vma->e->status & VMA_AREA_MEMFD)
fd = collect_memfd(vma->e->shmid);
else
fd = collect_special_file(vma->e->shmid);
if (!fd)
return -1;

View file

@ -83,6 +83,7 @@
#define VMA_AREA_SOCKET (1 << 11)
#define VMA_AREA_VVAR (1 << 12)
#define VMA_AREA_AIORING (1 << 13)
/* Mapping is backed by a memfd; set in handle_vma() when is_memfd() matches. */
#define VMA_AREA_MEMFD (1 << 14)
#define VMA_CLOSE (1 << 28)
#define VMA_NO_PROT_WRITE (1 << 29)

View file

@ -5,10 +5,16 @@
#include "int.h"
#include "common/config.h"
/* Forward declarations: only pointers to these types are used below. */
struct fd_parms;
struct file_desc;
/* NOTE(review): presumably non-zero when dev/path denote a memfd; definition not visible here -- confirm. */
extern int is_memfd(dev_t dev, const char *path);
/*
 * Calls dump_one_memfd() only when fd_id_generate_special(parms, id)
 * returns non-zero; otherwise returns 0 without dumping.
 */
extern int dump_one_memfd_cond(int lfd, u32 *id, struct fd_parms *parms);
extern const struct fdtype_ops memfd_dump_ops;
/* Used by open_filemap() for VMAs flagged VMA_AREA_MEMFD. */
extern int memfd_open(struct file_desc *d, u32 *fdflags);
extern struct collect_image_info memfd_cinfo;
/*
 * Looks up the FD_TYPES__MEMFD file descriptor entry for id; logs an
 * error and returns NULL when no such entry exists.
 */
extern struct file_desc *collect_memfd(u32 id);
#ifdef CONFIG_HAS_MEMFD_CREATE
# include <sys/mman.h>

View file

@ -164,6 +164,13 @@ static int dump_one_memfd(int lfd, u32 id, const struct fd_parms *p)
return pb_write_one(img_from_set(glob_imgset, CR_FD_FILES), &fe, PB_FILE);
}
/*
 * Conditionally dump a memfd.
 *
 * The memfd is dumped via dump_one_memfd() only when
 * fd_id_generate_special() returns non-zero for @parms (presumably: the
 * file has not been assigned an ID before -- confirm against its
 * definition). Otherwise nothing is written and 0 is returned.
 *
 * @lfd:   local file descriptor referring to the memfd
 * @id:    handed to fd_id_generate_special(); its value is then used as
 *         the ID of the dumped entry
 * @parms: file parameters forwarded to the helpers
 *
 * Returns the result of dump_one_memfd(), or 0 when no dump is performed.
 */
int dump_one_memfd_cond(int lfd, u32 *id, struct fd_parms *parms)
{
	if (!fd_id_generate_special(parms, id))
		return 0;

	return dump_one_memfd(lfd, *id, parms);
}
const struct fdtype_ops memfd_dump_ops = {
.type = FD_TYPES__MEMFD,
.dump = dump_one_memfd,
@ -265,7 +272,7 @@ static int memfd_open_inode(struct memfd_inode *inode)
return fd;
}
static int memfd_open(struct file_desc *d, u32 *fdflags)
int memfd_open(struct file_desc *d, u32 *fdflags)
{
char lpath[PSFDS];
struct memfd_info *mfi;
@ -384,3 +391,13 @@ struct collect_image_info memfd_cinfo = {
.priv_size = sizeof(struct memfd_info),
.collect = collect_one_memfd,
};
/*
 * Find the file descriptor entry registered for a memfd.
 *
 * @id: identifier looked up with type FD_TYPES__MEMFD
 *
 * Returns the matching file_desc, or NULL (after logging an error) when
 * find_file_desc_raw() has no entry for @id.
 */
struct file_desc *collect_memfd(u32 id)
{
	struct file_desc *desc = find_file_desc_raw(FD_TYPES__MEMFD, id);

	if (!desc)
		pr_err("No entry for memfd %#x\n", id);

	return desc;
}

View file

@ -41,6 +41,7 @@
#include "timerfd.h"
#include "path.h"
#include "fault-injection.h"
#include "memfd.h"
#include "protobuf.h"
#include "images/fdinfo.pb-c.h"
@ -303,6 +304,26 @@ static int vma_get_mapfile_user(const char *fname, struct vma_area *vma,
}
vfi_dev = makedev(vfi->dev_maj, vfi->dev_min);
if (is_memfd(vfi_dev, fname)) {
struct fd_link link;
link.len = strlen(fname);
strlcpy(link.name, fname, sizeof(link.name));
strip_deleted(&link);
/*
* The error EPERM will be shown in the following pr_perror().
* It comes from the previous open() call.
*/
pr_perror("Can't open mapped [%s]", link.name);
/*
* TODO Perhaps we could do better than failing and dump the
* memory like what is being done in shmem.c
*/
return -1;
}
if (is_anon_shmem_map(vfi_dev)) {
if (!(vma->e->flags & MAP_SHARED))
return -1;
@ -578,7 +599,20 @@ static int handle_vma(pid_t pid, struct vma_area *vma_area,
/*
* /dev/zero stands for anon-shared mapping
* otherwise it's some file mapping.
*
* We treat memfd mappings as regular file mappings because
* their backing can be seen as files, which is easy to
* support. So even though memfd is an anonymous shmem, we
* treat it differently.
* Note: maybe we should revisit this as /proc/map_files/<vma>
* may not always be accessible.
*/
if (is_memfd(st_buf->st_dev, file_path)) {
vma_area->e->status |= VMA_AREA_MEMFD;
goto normal_file;
}
if (is_anon_shmem_map(st_buf->st_dev)) {
if (!(vma_area->e->flags & MAP_SHARED))
goto err_bogus_mapping;
@ -594,6 +628,7 @@ static int handle_vma(pid_t pid, struct vma_area *vma_area,
vma_area->e->shmid += FI_HUGE_ANON_SHMEM_ID_BASE;
}
} else {
normal_file:
if (vma_area->e->flags & MAP_PRIVATE)
vma_area->e->status |= VMA_FILE_PRIVATE;
else