mirror of
https://github.com/checkpoint-restore/criu.git
synced 2026-01-23 02:14:37 +00:00
criu: Introduce new device file plugin hooks
Currently, CRIU cannot handle Checkpoint Restore operations when a device file is involved in a process. CRIU does allow flexible extensions via special plugins, but for certain complex devices such as a GPU, the existing hooks are not sufficient. This commit introduces a few new hooks that will be used to support Checkpoint Restore operations with AMD GPU devices, and potentially with other similar devices too:

- HANDLE_DEVICE_VMA
- UPDATE_VMA_MAP
- RESUME_DEVICES_LATE

* HANDLE_DEVICE_VMA: Hook to detect a suitable plugin to handle a device file VMA with PF | IO mappings.

* UPDATE_VMA_MAP: Hook to handle VMAs during a device file restore. When restoring VMAs for device files, criu runs sys_mmap in the pie restore context, but the offsets and file path within a device file may change during the restore operation, so they need to be adjusted properly.

* RESUME_DEVICES_LATE: Hook to do some special handling in the late restore phase. During the criu restore phase, when a device is being restored with the help of a plugin, some device-specific operations might need to be delayed until criu finalizes the VMA placements in the address space of the target process. But by the time criu finalizes this, it's too late, since the pie phase is over and control is back with the criu master process. This hook allows an external trigger to each resuming task to check whether it has a device-specific operation pending, such as issuing an ioctl call. Since this is called from the criu master process context, it supplies the pid of the target process and gives each registered plugin a chance to run a device-specific operation if the target pid is valid.

A future patch will add consumers for these plugin hooks to support AMD GPUs.

Signed-off-by: Rajneesh Bhardwaj <rajneesh.bhardwaj@amd.com>
This commit is contained in:
parent
dd46e79196
commit
17e2a8c709
5 changed files with 100 additions and 6 deletions
|
|
@ -2388,6 +2388,29 @@ skip_ns_bouncing:
|
|||
pr_err("Unable to flush breakpoints\n");
|
||||
|
||||
finalize_restore();
|
||||
/*
|
||||
* Some external devices such as GPUs might need a very late
|
||||
* trigger to kick-off some events, memory notifiers and for
|
||||
* restarting the previously restored queues during criu restore
|
||||
* stage. This is needed since criu pie code may shuffle VMAs
|
||||
* around so things such as registering MMU notifiers (for GPU
|
||||
* mapped memory) could be done sanely once the pie code hands
|
||||
* over the control to master process.
|
||||
*/
|
||||
for_each_pstree_item(item) {
|
||||
pr_info("Run late stage hook from criu master for external devices\n");
|
||||
ret = run_plugins(RESUME_DEVICES_LATE, item->pid->real);
|
||||
/*
|
||||
* This may not really be an error. Only certain plugin hooks
|
||||
* (if available) will return success such as amdgpu_plugin that
|
||||
* validates the pid of the resuming tasks in the kernel mode.
|
||||
* Most of the time, it'll be -ENOTSUP and in a few cases, it
|
||||
* might actually be a true error code but that would be also
|
||||
* captured in the plugin so no need to print the error here.
|
||||
*/
|
||||
if (ret < 0)
|
||||
pr_debug("restore late stage hook for external plugin failed\n");
|
||||
}
|
||||
|
||||
ret = run_scripts(ACT_PRE_RESUME);
|
||||
if (ret)
|
||||
|
|
|
|||
|
|
@ -2267,6 +2267,23 @@ static int open_filemap(int pid, struct vma_area *vma)
|
|||
BUG_ON((vma->vmfd == NULL) || !vma->e->has_fdflags);
|
||||
flags = vma->e->fdflags;
|
||||
|
||||
/* update the new device file page offsets and file paths set during restore */
|
||||
if (vma->e->status & VMA_UNSUPP) {
|
||||
uint64_t new_pgoff;
|
||||
char new_path[PATH_MAX];
|
||||
int ret;
|
||||
|
||||
struct reg_file_info *rfi = container_of(vma->vmfd, struct reg_file_info, d);
|
||||
ret = run_plugins(UPDATE_VMA_MAP, rfi->rfe->name, new_path, vma->e->start, vma->e->pgoff, &new_pgoff);
|
||||
if (ret == 1) {
|
||||
pr_info("New mmap %#016" PRIx64 "->%#016" PRIx64 " path %s\n", vma->e->pgoff, new_pgoff,
|
||||
new_path);
|
||||
vma->e->pgoff = new_pgoff;
|
||||
rfi->path = xstrdup(new_path);
|
||||
pr_debug("Updated rfi->path %s\n", rfi->path);
|
||||
}
|
||||
}
|
||||
|
||||
if (ctx.flags != flags || ctx.desc != vma->vmfd) {
|
||||
if (vma->e->status & VMA_AREA_MEMFD)
|
||||
ret = memfd_open(vma->vmfd, &flags);
|
||||
|
|
|
|||
|
|
@ -22,6 +22,8 @@
|
|||
|
||||
#include <limits.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#define CRIU_PLUGIN_GEN_VERSION(a, b, c) (((a) << 16) + ((b) << 8) + (c))
|
||||
#define CRIU_PLUGIN_VERSION_MAJOR 0
|
||||
|
|
@ -48,6 +50,12 @@ enum {
|
|||
|
||||
CR_PLUGIN_HOOK__DUMP_EXT_LINK = 6,
|
||||
|
||||
CR_PLUGIN_HOOK__HANDLE_DEVICE_VMA = 7,
|
||||
|
||||
CR_PLUGIN_HOOK__UPDATE_VMA_MAP = 8,
|
||||
|
||||
CR_PLUGIN_HOOK__RESUME_DEVICES_LATE = 9,
|
||||
|
||||
CR_PLUGIN_HOOK__MAX
|
||||
};
|
||||
|
||||
|
|
@ -60,6 +68,10 @@ DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__RESTORE_EXT_FILE, int id);
|
|||
DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__DUMP_EXT_MOUNT, char *mountpoint, int id);
|
||||
DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__RESTORE_EXT_MOUNT, int id, char *mountpoint, char *old_root, int *is_file);
|
||||
DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__DUMP_EXT_LINK, int index, int type, char *kind);
|
||||
DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__HANDLE_DEVICE_VMA, int fd, const struct stat *stat);
|
||||
DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__UPDATE_VMA_MAP, const char *old_path, char *new_path, const uint64_t addr,
|
||||
const uint64_t old_pgoff, uint64_t *new_pgoff);
|
||||
DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__RESUME_DEVICES_LATE, int pid);
|
||||
|
||||
enum {
|
||||
CR_PLUGIN_STAGE__DUMP,
|
||||
|
|
@ -130,5 +142,9 @@ typedef int(cr_plugin_restore_file_t)(int id);
|
|||
typedef int(cr_plugin_dump_ext_mount_t)(char *mountpoint, int id);
|
||||
typedef int(cr_plugin_restore_ext_mount_t)(int id, char *mountpoint, char *old_root, int *is_file);
|
||||
typedef int(cr_plugin_dump_ext_link_t)(int index, int type, char *kind);
|
||||
typedef int(cr_plugin_handle_device_vma_t)(int fd, const struct stat *stat);
|
||||
typedef int(cr_plugin_update_vma_map_t)(const char *old_path, char *new_path, const uint64_t addr,
|
||||
const uint64_t old_pgoff, uint64_t *new_pgoff);
|
||||
typedef int(cr_plugin_resume_devices_late_t)(int pid);
|
||||
|
||||
#endif /* __CRIU_PLUGIN_H__ */
|
||||
|
|
|
|||
|
|
@ -54,6 +54,9 @@ static cr_plugin_desc_t *cr_gen_plugin_desc(void *h, char *path)
|
|||
__assign_hook(DUMP_EXT_MOUNT, "cr_plugin_dump_ext_mount");
|
||||
__assign_hook(RESTORE_EXT_MOUNT, "cr_plugin_restore_ext_mount");
|
||||
__assign_hook(DUMP_EXT_LINK, "cr_plugin_dump_ext_link");
|
||||
__assign_hook(HANDLE_DEVICE_VMA, "cr_plugin_handle_device_vma");
|
||||
__assign_hook(UPDATE_VMA_MAP, "cr_plugin_update_vma_map");
|
||||
__assign_hook(RESUME_DEVICES_LATE, "cr_plugin_resume_devices_late");
|
||||
|
||||
#undef __assign_hook
|
||||
|
||||
|
|
|
|||
|
|
@ -45,6 +45,7 @@
|
|||
#include "protobuf.h"
|
||||
#include "images/fdinfo.pb-c.h"
|
||||
#include "images/mnt.pb-c.h"
|
||||
#include "plugin.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
|
|
@ -103,6 +104,19 @@ bool is_vma_range_fmt(char *line)
|
|||
return __is_vma_range_fmt(line);
|
||||
}
|
||||
|
||||
bool handle_vma_plugin(int *fd, struct stat *stat)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = run_plugins(HANDLE_DEVICE_VMA, *fd, stat);
|
||||
if (ret < 0) {
|
||||
pr_perror("handle_device_vma plugin failed");
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static void __parse_vmflags(char *buf, u32 *flags, u64 *madv, int *io_pf)
|
||||
{
|
||||
char *tok;
|
||||
|
|
@ -188,6 +202,7 @@ struct vma_file_info {
|
|||
int dev_min;
|
||||
unsigned long ino;
|
||||
struct vma_area *vma;
|
||||
bool has_device_plugin;
|
||||
};
|
||||
|
||||
static inline int vfi_equal(struct vma_file_info *a, struct vma_file_info *b)
|
||||
|
|
@ -577,11 +592,17 @@ static int handle_vma(pid_t pid, struct vma_area *vma_area, const char *file_pat
|
|||
} else if (*vm_file_fd >= 0) {
|
||||
struct stat *st_buf = vma_area->vmst;
|
||||
|
||||
if (S_ISREG(st_buf->st_mode))
|
||||
if (S_ISREG(st_buf->st_mode)) {
|
||||
/* regular file mapping -- supported */;
|
||||
else if (S_ISCHR(st_buf->st_mode) && (st_buf->st_rdev == DEVZERO))
|
||||
pr_debug("Found regular file mapping, OK\n");
|
||||
} else if (S_ISCHR(st_buf->st_mode) && (st_buf->st_rdev == DEVZERO)) {
|
||||
/* devzero mapping -- also makes sense */;
|
||||
else {
|
||||
pr_debug("Found devzero mapping, OK\n");
|
||||
} else if (handle_vma_plugin(vm_file_fd, st_buf)) {
|
||||
pr_info("Found device file mapping, plugin is available\n");
|
||||
vfi->has_device_plugin = true;
|
||||
} else {
|
||||
/* non-regular mapping with no supporting plugin */
|
||||
pr_err("Can't handle non-regular mapping on %d's map %" PRIx64 "\n", pid, vma_area->e->start);
|
||||
goto err;
|
||||
}
|
||||
|
|
@ -646,9 +667,23 @@ static int vma_list_add(struct vma_area *vma_area, struct vm_area_list *vma_area
|
|||
struct vma_file_info *vfi, struct vma_file_info *prev_vfi)
|
||||
{
|
||||
if (vma_area->e->status & VMA_UNSUPP) {
|
||||
pr_err("Unsupported mapping found %016" PRIx64 "-%016" PRIx64 "\n", vma_area->e->start,
|
||||
vma_area->e->end);
|
||||
return -1;
|
||||
if (vfi->has_device_plugin) {
|
||||
/* Unsupported VMAs that provide special plugins for
|
||||
* backup can be treated as regular VMAs and criu
|
||||
* should only save their metadata in the dump files.
|
||||
* There can be several special backup plugin hooks
|
||||
* that might run at different stages during checkpoint
|
||||
* and restore.
|
||||
*/
|
||||
pr_debug("Device file mapping %016" PRIx64 "-%016" PRIx64 " "
|
||||
"must be supported via device plugins\n",
|
||||
vma_area->e->start, vma_area->e->end);
|
||||
|
||||
} else {
|
||||
pr_err("Unsupported mapping found %016" PRIx64 "-%016" PRIx64 "\n", vma_area->e->start,
|
||||
vma_area->e->end);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/* Add a guard page only if there is enough space for it */
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue