criu: Introduce a new device plugin hook for restore

Currently, in the target process, device-related restore operations and
other restore operations run almost sequentially. While the target
process executes the corresponding CRIU hook functions, it can't perform
other restore operations. However, for GPU applications, some device
restore operations have no logical dependencies on other common restore
operations and can be parallelized with them to speed up the restore
process.

Instead of launching a thread in child processes for parallelization,
this patch chooses to add a new hook, `POST_FORKING`, in the main CRIU
process to handle these restore operations. This is because the
restoration of memory state in the restore blob is one of the most
time-consuming parts of all restore logic. The main CRIU process can
easily parallelize these operations, whereas parallelizing in threads
within child processes is challenging.

- POST_FORKING

POST_FORKING: Hook that enables the main CRIU process to perform some
of the plugins' restore operations.

Signed-off-by: Yanning Yang <yangyanning@sjtu.edu.cn>
This commit is contained in:
Yanning Yang 2024-11-29 02:07:38 +00:00 committed by Andrei Vagin
parent d57d40a5ad
commit 427c0dc27b
3 changed files with 8 additions and 0 deletions

View file

@ -2132,6 +2132,9 @@ static int restore_root_task(struct pstree_item *init)
__restore_switch_stage(CR_STATE_FORKING);
skip_ns_bouncing:
ret = run_plugins(POST_FORKING);
if (ret < 0 && ret != -ENOTSUP)
goto out_kill;
ret = restore_wait_inprogress_tasks();
if (ret < 0)

View file

@ -60,6 +60,8 @@ enum {
CR_PLUGIN_HOOK__CHECKPOINT_DEVICES = 11,
CR_PLUGIN_HOOK__POST_FORKING = 12,
CR_PLUGIN_HOOK__MAX
};
@ -78,6 +80,7 @@ DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__UPDATE_VMA_MAP, const char *path, const
DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__RESUME_DEVICES_LATE, int pid);
DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__PAUSE_DEVICES, int pid);
DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__CHECKPOINT_DEVICES, int pid);
DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__POST_FORKING, void);
enum {
CR_PLUGIN_STAGE__DUMP,
@ -152,5 +155,6 @@ typedef int(cr_plugin_handle_device_vma_t)(int fd, const struct stat *stat);
typedef int(cr_plugin_update_vma_map_t)(const char *path, const uint64_t addr, const uint64_t old_pgoff,
uint64_t *new_pgoff, int *plugin_fd);
typedef int(cr_plugin_resume_devices_late_t)(int pid);
typedef int(cr_plugin_post_forking_t)(void);
#endif /* __CRIU_PLUGIN_H__ */

View file

@ -59,6 +59,7 @@ static cr_plugin_desc_t *cr_gen_plugin_desc(void *h, char *path)
__assign_hook(RESUME_DEVICES_LATE, "cr_plugin_resume_devices_late");
__assign_hook(PAUSE_DEVICES, "cr_plugin_pause_devices");
__assign_hook(CHECKPOINT_DEVICES, "cr_plugin_checkpoint_devices");
__assign_hook(POST_FORKING, "cr_plugin_post_forking");
#undef __assign_hook