mirror of
https://github.com/checkpoint-restore/criu.git
synced 2026-01-23 02:14:37 +00:00
seize: fix pause devices for frozen containers
The container checkpointing procedure in Kubernetes freezes running containers to create a consistent snapshot of both the runtime state and the rootfs of the container. However, when checkpointing a GPU container, the container must be unfrozen before invoking the cuda-checkpoint tool. This is achieved in prepare_freezer_for_interrupt_only_mode(), which needs to be called before the PAUSE_DEVICES hook. The earlier patch that introduced this functionality fixed the problem only for containers with multiple processes. If the container has a single process, prepare_freezer_for_interrupt_only_mode() must likewise be invoked immediately before the PAUSE_DEVICES hook. Fixes: #2514 Signed-off-by: Radostin Stoyanov <rstoyanov@fedoraproject.org>
This commit is contained in:
parent
366d73a4c2
commit
fddca67cc6
1 changed file with 16 additions and 6 deletions
22
criu/seize.c
22
criu/seize.c
|
|
@ -1060,22 +1060,32 @@ int collect_pstree(void)
|
|||
*/
|
||||
alarm(opts.timeout);
|
||||
|
||||
ret = run_plugins(PAUSE_DEVICES, pid);
|
||||
if (ret < 0 && ret != -ENOTSUP) {
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (opts.freeze_cgroup && cgroup_version())
|
||||
goto err;
|
||||
|
||||
pr_debug("Detected cgroup V%d freezer\n", cgroup_v2 ? 2 : 1);
|
||||
|
||||
if (opts.freeze_cgroup && !compel_interrupt_only_mode) {
|
||||
ret = run_plugins(PAUSE_DEVICES, pid);
|
||||
if (ret < 0 && ret != -ENOTSUP) {
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (freeze_processes())
|
||||
goto err;
|
||||
} else {
|
||||
if (opts.freeze_cgroup && prepare_freezer_for_interrupt_only_mode())
|
||||
goto err;
|
||||
|
||||
/*
|
||||
* Call PAUSE_DEVICES after prepare_freezer_for_interrupt_only_mode()
|
||||
* to be able to checkpoint containers in a frozen state.
|
||||
*/
|
||||
ret = run_plugins(PAUSE_DEVICES, pid);
|
||||
if (ret < 0 && ret != -ENOTSUP) {
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (compel_interrupt_task(pid)) {
|
||||
set_cr_errno(ESRCH);
|
||||
goto err;
|
||||
|
|
@ -1136,4 +1146,4 @@ int checkpoint_devices(void)
|
|||
exit_code = 0;
|
||||
err:
|
||||
return exit_code;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue