seize: fix pause devices for frozen containers

The container checkpointing procedure in Kubernetes freezes running containers to create a consistent snapshot of both the runtime state and the rootfs of the container. However, when checkpointing a GPU container, the container must be unfrozen before invoking the cuda-checkpoint tool. This is achieved in prepare_freezer_for_interrupt_only_mode(), which needs to be called before the PAUSE_DEVICES hook. The patch that introduced this functionality fixed the problem for containers with multiple processes. However, if the container has a single process, prepare_freezer_for_interrupt_only_mode() must be invoked immediately before the PAUSE_DEVICES hook.

Fixes: #2514
Signed-off-by: Radostin Stoyanov <rstoyanov@fedoraproject.org>
2026-01-23 02:14:37 +00:00 · 2025-05-11 11:33:29 +01:00 · 2025-05-11 11:33:29 +01:00 · fddca67cc6
commit fddca67cc6
parent 366d73a4c2
1 changed files with 16 additions and 6 deletions
--- a/criu/seize.c
+++ b/criu/seize.c
@@ -1060,22 +1060,32 @@ int collect_pstree(void)
 	 */
 	alarm(opts.timeout);

-	ret = run_plugins(PAUSE_DEVICES, pid);
-	if (ret < 0 && ret != -ENOTSUP) {
-		goto err;
-	}
-
 	if (opts.freeze_cgroup && cgroup_version())
 		goto err;

 	pr_debug("Detected cgroup V%d freezer\n", cgroup_v2 ? 2 : 1);

 	if (opts.freeze_cgroup && !compel_interrupt_only_mode) {
+		ret = run_plugins(PAUSE_DEVICES, pid);
+		if (ret < 0 && ret != -ENOTSUP) {
+			goto err;
+		}
+
 		if (freeze_processes())
 			goto err;
 	} else {
 		if (opts.freeze_cgroup && prepare_freezer_for_interrupt_only_mode())
 			goto err;
+
+		/*
+		 * Call PAUSE_DEVICES after prepare_freezer_for_interrupt_only_mode()
+		 * to be able to checkpoint containers in a frozen state.
+		 */
+		ret = run_plugins(PAUSE_DEVICES, pid);
+		if (ret < 0 && ret != -ENOTSUP) {
+			goto err;
+		}
+
 		if (compel_interrupt_task(pid)) {
 			set_cr_errno(ESRCH);
 			goto err;
@@ -1136,4 +1146,4 @@ int checkpoint_devices(void)
 	exit_code = 0;
 err:
 	return exit_code;
-}
+}