mirror of
https://github.com/checkpoint-restore/criu.git
synced 2026-01-23 02:14:37 +00:00
restore: Introduce the --restore-sibling option
We have a slight mess with how criu restores the root task.
Right now we have the following options.
1) CLI
   a) Usually
      task calling criu
       `- criu
           `- root restored task
   b) when --restore-detached AND root has pdeath_sig
      task calling criu
       `- criu
       `- root restored task
2) Library/SWRK
      task using lib/swrk
       `- criu
       `- root restored task
3) Standalone service
   a) Usually
      service
       `- service sub task
           `- root restored task
   b) when root has pdeath_sig
      criu service
       `- criu sub task
       `- root restored task
It would be better if CRIU always restored the root task as a sibling,
but we have 3 constraints:
First, the case 1.a is kept for zdtm to run tests in pid namespaces
on 3.11, which in turn doesn't allow CLONE_PARENT | CLONE_NEWPID.
Second, the CLI w/o --restore-detached waits for the restored task to die,
and this behavior may already be "expected".
Third, in the case of the standalone service, restored tasks shouldn't
become the service's children.
And I have one "plan". While live migrating tasks, the p.haul project
starts a service on the destination node, and that service uses the
library/swrk mode. In this case the restored processes become the p.haul
service's kids, which is also not great.
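That said, here's the option called --restore-sibling (referred to as
"child" below, because the root task becomes a child of the task that
called criu, i.e. a sibling of criu itself) that pairs with
--restore-detached like this: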
* detached AND child:
    task
     `- criu restore (exits at the end)
     `- root task
  The root task will become the task's child.
  This will be the default for library/swrk.
  This is what LXC needs.
* detached AND !child:
    task
     `- criu restore (exits at the end)
         `- root task
  The root task will get re-parented to init.
  This will be compatible with 1.3.
  This will be the default for the standalone service and
  is what I want for the p.haul case.
* !detached AND child:
    task
     `- criu restore (waits for root task to die)
     `- root task
  This should be deprecated, so that criu restore doesn't mess
  with task <-> root task signalling.
* !detached AND !child:
    task
     `- criu restore (waits for root task to die)
         `- root task
  This is how plain criu restore works now.
Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Acked-by: Tycho Andersen <tycho.andersen@canonical.com>
Acked-by: Andrew Vagin <avagin@openvz.org>
This commit is contained in:
parent
6b4ffdc91e
commit
0b2c951358
6 changed files with 31 additions and 3 deletions
|
|
@ -962,8 +962,7 @@ static void maybe_clone_parent(struct pstree_item *item,
|
|||
* off of 3.11, this condition can be simplified to just test the
|
||||
* options and not have the pdeath_sig test.
|
||||
*/
|
||||
if (opts.swrk_restore ||
|
||||
(opts.restore_detach && ca->core->thread_core->pdeath_sig)) {
|
||||
if (opts.restore_sibling) {
|
||||
/*
|
||||
* This means we're called from lib's criu_restore_child().
|
||||
* In that case create the root task as the child one to+
|
||||
|
|
@ -984,6 +983,10 @@ static void maybe_clone_parent(struct pstree_item *item,
|
|||
if (item->rst->clone_flags & CLONE_NEWPID)
|
||||
pr_warn("Set CLONE_PARENT | CLONE_NEWPID but it might cause restore problem,"
|
||||
"because not all kernels support such clone flags combinations!\n");
|
||||
} else if (opts.restore_detach) {
|
||||
if (ca->core->thread_core->pdeath_sig)
|
||||
pr_warn("Root task has pdeath_sig configured, so it will receive one _right_"
|
||||
"after restore on CRIU exit\n");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -244,6 +244,15 @@ static int setup_opts_from_req(int sk, CriuOpts *req)
|
|||
if (req->root)
|
||||
opts.root = req->root;
|
||||
|
||||
if (req->has_rst_sibling) {
|
||||
if (!opts.swrk_restore) {
|
||||
pr_err("rst_sibling is not allowed in standalone service\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
opts.restore_sibling = req->rst_sibling;
|
||||
}
|
||||
|
||||
if (req->has_tcp_established)
|
||||
opts.tcp_established_ok = req->tcp_established;
|
||||
|
||||
|
|
|
|||
12
crtools.c
12
crtools.c
|
|
@ -129,13 +129,14 @@ int main(int argc, char *argv[])
|
|||
int log_level = LOG_UNSET;
|
||||
char *imgs_dir = ".";
|
||||
char *work_dir = NULL;
|
||||
static const char short_opts[] = "dsRf:F:t:p:hcD:o:n:v::xVr:jlW:L:M:";
|
||||
static const char short_opts[] = "dSsRf:F:t:p:hcD:o:n:v::xVr:jlW:L:M:";
|
||||
static struct option long_opts[] = {
|
||||
{ "tree", required_argument, 0, 't' },
|
||||
{ "pid", required_argument, 0, 'p' },
|
||||
{ "leave-stopped", no_argument, 0, 's' },
|
||||
{ "leave-running", no_argument, 0, 'R' },
|
||||
{ "restore-detached", no_argument, 0, 'd' },
|
||||
{ "restore-sibling", no_argument, 0, 'S' },
|
||||
{ "daemon", no_argument, 0, 'd' },
|
||||
{ "contents", no_argument, 0, 'c' },
|
||||
{ "file", required_argument, 0, 'f' },
|
||||
|
|
@ -241,6 +242,9 @@ int main(int argc, char *argv[])
|
|||
case 'd':
|
||||
opts.restore_detach = true;
|
||||
break;
|
||||
case 'S':
|
||||
opts.restore_sibling = true;
|
||||
break;
|
||||
case 'D':
|
||||
imgs_dir = optarg;
|
||||
break;
|
||||
|
|
@ -405,6 +409,11 @@ int main(int argc, char *argv[])
|
|||
}
|
||||
}
|
||||
|
||||
if (!opts.restore_detach && opts.restore_sibling) {
|
||||
pr_msg("--restore-sibling only makes sense with --restore-detach\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (work_dir == NULL)
|
||||
work_dir = imgs_dir;
|
||||
|
||||
|
|
@ -541,6 +550,7 @@ usage:
|
|||
"* Generic:\n"
|
||||
" -t|--tree PID checkpoint a process tree identified by PID\n"
|
||||
" -d|--restore-detached detach after restore\n"
|
||||
" -S|--restore-sibling restore root task as sibling\n"
|
||||
" -s|--leave-stopped leave tasks in stopped state after checkpoint\n"
|
||||
" -R|--leave-running leave tasks in running state after checkpoint\n"
|
||||
" -D|--images-dir DIR directory for image files\n"
|
||||
|
|
|
|||
|
|
@ -32,6 +32,7 @@ struct cr_options {
|
|||
bool check_ms_kernel;
|
||||
bool show_pages_content;
|
||||
bool restore_detach;
|
||||
bool restore_sibling;
|
||||
bool ext_unix_sk;
|
||||
bool shell_job;
|
||||
bool handle_file_locks;
|
||||
|
|
|
|||
|
|
@ -666,6 +666,9 @@ int criu_restore_child(void)
|
|||
req.type = CRIU_REQ_TYPE__RESTORE;
|
||||
req.opts = opts;
|
||||
|
||||
req.opts->has_rst_sibling = true;
|
||||
req.opts->rst_sibling = true;
|
||||
|
||||
ret = send_req_and_recv_resp_sk(sks[0], &req, &resp);
|
||||
|
||||
close(sks[0]);
|
||||
|
|
|
|||
|
|
@ -52,6 +52,8 @@ message criu_opts {
|
|||
repeated ext_mount_map ext_mnt = 23;
|
||||
optional bool manage_cgroups = 24;
|
||||
repeated cgroup_root cg_root = 25;
|
||||
|
||||
optional bool rst_sibling = 26; /* swrk only */
|
||||
}
|
||||
|
||||
message criu_dump_resp {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue