From 08f3b57ab324aea55e7a92ecbe961be60df4983d Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Mon, 1 Jul 2019 17:40:44 +0300 Subject: [PATCH 0001/1854] py: Manual fixlets of code formatting Signed-off-by: Pavel Emelyanov --- coredump/criu_coredump/coredump.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/coredump/criu_coredump/coredump.py b/coredump/criu_coredump/coredump.py index bc53a7705..68dc16bf2 100644 --- a/coredump/criu_coredump/coredump.py +++ b/coredump/criu_coredump/coredump.py @@ -645,8 +645,7 @@ class coredump_generator: ppid = self.pstree[pid]["ppid"] return self._get_page(ppid, page_no) else: - with open(self._imgs_dir + "/" + "pages-" + str(pages_id) + - ".img") as f: + with open(self._imgs_dir + "/pages-%s.img" % pages_id) as f: f.seek(off * PAGESIZE) return f.read(PAGESIZE) From 3eed47223b255c092f3aba31b68a9acefa9be523 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Sun, 1 Sep 2019 12:23:39 +0100 Subject: [PATCH 0002/1854] files-reg: Drop clear_ghost_files() prototype The function clear_ghost_files() has been removed in commit b11eeea "restore: auto-unlink for ghost files (v2)". 
Signed-off-by: Radostin Stoyanov --- criu/include/files-reg.h | 1 - 1 file changed, 1 deletion(-) diff --git a/criu/include/files-reg.h b/criu/include/files-reg.h index 7a22d4d82..016d76a9f 100644 --- a/criu/include/files-reg.h +++ b/criu/include/files-reg.h @@ -30,7 +30,6 @@ extern int open_reg_by_id(u32 id); extern int open_reg_fd(struct file_desc *); extern int open_path(struct file_desc *, int (*open_cb)(int ns_root_fd, struct reg_file_info *, void *), void *arg); -extern void clear_ghost_files(void); extern const struct fdtype_ops regfile_dump_ops; extern int do_open_reg_noseek_flags(int ns_root_fd, struct reg_file_info *rfi, void *arg); From 8ea953f18b8534be883de3638369e4804771d086 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Tue, 13 Aug 2019 22:11:04 +0100 Subject: [PATCH 0003/1854] cr-dump: Remove redundant if-statement Signed-off-by: Radostin Stoyanov --- criu/cr-dump.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/criu/cr-dump.c b/criu/cr-dump.c index 9273fc0a5..fcbe816e8 100644 --- a/criu/cr-dump.c +++ b/criu/cr-dump.c @@ -782,8 +782,6 @@ static int dump_task_core_all(struct parasite_ctl *ctl, img = img_from_set(cr_imgset, CR_FD_CORE); ret = pb_write_one(img, core, PB_CORE); - if (ret < 0) - goto err; err: pr_info("----------------------------------------\n"); From 0d8e2477e928a1301c0611de233aed6879fdc13b Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Tue, 10 Sep 2019 06:50:58 -0700 Subject: [PATCH 0004/1854] arch/x86: push correct eip on the stack before lretq Right now we use pushq, but it pushes sign-extended value, so if the parasite code is placed higher than 2Gb, we will see something like this: 0xf7efd5b0: pushq $0x23 0xf7efd5b2: pushq $0xfffffffff7efd5b9 => 0xf7efd5b7: lretq Actually we want to push 0xf7efd5b9 instead of 0xfffffffff7efd5b9. 
Fixes: #398 Cc: Dmitry Safonov Cc: Cyrill Gorcunov Signed-off-by: Andrei Vagin Acked-by: Dmitry Safonov <0x7f454c46@gmail.com> Signed-off-by: Andrei Vagin --- compel/arch/x86/src/lib/include/uapi/asm/sigframe.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/compel/arch/x86/src/lib/include/uapi/asm/sigframe.h b/compel/arch/x86/src/lib/include/uapi/asm/sigframe.h index 51ca023f7..486c0c8e0 100644 --- a/compel/arch/x86/src/lib/include/uapi/asm/sigframe.h +++ b/compel/arch/x86/src/lib/include/uapi/asm/sigframe.h @@ -194,7 +194,9 @@ void rt_sigframe_erase_sigset(struct rt_sigframe *sigframe) #define ARCH_RT_SIGRETURN_COMPAT(new_sp) \ asm volatile( \ "pushq $"__stringify(USER32_CS)" \n" \ - "pushq $1f \n" \ + "xor %%rax, %%rax \n" \ + "movl $1f, %%eax \n" \ + "pushq %%rax \n" \ "lretq \n" \ "1: \n" \ ".code32 \n" \ From 3e9dc1c7f5537a860a7332b93e056e7058162578 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Wed, 11 Sep 2019 11:13:51 +0100 Subject: [PATCH 0005/1854] compel/x86: Don't use pushq for a label `pushq` sign-extends the value. Which is a bummer as the label's address may be higher than 2Gb, which means that the sign-bit will be set. As it long-jumps with ia32 selector, %r11 can be scratched. Use %r11 register as a temporary to push the 32-bit address. 
Complements: a9a760278c1a ("arch/x86: push correct eip on the stack before lretq") Cc: Cyrill Gorcunov Reported-by: Andrei Vagin Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- compel/arch/x86/plugins/std/parasite-head.S | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/compel/arch/x86/plugins/std/parasite-head.S b/compel/arch/x86/plugins/std/parasite-head.S index a988de9d4..465cd887b 100644 --- a/compel/arch/x86/plugins/std/parasite-head.S +++ b/compel/arch/x86/plugins/std/parasite-head.S @@ -25,7 +25,9 @@ ENTRY(__export_parasite_head_start_compat) .code64 PARASITE_ENTRY 0 pushq $__USER32_CS - pushq $2f + xor %r11, %r11 + movl $2f, %r11d + pushq %r11 lretq 2: .code32 From ad7e82a30f813b8b902026467434cc2e7421452e Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Mon, 9 Sep 2019 21:57:33 +0100 Subject: [PATCH 0006/1854] scripts: Drop Fedora 28/rawhide fix This change was introduced with c75cb2b and it is no longer necessary. Signed-off-by: Radostin Stoyanov --- scripts/build/Dockerfile.fedora.tmpl | 6 ------ 1 file changed, 6 deletions(-) diff --git a/scripts/build/Dockerfile.fedora.tmpl b/scripts/build/Dockerfile.fedora.tmpl index 280ce1cdd..b1127c9b2 100644 --- a/scripts/build/Dockerfile.fedora.tmpl +++ b/scripts/build/Dockerfile.fedora.tmpl @@ -30,12 +30,6 @@ RUN dnf install -y \ rubygem-asciidoctor \ kmod -# Replace coreutils-single with "traditional" coreutils -# to fix the following error on Fedora 28/rawhide while -# running under QEMU: -# > sh: /usr/bin/sort: /usr/bin/coreutils: bad interpreter: No such file or directory -RUN dnf install -y --allowerasing coreutils - RUN ln -sf python3 /usr/bin/python ENV PYTHON=python3 From 2f337652ad5c40f7a420fdd9a7c57767af4ba8a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20C=C5=82api=C5=84ski?= Date: Thu, 8 Aug 2019 18:49:13 +0200 Subject: [PATCH 0007/1854] Add new command line option: --cgroup-yard MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit Instead of creating cgroup yard in CRIU, now we can create it externally and pass it to CRIU. Useful if somebody doesn't want to grant CAP_SYS_ADMIN to CRIU. Signed-off-by: Michał Cłapiński --- Documentation/criu.txt | 36 ++++++++++++-- criu/cgroup.c | 101 +++++++++++++++++++++++++------------- criu/config.c | 4 ++ criu/cr-service.c | 3 ++ criu/crtools.c | 4 ++ criu/image.c | 2 +- criu/include/cr_options.h | 1 + images/rpc.proto | 1 + lib/c/criu.c | 13 +++++ lib/c/criu.h | 1 + 10 files changed, 126 insertions(+), 40 deletions(-) diff --git a/Documentation/criu.txt b/Documentation/criu.txt index 94fc5428a..28913a7fb 100644 --- a/Documentation/criu.txt +++ b/Documentation/criu.txt @@ -266,10 +266,33 @@ For example, the command line for the above example should look like this: discovered automatically (usually via */proc*). This option is useful when one needs *criu* to skip some controllers. -*--cgroup-props-ignore-default*:: - When combined with *--cgroup-props*, makes *criu* substitute - a predefined controller property with the new one shipped. If the option - is not used, the predefined properties are merged with the provided ones. +*--cgroup-yard* 'path':: + Instead of trying to mount cgroups in CRIU, provide a path to a directory + with already created cgroup yard. Useful if you don't want to grant + CAP_SYS_ADMIN to CRIU. For every cgroup mount there should be exactly one + directory. If there is only one controller in this mount, the dir's name + should be just the name of the controller. If there are multiple controllers + comounted, the directory name should have them be separated by a comma. 
++ +For example, if */proc/cgroups* looks like this: ++ +---------- +#subsys_name hierarchy num_cgroups enabled +cpu 1 1 1 +devices 2 2 1 +freezer 2 2 1 +---------- ++ +then you can create the cgroup yard by the following commands: ++ +---------- +mkdir private_yard +cd private_yard +mkdir cpu +mount -t cgroup -o cpu none cpu +mkdir devices,freezer +mount -t cgroup -o devices,freezer none devices,freezer +---------- *--tcp-established*:: Checkpoint established TCP connections. @@ -442,6 +465,11 @@ The 'mode' may be one of the following: *ignore*::: Don't deal with cgroups and pretend that they don't exist. +*--cgroup-yard* 'path':: + Instead of trying to mount cgroups in CRIU, provide a path to a directory + with already created cgroup yard. For more information look in the *dump* + section. + *--cgroup-root* ['controller'*:*]/'newroot':: Change the root cgroup the controller will be installed into. No controller means that root is the default for all controllers not specified. diff --git a/criu/cgroup.c b/criu/cgroup.c index 332c79fb9..9f3aef10d 100644 --- a/criu/cgroup.c +++ b/criu/cgroup.c @@ -549,8 +549,9 @@ static int collect_cgroups(struct list_head *ctls) int fd = -1; list_for_each_entry(cc, ctls, l) { - char path[PATH_MAX], mopts[1024], *root; + char path[PATH_MAX], *root; char prefix[] = ".criu.cgmounts.XXXXXX"; + const char namestr[] = "name="; struct cg_controller *cg; struct cg_root_opt *o; @@ -568,7 +569,7 @@ static int collect_cgroups(struct list_head *ctls) if (!current_controller) { /* only allow "fake" controllers to be created this way */ - if (!strstartswith(cc->name, "name=")) { + if (!strstartswith(cc->name, namestr)) { pr_err("controller %s not found\n", cc->name); return -1; } else { @@ -586,26 +587,45 @@ static int collect_cgroups(struct list_head *ctls) if (!opts.manage_cgroups) continue; - if (strstartswith(cc->name, "name=")) - snprintf(mopts, sizeof(mopts), "none,%s", cc->name); - else - snprintf(mopts, sizeof(mopts), "%s", cc->name); + 
if (opts.cgroup_yard) { + char dir_path[PATH_MAX]; + int off; + + off = snprintf(dir_path, PATH_MAX, "%s/", opts.cgroup_yard); + if (strstartswith(cc->name, namestr)) + snprintf(dir_path + off, PATH_MAX, "%s", cc->name + strlen(namestr)); + else + snprintf(dir_path + off, PATH_MAX, "%s", cc->name); - if (mkdtemp(prefix) == NULL) { - pr_perror("can't make dir for cg mounts"); - return -1; + fd = open(dir_path, O_RDONLY | O_DIRECTORY, 0); + if (fd < 0) { + pr_perror("couldn't open %s", dir_path); + return -1; + } + } else { + char mopts[1024]; + + if (strstartswith(cc->name, namestr)) + snprintf(mopts, sizeof(mopts), "none,%s", cc->name); + else + snprintf(mopts, sizeof(mopts), "%s", cc->name); + + if (mkdtemp(prefix) == NULL) { + pr_perror("can't make dir for cg mounts"); + return -1; + } + + if (mount("none", prefix, "cgroup", 0, mopts) < 0) { + pr_perror("couldn't mount %s", mopts); + rmdir(prefix); + return -1; + } + + fd = open_detach_mount(prefix); + if (fd < 0) + return -1; } - if (mount("none", prefix, "cgroup", 0, mopts) < 0) { - pr_perror("couldn't mount %s", mopts); - rmdir(prefix); - return -1; - } - - fd = open_detach_mount(prefix); - if (fd < 0) - return -1; - path_pref_len = snprintf(path, PATH_MAX, "/proc/self/fd/%d", fd); root = cc->path; @@ -620,6 +640,7 @@ static int collect_cgroups(struct list_head *ctls) snprintf(path + path_pref_len, PATH_MAX - path_pref_len, "%s", root); ret = ftw(path, add_cgroup, 4); + if (ret < 0) pr_perror("failed walking %s for empty cgroups", path); @@ -1167,10 +1188,12 @@ void fini_cgroup(void) return; close_service_fd(CGROUP_YARD); - if (umount2(cg_yard, MNT_DETACH)) - pr_perror("Unable to umount %s", cg_yard); - if (rmdir(cg_yard)) - pr_perror("Unable to remove %s", cg_yard); + if (!opts.cgroup_yard) { + if (umount2(cg_yard, MNT_DETACH)) + pr_perror("Unable to umount %s", cg_yard); + if (rmdir(cg_yard)) + pr_perror("Unable to remove %s", cg_yard); + } xfree(cg_yard); cg_yard = NULL; } @@ -1652,20 +1675,28 @@ static int 
prepare_cgroup_sfd(CgroupEntry *ce) pr_info("Preparing cgroups yard (cgroups restore mode %#x)\n", opts.manage_cgroups); - off = sprintf(paux, ".criu.cgyard.XXXXXX"); - if (mkdtemp(paux) == NULL) { - pr_perror("Can't make temp cgyard dir"); - return -1; - } + if (opts.cgroup_yard) { + off = sprintf(paux, "%s", opts.cgroup_yard); - cg_yard = xstrdup(paux); - if (!cg_yard) { - rmdir(paux); - return -1; - } + cg_yard = xstrdup(paux); + if (!cg_yard) + return -1; + } else { + off = sprintf(paux, ".criu.cgyard.XXXXXX"); + if (mkdtemp(paux) == NULL) { + pr_perror("Can't make temp cgyard dir"); + return -1; + } - if (make_yard(cg_yard)) - goto err; + cg_yard = xstrdup(paux); + if (!cg_yard) { + rmdir(paux); + return -1; + } + + if (make_yard(cg_yard)) + goto err; + } pr_debug("Opening %s as cg yard\n", cg_yard); i = open(cg_yard, O_DIRECTORY); diff --git a/criu/config.c b/criu/config.c index 39aa071c9..cdea91f02 100644 --- a/criu/config.c +++ b/criu/config.c @@ -516,6 +516,7 @@ int parse_options(int argc, char **argv, bool *usage_error, { "tls-key", required_argument, 0, 1095}, BOOL_OPT("tls", &opts.tls), {"tls-no-cn-verify", no_argument, &opts.tls_no_cn_verify, true}, + { "cgroup-yard", required_argument, 0, 1096 }, { }, }; @@ -814,6 +815,9 @@ int parse_options(int argc, char **argv, bool *usage_error, case 1095: SET_CHAR_OPTS(tls_key, optarg); break; + case 1096: + SET_CHAR_OPTS(cgroup_yard, optarg); + break; case 'V': pr_msg("Version: %s\n", CRIU_VERSION); if (strcmp(CRIU_GITID, "0")) diff --git a/criu/cr-service.c b/criu/cr-service.c index 0938db02b..95ba2e5ce 100644 --- a/criu/cr-service.c +++ b/criu/cr-service.c @@ -608,6 +608,9 @@ static int setup_opts_from_req(int sk, CriuOpts *req) goto err; } + if (req->cgroup_yard) + SET_CHAR_OPTS(cgroup_yard, req->cgroup_yard); + if (req->tls_cacert) SET_CHAR_OPTS(tls_cacert, req->tls_cacert); if (req->tls_cacrl) diff --git a/criu/crtools.c b/criu/crtools.c index a94875684..0799a564c 100644 --- a/criu/crtools.c +++ 
b/criu/crtools.c @@ -366,6 +366,10 @@ usage: " --cgroup-dump-controller NAME\n" " define cgroup controller to be dumped\n" " and skip anything else present in system\n" +" --cgroup-yard PATH\n" +" instead of trying to mount cgroups in CRIU, provide\n" +" a path to a directory with already created cgroup yard.\n" +" Useful if you don't want to grant CAP_SYS_ADMIN to CRIU\n" " --lsm-profile TYPE:NAME\n" " Specify an LSM profile to be used during restore.\n" " The type can be either 'apparmor' or 'selinux'.\n" diff --git a/criu/image.c b/criu/image.c index 2eb926929..0225788b0 100644 --- a/criu/image.c +++ b/criu/image.c @@ -190,7 +190,7 @@ int prepare_inventory(InventoryEntry *he) struct dmp_info d; } crt = { .i.pid = &pid }; - pr_info("Perparing image inventory (version %u)\n", CRTOOLS_IMAGES_V1); + pr_info("Preparing image inventory (version %u)\n", CRTOOLS_IMAGES_V1); he->img_version = CRTOOLS_IMAGES_V1_1; he->fdinfo_per_id = true; diff --git a/criu/include/cr_options.h b/criu/include/cr_options.h index 82f76ad94..da7c10d69 100644 --- a/criu/include/cr_options.h +++ b/criu/include/cr_options.h @@ -106,6 +106,7 @@ struct cr_options { char *cgroup_props; char *cgroup_props_file; struct list_head new_cgroup_roots; + char *cgroup_yard; bool autodetect_ext_mounts; int enable_external_sharing; int enable_external_masters; diff --git a/images/rpc.proto b/images/rpc.proto index 15e677a77..c402259ac 100644 --- a/images/rpc.proto +++ b/images/rpc.proto @@ -120,6 +120,7 @@ message criu_opts { optional string tls_key = 57; optional bool tls = 58; optional bool tls_no_cn_verify = 59; + optional string cgroup_yard = 60; /* optional bool check_mounts = 128; */ } diff --git a/lib/c/criu.c b/lib/c/criu.c index 17d5c3983..14ddff26d 100644 --- a/lib/c/criu.c +++ b/lib/c/criu.c @@ -987,6 +987,19 @@ int criu_local_add_cg_dump_controller(criu_opts *opts, const char *name) return 0; } +int criu_local_add_cg_yard(criu_opts *opts, const char *path) +{ + char *new; + + new = strdup(path); 
+ if (!new) + return -ENOMEM; + + free(opts->rpc->cgroup_yard); + opts->rpc->cgroup_yard = new; + return 0; +} + int criu_add_skip_mnt(const char *mnt) { return criu_local_add_skip_mnt(global_opts, mnt); diff --git a/lib/c/criu.h b/lib/c/criu.h index 76f3547fc..cb37c5291 100644 --- a/lib/c/criu.h +++ b/lib/c/criu.h @@ -207,6 +207,7 @@ int criu_local_add_irmap_path(criu_opts *opts, const char *path); int criu_local_add_cg_props(criu_opts *opts, const char *stream); int criu_local_add_cg_props_file(criu_opts *opts, const char *path); int criu_local_add_cg_dump_controller(criu_opts *opts, const char *name); +int criu_local_add_cg_yard(criu_opts *opts, const char *path); int criu_local_add_inherit_fd(criu_opts *opts, int fd, const char *key); int criu_local_add_external(criu_opts *opts, const char *key); int criu_local_set_page_server_address_port(criu_opts *opts, const char *address, int port); From cf0080505ac3b3194f664d77edccccfa47bf450a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20C=C5=82api=C5=84ski?= Date: Wed, 14 Aug 2019 21:13:34 +0200 Subject: [PATCH 0008/1854] test: implement test for new --cgroup-yard option MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Michał Cłapiński --- test/zdtm.py | 4 +- test/zdtm/static/Makefile | 3 +- test/zdtm/static/cgroup_yard.c | 1 + test/zdtm/static/cgroup_yard.desc | 7 ++++ test/zdtm/static/cgroup_yard.hook | 70 +++++++++++++++++++++++++++++++ 5 files changed, 82 insertions(+), 3 deletions(-) create mode 120000 test/zdtm/static/cgroup_yard.c create mode 100644 test/zdtm/static/cgroup_yard.desc create mode 100755 test/zdtm/static/cgroup_yard.hook diff --git a/test/zdtm.py b/test/zdtm.py index 0153c6058..f0a102413 100755 --- a/test/zdtm.py +++ b/test/zdtm.py @@ -2018,7 +2018,7 @@ def print_sep(title, sep="=", width=80): def print_error(line): line = line.rstrip() - print(line) + print(line.encode('utf-8')) if line.endswith('>'): # combine pie output return True 
return False @@ -2028,7 +2028,7 @@ def grep_errors(fname): first = True print_next = False before = [] - with open(fname) as fd: + with open(fname, errors='replace') as fd: for l in fd: before.append(l) if len(before) > 5: diff --git a/test/zdtm/static/Makefile b/test/zdtm/static/Makefile index d8279d6f8..a38482f44 100644 --- a/test/zdtm/static/Makefile +++ b/test/zdtm/static/Makefile @@ -319,7 +319,8 @@ TST_DIR = \ cgroup03 \ cgroup04 \ cgroup_ifpriomap \ - cgroup_stray \ + cgroup_stray \ + cgroup_yard \ unlink_fstat04 \ unlink_fstat041 \ mntns_remap \ diff --git a/test/zdtm/static/cgroup_yard.c b/test/zdtm/static/cgroup_yard.c new file mode 120000 index 000000000..f3683c2b4 --- /dev/null +++ b/test/zdtm/static/cgroup_yard.c @@ -0,0 +1 @@ +cgroup00.c \ No newline at end of file diff --git a/test/zdtm/static/cgroup_yard.desc b/test/zdtm/static/cgroup_yard.desc new file mode 100644 index 000000000..8736d6780 --- /dev/null +++ b/test/zdtm/static/cgroup_yard.desc @@ -0,0 +1,7 @@ +{ +'flavor': 'h', +'flags': 'suid', +# We create the external cgroup yard in working directory during --pre-dump +# hook. We have to go up a few directories to find the yard. 
+'opts': '--manage-cgroups --cgroup-yard ../../../../../../external_yard' +} diff --git a/test/zdtm/static/cgroup_yard.hook b/test/zdtm/static/cgroup_yard.hook new file mode 100755 index 000000000..7ae53342c --- /dev/null +++ b/test/zdtm/static/cgroup_yard.hook @@ -0,0 +1,70 @@ +#!/usr/bin/env python + +import sys +import os +import subprocess +import tempfile + +yard = "external_yard" + +if sys.argv[1] == "--pre-dump": + ''' + Create external cgroup yard to be passed to CRIU via --cgroup-yard + ''' + os.mkdir(yard) + with open("/proc/self/cgroup") as f: + for line in f: + cgr = line.split(":")[1] + + if cgr == "": + continue + + if cgr.startswith("name="): + ctrl = cgr[len("name="):] + opts = "none," + cgr + else: + ctrl = cgr + opts = cgr + + os.mkdir(yard + "/" + ctrl) + subprocess.check_call(["mount", "-t", "cgroup", "none", yard + "/" + ctrl, "-o", opts]) + +if sys.argv[1] == "--post-restore": + ''' + Clean up the cgroup yard created during `--pre-dump` + ''' + with open("/proc/self/cgroup") as f: + for line in f: + cgr = line.split(":")[1] + + if cgr == "": + continue + + if cgr.startswith("name="): + ctrl = cgr[len("name="):] + else: + ctrl = cgr + + subprocess.check_call(["umount", yard + "/" + ctrl]) + os.rmdir(yard + "/" + ctrl) + os.rmdir(yard) + +if sys.argv[1] in ["--pre-restore", "--clean"]: + ''' + Clean up the leftover cgroups created by the test + ''' + tname = tempfile.mkdtemp() + subprocess.call(["mount", "-t", "cgroup", "none", tname, "-o", "none,name=zdtmtst"]) + + try: + os.rmdir(os.path.join(tname, "subcg00", "subsubcg")) + except: + pass + + try: + os.rmdir(os.path.join(tname, "subcg00")) + except: + pass + + subprocess.call(["umount", tname]) + os.rmdir(tname) From 4f24786b36058ab82e669fd5686cc9f5cfc573db Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Sat, 14 Sep 2019 13:47:06 +0100 Subject: [PATCH 0009/1854] travis: Install missing diffutils dependency The following tests fail in Fedora rawhide because /usr/bin/diff is missing. 
* zdtm/static/bridge(ns) * zdtm/static/cr_veth(uns) * zdtm/static/macvlan(ns) * zdtm/static/netns(uns) * zdtm/static/netns-nf(ns) * zdtm/static/sit(ns) Signed-off-by: Radostin Stoyanov --- scripts/build/Dockerfile.fedora.tmpl | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/build/Dockerfile.fedora.tmpl b/scripts/build/Dockerfile.fedora.tmpl index b1127c9b2..0500a8fc5 100644 --- a/scripts/build/Dockerfile.fedora.tmpl +++ b/scripts/build/Dockerfile.fedora.tmpl @@ -3,6 +3,7 @@ ARG ENV1=FOOBAR RUN dnf install -y \ ccache \ + diffutils \ findutils \ gcc \ git \ From 8bdc60d50e5b990aa8debd06785175da3e0ba34a Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Sat, 14 Sep 2019 10:26:22 +0300 Subject: [PATCH 0010/1854] arch/x86: fpu_state->fpu_state_ia32.xsave has to be 64-byte aligned Before the 5.2 kernel, only fpu_state->fpu_state_64.xsave has to be 64-byte aligned. But starting with the 5.2 kernel, the same is required for fpu_state->fpu_state_ia32.xsave. The behavior was changed in: c2ff9e9a3d9d ("x86/fpu: Merge the two code paths in __fpu__restore_sig()") Signed-off-by: Andrei Vagin --- compel/arch/x86/src/lib/include/uapi/asm/fpu.h | 8 ++++++-- criu/arch/x86/sigframe.c | 6 ++++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/compel/arch/x86/src/lib/include/uapi/asm/fpu.h b/compel/arch/x86/src/lib/include/uapi/asm/fpu.h index 509f4488b..4ff531fb9 100644 --- a/compel/arch/x86/src/lib/include/uapi/asm/fpu.h +++ b/compel/arch/x86/src/lib/include/uapi/asm/fpu.h @@ -263,7 +263,7 @@ struct xsave_struct_ia32 { struct ymmh_struct ymmh; uint8_t extended_state_area[EXTENDED_STATE_AREA_SIZE]; }; -} __aligned(FXSAVE_ALIGN_BYTES); +}; typedef struct { /* @@ -309,7 +309,11 @@ typedef struct { typedef struct { union { fpu_state_64_t fpu_state_64; - fpu_state_ia32_t fpu_state_ia32; + struct { + /* fpu_state_ia32->xsave has to be 64-byte aligned. 
*/ + uint32_t __pad[2]; + fpu_state_ia32_t fpu_state_ia32; + }; }; uint8_t has_fpu; diff --git a/criu/arch/x86/sigframe.c b/criu/arch/x86/sigframe.c index 11b0d640d..33ba14387 100644 --- a/criu/arch/x86/sigframe.c +++ b/criu/arch/x86/sigframe.c @@ -28,8 +28,14 @@ int sigreturn_prep_fpu_frame(struct rt_sigframe *sigframe, sigframe->native.uc.uc_mcontext.fpstate = (uint64_t)addr; } else if (!sigframe->is_native) { + unsigned long addr = (unsigned long)(void *)&fpu_state->fpu_state_ia32.xsave; sigframe->compat.uc.uc_mcontext.fpstate = (uint32_t)(unsigned long)(void *)&fpu_state->fpu_state_ia32; + if ((addr % 64ul)) { + pr_err("Unaligned address passed: %lx (native %d)\n", + addr, sigframe->is_native); + return -1; + } } return 0; From a9f974b4951a261063187f0536c01c7f84e5fe56 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Sun, 15 Sep 2019 06:58:15 +0100 Subject: [PATCH 0011/1854] Introduce flush_early_log_to_stderr destructor Prior to log initialisation, CRIU preserves all (early) log messages in a buffer. In case of error the content of this buffer needs to be printed out (flushed). 
Suggested-by: Dmitry Safonov <0x7f454c46@gmail.com> Signed-off-by: Radostin Stoyanov --- criu/crtools.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/criu/crtools.c b/criu/crtools.c index 0799a564c..4625446ad 100644 --- a/criu/crtools.c +++ b/criu/crtools.c @@ -47,6 +47,13 @@ #include "setproctitle.h" #include "sysctl.h" +void flush_early_log_to_stderr() __attribute__((destructor)); + +void flush_early_log_to_stderr(void) +{ + flush_early_log_buffer(STDERR_FILENO); +} + int main(int argc, char *argv[], char *envp[]) { int ret = -1; @@ -95,10 +102,8 @@ int main(int argc, char *argv[], char *envp[]) return cr_service_work(atoi(argv[2])); } - if (check_options()) { - flush_early_log_buffer(STDERR_FILENO); + if (check_options()) return 1; - } if (opts.imgs_dir == NULL) SET_CHAR_OPTS(imgs_dir, "."); From 813bfbeb4f26611f8fd431c6fd65104cbad789d1 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Sun, 15 Sep 2019 07:03:57 +0100 Subject: [PATCH 0012/1854] Convert pr_msg() error messages to pr_err() Print error messages to stderr (instead of stdout). 
Suggested-by: Andrei Vagin Signed-off-by: Radostin Stoyanov --- criu/config.c | 4 ++-- criu/cr-check.c | 2 +- criu/crtools.c | 22 +++++++++++----------- criu/proc_parse.c | 2 +- 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/criu/config.c b/criu/config.c index cdea91f02..2ad2fd43c 100644 --- a/criu/config.c +++ b/criu/config.c @@ -835,10 +835,10 @@ int parse_options(int argc, char **argv, bool *usage_error, bad_arg: if (idx < 0) /* short option */ - pr_msg("Error: invalid argument for -%c: %s\n", + pr_err("invalid argument for -%c: %s\n", opt, optarg); else /* long option */ - pr_msg("Error: invalid argument for --%s: %s\n", + pr_err("invalid argument for --%s: %s\n", long_opts[idx].name, optarg); return 1; } diff --git a/criu/cr-check.c b/criu/cr-check.c index 75a665cfb..729b2dc38 100644 --- a/criu/cr-check.c +++ b/criu/cr-check.c @@ -62,7 +62,7 @@ static int check_tty(void) int ret = -1; if (ARRAY_SIZE(t.c_cc) < TERMIOS_NCC) { - pr_msg("struct termios has %d @c_cc while " + pr_err("struct termios has %d @c_cc while " "at least %d expected.\n", (int)ARRAY_SIZE(t.c_cc), TERMIOS_NCC); diff --git a/criu/crtools.c b/criu/crtools.c index 4625446ad..5740b806d 100644 --- a/criu/crtools.c +++ b/criu/crtools.c @@ -112,7 +112,7 @@ int main(int argc, char *argv[], char *envp[]) SET_CHAR_OPTS(work_dir, opts.imgs_dir); if (optind >= argc) { - pr_msg("Error: command is required\n"); + pr_err("command is required\n"); goto usage; } @@ -120,17 +120,17 @@ int main(int argc, char *argv[], char *envp[]) if (has_exec_cmd) { if (!has_sub_command) { - pr_msg("Error: --exec-cmd requires a command\n"); + pr_err("--exec-cmd requires a command\n"); goto usage; } if (strcmp(argv[optind], "restore")) { - pr_msg("Error: --exec-cmd is available for the restore command only\n"); + pr_err("--exec-cmd is available for the restore command only\n"); goto usage; } if (opts.restore_detach) { - pr_msg("Error: --restore-detached and --exec-cmd cannot be used together\n"); + 
pr_err("--restore-detached and --exec-cmd cannot be used together\n"); goto usage; } @@ -142,7 +142,7 @@ int main(int argc, char *argv[], char *envp[]) } else { /* No subcommands except for cpuinfo and restore --exec-cmd */ if (strcmp(argv[optind], "cpuinfo") && has_sub_command) { - pr_msg("Error: excessive parameter%s for command %s\n", + pr_err("excessive parameter%s for command %s\n", (argc - optind) > 2 ? "s" : "", argv[optind]); goto usage; } @@ -241,7 +241,7 @@ int main(int argc, char *argv[], char *envp[]) if (!strcmp(argv[optind], "cpuinfo")) { if (!argv[optind + 1]) { - pr_msg("Error: cpuinfo requires an action: dump or check\n"); + pr_err("cpuinfo requires an action: dump or check\n"); goto usage; } if (!strcmp(argv[optind + 1], "dump")) @@ -251,17 +251,17 @@ int main(int argc, char *argv[], char *envp[]) } if (!strcmp(argv[optind], "exec")) { - pr_msg("The \"exec\" action is deprecated by the Compel library.\n"); + pr_err("The \"exec\" action is deprecated by the Compel library.\n"); return -1; } if (!strcmp(argv[optind], "show")) { - pr_msg("The \"show\" action is deprecated by the CRIT utility.\n"); - pr_msg("To view an image use the \"crit decode -i $name --pretty\" command.\n"); + pr_err("The \"show\" action is deprecated by the CRIT utility.\n"); + pr_err("To view an image use the \"crit decode -i $name --pretty\" command.\n"); return -1; } - pr_msg("Error: unknown command: %s\n", argv[optind]); + pr_err("unknown command: %s\n", argv[optind]); usage: pr_msg("\n" "Usage:\n" @@ -455,6 +455,6 @@ usage: return 0; opt_pid_missing: - pr_msg("Error: pid not specified\n"); + pr_err("pid not specified\n"); return 1; } diff --git a/criu/proc_parse.c b/criu/proc_parse.c index 0e8b6f209..97f82ee01 100644 --- a/criu/proc_parse.c +++ b/criu/proc_parse.c @@ -932,7 +932,7 @@ int prepare_loginuid(unsigned int value, unsigned int loglevel) if (write(fd, buf, 11) < 0) { print_on_level(loglevel, - "Write %s to /proc/self/loginuid failed: %s", + "Write %s to 
/proc/self/loginuid failed: %s\n", buf, strerror(errno)); ret = -1; } From db40ef5be671dbd78f42bd868a5377e62707c3de Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Sun, 15 Sep 2019 11:49:27 -0700 Subject: [PATCH 0013/1854] test/cgroup_yard: always clean up a test cgroup yard MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Right now it is cleaned up from a post-restore hook, but zdtm.py can be executed with the norst option: $ zdtm.py run -t zdtm/static/cgroup_yard --norst ... OSError: [Errno 17] File exists: 'external_yard' Cc: Michał Cłapiński Signed-off-by: Andrei Vagin --- test/zdtm/static/cgroup_yard.hook | 39 ++++++++----------------------- 1 file changed, 10 insertions(+), 29 deletions(-) diff --git a/test/zdtm/static/cgroup_yard.hook b/test/zdtm/static/cgroup_yard.hook index 7ae53342c..cc3971707 100755 --- a/test/zdtm/static/cgroup_yard.hook +++ b/test/zdtm/static/cgroup_yard.hook @@ -12,6 +12,7 @@ if sys.argv[1] == "--pre-dump": Create external cgroup yard to be passed to CRIU via --cgroup-yard ''' os.mkdir(yard) + subprocess.check_call(["mount", "-t", "tmpfs", "zdtm_yard", yard]) with open("/proc/self/cgroup") as f: for line in f: cgr = line.split(":")[1] @@ -29,26 +30,6 @@ if sys.argv[1] == "--pre-dump": os.mkdir(yard + "/" + ctrl) subprocess.check_call(["mount", "-t", "cgroup", "none", yard + "/" + ctrl, "-o", opts]) -if sys.argv[1] == "--post-restore": - ''' - Clean up the cgroup yard created during `--pre-dump` - ''' - with open("/proc/self/cgroup") as f: - for line in f: - cgr = line.split(":")[1] - - if cgr == "": - continue - - if cgr.startswith("name="): - ctrl = cgr[len("name="):] - else: - ctrl = cgr - - subprocess.check_call(["umount", yard + "/" + ctrl]) - os.rmdir(yard + "/" + ctrl) - os.rmdir(yard) - if sys.argv[1] in ["--pre-restore", "--clean"]: ''' Clean up the leftover cgroups created by the test @@ -56,15 +37,15 @@ if sys.argv[1] in ["--pre-restore", "--clean"]: tname = tempfile.mkdtemp() 
subprocess.call(["mount", "-t", "cgroup", "none", tname, "-o", "none,name=zdtmtst"]) - try: - os.rmdir(os.path.join(tname, "subcg00", "subsubcg")) - except: - pass - - try: - os.rmdir(os.path.join(tname, "subcg00")) - except: - pass + for cg in [os.path.join(tname, "subcg00", "subsubcg"), + os.path.join(tname, "subcg00")]: + if os.access(cg, os.F_OK): + os.rmdir(cg) subprocess.call(["umount", tname]) os.rmdir(tname) + +if sys.argv[1] == "--clean": + if os.access(yard, os.F_OK): + subprocess.call(["umount", "-l", yard]) + os.rmdir(yard) From f44939317f60288874bdc7b8544442a59db0d024 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Thu, 19 Sep 2019 23:37:57 +0300 Subject: [PATCH 0014/1854] zdtm/cgroup_yard: create a test cgroup yard from the post-start hook MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Right now, it is created from the pre-dump hook, but if the --snap option is set, the test fails: $ python test/zdtm.py run -t zdtm/static/cgroup_yard -f h --snap --iter 3 ... 
Running zdtm/static/cgroup_yard.hook(--pre-dump) Traceback (most recent call last): File zdtm/static/cgroup_yard.hook, line 14, in os.mkdir(yard) OSError: [Errno 17] File exists: 'external_yard' Cc: Michał Cłapiński Signed-off-by: Andrei Vagin --- test/zdtm/static/cgroup_yard.hook | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/zdtm/static/cgroup_yard.hook b/test/zdtm/static/cgroup_yard.hook index cc3971707..072b9d38d 100755 --- a/test/zdtm/static/cgroup_yard.hook +++ b/test/zdtm/static/cgroup_yard.hook @@ -7,7 +7,7 @@ import tempfile yard = "external_yard" -if sys.argv[1] == "--pre-dump": +if sys.argv[1] == "--post-start": ''' Create external cgroup yard to be passed to CRIU via --cgroup-yard ''' From b47ef26eac1777396ede638af0d6951a3fc52a1e Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Sat, 21 Sep 2019 13:35:18 +0300 Subject: [PATCH 0015/1854] cgroup: fixup nits 1) s/\s*$// 2) fix snprintf out of bound access Signed-off-by: Pavel Tikhomirov --- criu/cgroup.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/criu/cgroup.c b/criu/cgroup.c index 9f3aef10d..1be8be234 100644 --- a/criu/cgroup.c +++ b/criu/cgroup.c @@ -590,12 +590,12 @@ static int collect_cgroups(struct list_head *ctls) if (opts.cgroup_yard) { char dir_path[PATH_MAX]; int off; - + off = snprintf(dir_path, PATH_MAX, "%s/", opts.cgroup_yard); if (strstartswith(cc->name, namestr)) - snprintf(dir_path + off, PATH_MAX, "%s", cc->name + strlen(namestr)); + snprintf(dir_path + off, PATH_MAX - off, "%s", cc->name + strlen(namestr)); else - snprintf(dir_path + off, PATH_MAX, "%s", cc->name); + snprintf(dir_path + off, PATH_MAX - off, "%s", cc->name); fd = open(dir_path, O_RDONLY | O_DIRECTORY, 0); if (fd < 0) { From 3f1c4a17ad18fca3f95c0f51c5c42fedbb403b89 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Tue, 24 Sep 2019 23:36:29 +0300 Subject: [PATCH 0016/1854] pipe: print pipe_id as unsigned to generate an external pipe name Reported-by: Mr Jenkins 
Signed-off-by: Andrei Vagin --- criu/pipes.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/criu/pipes.c b/criu/pipes.c index fd1a7e6bb..cb5da71de 100644 --- a/criu/pipes.c +++ b/criu/pipes.c @@ -282,8 +282,8 @@ static char *pipe_d_name(struct file_desc *d, char *buf, size_t s) struct pipe_info *pi; pi = container_of(d, struct pipe_info, d); - if (snprintf(buf, s, "pipe:[%d]", pi->pe->pipe_id) >= s) { - pr_err("Not enough room for pipe %d identifier string\n", + if (snprintf(buf, s, "pipe:[%u]", pi->pe->pipe_id) >= s) { + pr_err("Not enough room for pipe %u identifier string\n", pi->pe->pipe_id); return NULL; } From b84f481b55442433f46b5ea4b91a32dd8cffc502 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Tue, 24 Sep 2019 23:48:15 +0300 Subject: [PATCH 0017/1854] unix: print inode numbers as unsigned int Reported-by: Mr Jenkins Signed-off-by: Andrei Vagin --- criu/sk-unix.c | 60 +++++++++++++++++++++++++------------------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/criu/sk-unix.c b/criu/sk-unix.c index f0620e676..f43aa2124 100644 --- a/criu/sk-unix.c +++ b/criu/sk-unix.c @@ -130,7 +130,7 @@ static struct unix_sk_listen_icon *lookup_unix_listen_icons(unsigned int peer_in static void show_one_unix(char *act, const struct unix_sk_desc *sk) { - pr_debug("\t%s: ino %d peer_ino %d family %4d type %4d state %2d name %s\n", + pr_debug("\t%s: ino %u peer_ino %u family %4d type %4d state %2d name %s\n", act, sk->sd.ino, sk->peer_ino, sk->sd.family, sk->type, sk->state, sk->name); if (sk->nr_icons) { @@ -143,7 +143,7 @@ static void show_one_unix(char *act, const struct unix_sk_desc *sk) static void show_one_unix_img(const char *act, const UnixSkEntry *e) { - pr_info("\t%s: id %#x ino %d peer %d type %d state %d name %d bytes\n", + pr_info("\t%s: id %#x ino %u peer %u type %d state %d name %d bytes\n", act, e->id, e->ino, e->peer, e->type, e->state, (int)e->name.len); } @@ -426,7 +426,7 @@ static int dump_one_unix_fd(int lfd, 
uint32_t id, const struct fd_parms *p) if (ue->peer) { peer = (struct unix_sk_desc *)lookup_socket(ue->peer, PF_UNIX, 0); if (IS_ERR_OR_NULL(peer)) { - pr_err("Unix socket %d without peer %d\n", + pr_err("Unix socket %u without peer %u\n", ue->ino, ue->peer); goto err; } @@ -437,7 +437,7 @@ static int dump_one_unix_fd(int lfd, uint32_t id, const struct fd_parms *p) */ if (peer->peer_ino != ue->ino) { if (!peer->name) { - pr_err("Unix socket %d with unreachable peer %d (%d)\n", + pr_err("Unix socket %u with unreachable peer %u (%u)\n", ue->ino, ue->peer, peer->peer_ino); goto err; } @@ -513,7 +513,7 @@ static int dump_one_unix_fd(int lfd, uint32_t id, const struct fd_parms *p) ue->peer = e->sk_desc->sd.ino; - pr_debug("\t\tFixed inflight socket %d peer %d)\n", + pr_debug("\t\tFixed inflight socket %u peer %u)\n", ue->ino, ue->peer); } dump: @@ -1383,7 +1383,7 @@ static int keep_deleted(struct unix_sk_info *ui) { int fd = open(ui->name, O_PATH); if (fd < 0) { - pr_perror("ghost: Can't open id %#x ino %d addr %s", + pr_perror("ghost: Can't open id %#x ino %u addr %s", ui->ue->id, ui->ue->ino, ui->name); return -1; } @@ -1409,7 +1409,7 @@ static int bind_on_deleted(int sk, struct unix_sk_info *ui) int ret; if (ui->ue->name.len >= UNIX_PATH_MAX) { - pr_err("ghost: Too long name for socket id %#x ino %d name %s\n", + pr_err("ghost: Too long name for socket id %#x ino %u name %s\n", ui->ue->id, ui->ue->ino, ui->name); return -ENOSPC; } @@ -1424,14 +1424,14 @@ static int bind_on_deleted(int sk, struct unix_sk_info *ui) ret = access(path, R_OK | W_OK | X_OK); if (ret == 0) { ui->ghost_dir_pos = pos - path; - pr_debug("ghost: socket id %#x ino %d name %s detected F_OK %s\n", + pr_debug("ghost: socket id %#x ino %u name %s detected F_OK %s\n", ui->ue->id, ui->ue->ino, ui->name, path); break; } if (errno != ENOENT) { ret = -errno; - pr_perror("ghost: Can't access %s for socket id %#x ino %d name %s", + pr_perror("ghost: Can't access %s for socket id %#x ino %u name %s", path, 
ui->ue->id, ui->ue->ino, ui->name); return ret; } @@ -1441,7 +1441,7 @@ static int bind_on_deleted(int sk, struct unix_sk_info *ui) path[ui->ue->name.len] = '\0'; pos = dirname(path); - pr_debug("ghost: socket id %#x ino %d name %s creating %s\n", + pr_debug("ghost: socket id %#x ino %u name %s creating %s\n", ui->ue->id, ui->ue->ino, ui->name, pos); ret = mkdirpat(AT_FDCWD, pos, 0755); if (ret) { @@ -1471,15 +1471,15 @@ static int bind_on_deleted(int sk, struct unix_sk_info *ui) * clean it up. */ if (unlinkat(AT_FDCWD, path_parked, 0) == 0) - pr_debug("ghost: Unlinked stale socket id %#x ino %d name %s\n", + pr_debug("ghost: Unlinked stale socket id %#x ino %u name %s\n", ui->ue->id, ui->ue->ino, path_parked); if (rename(ui->name, path_parked)) { ret = -errno; - pr_perror("ghost: Can't rename id %#x ino %d addr %s -> %s", + pr_perror("ghost: Can't rename id %#x ino %u addr %s -> %s", ui->ue->id, ui->ue->ino, ui->name, path_parked); return ret; } - pr_debug("ghost: id %#x ino %d renamed %s -> %s\n", + pr_debug("ghost: id %#x ino %u renamed %s -> %s\n", ui->ue->id, ui->ue->ino, ui->name, path_parked); renamed = true; ret = bind(sk, (struct sockaddr *)&addr, @@ -1487,7 +1487,7 @@ static int bind_on_deleted(int sk, struct unix_sk_info *ui) } if (ret < 0) { ret = -errno; - pr_perror("ghost: Can't bind on socket id %#x ino %d addr %s", + pr_perror("ghost: Can't bind on socket id %#x ino %u addr %s", ui->ue->id, ui->ue->ino, ui->name); return ret; } @@ -1499,7 +1499,7 @@ static int bind_on_deleted(int sk, struct unix_sk_info *ui) ret = keep_deleted(ui); if (ret < 0) { - pr_err("ghost: Can't save socket %#x ino %d addr %s into fdstore\n", + pr_err("ghost: Can't save socket %#x ino %u addr %s into fdstore\n", ui->ue->id, ui->ue->ino, ui->name); return -EIO; } @@ -1511,7 +1511,7 @@ static int bind_on_deleted(int sk, struct unix_sk_info *ui) ret = unlinkat(AT_FDCWD, ui->name, 0); if (ret < 0) { ret = -errno; - pr_perror("ghost: Can't unlink socket %#x ino %d addr %s", + 
pr_perror("ghost: Can't unlink socket %#x ino %u addr %s", ui->ue->id, ui->ue->ino, ui->name); return ret; } @@ -1519,12 +1519,12 @@ static int bind_on_deleted(int sk, struct unix_sk_info *ui) if (renamed) { if (rename(path_parked, ui->name)) { ret = -errno; - pr_perror("ghost: Can't rename id %#x ino %d addr %s -> %s", + pr_perror("ghost: Can't rename id %#x ino %u addr %s -> %s", ui->ue->id, ui->ue->ino, path_parked, ui->name); return ret; } - pr_debug("ghost: id %#x ino %d renamed %s -> %s\n", + pr_debug("ghost: id %#x ino %u renamed %s -> %s\n", ui->ue->id, ui->ue->ino, path_parked, ui->name); } @@ -1542,11 +1542,11 @@ static int bind_on_deleted(int sk, struct unix_sk_info *ui) pos = strrchr(path, '/')) { *pos = '\0'; if (rmdir(path)) { - pr_perror("ghost: Can't remove directory %s on id %#x ino %d", + pr_perror("ghost: Can't remove directory %s on id %#x ino %u", path, ui->ue->id, ui->ue->ino); return -1; } - pr_debug("ghost: Removed %s on id %#x ino %d\n", + pr_debug("ghost: Removed %s on id %#x ino %u\n", path, ui->ue->id, ui->ue->ino); } } @@ -1594,13 +1594,13 @@ static int bind_unix_sk(int sk, struct unix_sk_info *ui) mutex_lock(mutex_ghost); if (ui->flags & USK_GHOST_FDSTORE) { - pr_debug("ghost: bind id %#x ino %d addr %s\n", + pr_debug("ghost: bind id %#x ino %u addr %s\n", ui->ue->id, ui->ue->ino, ui->name); ret = bind_on_deleted(sk, ui); if (ret) errno = -ret; } else { - pr_debug("bind id %#x ino %d addr %s\n", + pr_debug("bind id %#x ino %u addr %s\n", ui->ue->id, ui->ue->ino, ui->name); ret = bind(sk, (struct sockaddr *)&addr, sizeof(addr.sun_family) + ui->ue->name.len); @@ -1608,7 +1608,7 @@ static int bind_unix_sk(int sk, struct unix_sk_info *ui) goto done; } if (ret < 0) { - pr_perror("Can't bind id %#x ino %d addr %s", + pr_perror("Can't bind id %#x ino %u addr %s", ui->ue->id, ui->ue->ino, ui->name); goto done; } @@ -1654,7 +1654,7 @@ static int post_open_interconnected_master(struct unix_sk_info *ui) static void pr_info_opening(const char 
*prefix, struct unix_sk_info *ui, struct fdinfo_list_entry *fle) { - pr_info("Opening %s (stage %d id %#x ino %d peer %d)\n", + pr_info("Opening %s (stage %d id %#x ino %u peer %u)\n", prefix, fle->stage, ui->ue->id, ui->ue->ino, ui->ue->peer); } @@ -1950,7 +1950,7 @@ static char *socket_d_name(struct file_desc *d, char *buf, size_t s) ui = container_of(d, struct unix_sk_info, d); - if (snprintf(buf, s, "socket:[%d]", ui->ue->ino) >= s) { + if (snprintf(buf, s, "socket:[%u]", ui->ue->ino) >= s) { pr_err("Not enough room for unixsk %d identifier string\n", ui->ue->ino); return NULL; @@ -1981,14 +1981,14 @@ static int unlink_sk(struct unix_sk_info *ui) ret = unlinkat(AT_FDCWD, ui->name, 0) ? -1 : 0; if (ret < 0 && errno != ENOENT) { - pr_warn("Can't unlink socket %d peer %d (name %s dir %s)\n", + pr_warn("Can't unlink socket %u peer %u (name %s dir %s)\n", ui->ue->ino, ui->ue->peer, ui->name ? (ui->name[0] ? ui->name : &ui->name[1]) : "-", ui->name_dir ? ui->name_dir : "-"); ret = -errno; goto out; } else if (ret == 0) { - pr_debug("Unlinked socket %d peer %d (name %s dir %s)\n", + pr_debug("Unlinked socket %u peer %u (name %s dir %s)\n", ui->ue->ino, ui->ue->peer, ui->name ? (ui->name[0] ? ui->name : &ui->name[1]) : "-", ui->name_dir ? ui->name_dir : "-"); @@ -2065,7 +2065,7 @@ int unix_prepare_root_shared(void) char tp_name[32]; char st_name[32]; - pr_debug("ghost: id %#x type %s state %s ino %d peer %d address %s\n", + pr_debug("ghost: id %#x type %s state %s ino %u peer %u address %s\n", ui->ue->id, __socket_type_name(ui->ue->type, tp_name), __tcp_state_name(ui->ue->state, st_name), ui->ue->ino, ui->peer ? 
ui->peer->ue->ino : 0, @@ -2113,7 +2113,7 @@ static int collect_one_unixsk(void *o, ProtobufCMessage *base, struct cr_img *i) uname = "-"; } - pr_info(" `- Got id %#x ino %d type %s state %s peer %d (name %s%.*s dir %s)\n", + pr_info(" `- Got id %#x ino %u type %s state %s peer %u (name %s%.*s dir %s)\n", ui->ue->id, ui->ue->ino, ___socket_type_name(ui->ue->type), ___tcp_state_name(ui->ue->state), ui->ue->peer, prefix, ulen, uname, ui->name_dir ? ui->name_dir : "-"); @@ -2128,7 +2128,7 @@ static int collect_one_unixsk(void *o, ProtobufCMessage *base, struct cr_img *i) if (ui->ue->deleted) { if (!ui->name || !ui->ue->name.len || !ui->name[0]) { - pr_err("No name present, ino %d\n", ui->ue->ino); + pr_err("No name present, ino %u\n", ui->ue->ino); return -1; } From 578597299a82f0aea0ef7a3063e6dc6ea6fccb33 Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Mon, 30 Sep 2019 20:57:08 +0000 Subject: [PATCH 0018/1854] Cleanup do_full_int80() 1) Instead of tampering with the nr argument, do_full_int80() returns the value of the system call. It also avoids copying all registers back into the syscall_args32 argument after the syscall. 2) Additionally, the registers r12-r15 were added in the list of clobbers as kernels older than v4.4 do not preserve these. 3) Further, GCC uses a 128-byte red-zone as defined in the x86_64 ABI optimizing away the correct position of the %rsp register in leaf-functions. We now avoid tampering with the red-zone, fixing a SIGSEGV when running mmap_bug_test() in debug mode (DEBUG=1). 
Signed-off-by: Nicolas Viennot Reviewed-by: Dmitry Safonov <0x7f454c46@gmail.com> Signed-off-by: Andrei Vagin --- criu/arch/x86/crtools.c | 6 ++-- criu/arch/x86/include/asm/compat.h | 51 ++++++++++++++++++++---------- criu/arch/x86/kerndat.c | 4 +-- criu/arch/x86/restorer.c | 3 +- criu/arch/x86/sigaction_compat.c | 6 +--- 5 files changed, 40 insertions(+), 30 deletions(-) diff --git a/criu/arch/x86/crtools.c b/criu/arch/x86/crtools.c index efc23e5fe..e4073c27b 100644 --- a/criu/arch/x86/crtools.c +++ b/criu/arch/x86/crtools.c @@ -590,8 +590,7 @@ static int get_robust_list32(pid_t pid, uintptr_t head, uintptr_t len) .arg2 = (uint32_t)len, }; - do_full_int80(&s); - return (int)s.nr; + return do_full_int80(&s); } static int set_robust_list32(uint32_t head, uint32_t len) @@ -602,8 +601,7 @@ static int set_robust_list32(uint32_t head, uint32_t len) .arg1 = len, }; - do_full_int80(&s); - return (int)s.nr; + return do_full_int80(&s); } int get_task_futex_robust_list_compat(pid_t pid, ThreadCoreEntry *info) diff --git a/criu/arch/x86/include/asm/compat.h b/criu/arch/x86/include/asm/compat.h index cd1ae472d..acd552fb3 100644 --- a/criu/arch/x86/include/asm/compat.h +++ b/criu/arch/x86/include/asm/compat.h @@ -38,26 +38,45 @@ struct syscall_args32 { uint32_t nr, arg0, arg1, arg2, arg3, arg4, arg5; }; -static inline void do_full_int80(struct syscall_args32 *args) +static inline uint32_t do_full_int80(struct syscall_args32 *args) { /* - * r8-r11 registers are cleared during returning to userspace - * from syscall - that's x86_64 ABI to avoid leaking kernel - * pointers. + * Kernel older than v4.4 do not preserve r8-r15 registers when + * invoking int80, so we need to preserve them. * - * Other than that - we can't use %rbp in clobbers as GCC's inline - * assembly doesn't allow to do so. So, here is explicitly saving - * %rbp before syscall and restoring it's value afterward. 
+ * Additionally, %rbp is used as the 6th syscall argument, and we need + * to preserve its value when returning from the syscall to avoid + * upsetting GCC. However, we can't use %rbp in the GCC asm clobbers + * due to a GCC limitation. Instead, we explicitly save %rbp on the + * stack before invoking the syscall and restore its value afterward. + * + * Further, GCC may not adjust the %rsp pointer when allocating the + * args and ret variables because 1) do_full_int80() is a leaf + * function, and 2) the local variables (args and ret) are in the + * 128-byte red-zone as defined in the x86_64 ABI. To use the stack + * when preserving %rbp, we must either tell GCC to a) mark the + * function as non-leaf, or b) move away from the red-zone when using + * the stack. It seems that there is no easy way to do a), so we'll go + * with b). + * Note 1: Another workaround would have been to add %rsp in the list + * of clobbers, but this was deprecated in GCC 9. + * Note 2: This red-zone bug only manifests when compiling CRIU with + * DEBUG=1. 
*/ - asm volatile ("pushq %%rbp\n\t" - "mov %6, %%ebp\n\t" - "int $0x80\n\t" - "mov %%ebp, %6\n\t" - "popq %%rbp\n\t" - : "+a" (args->nr), - "+b" (args->arg0), "+c" (args->arg1), "+d" (args->arg2), - "+S" (args->arg3), "+D" (args->arg4), "+g" (args->arg5) - : : "r8", "r9", "r10", "r11"); + uint32_t ret; + + asm volatile ("sub $128, %%rsp\n\t" + "pushq %%rbp\n\t" + "mov %7, %%ebp\n\t" + "int $0x80\n\t" + "popq %%rbp\n\t" + "add $128, %%rsp\n\t" + : "=a" (ret) + : "a" (args->nr), + "b" (args->arg0), "c" (args->arg1), "d" (args->arg2), + "S" (args->arg3), "D" (args->arg4), "g" (args->arg5) + : "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"); + return ret; } #ifndef CR_NOGLIBC diff --git a/criu/arch/x86/kerndat.c b/criu/arch/x86/kerndat.c index f7593251b..94c954e1e 100644 --- a/criu/arch/x86/kerndat.c +++ b/criu/arch/x86/kerndat.c @@ -75,9 +75,7 @@ void *mmap_ia32(void *addr, size_t len, int prot, s.arg4 = fildes; s.arg5 = (uint32_t)off; - do_full_int80(&s); - - return (void *)(uintptr_t)s.nr; + return (void *)(uintptr_t)do_full_int80(&s); } /* diff --git a/criu/arch/x86/restorer.c b/criu/arch/x86/restorer.c index 2d335d5e1..b2c3b3668 100644 --- a/criu/arch/x86/restorer.c +++ b/criu/arch/x86/restorer.c @@ -54,8 +54,7 @@ int set_compat_robust_list(uint32_t head_ptr, uint32_t len) .arg1 = len, }; - do_full_int80(&s); - return (int)s.nr; + return do_full_int80(&s); } static int prepare_stack32(void **stack32) diff --git a/criu/arch/x86/sigaction_compat.c b/criu/arch/x86/sigaction_compat.c index b38ba8011..f467da490 100644 --- a/criu/arch/x86/sigaction_compat.c +++ b/criu/arch/x86/sigaction_compat.c @@ -28,7 +28,6 @@ extern char restore_rt_sigaction; */ int arch_compat_rt_sigaction(void *stack32, int sig, rt_sigaction_t_compat *act) { - int ret; struct syscall_args32 arg = {}; unsigned long act_stack = (unsigned long)stack32; @@ -49,8 +48,5 @@ int arch_compat_rt_sigaction(void *stack32, int sig, rt_sigaction_t_compat *act) arg.arg2 = 0; /* oldact */ arg.arg3 = 
(uint32_t)sizeof(act->rt_sa_mask); /* sigsetsize */ - do_full_int80(&arg); - asm volatile ("\t movl %%eax,%0\n" : "=r"(ret)); - return ret; + return do_full_int80(&arg); } - From 576a99f492b1f55050d4fde8560efe454ab887bc Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Fri, 4 Oct 2019 16:32:48 +0100 Subject: [PATCH 0019/1854] restorer/inotify: Don't overflow PIE stack PATH_MAX == 4096; PATH_MAX*8 == 32k; RESTORE_STACK_SIZE == 32k. Fixes: a3cdf948699c6 ("inotify: cleanup auxiliary events from queue") Cc: Pavel Tikhomirov Cc: Andrei Vagin Co-debugged-with: Andrei Vagin Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- criu/pie/restorer.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c index 390c0e1a9..dab58add6 100644 --- a/criu/pie/restorer.c +++ b/criu/pie/restorer.c @@ -1320,21 +1320,23 @@ static int fd_poll(int inotify_fd) } /* - * note: Actually kernel may want even more space for one event (see - * round_event_name_len), so using buffer of EVENT_BUFF_SIZE size may fail. - * To be on the safe side - take a bigger buffer, and these also allows to - * read more events in one syscall. + * In the worst case buf size should be: + * sizeof(struct inotify_event) * 2 + PATH_MAX + * See round_event_name_len() in kernel. 
+ */ -#define EVENT_BUFF_SIZE ((sizeof(struct inotify_event) + PATH_MAX)) +#define EVENT_BUFF_SIZE ((sizeof(struct inotify_event) * 2 + PATH_MAX)) /* * Read all available events from inotify queue */ static int cleanup_inotify_events(int inotify_fd) { - char buf[EVENT_BUFF_SIZE * 8]; + char buf[EVENT_BUFF_SIZE * 3]; int ret; + /* Limit buf to be less than half of restorer's stack */ + BUILD_BUG_ON(ARRAY_SIZE(buf) >= RESTORE_STACK_SIZE/2); + while (1) { ret = fd_poll(inotify_fd); if (ret < 0) { From 20d4920a8bf74d1eceebc076bcc00889ba40e9f7 Mon Sep 17 00:00:00 2001 From: Abhishek Dubey Date: Thu, 3 Oct 2019 19:39:18 +0530 Subject: [PATCH 0020/1854] Adding --pre-dump-mode option Two modes of pre-dump algorithm: 1) splicing memory by parasite --pre-dump-mode=splice (default) 2) using process_vm_readv syscall --pre-dump-mode=read Signed-off-by: Abhishek Dubey Signed-off-by: Andrei Vagin --- Documentation/criu.txt | 6 ++++++ criu/config.c | 10 ++++++++++ criu/cr-service.c | 13 +++++++++++++ criu/crtools.c | 2 ++ criu/include/cr_options.h | 7 +++++++ criu/mem.c | 13 ++++++++++++- images/rpc.proto | 6 ++++++ lib/c/criu.c | 15 +++++++++++++++ lib/c/criu.h | 7 +++++++ test/zdtm.py | 9 ++++++++- 10 files changed, 86 insertions(+), 2 deletions(-) diff --git a/Documentation/criu.txt b/Documentation/criu.txt index 28913a7fb..2729bc95a 100644 --- a/Documentation/criu.txt +++ b/Documentation/criu.txt @@ -156,6 +156,12 @@ In addition, *page-server* options may be specified. Turn on memory changes tracker in the kernel. If the option is not passed the memory tracker get turned on implicitly. +*--pre-dump-mode*='mode':: + There are two modes of the pre-dump algorithm. The 'splice' mode + is parasite based, whereas 'read' mode is based on process_vm_readv + syscall. The 'read' mode incurs reduced frozen time and reduced + memory pressure as compared to 'splice' mode. Default is 'splice' mode. + *dump* ~~~~~~ Performs a checkpoint procedure. 
diff --git a/criu/config.c b/criu/config.c index 2ad2fd43c..e5d42efe4 100644 --- a/criu/config.c +++ b/criu/config.c @@ -276,6 +276,7 @@ void init_opts(void) opts.empty_ns = 0; opts.status_fd = -1; opts.log_level = DEFAULT_LOGLEVEL; + opts.pre_dump_mode = PRE_DUMP_SPLICE; } bool deprecated_ok(char *what) @@ -517,6 +518,7 @@ int parse_options(int argc, char **argv, bool *usage_error, BOOL_OPT("tls", &opts.tls), {"tls-no-cn-verify", no_argument, &opts.tls_no_cn_verify, true}, { "cgroup-yard", required_argument, 0, 1096 }, + { "pre-dump-mode", required_argument, 0, 1097}, { }, }; @@ -818,6 +820,14 @@ int parse_options(int argc, char **argv, bool *usage_error, case 1096: SET_CHAR_OPTS(cgroup_yard, optarg); break; + case 1097: + if (!strcmp("read", optarg)) { + opts.pre_dump_mode = PRE_DUMP_READ; + } else if (strcmp("splice", optarg)) { + pr_err("Unable to parse value of --pre-dump-mode\n"); + return 1; + } + break; case 'V': pr_msg("Version: %s\n", CRIU_VERSION); if (strcmp(CRIU_GITID, "0")) diff --git a/criu/cr-service.c b/criu/cr-service.c index 95ba2e5ce..392e9ac50 100644 --- a/criu/cr-service.c +++ b/criu/cr-service.c @@ -473,6 +473,19 @@ static int setup_opts_from_req(int sk, CriuOpts *req) opts.lazy_pages = req->lazy_pages; } + if (req->has_pre_dump_mode) { + switch (req->pre_dump_mode) { + case CRIU_PRE_DUMP_MODE__SPLICE: + opts.pre_dump_mode = PRE_DUMP_SPLICE; + break; + case CRIU_PRE_DUMP_MODE__READ: + opts.pre_dump_mode = PRE_DUMP_READ; + break; + default: + goto err; + } + } + if (req->ps) { opts.port = (short)req->ps->port; diff --git a/criu/crtools.c b/criu/crtools.c index 5740b806d..700fad994 100644 --- a/criu/crtools.c +++ b/criu/crtools.c @@ -428,6 +428,8 @@ usage: " pages images of previous dump\n" " when used on restore, as soon as page is restored, it\n" " will be punched from the image\n" +" --pre-dump-mode splice - parasite based pre-dumping (default)\n" +" read - process_vm_readv syscall based pre-dumping\n" "\n" "Page/Service server options:\n" " 
--address ADDR address of server or service\n" diff --git a/criu/include/cr_options.h b/criu/include/cr_options.h index da7c10d69..2c1451e86 100644 --- a/criu/include/cr_options.h +++ b/criu/include/cr_options.h @@ -38,6 +38,12 @@ struct cg_root_opt { char *newroot; }; +/* + * Pre-dump variants + */ +#define PRE_DUMP_SPLICE 1 /* Pre-dump using parasite */ +#define PRE_DUMP_READ 2 /* Pre-dump using process_vm_readv syscall */ + /* * Cgroup management options. */ @@ -81,6 +87,7 @@ struct cr_options { int evasive_devices; int link_remap_ok; int log_file_per_pid; + int pre_dump_mode; bool swrk_restore; char *output; char *root; diff --git a/criu/mem.c b/criu/mem.c index de66a6210..911b9d21c 100644 --- a/criu/mem.c +++ b/criu/mem.c @@ -482,7 +482,18 @@ static int __parasite_dump_pages_seized(struct pstree_item *item, if (mdc->lazy) memcpy(pargs_iovs(args), pp->iovs, sizeof(struct iovec) * pp->nr_iovs); - ret = drain_pages(pp, ctl, args); + + /* + * Faking drain_pages for pre-dump here. Actual drain_pages for pre-dump + * will happen after task unfreezing in cr_pre_dump_finish(). This is + * actual optimization which reduces time for which process was frozen + * during pre-dump. 
+ */ + if (mdc->pre_dump && opts.pre_dump_mode == PRE_DUMP_READ) + ret = 0; + else + ret = drain_pages(pp, ctl, args); + if (!ret && !mdc->pre_dump) ret = xfer_pages(pp, &xfer); if (ret) diff --git a/images/rpc.proto b/images/rpc.proto index c402259ac..fc2f1bce2 100644 --- a/images/rpc.proto +++ b/images/rpc.proto @@ -47,6 +47,11 @@ enum criu_cg_mode { DEFAULT = 6; }; +enum criu_pre_dump_mode { + SPLICE = 1; + READ = 2; +}; + message criu_opts { required int32 images_dir_fd = 1; optional int32 pid = 2; /* if not set on dump, will dump requesting process */ @@ -121,6 +126,7 @@ message criu_opts { optional bool tls = 58; optional bool tls_no_cn_verify = 59; optional string cgroup_yard = 60; + optional criu_pre_dump_mode pre_dump_mode = 61 [default = SPLICE]; /* optional bool check_mounts = 128; */ } diff --git a/lib/c/criu.c b/lib/c/criu.c index 14ddff26d..fffb9fd9c 100644 --- a/lib/c/criu.c +++ b/lib/c/criu.c @@ -336,6 +336,21 @@ int criu_set_parent_images(const char *path) return criu_local_set_parent_images(global_opts, path); } +int criu_local_set_pre_dump_mode(criu_opts *opts, enum criu_pre_dump_mode mode) +{ + opts->rpc->has_pre_dump_mode = true; + if (mode == CRIU_PRE_DUMP_SPLICE || mode == CRIU_PRE_DUMP_READ) { + opts->rpc->pre_dump_mode = mode; + return 0; + } + return -1; +} + +int criu_set_pre_dump_mode(enum criu_pre_dump_mode mode) +{ + return criu_local_set_pre_dump_mode(global_opts, mode); +} + void criu_local_set_track_mem(criu_opts *opts, bool track_mem) { opts->rpc->has_track_mem = true; diff --git a/lib/c/criu.h b/lib/c/criu.h index cb37c5291..22db0fdcf 100644 --- a/lib/c/criu.h +++ b/lib/c/criu.h @@ -43,6 +43,11 @@ enum criu_cg_mode { CRIU_CG_MODE_DEFAULT, }; +enum criu_pre_dump_mode { + CRIU_PRE_DUMP_SPLICE = 1, + CRIU_PRE_DUMP_READ = 2 +}; + int criu_set_service_address(const char *path); void criu_set_service_fd(int fd); int criu_set_service_binary(const char *path); @@ -95,6 +100,7 @@ int criu_add_irmap_path(const char *path); int 
criu_add_inherit_fd(int fd, const char *key); int criu_add_external(const char *key); int criu_set_page_server_address_port(const char *address, int port); +int criu_set_pre_dump_mode(enum criu_pre_dump_mode mode); /* * The criu_notify_arg_t na argument is an opaque @@ -211,6 +217,7 @@ int criu_local_add_cg_yard(criu_opts *opts, const char *path); int criu_local_add_inherit_fd(criu_opts *opts, int fd, const char *key); int criu_local_add_external(criu_opts *opts, const char *key); int criu_local_set_page_server_address_port(criu_opts *opts, const char *address, int port); +int criu_local_set_pre_dump_mode(criu_opts *opts, enum criu_pre_dump_mode mode); void criu_local_set_notify_cb(criu_opts *opts, int (*cb)(char *action, criu_notify_arg_t na)); diff --git a/test/zdtm.py b/test/zdtm.py index f0a102413..3c0cee667 100755 --- a/test/zdtm.py +++ b/test/zdtm.py @@ -1019,6 +1019,7 @@ class criu: self.__tls = self.__tls_options() if opts['tls'] else [] self.__criu_bin = opts['criu_bin'] self.__crit_bin = opts['crit_bin'] + self.__pre_dump_mode = opts['pre_dump_mode'] def fini(self): if self.__lazy_migrate: @@ -1249,6 +1250,8 @@ class criu: a_opts += ['--leave-stopped'] if self.__empty_ns: a_opts += ['--empty-ns', 'net'] + if self.__pre_dump_mode: + a_opts += ["--pre-dump-mode", "%s" % self.__pre_dump_mode] nowait = False if self.__lazy_migrate and action == "dump": @@ -1835,7 +1838,7 @@ class Launcher: 'sat', 'script', 'rpc', 'lazy_pages', 'join_ns', 'dedup', 'sbs', 'freezecg', 'user', 'dry_run', 'noauto_dedup', 'remote_lazy_pages', 'show_stats', 'lazy_migrate', - 'tls', 'criu_bin', 'crit_bin') + 'tls', 'criu_bin', 'crit_bin', 'pre_dump_mode') arg = repr((name, desc, flavor, {d: self.__opts[d] for d in nd})) if self.__use_log: @@ -2482,6 +2485,10 @@ rp.add_argument("--criu-bin", rp.add_argument("--crit-bin", help="Path to crit binary", default='../crit/crit') +rp.add_argument("--pre-dump-mode", + help="Use splice or read mode of pre-dumping", + choices=['splice', 'read'], 
+ default='splice') lp = sp.add_parser("list", help="List tests") lp.set_defaults(action=list_tests) From e0ea21ad5ecafadad653f46a0ed8cbef69c7b883 Mon Sep 17 00:00:00 2001 From: Abhishek Dubey Date: Thu, 3 Oct 2019 19:39:19 +0530 Subject: [PATCH 0021/1854] Handling iov generation for non-PROT_READ regions Skip iov-generation for regions not having PROT_READ, since process_vm_readv syscall can't process them during "read" pre-dump. Handle random order of "read" & "splice" pre-dumps. Signed-off-by: Abhishek Dubey Signed-off-by: Andrei Vagin --- criu/cr-dump.c | 5 ++++ criu/mem.c | 56 ++++++++++++++++++++++++++++++++++++++++-- images/inventory.proto | 1 + 3 files changed, 60 insertions(+), 2 deletions(-) diff --git a/criu/cr-dump.c b/criu/cr-dump.c index fcbe816e8..dd5b62dd0 100644 --- a/criu/cr-dump.c +++ b/criu/cr-dump.c @@ -1485,6 +1485,9 @@ static int cr_pre_dump_finish(int status) if (ret) goto err; + he.has_pre_dump_mode = true; + he.pre_dump_mode = opts.pre_dump_mode; + pstree_switch_state(root_item, TASK_ALIVE); timing_stop(TIME_FROZEN); @@ -1914,6 +1917,8 @@ int cr_dump_tasks(pid_t pid) if (ret) goto err; + he.has_pre_dump_mode = false; + ret = write_img_inventory(&he); if (ret) goto err; diff --git a/criu/mem.c b/criu/mem.c index 911b9d21c..a5de23755 100644 --- a/criu/mem.c +++ b/criu/mem.c @@ -351,7 +351,8 @@ static int generate_vma_iovs(struct pstree_item *item, struct vma_area *vma, struct page_pipe *pp, struct page_xfer *xfer, struct parasite_dump_pages_args *args, struct parasite_ctl *ctl, pmc_t *pmc, - bool has_parent, bool pre_dump) + bool has_parent, bool pre_dump, + int parent_predump_mode) { u64 off = 0; u64 *map; @@ -361,6 +362,52 @@ static int generate_vma_iovs(struct pstree_item *item, struct vma_area *vma, !vma_area_is(vma, VMA_ANON_SHARED)) return 0; + /* + * To facilitate any combination of pre-dump modes to run after + * one another, we need to take extra care as discussed below. 
+ * + * The SPLICE mode pre-dump processes all types of memory regions, + * whereas READ mode pre-dump skips processing those memory regions + * which lack the PROT_READ flag. + * + * Now on mixing pre-dump modes: + * If SPLICE mode follows SPLICE mode : no issue + * -> everything dumped both times + * + * If READ mode follows READ mode : no issue + * -> non-PROT_READ skipped both times + * + * If READ mode follows SPLICE mode : no issue + * -> everything dumped at first, + * the non-PROT_READ skipped later + * + * If SPLICE mode follows READ mode : Need special care + * + * If READ pre-dump happens first, then it has skipped processing + * non-PROT_READ regions. Following SPLICE pre-dump expects pagemap + * entries for all mappings in parent pagemap, but last READ mode + * pre-dump cycle has skipped processing & pagemap generation for + * non-PROT_READ regions. So SPLICE mode throws error of missing + * pagemap entry for encountered non-PROT_READ mapping. + * + * To resolve this, the pre-dump-mode is stored in current pre-dump's + * inventory file. This pre-dump mode is read back from this file + * (present in parent pre-dump dir) as parent-pre-dump-mode during + * next pre-dump. + * + * If parent-pre-dump-mode and next-pre-dump-mode are in READ-mode -> + * SPLICE-mode order, then SPLICE mode doesn't expect mappings for + * non-PROT_READ regions in parent-image and marks "has_parent=false". 
+ */ + + if (!(vma->e->prot & PROT_READ)) { + if (opts.pre_dump_mode == PRE_DUMP_READ && pre_dump) + return 0; + if ((parent_predump_mode == PRE_DUMP_READ && + opts.pre_dump_mode == PRE_DUMP_SPLICE) || !pre_dump) + has_parent = false; + } + if (vma_entry_is(vma->e, VMA_AREA_AIORING)) { if (pre_dump) return 0; @@ -406,6 +453,7 @@ static int __parasite_dump_pages_seized(struct pstree_item *item, unsigned long pmc_size; int possible_pid_reuse = 0; bool has_parent; + int parent_predump_mode = -1; pr_info("\n"); pr_info("Dumping pages (type: %d pid: %d)\n", CR_FD_PAGES, item->pid->real); @@ -472,9 +520,13 @@ static int __parasite_dump_pages_seized(struct pstree_item *item, */ args->off = 0; has_parent = !!xfer.parent && !possible_pid_reuse; + if(mdc->parent_ie) + parent_predump_mode = mdc->parent_ie->pre_dump_mode; + list_for_each_entry(vma_area, &vma_area_list->h, list) { ret = generate_vma_iovs(item, vma_area, pp, &xfer, args, ctl, - &pmc, has_parent, mdc->pre_dump); + &pmc, has_parent, mdc->pre_dump, + parent_predump_mode); if (ret < 0) goto out_xfer; } diff --git a/images/inventory.proto b/images/inventory.proto index 7bc2b0c02..d1438e8c8 100644 --- a/images/inventory.proto +++ b/images/inventory.proto @@ -16,4 +16,5 @@ message inventory_entry { optional uint32 root_cg_set = 5; optional lsmtype lsmtype = 6; optional uint64 dump_uptime = 8; + optional uint32 pre_dump_mode = 9; } From 29b63e9a720ec3e996de3bdbeccde5d7c0cb46e3 Mon Sep 17 00:00:00 2001 From: Abhishek Dubey Date: Thu, 3 Oct 2019 19:39:20 +0530 Subject: [PATCH 0022/1854] Skip adding PROT_READ to non-PROT_READ mappings "read" mode pre-dump may fail even after adding PROT_READ flag. Adding PROT_READ works when dumping statically. See added comment for details. 
Signed-off-by: Abhishek Dubey Signed-off-by: Andrei Vagin --- criu/mem.c | 54 +++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 45 insertions(+), 9 deletions(-) diff --git a/criu/mem.c b/criu/mem.c index a5de23755..4e110c9e9 100644 --- a/criu/mem.c +++ b/criu/mem.c @@ -591,13 +591,47 @@ int parasite_dump_pages_seized(struct pstree_item *item, * able to read the memory contents. * * Afterwards -- reprotect memory back. + * + * This step is required for "splice" mode pre-dump and dump. + * Skip this step for "read" mode pre-dump. + * "read" mode pre-dump delegates processing of non-PROT_READ + * regions to dump stage. Adding PROT_READ works fine for + * static processing (target process frozen during pre-dump) + * and fails for dynamic as explained below. + * + * Consider following sequence of instances to reason, why + * not to add PROT_READ in "read" mode pre-dump ? + * + * CRIU- "read" pre-dump Target Process + * + * 1. Creates mapping M + * without PROT_READ + * 2. CRIU freezes target + * process + * 3. Collect the mappings + * 4. Add PROT_READ to M + * (non-PROT_READ region) + * 5. CRIU unfreezes target + * process + * 6. Add flag PROT_READ + * to mapping M + * 7. Revoke flag PROT_READ + * from mapping M + * 8. process_vm_readv tries + * to copy mapping M + * (believing M have + * PROT_READ flag) + * 9. 
syscall fails to copy + * data from M */ - pargs->add_prot = PROT_READ; - ret = compel_rpc_call_sync(PARASITE_CMD_MPROTECT_VMAS, ctl); - if (ret) { - pr_err("Can't dump unprotect vmas with parasite\n"); - return ret; + if (!mdc->pre_dump || opts.pre_dump_mode == PRE_DUMP_SPLICE) { + pargs->add_prot = PROT_READ; + ret = compel_rpc_call_sync(PARASITE_CMD_MPROTECT_VMAS, ctl); + if (ret) { + pr_err("Can't dump unprotect vmas with parasite\n"); + return ret; + } } if (fault_injected(FI_DUMP_PAGES)) { @@ -612,10 +646,12 @@ int parasite_dump_pages_seized(struct pstree_item *item, return ret; } - pargs->add_prot = 0; - if (compel_rpc_call_sync(PARASITE_CMD_MPROTECT_VMAS, ctl)) { - pr_err("Can't rollback unprotected vmas with parasite\n"); - ret = -1; + if (!mdc->pre_dump || opts.pre_dump_mode == PRE_DUMP_SPLICE) { + pargs->add_prot = 0; + if (compel_rpc_call_sync(PARASITE_CMD_MPROTECT_VMAS, ctl)) { + pr_err("Can't rollback unprotected vmas with parasite\n"); + ret = -1; + } } return ret; From 4c774afc18e8af458eeb03ea021ba52d0af4b32c Mon Sep 17 00:00:00 2001 From: Abhishek Dubey Date: Thu, 3 Oct 2019 19:39:21 +0530 Subject: [PATCH 0023/1854] Adding cnt_sub for stats manipulation adding cnt_sub function (complement of cnt_add). cnt_sub is utilized to decrement stats counter according to skipped page count during "read" mode pre-dump. 
Signed-off-by: Abhishek Dubey Signed-off-by: Andrei Vagin --- criu/include/stats.h | 1 + criu/stats.c | 12 ++++++++++++ 2 files changed, 13 insertions(+) diff --git a/criu/include/stats.h b/criu/include/stats.h index bab9a0507..5d408b7b1 100644 --- a/criu/include/stats.h +++ b/criu/include/stats.h @@ -45,6 +45,7 @@ enum { }; extern void cnt_add(int c, unsigned long val); +extern void cnt_sub(int c, unsigned long val); #define DUMP_STATS 1 #define RESTORE_STATS 2 diff --git a/criu/stats.c b/criu/stats.c index 7410b5ced..cb528011a 100644 --- a/criu/stats.c +++ b/criu/stats.c @@ -41,6 +41,18 @@ void cnt_add(int c, unsigned long val) BUG(); } +void cnt_sub(int c, unsigned long val) +{ + if (dstats != NULL) { + BUG_ON(c >= DUMP_CNT_NR_STATS); + dstats->counts[c] -= val; + } else if (rstats != NULL) { + BUG_ON(c >= RESTORE_CNT_NR_STATS); + atomic_sub(val, &rstats->counts[c]); + } else + BUG(); +} + static void timeval_accumulate(const struct timeval *from, const struct timeval *to, struct timeval *res) { From 98608b90de0f853b1c8a6e15b312320e1441c359 Mon Sep 17 00:00:00 2001 From: Abhishek Dubey Date: Thu, 3 Oct 2019 19:39:22 +0530 Subject: [PATCH 0024/1854] read mode pre-dump implementation Pre-dump using the process_vm_readv syscall. During frozen state, only iovecs will be generated and draining of memory happens after the task is unfrozen. Pre-dumping of shared memory remains unmodified. 
Signed-off-by: Abhishek Dubey Signed-off-by: Andrei Vagin --- criu/cr-dump.c | 7 +- criu/include/page-xfer.h | 4 + criu/page-xfer.c | 389 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 399 insertions(+), 1 deletion(-) diff --git a/criu/cr-dump.c b/criu/cr-dump.c index dd5b62dd0..ff05e38d7 100644 --- a/criu/cr-dump.c +++ b/criu/cr-dump.c @@ -1513,7 +1513,12 @@ static int cr_pre_dump_finish(int status) goto err; mem_pp = dmpi(item)->mem_pp; - ret = page_xfer_dump_pages(&xfer, mem_pp); + + if (opts.pre_dump_mode == PRE_DUMP_READ) + ret = page_xfer_predump_pages(item->pid->real, + &xfer, mem_pp); + else + ret = page_xfer_dump_pages(&xfer, mem_pp); xfer.close(&xfer); diff --git a/criu/include/page-xfer.h b/criu/include/page-xfer.h index fa72273ea..98061e2d3 100644 --- a/criu/include/page-xfer.h +++ b/criu/include/page-xfer.h @@ -9,6 +9,9 @@ struct ps_info { extern int cr_page_server(bool daemon_mode, bool lazy_dump, int cfd); +/* User buffer for read-mode pre-dump*/ +#define BUFFER_SIZE (PIPE_MAX_SIZE << PAGE_SHIFT) + /* * page_xfer -- transfer pages into image file. 
* Two images backends are implemented -- local image file @@ -48,6 +51,7 @@ struct page_xfer { extern int open_page_xfer(struct page_xfer *xfer, int fd_type, unsigned long id); struct page_pipe; extern int page_xfer_dump_pages(struct page_xfer *, struct page_pipe *); +extern int page_xfer_predump_pages(int pid, struct page_xfer *, struct page_pipe *); extern int connect_to_page_server_to_send(void); extern int connect_to_page_server_to_recv(int epfd); extern int disconnect_from_page_server(void); diff --git a/criu/page-xfer.c b/criu/page-xfer.c index 75e135c66..8709df745 100644 --- a/criu/page-xfer.c +++ b/criu/page-xfer.c @@ -6,6 +6,7 @@ #include #include #include +#include #undef LOG_PREFIX #define LOG_PREFIX "page-xfer: " @@ -480,6 +481,394 @@ static inline u32 ppb_xfer_flags(struct page_xfer *xfer, struct page_pipe_buf *p return PE_PRESENT; } +/* + * Optimized pre-dump algorithm + * ============================== + * + * Note: Please refer man(2) page of process_vm_readv syscall. + * + * The following discussion covers the possibly faulty-iov + * locations in an iovec, which hinders process_vm_readv from + * dumping the entire iovec in a single invocation. + * + * Memory layout of target process: + * + * Pages: A B C + * +--------+--------+--------+--------+--------+--------+ + * ||||||||||||||||||||||||||||||||||||||||||||||||||||||| + * +--------+--------+--------+--------+--------+--------+ + * + * Single "iov" representation: {starting_address, length_in_bytes} + * An iovec is array of iov-s. + * + * NOTE: For easy representation and discussion purpose, we carry + * out further discussion at "page granularity". + * length_in_bytes will represent page count in iov instead + * of byte count. Same assumption applies for the syscall's + * return value. Instead of returning the number of bytes + * read, it returns a page count. + * + * For above memory mapping, generated iovec: {A,1}{B,1}{C,4} + * + * This iovec remains unmodified once generated. 
At the same + * time some of memory regions listed in iovec may get modified + * (unmap/change protection) by the target process while syscall + * is trying to dump iovec regions. + * + * Case 1: + * A is unmapped, {A,1} become faulty iov + * + * A B C + * +--------+--------+--------+--------+--------+--------+ + * | |||||||||||||||||||||||||||||||||||||||||||||| + * +--------+--------+--------+--------+--------+--------+ + * ^ ^ + * | | + * start | + * (1) | + * start + * (2) + * + * process_vm_readv will return -1. Increment start pointer(2), + * syscall will process {B,1}{C,4} in one go and copy 5 pages + * to userbuf from iov-B and iov-C. + * + * Case 2: + * B is unmapped, {B,1} become faulty iov + * + * A B C + * +--------+--------+--------+--------+--------+--------+ + * ||||||||| ||||||||||||||||||||||||||||||||||||| + * +--------+--------+--------+--------+--------+--------+ + * ^ ^ + * | | + * start | + * (1) | + * start + * (2) + * + * process_vm_readv will return 1, i.e. page A copied to + * userbuf successfully and syscall stopped, since B got + * unmapped. + * + * Increment the start pointer to C(2) and invoke syscall. + * Userbuf contains 5 pages overall from iov-A and iov-C. + * + * Case 3: + * This case deals with partial unmapping of iov representing + * more than one pagesize region. + * + * Syscall can't process such faulty iov as whole. So we + * process such regions part-by-part and form new sub-iovs + * in aux_iov from successfully processed pages. + * + * + * Part 3.1: + * First page of C is unmapped + * + * A B C + * +--------+--------+--------+--------+--------+--------+ + * |||||||||||||||||| |||||||||||||||||||||||||||| + * +--------+--------+--------+--------+--------+--------+ + * ^ ^ + * | | + * start | + * (1) | + * dummy + * (2) + * + * process_vm_readv will return 2, i.e. pages A and B copied. + * We identify length of iov-C is more than 1 page, that is + * where this case differs from Case 2. 
+ * + * dummy-iov is introduced(2) as: {C+1,3}. dummy-iov can be + * directly placed at next page to failing page. This will copy + * remaining 3 pages from iov-C to userbuf. Finally create + * modified iov entry in aux_iov. Complete aux_iov look like: + * + * aux_iov: {A,1}{B,1}{C+1,3}* + * + * + * Part 3.2: + * In between page of C is unmapped, let's say third + * + * A B C + * +--------+--------+--------+--------+--------+--------+ + * |||||||||||||||||||||||||||||||||||| |||||||||| + * +--------+--------+--------+--------+--------+--------+ + * ^ ^ + * | |-----------------| | + * start partial_read_bytes | + * (1) | + * dummy + * (2) + * + * process_vm_readv will return 4, i.e. pages A and B copied + * completely and first two pages of C are also copied. + * + * Since, iov-C is not processed completely, we need to find + * "partial_read_byte" count to place out dummy-iov for + * remainig processing of iov-C. This function is performed by + * analyze_iov function. + * + * dummy-iov will be(2): {C+3,1}. dummy-iov will be placed + * next to first failing address to process remaining iov-C. + * New entries in aux_iov will look like: + * + * aux_iov: {A,1}{B,1}{C,2}*{C+3,1}* + */ + +unsigned long handle_faulty_iov(int pid, struct iovec* riov, + unsigned long faulty_index, + struct iovec *bufvec, struct iovec* aux_iov, + unsigned long* aux_len, + unsigned long partial_read_bytes) +{ + /* Handling Case 2*/ + if (riov[faulty_index].iov_len == PAGE_SIZE) { + cnt_sub(CNT_PAGES_WRITTEN, 1); + return 0; + } + + struct iovec dummy; + ssize_t bytes_read; + unsigned long offset = 0; + unsigned long final_read_cnt = 0; + + /* Handling Case 3-Part 3.2*/ + offset = (partial_read_bytes)? 
partial_read_bytes : PAGE_SIZE; + + dummy.iov_base = riov[faulty_index].iov_base + offset; + dummy.iov_len = riov[faulty_index].iov_len - offset; + + if (!partial_read_bytes) + cnt_sub(CNT_PAGES_WRITTEN, 1); + + while (dummy.iov_len) { + + bytes_read = process_vm_readv(pid, bufvec, 1, &dummy, 1, 0); + + if(bytes_read == -1) { + /* Handling faulty page read in faulty iov */ + cnt_sub(CNT_PAGES_WRITTEN, 1); + dummy.iov_base += PAGE_SIZE; + dummy.iov_len -= PAGE_SIZE; + continue; + } + + /* If aux-iov can merge and expand or new entry required */ + if (aux_iov[(*aux_len)-1].iov_base + + aux_iov[(*aux_len)-1].iov_len == dummy.iov_base) + aux_iov[(*aux_len)-1].iov_len += bytes_read; + else { + aux_iov[*aux_len].iov_base = dummy.iov_base; + aux_iov[*aux_len].iov_len = bytes_read; + (*aux_len) += 1; + } + + dummy.iov_base += bytes_read; + dummy.iov_len -= bytes_read; + bufvec->iov_base += bytes_read; + bufvec->iov_len -= bytes_read; + final_read_cnt += bytes_read; + } + + return final_read_cnt; +} + +/* + * This function will position start pointer to the latest + * successfully read iov in iovec. In case of partial read it + * returns partial_read_bytes, otherwise 0. 
+ */ +static unsigned long analyze_iov(ssize_t bytes_read, struct iovec* riov, + unsigned long *index, struct iovec *aux_iov, + unsigned long *aux_len) +{ + ssize_t processed_bytes = 0; + unsigned long partial_read_bytes = 0; + + /* correlating iovs with read bytes */ + while (processed_bytes < bytes_read) { + + processed_bytes += riov[*index].iov_len; + aux_iov[*aux_len].iov_base = riov[*index].iov_base; + aux_iov[*aux_len].iov_len = riov[*index].iov_len; + + (*aux_len) += 1; + (*index) += 1; + } + + /* handling partially processed faulty iov*/ + if (processed_bytes - bytes_read) { + + (*index) -= 1; + + partial_read_bytes = riov[*index].iov_len + - (processed_bytes - bytes_read); + aux_iov[*aux_len-1].iov_len = partial_read_bytes; + } + + return partial_read_bytes; +} + +/* + * This function iterates over complete ppb->iov entries and pass + * them to process_vm_readv syscall. + * + * Since process_vm_readv returns count of successfully read bytes. + * It does not point to iovec entry associated to last successful + * byte read. The correlation between bytes read and corresponding + * iovec is setup through analyze_iov function. + * + * If all iovecs are not processed in one go, it means there exists + * some faulty iov entry(memory mapping modified after it was grabbed) + * in iovec. process_vm_readv syscall stops at such faulty iov and + * skip processing further any entry in iovec. This is handled by + * handle_faulty_iov function. 
+ */ +static long fill_userbuf(int pid, struct page_pipe_buf *ppb, + struct iovec *bufvec, + struct iovec* aux_iov, + unsigned long *aux_len) +{ + struct iovec *riov = ppb->iov; + ssize_t bytes_read; + unsigned long total_read = 0; + unsigned long start = 0; + unsigned long partial_read_bytes = 0; + + while (start < ppb->nr_segs) { + + bytes_read = process_vm_readv(pid, bufvec, 1, &riov[start], + ppb->nr_segs - start, 0); + + if (bytes_read == -1) { + /* Handling Case 1*/ + if (riov[start].iov_len == PAGE_SIZE) { + cnt_sub(CNT_PAGES_WRITTEN, 1); + start += 1; + continue; + } else if (errno == ESRCH) { + pr_debug("Target process PID:%d not found\n", pid); + return ESRCH; + } + } + + partial_read_bytes = 0; + + if (bytes_read > 0) { + partial_read_bytes = analyze_iov(bytes_read, riov, + &start, aux_iov, + aux_len); + bufvec->iov_base += bytes_read; + bufvec->iov_len -= bytes_read; + total_read += bytes_read; + } + + /* + * If all iovs not processed in one go, + * it means some iov in between has failed. + */ + if (start < ppb->nr_segs) + total_read += handle_faulty_iov(pid, riov, start, bufvec, + aux_iov, aux_len, + partial_read_bytes); + + start += 1; + } + + return total_read; +} + +/* + * This function is similar to page_xfer_dump_pages, instead it uses + * auxiliary_iov array for pagemap generation. + * + * The entries of ppb->iov may mismatch with actual process mappings + * present at time of pre-dump. Such entries need to be adjusted as per + * the pages read by process_vm_readv syscall. These adjusted entries + * along with unmodified entries are present in aux_iov array. 
+ */ + +int page_xfer_predump_pages(int pid, struct page_xfer *xfer, + struct page_pipe *pp) +{ + struct page_pipe_buf *ppb; + unsigned int cur_hole = 0, i; + unsigned long ret, bytes_read; + struct iovec bufvec; + + struct iovec aux_iov[PIPE_MAX_SIZE]; + unsigned long aux_len; + + char *userbuf = mmap(NULL, BUFFER_SIZE, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + + if (userbuf == MAP_FAILED) { + pr_perror("Unable to mmap a buffer"); + return -1; + } + + list_for_each_entry(ppb, &pp->bufs, l) { + + aux_len = 0; + bufvec.iov_len = BUFFER_SIZE; + bufvec.iov_base = userbuf; + + bytes_read = fill_userbuf(pid, ppb, &bufvec, aux_iov, &aux_len); + + if (bytes_read == ESRCH) { + munmap(userbuf, BUFFER_SIZE); + return -1; + } + + bufvec.iov_base = userbuf; + bufvec.iov_len = bytes_read; + ret = vmsplice(ppb->p[1], &bufvec, 1, SPLICE_F_NONBLOCK); + + if (ret == -1 || ret != bytes_read) { + pr_err("vmsplice: Failed to splice user buffer to pipe %ld\n", ret); + munmap(userbuf, BUFFER_SIZE); + return -1; + } + + /* generating pagemap */ + for (i = 0; i < aux_len; i++) { + + struct iovec iov = aux_iov[i]; + u32 flags; + + ret = dump_holes(xfer, pp, &cur_hole, iov.iov_base); + if (ret) { + munmap(userbuf, BUFFER_SIZE); + return ret; + } + + BUG_ON(iov.iov_base < (void *)xfer->offset); + iov.iov_base -= xfer->offset; + pr_debug("\t p %p [%u]\n", iov.iov_base, + (unsigned int)(iov.iov_len / PAGE_SIZE)); + + flags = ppb_xfer_flags(xfer, ppb); + + if (xfer->write_pagemap(xfer, &iov, flags)) { + munmap(userbuf, BUFFER_SIZE); + return -1; + } + + if (xfer->write_pages(xfer, ppb->p[0], iov.iov_len)) { + munmap(userbuf, BUFFER_SIZE); + return -1; + } + } + + } + + munmap(userbuf, BUFFER_SIZE); + return dump_holes(xfer, pp, &cur_hole, NULL); +} + int page_xfer_dump_pages(struct page_xfer *xfer, struct page_pipe *pp) { struct page_pipe_buf *ppb; From befbbd9bba013533a05547c9b3702a256904190c Mon Sep 17 00:00:00 2001 From: Abhishek Dubey Date: Thu, 3 Oct 2019 19:39:23 
+0530 Subject: [PATCH 0025/1854] Refactor time accounting macros refactoring time macros as per read mode pre-dump design. Signed-off-by: Abhishek Dubey Signed-off-by: Andrei Vagin --- criu/cr-dump.c | 7 +++++-- criu/page-xfer.c | 8 ++++++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/criu/cr-dump.c b/criu/cr-dump.c index ff05e38d7..f72373d22 100644 --- a/criu/cr-dump.c +++ b/criu/cr-dump.c @@ -1514,11 +1514,14 @@ static int cr_pre_dump_finish(int status) mem_pp = dmpi(item)->mem_pp; - if (opts.pre_dump_mode == PRE_DUMP_READ) + if (opts.pre_dump_mode == PRE_DUMP_READ) { + timing_stop(TIME_MEMWRITE); ret = page_xfer_predump_pages(item->pid->real, &xfer, mem_pp); - else + } + else { ret = page_xfer_dump_pages(&xfer, mem_pp); + } xfer.close(&xfer); diff --git a/criu/page-xfer.c b/criu/page-xfer.c index 8709df745..4d2d046ef 100644 --- a/criu/page-xfer.c +++ b/criu/page-xfer.c @@ -812,6 +812,8 @@ int page_xfer_predump_pages(int pid, struct page_xfer *xfer, list_for_each_entry(ppb, &pp->bufs, l) { + timing_start(TIME_MEMDUMP); + aux_len = 0; bufvec.iov_len = BUFFER_SIZE; bufvec.iov_base = userbuf; @@ -833,6 +835,9 @@ int page_xfer_predump_pages(int pid, struct page_xfer *xfer, return -1; } + timing_stop(TIME_MEMDUMP); + timing_start(TIME_MEMWRITE); + /* generating pagemap */ for (i = 0; i < aux_len; i++) { @@ -863,9 +868,12 @@ int page_xfer_predump_pages(int pid, struct page_xfer *xfer, } } + timing_stop(TIME_MEMWRITE); } munmap(userbuf, BUFFER_SIZE); + timing_start(TIME_MEMWRITE); + return dump_holes(xfer, pp, &cur_hole, NULL); } From d30557699616d27bed6e2e1fd6bbdf45a8c99d7f Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Fri, 4 Oct 2019 19:36:37 +0300 Subject: [PATCH 0026/1854] zdtm: handle --pre-dump-mode in the rpc mode Signed-off-by: Andrei Vagin --- test/zdtm.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/test/zdtm.py b/test/zdtm.py index 3c0cee667..571962241 100755 --- a/test/zdtm.py +++ b/test/zdtm.py @@ -912,6 
+912,13 @@ class criu_rpc: if arg == '--prev-images-dir': criu.opts.parent_img = args.pop(0) continue + if arg == '--pre-dump-mode': + key = args.pop(0) + mode = crpc.rpc.READ + if key == "splice": + mode = crpc.rpc.SPLICE + criu.opts.pre_dump_mode = mode + continue if arg == '--track-mem': criu.opts.track_mem = True continue @@ -929,7 +936,7 @@ class criu_rpc: inhfd.key = key continue - raise test_fail_exc('RPC for %s required' % arg) + raise test_fail_exc('RPC for %s(%s) required' % (arg, args.pop(0))) @staticmethod def run(action, From 7c97cc7eb26c7232ca286e5ad4322c3be2bdcfca Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Fri, 4 Oct 2019 20:02:07 +0300 Subject: [PATCH 0027/1854] lib/c: fix a compile time error lib/c/criu.c:343:30: error: implicit conversion from enumeration type 'enum criu_pre_dump_mode' to different enumeration type 'CriuPreDumpMode' (aka 'enum _CriuPreDumpMode') [-Werror,-Wenum-conversion opts->rpc->pre_dump_mode = mode; ~ ^~~~ Signed-off-by: Andrei Vagin --- lib/c/criu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/c/criu.c b/lib/c/criu.c index fffb9fd9c..1d0a235f4 100644 --- a/lib/c/criu.c +++ b/lib/c/criu.c @@ -340,7 +340,7 @@ int criu_local_set_pre_dump_mode(criu_opts *opts, enum criu_pre_dump_mode mode) { opts->rpc->has_pre_dump_mode = true; if (mode == CRIU_PRE_DUMP_SPLICE || mode == CRIU_PRE_DUMP_READ) { - opts->rpc->pre_dump_mode = mode; + opts->rpc->pre_dump_mode = (CriuPreDumpMode)mode; return 0; } return -1; From 6b264f591f42e0ee3e1ceee103d5c557184829d5 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Sat, 5 Oct 2019 22:46:02 +0300 Subject: [PATCH 0028/1854] criu: use atomic_add instead of atomic_sub atomic_sub isn't defined for all platforms. 
Reported-by: Mr Jenkins Cc: Abhishek Dubey Signed-off-by: Andrei Vagin --- criu/stats.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/criu/stats.c b/criu/stats.c index cb528011a..891c37800 100644 --- a/criu/stats.c +++ b/criu/stats.c @@ -48,7 +48,7 @@ void cnt_sub(int c, unsigned long val) dstats->counts[c] -= val; } else if (rstats != NULL) { BUG_ON(c >= RESTORE_CNT_NR_STATS); - atomic_sub(val, &rstats->counts[c]); + atomic_add(-val, &rstats->counts[c]); } else BUG(); } From 3efe44382fef816ea274ed1833adc1abfa4b4f06 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Sun, 6 Oct 2019 01:01:50 +0300 Subject: [PATCH 0029/1854] image: avoid name conflicts in image files Conflict register for file "sk-opts.proto": READ is already defined in file "rpc.proto". Please fix the conflict by adding package name on the proto file, or use different name for the duplication. Note: enum values appear as siblings of the enum type instead of children of it. https://github.com/checkpoint-restore/criu/issues/815 Signed-off-by: Andrei Vagin --- criu/cr-service.c | 2 +- images/rpc.proto | 2 +- test/zdtm.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/criu/cr-service.c b/criu/cr-service.c index 392e9ac50..a70f99d71 100644 --- a/criu/cr-service.c +++ b/criu/cr-service.c @@ -478,7 +478,7 @@ static int setup_opts_from_req(int sk, CriuOpts *req) case CRIU_PRE_DUMP_MODE__SPLICE: opts.pre_dump_mode = PRE_DUMP_SPLICE; break; - case CRIU_PRE_DUMP_MODE__READ: + case CRIU_PRE_DUMP_MODE__VM_READ: opts.pre_dump_mode = PRE_DUMP_READ; break; default: diff --git a/images/rpc.proto b/images/rpc.proto index fc2f1bce2..df1b5aed2 100644 --- a/images/rpc.proto +++ b/images/rpc.proto @@ -49,7 +49,7 @@ enum criu_cg_mode { enum criu_pre_dump_mode { SPLICE = 1; - READ = 2; + VM_READ = 2; }; message criu_opts { diff --git a/test/zdtm.py b/test/zdtm.py index 571962241..6d3fddfad 100755 --- a/test/zdtm.py +++ b/test/zdtm.py @@ -914,7 +914,7 @@ class criu_rpc: continue if 
arg == '--pre-dump-mode': key = args.pop(0) - mode = crpc.rpc.READ + mode = crpc.rpc.VM_READ if key == "splice": mode = crpc.rpc.SPLICE criu.opts.pre_dump_mode = mode From 71c2a9dc73f679df93f2c749fed39088616a7b16 Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Tue, 1 Oct 2019 20:56:26 +0000 Subject: [PATCH 0030/1854] Guard against empty file lock status The lock status string may be empty. This can happen when the owner of the lock is invisible from our PID namespace. This unfortunate behavior is fixed in kernels v4.19 and up (see commit 1cf8e5de40) Signed-off-by: Nicolas Viennot Signed-off-by: Andrei Vagin --- criu/proc_parse.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/criu/proc_parse.c b/criu/proc_parse.c index 97f82ee01..d67392a12 100644 --- a/criu/proc_parse.c +++ b/criu/proc_parse.c @@ -1669,17 +1669,27 @@ static int parse_fdinfo_pid_s(int pid, int fd, int type, void *arg) if (fdinfo_field(str, "lock")) { struct file_lock *fl; struct fdinfo_common *fdinfo = arg; + char *flock_status = str+sizeof("lock:\t")-1; if (type != FD_TYPES__UND) continue; + /* + * The lock status can be empty when the owner of the + * lock is invisible from our PID namespace. + * This unfortunate behavior is fixed in kernels v4.19 + * and up (see commit 1cf8e5de40). + */ + if (flock_status[0] == '\0') + continue; + fl = alloc_file_lock(); if (!fl) { pr_perror("Alloc file lock failed!"); goto out; } - if (parse_file_lock_buf(str + 6, fl, 0)) { + if (parse_file_lock_buf(flock_status, fl, 0)) { xfree(fl); goto parse_err; } From 5a92f100b88e25981d7d51b3f4db374297fcff3c Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Sat, 28 Sep 2019 06:59:45 +0100 Subject: [PATCH 0031/1854] page-pipe: Resize up to PIPE_MAX_SIZE When performing pre-dump we continuously increase the page-pipe size to fit the max amount memory pages in the pipe's buffer. However, we never actually set the pipe's buffer size to max. 
By doing so, we can reduce the number of pipe-s necessary for pre-dump and improve the performance as shown in the example below. For example, let's consider the following process: #include <stdio.h> #include <stdlib.h> #include <unistd.h> void main(void) { int i = 0; void *cache = calloc(1, 1024 * 1024 * 1024); while(1) { printf("%d\n", i++); sleep(1); } } stats-dump before this change: frozen_time: 123538 memdump_time: 95344 memwrite_time: 11980078 pages_scanned: 262721 pages_written: 262169 page_pipes: 513 page_pipe_bufs: 519 stats-dump after this change: frozen_time: 83287 memdump_time: 54587 memwrite_time: 12547466 pages_scanned: 262721 pages_written: 262169 page_pipes: 257 page_pipe_bufs: 263 Signed-off-by: Radostin Stoyanov --- criu/page-pipe.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/criu/page-pipe.c b/criu/page-pipe.c index a8216962d..439c180e4 100644 --- a/criu/page-pipe.c +++ b/criu/page-pipe.c @@ -54,8 +54,12 @@ static inline int ppb_resize_pipe(struct page_pipe_buf *ppb) if (ppb->pages_in + ppb->pipe_off < ppb->pipe_size) return 0; - if (new_size > PIPE_MAX_SIZE) - return 1; + if (new_size > PIPE_MAX_SIZE) { + if (ppb->pipe_size < PIPE_MAX_SIZE) + ppb->pipe_size = PIPE_MAX_SIZE; + else + return 1; + } ret = __ppb_resize_pipe(ppb, new_size); if (ret < 0) From f65b17e976633ad4d4a10dae96e3279157d8e77f Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Tue, 30 Apr 2019 11:35:26 +0300 Subject: [PATCH 0032/1854] cgroup: fix cg_yard leak on error path in prepare_cgroup_sfd Signed-off-by: Pavel Tikhomirov --- criu/cgroup.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/criu/cgroup.c b/criu/cgroup.c index 1be8be234..a66fc960e 100644 --- a/criu/cgroup.c +++ b/criu/cgroup.c @@ -1730,11 +1730,11 @@ static int prepare_cgroup_sfd(CgroupEntry *ce) pr_debug("\tMaking controller dir %s (%s)\n", paux, opt); if (mkdir(paux, 0700)) { pr_perror("\tCan't make controller dir %s", paux); - return -1; + goto err; } if (mount("none", paux, "cgroup",
0, opt) < 0) { pr_perror("\tCan't mount controller dir %s", paux); - return -1; + goto err; } } From e56401ed3c187150c8b95cb5fd69e0d637c5515c Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Sat, 14 Sep 2019 12:47:14 +0100 Subject: [PATCH 0033/1854] image-desc: Remove CR_FD_FILE_LOCKS_PID The support for per-pid images with locks has been dropped with commit d040219 ("locks: Drop support for per-pid images with locks") and CR_FD_FILE_LOCKS_PID is not used. Signed-off-by: Radostin Stoyanov --- criu/image-desc.c | 5 ----- criu/include/image-desc.h | 1 - 2 files changed, 6 deletions(-) diff --git a/criu/image-desc.c b/criu/image-desc.c index 053e7af21..81cd07484 100644 --- a/criu/image-desc.c +++ b/criu/image-desc.c @@ -112,9 +112,4 @@ struct cr_fd_desc_tmpl imgset_template[CR_FD_MAX] = { .magic = IRMAP_CACHE_MAGIC, .oflags = O_SERVICE | O_FORCE_LOCAL, }, - - [CR_FD_FILE_LOCKS_PID] = { - .fmt = "filelocks-%u.img", - .magic = FILE_LOCKS_MAGIC, - }, }; diff --git a/criu/include/image-desc.h b/criu/include/image-desc.h index 3135f56b4..fea80a719 100644 --- a/criu/include/image-desc.h +++ b/criu/include/image-desc.h @@ -79,7 +79,6 @@ enum { CR_FD_RLIMIT, CR_FD_ITIMERS, CR_FD_POSIX_TIMERS, - CR_FD_FILE_LOCKS_PID, CR_FD_IRMAP_CACHE, CR_FD_CPUINFO, From 477c3a4b0b7a246808afb2f12d0553db14dd74a6 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Tue, 8 Oct 2019 21:37:22 +0100 Subject: [PATCH 0034/1854] service: Use space on stack for msg buffer RPC messages have a fairly small size and using space on the stack might be a better option. This change follows the pattern used with do_pb_read_one() and pb_write_one().
Signed-off-by: Radostin Stoyanov --- criu/cr-service.c | 56 +++++++++++++++++++++++------------------ criu/include/protobuf.h | 7 ++++++ criu/protobuf.c | 7 ------ 3 files changed, 39 insertions(+), 31 deletions(-) diff --git a/criu/cr-service.c b/criu/cr-service.c index a70f99d71..549b3368b 100644 --- a/criu/cr-service.c +++ b/criu/cr-service.c @@ -27,6 +27,7 @@ #include "cr-service.h" #include "cr-service-const.h" #include "page-xfer.h" +#include "protobuf.h" #include "net.h" #include "mount.h" #include "filesystems.h" @@ -49,18 +50,21 @@ unsigned int service_sk_ino = -1; static int recv_criu_msg(int socket_fd, CriuReq **req) { - unsigned char *buf; - int len; + u8 local[PB_PKOBJ_LOCAL_SIZE]; + void *buf = (void *)&local; + int len, exit_code = -1; len = recv(socket_fd, NULL, 0, MSG_TRUNC | MSG_PEEK); if (len == -1) { pr_perror("Can't read request"); - return -1; + goto err; } - buf = xmalloc(len); - if (!buf) - return -ENOMEM; + if (len > sizeof(local)) { + buf = xmalloc(len); + if (!buf) + return -ENOMEM; + } len = recv(socket_fd, buf, len, MSG_TRUNC); if (len == -1) { @@ -80,43 +84,47 @@ static int recv_criu_msg(int socket_fd, CriuReq **req) goto err; } - xfree(buf); - return 0; + exit_code = 0; err: - xfree(buf); - return -1; + if (buf != (void *)&local) + xfree(buf); + return exit_code; } static int send_criu_msg_with_fd(int socket_fd, CriuResp *msg, int fd) { - unsigned char *buf; - int len, ret; + u8 local[PB_PKOBJ_LOCAL_SIZE]; + void *buf = (void *)&local; + int len, exit_code = -1; len = criu_resp__get_packed_size(msg); - buf = xmalloc(len); - if (!buf) - return -ENOMEM; + if (len > sizeof(local)) { + buf = xmalloc(len); + if (!buf) + return -ENOMEM; + } if (criu_resp__pack(msg, buf) != len) { pr_perror("Failed packing response"); goto err; } - if (fd >= 0) { - ret = send_fds(socket_fd, NULL, 0, &fd, 1, buf, len); - } else - ret = write(socket_fd, buf, len); - if (ret < 0) { + if (fd >= 0) + exit_code = send_fds(socket_fd, NULL, 0, &fd, 1, buf, len); + 
else + exit_code = write(socket_fd, buf, len); + + if (exit_code < 0) { pr_perror("Can't send response"); goto err; } - xfree(buf); - return 0; + exit_code = 0; err: - xfree(buf); - return -1; + if (buf != (void *)&local) + xfree(buf); + return exit_code; } static int send_criu_msg(int socket_fd, CriuResp *msg) diff --git a/criu/include/protobuf.h b/criu/include/protobuf.h index fb7489e9d..0b6d8c150 100644 --- a/criu/include/protobuf.h +++ b/criu/include/protobuf.h @@ -52,4 +52,11 @@ static inline int collect_images(struct collect_image_info **array, unsigned siz return 0; } +/* + * To speed up reading of packed objects + * by providing space on stack, this should + * be more than enough for most objects. + */ +#define PB_PKOBJ_LOCAL_SIZE 1024 + #endif /* __CR_PROTOBUF_H__ */ diff --git a/criu/protobuf.c b/criu/protobuf.c index 8eb73e019..e68d42b5c 100644 --- a/criu/protobuf.c +++ b/criu/protobuf.c @@ -20,13 +20,6 @@ #include "protobuf.h" #include "util.h" -/* - * To speed up reading of packed objects - * by providing space on stack, this should - * be more than enough for most objects. - */ -#define PB_PKOBJ_LOCAL_SIZE 1024 - static char *image_name(struct cr_img *img) { int fd = img->_x.fd; From f8125b8bef7bf5a7bbaea5e6e1d29578c45bf53d Mon Sep 17 00:00:00 2001 From: Ashutosh Mehra Date: Fri, 13 Sep 2019 18:47:33 +0000 Subject: [PATCH 0035/1854] Couple of fixes to build and run libcriu tests libcriu tests are currently broken. This patch fixes couple of issues to allow the building and running libcriu tests. 1. lib/c/criu.h got updated to include version.h which is present at "criu/include", but the command to compile libcriu tests is not specifying "criu/include" in the path to be searched for header files. This resulted in compilation error. 
This can be fixed by adding "-I ../../../../../criu/criu/include" however it causes more problems as "criu/include/fcntl.h" would now hide system defined fcntl.h Solution is to use "-iquote ../../../../../criu/criu/include" which applies only to the quote form of include directive. 2. Secondly, libcriu.so major version got updated to 2 but libcriu/run.sh still assumes version 1. Instead of just updating the version in libcriu/run.sh to 2, this patch updates the libcriu/Makefile to use "CRIU_SO_VERSION_MAJOR" so that future changes to major version of libcriu won't cause same problem again. Signed-off-by: Ashutosh Mehra --- test/others/libcriu/Makefile | 14 ++++++++++++-- test/others/libcriu/run.sh | 5 ++--- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/test/others/libcriu/Makefile b/test/others/libcriu/Makefile index 5289ed15a..226396e6a 100644 --- a/test/others/libcriu/Makefile +++ b/test/others/libcriu/Makefile @@ -1,3 +1,5 @@ +include ../../../../criu/Makefile.versions + TESTS += test_sub TESTS += test_self TESTS += test_notify @@ -19,8 +21,16 @@ endef $(foreach t, $(TESTS), $(eval $(call genb, $(t)))) %.o: %.c - gcc -c $^ -I../../../../criu/lib/c/ -I../../../../criu/images/ -o $@ -Werror + gcc -c $^ -iquote ../../../../criu/criu/include -I../../../../criu/lib/c/ -I../../../../criu/images/ -o $@ -Werror -clean: +clean: libcriu_clean rm -rf $(TESTS) $(TESTS:%=%.o) lib.o .PHONY: clean + +libcriu_clean: + rm -f libcriu.so.${CRIU_SO_VERSION_MAJOR} +.PHONY: libcriu_clean + +libcriu: + ln -s ../../../../criu/lib/c/libcriu.so libcriu.so.${CRIU_SO_VERSION_MAJOR} +.PHONY: libcriu diff --git a/test/others/libcriu/run.sh b/test/others/libcriu/run.sh index a99b91e52..5f692db31 100755 --- a/test/others/libcriu/run.sh +++ b/test/others/libcriu/run.sh @@ -5,14 +5,13 @@ source ../env.sh || exit 1 echo "== Clean" make clean +make libcriu rm -rf wdir -rm -f ./libcriu.so.1 echo "== Prepare" mkdir -p wdir/i/ echo "== Run tests" -ln -s
../../../../criu/lib/c/libcriu.so libcriu.so.1 export LD_LIBRARY_PATH=. export PATH="`dirname ${BASH_SOURCE[0]}`/../../:$PATH" @@ -40,6 +39,6 @@ run_test test_iters run_test test_errno echo "== Tests done" -unlink libcriu.so.1 +make libcriu_clean [ $RESULT -eq 0 ] && echo "Success" || echo "FAIL" exit $RESULT From 321f82662129f516573cedba10b4fbbfc12f9f5a Mon Sep 17 00:00:00 2001 From: Ashutosh Mehra Date: Mon, 16 Sep 2019 06:49:07 +0000 Subject: [PATCH 0036/1854] Enable libcriu testing in travis jobs Updated scripts/travis/travis-tests to run libcriu test. Signed-off-by: Ashutosh Mehra --- scripts/travis/travis-tests | 3 +++ 1 file changed, 3 insertions(+) diff --git a/scripts/travis/travis-tests b/scripts/travis/travis-tests index 980d74734..b2ebe969b 100755 --- a/scripts/travis/travis-tests +++ b/scripts/travis/travis-tests @@ -161,6 +161,9 @@ ip net add test ./test/zdtm.py run -t zdtm/static/env00 -k always ./test/crit-recode.py +# libcriu testing +make -C test/others/libcriu run + make -C test/others/shell-job if ! [ -x "$(command -v flake8)" ]; then From 00ce121fd55e5947d477be4601169e8676a2bbbf Mon Sep 17 00:00:00 2001 From: Ashutosh Mehra Date: Mon, 23 Sep 2019 08:36:12 +0000 Subject: [PATCH 0037/1854] Add `criu` to PATH env variable in libcriu tests PATH is pointing to incorrect location for `criu` executable causing libcriu tests to fail when running in travis. Also added statements to display log file contents on failure to help in debugging. Signed-off-by: Ashutosh Mehra --- test/others/libcriu/run.sh | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/test/others/libcriu/run.sh b/test/others/libcriu/run.sh index 5f692db31..bd92f8544 100755 --- a/test/others/libcriu/run.sh +++ b/test/others/libcriu/run.sh @@ -13,7 +13,7 @@ mkdir -p wdir/i/ echo "== Run tests" export LD_LIBRARY_PATH=. 
-export PATH="`dirname ${BASH_SOURCE[0]}`/../../:$PATH" +export PATH="`dirname ${BASH_SOURCE[0]}`/../../../criu:$PATH" RESULT=0 @@ -21,6 +21,19 @@ function run_test { echo "== Build $1" if ! make $1; then echo "FAIL build $1" + echo "** Output of $1/test.log" + cat wdir/i/$1/test.log + echo "---------------" + if [ -f wdir/i/$1/dump.log ]; then + echo "** Contents of dump.log" + cat wdir/i/$1/dump.log + echo "---------------" + fi + if [ -f wdir/i/$1/restore.log ]; then + echo "** Contents of restore.log" + cat wdir/i/$1/restore.log + echo "---------------" + fi RESULT=1; else echo "== Test $1" From 19a24df53c2dba3b2e2457c99965edf43819818d Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Tue, 1 Oct 2019 00:29:14 +0100 Subject: [PATCH 0038/1854] early-log: Print warnings only if the buffer is full I don't see many issues with early-log, so we probably don't need the warning when it was used. Note that after commit 74731d9 ("zdtm: make grep_errors also grep warnings") also warnings are grepped by zdtm.py (and I believe that was an improvement) which prints some bothering lines: > =[log]=> dump/zdtm/static/inotify00/38/1/dump.log > ------------------------ grep Error ------------------------ > (00.000000) Will allow link remaps on FS > (00.000034) Warn (criu/log.c:203): The early log isn't empty > ------------------------ ERROR OVER ------------------------ Instead of decreasing loglevel of the message, improve it by reporting a real issue. 
Cc: Adrian Reber Cc: Pavel Tikhomirov Cc: Radostin Stoyanov Signed-off-by: Dmitry Safonov --- criu/log.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/criu/log.c b/criu/log.c index 8bdf83534..0ee113b91 100644 --- a/criu/log.c +++ b/criu/log.c @@ -199,8 +199,8 @@ void flush_early_log_buffer(int fd) } pos += hdr->len; } - if (early_log_buf_off) - pr_warn("The early log isn't empty\n"); + if (early_log_buf_off == EARLY_LOG_BUF_LEN) + pr_warn("The early log buffer is full, some messages may have been lost\n"); early_log_buf_off = 0; } From 1a28dee52b63eca5adc48c1d6f1dda8d532a0e8e Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Thu, 24 Oct 2019 19:39:39 +0000 Subject: [PATCH 0039/1854] Action scripts should be invoked with normal signal behavior Signal masks propagate through execve, so we need to clear them before invoking the action scripts as it may want to handle SIGCHLD, or SIGSEGV. Signed-off-by: Nicolas Viennot --- criu/util.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/criu/util.c b/criu/util.c index 028f604bb..e47e109ae 100644 --- a/criu/util.c +++ b/criu/util.c @@ -536,7 +536,7 @@ int cr_system_userns(int in, int out, int err, char *cmd, sigemptyset(&blockmask); sigaddset(&blockmask, SIGCHLD); if (sigprocmask(SIG_BLOCK, &blockmask, &oldmask) == -1) { - pr_perror("Can not set mask of blocked signals"); + pr_perror("Cannot set mask of blocked signals"); return -1; } @@ -545,6 +545,12 @@ int cr_system_userns(int in, int out, int err, char *cmd, pr_perror("fork() failed"); goto out; } else if (pid == 0) { + sigemptyset(&blockmask); + if (sigprocmask(SIG_SETMASK, &blockmask, NULL) == -1) { + pr_perror("Cannot clear blocked signals"); + goto out_chld; + } + if (userns_pid > 0) { if (switch_ns(userns_pid, &user_ns_desc, NULL)) goto out_chld; From 3861b334b252ff65acf2c827b5d85cab21de086a Mon Sep 17 00:00:00 2001 From: Sergey Bronnikov Date: Sun, 3 Nov 2019 13:08:09 +0300 Subject: [PATCH 0040/1854] Fix broken 
web-links --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 16e8452b5..558e87160 100644 --- a/README.md +++ b/README.md @@ -63,8 +63,8 @@ Linux kernel supporting checkpoint and restore for all the features it provides. looking for contributors of all kinds -- feedback, bug reports, testing, coding, writing, etc. Here are some useful hints to get involved. -* We have both -- [very simple](https://checkpoint-restore/criu/issues?q=is%3Aissue+is%3Aopen+label%3Aenhancement) and [more sophisticated](https://checkpoint-restore/criu/issues?q=is%3Aissue+is%3Aopen+label%3A%22new+feature%22) coding tasks; -* CRIU does need [extensive testing](https://checkpoint-restore/criu/issues?q=is%3Aissue+is%3Aopen+label%3Atesting); +* We have both -- [very simple](https://github.com/checkpoint-restore/criu/issues?q=is%3Aissue+is%3Aopen+label%3Aenhancement) and [more sophisticated](https://github.com/checkpoint-restore/criu/issues?q=is%3Aissue+is%3Aopen+label%3A%22new+feature%22) coding tasks; +* CRIU does need [extensive testing](https://github.com/checkpoint-restore/criu/issues?q=is%3Aissue+is%3Aopen+label%3Atesting); * Documentation is always hard, we have [some information](https://criu.org/Category:Empty_articles) that is to be extracted from people's heads into wiki pages as well as [some texts](https://criu.org/Category:Editor_help_needed) that all need to be converted into useful articles; * Feedback is expected on the github issues page and on the [mailing list](https://lists.openvz.org/mailman/listinfo/criu); * For historical reasons we do not accept PRs, instead [patches are welcome](http://criu.org/How_to_submit_patches); From a7c625938eb1d472341770699469ca6ddb4d91b1 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Tue, 29 Oct 2019 15:17:40 +0100 Subject: [PATCH 0041/1854] travis: start to use aarch64 hardware With the newly introduced aarch64 at Travis it is possible for the CRIU test-cases to switch to aarch64. 
Travis uses unprivileged LXD containers on aarch64 which blocks many of the kernel interfaces CRIU needs. So for now this only tests building CRIU natively on aarch64 instead of using the Docker+QEMU combination. All tests based on Docker are not working on aarch64 as there currently seems to be a problem with Docker on aarch64. Maybe because of the nesting of Docker in LXD. Signed-off-by: Adrian Reber --- .travis.yml | 11 ++++-- scripts/build/Dockerfile.alpine | 2 +- scripts/build/Dockerfile.centos | 1 + scripts/build/Makefile | 4 +-- scripts/travis/travis-tests | 63 +++++++++++++++++++++------------ 5 files changed, 54 insertions(+), 27 deletions(-) diff --git a/.travis.yml b/.travis.yml index 82ba9fbc8..4cde9c4fb 100644 --- a/.travis.yml +++ b/.travis.yml @@ -14,11 +14,9 @@ env: - TR_ARCH=x86_64 - TR_ARCH=x86_64 CLANG=1 - TR_ARCH=armv7hf - - TR_ARCH=aarch64 - TR_ARCH=ppc64le - TR_ARCH=s390x - TR_ARCH=armv7hf CLANG=1 - - TR_ARCH=aarch64 CLANG=1 - TR_ARCH=ppc64le CLANG=1 - TR_ARCH=alpine CLANG=1 - TR_ARCH=docker-test @@ -27,6 +25,15 @@ env: - TR_ARCH=centos - TR_ARCH=podman-test matrix: + include: + - os: linux + arch: arm64 + env: TR_ARCH=local + dist: bionic + - os: linux + arch: arm64 + env: TR_ARCH=local CLANG=1 + dist: bionic allow_failures: - env: TR_ARCH=docker-test - env: TR_ARCH=fedora-rawhide diff --git a/scripts/build/Dockerfile.alpine b/scripts/build/Dockerfile.alpine index c71a3901f..70fdf480a 100644 --- a/scripts/build/Dockerfile.alpine +++ b/scripts/build/Dockerfile.alpine @@ -42,5 +42,5 @@ RUN apk add \ # The rpc test cases are running as user #1000, let's add the user RUN adduser -u 1000 -D test -RUN pip install protobuf ipaddress junit_xml +RUN pip install protobuf ipaddress junit_xml flake8 RUN make -C test/zdtm diff --git a/scripts/build/Dockerfile.centos b/scripts/build/Dockerfile.centos index 2ce40b179..213be694f 100644 --- a/scripts/build/Dockerfile.centos +++ b/scripts/build/Dockerfile.centos @@ -23,6 +23,7 @@ RUN yum install -y \
protobuf-devel \ protobuf-python \ python \ + python-flake8 \ python-ipaddress \ python2-future \ python2-junit_xml \ diff --git a/scripts/build/Makefile b/scripts/build/Makefile index bb2e9ca9d..3d4d91cd5 100644 --- a/scripts/build/Makefile +++ b/scripts/build/Makefile @@ -1,5 +1,5 @@ -QEMU_ARCHES := armv7hf aarch64 ppc64le s390x fedora-rawhide-aarch64 # require qemu -ARCHES := $(QEMU_ARCHES) x86_64 fedora-asan fedora-rawhide centos +QEMU_ARCHES := armv7hf ppc64le s390x fedora-rawhide-aarch64 # require qemu +ARCHES := $(QEMU_ARCHES) aarch64 x86_64 fedora-asan fedora-rawhide centos TARGETS := $(ARCHES) alpine TARGETS_CLANG := $(addsuffix $(TARGETS),-clang) CONTAINER_RUNTIME := docker diff --git a/scripts/travis/travis-tests b/scripts/travis/travis-tests index b2ebe969b..1f6b19130 100755 --- a/scripts/travis/travis-tests +++ b/scripts/travis/travis-tests @@ -1,17 +1,31 @@ #!/bin/sh set -x -e -TRAVIS_PKGS="protobuf-c-compiler libprotobuf-c-dev libaio-dev +TRAVIS_PKGS="protobuf-c-compiler libprotobuf-c-dev libaio-dev python-future libgnutls28-dev libgnutls30 libprotobuf-dev protobuf-compiler - libcap-dev libnl-3-dev gcc-multilib gdb bash python-protobuf - libnet-dev util-linux asciidoctor libnl-route-3-dev" + libcap-dev libnl-3-dev gdb bash python-protobuf python-yaml + libnet-dev util-linux asciidoctor libnl-route-3-dev + python-junit.xml python-ipaddress time ccache flake8 + libbsd-dev" + +X86_64_PKGS="gcc-multilib" + +UNAME_M=`uname -m` + +if [ "$UNAME_M" != "x86_64" ]; then + # For Travis only x86_64 seems to be baremetal. Other + # architectures are running in unprivileged LXD containers. + # That seems to block most of CRIU's interfaces. 
+ SKIP_TRAVIS_TEST=1 +fi travis_prep () { [ -n "$SKIP_TRAVIS_PREP" ] && return cd ../../ - service apport stop + # This can fail on aarch64 travis + service apport stop || : CC=gcc # clang support @@ -43,24 +57,41 @@ travis_prep () { sed -i '/security/ d' /etc/apt/sources.list fi + + # Do not install x86_64 specific packages on other architectures + if [ "$UNAME_M" = "x86_64" ]; then + TRAVIS_PKGS="$TRAVIS_PKGS $X86_64_PKGS" + fi + apt-get update -qq apt-get install -qq --no-install-recommends $TRAVIS_PKGS - # travis is based on 14.04 and that does not have python - # packages for future and ipaddress (16.04 has those packages) - pip install junit-xml future ipaddress chmod a+x $HOME } travis_prep -ulimit -c unlimited -echo "|`pwd`/test/abrt.sh %P %p %s %e" > /proc/sys/kernel/core_pattern - export GCOV +$CC --version time make CC="$CC" -j4 +./criu/criu -v4 cpuinfo dump || : +./criu/criu -v4 cpuinfo check || : + +make lint + +# Check that help output fits into 80 columns +WIDTH=$(./criu/criu --help | wc --max-line-length) +if [ "$WIDTH" -gt 80 ]; then + echo "criu --help output does not obey 80 characters line width!" + exit 1 +fi + [ -n "$SKIP_TRAVIS_TEST" ] && return +ulimit -c unlimited + +echo "|`pwd`/test/abrt.sh %P %p %s %e" > /proc/sys/kernel/core_pattern + if [ "${COMPAT_TEST}x" = "yx" ] ; then # Dirty hack to keep both ia32 & x86_64 shared libs on a machine: # headers are probably not compatible, so apt-get doesn't allow @@ -165,15 +196,3 @@ ip net add test make -C test/others/libcriu run make -C test/others/shell-job - -if ! [ -x "$(command -v flake8)" ]; then - pip install flake8 -fi -make lint - -# Check that help output fits into 80 columns -WIDTH=$(./criu/criu --help | wc --max-line-length) -if [ "$WIDTH" -gt 80 ]; then - echo "criu --help output does not obey 80 characters line width!" 
- exit 1 -fi From c4006c0034ef2162693d9dd619d025c240affc78 Mon Sep 17 00:00:00 2001 From: Vitaly Ostrosablin Date: Fri, 1 Nov 2019 09:00:23 +0000 Subject: [PATCH 0042/1854] test/static:conntracks: Support nftables Update test to support both iptables and nft to create conntrack rules. Signed-off-by: Vitaly Ostrosablin Signed-off-by: Andrei Vagin --- test/zdtm/static/conntracks | 36 ++++++++++++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/test/zdtm/static/conntracks b/test/zdtm/static/conntracks index a30e0e268..26220f97c 100755 --- a/test/zdtm/static/conntracks +++ b/test/zdtm/static/conntracks @@ -23,7 +23,7 @@ do_or_fail() fail "$failmsg: $output" } -do_start() +do_start_ipt() { [ -f "$statefile" ] && die "state file $statefile aleady exists" @@ -35,7 +35,7 @@ do_start() iptables -L \> "$statefile" } -do_stop() +do_stop_ipt() { do_or_fail "can't compare the iptables" \ iptables -L \| diff -u "$statefile" - @@ -45,6 +45,38 @@ do_stop() echo "PASS" > $outfile } +do_start_nft() +{ + [ -f "$statefile" ] && die "state file $statefile aleady exists" + + do_or_fail "can't install a state match" \ + nft add rule filter INPUT \ + ct state related,established accept + + do_or_fail "can't list the loaded nftables" \ + nft list ruleset \> "$statefile" +} + +do_stop_nft() +{ + do_or_fail "can't compare the nftables" \ + nft list ruleset \| diff -u "$statefile" - + + rm -f "$statefile" + + echo "PASS" > $outfile +} + +do_start() +{ + [ -x "$(command -v nft)" ] && do_start_nft || do_start_ipt +} + +do_stop() +{ + [ -x "$(command -v nft)" ] && do_stop_nft || do_stop_ipt +} + tmpargs="$(../lib/parseargs.sh --name=$0 \ --flags-req=statefile,outfile \ --flags-opt="start,stop" -- "$@")" || From 389bcfef3e8f4be35464da9f94681e6573d6d1d9 Mon Sep 17 00:00:00 2001 From: Nidhi Gupta Date: Fri, 18 Oct 2019 20:09:15 +0530 Subject: [PATCH 0043/1854] test/java: Add FileRead Tests Signed-off-by: Nidhi Gupta --- test/javaTests/README.md | 33 ++ 
test/javaTests/pom.xml | 47 ++ .../criu/java/tests/CheckpointRestore.java | 450 ++++++++++++++++++ .../src/org/criu/java/tests/FileRead.java | 175 +++++++ .../src/org/criu/java/tests/Helper.java | 99 ++++ .../src/org/criu/java/tests/ImgFilter.java | 11 + test/javaTests/test.xml | 13 + 7 files changed, 828 insertions(+) create mode 100644 test/javaTests/README.md create mode 100644 test/javaTests/pom.xml create mode 100644 test/javaTests/src/org/criu/java/tests/CheckpointRestore.java create mode 100644 test/javaTests/src/org/criu/java/tests/FileRead.java create mode 100644 test/javaTests/src/org/criu/java/tests/Helper.java create mode 100644 test/javaTests/src/org/criu/java/tests/ImgFilter.java create mode 100644 test/javaTests/test.xml diff --git a/test/javaTests/README.md b/test/javaTests/README.md new file mode 100644 index 000000000..cb779285e --- /dev/null +++ b/test/javaTests/README.md @@ -0,0 +1,33 @@ +# JavaTests + +Java Functional tests checks the Java File based APIs and Memory mapping APIs by placing the process in various states before checkpointing and validates if these resources are still accessible after restore. It also validates if the file contents are in expected states. + +Tests are to be run by a user having following capabilities: +CAP_DAC_OVERRIDE +CAP_CHOWN +CAP_SETPCAP +CAP_SETGID +CAP_AUDIT_CONTROL +CAP_DAC_READ_SEARCH +CAP_NET_ADMIN +CAP_SYS_ADMIN +CAP_SYS_CHROOT +CAP_SYS_PTRACE +CAP_FOWNER +CAP_KILL +CAP_FSETID +CAP_SYS_RESOURCE +CAP_SETUID + +## File-based Java APIs + +Here we test the File-Based Java APIs by checkpointing the application in the following scenarios and verifying the contents of the file after restore: +- Reading and writing in the same file. 
(FileRead.java) + +### Prerequisites for running the tests: +- Maven + +### To run the tests: +- In the javaTests folder run the command ```sudo mvn test``` +- To keep the img files and logs from previous failures, between different runs of the test, use the ```-DneverCleanFailures=true ``` option in the maven command +as ```sudo mvn -DneverCleanFailures=true test``` diff --git a/test/javaTests/pom.xml b/test/javaTests/pom.xml new file mode 100644 index 000000000..faae44d1b --- /dev/null +++ b/test/javaTests/pom.xml @@ -0,0 +1,47 @@ + + 4.0.0 + criu + criu-javaTests + 1 + criu-javaTests + + + src + + + + org.apache.maven.plugins + maven-surefire-plugin + 2.14.1 + + + + test.xml + + + + + + maven-compiler-plugin + 3.1 + + 1.7 + 1.7 + + + + + + + + org.testng + testng + 6.3.1 + + + + UTF-8 + + diff --git a/test/javaTests/src/org/criu/java/tests/CheckpointRestore.java b/test/javaTests/src/org/criu/java/tests/CheckpointRestore.java new file mode 100644 index 000000000..968488191 --- /dev/null +++ b/test/javaTests/src/org/criu/java/tests/CheckpointRestore.java @@ -0,0 +1,450 @@ +package org.criu.java.tests; + +import org.testng.Assert; +import org.testng.annotations.AfterTest; +import org.testng.annotations.BeforeSuite; +import org.testng.annotations.Parameters; +import org.testng.annotations.Test; + +import java.io.*; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.channels.FileChannel.MapMode; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.nio.file.StandardOpenOption; +import java.text.SimpleDateFormat; +import java.util.Date; + +public class CheckpointRestore { + private MappedByteBuffer mappedByteBuffer = null; + private String testName = ""; + private String logFolder = Helper.LOG_FOLDER + "/"; + private String outputFolder = Helper.OUTPUT_FOLDER_NAME + "/"; + + /** + * Create CRlog and output directory if they don't exist. 
+ * Delete directories containing .img files from failed Checkpoint-Restore if 'neverCleanFailures' property is not set to true. + * + * @throws IOException + */ + @BeforeSuite + void suiteSetup() throws IOException { + System.out.println("Tests are to be run as a privileged user having capabilities mentioned in ReadMe"); + boolean neverCleanFailures = Boolean.getBoolean("neverCleanFailures"); + Path logDir = Paths.get(logFolder); + Path outputDir = Paths.get(outputFolder); + if (!Files.exists(logDir)) { + System.out.println("Logs directory does not exist, creating it"); + Files.createDirectory(logDir); + } + if (!Files.exists(outputDir)) { + System.out.println("Output directory does not exist, creating it"); + Files.createDirectory(outputDir); + } + /* + * Delete the directories containing the img files from failed Checkpoint-Restore. + */ + if (!neverCleanFailures) { + File output = new File(outputFolder); + String[] name = output.list(); + for (int i = 0; null != name && i < name.length; i++) { + File testFolder = new File(outputFolder + name[i]); + if (testFolder.isDirectory()) { + String[] list = testFolder.list(); + File file; + if (null != list) { + for (int j = 0; j < list.length; j++) { + file = new File(outputFolder + name[i] + "/" + list[j]); + if (!file.isDirectory()) { + Files.delete(file.toPath()); + } + } + } + } + Files.delete(testFolder.toPath()); + } + } + } + + /** + * Create the output folder for the test in case it does not exist + * + * @param testName Name of the java test + * @throws IOException + */ + private void testSetup(String testName) throws IOException { + Path testFolderPath = Paths.get(outputFolder + testName + "/"); + if (!Files.exists(testFolderPath)) { + System.out.println("Test Folder does not exist creating it"); + Files.createDirectory(testFolderPath); + } + } + + /** + * Read the pid of process from the pid file of test + * + * @param name Name of the java test + * @return pid Process id of the java test process + * @throws 
IOException + */ + private String getPid(String name) throws IOException { + name = outputFolder + testName + "/" + name + Helper.PID_APPEND; + File pidfile = new File(name); + BufferedReader pidReader = new BufferedReader(new FileReader(pidfile)); + String pid = pidReader.readLine(); + pidReader.close(); + return pid; + } + + /** + * @param testName Name of the java test + * @param checkpointOpt Additional options for checkpoint + * @param restoreOpt Additional options for restore + * @throws Exception + */ + @Test + @Parameters({"testname", "checkpointOpt", "restoreOpt"}) + public void runtest(String testName, String checkpointOpt, String restoreOpt) throws Exception { + this.testName = testName; + String name = Helper.PACKAGE_NAME + "." + testName; + String pid; + int exitCode; + + System.out.println("======= Testing " + testName + " ========"); + + testSetup(testName); + + File f = new File(Helper.MEMORY_MAPPED_FILE_NAME); + if (f.exists()) { + f.delete(); + } + + /* + * Create a new file that will be mapped to memory and used to communicate between + * this process and the java test process. + */ + boolean newFile = f.createNewFile(); + Assert.assertTrue(newFile, "Unable to create a new file to be mapped"); + + /* + * MappedByteBuffer communicates between this process and java process called. 
+ */ + FileChannel channel = FileChannel.open(f.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE); + mappedByteBuffer = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE); + mappedByteBuffer.clear(); + channel.close(); + + /* + * Put MappedByteBuffer in Init state + */ + mappedByteBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_INIT); + + /* + * Run the test as a separate process + */ + System.out.println("Starting the java Test"); + ProcessBuilder builder = new ProcessBuilder("java", "-cp", "target/classes", name); + Process process = builder.start(); + + char currentState = mappedByteBuffer.getChar(Helper.MAPPED_INDEX); + /* + * Loop until the test process changes the state of MappedByteBuffer from init state + */ + while (Helper.STATE_INIT == currentState) { + currentState = mappedByteBuffer.getChar(Helper.MAPPED_INDEX); + } + + /* + * If Mapped Buffer is in Helper.STATE_FAIL state before checkpointing then an exception must + * have occurred in the test. + */ + while (Helper.STATE_FAIL == currentState) { + try { + /* + * We exit the test process with exit code 5 in case of an exception + */ + exitCode = process.exitValue(); + /* + * Reaching here implies that .exitValue() has not thrown an exception, so the process has + * exited, We now check the exitCode. + */ + if (5 == exitCode) { + Assert.fail(testName + ": Exception occurred while running the test: check the log file for details."); + } else { + Assert.fail(testName + ": ERROR: Unexpected value of exit code: " + exitCode + ", expected: 5"); + } + } catch (IllegalThreadStateException e) { + /* + * Do nothing, as an Exception is expected if the process has not exited + * and we try to get its exitValue. 
+ */ + } + + currentState = mappedByteBuffer.getChar(Helper.MAPPED_INDEX); + } + + /* + * Mapped Buffer state should be Helper.STATE_CHECKPOINT for checkpointing or Helper.STATE_END if some error occurs in test + */ + if (Helper.STATE_END != currentState) { + Assert.assertEquals(currentState, Helper.STATE_CHECKPOINT, testName + ": ERROR: Error occurred while running the test: test is not in the excepted 'waiting to be checkpointed state': " + currentState); + } else { + Assert.fail(testName + ": ERROR: Error took place in the test check the log file for more details"); + } + /* + * Reaching here implies that MappedByteBuffer is in To Be Checkpointed state. + * Get the pid of the test process + */ + + pid = getPid(testName); + try { + /* + * Checkpoint the process + */ + checkpoint(pid, checkpointOpt); + + } catch (Exception e) { + /* + * If exception occurs put the MappedByteBuffer to Helper.STATE_TERMINATE-Terminate state. + * On reading the terminate state, the test process terminates, else it + * may go on looping. + */ + mappedByteBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_TERMINATE); + Assert.fail(testName + ": Exception occurred while during checkpointing" + e, e); + } + + /* + * The process has been checkpointed successfully, now restoring the process. 
+ */ + try { + /* + * Restore the process + */ + restore(restoreOpt); + } catch (Exception e) { + mappedByteBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_TERMINATE); + Assert.fail(testName + ": Exception occurred while restoring the test" + e, e); + } + + /* + * Wait for test process to finish + */ + currentState = mappedByteBuffer.getChar(Helper.MAPPED_INDEX); + while (Helper.STATE_RESTORE == currentState) { + currentState = mappedByteBuffer.getChar(Helper.MAPPED_INDEX); + } + + /* + * If a test passes it puts the MappedByteBuffer to Helper.STATE_PASS-Pass state, + * On failing to Helper.STATE_FAIL-Fail state, and if our Buffer is in Helper.STATE_TERMINATE state + * its because the checkpoint-restore of test process failed. + */ + + Assert.assertNotEquals(currentState, Helper.STATE_TERMINATE, testName + ": ERROR: Checkpoint-Restore failed"); + Assert.assertNotEquals(currentState, Helper.STATE_FAIL, testName + ": ERROR: Test Failed, Check Log for details"); + Assert.assertEquals(currentState, Helper.STATE_PASS, testName + " ERROR: Unexpected State of Mapped Buffer"); + System.out.println("-----" + "PASS" + "-----"); + + } + + /** + * Remove .img files, dump.log, restore.log, stats-dump and stats-restore files from Log Directory + * + * @throws IOException + */ + @AfterTest + void cleanup() throws IOException { + int i; + String currentPath = System.getProperty("user.dir"); + currentPath = currentPath + "/" + logFolder; + File deleteFile; + File dir = new File(currentPath); + String[] imgFiles = dir.list(new ImgFilter()); + if (null != imgFiles) { + for (i = 0; i < imgFiles.length; i++) { + deleteFile = new File(currentPath + imgFiles[i]); + Files.delete(deleteFile.toPath()); + } + } + + boolean exists = Files.exists(Paths.get(currentPath + "dump.log")); + if (exists) { + Files.delete(Paths.get(currentPath + "dump.log")); + } + + exists = Files.exists(Paths.get(currentPath + "restore.log")); + if (exists) { + Files.delete(Paths.get(currentPath + "restore.log")); 
+ } + + exists = Files.exists(Paths.get(currentPath + "stats-dump")); + if (exists) { + Files.delete(Paths.get(currentPath + "stats-dump")); + } + + exists = Files.exists(Paths.get(currentPath + "stats-restore")); + if (exists) { + Files.delete(Paths.get(currentPath + "stats-restore")); + } + } + + /** + * Copy .img files, dump.log, restore.log, stats-dump and stats-restore files from Log Directory if they exist + * to another folder. + * + * @throws IOException + */ + String copyFiles() throws IOException { + String currentPath = System.getProperty("user.dir"); + String folderSuffix = new SimpleDateFormat("yyMMddHHmmss").format(new Date()); + String fromPath = currentPath + "/" + logFolder; + File fromDir = new File(fromPath); + Path fromFile, toFile; + boolean exists; + String toPath = currentPath + "/" + outputFolder + testName + folderSuffix + "/"; + Path dirPath = Paths.get(toPath); + Files.createDirectory(dirPath); + + String[] imgFiles = fromDir.list(new ImgFilter()); + if (null != imgFiles) { + for (int i = 0; i < imgFiles.length; i++) { + fromFile = Paths.get(fromPath + imgFiles[i]); + toFile = Paths.get(toPath + imgFiles[i]); + Files.copy(fromFile, toFile); + } + } + + fromFile = Paths.get(fromPath + "dump.log"); + exists = Files.exists(fromFile); + if (exists) { + toFile = Paths.get(toPath + "dump.log"); + Files.copy(fromFile, toFile); + } + + fromFile = Paths.get(fromPath + "restore.log"); + exists = Files.exists(fromFile); + if (exists) { + toFile = Paths.get(toPath + "restore.log"); + Files.copy(fromFile, toFile); + } + + fromFile = Paths.get(fromPath + "stats-dump"); + exists = Files.exists(fromFile); + if (exists) { + toFile = Paths.get(toPath + "stats-dump"); + Files.copy(fromFile, toFile); + } + + fromFile = Paths.get(fromPath + "stats-restore"); + exists = Files.exists(fromFile); + if (exists) { + toFile = Paths.get(toPath + "stats-restore"); + Files.copy(fromFile, toFile); + } + + return folderSuffix; + } + + /** + * Checkpoint the process, if 
process has not been checkpointed correctly + * copy the .img, log and stats files, puts MappedBuffer to 'terminate' state and mark + * test as failed + * + * @param pid Pid of process to be checkpointed + * @param checkpointOpt Additional options for checkpoint + * @throws IOException + * @throws InterruptedException + */ + private void checkpoint(String pid, String checkpointOpt) throws IOException, InterruptedException { + ProcessBuilder builder; + System.out.println("Checkpointing process " + pid); + String command = "../../criu/criu dump --shell-job -t " + pid + " -vvv -D " + logFolder + " -o dump.log"; + if (0 == checkpointOpt.length()) { + String[] cmd = command.split(" "); + builder = new ProcessBuilder(cmd); + } else { + command = command + " " + checkpointOpt; + String[] cmd = command.split(" "); + builder = new ProcessBuilder(cmd); + } + Process process = builder.start(); + BufferedReader stdError = new BufferedReader(new InputStreamReader(process.getErrorStream())); + int exitCode = process.waitFor(); + + if (0 != exitCode) { + /* + * Print the error stream + */ + String line = stdError.readLine(); + while (null != line) { + System.out.println(line); + line = stdError.readLine(); + } + + mappedByteBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_TERMINATE); + /* + * If checkpoint fails copy the img files, dump.log, stats-dump, stats-restore + */ + String folderSuffix = copyFiles(); + + Assert.fail(testName + ": ERROR: Error during checkpoint: exitCode of checkpoint process was not zero.\nFor more details check dump.log in " + outputFolder + testName + folderSuffix); + return; + } + + System.out.println("Checkpoint success"); + process.destroy(); + + } + + /** + * Restore the process, if process has been restored correctly put Mapped Buffer to + * 'restored' state, else copy the .img, log and stats files and put MappedBuffer to 'terminate' + * state and mark test as failed + * + * @param restoreOpt Additional options for restore + * @throws IOException + 
* @throws InterruptedException + */ + private void restore(String restoreOpt) throws IOException, InterruptedException { + ProcessBuilder builder; + System.out.println("Restoring process"); + String command = "../../criu/criu restore -d -vvv --shell-job -D " + logFolder + " -o restore.log"; + if (0 == restoreOpt.length()) { + String[] cmd = command.split(" "); + builder = new ProcessBuilder(cmd); + } else { + command = command + " " + restoreOpt; + String[] cmd = command.split(" "); + builder = new ProcessBuilder(cmd); + } + + Process process = builder.start(); + BufferedReader stdError = new BufferedReader(new InputStreamReader(process.getErrorStream())); + int exitCode = process.waitFor(); + + if (0 != exitCode) { + /* + * Print the error stream + */ + String line = stdError.readLine(); + while (null != line) { + System.out.println(line); + line = stdError.readLine(); + } + mappedByteBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_TERMINATE); + /* + * If restore fails copy img files, dump.log, restore.log, stats-dump, stats-restore + */ + String folderSuffix = copyFiles(); + Assert.fail(testName + ": ERROR: Error during restore: exitCode of restore process was not zero.\nFor more details check restore.log in " + outputFolder + testName + folderSuffix); + + return; + } else { + System.out.println("Restore success"); + mappedByteBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_RESTORE); + } + process.destroy(); + } +} diff --git a/test/javaTests/src/org/criu/java/tests/FileRead.java b/test/javaTests/src/org/criu/java/tests/FileRead.java new file mode 100644 index 000000000..d94a14112 --- /dev/null +++ b/test/javaTests/src/org/criu/java/tests/FileRead.java @@ -0,0 +1,175 @@ +package org.criu.java.tests; + +import java.io.*; +import java.lang.management.ManagementFactory; +import java.lang.management.RuntimeMXBean; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.channels.FileChannel.MapMode; +import 
java.nio.file.StandardOpenOption; +import java.util.logging.Level; +import java.util.logging.Logger; + +class FileRead { + private static String TESTNAME = "FileRead"; + + /** + * @param i int value denoting the line number. + * @return The line as a string. + */ + private static String getLine(int i) { + return "Line No: " + i + "\n"; + } + + /** + * Write in a file, line by line, and read it, checkpoint and restore + * and then continue to read and write the file. + * + * @param args Not used + */ + public static void main(String[] args) { + MappedByteBuffer b = null; + Logger logger = null; + int wi, ri = 0; + try { + File file = new File(Helper.OUTPUT_FOLDER_NAME + "/" + TESTNAME + "/FileRead_write.txt"); + File f = new File(Helper.MEMORY_MAPPED_FILE_NAME); + logger = Logger.getLogger(Helper.PACKAGE_NAME + "." + TESTNAME); + RuntimeMXBean bean = ManagementFactory.getRuntimeMXBean(); + String pid = bean.getName(); + int val = Helper.init(TESTNAME, pid, logger); + if (0 != val) { + logger.log(Level.SEVERE, "Helper.init returned a non-zero code."); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + logger.log(Level.INFO, "Test init done; pid written to pid file; beginning with test"); + FileChannel channel = FileChannel.open(f.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE); + b = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE); + channel.close(); + /* + * Mapped Byte Buffer should be in init state at the beginning of test + */ + if ('I' != b.getChar(Helper.MAPPED_INDEX)) { + logger.log(Level.SEVERE, "Error: Error in memory mapping, test is not in init state"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + + logger.log(Level.INFO, "Checking existence of file to be read and written to."); + if (file.exists()) { + file.delete(); + } + boolean newFile = file.createNewFile(); + if (!newFile) { + logger.log(Level.SEVERE, "Cannot create a new file to read and 
write to."); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + + BufferedWriter brw = new BufferedWriter(new FileWriter(file)); + BufferedReader brr = new BufferedReader(new FileReader(file)); + + logger.log(Level.INFO, "Start writing the lines in file"); + + for (wi = 1; wi <= 5; wi++) { + brw.write(getLine(wi)); + } + + brw.flush(); + String s = "Line No: 0"; + int i; + + for (i = 0; i < 50; i++) { + brw.write(getLine(wi)); + brw.flush(); + wi++; + s = brr.readLine(); + ri = Integer.parseInt(s.replaceAll("[\\D]", "")); + } + + wi--; + logger.log(Level.INFO, "Going to checkpoint"); + + /* + * Checkpoint and wait for restore + */ + Helper.checkpointAndWait(b, logger); + logger.log(Level.INFO, "Test has been restored!"); + + brw.flush(); + + try { + s = brr.readLine(); + + } catch (Exception e) { + logger.log(Level.SEVERE, "Error: Buffered Reader is not reading file"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + + if (null == s || s.isEmpty()) { + logger.log(Level.SEVERE, "Error: Error while reading lines after restore: Line read is null"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + int readLineNo = Integer.parseInt(s.replaceAll("[\\D]", "")); + if (ri + 1 != readLineNo) { + logger.log(Level.SEVERE, "Error: Not reading at correct line"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + String ch = brr.readLine(); + while (null != ch && !ch.isEmpty()) { + s = ch; + ch = brr.readLine(); + } + + readLineNo = Integer.parseInt(s.replaceAll("[\\D]", "")); + + if (readLineNo != wi) { + logger.log(Level.SEVERE, "Error: Data written has been lost"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + + try { + brw.write(getLine(wi + 1)); + brw.flush(); + } catch (IOException e) { + logger.log(Level.SEVERE, "Error: cannot write file after restore"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + + s = 
brr.readLine(); + readLineNo = Integer.parseInt(s.replaceAll("[\\D]", "")); + + if (readLineNo != wi + 1) { + logger.log(Level.SEVERE, "Error: Data not written correctly"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + logger.log(Level.INFO, "File is being read and written to correctly after restore!"); + logger.log(Level.INFO, Helper.PASS_MESSAGE); + brw.close(); + brr.close(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_PASS); + System.exit(0); + } catch (Exception e) { + if (null != logger) { + StringWriter writer = new StringWriter(); + PrintWriter printWriter = new PrintWriter(writer); + e.printStackTrace(printWriter); + logger.log(Level.SEVERE, "Exception occurred:" + e); + logger.log(Level.FINE, writer.toString()); + } + + if (null != b) { + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + + System.exit(5); + } + } +} diff --git a/test/javaTests/src/org/criu/java/tests/Helper.java b/test/javaTests/src/org/criu/java/tests/Helper.java new file mode 100644 index 000000000..d608fba47 --- /dev/null +++ b/test/javaTests/src/org/criu/java/tests/Helper.java @@ -0,0 +1,99 @@ +package org.criu.java.tests; + +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.nio.MappedByteBuffer; +import java.util.logging.FileHandler; +import java.util.logging.Level; +import java.util.logging.Logger; +import java.util.logging.SimpleFormatter; + +class Helper { + static String MEMORY_MAPPED_FILE_NAME = "output/file"; + static String PASS_MESSAGE = "Test was a Success!!!"; + static String OUTPUT_FOLDER_NAME = "output"; + static String PACKAGE_NAME = "org.criu.java.tests"; + static String PID_APPEND = ".pid"; + static String SOURCE_FOLDER = "src/org/criu/java/tests"; + static String LOG_FOLDER = "CRlogs"; + static int MAPPED_REGION_SIZE = 100; + static int MAPPED_INDEX = 1; + static char STATE_RESTORE = 'R'; + static char STATE_CHECKPOINT = 'C'; + static char STATE_INIT = 'I'; 
+ static char STATE_TERMINATE = 'T'; + static char STATE_END = 'E'; + static char STATE_FAIL = 'F'; + static char STATE_PASS = 'P'; + + /** + * Create a new log file and pidfile and write + * the pid to the pidFile. + * + * @param testName Name of the java test + * @param pid Pid of the java test process + * @param logger + * @return 0 or 1 denoting whether the function was successful or not. + * @throws IOException + */ + static int init(String testName, String pid, Logger logger) throws IOException { + File pidfile = new File(OUTPUT_FOLDER_NAME + "/" + testName + "/" + testName + PID_APPEND); + + FileHandler handler = new FileHandler(Helper.OUTPUT_FOLDER_NAME + "/" + testName + "/" + testName + ".log", false); + handler.setFormatter(new SimpleFormatter()); + handler.setLevel(Level.FINE); + logger.addHandler(handler); + logger.setLevel(Level.FINE); + + /* + * Create a pid file and write the process's pid into it. + */ + if (pidfile.exists()) { + pidfile.delete(); + } + boolean newFile = pidfile.createNewFile(); + if (!newFile) { + logger.log(Level.SEVERE, "Cannot create new pid file."); + return 1; + } + BufferedWriter pidWriter = new BufferedWriter(new FileWriter(pidfile)); + pidWriter.write(pid + "\n"); + pidWriter.close(); + return 0; + } + + /** + * Put the Mapped Buffer to 'Ready to be checkpointed' state and wait for restore. + * + * @param b The MappedByteBuffer from the calling process. + * @param logger The Logger from the calling process. + */ + static void checkpointAndWait(MappedByteBuffer b, Logger logger) { + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_CHECKPOINT); + char c = b.getChar(Helper.MAPPED_INDEX); + /* + * Loop while MappedByteBuffer is in 'To be checkpointed' state + */ + while (Helper.STATE_CHECKPOINT == c) { + c = b.getChar(Helper.MAPPED_INDEX); + } + /* + * Test is in 'T' state if some error or exception occurs during checkpoint or restore. 
+ */ + if (Helper.STATE_TERMINATE == c) { + logger.log(Level.SEVERE, "Error during checkpoint-restore, Test terminated"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + /* + * The expected state of MappedByteBuffer is Helper.STATE_RESTORE-restored state. + */ + if (Helper.STATE_RESTORE != c) { + logger.log(Level.INFO, "Error: Test state is not the expected Restored state"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + } +} diff --git a/test/javaTests/src/org/criu/java/tests/ImgFilter.java b/test/javaTests/src/org/criu/java/tests/ImgFilter.java new file mode 100644 index 000000000..97087c2cc --- /dev/null +++ b/test/javaTests/src/org/criu/java/tests/ImgFilter.java @@ -0,0 +1,11 @@ +package org.criu.java.tests; + +import java.io.File; +import java.io.FilenameFilter; + +class ImgFilter implements FilenameFilter { + @Override + public boolean accept(File dir, String fileName) { + return (fileName.endsWith(".img")); + } +} diff --git a/test/javaTests/test.xml b/test/javaTests/test.xml new file mode 100644 index 000000000..8ff67c5e0 --- /dev/null +++ b/test/javaTests/test.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + From 9325339e64e0485e981e880ae460729cd9b9b648 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Sat, 9 Nov 2019 18:31:58 +0000 Subject: [PATCH 0044/1854] travis: Disallow failures on ia32 It seems pretty stable and hasn't added many false positives during the last months, while it can reveal some issues in the compat (ia32) C/R code.
Signed-off-by: Dmitry Safonov --- .travis.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 4cde9c4fb..7a0c29a55 100644 --- a/.travis.yml +++ b/.travis.yml @@ -40,8 +40,6 @@ matrix: - env: TR_ARCH=fedora-rawhide-aarch64 - env: TR_ARCH=s390x - env: TR_ARCH=local GCOV=1 - - env: TR_ARCH=local COMPAT_TEST=y - - env: TR_ARCH=local CLANG=1 COMPAT_TEST=y script: - sudo make CCACHE=1 -C scripts/travis $TR_ARCH after_success: From d804f70a680b0ba7410e3845aa8179d8ab5c4219 Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Thu, 7 Feb 2019 15:17:48 +0300 Subject: [PATCH 0045/1854] mount: remove useless check in populate_mnt_ns The path: restore_root_task prepare_namespace_before_tasks mntns_maybe_create_roots is always called before the path below: restore_root_task fork_with_pid restore_task_with_children prepare_namespace prepare_mnt_ns populate_mnt_ns So (!!mnt_roots) == (root_ns_mask & CLONE_NEWNS) in populate_mnt_ns, but in prepare_mnt_ns we've already checked that it is true, so there is no need for this check - remove it.
Signed-off-by: Pavel Tikhomirov --- criu/mount.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/criu/mount.c b/criu/mount.c index 486d01719..802295778 100644 --- a/criu/mount.c +++ b/criu/mount.c @@ -3139,15 +3139,12 @@ static int populate_mnt_ns(void) struct ns_id *nsid; int ret; - if (mnt_roots) { - /* mnt_roots is a tmpfs mount and it's private */ - root_yard_mp = mnt_entry_alloc(); - if (!root_yard_mp) - return -1; + root_yard_mp = mnt_entry_alloc(); + if (!root_yard_mp) + return -1; - root_yard_mp->mountpoint = mnt_roots; - root_yard_mp->mounted = true; - } + root_yard_mp->mountpoint = mnt_roots; + root_yard_mp->mounted = true; pms = mnt_build_tree(mntinfo, root_yard_mp); if (!pms) From 71dff54aa474efa105043cd86cc38103c3c21859 Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Thu, 7 Feb 2019 15:17:49 +0300 Subject: [PATCH 0046/1854] ns: make rst_new_ns_id static It's never used outside of namespaces.c Signed-off-by: Pavel Tikhomirov --- criu/include/namespaces.h | 1 - criu/namespaces.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/criu/include/namespaces.h b/criu/include/namespaces.h index 287abb3c8..a9a970a9b 100644 --- a/criu/include/namespaces.h +++ b/criu/include/namespaces.h @@ -166,7 +166,6 @@ extern int restore_ns(int rst, struct ns_desc *nd); extern int dump_task_ns_ids(struct pstree_item *); extern int predump_task_ns_ids(struct pstree_item *); -extern struct ns_id *rst_new_ns_id(unsigned int id, pid_t pid, struct ns_desc *nd, enum ns_type t); extern int rst_add_ns_id(unsigned int id, struct pstree_item *, struct ns_desc *nd); extern struct ns_id *lookup_ns_by_id(unsigned int id, struct ns_desc *nd); diff --git a/criu/namespaces.c b/criu/namespaces.c index a228737ee..57f6bdfef 100644 --- a/criu/namespaces.c +++ b/criu/namespaces.c @@ -290,7 +290,7 @@ static void nsid_add(struct ns_id *ns, struct ns_desc *nd, unsigned int id, pid_ pr_info("Add %s ns %d pid %d\n", nd->str, ns->id, ns->ns_pid); } 
-struct ns_id *rst_new_ns_id(unsigned int id, pid_t pid, +static struct ns_id *rst_new_ns_id(unsigned int id, pid_t pid, struct ns_desc *nd, enum ns_type type) { struct ns_id *nsid; From 7be7260261a1f94b111b9390a2b39179e87d7d8b Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Thu, 7 Feb 2019 15:17:50 +0300 Subject: [PATCH 0047/1854] ns/restore/image: do not read namespace images for non-namespaced case Images for mount and net namespaces are empty if ns does not belong to us, thus we don't need to collect on restore. By adding these checks we will eliminate suspicious messages in logs about lack of images: ./test/zdtm.py run -k always -f h -t zdtm/static/env00 env00/54/2/restore.log:(00.000332) No mountpoints-5.img image env00/54/2/restore.log:(00.000342) No netns-2.img image Signed-off-by: Pavel Tikhomirov --- criu/mount.c | 5 +++++ criu/net.c | 3 +++ 2 files changed, 8 insertions(+) diff --git a/criu/mount.c b/criu/mount.c index 802295778..fdaaa7b31 100644 --- a/criu/mount.c +++ b/criu/mount.c @@ -2989,6 +2989,11 @@ int read_mnt_ns_img(void) struct mount_info *pms = NULL; struct ns_id *nsid; + if (!(root_ns_mask & CLONE_NEWNS)) { + mntinfo = NULL; + return 0; + } + for (nsid = ns_ids; nsid != NULL; nsid = nsid->next) { if (nsid->nd != &mnt_ns_desc) continue; diff --git a/criu/net.c b/criu/net.c index 44b0ce224..9825db10f 100644 --- a/criu/net.c +++ b/criu/net.c @@ -2057,6 +2057,9 @@ int read_net_ns_img(void) { struct ns_id *ns; + if (!(root_ns_mask & CLONE_NEWNET)) + return 0; + for (ns = ns_ids; ns != NULL; ns = ns->next) { struct cr_img *img; int ret; From 35adc08598f78e9845388f235ffe0917d7606779 Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Thu, 7 Feb 2019 15:17:51 +0300 Subject: [PATCH 0048/1854] mount: rework mount tree build step on restore Build each mntns mount tree alone just after reading mounts for it from image. 
These additional step before merging everything to a single mount tree allows us to have pointers to each mntns root mount at hand, also it allows us to remove extra complication from mnt_build_tree. Teach collect_mnt_from_image return a tail pointer, so we can merge lists together later after building each tree. Add separate merge_mount_trees helper to create joint mount tree for all mntns'es and simplify mnt_build_ids_tree. I don't see any place where we use mntinfo_tree on restore, so save the real root of mntns mounts tree in it, instead of root_yard_mp, will need it in next patches for checking restore of these trees. v2: prepend children to the root_yard in merge_mount_trees so that the order in merged tree persists Signed-off-by: Pavel Tikhomirov --- criu/mount.c | 131 +++++++++++++++++++++++++-------------------------- 1 file changed, 65 insertions(+), 66 deletions(-) diff --git a/criu/mount.c b/criu/mount.c index fdaaa7b31..49708ffd5 100644 --- a/criu/mount.c +++ b/criu/mount.c @@ -330,7 +330,7 @@ static bool mounts_equal(struct mount_info *a, struct mount_info *b) */ static char *mnt_roots; -static struct mount_info *mnt_build_ids_tree(struct mount_info *list, struct mount_info *yard_mount) +static struct mount_info *mnt_build_ids_tree(struct mount_info *list) { struct mount_info *m, *root = NULL; @@ -351,41 +351,14 @@ static struct mount_info *mnt_build_ids_tree(struct mount_info *list, struct mou if (!parent) { /* Only a root mount can be without parent */ - if (root == NULL && m->is_ns_root) { + if (!root && m->is_ns_root) { root = m; - if (!yard_mount) - continue; + continue; } - if (!root) { - pr_err("No parent found for mountpoint %d (@%s)\n", - m->mnt_id, m->mountpoint); - return NULL; - } - - pr_debug("Mountpoint %d (@%s) w/o parent %d\n", - m->mnt_id, m->mountpoint, m->parent_mnt_id); - - if (!mounts_sb_equal(root, m) || - strcmp(root->root, m->root)) { - pr_err("Nested mount namespaces with different " - "roots %d (@%s %s) %d (@%s %s) are not 
supported yet\n", - root->mnt_id, root->mountpoint, root->root, - m->mnt_id, m->mountpoint, m->root); - return NULL; - } - - /* Mount all namespace roots into the roots yard. */ - parent = yard_mount; - if (unlikely(!yard_mount)) { - pr_err("Nested mount %d (@%s %s) w/o root insertion detected\n", - m->mnt_id, m->mountpoint, m->root); - return NULL; - } - - pr_debug("Mountpoint %d (@%s) get parent %d (@%s)\n", - m->mnt_id, m->mountpoint, - parent->mnt_id, parent->mountpoint); + pr_err("No parent found for mountpoint %d (@%s)\n", + m->mnt_id, m->mountpoint); + return NULL; } m->parent = parent; @@ -397,9 +370,6 @@ static struct mount_info *mnt_build_ids_tree(struct mount_info *list, struct mou return NULL; } - if (yard_mount) - return yard_mount; - return root; } @@ -997,8 +967,7 @@ static int resolve_shared_mounts(struct mount_info *info, int root_master_id) return 0; } -static struct mount_info *mnt_build_tree(struct mount_info *list, - struct mount_info *root_mp) +static struct mount_info *mnt_build_tree(struct mount_info *list) { struct mount_info *tree; @@ -1007,7 +976,7 @@ static struct mount_info *mnt_build_tree(struct mount_info *list, */ pr_info("Building mountpoints tree\n"); - tree = mnt_build_ids_tree(list, root_mp); + tree = mnt_build_ids_tree(list); if (!tree) return NULL; @@ -1690,7 +1659,7 @@ struct mount_info *collect_mntinfo(struct ns_id *ns, bool for_dump) return NULL; } - ns->mnt.mntinfo_tree = mnt_build_tree(pm, NULL); + ns->mnt.mntinfo_tree = mnt_build_tree(pm); if (ns->mnt.mntinfo_tree == NULL) goto err; @@ -2881,7 +2850,7 @@ static int get_mp_mountpoint(char *mountpoint, struct mount_info *mi, char *root return 0; } -static int collect_mnt_from_image(struct mount_info **pms, struct ns_id *nsid) +static int collect_mnt_from_image(struct mount_info **head, struct mount_info **tail, struct ns_id *nsid) { MntEntry *me = NULL; int ret, root_len = 1; @@ -2909,8 +2878,10 @@ static int collect_mnt_from_image(struct mount_info **pms, struct ns_id 
*nsid) goto err; pm->nsid = nsid; - pm->next = *pms; - *pms = pm; + pm->next = *head; + *head = pm; + if (!*tail) + *tail = pm; pm->mnt_id = me->mnt_id; pm->parent_mnt_id = me->parent_mnt_id; @@ -2995,11 +2966,20 @@ int read_mnt_ns_img(void) } for (nsid = ns_ids; nsid != NULL; nsid = nsid->next) { + struct mount_info *head = NULL, *tail = NULL; + if (nsid->nd != &mnt_ns_desc) continue; - if (collect_mnt_from_image(&pms, nsid)) + if (collect_mnt_from_image(&head, &tail, nsid)) return -1; + + nsid->mnt.mntinfo_tree = mnt_build_tree(head); + if (!nsid->mnt.mntinfo_tree) + return -1; + + tail->next = pms; + pms = head; } mntinfo = pms; @@ -3101,6 +3081,40 @@ void fini_restore_mntns(void) } } +static int merge_mount_trees(struct mount_info *root_yard) +{ + struct mount_info *first = NULL; + struct ns_id *nsid; + + /* Merge mount trees together under root_yard */ + for (nsid = ns_ids; nsid; nsid = nsid->next) { + struct mount_info *root; + + if (nsid->nd != &mnt_ns_desc) + continue; + + root = nsid->mnt.mntinfo_tree; + + if (!first) + first = root; + else if (!mounts_sb_equal(root, first) || + strcmp(root->root, first->root)) { + pr_err("Nested mount namespaces with different " + "roots %d (@%s %s) %d (@%s %s) are not supported yet\n", + root->mnt_id, root->mountpoint, root->root, + first->mnt_id, first->mountpoint, first->root); + return -1; + } + + pr_debug("Mountpoint %d (@%s) moved to the root yard\n", + root->mnt_id, root->mountpoint); + root->parent = root_yard; + list_add(&root->siblings, &root_yard->children); + } + + return 0; +} + /* * All nested mount namespaces are restore as sub-trees of the root namespace. 
*/ @@ -3140,8 +3154,6 @@ static int populate_roots_yard(void) static int populate_mnt_ns(void) { - struct mount_info *pms; - struct ns_id *nsid; int ret; root_yard_mp = mnt_entry_alloc(); @@ -3151,40 +3163,27 @@ static int populate_mnt_ns(void) root_yard_mp->mountpoint = mnt_roots; root_yard_mp->mounted = true; - pms = mnt_build_tree(mntinfo, root_yard_mp); - if (!pms) + if (merge_mount_trees(root_yard_mp)) return -1; #ifdef CONFIG_BINFMT_MISC_VIRTUALIZED if (!opts.has_binfmt_misc && !list_empty(&binfmt_misc_list)) { /* Add to mount tree. Generic code will mount it later */ - ret = add_cr_time_mount(pms, "binfmt_misc", BINFMT_MISC_HOME, 0); + ret = add_cr_time_mount(root_yard_mp, "binfmt_misc", BINFMT_MISC_HOME, 0); if (ret) return -1; } #endif - if (resolve_shared_mounts(mntinfo, pms->master_id)) + if (resolve_shared_mounts(mntinfo, 0)) return -1; - for (nsid = ns_ids; nsid; nsid = nsid->next) { - if (nsid->nd != &mnt_ns_desc) - continue; - - /* - * Make trees of all namespaces look the - * same, so that manual paths resolution - * works on them. - */ - nsid->mnt.mntinfo_tree = pms; - } - if (validate_mounts(mntinfo, false)) return -1; - mnt_tree_for_each(pms, set_is_overmounted); + mnt_tree_for_each(root_yard_mp, set_is_overmounted); - if (find_remap_mounts(pms)) + if (find_remap_mounts(root_yard_mp)) return -1; if (populate_roots_yard()) @@ -3193,8 +3192,8 @@ static int populate_mnt_ns(void) if (mount_clean_path()) return -1; - ret = mnt_tree_for_each(pms, do_mount_one); - mnt_tree_for_each(pms, do_close_one); + ret = mnt_tree_for_each(root_yard_mp, do_mount_one); + mnt_tree_for_each(root_yard_mp, do_close_one); if (ret == 0 && fixup_remap_mounts()) return -1; From f3cca97d80c77a6f2b9702edc8225ea8bb6034e5 Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Thu, 27 Jun 2019 12:43:40 +0300 Subject: [PATCH 0049/1854] mount: make mnt_resort_siblings nonrecursive and reuse friendly Add mnt_subtree_next DFS-next search to remove recursion. 
v5: add these patch, remove recursion from sorting helpers v6: rip out butifull yet unused step-part of nfs-next algorithm Signed-off-by: Pavel Tikhomirov --- criu/mount.c | 57 +++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 48 insertions(+), 9 deletions(-) diff --git a/criu/mount.c b/criu/mount.c index 49708ffd5..974af6eb2 100644 --- a/criu/mount.c +++ b/criu/mount.c @@ -385,13 +385,12 @@ static unsigned int mnt_depth(struct mount_info *m) return depth; } -static void mnt_resort_siblings(struct mount_info *tree) +static void __mnt_resort_children(struct mount_info *parent) { - struct mount_info *m, *p; LIST_HEAD(list); /* - * Put siblings of each node in an order they can be (u)mounted + * Put children mounts in an order they can be (u)mounted * I.e. if we have mounts on foo/bar/, foo/bar/foobar/ and foo/ * we should put them in the foo/bar/foobar/, foo/bar/, foo/ order. * Otherwise we will not be able to (u)mount them in a sequence. @@ -403,11 +402,12 @@ static void mnt_resort_siblings(struct mount_info *tree) * to contain hundreds (or more) elements. 
*/ - pr_info("\tResorting siblings on %d\n", tree->mnt_id); - while (!list_empty(&tree->children)) { + pr_info("\tResorting children of %d in mount order\n", parent->mnt_id); + while (!list_empty(&parent->children)) { + struct mount_info *m, *p; unsigned int depth; - m = list_first_entry(&tree->children, struct mount_info, siblings); + m = list_first_entry(&parent->children, struct mount_info, siblings); list_del(&m->siblings); depth = mnt_depth(m); @@ -416,10 +416,31 @@ static void mnt_resort_siblings(struct mount_info *tree) break; list_add_tail(&m->siblings, &p->siblings); - mnt_resort_siblings(m); } - list_splice(&list, &tree->children); + list_splice(&list, &parent->children); +} + +static struct mount_info *mnt_subtree_next(struct mount_info *mi, + struct mount_info *root); + +static void resort_siblings(struct mount_info *root, + void (*resort_children)(struct mount_info *)) { + struct mount_info *mi = root; + while (1) { + /* + * Explanation: sorting the children of the tree like these is + * safe and does not break the tree search in mnt_subtree_next + * (DFS-next search), as we sort children before calling next + * on parent and thus before DFS-next ever touches them, so + * from the perspective of DFS-next all children look like they + * are already sorted. 
+ */ + resort_children(mi); + mi = mnt_subtree_next(mi, root); + if (!mi) + break; + } } static void mnt_tree_show(struct mount_info *tree, int off) @@ -980,7 +1001,7 @@ static struct mount_info *mnt_build_tree(struct mount_info *list) if (!tree) return NULL; - mnt_resort_siblings(tree); + resort_siblings(tree, __mnt_resort_children); pr_info("Done:\n"); mnt_tree_show(tree, 0); return tree; @@ -3821,3 +3842,21 @@ int remount_readonly_mounts(void) */ return call_helper_process(ns_remount_readonly_mounts, NULL); } + +static struct mount_info *mnt_subtree_next(struct mount_info *mi, + struct mount_info *root) +{ + if (!list_empty(&mi->children)) + return list_entry(mi->children.next, + struct mount_info, siblings); + + while (mi->parent && mi != root) { + if (mi->siblings.next == &mi->parent->children) + mi = mi->parent; + else + return list_entry(mi->siblings.next, + struct mount_info, siblings); + } + + return NULL; +} From 2b4e653361ec0c3b827fb4af4e0b8848fb9a26f0 Mon Sep 17 00:00:00 2001 From: Nidhi Gupta Date: Tue, 5 Nov 2019 15:19:25 +0530 Subject: [PATCH 0050/1854] Run java functional tests on travis Signed-off-by: Nidhi Gupta --- .travis.yml | 2 ++ scripts/build/Dockerfile.openj9-alpine | 33 ++++++++++++++++++++++++++ scripts/build/Dockerfile.openj9-ubuntu | 30 +++++++++++++++++++++++ scripts/travis/Makefile | 3 +++ scripts/travis/openj9-test.sh | 22 +++++++++++++++++ 5 files changed, 90 insertions(+) create mode 100644 scripts/build/Dockerfile.openj9-alpine create mode 100644 scripts/build/Dockerfile.openj9-ubuntu create mode 100755 scripts/travis/openj9-test.sh diff --git a/.travis.yml b/.travis.yml index 7a0c29a55..6e854540b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -24,6 +24,8 @@ env: - TR_ARCH=fedora-rawhide-aarch64 - TR_ARCH=centos - TR_ARCH=podman-test + - TR_ARCH=openj9-test + matrix: include: - os: linux diff --git a/scripts/build/Dockerfile.openj9-alpine b/scripts/build/Dockerfile.openj9-alpine new file mode 100644 index 000000000..654e7bf31 --- 
/dev/null +++ b/scripts/build/Dockerfile.openj9-alpine @@ -0,0 +1,33 @@ +FROM adoptopenjdk/openjdk8-openj9:alpine + +RUN apk update && apk add \ + bash \ + build-base \ + ccache \ + coreutils \ + git \ + gnutls-dev \ + libaio-dev \ + libcap-dev \ + libnet-dev \ + libnl3-dev \ + pkgconfig \ + protobuf-c-dev \ + protobuf-dev \ + python \ + sudo \ + maven \ + py-yaml \ + py-pip \ + py2-future \ + ip6tables \ + iptables \ + bash + +COPY . /criu +WORKDIR /criu + +RUN make + +ENTRYPOINT mvn -f test/javaTests/pom.xml test + diff --git a/scripts/build/Dockerfile.openj9-ubuntu b/scripts/build/Dockerfile.openj9-ubuntu new file mode 100644 index 000000000..13d9080ff --- /dev/null +++ b/scripts/build/Dockerfile.openj9-ubuntu @@ -0,0 +1,30 @@ +FROM adoptopenjdk/openjdk8-openj9:latest + +RUN apt-get update && apt-get install -y --no-install-recommends protobuf-c-compiler \ + libprotobuf-c-dev \ + libaio-dev \ + python-future \ + libprotobuf-dev \ + protobuf-compiler \ + libcap-dev \ + libnl-3-dev \ + gdb \ + bash \ + python-protobuf \ + python-yaml \ + libnet-dev \ + libnl-route-3-dev \ + libbsd-dev \ + make \ + git \ + pkg-config \ + gcc \ + maven + +COPY . /criu +WORKDIR /criu + +RUN make + +ENTRYPOINT mvn -f test/javaTests/pom.xml test + diff --git a/scripts/travis/Makefile b/scripts/travis/Makefile index baddd6eb1..c6b67935b 100644 --- a/scripts/travis/Makefile +++ b/scripts/travis/Makefile @@ -41,5 +41,8 @@ docker-test: podman-test: ./podman-test.sh +openj9-test: + ./openj9-test.sh + %: $(MAKE) -C ../build $@$(target-suffix) diff --git a/scripts/travis/openj9-test.sh b/scripts/travis/openj9-test.sh new file mode 100755 index 000000000..968f064f8 --- /dev/null +++ b/scripts/travis/openj9-test.sh @@ -0,0 +1,22 @@ +#!/bin/bash + +cd ../.. + +failures="" + +docker build -t criu-openj9-ubuntu-test:latest -f scripts/build/Dockerfile.openj9-ubuntu . +docker run --rm --privileged criu-openj9-ubuntu-test:latest +if [ $? 
-ne 0 ]; then + failures=`echo "$failures ubuntu"` +fi + +docker build -t criu-openj9-alpine-test:latest -f scripts/build/Dockerfile.openj9-alpine . +docker run --rm --privileged criu-openj9-alpine-test:latest +if [ $? -ne 0 ]; then + failures=`echo "$failures alpine"` +fi + +if [ -n "$failures" ]; then + echo "Tests failed on $failures" + exit 1 +fi From 62953d4334b0294ac90e0ec088267fa99daf9f92 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Mon, 4 Nov 2019 08:52:55 +0100 Subject: [PATCH 0051/1854] travis: fix copy paste error from previous commit In my previous commit I copied a line with a return into the main script body. bash can only return from functions. This changes return to exit. Signed-off-by: Adrian Reber --- scripts/travis/travis-tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/travis/travis-tests b/scripts/travis/travis-tests index 1f6b19130..07311511c 100755 --- a/scripts/travis/travis-tests +++ b/scripts/travis/travis-tests @@ -86,7 +86,7 @@ if [ "$WIDTH" -gt 80 ]; then exit 1 fi -[ -n "$SKIP_TRAVIS_TEST" ] && return +[ -n "$SKIP_TRAVIS_TEST" ] && exit 0 ulimit -c unlimited From 6be414bb2be1e8be13b996f60977ecc44b765a2e Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Mon, 4 Nov 2019 08:54:22 +0100 Subject: [PATCH 0052/1854] travis: Do not run privileged containers in LXD Travis uses unprivileged containers for aarch64 in LXD. Docker with '--privileged' fails in such situation. This changes the travis setup to only start docker with '--privileged' if running on x86_64. 
Signed-off-by: Adrian Reber --- scripts/travis/Makefile | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/scripts/travis/Makefile b/scripts/travis/Makefile index c6b67935b..80c7b9230 100644 --- a/scripts/travis/Makefile +++ b/scripts/travis/Makefile @@ -13,6 +13,9 @@ endif TARGETS := alpine fedora-rawhide centos ZDTM_OPTIONS := +UNAME := $(shell uname -m) + +export UNAME alpine: ZDTM_OPTIONS=-x zdtm/static/binfmt_misc -x zdtm/static/netns-nf -x zdtm/static/sched_policy00 -x zdtm/static/seccomp_strict -x zdtm/static/sigaltstack -x zdtm/static/signalfd00 -x zdtm/static/config_inotify_irmap @@ -23,17 +26,29 @@ define DOCKER_JSON endef export DOCKER_JSON -$(TARGETS): - echo "$$DOCKER_JSON" > /etc/docker/daemon.json - systemctl restart docker - $(MAKE) -C ../build $@$(target-suffix) - docker run --env-file docker.env --rm -it --privileged -v /lib/modules:/lib/modules --tmpfs /run criu-$@ scripts/travis/travis-tests -fedora-asan: - echo "$$DOCKER_JSON" > /etc/docker/daemon.json - systemctl restart docker +ifeq ($(UNAME),x86_64) + CONTAINER_OPTS := --rm -it --privileged -v /lib/modules:/lib/modules --tmpfs /run +else + CONTAINER_OPTS := --rm -v /lib/modules:/lib/modules --tmpfs /run +endif + +restart-docker: + if [ "$$UNAME" = "x86_64" ]; then \ + echo "$$DOCKER_JSON" > /etc/docker/daemon.json; \ + cat /etc/docker/daemon.json; \ + systemctl status docker; \ + systemctl restart docker; \ + systemctl status docker; \ + fi + +$(TARGETS): restart-docker $(MAKE) -C ../build $@$(target-suffix) - docker run --rm -it --privileged -v /lib/modules:/lib/modules --tmpfs /run criu-$@ ./scripts/travis/asan.sh $(ZDTM_OPTIONS) + docker run --env-file docker.env $(CONTAINER_OPTS) criu-$@ scripts/travis/travis-tests + +fedora-asan: restart-docker + $(MAKE) -C ../build $@$(target-suffix) + docker run -it $(CONTAINER_OPTS) criu-$@ ./scripts/travis/asan.sh $(ZDTM_OPTIONS) docker-test: ./docker-test.sh From 
075f1beaf7d36cb9ea5030e1faab9661c33290ab Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Mon, 4 Nov 2019 08:56:15 +0100 Subject: [PATCH 0053/1854] Makefile hack for travis aarch64/armv8l For CRIU's compile only tests for armv7hf on Travis we are using 'setarch linux32' which returns armv8l on Travis aarch64. This adds a path in the Makefile to treat armv8l just as armv7hf during compile. This enables us to run armv7hf compile tests on Travis aarch64 hardware. Much faster. Maybe not entirely correct, but probably good enough for compile testing in an armv7hf container. Signed-off-by: Adrian Reber --- Makefile | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 0140330e1..f827e7baa 100644 --- a/Makefile +++ b/Makefile @@ -35,7 +35,6 @@ endif # Architecture specific options. ifeq ($(ARCH),arm) ARMV := $(shell echo $(UNAME-M) | sed -nr 's/armv([[:digit:]]).*/\1/p; t; i7') - DEFINES := -DCONFIG_ARMV$(ARMV) -DCONFIG_VDSO_32 ifeq ($(ARMV),6) USERCFLAGS += -march=armv6 @@ -45,6 +44,16 @@ ifeq ($(ARCH),arm) USERCFLAGS += -march=armv7-a endif + ifeq ($(ARMV),8) + # Running 'setarch linux32 uname -m' returns armv8l on travis aarch64. + # This tells CRIU to handle armv8l just as armv7hf. Right now this is + # only used for compile testing. No further verification of armv8l exists. + USERCFLAGS += -march=armv7-a + ARMV := 7 + endif + + DEFINES := -DCONFIG_ARMV$(ARMV) -DCONFIG_VDSO_32 + PROTOUFIX := y # For simplicity - compile code in Arm mode without interwork. # We could choose Thumb mode as default instead - but a dirty From eab8cf0775ed5569bb22795a86817dae06cf0005 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Mon, 4 Nov 2019 08:58:54 +0100 Subject: [PATCH 0054/1854] travis: switch all arm related tests to real hardware This switches all arm related tests (32bit and 64bit) to the aarch64 systems Travis provides. For arm32 we are running in a armv7hf container on aarch64 with 'setarch linux32'. 
The main changes are that docker on Travis aarch64 cannot use '--privileged' as Travis is using unprivileged LXD containers to setup the testing environment. Signed-off-by: Adrian Reber --- .travis.yml | 50 +++++++++++++++---- scripts/build/Dockerfile.armv7hf.hdr | 4 +- scripts/build/Dockerfile.armv7hf.tmpl | 2 +- .../Dockerfile.fedora-rawhide-aarch64.hdr | 3 -- .../Dockerfile.fedora-rawhide-aarch64.tmpl | 1 - scripts/build/Dockerfile.linux32.tmpl | 47 +++++++++++++++++ scripts/build/Makefile | 4 +- scripts/travis/Makefile | 2 + scripts/travis/podman-test.sh | 2 +- 9 files changed, 93 insertions(+), 22 deletions(-) delete mode 100644 scripts/build/Dockerfile.fedora-rawhide-aarch64.hdr delete mode 120000 scripts/build/Dockerfile.fedora-rawhide-aarch64.tmpl create mode 100644 scripts/build/Dockerfile.linux32.tmpl diff --git a/.travis.yml b/.travis.yml index 6e854540b..85b6b6e07 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,6 +1,6 @@ language: c sudo: required -dist: xenial +dist: bionic cache: ccache services: - docker @@ -9,21 +9,12 @@ env: - TR_ARCH=local CLANG=1 - TR_ARCH=local COMPAT_TEST=y - TR_ARCH=local CLANG=1 COMPAT_TEST=y - - TR_ARCH=alpine - - TR_ARCH=fedora-asan - TR_ARCH=x86_64 - TR_ARCH=x86_64 CLANG=1 - - TR_ARCH=armv7hf - TR_ARCH=ppc64le - TR_ARCH=s390x - - TR_ARCH=armv7hf CLANG=1 - TR_ARCH=ppc64le CLANG=1 - - TR_ARCH=alpine CLANG=1 - TR_ARCH=docker-test - - TR_ARCH=fedora-rawhide - - TR_ARCH=fedora-rawhide-aarch64 - - TR_ARCH=centos - - TR_ARCH=podman-test - TR_ARCH=openj9-test matrix: @@ -36,10 +27,47 @@ matrix: arch: arm64 env: TR_ARCH=local CLANG=1 dist: bionic + - os: linux + arch: arm64 + # This runs on aarch64 with 'setarch linux32' + env: TR_ARCH=armv7hf + dist: bionic + - os: linux + arch: arm64 + # This runs on aarch64 with 'setarch linux32' + env: TR_ARCH=armv7hf CLANG=1 + dist: bionic + - os: linux + arch: arm64 + env: TR_ARCH=fedora-rawhide + dist: bionic + - os: linux + arch: amd64 + env: TR_ARCH=fedora-rawhide + dist: xenial # test 
hangs on bionic + - os: linux + arch: amd64 + env: TR_ARCH=podman-test + dist: bionic + - os: linux + arch: amd64 + env: TR_ARCH=alpine CLANG=1 + dist: xenial # test hangs on bionic + - os: linux + arch: amd64 + env: TR_ARCH=alpine + dist: xenial # test hangs on bionic + - os: linux + arch: amd64 + env: TR_ARCH=centos + dist: xenial # test hangs on bionic + - os: linux + arch: amd64 + env: TR_ARCH=fedora-asan + dist: xenial # test hangs on bionic allow_failures: - env: TR_ARCH=docker-test - env: TR_ARCH=fedora-rawhide - - env: TR_ARCH=fedora-rawhide-aarch64 - env: TR_ARCH=s390x - env: TR_ARCH=local GCOV=1 script: diff --git a/scripts/build/Dockerfile.armv7hf.hdr b/scripts/build/Dockerfile.armv7hf.hdr index d453d6df7..7c66474e5 100644 --- a/scripts/build/Dockerfile.armv7hf.hdr +++ b/scripts/build/Dockerfile.armv7hf.hdr @@ -1,3 +1 @@ -FROM arm32v7/ubuntu:xenial - -COPY scripts/build/qemu-user-static/usr/bin/qemu-arm-static /usr/bin/qemu-arm-static +FROM arm32v7/ubuntu:bionic diff --git a/scripts/build/Dockerfile.armv7hf.tmpl b/scripts/build/Dockerfile.armv7hf.tmpl index cb804790e..7bc6d9cde 120000 --- a/scripts/build/Dockerfile.armv7hf.tmpl +++ b/scripts/build/Dockerfile.armv7hf.tmpl @@ -1 +1 @@ -Dockerfile.tmpl \ No newline at end of file +Dockerfile.linux32.tmpl \ No newline at end of file diff --git a/scripts/build/Dockerfile.fedora-rawhide-aarch64.hdr b/scripts/build/Dockerfile.fedora-rawhide-aarch64.hdr deleted file mode 100644 index 82f29e336..000000000 --- a/scripts/build/Dockerfile.fedora-rawhide-aarch64.hdr +++ /dev/null @@ -1,3 +0,0 @@ -FROM arm64v8/fedora:rawhide - -COPY scripts/build/qemu-user-static/usr/bin/qemu-aarch64-static /usr/bin/qemu-aarch64-static diff --git a/scripts/build/Dockerfile.fedora-rawhide-aarch64.tmpl b/scripts/build/Dockerfile.fedora-rawhide-aarch64.tmpl deleted file mode 120000 index e4c40309c..000000000 --- a/scripts/build/Dockerfile.fedora-rawhide-aarch64.tmpl +++ /dev/null @@ -1 +0,0 @@ -Dockerfile.fedora.tmpl \ No newline at end 
of file diff --git a/scripts/build/Dockerfile.linux32.tmpl b/scripts/build/Dockerfile.linux32.tmpl new file mode 100644 index 000000000..5d3fe5139 --- /dev/null +++ b/scripts/build/Dockerfile.linux32.tmpl @@ -0,0 +1,47 @@ +ARG CC=gcc +ARG ENV1=FOOBAR + +RUN apt-get update && apt-get install -y \ + ccache \ + libnet-dev \ + libnl-route-3-dev \ + $CC \ + bsdmainutils \ + build-essential \ + git-core \ + iptables \ + libaio-dev \ + libcap-dev \ + libgnutls28-dev \ + libgnutls30 \ + libnl-3-dev \ + libprotobuf-c-dev \ + libprotobuf-dev \ + libselinux-dev \ + pkg-config \ + protobuf-c-compiler \ + protobuf-compiler \ + python-minimal \ + python-future + +COPY . /criu +WORKDIR /criu +ENV CC="ccache $CC" CCACHE_DIR=/tmp/.ccache CCACHE_NOCOMPRESS=1 $ENV1=yes + +RUN uname -m && setarch linux32 uname -m && setarch --list + +RUN mv .ccache /tmp && make mrproper && ccache -s && \ + date && \ +# Check single object build + setarch linux32 make -j $(nproc) CC="$CC" criu/parasite-syscall.o && \ +# Compile criu + setarch linux32 make -j $(nproc) CC="$CC" && \ + date && \ +# Check that "make mrproper" works + setarch linux32 make mrproper && ! git clean -ndx --exclude=scripts/build \ + --exclude=.config --exclude=test | grep . 
+ +# Compile tests +RUN date && setarch linux32 make -j $(nproc) CC="$CC" -C test/zdtm && date + +#RUN make test/compel/handle_binary && ./test/compel/handle_binary diff --git a/scripts/build/Makefile b/scripts/build/Makefile index 3d4d91cd5..d7ad82aec 100644 --- a/scripts/build/Makefile +++ b/scripts/build/Makefile @@ -1,5 +1,5 @@ -QEMU_ARCHES := armv7hf ppc64le s390x fedora-rawhide-aarch64 # require qemu -ARCHES := $(QEMU_ARCHES) aarch64 x86_64 fedora-asan fedora-rawhide centos +QEMU_ARCHES := ppc64le s390x # require qemu +ARCHES := $(QEMU_ARCHES) aarch64 x86_64 fedora-asan fedora-rawhide centos armv7hf TARGETS := $(ARCHES) alpine TARGETS_CLANG := $(addsuffix $(TARGETS),-clang) CONTAINER_RUNTIME := docker diff --git a/scripts/travis/Makefile b/scripts/travis/Makefile index 80c7b9230..373171149 100644 --- a/scripts/travis/Makefile +++ b/scripts/travis/Makefile @@ -28,6 +28,8 @@ endef export DOCKER_JSON ifeq ($(UNAME),x86_64) + # On anything besides x86_64 Travis is running unprivileged LXD + # containers which do not support running docker with '--privileged'. CONTAINER_OPTS := --rm -it --privileged -v /lib/modules:/lib/modules --tmpfs /run else CONTAINER_OPTS := --rm -v /lib/modules:/lib/modules --tmpfs /run diff --git a/scripts/travis/podman-test.sh b/scripts/travis/podman-test.sh index 9bd1f3d8b..eafdc73be 100755 --- a/scripts/travis/podman-test.sh +++ b/scripts/travis/podman-test.sh @@ -11,7 +11,7 @@ apt-get install -qq \ apt-get update -qq -apt-get install -qqy podman +apt-get install -qqy podman containernetworking-plugins export SKIP_TRAVIS_TEST=1 From fe668075ad2afe1021f8ff86d774eccb7bd1bef7 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Wed, 13 Nov 2019 08:38:16 +0100 Subject: [PATCH 0055/1854] travis: switch ppc64le and s390x to real hardware Now that Travis also supports ppc64le and s390x we can remove all qemu based docker emulation from our test setup. This now runs ppc64le and s390x tests on real hardware (LXD containers).
Signed-off-by: Adrian Reber --- .travis.yml | 17 +++++++++---- scripts/build/Dockerfile.aarch64.hdr | 3 --- scripts/build/Dockerfile.aarch64.tmpl | 1 - scripts/build/Dockerfile.ppc64le.hdr | 5 ---- scripts/build/Dockerfile.ppc64le.tmpl | 1 - scripts/build/Dockerfile.s390x.hdr | 6 ----- scripts/build/Dockerfile.s390x.tmpl | 1 - scripts/build/Makefile | 21 +--------------- scripts/build/binfmt_misc | 13 ---------- scripts/build/extract-deb-pkg | 36 --------------------------- scripts/travis/travis-tests | 7 ------ 11 files changed, 13 insertions(+), 98 deletions(-) delete mode 100644 scripts/build/Dockerfile.aarch64.hdr delete mode 120000 scripts/build/Dockerfile.aarch64.tmpl delete mode 100644 scripts/build/Dockerfile.ppc64le.hdr delete mode 120000 scripts/build/Dockerfile.ppc64le.tmpl delete mode 100644 scripts/build/Dockerfile.s390x.hdr delete mode 120000 scripts/build/Dockerfile.s390x.tmpl delete mode 100755 scripts/build/binfmt_misc delete mode 100755 scripts/build/extract-deb-pkg diff --git a/.travis.yml b/.travis.yml index 85b6b6e07..3c760d08a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -11,14 +11,22 @@ env: - TR_ARCH=local CLANG=1 COMPAT_TEST=y - TR_ARCH=x86_64 - TR_ARCH=x86_64 CLANG=1 - - TR_ARCH=ppc64le - - TR_ARCH=s390x - - TR_ARCH=ppc64le CLANG=1 - TR_ARCH=docker-test - TR_ARCH=openj9-test - matrix: include: + - os: linux + arch: ppc64le + env: TR_ARCH=local + dist: bionic + - os: linux + arch: ppc64le + env: TR_ARCH=local CLANG=1 + dist: bionic + - os: linux + arch: s390x + env: TR_ARCH=local + dist: bionic - os: linux arch: arm64 env: TR_ARCH=local @@ -68,7 +76,6 @@ matrix: allow_failures: - env: TR_ARCH=docker-test - env: TR_ARCH=fedora-rawhide - - env: TR_ARCH=s390x - env: TR_ARCH=local GCOV=1 script: - sudo make CCACHE=1 -C scripts/travis $TR_ARCH diff --git a/scripts/build/Dockerfile.aarch64.hdr b/scripts/build/Dockerfile.aarch64.hdr deleted file mode 100644 index c90c98088..000000000 --- a/scripts/build/Dockerfile.aarch64.hdr +++ /dev/null @@ 
-1,3 +0,0 @@ -FROM arm64v8/ubuntu:xenial - -COPY scripts/build/qemu-user-static/usr/bin/qemu-aarch64-static /usr/bin/qemu-aarch64-static diff --git a/scripts/build/Dockerfile.aarch64.tmpl b/scripts/build/Dockerfile.aarch64.tmpl deleted file mode 120000 index cb804790e..000000000 --- a/scripts/build/Dockerfile.aarch64.tmpl +++ /dev/null @@ -1 +0,0 @@ -Dockerfile.tmpl \ No newline at end of file diff --git a/scripts/build/Dockerfile.ppc64le.hdr b/scripts/build/Dockerfile.ppc64le.hdr deleted file mode 100644 index ba65901c2..000000000 --- a/scripts/build/Dockerfile.ppc64le.hdr +++ /dev/null @@ -1,5 +0,0 @@ -FROM ppc64le/ubuntu:xenial - -ENV QEMU_CPU POWER8 -COPY scripts/build/qemu-user-static/usr/bin/qemu-ppc64le-static /usr/bin/qemu-ppc64le-static -RUN sed -i '/security/ d' /etc/apt/sources.list diff --git a/scripts/build/Dockerfile.ppc64le.tmpl b/scripts/build/Dockerfile.ppc64le.tmpl deleted file mode 120000 index cb804790e..000000000 --- a/scripts/build/Dockerfile.ppc64le.tmpl +++ /dev/null @@ -1 +0,0 @@ -Dockerfile.tmpl \ No newline at end of file diff --git a/scripts/build/Dockerfile.s390x.hdr b/scripts/build/Dockerfile.s390x.hdr deleted file mode 100644 index e02097f62..000000000 --- a/scripts/build/Dockerfile.s390x.hdr +++ /dev/null @@ -1,6 +0,0 @@ -FROM s390x/debian:latest - -ENV QEMU_CPU z900 -COPY scripts/build/qemu-user-static/usr/bin/qemu-s390x-static /usr/bin/qemu-s390x-static -# The security repository does not seem to exist anymore -RUN sed -i '/security/ d' /etc/apt/sources.list diff --git a/scripts/build/Dockerfile.s390x.tmpl b/scripts/build/Dockerfile.s390x.tmpl deleted file mode 120000 index cb804790e..000000000 --- a/scripts/build/Dockerfile.s390x.tmpl +++ /dev/null @@ -1 +0,0 @@ -Dockerfile.tmpl \ No newline at end of file diff --git a/scripts/build/Makefile b/scripts/build/Makefile index d7ad82aec..a7c78e8bd 100644 --- a/scripts/build/Makefile +++ b/scripts/build/Makefile @@ -1,5 +1,4 @@ -QEMU_ARCHES := ppc64le s390x # require qemu -ARCHES := 
$(QEMU_ARCHES) aarch64 x86_64 fedora-asan fedora-rawhide centos armv7hf +ARCHES := x86_64 fedora-asan fedora-rawhide centos armv7hf TARGETS := $(ARCHES) alpine TARGETS_CLANG := $(addsuffix $(TARGETS),-clang) CONTAINER_RUNTIME := docker @@ -16,15 +15,6 @@ $(foreach arch,$(ARCHES),$(eval $(call ARCH_DEP,$(arch)))) Dockerfile.%: Dockerfile.%.hdr Dockerfile.%.tmpl cat $^ > $@ -qemu-user-static: - ./extract-deb-pkg qemu-user-static - -binfmt_misc: - ./binfmt_misc -.PHONY: binfmt_misc - -$(QEMU_ARCHES): qemu-user-static binfmt_misc - $(TARGETS): mkdir -p $(HOME)/.ccache mv $(HOME)/.ccache ../../ @@ -42,12 +32,3 @@ $(foreach t,$(TARGETS),$(eval $(call CLANG_DEP,$(t)))) %-clang: DB_ENV=--build-arg ENV1=CCACHE_CPP2 s390x-clang: DB_CC=--build-arg CC=clang-3.8 .PHONY: $(TARGETS_CLANG) - -clean: - rm -rf qemu-user-static - for ARCH in $(ARCHES); do \ - FILE=/proc/sys/fs/binfmt_misc/$$ARCH; \ - test -f $$FILE && echo -1 > $$FILE; \ - rm -f Dockerfile.$$ARCH; \ - done -.PHONY: clean diff --git a/scripts/build/binfmt_misc b/scripts/build/binfmt_misc deleted file mode 100755 index bf2a2ecad..000000000 --- a/scripts/build/binfmt_misc +++ /dev/null @@ -1,13 +0,0 @@ -set -e -x - -test -f /proc/sys/fs/binfmt_misc/armv7hf || - echo ':armv7hf:M::\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x28\x00:\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff:/usr/bin/qemu-arm-static:' > /proc/sys/fs/binfmt_misc/register; - -test -f /proc/sys/fs/binfmt_misc/aarch64 || - echo ':aarch64:M::\x7fELF\x02\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\xb7:\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff:/usr/bin/qemu-aarch64-static:' > /proc/sys/fs/binfmt_misc/register - -test -f /proc/sys/fs/binfmt_misc/ppc64le || - echo ':ppc64le:M::\x7fELF\x02\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x15\x00:\xff\xff\xff\xff\xff\xff\xff\xfc\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\x00:/usr/bin/qemu-ppc64le-static:' > 
/proc/sys/fs/binfmt_misc/register - -test -f /proc/sys/fs/binfmt_misc/s390x || - echo ':s390x:M::\x7fELF\x02\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x16:\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff:/usr/bin/qemu-s390x-static:' > /proc/sys/fs/binfmt_misc/register diff --git a/scripts/build/extract-deb-pkg b/scripts/build/extract-deb-pkg deleted file mode 100755 index 44457bc5a..000000000 --- a/scripts/build/extract-deb-pkg +++ /dev/null @@ -1,36 +0,0 @@ -#!/bin/bash - -set -e -set -u -set -o pipefail -MIRROR="https://mirrors.kernel.org/ubuntu" -PKGS="$MIRROR/dists/bionic/universe/binary-amd64/Packages.gz" - -if [ $# -ne 1 ]; then - echo "Usage: $0 package-name" 1>&2 - exit 1 -fi - -if [ -d "$1" ]; then - echo "Directory $1 already exists -- exiting" - exit 0 -fi - -if ! pkg=$(curl -sSL "$PKGS" | zgrep "Filename.*$1" | awk '{ print $2 }'); then - echo "ERROR: no packages matching $1" 1>&2 - exit 1 -fi - -if [ "$(wc -w <<< "$pkg")" -gt 1 ]; then - echo "$pkg" 1>&2 - echo "ERROR: more than one match for $1" 1>&2 - exit 1 -fi - -mkdir "$1" -cd "$1" - -wget "$MIRROR/$pkg" -pkg=$(basename "$pkg") -ar vx "$pkg" -tar xJvf data.tar.xz diff --git a/scripts/travis/travis-tests b/scripts/travis/travis-tests index 07311511c..bc97fd455 100755 --- a/scripts/travis/travis-tests +++ b/scripts/travis/travis-tests @@ -51,13 +51,6 @@ travis_prep () { CC="ccache $CC" fi - # The /etc/apt/sources.list in the current trusty image for ppc64le is - # broken and needs to be fixed - if [ "$TR_ARCH" = "ppc64le" ] ; then - sed -i '/security/ d' /etc/apt/sources.list - fi - - # Do not install x86_64 specific packages on other architectures if [ "$UNAME_M" = "x86_64" ]; then TRAVIS_PKGS="$TRAVIS_PKGS $X86_64_PKGS" From ea018e9a9c78353b8f5532a2e5a36a0d1c5e8769 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Wed, 13 Nov 2019 13:25:30 +0100 Subject: [PATCH 0056/1854] travis: remove group from .travis.yml Tests are successful even after removing 
'group:' from .travis.yml. Apparently it is not necessary. Signed-off-by: Adrian Reber --- .travis.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 3c760d08a..f6f71be48 100644 --- a/.travis.yml +++ b/.travis.yml @@ -82,4 +82,3 @@ script: after_success: - ccache -s - make -C scripts/travis after_success -group: deprecated-2017Q2 From ef277068de3f6f89f394b9f63a3870eddde8c998 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Sat, 9 Nov 2019 22:20:34 +0000 Subject: [PATCH 0057/1854] lib/ptrace: Allow PTRACE_PEEKDATA with errno != 0 >From man ptrace: > On error, all requests return -1, and errno is set appropriately. > Since the value returned by a successful PTRACE_PEEK* request may be > -1, the caller must clear errno before the call, and then check > it afterward to determine whether or not an error occurred. FWIW: if ptrace_peek_area() is called with (errno != 0) it may false-fail if the data is (-1). Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- compel/src/lib/ptrace.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/compel/src/lib/ptrace.c b/compel/src/lib/ptrace.c index 9142bac42..715e564df 100644 --- a/compel/src/lib/ptrace.c +++ b/compel/src/lib/ptrace.c @@ -34,14 +34,20 @@ int ptrace_suspend_seccomp(pid_t pid) int ptrace_peek_area(pid_t pid, void *dst, void *addr, long bytes) { unsigned long w; + int old_errno = errno; + if (bytes & (sizeof(long) - 1)) return -1; + + errno = 0; for (w = 0; w < bytes / sizeof(long); w++) { unsigned long *d = dst, *a = addr; + d[w] = ptrace(PTRACE_PEEKDATA, pid, a + w, NULL); if (d[w] == -1U && errno) goto err; } + errno = old_errno; return 0; err: return -2; From a93117ede1e58db68246f775c00bc21683954c39 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Sat, 9 Nov 2019 22:20:35 +0000 Subject: [PATCH 0058/1854] lib/ptrace: Be more elaborate about failures Also, don't use the magic -2 => return errno on failure. 
Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- compel/src/lib/ptrace.c | 46 ++++++++++++++++++++++++++++++----------- 1 file changed, 34 insertions(+), 12 deletions(-) diff --git a/compel/src/lib/ptrace.c b/compel/src/lib/ptrace.c index 715e564df..4c3530c85 100644 --- a/compel/src/lib/ptrace.c +++ b/compel/src/lib/ptrace.c @@ -36,50 +36,72 @@ int ptrace_peek_area(pid_t pid, void *dst, void *addr, long bytes) unsigned long w; int old_errno = errno; - if (bytes & (sizeof(long) - 1)) + if (bytes & (sizeof(long) - 1)) { + pr_err("Peek request with non-word size %ld\n", bytes); return -1; + } errno = 0; for (w = 0; w < bytes / sizeof(long); w++) { unsigned long *d = dst, *a = addr; d[w] = ptrace(PTRACE_PEEKDATA, pid, a + w, NULL); - if (d[w] == -1U && errno) + if (d[w] == -1U && errno) { + pr_perror("PEEKDATA failed"); goto err; + } } errno = old_errno; return 0; err: - return -2; + return -errno; } int ptrace_poke_area(pid_t pid, void *src, void *addr, long bytes) { unsigned long w; - if (bytes & (sizeof(long) - 1)) + + if (bytes & (sizeof(long) - 1)) { + pr_err("Poke request with non-word size %ld\n", bytes); return -1; + } + for (w = 0; w < bytes / sizeof(long); w++) { unsigned long *s = src, *a = addr; - if (ptrace(PTRACE_POKEDATA, pid, a + w, s[w])) + + if (ptrace(PTRACE_POKEDATA, pid, a + w, s[w])) { + pr_perror("POKEDATA failed"); goto err; + } } return 0; err: - return -2; + return -errno; } /* don't swap big space, it might overflow the stack */ int ptrace_swap_area(pid_t pid, void *dst, void *src, long bytes) { void *t = alloca(bytes); + int err; - if (ptrace_peek_area(pid, t, dst, bytes)) - return -1; + err = ptrace_peek_area(pid, t, dst, bytes); + if (err) + return err; - if (ptrace_poke_area(pid, src, dst, bytes)) { - if (ptrace_poke_area(pid, t, dst, bytes)) - return -2; - return -1; + err = ptrace_poke_area(pid, src, dst, bytes); + if (err) { + int err2; + + pr_err("Can't poke %d @ %p from %p sized %ld\n", + pid, dst, src, bytes); + + err2 
= ptrace_poke_area(pid, t, dst, bytes); + if (err2) { + pr_err("Can't restore the original data with poke\n"); + return err2; + } + return err; } memcpy(src, t, bytes); From c8f16bfacb82b98841a9de49f5f9a15254d7b95f Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Sat, 9 Nov 2019 22:20:36 +0000 Subject: [PATCH 0059/1854] compel/infect: Warn if close() failed on memfd As a preparation for __must_check on compel_syscall(), check it on close() too - maybe not as useful as with other syscalls, but why not. Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- compel/src/lib/infect.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/compel/src/lib/infect.c b/compel/src/lib/infect.c index f0bcaf334..f726a9895 100644 --- a/compel/src/lib/infect.c +++ b/compel/src/lib/infect.c @@ -718,14 +718,25 @@ static int parasite_mmap_exchange(struct parasite_ctl *ctl, unsigned long size) return 0; } +static void parasite_memfd_close(struct parasite_ctl *ctl, int fd) +{ + bool __maybe_unused compat = !compel_mode_native(ctl); + long ret; + int err; + + err = compel_syscall(ctl, __NR(close, compat), &ret, fd, 0, 0, 0, 0, 0); + if (err || ret) + pr_err("Can't close memfd\n"); +} + static int parasite_memfd_exchange(struct parasite_ctl *ctl, unsigned long size) { void *where = (void *)ctl->ictx.syscall_ip + BUILTIN_SYSCALL_SIZE; + bool __maybe_unused compat_task = !compel_mode_native(ctl); uint8_t orig_code[MEMFD_FNAME_SZ] = MEMFD_FNAME; pid_t pid = ctl->rpid; long sret = -ENOSYS; int ret, fd, lfd; - bool __maybe_unused compat_task = !compel_mode_native(ctl); if (ctl->ictx.flags & INFECT_NO_MEMFD) return 1; @@ -741,10 +752,9 @@ static int parasite_memfd_exchange(struct parasite_ctl *ctl, unsigned long size) (unsigned long)where, 0, 0, 0, 0, 0); if (ptrace_poke_area(pid, orig_code, where, sizeof(orig_code))) { - fd = (int)(long)sret; + fd = (int)sret; if (fd >= 0) - compel_syscall(ctl, __NR(close, compat_task), &sret, - fd, 0, 0, 0, 0, 
0); + parasite_memfd_close(ctl, fd); pr_err("Can't restore memfd args (pid: %d)\n", pid); return -1; } @@ -752,7 +762,7 @@ static int parasite_memfd_exchange(struct parasite_ctl *ctl, unsigned long size) if (ret < 0) return ret; - fd = (int)(long)sret; + fd = (int)sret; if (fd == -ENOSYS) return 1; if (fd < 0) { @@ -787,7 +797,7 @@ static int parasite_memfd_exchange(struct parasite_ctl *ctl, unsigned long size) goto err_curef; } - compel_syscall(ctl, __NR(close, compat_task), &sret, fd, 0, 0, 0, 0, 0); + parasite_memfd_close(ctl, fd); close(lfd); pr_info("Set up parasite blob using memfd\n"); @@ -796,7 +806,7 @@ static int parasite_memfd_exchange(struct parasite_ctl *ctl, unsigned long size) err_curef: close(lfd); err_cure: - compel_syscall(ctl, __NR(close, compat_task), &sret, fd, 0, 0, 0, 0, 0); + parasite_memfd_close(ctl, fd); return -1; } From ee449e27c6979291660772db9724474a55d83b12 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Sat, 9 Nov 2019 22:20:37 +0000 Subject: [PATCH 0060/1854] compel: Mark compat argument of __NR() as used And remove __maybe_unused work-around. 
Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- compel/arch/aarch64/src/lib/include/syscall.h | 2 +- compel/arch/aarch64/src/lib/include/uapi/asm/infect-types.h | 2 +- compel/arch/arm/src/lib/include/syscall.h | 2 +- compel/arch/arm/src/lib/include/uapi/asm/infect-types.h | 2 +- compel/arch/ppc64/src/lib/include/syscall.h | 2 +- compel/arch/ppc64/src/lib/include/uapi/asm/infect-types.h | 2 +- compel/arch/s390/src/lib/include/uapi/asm/infect-types.h | 2 +- compel/src/lib/infect.c | 4 ++-- 8 files changed, 9 insertions(+), 9 deletions(-) diff --git a/compel/arch/aarch64/src/lib/include/syscall.h b/compel/arch/aarch64/src/lib/include/syscall.h index e2ec1272e..30290667a 100644 --- a/compel/arch/aarch64/src/lib/include/syscall.h +++ b/compel/arch/aarch64/src/lib/include/syscall.h @@ -1,4 +1,4 @@ #ifndef __COMPEL_SYSCALL_H__ #define __COMPEL_SYSCALL_H__ -#define __NR(syscall, compat) __NR_##syscall +#define __NR(syscall, compat) ({ (void)compat; __NR_##syscall; }) #endif diff --git a/compel/arch/aarch64/src/lib/include/uapi/asm/infect-types.h b/compel/arch/aarch64/src/lib/include/uapi/asm/infect-types.h index 4662f7689..7a33baa8e 100644 --- a/compel/arch/aarch64/src/lib/include/uapi/asm/infect-types.h +++ b/compel/arch/aarch64/src/lib/include/uapi/asm/infect-types.h @@ -27,6 +27,6 @@ typedef struct user_fpsimd_state user_fpregs_struct_t; #define ARCH_SI_TRAP TRAP_BRKPT -#define __NR(syscall, compat) __NR_##syscall +#define __NR(syscall, compat) ({ (void)compat; __NR_##syscall; }) #endif /* UAPI_COMPEL_ASM_TYPES_H__ */ diff --git a/compel/arch/arm/src/lib/include/syscall.h b/compel/arch/arm/src/lib/include/syscall.h index e2ec1272e..30290667a 100644 --- a/compel/arch/arm/src/lib/include/syscall.h +++ b/compel/arch/arm/src/lib/include/syscall.h @@ -1,4 +1,4 @@ #ifndef __COMPEL_SYSCALL_H__ #define __COMPEL_SYSCALL_H__ -#define __NR(syscall, compat) __NR_##syscall +#define __NR(syscall, compat) ({ (void)compat; __NR_##syscall; }) #endif diff --git 
a/compel/arch/arm/src/lib/include/uapi/asm/infect-types.h b/compel/arch/arm/src/lib/include/uapi/asm/infect-types.h index b8286d404..69222b251 100644 --- a/compel/arch/arm/src/lib/include/uapi/asm/infect-types.h +++ b/compel/arch/arm/src/lib/include/uapi/asm/infect-types.h @@ -61,6 +61,6 @@ struct user_vfp_exc { #define ARCH_SI_TRAP TRAP_BRKPT -#define __NR(syscall, compat) __NR_##syscall +#define __NR(syscall, compat) ({ (void)compat; __NR_##syscall; }) #endif /* UAPI_COMPEL_ASM_TYPES_H__ */ diff --git a/compel/arch/ppc64/src/lib/include/syscall.h b/compel/arch/ppc64/src/lib/include/syscall.h index e2ec1272e..30290667a 100644 --- a/compel/arch/ppc64/src/lib/include/syscall.h +++ b/compel/arch/ppc64/src/lib/include/syscall.h @@ -1,4 +1,4 @@ #ifndef __COMPEL_SYSCALL_H__ #define __COMPEL_SYSCALL_H__ -#define __NR(syscall, compat) __NR_##syscall +#define __NR(syscall, compat) ({ (void)compat; __NR_##syscall; }) #endif diff --git a/compel/arch/ppc64/src/lib/include/uapi/asm/infect-types.h b/compel/arch/ppc64/src/lib/include/uapi/asm/infect-types.h index 89fc4aa3c..126fa2ea3 100644 --- a/compel/arch/ppc64/src/lib/include/uapi/asm/infect-types.h +++ b/compel/arch/ppc64/src/lib/include/uapi/asm/infect-types.h @@ -81,6 +81,6 @@ typedef struct { #define ARCH_SI_TRAP TRAP_BRKPT -#define __NR(syscall, compat) __NR_##syscall +#define __NR(syscall, compat) ({ (void)compat; __NR_##syscall; }) #endif /* UAPI_COMPEL_ASM_TYPES_H__ */ diff --git a/compel/arch/s390/src/lib/include/uapi/asm/infect-types.h b/compel/arch/s390/src/lib/include/uapi/asm/infect-types.h index fddf65d3b..8171d3395 100644 --- a/compel/arch/s390/src/lib/include/uapi/asm/infect-types.h +++ b/compel/arch/s390/src/lib/include/uapi/asm/infect-types.h @@ -73,7 +73,7 @@ typedef struct { #define user_regs_native(pregs) true -#define __NR(syscall, compat) __NR_##syscall +#define __NR(syscall, compat) ({ (void)compat; __NR_##syscall; }) struct mmap_arg_struct { unsigned long addr; diff --git a/compel/src/lib/infect.c 
b/compel/src/lib/infect.c index f726a9895..656cc030d 100644 --- a/compel/src/lib/infect.c +++ b/compel/src/lib/infect.c @@ -720,7 +720,7 @@ static int parasite_mmap_exchange(struct parasite_ctl *ctl, unsigned long size) static void parasite_memfd_close(struct parasite_ctl *ctl, int fd) { - bool __maybe_unused compat = !compel_mode_native(ctl); + bool compat = !compel_mode_native(ctl); long ret; int err; @@ -732,7 +732,7 @@ static void parasite_memfd_close(struct parasite_ctl *ctl, int fd) static int parasite_memfd_exchange(struct parasite_ctl *ctl, unsigned long size) { void *where = (void *)ctl->ictx.syscall_ip + BUILTIN_SYSCALL_SIZE; - bool __maybe_unused compat_task = !compel_mode_native(ctl); + bool compat_task = !compel_mode_native(ctl); uint8_t orig_code[MEMFD_FNAME_SZ] = MEMFD_FNAME; pid_t pid = ctl->rpid; long sret = -ENOSYS; From 71738565780552b93fad5dcd42ecfcb4e972471f Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Sat, 9 Nov 2019 22:20:38 +0000 Subject: [PATCH 0061/1854] lib/infect: Check if compel succeed in executing munmap Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- compel/arch/s390/src/lib/infect.c | 4 +++- compel/src/lib/infect.c | 10 +++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/compel/arch/s390/src/lib/infect.c b/compel/arch/s390/src/lib/infect.c index 00e9c36d2..7e7d24ce2 100644 --- a/compel/arch/s390/src/lib/infect.c +++ b/compel/arch/s390/src/lib/infect.c @@ -453,8 +453,10 @@ void *remote_mmap(struct parasite_ctl *ctl, if (ptrace_poke_area(pid, &arg_struct, where, sizeof(arg_struct))) { pr_err("Can't restore mmap args (pid: %d)\n", pid); if (map != 0) { - compel_syscall(ctl, __NR_munmap, NULL, map, + err = compel_syscall(ctl, __NR_munmap, NULL, map, length, 0, 0, 0, 0); + if (err) + pr_err("Can't munmap %d\n", err); map = 0; } } diff --git a/compel/src/lib/infect.c b/compel/src/lib/infect.c index 656cc030d..8b377e7d2 100644 --- a/compel/src/lib/infect.c +++ b/compel/src/lib/infect.c @@ -1303,6 
+1303,7 @@ int compel_stop_daemon(struct parasite_ctl *ctl) int compel_cure_remote(struct parasite_ctl *ctl) { long ret; + int err; if (compel_stop_daemon(ctl)) return -1; @@ -1310,9 +1311,12 @@ int compel_cure_remote(struct parasite_ctl *ctl) if (!ctl->remote_map) return 0; - compel_syscall(ctl, __NR(munmap, !compel_mode_native(ctl)), &ret, - (unsigned long)ctl->remote_map, ctl->map_length, - 0, 0, 0, 0); + err = compel_syscall(ctl, __NR(munmap, !compel_mode_native(ctl)), &ret, + (unsigned long)ctl->remote_map, ctl->map_length, + 0, 0, 0, 0); + if (err) + return err; + if (ret) { pr_err("munmap for remote map %p, %lu returned %lu\n", ctl->remote_map, ctl->map_length, ret); From b5a83623b0327c19a2b9e6da28c434f28e33f7c3 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Sat, 9 Nov 2019 22:20:39 +0000 Subject: [PATCH 0062/1854] cr-dump: Try to cure remote on err-paths On daemon stop or threads dump failures it's still desired to remove parasite from the remote (if possible). Try best and keep hoping. Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- criu/cr-dump.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/criu/cr-dump.c b/criu/cr-dump.c index f72373d22..56724f9a5 100644 --- a/criu/cr-dump.c +++ b/criu/cr-dump.c @@ -1385,16 +1385,20 @@ static int dump_one_task(struct pstree_item *item, InventoryEntry *parent_ie) ret = compel_stop_daemon(parasite_ctl); if (ret) { - pr_err("Can't cure (pid: %d) from parasite\n", pid); - goto err; + pr_err("Can't stop daemon in parasite (pid: %d)\n", pid); + goto err_cure; } ret = dump_task_threads(parasite_ctl, item); if (ret) { pr_err("Can't dump threads\n"); - goto err; + goto err_cure; } + /* + * On failure local map will be cured in cr_dump_finish() + * for lazy pages.
+ */ if (opts.lazy_pages) ret = compel_cure_remote(parasite_ctl); else @@ -1427,7 +1431,9 @@ err: err_cure: close_cr_imgset(&cr_imgset); err_cure_imgset: - compel_cure(parasite_ctl); + ret = compel_cure(parasite_ctl); + if (ret) + pr_err("Can't cure (pid: %d) from parasite\n", pid); goto err; } From 1038a0ae44971129b3720c4351b788913e7be8f2 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Sat, 9 Nov 2019 22:20:40 +0000 Subject: [PATCH 0063/1854] cr-dump: Warn if unmapping local memfd failed Probably, not the worst that could happen, but still unexpected. Preparing the ground to make compel_cure*() functions __must_check. Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- criu/cr-dump.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/criu/cr-dump.c b/criu/cr-dump.c index 56724f9a5..4b5a01cfd 100644 --- a/criu/cr-dump.c +++ b/criu/cr-dump.c @@ -1537,7 +1537,8 @@ static int cr_pre_dump_finish(int status) timing_stop(TIME_MEMWRITE); destroy_page_pipe(mem_pp); - compel_cure_local(ctl); + if (compel_cure_local(ctl)) + pr_err("Can't cure local: something happened with mapping?\n"); } free_pstree(root_item); @@ -1664,7 +1665,8 @@ static int cr_lazy_mem_dump(void) for_each_pstree_item(item) { if (item->pid->state != TASK_DEAD) { destroy_page_pipe(dmpi(item)->mem_pp); - compel_cure_local(dmpi(item)->parasite_ctl); + if (compel_cure_local(dmpi(item)->parasite_ctl)) + pr_err("Can't cure local: something happened with mapping?\n"); } } From abe48f8c3618113035c3e5ff76747b0342b6c7e7 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Sat, 9 Nov 2019 22:20:41 +0000 Subject: [PATCH 0064/1854] cr-restore: Warn if restorer can't be unmapped Too late to stop restore: it's already printed that restore was successful. Oh, well warn aloud about infection. 
Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- criu/cr-restore.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/criu/cr-restore.c b/criu/cr-restore.c index b4530f8e5..25b820132 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -1952,6 +1952,7 @@ static void finalize_restore(void) for_each_pstree_item(item) { pid_t pid = item->pid->real; struct parasite_ctl *ctl; + unsigned long restorer_addr; if (!task_alive(item)) continue; @@ -1961,7 +1962,9 @@ static void finalize_restore(void) if (ctl == NULL) continue; - compel_unmap(ctl, (unsigned long)rsti(item)->munmap_restorer); + restorer_addr = (unsigned long)rsti(item)->munmap_restorer; + if (compel_unmap(ctl, restorer_addr)) + pr_err("Failed to unmap restorer from %d\n", pid); xfree(ctl); From bd17ee85882033ead401a89e1eb21b18c7cb2afb Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Sat, 9 Nov 2019 22:20:42 +0000 Subject: [PATCH 0065/1854] parasite-syscall: Log if can't cure on failed infection Maybe expected, hopefully never happens - let's warn in any case. Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- criu/parasite-syscall.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/criu/parasite-syscall.c b/criu/parasite-syscall.c index b9788a4c2..e5a8194e5 100644 --- a/criu/parasite-syscall.c +++ b/criu/parasite-syscall.c @@ -565,7 +565,8 @@ struct parasite_ctl *parasite_infect_seized(pid_t pid, struct pstree_item *item, parasite_ensure_args_size(aio_rings_args_size(vma_area_list)); if (compel_infect(ctl, item->nr_threads, parasite_args_size) < 0) { - compel_cure(ctl); + if (compel_cure(ctl)) + pr_warn("Can't cure failed infection\n"); return NULL; } From c21c0aea1bd11b9d5c99803a7413314e0d6a0866 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Sat, 9 Nov 2019 22:20:43 +0000 Subject: [PATCH 0066/1854] compel/infect: Detach but fail compel_resume_task() Unknown state means that the task in the end may be not in wanted state. Return err code. 
Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- compel/src/lib/infect.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/compel/src/lib/infect.c b/compel/src/lib/infect.c index 8b377e7d2..3fad85ed3 100644 --- a/compel/src/lib/infect.c +++ b/compel/src/lib/infect.c @@ -313,6 +313,8 @@ err: int compel_resume_task(pid_t pid, int orig_st, int st) { + int ret = 0; + pr_debug("\tUnseizing %d into %d\n", pid, st); if (st == COMPEL_TASK_DEAD) { @@ -335,15 +337,17 @@ int compel_resume_task(pid_t pid, int orig_st, int st) */ if (orig_st == COMPEL_TASK_STOPPED) kill(pid, SIGSTOP); - } else + } else { pr_err("Unknown final state %d\n", st); + ret = -1; + } if (ptrace(PTRACE_DETACH, pid, NULL, NULL)) { pr_perror("Unable to detach from %d", pid); return -1; } - return 0; + return ret; } static int gen_parasite_saddr(struct sockaddr_un *saddr, int key) From 56bc4189e47c3f356c6f407544f5a88768bd4f00 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Sat, 9 Nov 2019 22:20:44 +0000 Subject: [PATCH 0067/1854] criu: Kill tasks even when the network is unlocked Currently if anything fails after network has been unlocked tasks aren't killed. Which doesn't work anyway: any stage sets `ret` and nothing later gets called. Which means the tasks aren't resumed properly. Furthermore, functions like catch_tasks() and compel_stop_on_syscall() return failure on the first error. Let's do the cleanup even when the network is unlocked. If we want to keep the mess and ignore failures - a cli option should be introduced for that (and existing code should be reworked with decisions what is critical and what can be ignored). Move "Restore finished successfully" message accordingly where everything is evidently good. While at here, any late failure will result not only in cleanup but in criu returning error code. 
Which in result makes tests to fail in such case: > ======================= Run zdtm/static/inotify04 in ns ======================== > Start test > ./inotify04 --pidfile=inotify04.pid --outfile=inotify04.out --dirname=inotify04.test > Run criu dump > =[log]=> dump/zdtm/static/inotify04/84/1/dump.log > ------------------------ grep Error ------------------------ > (00.119763) fsnotify: openable (inode match) as zdtm/static/inotify04.test/inotify-testfile > (00.119766) fsnotify: Dumping /zdtm/static/inotify04.test/inotify-testfile as path for handle > (00.119769) fsnotify: id 0x00000b flags 0x000800 > (00.119787) 88 fdinfo 5: pos: 0 flags: 4000/0 > (00.119796) Warn (criu/fsnotify.c:336): fsnotify: The 0x00000c inotify events will be dropped > ------------------------ ERROR OVER ------------------------ > Run criu restore > =[log]=> dump/zdtm/static/inotify04/84/1/restore.log > ------------------------ grep Error ------------------------ > (00.391582) 123 was stopped > (00.391667) 106 was trapped > (00.391674) 106 (native) is going to execute the syscall 11, required is 11 > (00.391697) 106 was stopped > (00.391720) Error (compel/src/lib/infect.c:1439): Task 123 is in unexpected state: b7f > (00.391736) Error (compel/src/lib/infect.c:1447): Task stopped with 11: Segmentation fault > ------------------------ ERROR OVER ------------------------ > 5: Old maps lost: set([]) > 5: New maps appeared: set([u'10000-1a000 rwxp', u'1a000-24000 rw-p']) > ############### Test zdtm/static/inotify04 FAIL at maps compare ################ > Send the 9 signal to 106 > Wait for zdtm/static/inotify04(106) to die for 0.100000 > ======================= Test zdtm/static/inotify04 PASS ======================== Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- criu/cr-restore.c | 50 ++++++++++++++++++++++++++++++----------------- 1 file changed, 32 insertions(+), 18 deletions(-) diff --git a/criu/cr-restore.c b/criu/cr-restore.c index 25b820132..05a25835a 100644 --- 
a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -1974,7 +1974,7 @@ static void finalize_restore(void) } } -static void finalize_restore_detach(int status) +static int finalize_restore_detach(void) { struct pstree_item *item; @@ -1988,16 +1988,21 @@ static void finalize_restore_detach(int status) for (i = 0; i < item->nr_threads; i++) { pid = item->threads[i].real; if (pid < 0) { - BUG_ON(status >= 0); - break; + pr_err("pstree item has unvalid pid %d\n", pid); + continue; } - if (arch_set_thread_regs_nosigrt(&item->threads[i])) + if (arch_set_thread_regs_nosigrt(&item->threads[i])) { pr_perror("Restoring regs for %d failed", pid); - if (ptrace(PTRACE_DETACH, pid, NULL, 0)) - pr_perror("Unable to execute %d", pid); + return -1; + } + if (ptrace(PTRACE_DETACH, pid, NULL, 0)) { + pr_perror("Unable to detach %d", pid); + return -1; + } } } + return 0; } static void ignore_kids(void) @@ -2255,32 +2260,37 @@ skip_ns_bouncing: /* * ------------------------------------------------------------- - * Below this line nothing should fail, because network is unlocked + * Network is unlocked. If something fails below - we lose data + * or a connection. */ attach_to_tasks(root_seized); - ret = restore_switch_stage(CR_STATE_RESTORE_CREDS); - BUG_ON(ret); + if (restore_switch_stage(CR_STATE_RESTORE_CREDS)) + goto out_kill_network_unlocked; timing_stop(TIME_RESTORE); - ret = catch_tasks(root_seized, &flag); + if (catch_tasks(root_seized, &flag)) { + pr_err("Can't catch all tasks\n"); + goto out_kill_network_unlocked; + } if (lazy_pages_finish_restore()) - goto out_kill; + goto out_kill_network_unlocked; - pr_info("Restore finished successfully. 
Resuming tasks.\n"); __restore_switch_stage(CR_STATE_COMPLETE); - if (ret == 0) - ret = compel_stop_on_syscall(task_entries->nr_threads, - __NR(rt_sigreturn, 0), __NR(rt_sigreturn, 1), flag); + ret = compel_stop_on_syscall(task_entries->nr_threads, + __NR(rt_sigreturn, 0), __NR(rt_sigreturn, 1), flag); + if (ret) { + pr_err("Can't stop all tasks on rt_sigreturn\n"); + goto out_kill_network_unlocked; + } if (clear_breakpoints()) pr_err("Unable to flush breakpoints\n"); - if (ret == 0) - finalize_restore(); + finalize_restore(); ret = run_scripts(ACT_PRE_RESUME); if (ret) @@ -2292,8 +2302,10 @@ skip_ns_bouncing: fini_cgroup(); /* Detaches from processes and they continue run through sigreturn. */ - finalize_restore_detach(ret); + if (finalize_restore_detach()) + goto out_kill_network_unlocked; + pr_info("Restore finished successfully. Tasks resumed.\n"); write_stats(RESTORE_STATS); ret = run_scripts(ACT_POST_RESUME); @@ -2305,6 +2317,8 @@ skip_ns_bouncing: return 0; +out_kill_network_unlocked: + pr_err("Killing processes because of failure on restore.\nThe Network was unlocked so some data or a connection may have been lost.\n"); out_kill: /* * The processes can be killed only when all of them have been created, From 1c0716924bbc1128c478388b70904438a5934e73 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Sat, 9 Nov 2019 22:20:45 +0000 Subject: [PATCH 0068/1854] compel/criu: Add __must_check All those compel functions can fail by various reasons. It may be status of the system, interruption by user or anything else. It's really desired to handle as many PIE related errors as possible otherwise it's hard to analyze statuses of parasite/restorer and the C/R process. At least warning for logs should be produced or even C/R stopped. 
Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- compel/include/uapi/cpu.h | 2 +- compel/include/uapi/infect-rpc.h | 6 ++-- compel/include/uapi/infect-util.h | 5 ++- compel/include/uapi/infect.h | 39 +++++++++++++----------- compel/include/uapi/ptrace.h | 7 +++-- compel/include/uapi/sigframe-common.h | 5 +-- compel/plugins/include/uapi/plugin-fds.h | 2 +- compel/plugins/include/uapi/std/infect.h | 8 +++-- compel/plugins/include/uapi/std/log.h | 1 + criu/seize.c | 2 +- include/common/compiler.h | 27 ++++++++++++++++ 11 files changed, 71 insertions(+), 33 deletions(-) diff --git a/compel/include/uapi/cpu.h b/compel/include/uapi/cpu.h index 6f827d447..72c8a516c 100644 --- a/compel/include/uapi/cpu.h +++ b/compel/include/uapi/cpu.h @@ -6,7 +6,7 @@ #include -extern int compel_cpuid(compel_cpuinfo_t *info); +extern int /* TODO: __must_check */ compel_cpuid(compel_cpuinfo_t *info); extern bool compel_cpu_has_feature(unsigned int feature); extern bool compel_fpu_has_feature(unsigned int feature); extern uint32_t compel_fpu_feature_size(unsigned int feature); diff --git a/compel/include/uapi/infect-rpc.h b/compel/include/uapi/infect-rpc.h index 0176c1142..180dedf1f 100644 --- a/compel/include/uapi/infect-rpc.h +++ b/compel/include/uapi/infect-rpc.h @@ -6,9 +6,9 @@ #include struct parasite_ctl; -extern int compel_rpc_sync(unsigned int cmd, struct parasite_ctl *ctl); -extern int compel_rpc_call(unsigned int cmd, struct parasite_ctl *ctl); -extern int compel_rpc_call_sync(unsigned int cmd, struct parasite_ctl *ctl); +extern int __must_check compel_rpc_sync(unsigned int cmd, struct parasite_ctl *ctl); +extern int __must_check compel_rpc_call(unsigned int cmd, struct parasite_ctl *ctl); +extern int __must_check compel_rpc_call_sync(unsigned int cmd, struct parasite_ctl *ctl); extern int compel_rpc_sock(struct parasite_ctl *ctl); #define PARASITE_USER_CMDS 64 diff --git a/compel/include/uapi/infect-util.h b/compel/include/uapi/infect-util.h index 7307ba57a..4e32d13dc 
100644 --- a/compel/include/uapi/infect-util.h +++ b/compel/include/uapi/infect-util.h @@ -1,6 +1,9 @@ #ifndef __COMPEL_INFECT_UTIL_H__ #define __COMPEL_INFECT_UTIL_H__ + +#include "common/compiler.h" + struct parasite_ctl; -extern int compel_util_send_fd(struct parasite_ctl *ctl, int fd); +extern int __must_check compel_util_send_fd(struct parasite_ctl *ctl, int fd); extern int compel_util_recv_fd(struct parasite_ctl *ctl, int *pfd); #endif diff --git a/compel/include/uapi/infect.h b/compel/include/uapi/infect.h index 08beaffcd..dd672bc1c 100644 --- a/compel/include/uapi/infect.h +++ b/compel/include/uapi/infect.h @@ -13,7 +13,7 @@ #define PARASITE_START_AREA_MIN (4096) -extern int compel_interrupt_task(int pid); +extern int __must_check compel_interrupt_task(int pid); struct seize_task_status { unsigned long long sigpnd; @@ -23,27 +23,28 @@ struct seize_task_status { int seccomp_mode; }; -extern int compel_wait_task(int pid, int ppid, +extern int __must_check compel_wait_task(int pid, int ppid, int (*get_status)(int pid, struct seize_task_status *, void *data), void (*free_status)(int pid, struct seize_task_status *, void *data), struct seize_task_status *st, void *data); -extern int compel_stop_task(int pid); +extern int __must_check compel_stop_task(int pid); extern int compel_resume_task(pid_t pid, int orig_state, int state); struct parasite_ctl; struct parasite_thread_ctl; -extern struct parasite_ctl *compel_prepare(int pid); -extern struct parasite_ctl *compel_prepare_noctx(int pid); -extern int compel_infect(struct parasite_ctl *ctl, unsigned long nr_threads, unsigned long args_size); -extern struct parasite_thread_ctl *compel_prepare_thread(struct parasite_ctl *ctl, int pid); +extern struct parasite_ctl __must_check *compel_prepare(int pid); +extern struct parasite_ctl __must_check *compel_prepare_noctx(int pid); +extern int __must_check compel_infect(struct parasite_ctl *ctl, + unsigned long nr_threads, unsigned long args_size); +extern struct 
parasite_thread_ctl __must_check *compel_prepare_thread(struct parasite_ctl *ctl, int pid); extern void compel_release_thread(struct parasite_thread_ctl *); -extern int compel_stop_daemon(struct parasite_ctl *ctl); -extern int compel_cure_remote(struct parasite_ctl *ctl); -extern int compel_cure_local(struct parasite_ctl *ctl); -extern int compel_cure(struct parasite_ctl *ctl); +extern int __must_check compel_stop_daemon(struct parasite_ctl *ctl); +extern int __must_check compel_cure_remote(struct parasite_ctl *ctl); +extern int __must_check compel_cure_local(struct parasite_ctl *ctl); +extern int __must_check compel_cure(struct parasite_ctl *ctl); #define PARASITE_ARG_SIZE_MIN ( 1 << 12) @@ -58,15 +59,16 @@ extern int compel_cure(struct parasite_ctl *ctl); extern void *compel_parasite_args_p(struct parasite_ctl *ctl); extern void *compel_parasite_args_s(struct parasite_ctl *ctl, unsigned long args_size); -extern int compel_syscall(struct parasite_ctl *ctl, int nr, long *ret, +extern int __must_check compel_syscall(struct parasite_ctl *ctl, + int nr, long *ret, unsigned long arg1, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5, unsigned long arg6); -extern int compel_run_in_thread(struct parasite_thread_ctl *tctl, unsigned int cmd); -extern int compel_run_at(struct parasite_ctl *ctl, unsigned long ip, user_regs_struct_t *ret_regs); +extern int __must_check compel_run_in_thread(struct parasite_thread_ctl *tctl, unsigned int cmd); +extern int __must_check compel_run_at(struct parasite_ctl *ctl, unsigned long ip, user_regs_struct_t *ret_regs); /* * The PTRACE_SYSCALL will trap task twice -- on @@ -80,12 +82,13 @@ enum trace_flags { TRACE_EXIT, }; -extern int compel_stop_on_syscall(int tasks, int sys_nr, +extern int __must_check compel_stop_on_syscall(int tasks, int sys_nr, int sys_nr_compat, enum trace_flags trace); -extern int compel_stop_pie(pid_t pid, void *addr, enum trace_flags *tf, bool no_bp); +extern int __must_check 
compel_stop_pie(pid_t pid, void *addr, + enum trace_flags *tf, bool no_bp); -extern int compel_unmap(struct parasite_ctl *ctl, unsigned long addr); +extern int __must_check compel_unmap(struct parasite_ctl *ctl, unsigned long addr); extern int compel_mode_native(struct parasite_ctl *ctl); @@ -159,7 +162,7 @@ struct parasite_blob_desc { extern struct parasite_blob_desc *compel_parasite_blob_desc(struct parasite_ctl *); -extern int compel_get_thread_regs(struct parasite_thread_ctl *, save_regs_t, void *); +extern int __must_check compel_get_thread_regs(struct parasite_thread_ctl *, save_regs_t, void *); extern void compel_relocs_apply(void *mem, void *vbase, size_t size, compel_reloc_t *elf_relocs, size_t nr_relocs); diff --git a/compel/include/uapi/ptrace.h b/compel/include/uapi/ptrace.h index 4df00b6e1..13eed7232 100644 --- a/compel/include/uapi/ptrace.h +++ b/compel/include/uapi/ptrace.h @@ -1,6 +1,7 @@ #ifndef UAPI_COMPEL_PTRACE_H__ #define UAPI_COMPEL_PTRACE_H__ +#include "common/compiler.h" /* * We'd want to include both sys/ptrace.h and linux/ptrace.h, * hoping that most definitions come from either one or another. 
@@ -75,8 +76,8 @@ typedef struct { extern int ptrace_suspend_seccomp(pid_t pid); -extern int ptrace_peek_area(pid_t pid, void *dst, void *addr, long bytes); -extern int ptrace_poke_area(pid_t pid, void *src, void *addr, long bytes); -extern int ptrace_swap_area(pid_t pid, void *dst, void *src, long bytes); +extern int __must_check ptrace_peek_area(pid_t pid, void *dst, void *addr, long bytes); +extern int __must_check ptrace_poke_area(pid_t pid, void *src, void *addr, long bytes); +extern int __must_check ptrace_swap_area(pid_t pid, void *dst, void *src, long bytes); #endif /* UAPI_COMPEL_PTRACE_H__ */ diff --git a/compel/include/uapi/sigframe-common.h b/compel/include/uapi/sigframe-common.h index fc93c5480..177bf4c48 100644 --- a/compel/include/uapi/sigframe-common.h +++ b/compel/include/uapi/sigframe-common.h @@ -8,6 +8,7 @@ # error "Direct inclusion is forbidden, use instead" #endif +#include "common/compiler.h" #include #include @@ -56,7 +57,7 @@ struct rt_ucontext { unsigned long uc_regspace[128] __attribute__((aligned(8))); }; -extern int sigreturn_prep_fpu_frame(struct rt_sigframe *frame, - struct rt_sigframe *rframe); +extern int __must_check sigreturn_prep_fpu_frame(struct rt_sigframe *frame, + struct rt_sigframe *rframe); #endif /* UAPI_COMPEL_SIGFRAME_COMMON_H__ */ diff --git a/compel/plugins/include/uapi/plugin-fds.h b/compel/plugins/include/uapi/plugin-fds.h index cececb21d..e995b4b66 100644 --- a/compel/plugins/include/uapi/plugin-fds.h +++ b/compel/plugins/include/uapi/plugin-fds.h @@ -1,7 +1,7 @@ #ifndef COMPEL_PLUGIN_STD_STD_H__ #define COMPEL_PLUGIN_STD_STD_H__ -extern int fds_send_fd(int fd); +extern int __must_check fds_send_fd(int fd); extern int fds_recv_fd(void); #endif /* COMPEL_PLUGIN_STD_STD_H__ */ diff --git a/compel/plugins/include/uapi/std/infect.h b/compel/plugins/include/uapi/std/infect.h index 800df2509..1e784f8b4 100644 --- a/compel/plugins/include/uapi/std/infect.h +++ b/compel/plugins/include/uapi/std/infect.h @@ -1,14 +1,16 @@ 
#ifndef COMPEL_PLUGIN_STD_INFECT_H__ #define COMPEL_PLUGIN_STD_INFECT_H__ +#include "common/compiler.h" + extern int parasite_get_rpc_sock(void); -extern int parasite_service(unsigned int cmd, void *args); +extern int __must_check parasite_service(unsigned int cmd, void *args); /* * Must be supplied by user plugins. */ -extern int parasite_daemon_cmd(int cmd, void *args); -extern int parasite_trap_cmd(int cmd, void *args); +extern int __must_check parasite_daemon_cmd(int cmd, void *args); +extern int __must_check parasite_trap_cmd(int cmd, void *args); extern void parasite_cleanup(void); /* diff --git a/compel/plugins/include/uapi/std/log.h b/compel/plugins/include/uapi/std/log.h index f21b6df0d..91462c85b 100644 --- a/compel/plugins/include/uapi/std/log.h +++ b/compel/plugins/include/uapi/std/log.h @@ -2,6 +2,7 @@ #define COMPEL_PLUGIN_STD_LOG_H__ #include "compel/loglevels.h" +#include "common/compiler.h" #define STD_LOG_SIMPLE_CHUNK 256 diff --git a/criu/seize.c b/criu/seize.c index cce8911b9..e1e6b8195 100644 --- a/criu/seize.c +++ b/criu/seize.c @@ -483,7 +483,7 @@ static int collect_children(struct pstree_item *item) if (!opts.freeze_cgroup) /* fails when meets a zombie */ - compel_interrupt_task(pid); + __ignore_value(compel_interrupt_task(pid)); ret = compel_wait_task(pid, item->pid->real, parse_pid_status, NULL, &creds.s, NULL); if (ret < 0) { diff --git a/include/common/compiler.h b/include/common/compiler.h index fc8abcfef..1d431a529 100644 --- a/include/common/compiler.h +++ b/include/common/compiler.h @@ -22,6 +22,7 @@ #define __used __attribute__((__used__)) #define __maybe_unused __attribute__((unused)) #define __always_unused __attribute__((unused)) +#define __must_check __attribute__((__warn_unused_result__)) #define __section(S) __attribute__ ((__section__(#S))) @@ -99,4 +100,30 @@ #define is_log2(v) (((v) & ((v) - 1)) == 0) +/* + * Use "__ignore_value" to avoid a warning when using a function declared with + * gcc's warn_unused_result attribute, 
but for which you really do want to + * ignore the result. Traditionally, people have used a "(void)" cast to + * indicate that a function's return value is deliberately unused. However, + * if the function is declared with __attribute__((warn_unused_result)), + * gcc issues a warning even with the cast. + * + * Caution: most of the time, you really should heed gcc's warning, and + * check the return value. However, in those exceptional cases in which + * you're sure you know what you're doing, use this function. + * + * Normally casting an expression to void discards its value, but GCC + * versions 3.4 and newer have __attribute__ ((__warn_unused_result__)) + * which may cause unwanted diagnostics in that case. Use __typeof__ + * and __extension__ to work around the problem, if the workaround is + * known to be needed. + * Written by Jim Meyering, Eric Blake and Pádraig Brady. + * (See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66425 for the details) + */ +#if 3 < __GNUC__ + (4 <= __GNUC_MINOR__) +# define __ignore_value(x) ({ __typeof__ (x) __x = (x); (void) __x; }) +#else +# define __ignore_value(x) ((void) (x)) +#endif + #endif /* __CR_COMPILER_H__ */ From dc4677123ba03f93deab2d1ec6047d3a35ba694c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20C=C5=82api=C5=84ski?= Date: Wed, 6 Nov 2019 02:15:20 +0100 Subject: [PATCH 0069/1854] Checkpoint only specified controllers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Before this change CRIU would checkpoint all controllers, even the ones not specified in --cgroup-dump-controller. That becomes a problem if there's a cgroup controller on the checkpointing machine that doesn't exist on the restoring machine even if CRIU is instructed not to dump that controller. After that change everything works as expected. 
Signed-off-by: Michał Cłapiński --- criu/proc_parse.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/criu/proc_parse.c b/criu/proc_parse.c index d67392a12..fa7644992 100644 --- a/criu/proc_parse.c +++ b/criu/proc_parse.c @@ -2498,6 +2498,12 @@ int collect_controllers(struct list_head *cgroups, unsigned int *n_cgroups) goto err; } *off = '\0'; + + if (cgp_should_skip_controller(controllers)) { + pr_debug("cg-prop: Skipping controller %s\n", controllers); + continue; + } + while (1) { off = strchr(controllers, ','); if (off) From 8f45330d168df043c400593f2387a92e2b686ef8 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Sun, 17 Nov 2019 16:04:16 +0200 Subject: [PATCH 0070/1854] travis: group lazy-pages options The amount of lazy-pages options keeps growing, let's put the common ones into a variable. Signed-off-by: Mike Rapoport --- scripts/travis/travis-tests | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/scripts/travis/travis-tests b/scripts/travis/travis-tests index bc97fd455..a87ddbaf4 100755 --- a/scripts/travis/travis-tests +++ b/scripts/travis/travis-tests @@ -146,10 +146,11 @@ fi LAZY_EXCLUDE="$LAZY_EXCLUDE -x maps04" LAZY_TESTS=.*\(maps0\|uffd-events\|lazy-thp\|futex\|fork\).* +LAZY_OPTS="-p 2 -T $LAZY_TESTS $LAZY_EXCLUDE $ZDTM_OPTS" -./test/zdtm.py run -p 2 -T $LAZY_TESTS --lazy-pages $LAZY_EXCLUDE $ZDTM_OPTS -./test/zdtm.py run -p 2 -T $LAZY_TESTS --remote-lazy-pages $LAZY_EXCLUDE $ZDTM_OPTS -./test/zdtm.py run -p 2 -T $LAZY_TESTS --remote-lazy-pages --tls $LAZY_EXCLUDE $ZDTM_OPTS +./test/zdtm.py run $LAZY_OPTS --lazy-pages +./test/zdtm.py run $LAZY_OPTS --remote-lazy-pages +./test/zdtm.py run $LAZY_OPTS --remote-lazy-pages --tls bash ./test/jenkins/criu-fault.sh bash ./test/jenkins/criu-fcg.sh From 75fcec0ecbccea5b8258def25adb056e4d02c0c1 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Sun, 17 Nov 2019 16:05:47 +0200 Subject: [PATCH 0071/1854] travis: exclude uns tests for lazy-pages on newer kernels Kernels 5.4 and 
higher will restrict availability of UFFD_EVENT_FORK only for users with CAP_SYS_PTRACE. This prevents running --lazy-pages tests with 'uns' flavor. Disable 'uns' for lazy pages testing in travis for newer kernels. Signed-off-by: Mike Rapoport --- scripts/travis/travis-tests | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/scripts/travis/travis-tests b/scripts/travis/travis-tests index a87ddbaf4..4cb842c97 100755 --- a/scripts/travis/travis-tests +++ b/scripts/travis/travis-tests @@ -145,8 +145,15 @@ else fi LAZY_EXCLUDE="$LAZY_EXCLUDE -x maps04" +# Starting with 5.4 kernel requires CAP_SYS_PTRACE to use uffd events; as such +# we cannot run lazy-pages tests in uns +LAZY_FLAVORS="" +if [ $KERN_MAJ -ge "5" ] && [ $KERN_MIN -ge "4" ]; then + LAZY_FLAVORS = "-f h,ns" +fi + LAZY_TESTS=.*\(maps0\|uffd-events\|lazy-thp\|futex\|fork\).* -LAZY_OPTS="-p 2 -T $LAZY_TESTS $LAZY_EXCLUDE $ZDTM_OPTS" +LAZY_OPTS="-p 2 -T $LAZY_TESTS $LAZY_EXCLUDE $LAZY_FLAVORS $ZDTM_OPTS" ./test/zdtm.py run $LAZY_OPTS --lazy-pages ./test/zdtm.py run $LAZY_OPTS --remote-lazy-pages From b50b6ea09e7a80b91f2bdeb0b5cd444b0ae800ca Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Sun, 3 Nov 2019 20:18:38 +0000 Subject: [PATCH 0072/1854] mount: Add error messages Suggested-by: Andrei Vagin Signed-off-by: Radostin Stoyanov --- criu/mount.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/criu/mount.c b/criu/mount.c index 974af6eb2..6b1adecc6 100644 --- a/criu/mount.c +++ b/criu/mount.c @@ -1325,8 +1325,10 @@ int ns_open_mountpoint(void *arg) } /* Remount all mounts as private to disable propagation */ - if (mount("none", "/", NULL, MS_REC|MS_PRIVATE, NULL)) + if (mount("none", "/", NULL, MS_REC|MS_PRIVATE, NULL)) { + pr_perror("Unable to remount"); goto err; + } if (umount_overmounts(mi)) goto err; @@ -1536,6 +1538,7 @@ static __maybe_unused int mount_cr_time_mount(struct ns_id *ns, unsigned int *s_ ret = mount(source, target, type, 0, 
NULL); if (ret < 0) { + pr_perror("Unable to mount %s %s", source, target); exit_code = -errno; goto restore_ns; } else { @@ -2004,7 +2007,10 @@ static int fetch_rt_stat(struct mount_info *m, const char *where) static int do_simple_mount(struct mount_info *mi, const char *src, const char *fstype, unsigned long mountflags) { - return mount(src, mi->mountpoint, fstype, mountflags, mi->options); + int ret = mount(src, mi->mountpoint, fstype, mountflags, mi->options); + if (ret) + pr_perror("Unable to mount %s %s (id=%d)", src, mi->mountpoint, mi->mnt_id); + return ret; } static char *mnt_fsname(struct mount_info *mi) @@ -2491,8 +2497,11 @@ static int do_mount_one(struct mount_info *mi) } /* do_mount_root() is called from populate_mnt_ns() */ - if (mount(opts.root, mi->mountpoint, NULL, MS_BIND | MS_REC, NULL)) + if (mount(opts.root, mi->mountpoint, NULL, MS_BIND | MS_REC, NULL)) { + pr_perror("Unable to mount %s %s (id=%d)", opts.root, mi->mountpoint, mi->mnt_id); return -1; + } + if (do_mount_root(mi)) return -1; mi->mounted = true; From d99ee9753e90df1040dc49341a38357e58d838ee Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Sat, 9 Nov 2019 22:48:32 +0000 Subject: [PATCH 0073/1854] mount: Bind-mount root via userns_call When restoring a runc container with enabled user namespace CRIU fails to mount the specified root directory because the path is under /run/runc which is inaccessible to unprivileged users. 
Signed-off-by: Radostin Stoyanov --- criu/mount.c | 51 +++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 39 insertions(+), 12 deletions(-) diff --git a/criu/mount.c b/criu/mount.c index 6b1adecc6..52e70d376 100644 --- a/criu/mount.c +++ b/criu/mount.c @@ -2020,20 +2020,20 @@ static char *mnt_fsname(struct mount_info *mi) return mi->fstype->name; } -static int apply_sb_flags(void *args, int fd, pid_t pid) +static int userns_mount(char *src, void *args, int fd, pid_t pid) { unsigned long flags = *(unsigned long *) args; int rst = -1, err = -1; - char path[PSFDS]; + char target[PSFDS]; - snprintf(path, sizeof(path), "/proc/self/fd/%d", fd); + snprintf(target, sizeof(target), "/proc/self/fd/%d", fd); if (pid != getpid() && switch_ns(pid, &mnt_ns_desc, &rst)) return -1; - err = mount(NULL, path, NULL, MS_REMOUNT | flags, NULL); + err = mount(src, target, NULL, flags, NULL); if (err) - pr_perror("Unable to remount %s", path); + pr_perror("Unable to mount %s", target); if (rst >= 0 && restore_ns(rst, &mnt_ns_desc)) return -1; @@ -2041,6 +2041,16 @@ static int apply_sb_flags(void *args, int fd, pid_t pid) return err; } +static int apply_sb_flags(void *args, int fd, pid_t pid) +{ + return userns_mount(NULL, args, fd, pid); +} + +static int mount_root(void *args, int fd, pid_t pid) +{ + return userns_mount(opts.root, args, fd, pid); +} + static int do_new_mount(struct mount_info *mi) { unsigned long sflags = mi->sb_flags; @@ -2088,10 +2098,9 @@ static int do_new_mount(struct mount_info *mi) pr_perror("Unable to open %s", mi->mountpoint); return -1; } - sflags |= MS_RDONLY; - if (userns_call(apply_sb_flags, 0, - &sflags, sizeof(sflags), fd)) { - pr_perror("Unable to apply mount flags %d for %s", + sflags |= MS_RDONLY | MS_REMOUNT; + if (userns_call(apply_sb_flags, 0, &sflags, sizeof(sflags), fd)) { + pr_err("Unable to apply mount flags %d for %s", mi->sb_flags, mi->mountpoint); close(fd); return -1; @@ -2491,15 +2500,33 @@ static int do_mount_one(struct 
mount_info *mi) pr_debug("\tMounting %s @%s (%d)\n", mi->fstype->name, mi->mountpoint, mi->need_plugin); if (rst_mnt_is_root(mi)) { + int fd; + unsigned long flags = MS_BIND | MS_REC; + if (opts.root == NULL) { pr_err("The --root option is required to restore a mount namespace\n"); return -1; } /* do_mount_root() is called from populate_mnt_ns() */ - if (mount(opts.root, mi->mountpoint, NULL, MS_BIND | MS_REC, NULL)) { - pr_perror("Unable to mount %s %s (id=%d)", opts.root, mi->mountpoint, mi->mnt_id); - return -1; + if (root_ns_mask & CLONE_NEWUSER) { + fd = open(mi->mountpoint, O_PATH); + if (fd < 0) { + pr_perror("Unable to open %s", mi->mountpoint); + return -1; + } + + if (userns_call(mount_root, 0, &flags, sizeof(flags), fd)) { + pr_err("Unable to mount %s\n", mi->mountpoint); + close(fd); + return -1; + } + close(fd); + } else { + if (mount(opts.root, mi->mountpoint, NULL, flags, NULL)) { + pr_perror("Unable to mount %s %s (id=%d)", opts.root, mi->mountpoint, mi->mnt_id); + return -1; + } } if (do_mount_root(mi)) From 8ab3e40e3e45a4e0337c6715c923fb640e2e8973 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Sun, 10 Nov 2019 07:35:50 +0000 Subject: [PATCH 0074/1854] restore: Create temp proc in /tmp When restoring a container with user namespace, CRIU fails to create a temporary directory for proc. This is because the unprivileged user that has been just restored does not have permissions to access the working directory used by CRIU. 
Resolves #828 Signed-off-by: Radostin Stoyanov --- criu/cr-restore.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/criu/cr-restore.c b/criu/cr-restore.c index 05a25835a..5694931f4 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -1588,7 +1588,7 @@ static void restore_pgid(void) static int mount_proc(void) { int fd, ret; - char proc_mountpoint[] = "crtools-proc.XXXXXX"; + char proc_mountpoint[] = "/tmp/crtools-proc.XXXXXX"; if (root_ns_mask == 0) fd = ret = open("/proc", O_DIRECTORY); From 9a50fbce72228404c29642af70af7b42fbc60a7b Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Sun, 3 Nov 2019 20:35:18 +0000 Subject: [PATCH 0075/1854] man: Describe --root option requirements These requirements have been described in https://github.com/opencontainers/runc/blob/b133feae/libcontainer/container_linux.go#L1265 Signed-off-by: Radostin Stoyanov --- Documentation/criu.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/criu.txt b/Documentation/criu.txt index 2729bc95a..133a094c0 100644 --- a/Documentation/criu.txt +++ b/Documentation/criu.txt @@ -414,6 +414,8 @@ usually need to be escaped from shell. *-r*, *--root* 'path':: Change the root filesystem to 'path' (when run in a mount namespace). + This option is required to restore a mount namespace. The directory + 'path' must be a mount point and its parent must not be overmounted. *--external* 'type'*[*'id'*]:*'value':: Restore an instance of an external resource. The generic syntax is From 90cbeadb668d99f9d9557cee7a4c67e593f6e7ad Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Tue, 19 Nov 2019 22:10:39 +0000 Subject: [PATCH 0076/1854] zdtm: Replace if->continue with if->elif->else Replacing the if->continue pattern with if->elif->else reduces the number of lines while preserving the logic. 
Signed-off-by: Radostin Stoyanov --- test/zdtm.py | 63 ++++++++++++++++++---------------------------------- 1 file changed, 22 insertions(+), 41 deletions(-) diff --git a/test/zdtm.py b/test/zdtm.py index 6d3fddfad..17e0540eb 100755 --- a/test/zdtm.py +++ b/test/zdtm.py @@ -867,76 +867,57 @@ class criu_rpc: def __set_opts(criu, args, ctx): while len(args) != 0: arg = args.pop(0) - if arg == '-v4': + if "-v4" == arg: criu.opts.log_level = 4 - continue - if arg == '-o': + elif "-o" == arg: criu.opts.log_file = args.pop(0) - continue - if arg == '-D': + elif "-D" == arg: criu.opts.images_dir_fd = os.open(args.pop(0), os.O_DIRECTORY) ctx['imgd'] = criu.opts.images_dir_fd - continue - if arg == '-t': + elif "-t" == arg: criu.opts.pid = int(args.pop(0)) - continue - if arg == '--pidfile': + elif "--pidfile" == arg: ctx['pidf'] = args.pop(0) - continue - if arg == '--timeout': + elif "--timeout" == arg: criu.opts.timeout = int(args.pop(0)) - continue - if arg == '--restore-detached': - # Set by service by default - ctx['rd'] = True - continue - if arg == '--root': + elif "--restore-detached" == arg: + ctx['rd'] = True # Set by service by default + elif "--root" == arg: criu.opts.root = args.pop(0) - continue - if arg == '--external': + elif "--external" == arg: criu.opts.external.append(args.pop(0)) - continue - if arg == '--status-fd': + elif "--status-fd" == arg: fd = int(args.pop(0)) os.write(fd, b"\0") fcntl.fcntl(fd, fcntl.F_SETFD, fcntl.FD_CLOEXEC) - continue - if arg == '--port': + elif "--port" == arg: criu.opts.ps.port = int(args.pop(0)) - continue - if arg == '--address': + elif "--address" == arg: criu.opts.ps.address = args.pop(0) + elif "--page-server" == arg: continue - if arg == '--page-server': - continue - if arg == '--prev-images-dir': + elif "--prev-images-dir" == arg: criu.opts.parent_img = args.pop(0) - continue - if arg == '--pre-dump-mode': + elif "--pre-dump-mode" == arg: key = args.pop(0) mode = crpc.rpc.VM_READ if key == "splice": mode = 
crpc.rpc.SPLICE criu.opts.pre_dump_mode = mode - continue - if arg == '--track-mem': + elif "--track-mem" == arg: criu.opts.track_mem = True - continue - if arg == '--tcp-established': + elif "--tcp-established" == arg: criu.opts.tcp_established = True - continue - if arg == '--restore-sibling': + elif "--restore-sibling" == arg: criu.opts.rst_sibling = True - continue - if arg == "--inherit-fd": + elif "--inherit-fd" == arg: inhfd = criu.opts.inherit_fd.add() key = args.pop(0) fd, key = key.split(":", 1) inhfd.fd = int(fd[3:-1]) inhfd.key = key - continue - - raise test_fail_exc('RPC for %s(%s) required' % (arg, args.pop(0))) + else: + raise test_fail_exc('RPC for %s(%s) required' % (arg, args.pop(0))) @staticmethod def run(action, From 60bb5c731078ad15b3d9e62782d692d91c5c2db0 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Tue, 19 Nov 2019 22:48:44 +0000 Subject: [PATCH 0077/1854] zdtm: Set --root path to 0700 on restore Update zdtm tests to verify that CRIU does not require the --root path to be accessible to the unprivileged user being restored when restoring user namespace. 
Signed-off-by: Radostin Stoyanov --- test/zdtm.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/test/zdtm.py b/test/zdtm.py index 17e0540eb..16ff0b379 100755 --- a/test/zdtm.py +++ b/test/zdtm.py @@ -62,6 +62,7 @@ tests_root = None def clean_tests_root(): global tests_root if tests_root and tests_root[0] == os.getpid(): + os.rmdir(os.path.join(tests_root[1], "root")) os.rmdir(tests_root[1]) @@ -70,7 +71,9 @@ def make_tests_root(): if not tests_root: tests_root = (os.getpid(), tempfile.mkdtemp("", "criu-root-", "/tmp")) atexit.register(clean_tests_root) - return tests_root[1] + os.mkdir(os.path.join(tests_root[1], "root")) + os.chmod(tests_root[1], 0o777) + return os.path.join(tests_root[1], "root") # Report generation @@ -483,6 +486,13 @@ class zdtm_test: # move into some semi-random state time.sleep(random.random()) + if self.__flavor.ns: + # In the case of runc the path specified with the opts.root + # option is created in /run/runc/ which is inaccessible to + # unprivileged users. The permissions here are set to test + # this use case. + os.chmod(os.path.dirname(self.__flavor.root), 0o700) + def kill(self, sig=signal.SIGKILL): self.__freezer.thaw() if self.__pid: From 25f6d4f72fb995cb776d65a9d4d539d4fdcc6740 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 21 Nov 2019 21:56:34 +0000 Subject: [PATCH 0078/1854] build: Remove SRCARCH SRCARCH is always equal ARCH. There are no rules when to use one or another and architectures may forget to set one of them up. No need for a second variable meaning the same and confusing people. Remove it completely. Self-correction [after some debug]: SRCARCH was different in one place: zdtm Makefile by some unintentional mistake: > ifeq ($(ARCH),arm64) > ARCH ?= aarch64 > SRCARCH ?= aarch64 > endif That meant to be "ARCH := aarch64" because "?=" would never work inside that ifeq. Fix up this part of mess too. 
Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- Makefile | 3 +-- Makefile.config | 2 +- compel/plugins/Makefile | 4 ++-- criu/Makefile | 2 +- criu/pie/Makefile | 6 +++--- criu/pie/Makefile.library | 4 ++-- test/zdtm/Makefile.inc | 7 ++----- test/zdtm/static/Makefile | 4 ++-- 8 files changed, 14 insertions(+), 18 deletions(-) diff --git a/Makefile b/Makefile index f827e7baa..2e62f6f39 100644 --- a/Makefile +++ b/Makefile @@ -86,7 +86,6 @@ endif # commit "S/390: Fix 64 bit sibcall". ifeq ($(ARCH),s390) ARCH := s390 - SRCARCH := s390 DEFINES := -DCONFIG_S390 CFLAGS_PIE := -fno-optimize-sibling-calls endif @@ -94,7 +93,7 @@ endif CFLAGS_PIE += -DCR_NOGLIBC export CFLAGS_PIE -LDARCH ?= $(SRCARCH) +LDARCH ?= $(ARCH) export LDARCH export PROTOUFIX DEFINES diff --git a/Makefile.config b/Makefile.config index 1e4352b9d..5af3fed38 100644 --- a/Makefile.config +++ b/Makefile.config @@ -30,7 +30,7 @@ CONFIG_FILE = .config $(CONFIG_FILE): touch $(CONFIG_FILE) -ifeq ($(SRCARCH),x86) +ifeq ($(ARCH),x86) # CONFIG_COMPAT is only for x86 now, no need for compile-test other archs ifeq ($(call try-asm,$(FEATURE_TEST_X86_COMPAT)),true) export CONFIG_COMPAT := y diff --git a/compel/plugins/Makefile b/compel/plugins/Makefile index a326e2a66..197ff1b24 100644 --- a/compel/plugins/Makefile +++ b/compel/plugins/Makefile @@ -53,11 +53,11 @@ std-lib-y += ./$(PLUGIN_ARCH_DIR)/std/parasite-head.o target += fds fds-lib-y += fds/fds.o -ifeq ($(SRCARCH),x86) +ifeq ($(ARCH),x86) std-lib-y += ./$(PLUGIN_ARCH_DIR)/std/memcpy.o endif -ifeq ($(SRCARCH),ppc64) +ifeq ($(ARCH),ppc64) std-lib-y += ./$(PLUGIN_ARCH_DIR)/std/memcpy.o std-lib-y += ./$(PLUGIN_ARCH_DIR)/std/memcmp.o endif diff --git a/criu/Makefile b/criu/Makefile index 4134e5052..ceb49ce09 100644 --- a/criu/Makefile +++ b/criu/Makefile @@ -2,7 +2,7 @@ # 6a8d90f5fec4 "attr: Allow attribute type 0" WRAPFLAGS += -Wl,--wrap=nla_parse,--wrap=nlmsg_parse -ARCH_DIR := criu/arch/$(SRCARCH) +ARCH_DIR := criu/arch/$(ARCH) PIE_DIR := 
criu/pie export ARCH_DIR PIE_DIR diff --git a/criu/pie/Makefile b/criu/pie/Makefile index 1ad456f43..a30747ac3 100644 --- a/criu/pie/Makefile +++ b/criu/pie/Makefile @@ -14,7 +14,7 @@ ifneq ($(filter-out clean mrproper,$(MAKECMDGOALS)),) compel_plugins := $(shell $(COMPEL_BIN) plugins) endif -LDS := compel/arch/$(SRCARCH)/scripts/compel-pack.lds.S +LDS := compel/arch/$(ARCH)/scripts/compel-pack.lds.S restorer-obj-y += parasite-vdso.o ./$(ARCH_DIR)/vdso-pie.o restorer-obj-y += ./$(ARCH_DIR)/restorer.o @@ -26,11 +26,11 @@ ifeq ($(ARCH),x86) endif endif -ifeq ($(SRCARCH),aarch64) +ifeq ($(ARCH),aarch64) restorer-obj-y += ./$(ARCH_DIR)/intraprocedure.o endif -ifeq ($(SRCARCH),ppc64) +ifeq ($(ARCH),ppc64) restorer-obj-y += ./$(ARCH_DIR)/vdso-trampoline.o endif diff --git a/criu/pie/Makefile.library b/criu/pie/Makefile.library index 658c8a4eb..de75b11d4 100644 --- a/criu/pie/Makefile.library +++ b/criu/pie/Makefile.library @@ -9,14 +9,14 @@ lib-name := pie.lib.a lib-y += util.o lib-y += util-vdso.o -ifeq ($(SRCARCH),x86) +ifeq ($(ARCH),x86) ifeq ($(CONFIG_COMPAT),y) lib-y += util-vdso-elf32.o endif CFLAGS_util-vdso-elf32.o += -DCONFIG_VDSO_32 endif -ifeq ($(SRCARCH),arm) +ifeq ($(ARCH),arm) lib-y += ./$(ARCH_DIR)/aeabi-helpers.o lib-y += ./$(ARCH_DIR)/pie-cacheflush.o endif diff --git a/test/zdtm/Makefile.inc b/test/zdtm/Makefile.inc index 170f31632..d5c013a3e 100644 --- a/test/zdtm/Makefile.inc +++ b/test/zdtm/Makefile.inc @@ -15,12 +15,9 @@ ARCH ?= $(shell uname -m | sed \ -e s/aarch64.*/arm64/) ifeq ($(ARCH),arm64) - ARCH ?= aarch64 - SRCARCH ?= aarch64 + ARCH := aarch64 endif -SRCARCH ?= $(ARCH) - ifeq ($(ARCH),arm) ARMV := $(shell echo $(UNAME-M) | sed -nr 's/armv([[:digit:]]).*/\1/p; t; i7') @@ -35,7 +32,7 @@ CC := gcc CFLAGS += -g -O2 -Wall -Werror -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0 CFLAGS += $(USERCFLAGS) CFLAGS += -D_GNU_SOURCE -CPPFLAGS += -iquote $(LIBDIR)/arch/$(SRCARCH)/include +CPPFLAGS += -iquote $(LIBDIR)/arch/$(ARCH)/include ifeq ($(strip $(V)),) E = 
@echo diff --git a/test/zdtm/static/Makefile b/test/zdtm/static/Makefile index a38482f44..e0d4d2c5c 100644 --- a/test/zdtm/static/Makefile +++ b/test/zdtm/static/Makefile @@ -219,13 +219,13 @@ TST_NOFILE := \ child_subreaper_and_reparent \ # jobctl00 \ -ifneq ($(SRCARCH),arm) +ifneq ($(ARCH),arm) ifneq ($(COMPAT_TEST),y) TST_NOFILE += maps03 endif endif -ifeq ($(SRCARCH),s390) +ifeq ($(ARCH),s390) TST_NOFILE += s390x_regs_check \ s390x_gs_threads \ s390x_runtime_instr From a4fa4162d410c0bbc751d92119022f9a1c3a6723 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 21 Nov 2019 21:56:35 +0000 Subject: [PATCH 0079/1854] build/nmk: Remove SRCARCH It's not used anywhere now. Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- scripts/nmk/scripts/include.mk | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/scripts/nmk/scripts/include.mk b/scripts/nmk/scripts/include.mk index e1701103f..ee0e32f62 100644 --- a/scripts/nmk/scripts/include.mk +++ b/scripts/nmk/scripts/include.mk @@ -22,9 +22,8 @@ SUBARCH := $(shell uname -m | sed \ -e s/aarch64.*/aarch64/) ARCH ?= $(SUBARCH) -SRCARCH := $(ARCH) -export SUBARCH ARCH SRCARCH +export SUBARCH ARCH ifndef ____nmk_defined__tools include $(__nmk_dir)tools.mk From df66aa99b6ce59108055759d5ebda69e2fd00669 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 21 Nov 2019 21:56:36 +0000 Subject: [PATCH 0080/1854] build/nmk: Provide proper SUBARCH It's always equal ARCH and not very useful (so nothing actually uses it). Time for a change: SUBARCH now is meaningful and gives a way to detect what kind of ARCH flavor build is dealing with. Also, for cross-compiling sake don't set SUBARCH if the user supplied it. 
(and don't call useless uname during cross compilation) Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- scripts/nmk/scripts/include.mk | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/scripts/nmk/scripts/include.mk b/scripts/nmk/scripts/include.mk index ee0e32f62..c1c1e94af 100644 --- a/scripts/nmk/scripts/include.mk +++ b/scripts/nmk/scripts/include.mk @@ -8,21 +8,20 @@ endif # # Common vars. -SUBARCH := $(shell uname -m | sed \ - -e s/i.86/x86/ \ - -e s/x86_64/x86/ \ - -e s/sun4u/sparc64/ \ - -e s/arm.*/arm/ \ - -e s/sa110/arm/ \ - -e s/s390x/s390/ \ - -e s/parisc64/parisc/ \ - -e s/ppc64.*/ppc64/ \ - -e s/mips.*/mips/ \ - -e s/sh[234].*/sh/ \ +SUBARCH ?= $(shell uname -m) +ARCH ?= $(shell echo $(SUBARCH) | sed \ + -e s/i.86/x86/ \ + -e s/x86_64/x86/ \ + -e s/sun4u/sparc64/ \ + -e s/arm.*/arm/ \ + -e s/sa110/arm/ \ + -e s/s390x/s390/ \ + -e s/parisc64/parisc/ \ + -e s/ppc64.*/ppc64/ \ + -e s/mips.*/mips/ \ + -e s/sh[234].*/sh/ \ -e s/aarch64.*/aarch64/) -ARCH ?= $(SUBARCH) - export SUBARCH ARCH ifndef ____nmk_defined__tools From 1463c41119c8eef8ccf135e71359f579e821a21e Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 21 Nov 2019 21:56:37 +0000 Subject: [PATCH 0081/1854] build: Use SUBARCH Instead of doing additional `uname -m` - use provided $(SUBARCH) to detect what architecture flavour the build should produce the result for. 
Fixes two things: - zdtm make now correctly supplies $(USERCFLAGS) - subtly fixes cross compilation by providing a way to specify $(SUBARCH) Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- Makefile | 6 ++---- test/zdtm/Makefile.inc | 25 +++++++++++++------------ 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/Makefile b/Makefile index 2e62f6f39..ef76d706c 100644 --- a/Makefile +++ b/Makefile @@ -17,8 +17,6 @@ ifeq ($(origin HOSTCFLAGS), undefined) HOSTCFLAGS := $(CFLAGS) $(USERCFLAGS) endif -UNAME-M := $(shell uname -m) - # # Supported Architectures ifneq ($(filter-out x86 arm aarch64 ppc64 s390,$(ARCH)),) @@ -27,14 +25,14 @@ endif # The PowerPC 64 bits architecture could be big or little endian. # They are handled in the same way. -ifeq ($(UNAME-M),ppc64) +ifeq ($(SUBARCH),ppc64) error := $(error ppc64 big endian is not yet supported) endif # # Architecture specific options. ifeq ($(ARCH),arm) - ARMV := $(shell echo $(UNAME-M) | sed -nr 's/armv([[:digit:]]).*/\1/p; t; i7') + ARMV := $(shell echo $(SUBARCH) | sed -nr 's/armv([[:digit:]]).*/\1/p; t; i7') ifeq ($(ARMV),6) USERCFLAGS += -march=armv6 diff --git a/test/zdtm/Makefile.inc b/test/zdtm/Makefile.inc index d5c013a3e..7584d3b06 100644 --- a/test/zdtm/Makefile.inc +++ b/test/zdtm/Makefile.inc @@ -1,17 +1,18 @@ .SUFFIXES: MAKEFLAGS += -r -ARCH ?= $(shell uname -m | sed \ - -e s/i.86/x86/ \ - -e s/x86_64/x86/ \ - -e s/sun4u/sparc64/ \ - -e s/arm.*/arm/ \ - -e s/sa110/arm/ \ - -e s/s390x/s390/ \ - -e s/parisc64/parisc/ \ - -e s/ppc64.*/ppc64/ \ - -e s/mips.*/mips/ \ - -e s/sh[234].*/sh/ \ +SUBARCH ?= $(shell uname -m) +ARCH ?= $(shell echo $(SUBARCH) | sed \ + -e s/i.86/x86/ \ + -e s/x86_64/x86/ \ + -e s/sun4u/sparc64/ \ + -e s/arm.*/arm/ \ + -e s/sa110/arm/ \ + -e s/s390x/s390/ \ + -e s/parisc64/parisc/ \ + -e s/ppc64.*/ppc64/ \ + -e s/mips.*/mips/ \ + -e s/sh[234].*/sh/ \ -e s/aarch64.*/arm64/) ifeq ($(ARCH),arm64) @@ -19,7 +20,7 @@ ifeq ($(ARCH),arm64) endif ifeq ($(ARCH),arm) - 
ARMV := $(shell echo $(UNAME-M) | sed -nr 's/armv([[:digit:]]).*/\1/p; t; i7') + ARMV := $(shell echo $(SUBARCH) | sed -nr 's/armv([[:digit:]]).*/\1/p; t; i7') ifeq ($(ARMV),6) USERCFLAGS += -march=armv6 From 70fae12509d7e0448e00fa0b0aa3a94b2384025f Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 21 Nov 2019 21:56:38 +0000 Subject: [PATCH 0082/1854] build/zdtm: Support cross-build Maybe not that useful, but only little change needed. Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- test/zdtm/Makefile.inc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/test/zdtm/Makefile.inc b/test/zdtm/Makefile.inc index 7584d3b06..8f2650b44 100644 --- a/test/zdtm/Makefile.inc +++ b/test/zdtm/Makefile.inc @@ -29,7 +29,10 @@ ifeq ($(ARCH),arm) endif endif -CC := gcc +HOSTCC ?= gcc +ifeq ($(origin CC), default) + CC := $(CROSS_COMPILE)$(HOSTCC) +endif CFLAGS += -g -O2 -Wall -Werror -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0 CFLAGS += $(USERCFLAGS) CFLAGS += -D_GNU_SOURCE From 3b24574b6d48b386127386b18036767a89ad6d0f Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 21 Nov 2019 21:56:39 +0000 Subject: [PATCH 0083/1854] build/zdtm: Makefile hack for travis aarch64/armv8l The very same hack to build aarch32 zdtm tests on armv8 Travis-CI as in the commit dfa0a1edcbcb ("Makefile hack for travis aarch64/armv8l") Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- test/zdtm/Makefile.inc | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/test/zdtm/Makefile.inc b/test/zdtm/Makefile.inc index 8f2650b44..d132ca981 100644 --- a/test/zdtm/Makefile.inc +++ b/test/zdtm/Makefile.inc @@ -20,13 +20,17 @@ ifeq ($(ARCH),arm64) endif ifeq ($(ARCH),arm) - ARMV := $(shell echo $(SUBARCH) | sed -nr 's/armv([[:digit:]]).*/\1/p; t; i7') + ARMV := $(shell echo $(SUBARCH) | sed -nr 's/armv([[:digit:]]).*/\1/p; t; i7') - ifeq ($(ARMV),6) - USERCFLAGS += -march=armv6 - else ifeq ($(ARMV),7) - USERCFLAGS += -march=armv7-a - 
endif + ifeq ($(ARMV),6) + USERCFLAGS += -march=armv6 + else ifeq ($(ARMV),7) + USERCFLAGS += -march=armv7-a + else ifeq ($(ARMV),8) + # To build aarch32 on armv8 Travis-CI (see criu Makefile) + USERCFLAGS += -march=armv7-a + ARMV := 7 + endif endif HOSTCC ?= gcc From bffa6e0ad005a3e125b9b6c3da527a929ed18c79 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 21 Nov 2019 21:56:40 +0000 Subject: [PATCH 0084/1854] build/zdtm: Use pkg-config to find includes/libs Helps to cross-compile zdtm tests in case somebody needs it. Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- test/zdtm/Makefile.inc | 13 +++++++++++++ test/zdtm/static/Makefile | 4 ++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/test/zdtm/Makefile.inc b/test/zdtm/Makefile.inc index d132ca981..32fc72d32 100644 --- a/test/zdtm/Makefile.inc +++ b/test/zdtm/Makefile.inc @@ -53,12 +53,25 @@ endif RM := rm -f --one-file-system ifeq ($(COMPAT_TEST),y) + # Firstly look for 32-bit libs and then in standard path. 
+ PKG_CONFIG_PATH := $(shell pkg-config --variable pc_path pkg-config) + PKG_CONFIG_PATH := /usr/lib32/pkgconfig:$(PKG_CONFIG_PATH) ifeq ($(ARCH),x86) export CFLAGS += -m32 export LDFLAGS += -m32 + PKG_CONFIG_PATH := /usr/lib/i386-linux-gnu/pkgconfig:$(PKG_CONFIG_PATH) endif + export PKG_CONFIG_PATH endif +define pkg-libs + $(shell PKG_CONFIG_PATH="$(PKG_CONFIG_PATH)" pkg-config --libs $(1)) +endef + +define pkg-cflags + $(shell PKG_CONFIG_PATH="$(PKG_CONFIG_PATH)" pkg-config --cflags $(1)) +endef + %.d: %.c $(E) " DEP " $@ $(Q)$(CC) $(CFLAGS) $(CPPFLAGS) -MM -MP -c $< -o $@ diff --git a/test/zdtm/static/Makefile b/test/zdtm/static/Makefile index e0d4d2c5c..36d00ca5c 100644 --- a/test/zdtm/static/Makefile +++ b/test/zdtm/static/Makefile @@ -528,8 +528,8 @@ stopped12: CFLAGS += -DZDTM_STOPPED_KILL -DZDTM_STOPPED_TKILL clone_fs: LDLIBS += -pthread # As generating dependencies won't work without proper includes, # we have to explicitly specify both .o and .d for this case: -netns_sub_veth.o netns_sub_veth.d: CPPFLAGS += -I/usr/include/libnl3 -netns_sub_veth: LDLIBS += -lnl-3 -l nl-route-3 +netns_sub_veth.o netns_sub_veth.d: CPPFLAGS += $(call pkg-cflags, libnl-3.0) +netns_sub_veth: LDLIBS += $(call pkg-libs, libnl-route-3.0 libnl-3.0) socket-tcp-fin-wait1: CFLAGS += -D ZDTM_TCP_FIN_WAIT1 socket-tcp-fin-wait2: CFLAGS += -D ZDTM_TCP_FIN_WAIT2 From 1dbc835954d9c27ad1edb8184c02cfea1fd414b1 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 21 Nov 2019 21:56:41 +0000 Subject: [PATCH 0085/1854] travis: Add armv7-cross as cross-compile test Fixes: #455 Based-on-patch-by: Andrei Vagin Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- .travis.yml | 4 +++ scripts/build/Dockerfile.armv7-cross | 44 ++++++++++++++++++++++++++++ scripts/build/Makefile | 1 + 3 files changed, 49 insertions(+) create mode 100644 scripts/build/Dockerfile.armv7-cross diff --git a/.travis.yml b/.travis.yml index f6f71be48..b27dbfe7b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -73,6 
+73,10 @@ matrix: arch: amd64 env: TR_ARCH=fedora-asan dist: xenial # test hangs on bionic + - os: linux + arch: amd64 + env: TR_ARCH=armv7-cross + dist: bionic allow_failures: - env: TR_ARCH=docker-test - env: TR_ARCH=fedora-rawhide diff --git a/scripts/build/Dockerfile.armv7-cross b/scripts/build/Dockerfile.armv7-cross new file mode 100644 index 000000000..434934aad --- /dev/null +++ b/scripts/build/Dockerfile.armv7-cross @@ -0,0 +1,44 @@ +FROM dockcross/base:latest + +# Add the cross compiler sources +RUN echo "deb http://ftp.us.debian.org/debian/ jessie main" >> /etc/apt/sources.list && \ + dpkg --add-architecture armhf && \ + apt-get install emdebian-archive-keyring + +RUN apt-get update && apt-get install -y \ + crossbuild-essential-armhf \ + libbz2-dev:armhf \ + libexpat1-dev:armhf \ + ncurses-dev:armhf \ + libssl-dev:armhf \ + protobuf-c-compiler \ + protobuf-compiler \ + python-protobuf \ + libnl-3-dev:armhf \ + libprotobuf-dev:armhf \ + libnet-dev:armhf \ + libprotobuf-c-dev:armhf \ + libcap-dev:armhf \ + libaio-dev:armhf \ + libnl-route-3-dev:armhf + +ENV CROSS_TRIPLE=arm-linux-gnueabihf +ENV CROSS_COMPILE=${CROSS_TRIPLE}- \ + CROSS_ROOT=/usr/${CROSS_TRIPLE} \ + AS=/usr/bin/${CROSS_TRIPLE}-as \ + AR=/usr/bin/${CROSS_TRIPLE}-ar \ + CC=/usr/bin/${CROSS_TRIPLE}-gcc \ + CPP=/usr/bin/${CROSS_TRIPLE}-cpp \ + CXX=/usr/bin/${CROSS_TRIPLE}-g++ \ + LD=/usr/bin/${CROSS_TRIPLE}-ld \ + FC=/usr/bin/${CROSS_TRIPLE}-gfortran + +ENV PATH="${PATH}:${CROSS_ROOT}/bin" \ + PKG_CONFIG_PATH=/usr/lib/${CROSS_TRIPLE}/pkgconfig \ + ARCH=arm \ + SUBARCH=armv7 + +COPY . 
/criu +WORKDIR /criu + +RUN make mrproper && date && make -j $(nproc) zdtm && date diff --git a/scripts/build/Makefile b/scripts/build/Makefile index a7c78e8bd..d093ce76c 100644 --- a/scripts/build/Makefile +++ b/scripts/build/Makefile @@ -2,6 +2,7 @@ ARCHES := x86_64 fedora-asan fedora-rawhide centos armv7hf TARGETS := $(ARCHES) alpine TARGETS_CLANG := $(addsuffix $(TARGETS),-clang) CONTAINER_RUNTIME := docker +TARGETS += armv7-cross all: $(TARGETS) $(TARGETS_CLANG) .PHONY: all From 434e6b92dbcd47354f02e3a992ead6c25a6db16f Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 21 Nov 2019 21:56:42 +0000 Subject: [PATCH 0086/1854] Documentation: Add a hint about docker build The original/old guide probably doesn't work anymore: - the patch isn't accessible; - criu now depends on more libraries not only protobuf Still, keep it as it might be helpful for someone. Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- Documentation/HOWTO.cross-compile | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/Documentation/HOWTO.cross-compile b/Documentation/HOWTO.cross-compile index f1b17842b..44b19dfea 100644 --- a/Documentation/HOWTO.cross-compile +++ b/Documentation/HOWTO.cross-compile @@ -1,4 +1,10 @@ -This HOWTO explains how to cross-compile CRIU on x86 +How to cross-compile CRIU on x86: + +Use the Dockerfile provided: + scripts/build/Dockerfile.armv7-cross + +Historical guide how-to do it without docker container: +[Unsupported, may not work anymore!] 1. Download the protobuf sources. 2. 
Apply the patch http://16918.selcdn.ru/crtools/aarch64/0001-protobuf-added-the-support-for-the-acrchitecture-AAr.patch From 6ab2bdd940c392ba58ebe68b5134d6327381a498 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 21 Nov 2019 21:56:43 +0000 Subject: [PATCH 0087/1854] zdtm/socket-tcp-fin-wait1: Use array index for TEST_MSG Fixes the following compile-error: > CC socket-tcp-fin-wait1.o > socket-tcp-fin-wait1.c:144:26: error: adding 'int' to a string does not append to the string [-Werror,-Wstring-plus-int] > if (write(fd, TEST_MSG + 2, sizeof(TEST_MSG) - 2) != sizeof(TEST_MSG) - 2) { > ~~~~~~~~~^~~ > socket-tcp-fin-wait1.c:144:26: note: use array indexing to silence this warning > if (write(fd, TEST_MSG + 2, sizeof(TEST_MSG) - 2) != sizeof(TEST_MSG) - 2) { > ^ > & [ ] > 1 error generated. Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- test/zdtm/static/socket-tcp-fin-wait1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/zdtm/static/socket-tcp-fin-wait1.c b/test/zdtm/static/socket-tcp-fin-wait1.c index 6c7cc93e5..50da9c152 100644 --- a/test/zdtm/static/socket-tcp-fin-wait1.c +++ b/test/zdtm/static/socket-tcp-fin-wait1.c @@ -141,7 +141,7 @@ int main(int argc, char **argv) return 1; } - if (write(fd, TEST_MSG + 2, sizeof(TEST_MSG) - 2) != sizeof(TEST_MSG) - 2) { + if (write(fd, &TEST_MSG[2], sizeof(TEST_MSG) - 2) != sizeof(TEST_MSG) - 2) { pr_err("write"); return 1; } From 37220b3c418d8d09ff2ef147e94c37fc897b3e27 Mon Sep 17 00:00:00 2001 From: Nidhi Gupta Date: Thu, 7 Nov 2019 14:38:42 +0530 Subject: [PATCH 0088/1854] Add File-based Java Functional Tests Signed-off-by: Nidhi Gupta --- test/javaTests/README.md | 8 + .../criu/java/tests/CheckpointRestore.java | 4 +- .../src/org/criu/java/tests/FileRead.java | 2 +- .../src/org/criu/java/tests/Helper.java | 39 +++- .../org/criu/java/tests/MemoryMappings.java | 121 +++++++++++ .../org/criu/java/tests/MultipleFileRead.java | 203 ++++++++++++++++++ 
.../criu/java/tests/MultipleFileWrite.java | 140 ++++++++++++ .../src/org/criu/java/tests/ReadWrite.java | 119 ++++++++++ test/javaTests/test.xml | 30 +++ 9 files changed, 659 insertions(+), 7 deletions(-) create mode 100644 test/javaTests/src/org/criu/java/tests/MemoryMappings.java create mode 100644 test/javaTests/src/org/criu/java/tests/MultipleFileRead.java create mode 100644 test/javaTests/src/org/criu/java/tests/MultipleFileWrite.java create mode 100644 test/javaTests/src/org/criu/java/tests/ReadWrite.java diff --git a/test/javaTests/README.md b/test/javaTests/README.md index cb779285e..670741677 100644 --- a/test/javaTests/README.md +++ b/test/javaTests/README.md @@ -23,6 +23,14 @@ CAP_SETUID Here we test the File-Based Java APIs by checkpointing the application in the following scenarios and verifying the contents of the file after restore: - Reading and writing in the same file. (FileRead.java) +- Read from a file and write its content to another file. (ReadWrite.java) +- Reading from multiple files and writing their content to another file. (MultipleFileRead) +- Reading from a file and writing its content to multiple files. (MultipleFileWrite) + +## Memory mapping Java APIs + +Here we test the Memory Mapping APIs by checkpointing the application in following scenario and verifying the contents after restore: +- Memory-mapping a file and writing its content to another file. 
(MemoryMappings.java) ### Prerequisites for running the tests: - Maven diff --git a/test/javaTests/src/org/criu/java/tests/CheckpointRestore.java b/test/javaTests/src/org/criu/java/tests/CheckpointRestore.java index 968488191..b848c9938 100644 --- a/test/javaTests/src/org/criu/java/tests/CheckpointRestore.java +++ b/test/javaTests/src/org/criu/java/tests/CheckpointRestore.java @@ -77,7 +77,7 @@ public class CheckpointRestore { private void testSetup(String testName) throws IOException { Path testFolderPath = Paths.get(outputFolder + testName + "/"); if (!Files.exists(testFolderPath)) { - System.out.println("Test Folder does not exist creating it"); + System.out.println("Creating the test folder"); Files.createDirectory(testFolderPath); } } @@ -245,7 +245,7 @@ public class CheckpointRestore { Assert.assertNotEquals(currentState, Helper.STATE_TERMINATE, testName + ": ERROR: Checkpoint-Restore failed"); Assert.assertNotEquals(currentState, Helper.STATE_FAIL, testName + ": ERROR: Test Failed, Check Log for details"); Assert.assertEquals(currentState, Helper.STATE_PASS, testName + " ERROR: Unexpected State of Mapped Buffer"); - System.out.println("-----" + "PASS" + "-----"); + System.out.println("----- " + "PASS" + " -----"); } diff --git a/test/javaTests/src/org/criu/java/tests/FileRead.java b/test/javaTests/src/org/criu/java/tests/FileRead.java index d94a14112..d8851a73e 100644 --- a/test/javaTests/src/org/criu/java/tests/FileRead.java +++ b/test/javaTests/src/org/criu/java/tests/FileRead.java @@ -50,7 +50,7 @@ class FileRead { /* * Mapped Byte Buffer should be in init state at the beginning of test */ - if ('I' != b.getChar(Helper.MAPPED_INDEX)) { + if (Helper.STATE_INIT != b.getChar(Helper.MAPPED_INDEX)) { logger.log(Level.SEVERE, "Error: Error in memory mapping, test is not in init state"); b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); System.exit(1); diff --git a/test/javaTests/src/org/criu/java/tests/Helper.java 
b/test/javaTests/src/org/criu/java/tests/Helper.java index d608fba47..fdf20bb52 100644 --- a/test/javaTests/src/org/criu/java/tests/Helper.java +++ b/test/javaTests/src/org/criu/java/tests/Helper.java @@ -1,9 +1,6 @@ package org.criu.java.tests; -import java.io.BufferedWriter; -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; +import java.io.*; import java.nio.MappedByteBuffer; import java.util.logging.FileHandler; import java.util.logging.Level; @@ -96,4 +93,38 @@ class Helper { System.exit(1); } } + + + /** + * Compare two files and return true if their content is similar. + * + * @param readFile File 1 whose content has to be compared. + * @param writeFile File 2 whose content has to be compared. + * @return true if the files are similar, false otherwise. + * @throws IOException + */ + static boolean compare(File readFile, File writeFile) throws IOException { + BufferedReader bir = new BufferedReader(new FileReader(readFile)); + BufferedReader bor = new BufferedReader(new FileReader(writeFile)); + String si, so; + si = bir.readLine(); + so = bor.readLine(); + while (null != si && null != so) { + if (!si.equals(so)) { + return false; + } + + si = bir.readLine(); + so = bor.readLine(); + } + + if ((null == si) && (null == so)) { + return true; + } + bir.close(); + bor.close(); + + return false; + } + } diff --git a/test/javaTests/src/org/criu/java/tests/MemoryMappings.java b/test/javaTests/src/org/criu/java/tests/MemoryMappings.java new file mode 100644 index 000000000..4ac6f4a17 --- /dev/null +++ b/test/javaTests/src/org/criu/java/tests/MemoryMappings.java @@ -0,0 +1,121 @@ +package org.criu.java.tests; + +import java.io.*; +import java.lang.management.ManagementFactory; +import java.lang.management.RuntimeMXBean; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.channels.FileChannel.MapMode; +import java.nio.file.StandardOpenOption; +import java.util.logging.Level; +import 
java.util.logging.Logger; + +class MemoryMappings { + private static String TESTNAME = "MemoryMappings"; + + /** + * Map a file to memory and write the mapped data into a file, + * checkpointing and restoring in between. + * + * @param args Not used + */ + public static void main(String[] args) { + MappedByteBuffer b = null; + Logger logger = null; + + try { + MappedByteBuffer testBuffer; + char ch; + int i = 1; + boolean similar; + logger = Logger.getLogger(Helper.PACKAGE_NAME + "." + TESTNAME); + File f = new File(Helper.MEMORY_MAPPED_FILE_NAME); + File readFile = new File(Helper.SOURCE_FOLDER + "/" + "ReadWrite.java"); + File writeFile = new File(Helper.OUTPUT_FOLDER_NAME + "/" + TESTNAME + "/" + "MemoryMappings_file.txt"); + RuntimeMXBean bean = ManagementFactory.getRuntimeMXBean(); + String pid = bean.getName(); + int val = Helper.init(TESTNAME, pid, logger); + if (0 != val) { + logger.log(Level.SEVERE, "Helper.init returned a non-zero code."); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + logger.log(Level.INFO, "Test init done; pid written to pid file; beginning with test"); + + FileChannel channel = FileChannel.open(f.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE); + b = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE); + channel.close(); + /* + * Mapped Byte Buffer should be in init state at the beginning of test + */ + if (Helper.STATE_INIT != b.getChar(Helper.MAPPED_INDEX)) { + logger.log(Level.SEVERE, "Error: Error in memory mapping, test is not in init state"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + logger.log(Level.INFO, "Checking existence of file to be memory mapped"); + if (!readFile.exists()) { + logger.log(Level.SEVERE, "Error: File from which to read does not exist"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + + channel = FileChannel.open(readFile.toPath(), StandardOpenOption.READ, 
StandardOpenOption.WRITE, StandardOpenOption.CREATE); + testBuffer = channel.map(MapMode.READ_WRITE, 0, readFile.length()); + channel.close(); + + if (writeFile.exists()) { + writeFile.delete(); + } + boolean newFile = writeFile.createNewFile(); + if (!newFile) { + logger.log(Level.SEVERE, "Error: Cannot create a new file to write to."); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + + BufferedWriter brw = new BufferedWriter(new FileWriter(writeFile)); + + while (testBuffer.hasRemaining()) { + ch = (char) testBuffer.get(); + brw.write(ch); + i++; + if (200 == i) { + logger.log(Level.INFO, "Going to checkpoint"); + Helper.checkpointAndWait(b, logger); + logger.log(Level.INFO, "Test has been restored!"); + } + } + + brw.close(); + logger.log(Level.INFO, "Comparing contents of the file"); + + similar = Helper.compare(readFile, writeFile); + if (!similar) { + logger.log(Level.SEVERE, "Error: Files are not similar after writing"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + logger.log(Level.INFO, "Data was read and written correctly!"); + logger.log(Level.INFO, Helper.PASS_MESSAGE); + brw.close(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_PASS); + System.exit(0); + + } catch (Exception e) { + if (null != logger) { + + StringWriter writer = new StringWriter(); + PrintWriter printWriter = new PrintWriter(writer); + e.printStackTrace(printWriter); + logger.log(Level.SEVERE, "Exception occurred:" + e); + logger.log(Level.FINE, writer.toString()); + } + + if (null != b) { + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + System.exit(5); + } + } +} diff --git a/test/javaTests/src/org/criu/java/tests/MultipleFileRead.java b/test/javaTests/src/org/criu/java/tests/MultipleFileRead.java new file mode 100644 index 000000000..7b023673e --- /dev/null +++ b/test/javaTests/src/org/criu/java/tests/MultipleFileRead.java @@ -0,0 +1,203 @@ +package org.criu.java.tests; + +import java.io.*; +import 
java.lang.management.ManagementFactory; +import java.lang.management.RuntimeMXBean; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.channels.FileChannel.MapMode; +import java.nio.file.StandardOpenOption; +import java.util.logging.Level; +import java.util.logging.Logger; + +class MultipleFileRead { + private static String TESTNAME = "MultipleFileRead"; + + /** + * @param readFile1 File 1 whose contents are read. + * @param readFile2 File 2 whose contents are read. + * @param writeFile File in which data has been written to. + * @return true if the data written is as expected, false otherwise. + * @throws IOException + */ + private static boolean compare(File readFile1, File readFile2, File writeFile) throws IOException { + BufferedReader br1 = new BufferedReader(new FileReader(readFile1)); + BufferedReader br2 = new BufferedReader(new FileReader(readFile2)); + BufferedReader brw = new BufferedReader(new FileReader(writeFile)); + boolean eof1, eof2; + eof1 = false; + eof2 = false; + String inpString, wrtString; + + while (!eof1 || !eof2) { + if (!eof1) { + inpString = br1.readLine(); + if (null == inpString) { + eof1 = true; + } else { + wrtString = brw.readLine(); + if (null == wrtString) { + return false; + } + if (!wrtString.equals(inpString)) { + return false; + } + } + } + if (!eof2) { + inpString = br2.readLine(); + if (null == inpString) { + eof2 = true; + } else { + wrtString = brw.readLine(); + if (null == wrtString) { + return false; + } + if (!wrtString.equals(inpString)) { + return false; + } + } + } + } + + wrtString = brw.readLine(); + if (null != wrtString) { + return false; + } + + br1.close(); + br2.close(); + brw.close(); + + return true; + } + + /** + * Read from multiple files and write their content into another file, + * checkpointing and restoring in between. + * + * @param args Not used. 
+ */ + public static void main(String[] args) { + MappedByteBuffer b = null; + String s; + int i = 0; + Logger logger = null; + try { + logger = Logger.getLogger(Helper.PACKAGE_NAME + "." + TESTNAME); + File f = new File(Helper.MEMORY_MAPPED_FILE_NAME); + File readFile1 = new File(Helper.SOURCE_FOLDER + "/" + "FileRead.java"); + File readFile2 = new File(Helper.SOURCE_FOLDER + "/" + "ReadWrite.java"); + File writeFile = new File(Helper.OUTPUT_FOLDER_NAME + "/" + TESTNAME + "/" + "MultipleFileRead_file.txt"); + boolean eofFile1 = false, eofFile2 = false, check; + RuntimeMXBean bean = ManagementFactory.getRuntimeMXBean(); + String pid = bean.getName(); + int val = Helper.init(TESTNAME, pid, logger); + if (0 != val) { + logger.log(Level.SEVERE, "Helper.init returned a non-zero code."); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + logger.log(Level.INFO, "Test init done; pid written to pid file; beginning with test"); + + FileChannel channel = FileChannel.open(f.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE); + b = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE); + channel.close(); + /* + * Mapped Byte Buffer should be in init state at the beginning of test + */ + if (b.getChar(Helper.MAPPED_INDEX) != Helper.STATE_INIT) { + logger.log(Level.SEVERE, "Error: Error in memory mapping, test is not in init state"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + logger.log(Level.INFO, "Checking existence of the read files"); + + if (!readFile1.exists()) { + logger.log(Level.SEVERE, "Error: File from which to read does not exist"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + if (!readFile2.exists()) { + logger.log(Level.SEVERE, "Error: File from which to read does not exist"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + if (writeFile.exists()) { + writeFile.delete(); + } + logger.log(Level.INFO, 
"Creating writeFile"); + boolean newFile = writeFile.createNewFile(); + if (!newFile) { + logger.log(Level.SEVERE, "Error: Cannot create a new file to write to."); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + + BufferedReader br1 = new BufferedReader(new FileReader(readFile1)); + BufferedReader br2 = new BufferedReader(new FileReader(readFile2)); + BufferedWriter brw = new BufferedWriter(new FileWriter(writeFile)); + + logger.log(Level.INFO, "Writing in file"); + + while (!eofFile1 || !eofFile2) { + if (!eofFile1) { + s = br1.readLine(); + i++; + if (null == s) { + eofFile1 = true; + } else { + brw.write(s + "\n"); + } + } + if (!eofFile2) { + s = br2.readLine(); + i++; + if (null == s) { + eofFile2 = true; + } else { + brw.write(s + "\n"); + } + } + if (10 == i) { + /* + * Checkpoint and Restore + */ + logger.log(Level.INFO, "Going to checkpoint"); + Helper.checkpointAndWait(b, logger); + logger.log(Level.INFO, "Test has been restored!"); + } + } + brw.flush(); + logger.log(Level.INFO, "Checking the content of the file"); + check = compare(readFile1, readFile2, writeFile); + + if (!check) { + logger.log(Level.SEVERE, "Error: Files are not similar after writing"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + logger.log(Level.INFO, "The file has been written as expected"); + logger.log(Level.INFO, Helper.PASS_MESSAGE); + br1.close(); + br2.close(); + brw.close(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_PASS); + System.exit(0); + + } catch (Exception e) { + if (null != logger) { + StringWriter writer = new StringWriter(); + PrintWriter printWriter = new PrintWriter(writer); + e.printStackTrace(printWriter); + logger.log(Level.SEVERE, "Exception occurred:" + e); + logger.log(Level.FINE, writer.toString()); + } + + if (null != b) { + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + System.exit(5); + } + } +} diff --git a/test/javaTests/src/org/criu/java/tests/MultipleFileWrite.java 
b/test/javaTests/src/org/criu/java/tests/MultipleFileWrite.java new file mode 100644 index 000000000..76d287a07 --- /dev/null +++ b/test/javaTests/src/org/criu/java/tests/MultipleFileWrite.java @@ -0,0 +1,140 @@ +package org.criu.java.tests; + +import java.io.*; +import java.lang.management.ManagementFactory; +import java.lang.management.RuntimeMXBean; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.channels.FileChannel.MapMode; +import java.nio.file.StandardOpenOption; +import java.util.logging.Level; +import java.util.logging.Logger; + +class MultipleFileWrite { + private static String TESTNAME = "MultipleFileWrite"; + + /** + * Reads from a file and write its content into multiple files, + * checkpointing and restoring in between. + * + * @param args Not used. + */ + public static void main(String[] args) { + MappedByteBuffer b = null; + String s, pid; + int i = 1; + Logger logger = null; + boolean similar1, similar2; + try { + File readFile = new File(Helper.SOURCE_FOLDER + "/" + "FileRead.java"); + File writeFile1 = new File(Helper.OUTPUT_FOLDER_NAME + "/" + TESTNAME + "/" + TESTNAME + "1_file.txt"); + File writeFile2 = new File(Helper.OUTPUT_FOLDER_NAME + "/" + TESTNAME + "/" + TESTNAME + "2_file.txt"); + logger = Logger.getLogger(Helper.PACKAGE_NAME + "." 
+ TESTNAME); + File f = new File(Helper.MEMORY_MAPPED_FILE_NAME); + RuntimeMXBean bean = ManagementFactory.getRuntimeMXBean(); + pid = bean.getName(); + int val = Helper.init(TESTNAME, pid, logger); + if (0 != val) { + logger.log(Level.SEVERE, "Helper.init returned a non-zero code."); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + logger.log(Level.INFO, "Test init done; pid written to pid file; beginning with test"); + + FileChannel channel = FileChannel.open(f.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE); + b = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE); + channel.close(); + /* + * Mapped Byte Buffer should be in init state at the beginning of test + */ + if (Helper.STATE_INIT != b.getChar(Helper.MAPPED_INDEX)) { + logger.log(Level.SEVERE, "Error: Error in memory mapping, test is not in init state"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + logger.log(Level.INFO, "Checking existence of read files!"); + + if (!readFile.exists()) { + logger.log(Level.SEVERE, "Error: File from which to read does not exist"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + if (writeFile1.exists()) { + writeFile1.delete(); + } + boolean newFile = writeFile1.createNewFile(); + if (!newFile) { + logger.log(Level.SEVERE, "Error: Cannot create a new file to write to."); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + + if (writeFile2.exists()) { + writeFile2.delete(); + } + newFile = writeFile2.createNewFile(); + if (!newFile) { + logger.log(Level.SEVERE, "Error: Cannot create a new file to write to."); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + logger.log(Level.INFO, "Created write files"); + + BufferedReader br = new BufferedReader(new FileReader(readFile)); + BufferedWriter bw1 = new BufferedWriter(new FileWriter(writeFile1)); + BufferedWriter bw2 = new BufferedWriter(new 
FileWriter(writeFile2)); + + s = br.readLine(); + + while (null != s) { + bw1.write(s + "\n"); + bw2.write(s + "\n"); + if (90 == i) { + /* + * Checkpoint and Restore + */ + logger.log(Level.INFO, "Going to checkpoint"); + Helper.checkpointAndWait(b, logger); + logger.log(Level.INFO, "Test has been restored!"); + } + + i++; + s = br.readLine(); + } + + bw1.flush(); + bw2.flush(); + logger.log(Level.INFO, "Checking files have been written correctly"); + + similar1 = Helper.compare(readFile, writeFile1); + similar2 = Helper.compare(readFile, writeFile2); + + if (!similar1 || !similar2) { + logger.log(Level.SEVERE, "Error: Written data is not identical to the data read"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + logger.log(Level.INFO, "Content of files is as expected"); + logger.log(Level.INFO, Helper.PASS_MESSAGE); + br.close(); + bw1.close(); + bw2.close(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_PASS); + System.exit(0); + + } catch (Exception e) { + if (null != logger) { + StringWriter writer = new StringWriter(); + PrintWriter printWriter = new PrintWriter(writer); + e.printStackTrace(printWriter); + logger.log(Level.SEVERE, "Exception occurred:" + e); + logger.log(Level.FINE, writer.toString()); + } + + if (null != b) { + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + System.exit(5); + } + } +} diff --git a/test/javaTests/src/org/criu/java/tests/ReadWrite.java b/test/javaTests/src/org/criu/java/tests/ReadWrite.java new file mode 100644 index 000000000..fa98447ed --- /dev/null +++ b/test/javaTests/src/org/criu/java/tests/ReadWrite.java @@ -0,0 +1,119 @@ +package org.criu.java.tests; + +import java.io.*; +import java.lang.management.ManagementFactory; +import java.lang.management.RuntimeMXBean; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.channels.FileChannel.MapMode; +import java.nio.file.StandardOpenOption; +import java.util.logging.Level; +import 
java.util.logging.Logger; + +class ReadWrite { + private static String TESTNAME = "ReadWrite"; + + /** + * Read from a file and write its content into another file, + * checkpointing and restoring in between. + * + * @param args Not used. + */ + public static void main(String[] args) { + int i = 0; + String s, pid; + boolean similar; + MappedByteBuffer b = null; + Logger logger = null; + try { + File readFile = new File(Helper.SOURCE_FOLDER + "/" + "FileRead.java"); + File writeFile = new File(Helper.OUTPUT_FOLDER_NAME + "/" + TESTNAME + "/" + "ReadWrite_file.txt"); + logger = Logger.getLogger(Helper.PACKAGE_NAME + "." + TESTNAME); + File f = new File(Helper.MEMORY_MAPPED_FILE_NAME); + RuntimeMXBean bean = ManagementFactory.getRuntimeMXBean(); + pid = bean.getName(); + int val = Helper.init(TESTNAME, pid, logger); + if (0 != val) { + logger.log(Level.SEVERE, "Helper.init returned a non-zero code."); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + logger.log(Level.INFO, "Test init done; pid written to pid file; beginning with test"); + FileChannel channel = FileChannel.open(f.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE); + b = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE); + channel.close(); + /* + * Mapped Byte Buffer should be in init state at the beginning of test + */ + if (Helper.STATE_INIT != b.getChar(Helper.MAPPED_INDEX)) { + logger.log(Level.SEVERE, "Error: Error in memory mapping, test is not in init state"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + logger.log(Level.INFO, "Checking existence of files to be read!"); + if (!readFile.exists()) { + logger.log(Level.SEVERE, "Error: File from which to read does not exist"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + + if (writeFile.exists()) { + writeFile.delete(); + } + logger.log(Level.INFO, "Creating the writeFile"); + boolean newFile = 
writeFile.createNewFile(); + if (!newFile) { + logger.log(Level.SEVERE, "Error: Cannot create a new file to write to."); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + + BufferedReader brr = new BufferedReader(new FileReader(readFile)); + BufferedWriter brw = new BufferedWriter(new FileWriter(writeFile)); + logger.log(Level.INFO, "Start writing"); + + s = brr.readLine(); + + while (null != s) { + i++; + brw.write(s + "\n"); + + if (50 == i) { + /* + * Checkpoint and Restore + */ + logger.log(Level.INFO, "Going to checkpoint"); + Helper.checkpointAndWait(b, logger); + logger.log(Level.INFO, "Test has been restored!"); + } + s = brr.readLine(); + } + + brw.flush(); + logger.log(Level.INFO, "Checking content of the files."); + similar = Helper.compare(readFile, writeFile); + + if (!similar) { + logger.log(Level.SEVERE, "Error: Files are not similar after writing"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + logger.log(Level.INFO, "Content of file is as expected"); + logger.log(Level.INFO, Helper.PASS_MESSAGE); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_PASS); + System.exit(0); + } catch (Exception e) { + if (null != logger) { + StringWriter writer = new StringWriter(); + PrintWriter printWriter = new PrintWriter(writer); + e.printStackTrace(printWriter); + logger.log(Level.SEVERE, "Exception occurred:" + e); + logger.log(Level.FINE, writer.toString()); + } + if (null != b) { + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + System.exit(5); + } + } +} diff --git a/test/javaTests/test.xml b/test/javaTests/test.xml index 8ff67c5e0..b73a31db2 100644 --- a/test/javaTests/test.xml +++ b/test/javaTests/test.xml @@ -4,10 +4,40 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + From 3ca09b191429a4260a12daf6bbaf58da2aebd656 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Wed, 20 Nov 2019 11:01:33 +0300 Subject: [PATCH 0089/1854] travis: ignore fails of podman-test until it will not be 
fixed. Signed-off-by: Andrei Vagin --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index b27dbfe7b..e6e410191 100644 --- a/.travis.yml +++ b/.travis.yml @@ -81,6 +81,7 @@ matrix: - env: TR_ARCH=docker-test - env: TR_ARCH=fedora-rawhide - env: TR_ARCH=local GCOV=1 + - env: TR_ARCH=podman-test script: - sudo make CCACHE=1 -C scripts/travis $TR_ARCH after_success: From b5b1c4ec4506df7cee6a9ba8ffff36f43e0cd8e3 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Thu, 21 Nov 2019 01:24:44 +0300 Subject: [PATCH 0090/1854] kerndat: check whether the new mount API is supported of not Signed-off-by: Andrei Vagin --- Makefile.config | 2 +- .../arch/arm/plugins/std/syscalls/syscall.def | 3 ++ .../plugins/std/syscalls/syscall-ppc64.tbl | 3 ++ .../plugins/std/syscalls/syscall-s390.tbl | 3 ++ .../x86/plugins/std/syscalls/syscall_32.tbl | 3 ++ .../x86/plugins/std/syscalls/syscall_64.tbl | 3 ++ criu/include/kerndat.h | 1 + criu/include/linux/mount.h | 35 +++++++++++++++++++ criu/kerndat.c | 16 +++++++++ scripts/feature-tests.mak | 12 +++++++ 10 files changed, 80 insertions(+), 1 deletion(-) create mode 100644 criu/include/linux/mount.h diff --git a/Makefile.config b/Makefile.config index 5af3fed38..81aae24f8 100644 --- a/Makefile.config +++ b/Makefile.config @@ -47,7 +47,7 @@ export DEFINES += $(FEATURE_DEFINES) export CFLAGS += $(FEATURE_DEFINES) FEATURES_LIST := TCP_REPAIR STRLCPY STRLCAT PTRACE_PEEKSIGINFO \ - SETPROCTITLE_INIT MEMFD TCP_REPAIR_WINDOW + SETPROCTITLE_INIT MEMFD TCP_REPAIR_WINDOW FSCONFIG # $1 - config name define gen-feature-test diff --git a/compel/arch/arm/plugins/std/syscalls/syscall.def b/compel/arch/arm/plugins/std/syscalls/syscall.def index 721ff16dc..d5bdc677e 100644 --- a/compel/arch/arm/plugins/std/syscalls/syscall.def +++ b/compel/arch/arm/plugins/std/syscalls/syscall.def @@ -112,3 +112,6 @@ userfaultfd 282 388 (int flags) fallocate 47 352 (int fd, int mode, loff_t offset, loff_t len) cacheflush ! 
983042 (void *start, void *end, int flags) ppoll 73 336 (struct pollfd *fds, unsigned int nfds, const struct timespec *tmo, const sigset_t *sigmask, size_t sigsetsize) +fsopen 430 430 (char *fsname, unsigned int flags) +fsconfig 431 431 (int fd, unsigned int cmd, const char *key, const char *value, int aux) +fsmount 432 432 (int fd, unsigned int flags, unsigned int attr_flags) diff --git a/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl b/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl index 3b3079040..4e283d5e9 100644 --- a/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl +++ b/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl @@ -108,3 +108,6 @@ __NR_gettimeofday 78 sys_gettimeofday (struct timeval *tv, struct timezone *tz) __NR_preadv 320 sys_preadv_raw (int fd, struct iovec *iov, unsigned long nr, unsigned long pos_l, unsigned long pos_h) __NR_userfaultfd 364 sys_userfaultfd (int flags) __NR_ppoll 281 sys_ppoll (struct pollfd *fds, unsigned int nfds, const struct timespec *tmo, const sigset_t *sigmask, size_t sigsetsize) +__NR_fsopen 430 sys_fsopen (char *fsname, unsigned int flags) +__NR_fsconfig 431 sys_fsconfig (int fd, unsigned int cmd, const char *key, const char *value, int aux) +__NR_fsmount 432 sys_fsmount (int fd, unsigned int flags, unsigned int attr_flags) diff --git a/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl b/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl index cc13a63dd..fd48e3950 100644 --- a/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl +++ b/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl @@ -108,3 +108,6 @@ __NR_userfaultfd 355 sys_userfaultfd (int flags) __NR_preadv 328 sys_preadv_raw (int fd, struct iovec *iov, unsigned long nr, unsigned long pos_l, unsigned long pos_h) __NR_gettimeofday 78 sys_gettimeofday (struct timeval *tv, struct timezone *tz) __NR_ppoll 302 sys_ppoll (struct pollfd *fds, unsigned int nfds, const struct timespec *tmo, const sigset_t *sigmask, size_t 
sigsetsize) +__NR_fsopen 430 sys_fsopen (char *fsname, unsigned int flags) +__NR_fsconfig 431 sys_fsconfig (int fd, unsigned int cmd, const char *key, const char *value, int aux) +__NR_fsmount 432 sys_fsmount (int fd, unsigned int flags, unsigned int attr_flags) diff --git a/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl b/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl index 7903ab150..038aeb4f7 100644 --- a/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl +++ b/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl @@ -96,3 +96,6 @@ __NR_seccomp 354 sys_seccomp (unsigned int op, unsigned int flags, const char __NR_memfd_create 356 sys_memfd_create (const char *name, unsigned int flags) __NR_userfaultfd 374 sys_userfaultfd (int flags) __NR_ppoll 309 sys_ppoll (struct pollfd *fds, unsigned int nfds, const struct timespec *tmo, const sigset_t *sigmask, size_t sigsetsize) +__NR_fsopen 430 sys_fsopen (char *fsname, unsigned int flags) +__NR_fsconfig 431 sys_fsconfig (int fd, unsigned int cmd, const char *key, const char *value, int aux) +__NR_fsmount 432 sys_fsmount (int fd, unsigned int flags, unsigned int attr_flags) diff --git a/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl b/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl index 4ac9164ea..215f32026 100644 --- a/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl +++ b/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl @@ -107,3 +107,6 @@ __NR_kcmp 312 sys_kcmp (pid_t pid1, pid_t pid2, int type, unsigned long idx1 __NR_memfd_create 319 sys_memfd_create (const char *name, unsigned int flags) __NR_userfaultfd 323 sys_userfaultfd (int flags) __NR_ppoll 271 sys_ppoll (struct pollfd *fds, unsigned int nfds, const struct timespec *tmo, const sigset_t *sigmask, size_t sigsetsize) +__NR_fsopen 430 sys_fsopen (char *fsname, unsigned int flags) +__NR_fsconfig 431 sys_fsconfig (int fd, unsigned int cmd, const char *key, const char *value, int aux) +__NR_fsmount 432 sys_fsmount (int fd, unsigned int flags, 
unsigned int attr_flags) diff --git a/criu/include/kerndat.h b/criu/include/kerndat.h index d93e07813..771195860 100644 --- a/criu/include/kerndat.h +++ b/criu/include/kerndat.h @@ -65,6 +65,7 @@ struct kerndat_s { bool x86_has_ptrace_fpu_xsave_bug; bool has_inotify_setnextwd; bool has_kcmp_epoll_tfd; + bool has_fsopen; }; extern struct kerndat_s kdat; diff --git a/criu/include/linux/mount.h b/criu/include/linux/mount.h new file mode 100644 index 000000000..aa6be69ec --- /dev/null +++ b/criu/include/linux/mount.h @@ -0,0 +1,35 @@ +#ifndef _CRIU_LINUX_MOUNT_H +#define _CRIU_LINUX_MOUNT_H + +#include "common/config.h" +#include "compel/plugins/std/syscall-codes.h" + +#ifdef CONFIG_HAS_FSCONFIG +#include +#else +enum fsconfig_command { + FSCONFIG_SET_FLAG = 0, /* Set parameter, supplying no value */ + FSCONFIG_SET_STRING = 1, /* Set parameter, supplying a string value */ + FSCONFIG_SET_BINARY = 2, /* Set parameter, supplying a binary blob value */ + FSCONFIG_SET_PATH = 3, /* Set parameter, supplying an object by path */ + FSCONFIG_SET_PATH_EMPTY = 4, /* Set parameter, supplying an object by (empty) path */ + FSCONFIG_SET_FD = 5, /* Set parameter, supplying an object by fd */ + FSCONFIG_CMD_CREATE = 6, /* Invoke superblock creation */ + FSCONFIG_CMD_RECONFIGURE = 7, /* Invoke superblock reconfiguration */ +}; +#endif + +static inline int sys_fsopen(const char *fsname, unsigned int flags) +{ + return syscall(__NR_fsopen, fsname, flags); +} +static inline int sys_fsconfig(int fd, unsigned int cmd, const char *key, const char *value, int aux) +{ + return syscall(__NR_fsconfig, fd, cmd, key, value, aux); +} +static inline int sys_fsmount(int fd, unsigned int flags, unsigned int attr_flags) +{ + return syscall(__NR_fsmount, fd, flags, attr_flags); +} + +#endif diff --git a/criu/kerndat.c b/criu/kerndat.c index 39cacb8fe..b0dd83135 100644 --- a/criu/kerndat.c +++ b/criu/kerndat.c @@ -723,6 +723,20 @@ static int kerndat_has_inotify_setnextwd(void) return ret; } +static int 
kerndat_has_fsopen(void) +{ + if (syscall(__NR_fsopen, NULL, -1) != -1) { + pr_err("fsopen should fail\n"); + return -1; + } + if (errno == ENOSYS) + pr_info("The new mount API (fsopen, fsmount) isn't supported\n"); + else + kdat.has_fsopen = true; + + return 0; +} + static int has_kcmp_epoll_tfd(void) { kcmp_epoll_slot_t slot = { }; @@ -1043,6 +1057,8 @@ int kerndat_init(void) ret = kerndat_has_inotify_setnextwd(); if (!ret) ret = has_kcmp_epoll_tfd(); + if (!ret) + ret = kerndat_has_fsopen(); kerndat_lsm(); kerndat_mmap_min_addr(); diff --git a/scripts/feature-tests.mak b/scripts/feature-tests.mak index e39d97bb1..39ddfd053 100644 --- a/scripts/feature-tests.mak +++ b/scripts/feature-tests.mak @@ -136,3 +136,15 @@ ENTRY(main) nop END(main) endef + +define FEATURE_TEST_FSCONFIG + +#include + +int main(void) +{ + if (FSCONFIG_CMD_CREATE > 0) + return 0; + return 0; +} +endef From 4997a096e4ffad4778a24f903e4450842171e576 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Mon, 25 Nov 2019 09:50:08 +0300 Subject: [PATCH 0091/1854] util: introduce the mount_detached_fs helper Signed-off-by: Andrei Vagin --- criu/include/util.h | 2 ++ criu/util.c | 26 ++++++++++++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/criu/include/util.h b/criu/include/util.h index 313aacd8c..45bebf673 100644 --- a/criu/include/util.h +++ b/criu/include/util.h @@ -380,4 +380,6 @@ static inline void print_stack_trace(pid_t pid) {} ___ret; \ }) +extern int mount_detached_fs(const char *fsname); + #endif /* __CR_UTIL_H__ */ diff --git a/criu/util.c b/criu/util.c index e47e109ae..3bae18ab2 100644 --- a/criu/util.c +++ b/criu/util.c @@ -28,6 +28,8 @@ #include #include +#include "linux/mount.h" + #include "kerndat.h" #include "page.h" #include "util.h" @@ -1423,3 +1425,27 @@ void print_stack_trace(pid_t pid) free(strings); } #endif + +int mount_detached_fs(const char *fsname) +{ + int fsfd, fd; + + fsfd = sys_fsopen(fsname, 0); + if (fsfd < 0) { + pr_perror("Unable to open the %s file 
system", fsname); + return -1; + } + + if (sys_fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0) < 0) { + pr_perror("Unable to create the %s file system", fsname); + close(fsfd); + return -1; + } + + fd = sys_fsmount(fsfd, 0, 0); + if (fd < 0) + pr_perror("Unable to mount the %s file system", fsname); + close(fsfd); + return fd; +} + From 1a2d8ad7e162adf95124064109c959c7f7beb77a Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Thu, 21 Nov 2019 01:26:38 +0300 Subject: [PATCH 0092/1854] mount: use new mount API to open the proc file system It doesn't require to create a temporary directory and mount the proc file system in it. Signed-off-by: Andrei Vagin --- criu/cr-restore.c | 42 ++++++++++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/criu/cr-restore.c b/criu/cr-restore.c index 5694931f4..b920ce262 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -23,6 +23,8 @@ #include #include "common/compiler.h" +#include "linux/mount.h" + #include "clone-noasan.h" #include "cr_options.h" #include "servicefd.h" @@ -1585,27 +1587,39 @@ static void restore_pgid(void) futex_set_and_wake(&rsti(current)->pgrp_set, 1); } +static int __legacy_mount_proc() +{ + char proc_mountpoint[] = "/tmp/crtools-proc.XXXXXX"; + int fd; + + if (mkdtemp(proc_mountpoint) == NULL) { + pr_perror("mkdtemp failed %s", proc_mountpoint); + return -1; + } + + pr_info("Mount procfs in %s\n", proc_mountpoint); + if (mount("proc", proc_mountpoint, "proc", MS_MGC_VAL | MS_NOSUID | MS_NOEXEC | MS_NODEV, NULL)) { + pr_perror("mount failed"); + if (rmdir(proc_mountpoint)) + pr_perror("Unable to remove %s", proc_mountpoint); + return -1; + } + + fd = open_detach_mount(proc_mountpoint); + return fd; +} + static int mount_proc(void) { int fd, ret; - char proc_mountpoint[] = "/tmp/crtools-proc.XXXXXX"; if (root_ns_mask == 0) fd = ret = open("/proc", O_DIRECTORY); else { - if (mkdtemp(proc_mountpoint) == NULL) { - pr_perror("mkdtemp failed %s", proc_mountpoint); 
- return -1; - } - - pr_info("Mount procfs in %s\n", proc_mountpoint); - if (mount("proc", proc_mountpoint, "proc", MS_MGC_VAL | MS_NOSUID | MS_NOEXEC | MS_NODEV, NULL)) { - pr_perror("mount failed"); - rmdir(proc_mountpoint); - return -1; - } - - ret = fd = open_detach_mount(proc_mountpoint); + if (kdat.has_fsopen) + fd = ret = mount_detached_fs("proc"); + else + fd = ret = __legacy_mount_proc(); } if (fd >= 0) { From 76e4d31a3fa6a8d4ccee9a111c212c27ab69474f Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Mon, 25 Nov 2019 09:51:40 +0300 Subject: [PATCH 0093/1854] net: use new mount API to open the sysfs file system It doesn't require to create a temporary directory and mount the proc file system in it. Signed-off-by: Andrei Vagin --- criu/net.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/criu/net.c b/criu/net.c index 9825db10f..5822de629 100644 --- a/criu/net.c +++ b/criu/net.c @@ -2133,6 +2133,11 @@ static int mount_ns_sysfs(void) BUG_ON(ns_sysfs_fd != -1); + if (kdat.has_fsopen) { + ns_sysfs_fd = mount_detached_fs("sysfs"); + return ns_sysfs_fd >= 0 ? 0 : -1; + } + /* * A new mntns is required to avoid the race between * open_detach_mount and creating mntns. From be43c3b840b657a6a31a6885ca6e03da70de1b04 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Mon, 25 Nov 2019 09:52:25 +0300 Subject: [PATCH 0094/1854] cgroup: use new mount API to open the cgroup file system It doesn't require to create a temporary directory and mount the proc file system in it. 
Signed-off-by: Andrei Vagin --- criu/cgroup.c | 105 +++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 82 insertions(+), 23 deletions(-) diff --git a/criu/cgroup.c b/criu/cgroup.c index a66fc960e..d4c712167 100644 --- a/criu/cgroup.c +++ b/criu/cgroup.c @@ -8,6 +8,7 @@ #include #include #include + #include "common/list.h" #include "xmalloc.h" #include "cgroup.h" @@ -24,6 +25,8 @@ #include "protobuf.h" #include "images/core.pb-c.h" #include "images/cgroup.pb-c.h" +#include "kerndat.h" +#include "linux/mount.h" /* * This structure describes set of controller groups @@ -542,6 +545,84 @@ static int add_freezer_state(struct cg_controller *controller) return 0; } +static const char namestr[] = "name="; +static int __new_open_cgroupfs(struct cg_ctl *cc) +{ + int fsfd, fd; + char *name; + + fsfd = sys_fsopen("cgroup", 0); + if (fsfd < 0) { + pr_perror("Unable to open the cgroup file system"); + return -1; + } + + if (strstartswith(cc->name, namestr)) { + if (sys_fsconfig(fsfd, FSCONFIG_SET_STRING, + "name", cc->name + strlen(namestr), 0)) { + pr_perror("Unable to configure the cgroup (%s) file system", cc->name); + goto err; + } + } else { + char *saveptr = NULL, *buf = strdupa(cc->name); + name = strtok_r(buf, ",", &saveptr); + while (name) { + if (sys_fsconfig(fsfd, FSCONFIG_SET_FLAG, name, NULL, 0)) { + pr_perror("Unable to configure the cgroup (%s) file system", name); + goto err; + } + name = strtok_r(NULL, ",", &saveptr); + } + } + + if (sys_fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0)) { + pr_perror("Unable to create the cgroup (%s) file system", cc->name); + goto err; + } + + fd = sys_fsmount(fsfd, 0, 0); + if (fd < 0) + pr_perror("Unable to mount the cgroup (%s) file system", cc->name); + close(fsfd); + + return fd; +err: + close(fsfd); + return -1; +} + +static int open_cgroupfs(struct cg_ctl *cc) +{ + char prefix[] = ".criu.cgmounts.XXXXXX"; + char mopts[1024]; + int fd; + + if (kdat.has_fsopen) + return __new_open_cgroupfs(cc); + + if 
(strstartswith(cc->name, namestr)) + snprintf(mopts, sizeof(mopts), "none,%s", cc->name); + else + snprintf(mopts, sizeof(mopts), "%s", cc->name); + + if (mkdtemp(prefix) == NULL) { + pr_perror("can't make dir for cg mounts"); + return -1; + } + + if (mount("none", prefix, "cgroup", 0, mopts) < 0) { + pr_perror("Unable to mount %s", mopts); + rmdir(prefix); + return -1; + } + + fd = open_detach_mount(prefix); + if (fd < 0) + return -1; + + return fd; +} + static int collect_cgroups(struct list_head *ctls) { struct cg_ctl *cc; @@ -550,8 +631,6 @@ static int collect_cgroups(struct list_head *ctls) list_for_each_entry(cc, ctls, l) { char path[PATH_MAX], *root; - char prefix[] = ".criu.cgmounts.XXXXXX"; - const char namestr[] = "name="; struct cg_controller *cg; struct cg_root_opt *o; @@ -603,27 +682,7 @@ static int collect_cgroups(struct list_head *ctls) return -1; } } else { - char mopts[1024]; - - if (strstartswith(cc->name, namestr)) - snprintf(mopts, sizeof(mopts), "none,%s", cc->name); - else - snprintf(mopts, sizeof(mopts), "%s", cc->name); - - if (mkdtemp(prefix) == NULL) { - pr_perror("can't make dir for cg mounts"); - return -1; - } - - if (mount("none", prefix, "cgroup", 0, mopts) < 0) { - pr_perror("couldn't mount %s", mopts); - rmdir(prefix); - return -1; - } - - fd = open_detach_mount(prefix); - if (fd < 0) - return -1; + fd = open_cgroupfs(cc); } path_pref_len = snprintf(path, PATH_MAX, "/proc/self/fd/%d", fd); From af7e5f994b4d2221af1a0110dbfe5bdadd67f964 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Tue, 26 Nov 2019 07:26:31 +0300 Subject: [PATCH 0095/1854] readme: github pull-requests is the preferred way to contribute We will continue accepting patches. Signed-off-by: Andrei Vagin --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 558e87160..6a578b953 100644 --- a/README.md +++ b/README.md @@ -67,7 +67,7 @@ Here are some useful hints to get involved. 
* CRIU does need [extensive testing](https://github.com/checkpoint-restore/criu/issues?q=is%3Aissue+is%3Aopen+label%3Atesting); * Documentation is always hard, we have [some information](https://criu.org/Category:Empty_articles) that is to be extracted from people's heads into wiki pages as well as [some texts](https://criu.org/Category:Editor_help_needed) that all need to be converted into useful articles; * Feedback is expected on the github issues page and on the [mailing list](https://lists.openvz.org/mailman/listinfo/criu); -* For historical reasons we do not accept PRs, instead [patches are welcome](http://criu.org/How_to_submit_patches); +* We accept github pull requests and this is the preferred way to contribute to CRIU. If you prefer to send patches by email, you are welcome to send them to [the devel list](http://criu.org/How_to_submit_patches); * Spread the word about CRIU in [social networks](http://criu.org/Contacts); * If you're giving a talk about CRIU -- let us know, we'll mention it on the [wiki main page](https://criu.org/News/events); From 2237666ac1d277051d2bb90796fb1a0c5febb885 Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Tue, 12 Nov 2019 14:31:08 +0300 Subject: [PATCH 0096/1854] restorer/inotify: reorder inotify cleanup after waiting helpers and zombies We've seen ppoll interrupted with signal in VZ7 CT migration tests, that is because in the beginning of CR_STATE_RESTORE_SIGCHLD zombies and helpers die, and that can trigger SIGCHLDs sent to their parents. Adding additional debug (printing "Task..."
for zombies and helpers) in sigchld_handler I see: (15.644339) pie: 1: Task 10718 exited, status= 0 (15.644349) pie: 1: Cleaning inotify events from 29 (15.644359) pie: 1: Cleaning inotify events from 19 (15.644367) pie: 1: Cleaning inotify events from 10 And previously we had: (05.718449) pie: 104: Cleaning inotify events from 5 (05.718835) pie: 330: Cleaning inotify events from 3 (05.719046) pie: 1: Cleaning inotify events from 23 (05.719164) pie: 80: Cleaning inotify events from 7 (05.719185) pie: 1: Error (criu/pie/restorer.c:1287): Failed to poll from inotify fd: -4 (05.719202) pie: 95: Cleaning inotify events from 6 (05.719269) pie: 1: Error (criu/pie/restorer.c:1890): Restorer fail 1 So reordering cleanup and wait should fix it. Signed-off-by: Pavel Tikhomirov --- criu/pie/restorer.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c index dab58add6..888eb8e65 100644 --- a/criu/pie/restorer.c +++ b/criu/pie/restorer.c @@ -1839,9 +1839,6 @@ long __export_restore_task(struct task_restore_args *args) restore_finish_stage(task_entries_local, CR_STATE_RESTORE); - if (cleanup_current_inotify_events(args)) - goto core_restore_end; - if (wait_helpers(args) < 0) goto core_restore_end; if (wait_zombies(args) < 0) @@ -1854,6 +1851,9 @@ long __export_restore_task(struct task_restore_args *args) goto core_restore_end; } + if (cleanup_current_inotify_events(args)) + goto core_restore_end; + if (!args->compatible_mode) { ret = sys_sigaction(SIGCHLD, &args->sigchld_act, NULL, sizeof(k_rtsigset_t)); From 1d23dc4a3042599cabea90a81c063db453b89abb Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 29 Nov 2019 10:57:29 +0300 Subject: [PATCH 0097/1854] mount: Order call_helper_process calls When we clone threads in a later stage of the restore procedure, it may race with helpers which call clone_noasan by themselves.
Thus we need to walk over each clone_noasan call and figure out if calling it without last_pid lock is safe. - open_mountpoint: called by fusectl_dump, dump_empty_fs, binfmt_misc_dump, tmpfs_dump -- they all are processing dump stage, thus safe - call_helper_process: try_remount_writable -- called from various places in reg-files.c, in particular open_reg_by_id called in parallel with other threads, needs a lock remount_readonly_mounts -- called from sigreturn_restore, so in parallel, needs a lock - call_in_child_process: prepare_net_namespaces -- called from prepare_namespace which runs before we start forking, no need for lock Thus call_helper_process should use lock_last_pid and unlock_last_pid helpers and wait for subprocess to finish. At the same time, put a warning text into clone_noasan comment so next time we need to use it we would recall the pitfalls. v2: - fix uninitialized ret variable v3: - use exit_code instead of ret Signed-off-by: Cyrill Gorcunov --- criu/clone-noasan.c | 9 +++++++++ criu/mount.c | 21 ++++++++++++++++----- 2 files changed, 25 insertions(+), 5 deletions(-) diff --git a/criu/clone-noasan.c b/criu/clone-noasan.c index 5ca280eb8..5f1858d4d 100644 --- a/criu/clone-noasan.c +++ b/criu/clone-noasan.c @@ -18,6 +18,15 @@ * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69863 * * So the only way is to put this wrapper in separate non-instrumented file + * + * WARNING: When calling clone_noasan make sure your not sitting in a later + * __restore__ phase where other tasks might be creating threads, otherwise + * all calls to clone_noasan should be guarder with + * + * lock_last_pid + * clone_noasan + * ... wait for process to finish ...
+ * unlock_last_pid */ int clone_noasan(int (*fn)(void *), int flags, void *arg) { diff --git a/criu/mount.c b/criu/mount.c index 52e70d376..24a8516c6 100644 --- a/criu/mount.c +++ b/criu/mount.c @@ -3738,27 +3738,38 @@ struct ns_desc mnt_ns_desc = NS_DESC_ENTRY(CLONE_NEWNS, "mnt"); static int call_helper_process(int (*call)(void *), void *arg) { - int pid, status; + int pid, status, exit_code = -1; + + /* + * Running new helper process on the restore must be + * done under last_pid mutex: other tasks may be restoring + * threads and the PID we need there might be occupied by + * this clone() call. + */ + lock_last_pid(); pid = clone_noasan(call, CLONE_VFORK | CLONE_VM | CLONE_FILES | CLONE_IO | CLONE_SIGHAND | CLONE_SYSVSEM, arg); if (pid == -1) { pr_perror("Can't clone helper process"); - return -1; + goto out; } errno = 0; if (waitpid(pid, &status, __WALL) != pid) { pr_perror("Unable to wait %d", pid); - return -1; + goto out; } if (status) { pr_err("Bad child exit status: %d\n", status); - return -1; + goto out; } - return 0; + exit_code = 0; +out: + unlock_last_pid(); + return exit_code; } static int ns_remount_writable(void *arg) From ebe3b52353c5d380d01c332e7d57594995258c18 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Thu, 14 Nov 2019 14:41:04 +0300 Subject: [PATCH 0098/1854] unix: sysctl -- Preserve max_dgram_qlen value The /proc/sys/net/unix/max_dgram_qlen is a per-net variable and we already noticed that systemd inside a container may change its value (for example it sets it to 512 by now instead of kernel's default value 10), thus we need keep it inside image and restore then. 
Signed-off-by: Cyrill Gorcunov Signed-off-by: Alexander Mikhalitsyn Signed-off-by: Alexander Mikhalitsyn --- criu/net.c | 104 +++++++++++++++++++++++++++++++++++++++++++- images/netdev.proto | 1 + 2 files changed, 104 insertions(+), 1 deletion(-) diff --git a/criu/net.c b/criu/net.c index 5822de629..e960a34f9 100644 --- a/criu/net.c +++ b/criu/net.c @@ -210,6 +210,19 @@ char *devconfs6[] = { #define MAX_CONF_OPT_PATH IFNAMSIZ+60 #define MAX_STR_CONF_LEN 200 +static const char *unix_conf_entries[] = { + "max_dgram_qlen", +}; + +/* + * MAX_CONF_UNIX_PATH = (sizeof(CONF_UNIX_FMT) - strlen("%s")) + * + MAX_CONF_UNIX_OPT_PATH + */ +#define CONF_UNIX_BASE "net/unix" +#define CONF_UNIX_FMT CONF_UNIX_BASE"/%s" +#define MAX_CONF_UNIX_OPT_PATH 32 +#define MAX_CONF_UNIX_PATH (sizeof(CONF_UNIX_FMT) + MAX_CONF_UNIX_OPT_PATH - 2) + static int net_conf_op(char *tgt, SysctlEntry **conf, int n, int op, char *proto, struct sysctl_req *req, char (*path)[MAX_CONF_OPT_PATH], int size, char **devconfs, SysctlEntry **def_conf) @@ -339,6 +352,72 @@ static int ipv6_conf_op(char *tgt, SysctlEntry **conf, int n, int op, SysctlEntr devconfs6, def_conf); } +static int unix_conf_op(SysctlEntry ***rconf, size_t *n, int op) +{ + int i, ret = -1, flags = 0; + char path[ARRAY_SIZE(unix_conf_entries)][MAX_CONF_UNIX_PATH] = { }; + struct sysctl_req req[ARRAY_SIZE(unix_conf_entries)] = { }; + SysctlEntry **conf = *rconf; + + if (*n != ARRAY_SIZE(unix_conf_entries)) { + pr_err("unix: Unexpected entries in config (%zu %zu)\n", + *n, ARRAY_SIZE(unix_conf_entries)); + return -EINVAL; + } + + if (opts.weak_sysctls || op == CTL_READ) + flags = CTL_FLAGS_OPTIONAL; + + for (i = 0; i < *n; i++) { + snprintf(path[i], MAX_CONF_UNIX_PATH, CONF_UNIX_FMT, + unix_conf_entries[i]); + req[i].name = path[i]; + req[i].flags = flags; + + switch (conf[i]->type) { + case SYSCTL_TYPE__CTL_32: + req[i].type = CTL_32; + req[i].arg = &conf[i]->iarg; + break; + default: + pr_err("unix: Unknown config type %d\n", + 
conf[i]->type); + return -1; + } + } + + ret = sysctl_op(req, *n, op, CLONE_NEWNET); + if (ret < 0) { + pr_err("unix: Failed to %s %s/\n", + (op == CTL_READ) ? "read" : "write", + CONF_UNIX_BASE); + return -1; + } + + if (op == CTL_READ) { + bool has_entries = false; + + for (i = 0; i < *n; i++) { + if (req[i].flags & CTL_FLAGS_HAS) { + conf[i]->has_iarg = true; + if (!has_entries) + has_entries = true; + } + } + + /* + * Zap the whole section of data. + * Unix conf is optional. + */ + if (!has_entries) { + *n = 0; + *rconf = NULL; + } + } + + return 0; +} + /* * I case if some entry is missing in * the kernel, simply write DEVCONFS_UNUSED @@ -1824,6 +1903,8 @@ static int dump_netns_conf(struct ns_id *ns, struct cr_imgset *fds) int ret = -1; int i; NetnsEntry netns = NETNS_ENTRY__INIT; + SysctlEntry *unix_confs = NULL; + size_t sizex = ARRAY_SIZE(unix_conf_entries); SysctlEntry *def_confs4 = NULL, *all_confs4 = NULL; int size4 = ARRAY_SIZE(devconfs4); SysctlEntry *def_confs6 = NULL, *all_confs6 = NULL; @@ -1840,7 +1921,8 @@ static int dump_netns_conf(struct ns_id *ns, struct cr_imgset *fds) o_buf = buf = xmalloc( i * (sizeof(NetnsId*) + sizeof(NetnsId)) + size4 * (sizeof(SysctlEntry*) + sizeof(SysctlEntry)) * 2 + - size6 * (sizeof(SysctlEntry*) + sizeof(SysctlEntry)) * 2 + size6 * (sizeof(SysctlEntry*) + sizeof(SysctlEntry)) * 2 + + sizex * (sizeof(SysctlEntry*) + sizeof(SysctlEntry)) ); if (!buf) goto out; @@ -1896,6 +1978,16 @@ static int dump_netns_conf(struct ns_id *ns, struct cr_imgset *fds) } } + netns.n_unix_conf = sizex; + netns.unix_conf = xptr_pull_s(&buf, sizex * sizeof(SysctlEntry*)); + unix_confs = xptr_pull_s(&buf, sizex * sizeof(SysctlEntry)); + + for (i = 0; i < sizex; i++) { + sysctl_entry__init(&unix_confs[i]); + netns.unix_conf[i] = &unix_confs[i]; + netns.unix_conf[i]->type = SYSCTL_TYPE__CTL_32; + } + ret = ipv4_conf_op("default", netns.def_conf4, size4, CTL_READ, NULL); if (ret < 0) goto err_free; @@ -1910,6 +2002,10 @@ static int 
dump_netns_conf(struct ns_id *ns, struct cr_imgset *fds) if (ret < 0) goto err_free; + ret = unix_conf_op(&netns.unix_conf, &netns.n_unix_conf, CTL_READ); + if (ret < 0) + goto err_free; + ret = pb_write_one(img_from_set(fds, CR_FD_NETNS), &netns, PB_NETNS); err_free: xfree(o_buf); @@ -2122,6 +2218,12 @@ static int restore_netns_conf(struct ns_id *ns) ret = ipv6_conf_op("default", (netns)->def_conf6, (netns)->n_def_conf6, CTL_WRITE, NULL); } + if ((netns)->unix_conf) { + ret = unix_conf_op(&(netns)->unix_conf, &(netns)->n_unix_conf, CTL_WRITE); + if (ret) + goto out; + } + ns->net.netns = netns; out: return ret; diff --git a/images/netdev.proto b/images/netdev.proto index 476a92ced..ae9c99531 100644 --- a/images/netdev.proto +++ b/images/netdev.proto @@ -71,4 +71,5 @@ message netns_entry { repeated netns_id nsids = 7; optional string ext_key = 8; + repeated sysctl_entry unix_conf = 9; } From 55f7a571f286baa6eac6fe7a020914505a0eb464 Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Thu, 14 Nov 2019 14:50:43 +0300 Subject: [PATCH 0099/1854] zdtm: sysctl net.unix.max_dgram_qlen value preservation test Test checks that if the /proc/sys/net/unix/max_dgram_qlen value has been changed in process net namespace, then it is saved after c/r. 
Signed-off-by: Alexander Mikhalitsyn Signed-off-by: Alexander Mikhalitsyn --- test/zdtm/lib/Makefile | 2 +- test/zdtm/lib/sysctl.c | 59 ++++++++++++++++++++++++++ test/zdtm/lib/sysctl.h | 7 +++ test/zdtm/static/Makefile | 1 + test/zdtm/static/netns_sub_sysctl.c | 56 ++++++++++++++++++++++++ test/zdtm/static/netns_sub_sysctl.desc | 4 ++ 6 files changed, 128 insertions(+), 1 deletion(-) create mode 100644 test/zdtm/lib/sysctl.c create mode 100644 test/zdtm/lib/sysctl.h create mode 100644 test/zdtm/static/netns_sub_sysctl.c create mode 100644 test/zdtm/static/netns_sub_sysctl.desc diff --git a/test/zdtm/lib/Makefile b/test/zdtm/lib/Makefile index d2d9f1cc3..b87f36e8f 100644 --- a/test/zdtm/lib/Makefile +++ b/test/zdtm/lib/Makefile @@ -4,7 +4,7 @@ CFLAGS += $(USERCFLAGS) LIB := libzdtmtst.a -LIBSRC := datagen.c msg.c parseargs.c test.c streamutil.c lock.c ns.c tcp.c fs.c +LIBSRC := datagen.c msg.c parseargs.c test.c streamutil.c lock.c ns.c tcp.c fs.c sysctl.c LIBOBJ := $(LIBSRC:%.c=%.o) BIN := groups diff --git a/test/zdtm/lib/sysctl.c b/test/zdtm/lib/sysctl.c new file mode 100644 index 000000000..9583ec3df --- /dev/null +++ b/test/zdtm/lib/sysctl.c @@ -0,0 +1,59 @@ +#include + +#include "zdtmtst.h" +#include "sysctl.h" + +int sysctl_read_int(const char *name, int *data) +{ + int fd; + int ret; + char buf[16]; + + fd = open(name, O_RDONLY); + if (fd < 0) { + pr_perror("Can't open %s", name); + return fd; + } + + ret = read(fd, buf, sizeof(buf) - 1); + if (ret < 0) { + pr_perror("Can't read %s", name); + ret = -errno; + goto err; + } + + buf[ret] = '\0'; + + *data = (int)strtoul(buf, NULL, 10); + ret = 0; +err: + close(fd); + return ret; +} + +int sysctl_write_int(const char *name, int val) +{ + int fd; + int ret; + char buf[16]; + + fd = open(name, O_WRONLY); + if (fd < 0) { + pr_perror("Can't open %s", name); + return fd; + } + + sprintf(buf, "%d\n", val); + + ret = write(fd, buf, strlen(buf)); + if (ret < 0) { + pr_perror("Can't write %d into %s", val, name); + ret 
= -errno; + goto err; + } + + ret = 0; +err: + close(fd); + return ret; +} diff --git a/test/zdtm/lib/sysctl.h b/test/zdtm/lib/sysctl.h new file mode 100644 index 000000000..67129102f --- /dev/null +++ b/test/zdtm/lib/sysctl.h @@ -0,0 +1,7 @@ +#ifndef __ZDTM_SYSCTL__ +#define __ZDTM_SYSCTL__ + +extern int sysctl_read_int(const char *name, int *data); +extern int sysctl_write_int(const char *name, int val); + +#endif diff --git a/test/zdtm/static/Makefile b/test/zdtm/static/Makefile index 36d00ca5c..f9d2efe74 100644 --- a/test/zdtm/static/Makefile +++ b/test/zdtm/static/Makefile @@ -207,6 +207,7 @@ TST_NOFILE := \ pipe03 \ netns_sub \ netns_sub_veth \ + netns_sub_sysctl \ unlink_multiple_largefiles \ config_inotify_irmap \ thp_disable \ diff --git a/test/zdtm/static/netns_sub_sysctl.c b/test/zdtm/static/netns_sub_sysctl.c new file mode 100644 index 000000000..bf828e08e --- /dev/null +++ b/test/zdtm/static/netns_sub_sysctl.c @@ -0,0 +1,56 @@ +#include + +#include "zdtmtst.h" +#include "sysctl.h" + +const char *test_doc = "Check dump and restore a net.unix.max_dgram_qlen sysctl parameter in subns"; +const char *test_author = "Alexander Mikhalitsyn "; + +typedef struct { + const char *path; + int old; + int new; +} sysctl_opt_t; + +#define CONF_UNIX_BASE "/proc/sys/net/unix" + +static sysctl_opt_t net_unix_params[] = { + {CONF_UNIX_BASE"/max_dgram_qlen", 0, 0}, + {NULL, 0, 0} +}; + +int main(int argc, char **argv) +{ + int ret = 0; + sysctl_opt_t *p; + test_init(argc, argv); + + for (p = net_unix_params; p->path != NULL; p++) { + p->old = (((unsigned)lrand48()) % 1023) + 1; + if (sysctl_write_int(p->path, p->old)) { + pr_perror("Can't change %s", p->path); + return -1; + } + } + + test_daemon(); + test_waitsig(); + + for (p = net_unix_params; p->path != NULL; p++) { + if (sysctl_read_int(p->path, &p->new)) + ret = 1; + + if (p->old != p->new) { + errno = EINVAL; + pr_perror("%s changed: %d ---> %d", p->path, p->old, p->new); + ret = 1; + } + } + + if (ret) + fail(); + 
else + pass(); + + return ret; +} diff --git a/test/zdtm/static/netns_sub_sysctl.desc b/test/zdtm/static/netns_sub_sysctl.desc new file mode 100644 index 000000000..535842668 --- /dev/null +++ b/test/zdtm/static/netns_sub_sysctl.desc @@ -0,0 +1,4 @@ +{ + 'flavor': 'ns', + 'flags': 'suid' +} From 4c46cbc4d86c7578b98e64b8f664cf9c0b0fe978 Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Mon, 16 Dec 2019 15:34:10 +0300 Subject: [PATCH 0100/1854] x86/cpu: cleanup and improve xfeatures_mask check Make xfeatures_mask check explicit. We were relying on our guess about hardware "backward compatibility" and used ">" check here for a long time. But it looks better to explicitly check that all xfeature bits available on the source are also available on the destination. For xsave_size we need to have smaller size on destination than on source, because xsave operation on small allocated buffer may corrupt the nearby data. So split up comments about xfeatures_mask and xsave_size, as having a single comment for quite different cases is less understandable. v2: improve comments, remove extra else-ifs, remove extra typecast Signed-off-by: Pavel Tikhomirov --- criu/arch/x86/cpu.c | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/criu/arch/x86/cpu.c b/criu/arch/x86/cpu.c index 3808b9d33..72c5bd59c 100644 --- a/criu/arch/x86/cpu.c +++ b/criu/arch/x86/cpu.c @@ -236,6 +236,7 @@ static int cpu_validate_features(compel_cpuinfo_t *cpu_info) return -1; if (opts.cpu_cap & CPU_CAP_FPU) { + uint64_t m; /* * If we're requested to check FPU only ignore * any other bit. It's up to a user if the @@ -261,24 +262,33 @@ static int cpu_validate_features(compel_cpuinfo_t *cpu_info) #undef __mismatch_fpu_bit /* - * Make sure the xsave features are compatible.
We already hit the - * issue with libc where we've checkpointed the container on old - * machine but restored on more modern one and libc fetched new - * xsave frame size directly by xsave instruction with greedy - * feature mask causing programs to misbehave. + * Make sure the xsave features are compatible. Check that on + * the destination there are all the features which were on the + * source. */ - if (cpu_info->xfeatures_mask > rt_cpu_info.xfeatures_mask) { - uint64_t m = cpu_info->xfeatures_mask & ~rt_cpu_info.xfeatures_mask; - pr_err("CPU xfeatures has unsupported bits (%#llx)\n", - (unsigned long long)m); + if ((m = cpu_info->xfeatures_mask & + ~rt_cpu_info.xfeatures_mask)) { + pr_err("CPU xfeatures has unsupported bits (%#" + PRIx64")\n", m); return -1; - } else if (cpu_info->xsave_size != rt_cpu_info.xsave_size) { + } + + /* + * Make sure the xsave sizes are compatible. We already hit the + * issue with libc where we've checkpointed the container on + * old machine but restored on more modern one and libc fetched + * new xsave frame size directly by xsave instruction with + * greedy feature mask causing programs to misbehave. 
+ */ + if (cpu_info->xsave_size != rt_cpu_info.xsave_size) { pr_err("CPU xsave size mismatch (%u/%u)\n", cpu_info->xsave_size, rt_cpu_info.xsave_size); return -1; - } else if (cpu_info->xsave_size_max != rt_cpu_info.xsave_size_max) { + } + if (cpu_info->xsave_size_max != rt_cpu_info.xsave_size_max) { pr_err("CPU xsave max size mismatch (%u/%u)\n", - cpu_info->xsave_size_max, rt_cpu_info.xsave_size_max); + cpu_info->xsave_size_max, + rt_cpu_info.xsave_size_max); return -1; } } From 2e656222d78fecc1bf6490bed59078083bdb4351 Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Wed, 18 Dec 2019 23:32:32 +0000 Subject: [PATCH 0101/1854] crit: fix python3 encoding issues Signed-off-by: Nicolas Viennot --- lib/py/images/images.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/py/images/images.py b/lib/py/images/images.py index f4517d845..3eedfca69 100644 --- a/lib/py/images/images.py +++ b/lib/py/images/images.py @@ -244,7 +244,7 @@ class ghost_file_handler: while True: gc = pb.ghost_chunk_entry() buf = f.read(4) - if buf == '': + if len(buf) == 0: break size, = struct.unpack('i', buf) gc.ParseFromString(f.read(size)) @@ -252,13 +252,13 @@ class ghost_file_handler: if no_payload: f.seek(gc.len, os.SEEK_CUR) else: - entry['extra'] = base64.encodebytes(f.read(gc.len)) + entry['extra'] = base64.encodebytes(f.read(gc.len)).decode('utf-8') entries.append(entry) else: if no_payload: f.seek(0, os.SEEK_END) else: - g_entry['extra'] = base64.encodebytes(f.read()) + g_entry['extra'] = base64.encodebytes(f.read()).decode('utf-8') entries.append(g_entry) return entries From 00bb068785a8b1a7c4481e2e7f2c0b9f903d941b Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Fri, 20 Dec 2019 18:09:15 +0000 Subject: [PATCH 0102/1854] scripts: alpine: Install py2 packages with pip The py-future package has been renamed to py3-future [1] and py2 package for yaml has been dropped [2]. 
[1] https://git.alpinelinux.org/aports/commit/main?id=316d44abaed13964e97eb43c095cd1b64e3943ad [2] https://git.alpinelinux.org/aports/commit/main?id=e369c1fd7707a73f2c3e2b11b613198d9a4106de Signed-off-by: Radostin Stoyanov --- scripts/build/Dockerfile.alpine | 4 +--- scripts/build/Dockerfile.openj9-alpine | 3 --- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/scripts/build/Dockerfile.alpine b/scripts/build/Dockerfile.alpine index 70fdf480a..a1d1d9191 100644 --- a/scripts/build/Dockerfile.alpine +++ b/scripts/build/Dockerfile.alpine @@ -27,9 +27,7 @@ RUN mv .ccache /tmp && make mrproper && ccache -sz && \ date && make -j $(nproc) CC="$CC" && date && ccache -s RUN apk add \ - py-yaml \ py-pip \ - py2-future \ ip6tables \ iptables \ iproute2 \ @@ -42,5 +40,5 @@ RUN apk add \ # The rpc test cases are running as user #1000, let's add the user RUN adduser -u 1000 -D test -RUN pip install protobuf ipaddress junit_xml flake8 +RUN pip install PyYAML future protobuf ipaddress junit_xml flake8 RUN make -C test/zdtm diff --git a/scripts/build/Dockerfile.openj9-alpine b/scripts/build/Dockerfile.openj9-alpine index 654e7bf31..43a993444 100644 --- a/scripts/build/Dockerfile.openj9-alpine +++ b/scripts/build/Dockerfile.openj9-alpine @@ -17,9 +17,6 @@ RUN apk update && apk add \ python \ sudo \ maven \ - py-yaml \ - py-pip \ - py2-future \ ip6tables \ iptables \ bash From 0980617e24004ea00e4e0841c97b138f0a4e0073 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Thu, 12 Dec 2019 23:04:30 +0000 Subject: [PATCH 0103/1854] sockets: Remove duplicate variable assignment Signed-off-by: Radostin Stoyanov --- criu/sockets.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/criu/sockets.c b/criu/sockets.c index 312b55c6d..80f3153ba 100644 --- a/criu/sockets.c +++ b/criu/sockets.c @@ -524,7 +524,7 @@ int restore_prepare_socket(int sk) int restore_socket_opts(int sk, SkOptsEntry *soe) { - int ret = 0, val; + int ret = 0, val = 1; struct timeval tv; /* 
In kernel a bufsize value is doubled. */ u32 bufs[2] = { soe->so_sndbuf / 2, soe->so_rcvbuf / 2}; @@ -547,27 +547,22 @@ int restore_socket_opts(int sk, SkOptsEntry *soe) ret |= restore_opt(sk, SOL_SOCKET, SO_MARK, &soe->so_mark); } if (soe->has_so_passcred && soe->so_passcred) { - val = 1; pr_debug("\tset passcred for socket\n"); ret |= restore_opt(sk, SOL_SOCKET, SO_PASSCRED, &val); } if (soe->has_so_passsec && soe->so_passsec) { - val = 1; pr_debug("\tset passsec for socket\n"); ret |= restore_opt(sk, SOL_SOCKET, SO_PASSSEC, &val); } if (soe->has_so_dontroute && soe->so_dontroute) { - val = 1; pr_debug("\tset dontroute for socket\n"); ret |= restore_opt(sk, SOL_SOCKET, SO_DONTROUTE, &val); } if (soe->has_so_no_check && soe->so_no_check) { - val = 1; pr_debug("\tset no_check for socket\n"); ret |= restore_opt(sk, SOL_SOCKET, SO_NO_CHECK, &val); } if (soe->has_so_broadcast && soe->so_broadcast) { - val = 1; pr_debug("\tset broadcast for socket\n"); ret |= restore_opt(sk, SOL_SOCKET, SO_BROADCAST, &val); } From d4e6fc2a0dcff62ff246544d3d9a78d6961f253a Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Fri, 13 Dec 2019 00:10:28 +0000 Subject: [PATCH 0104/1854] socket: c/r support for SO_KEEPALIVE TCP keepalive packets can be used to determine if a connection is still valid. When the SO_KEEPALIVE option is set, TCP packets are periodically sent to keep the connection alive. This patch implements checkpoint/restore support for SO_KEEPALIVE, TCP_KEEPIDLE, TCP_KEEPINTVL and TCP_KEEPCNT options. 
Signed-off-by: Radostin Stoyanov --- criu/include/sk-inet.h | 2 +- criu/sk-inet.c | 6 +++++- criu/sk-tcp.c | 20 +++++++++++++++++++- criu/sockets.c | 20 ++++++++++++++++++++ images/sk-opts.proto | 4 ++++ 5 files changed, 49 insertions(+), 3 deletions(-) diff --git a/criu/include/sk-inet.h b/criu/include/sk-inet.h index 79966517b..dec67ca6c 100644 --- a/criu/include/sk-inet.h +++ b/criu/include/sk-inet.h @@ -83,7 +83,7 @@ extern void tcp_locked_conn_add(struct inet_sk_info *); extern void rst_unlock_tcp_connections(void); extern void cpt_unlock_tcp_connections(void); -extern int dump_one_tcp(int sk, struct inet_sk_desc *sd); +extern int dump_one_tcp(int sk, struct inet_sk_desc *sd, SkOptsEntry *soe); extern int restore_one_tcp(int sk, struct inet_sk_info *si); #define SK_EST_PARAM "tcp-established" diff --git a/criu/sk-inet.c b/criu/sk-inet.c index f9c64c7af..342548585 100644 --- a/criu/sk-inet.c +++ b/criu/sk-inet.c @@ -551,7 +551,7 @@ static int do_dump_one_inet_fd(int lfd, u32 id, const struct fd_parms *p, int fa switch (proto) { case IPPROTO_TCP: - err = (type != SOCK_RAW) ? dump_one_tcp(lfd, sk) : 0; + err = (type != SOCK_RAW) ? 
dump_one_tcp(lfd, sk, &skopts) : 0; break; case IPPROTO_UDP: case IPPROTO_UDPLITE: @@ -747,6 +747,10 @@ static int post_open_inet_sk(struct file_desc *d, int sk) if (!val && restore_opt(sk, SOL_SOCKET, SO_BROADCAST, &val)) return -1; + val = ii->ie->opts->so_keepalive; + if (!val && restore_opt(sk, SOL_SOCKET, SO_KEEPALIVE, &val)) + return -1; + return 0; } diff --git a/criu/sk-tcp.c b/criu/sk-tcp.c index 4fd2eb8e6..7ee603818 100644 --- a/criu/sk-tcp.c +++ b/criu/sk-tcp.c @@ -218,8 +218,26 @@ err_r: return ret; } -int dump_one_tcp(int fd, struct inet_sk_desc *sk) +int dump_one_tcp(int fd, struct inet_sk_desc *sk, SkOptsEntry *soe) { + soe->has_tcp_keepcnt = true; + if (dump_opt(fd, SOL_TCP, TCP_KEEPCNT, &soe->tcp_keepcnt)) { + pr_perror("Can't read TCP_KEEPCNT"); + return -1; + } + + soe->has_tcp_keepidle = true; + if (dump_opt(fd, SOL_TCP, TCP_KEEPIDLE, &soe->tcp_keepidle)) { + pr_perror("Can't read TCP_KEEPIDLE"); + return -1; + } + + soe->has_tcp_keepintvl = true; + if (dump_opt(fd, SOL_TCP, TCP_KEEPINTVL, &soe->tcp_keepintvl)) { + pr_perror("Can't read TCP_KEEPINTVL"); + return -1; + } + if (sk->dst_port == 0) return 0; diff --git a/criu/sockets.c b/criu/sockets.c index 80f3153ba..2e1ce9d7b 100644 --- a/criu/sockets.c +++ b/criu/sockets.c @@ -566,6 +566,22 @@ int restore_socket_opts(int sk, SkOptsEntry *soe) pr_debug("\tset broadcast for socket\n"); ret |= restore_opt(sk, SOL_SOCKET, SO_BROADCAST, &val); } + if (soe->has_so_keepalive && soe->so_keepalive) { + pr_debug("\tset keepalive for socket\n"); + ret |= restore_opt(sk, SOL_SOCKET, SO_KEEPALIVE, &val); + } + if (soe->has_tcp_keepcnt) { + pr_debug("\tset keepcnt for socket\n"); + ret |= restore_opt(sk, SOL_TCP, TCP_KEEPCNT, &soe->tcp_keepcnt); + } + if (soe->has_tcp_keepidle) { + pr_debug("\tset keepidle for socket\n"); + ret |= restore_opt(sk, SOL_TCP, TCP_KEEPIDLE, &soe->tcp_keepidle); + } + if (soe->has_tcp_keepintvl) { + pr_debug("\tset keepintvl for socket\n"); + ret |= restore_opt(sk, SOL_TCP, 
TCP_KEEPINTVL, &soe->tcp_keepintvl); + } tv.tv_sec = soe->so_snd_tmo_sec; tv.tv_usec = soe->so_snd_tmo_usec; @@ -651,6 +667,10 @@ int dump_socket_opts(int sk, SkOptsEntry *soe) soe->has_so_broadcast = true; soe->so_broadcast = val ? true : false; + ret |= dump_opt(sk, SOL_SOCKET, SO_KEEPALIVE, &val); + soe->has_so_keepalive = true; + soe->so_keepalive = val ? true : false; + ret |= dump_bound_dev(sk, soe); ret |= dump_socket_filter(sk, soe); diff --git a/images/sk-opts.proto b/images/sk-opts.proto index c93ec5fd5..336cca22a 100644 --- a/images/sk-opts.proto +++ b/images/sk-opts.proto @@ -23,6 +23,10 @@ message sk_opts_entry { repeated fixed64 so_filter = 16; optional bool so_reuseport = 17; optional bool so_broadcast = 18; + optional bool so_keepalive = 19; + optional uint32 tcp_keepcnt = 20; + optional uint32 tcp_keepidle = 21; + optional uint32 tcp_keepintvl = 22; } enum sk_shutdown { From 8b467dd944f6b3bed0a468800b041efdb218d6e8 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Fri, 13 Dec 2019 04:01:36 +0000 Subject: [PATCH 0105/1854] zdtm: Add test for SO_KEEPALIVE Signed-off-by: Radostin Stoyanov --- test/zdtm/static/Makefile | 3 +- test/zdtm/static/socket-tcp-keepalive.c | 97 +++++++++++++++++++++++++ 2 files changed, 99 insertions(+), 1 deletion(-) create mode 100644 test/zdtm/static/socket-tcp-keepalive.c diff --git a/test/zdtm/static/Makefile b/test/zdtm/static/Makefile index f9d2efe74..ea5d3c42e 100644 --- a/test/zdtm/static/Makefile +++ b/test/zdtm/static/Makefile @@ -105,7 +105,8 @@ TST_NOFILE := \ socket-tcp-unconn \ socket-tcp6-unconn \ socket-tcp-syn-sent \ - socket-tcp-skip-in-flight \ + socket-tcp-skip-in-flight \ + socket-tcp-keepalive \ sock_opts00 \ sock_opts01 \ sk-unix-unconn \ diff --git a/test/zdtm/static/socket-tcp-keepalive.c b/test/zdtm/static/socket-tcp-keepalive.c new file mode 100644 index 000000000..a977a03b5 --- /dev/null +++ b/test/zdtm/static/socket-tcp-keepalive.c @@ -0,0 +1,97 @@ +#include +#include +#include +#include + 
+#include "zdtmtst.h" + +const char *test_doc = "test checkpoint/restore of SO_KEEPALIVE\n"; +const char *test_author = "Radostin Stoyanov \n"; + +int main(int argc, char **argv) +{ + int sk; + int alive = 1; + int cnt = 5; + int idle = 10; + int intvl = 15; + int optval; + socklen_t optlen; + + test_init(argc, argv); + + sk = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP); + if (sk < 0) { + pr_perror("Can't create socket"); + return 1; + } + + /* Set the option active */ + if (setsockopt(sk, SOL_SOCKET, SO_KEEPALIVE, &alive, sizeof(alive)) < 0) { + pr_perror("setsockopt SO_KEEPALIVE"); + return 1; + } + + if (setsockopt(sk, SOL_TCP, TCP_KEEPCNT, &cnt, sizeof(cnt)) < 0) { + pr_perror("setsockopt TCP_KEEPCNT"); + return 1; + } + + if (setsockopt(sk, SOL_TCP, TCP_KEEPIDLE, &idle, sizeof(idle)) < 0) { + pr_perror("setsockopt TCP_KEEPIDLE"); + return 1; + } + + optval = 5; + optlen = sizeof(optval); + if (setsockopt(sk, SOL_TCP, TCP_KEEPINTVL, &intvl, sizeof(intvl)) < 0) { + pr_perror("setsockopt TCP_KEEPINTVL"); + return 1; + } + + test_daemon(); + test_waitsig(); + + if (getsockopt(sk, SOL_SOCKET, SO_KEEPALIVE, &optval, &optlen)) { + pr_perror("getsockopt SO_KEEPALIVE"); + return 1; + } + + if (optlen != sizeof(optval) || optval != alive) { + fail("SO_KEEPALIVE not set"); + return 1; + } + + if (getsockopt(sk, SOL_TCP, TCP_KEEPCNT, &optval, &optlen) < 0) { + pr_perror("getsockopt TCP_KEEPCNT"); + return 1; + } + + if (optval != cnt) { + fail("TCP_KEEPCNT has incorrect value (%d != %d)", cnt, optval); + return 1; + } + + if (getsockopt(sk, SOL_TCP, TCP_KEEPIDLE, &optval, &optlen) < 0) { + pr_perror("getsockopt TCP_KEEPIDLE"); + return 1; + } + + if (optval != idle) { + fail("TCP_KEEPIDLE has incorrect value (%d != %d)", idle, optval); + return 1; + } + + if (getsockopt(sk, SOL_TCP, TCP_KEEPINTVL, &optval, &optlen) < 0) { + pr_perror("getsockopt TCP_KEEPINTVL"); + return 1; + } + + if (optval != intvl) { + fail("TCP_KEEPINTVL has incorrect value (%d != %d)", intvl, optval); 
+ return 1; + } + + pass(); + return 0; +} \ No newline at end of file From 79559bef92b524911b766d674ac8bc4470b8b378 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Fri, 20 Dec 2019 17:50:37 +0100 Subject: [PATCH 0106/1854] Fix tests on Ubuntu It seems like Ubuntu introduced a overlayfs change which breaks CRIU: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1857257 This disables overlayfs (unfortunately) in most tests by switching to devicemapper or vfs. Upstream kernels do not seem to have this problem. This also adds the 'docker-test' for xenial which still has a working overlayfs from CRIU's point of view. Also adjust Podman Ubuntu package location Podman Ubuntu packages are now available via OBS and no longer via PPA. Signed-off-by: Adrian Reber --- .travis.yml | 13 +++++++++++-- scripts/travis/Makefile | 5 ++++- scripts/travis/docker-test.sh | 15 ++++++++++----- scripts/travis/podman-test.sh | 13 ++++++++++--- 4 files changed, 35 insertions(+), 11 deletions(-) diff --git a/.travis.yml b/.travis.yml index e6e410191..25dd6a29b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -11,7 +11,6 @@ env: - TR_ARCH=local CLANG=1 COMPAT_TEST=y - TR_ARCH=x86_64 - TR_ARCH=x86_64 CLANG=1 - - TR_ARCH=docker-test - TR_ARCH=openj9-test matrix: include: @@ -57,6 +56,16 @@ matrix: arch: amd64 env: TR_ARCH=podman-test dist: bionic + - os: linux + arch: amd64 + env: TR_ARCH=docker-test + dist: bionic + - os: linux + arch: amd64 + env: TR_ARCH=docker-test DIST=xenial + # On xenial it should be possible to test overlayfs; + # broken on the latest bionic kernel + dist: xenial - os: linux arch: amd64 env: TR_ARCH=alpine CLANG=1 @@ -79,9 +88,9 @@ matrix: dist: bionic allow_failures: - env: TR_ARCH=docker-test + - env: TR_ARCH=docker-test DIST=xenial - env: TR_ARCH=fedora-rawhide - env: TR_ARCH=local GCOV=1 - - env: TR_ARCH=podman-test script: - sudo make CCACHE=1 -C scripts/travis $TR_ARCH after_success: diff --git a/scripts/travis/Makefile b/scripts/travis/Makefile index 
373171149..17abb703a 100644 --- a/scripts/travis/Makefile +++ b/scripts/travis/Makefile @@ -58,7 +58,10 @@ docker-test: podman-test: ./podman-test.sh -openj9-test: +# overlayfs behaves differently on Ubuntu and breaks CRIU +# https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1857257 +# Switch to devicemapper +openj9-test: restart-docker ./openj9-test.sh %: diff --git a/scripts/travis/docker-test.sh b/scripts/travis/docker-test.sh index ee96fef48..ac420a445 100755 --- a/scripts/travis/docker-test.sh +++ b/scripts/travis/docker-test.sh @@ -19,11 +19,16 @@ apt-get update -qq apt-get install -qq docker-ce -cat > /etc/docker/daemon.json < /etc/docker/daemon.json +else + echo '{ "experimental": true }' > /etc/docker/daemon.json +fi service docker restart diff --git a/scripts/travis/podman-test.sh b/scripts/travis/podman-test.sh index eafdc73be..5189477cd 100755 --- a/scripts/travis/podman-test.sh +++ b/scripts/travis/podman-test.sh @@ -1,7 +1,13 @@ #!/bin/bash set -x -e -o pipefail -add-apt-repository -y ppa:projectatomic/ppa +echo 'deb http://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable/xUbuntu_18.04/ /' > /etc/apt/sources.list.d/devel:kubic:libcontainers:stable.list + +wget -nv https://download.opensuse.org/repositories/devel:kubic:libcontainers:stable/xUbuntu_18.04/Release.key -O- | apt-key add - + +# podman conflicts with a man page from docker-ce +# this is a podman packaging bug (https://github.com/containers/libpod/issues/4747) +apt-get -y purge docker-ce apt-get install -qq \ apt-transport-https \ @@ -10,7 +16,6 @@ apt-get install -qq \ software-properties-common apt-get update -qq - apt-get install -qqy podman containernetworking-plugins export SKIP_TRAVIS_TEST=1 @@ -21,7 +26,9 @@ cd ../../ make install -podman info +# overlaysfs behaves differently on Ubuntu and breaks CRIU +# https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1857257 +podman --storage-driver vfs info criu --version From 8bb3c17a0f7f14baaed8d9b6ebf953c24a793ccc 
Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Sat, 21 Dec 2019 18:08:23 +0000 Subject: [PATCH 0107/1854] style: Enforce kernel style -Wdeclaration-after-statement Include warnings that the kernel uses during compilation: -Wdeclaration-after-statement: enforces having variables declared at the top of scopes Signed-off-by: Nicolas Viennot [Generated a commit message from the pull request] Signed-off-by: Dmitry Safonov --- Makefile | 2 +- criu/net.c | 7 +++++-- criu/page-xfer.c | 10 +++++----- criu/pie/util-vdso.c | 3 ++- test/zdtm/Makefile.inc | 1 + test/zdtm/static/arm-neon00.c | 11 +++++----- test/zdtm/static/child_subreaper.c | 6 +++--- test/zdtm/static/config_inotify_irmap.c | 3 ++- test/zdtm/static/inotify00.c | 3 ++- test/zdtm/static/maps03.c | 3 ++- test/zdtm/static/mnt_ext_dev.c | 3 ++- test/zdtm/static/mntns_link_remap.c | 2 +- test/zdtm/static/mntns_open.c | 2 +- test/zdtm/static/mountpoints.c | 2 +- test/zdtm/static/remap_dead_pid.c | 4 ++-- test/zdtm/static/selinux01.c | 3 ++- test/zdtm/static/sigaltstack.c | 20 +++++++++---------- test/zdtm/static/socket-tcp-syn-sent.c | 4 ++-- test/zdtm/static/unlink_multiple_largefiles.c | 3 ++- test/zdtm/transition/file_aio.c | 3 ++- test/zdtm/transition/file_read.c | 5 ++++- test/zdtm/transition/maps008.c | 14 +++++++------ 22 files changed, 66 insertions(+), 48 deletions(-) diff --git a/Makefile b/Makefile index ef76d706c..133390f17 100644 --- a/Makefile +++ b/Makefile @@ -100,7 +100,7 @@ export PROTOUFIX DEFINES DEFINES += -D_FILE_OFFSET_BITS=64 DEFINES += -D_GNU_SOURCE -WARNINGS := -Wall -Wformat-security +WARNINGS := -Wall -Wformat-security -Wdeclaration-after-statement CFLAGS-GCOV := --coverage -fno-exceptions -fno-inline -fprofile-update=atomic export CFLAGS-GCOV diff --git a/criu/net.c b/criu/net.c index e960a34f9..712837782 100644 --- a/criu/net.c +++ b/criu/net.c @@ -2817,6 +2817,9 @@ int macvlan_ext_add(struct external *ext) static int prep_ns_sockets(struct ns_id *ns, bool for_dump) { int nsret = 
-1, ret; +#ifdef CONFIG_HAS_SELINUX + security_context_t ctx; +#endif if (ns->type != NS_CRIU) { pr_info("Switching to %d's net for collecting sockets\n", ns->ns_pid); @@ -2854,7 +2857,6 @@ static int prep_ns_sockets(struct ns_id *ns, bool for_dump) * policies installed. For Fedora based systems this is part * of the container-selinux package. */ - security_context_t ctx; /* * This assumes that all processes CRIU wants to dump are labeled @@ -3294,6 +3296,7 @@ int kerndat_link_nsid() } if (pid == 0) { + bool has_link_nsid; NetDeviceEntry nde = NET_DEVICE_ENTRY__INIT; struct net_link link = { .created = false, @@ -3336,7 +3339,7 @@ int kerndat_link_nsid() exit(1); } - bool has_link_nsid = false; + has_link_nsid = false; if (check_link_nsid(sk, &has_link_nsid)) exit(1); diff --git a/criu/page-xfer.c b/criu/page-xfer.c index 4d2d046ef..9affc2706 100644 --- a/criu/page-xfer.c +++ b/criu/page-xfer.c @@ -624,17 +624,17 @@ unsigned long handle_faulty_iov(int pid, struct iovec* riov, unsigned long* aux_len, unsigned long partial_read_bytes) { + struct iovec dummy; + ssize_t bytes_read; + unsigned long offset = 0; + unsigned long final_read_cnt = 0; + /* Handling Case 2*/ if (riov[faulty_index].iov_len == PAGE_SIZE) { cnt_sub(CNT_PAGES_WRITTEN, 1); return 0; } - struct iovec dummy; - ssize_t bytes_read; - unsigned long offset = 0; - unsigned long final_read_cnt = 0; - /* Handling Case 3-Part 3.2*/ offset = (partial_read_bytes)? 
partial_read_bytes : PAGE_SIZE; diff --git a/criu/pie/util-vdso.c b/criu/pie/util-vdso.c index 104da0633..58b27680c 100644 --- a/criu/pie/util-vdso.c +++ b/criu/pie/util-vdso.c @@ -243,10 +243,11 @@ static void parse_elf_symbols(uintptr_t mem, size_t size, Phdr_t *load, k = elf_hash((const unsigned char *)symbol); for (j = bucket[k % nbucket]; j < nchain && j != STN_UNDEF; j = chain[j]) { - addr = mem + dyn_symtab->d_un.d_ptr - load->p_vaddr; Sym_t *sym; char *name; + addr = mem + dyn_symtab->d_un.d_ptr - load->p_vaddr; + addr += sizeof(Sym_t)*j; if (__ptr_struct_oob(addr, sizeof(Sym_t), mem, size)) continue; diff --git a/test/zdtm/Makefile.inc b/test/zdtm/Makefile.inc index 32fc72d32..6958d128e 100644 --- a/test/zdtm/Makefile.inc +++ b/test/zdtm/Makefile.inc @@ -38,6 +38,7 @@ ifeq ($(origin CC), default) CC := $(CROSS_COMPILE)$(HOSTCC) endif CFLAGS += -g -O2 -Wall -Werror -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0 +CFLAGS += -Wdeclaration-after-statement CFLAGS += $(USERCFLAGS) CFLAGS += -D_GNU_SOURCE CPPFLAGS += -iquote $(LIBDIR)/arch/$(ARCH)/include diff --git a/test/zdtm/static/arm-neon00.c b/test/zdtm/static/arm-neon00.c index 96da16c6b..ce8123e51 100644 --- a/test/zdtm/static/arm-neon00.c +++ b/test/zdtm/static/arm-neon00.c @@ -12,13 +12,14 @@ const char *test_author = "Alexander Karatshov "; int main(int argc, char ** argv) { + int a, b, c, y1, y2; + srand(time(0)); - int a = rand() % 100; - int b = rand() % 100; - int c = rand() % 100; - int y1 = a + b*c; - int y2; + a = rand() % 100; + b = rand() % 100; + c = rand() % 100; + y1 = a + b*c; test_init(argc, argv); diff --git a/test/zdtm/static/child_subreaper.c b/test/zdtm/static/child_subreaper.c index 267795249..6d02c9f93 100644 --- a/test/zdtm/static/child_subreaper.c +++ b/test/zdtm/static/child_subreaper.c @@ -8,10 +8,11 @@ const char *test_author = "Michał Cłapiński "; int main(int argc, char **argv) { + int cs_before = 1, cs_after, ret; + test_init(argc, argv); - int cs_before = 1; - int ret = 
prctl(PR_SET_CHILD_SUBREAPER, cs_before, 0, 0, 0); + ret = prctl(PR_SET_CHILD_SUBREAPER, cs_before, 0, 0, 0); if (ret) { pr_perror("Can't set child subreaper attribute, err = %d", ret); exit(1); @@ -20,7 +21,6 @@ int main(int argc, char **argv) test_daemon(); test_waitsig(); - int cs_after; ret = prctl(PR_GET_CHILD_SUBREAPER, (unsigned long)&cs_after, 0, 0, 0); if (ret) { pr_perror("Can't get child subreaper attribute, err = %d", ret); diff --git a/test/zdtm/static/config_inotify_irmap.c b/test/zdtm/static/config_inotify_irmap.c index 831dc1974..3cbeba7d3 100644 --- a/test/zdtm/static/config_inotify_irmap.c +++ b/test/zdtm/static/config_inotify_irmap.c @@ -31,6 +31,7 @@ char test_files[2][128] = {TDIR"/zdtm-test", TDIR"/zdtm-test1",}; int main (int argc, char *argv[]) { + FILE *configfile; char buf[BUFF_SIZE]; int fd, wd, i; @@ -56,7 +57,7 @@ int main (int argc, char *argv[]) } } - FILE *configfile = fopen(CONFIG_PATH, "w"); + configfile = fopen(CONFIG_PATH, "w"); if (configfile == NULL) { pr_perror("Unable to create configuration file %s", CONFIG_PATH); goto err; diff --git a/test/zdtm/static/inotify00.c b/test/zdtm/static/inotify00.c index 67088edd8..635c05047 100644 --- a/test/zdtm/static/inotify00.c +++ b/test/zdtm/static/inotify00.c @@ -125,9 +125,10 @@ int main (int argc, char *argv[]) { pid_t pid; task_waiter_t t; - task_waiter_init(&t); static char buf[PATH_MAX]; + task_waiter_init(&t); + if (mount(NULL, "/", NULL, MS_PRIVATE | MS_REC, NULL)) { pr_perror("Unable to remount /"); return 1; diff --git a/test/zdtm/static/maps03.c b/test/zdtm/static/maps03.c index f2bf7957a..0e0a5b8f2 100644 --- a/test/zdtm/static/maps03.c +++ b/test/zdtm/static/maps03.c @@ -16,9 +16,10 @@ const char *test_author = "Cyrill Gorcunov "; int main(int argc, char **argv) { - test_init(argc, argv); unsigned char *mem; + test_init(argc, argv); + test_msg("Alloc huge VMA\n"); mem = (void *)mmap(NULL, (10L << 30), PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); diff --git 
a/test/zdtm/static/mnt_ext_dev.c b/test/zdtm/static/mnt_ext_dev.c index a9ac01333..1d60fc92f 100644 --- a/test/zdtm/static/mnt_ext_dev.c +++ b/test/zdtm/static/mnt_ext_dev.c @@ -20,10 +20,11 @@ TEST_OPTION(dirname, string, "directory name", 1); int main(int argc, char **argv) { char *loop, fd, dfd, fd2; - test_init(argc, argv); struct stat st, stp, st2; char dname[PATH_MAX], dname2[PATH_MAX]; + test_init(argc, argv); + snprintf(dname, sizeof(dname), "%s/test_dir", dirname); snprintf(dname2, sizeof(dname2), "%s/test_dir2", dirname); diff --git a/test/zdtm/static/mntns_link_remap.c b/test/zdtm/static/mntns_link_remap.c index 642641b16..6ac08191a 100644 --- a/test/zdtm/static/mntns_link_remap.c +++ b/test/zdtm/static/mntns_link_remap.c @@ -230,8 +230,8 @@ int main(int argc, char **argv) if (pid > 0) { - kill(pid, SIGTERM); int status = 1; + kill(pid, SIGTERM); wait(&status); if (WIFEXITED(status)) { if (WEXITSTATUS(status) == AWK_OK) diff --git a/test/zdtm/static/mntns_open.c b/test/zdtm/static/mntns_open.c index e19c4ea72..c687080a7 100644 --- a/test/zdtm/static/mntns_open.c +++ b/test/zdtm/static/mntns_open.c @@ -119,8 +119,8 @@ int main(int argc, char **argv) test_waitsig(); if (pid > 0) { - kill(pid, SIGTERM); int status = 1; + kill(pid, SIGTERM); wait(&status); if (WIFEXITED(status)) { if (WEXITSTATUS(status) == AWK_OK) diff --git a/test/zdtm/static/mountpoints.c b/test/zdtm/static/mountpoints.c index 00475cdc5..cf54d1096 100644 --- a/test/zdtm/static/mountpoints.c +++ b/test/zdtm/static/mountpoints.c @@ -292,8 +292,8 @@ int main(int argc, char **argv) } if (pid > 0) { - kill(pid, SIGTERM); int status = 1; + kill(pid, SIGTERM); wait(&status); if (status) return 1; diff --git a/test/zdtm/static/remap_dead_pid.c b/test/zdtm/static/remap_dead_pid.c index 261c591b7..5d4241fc6 100644 --- a/test/zdtm/static/remap_dead_pid.c +++ b/test/zdtm/static/remap_dead_pid.c @@ -40,12 +40,12 @@ int main(int argc, char **argv) while(1) sleep(10); } else { - test_msg("child is 
%d\n", pid); - int fd, ret; char path[PATH_MAX]; pid_t result; + test_msg("child is %d\n", pid); + sprintf(path, proc_path, pid); fd = open(path, O_RDONLY); if (fd < 0) { diff --git a/test/zdtm/static/selinux01.c b/test/zdtm/static/selinux01.c index 9966455c4..cec5980e8 100644 --- a/test/zdtm/static/selinux01.c +++ b/test/zdtm/static/selinux01.c @@ -133,6 +133,7 @@ int check_sockcreate_empty() int main(int argc, char **argv) { + int sk; char ctx[1024]; test_init(argc, argv); @@ -159,7 +160,7 @@ int main(int argc, char **argv) #endif /* Open our test socket */ - int sk = socket(AF_INET, SOCK_STREAM, 0); + sk = socket(AF_INET, SOCK_STREAM, 0); memset(ctx, 0, 1024); /* Read out the socket label */ if (fgetxattr(sk, "security.selinux", ctx, 1024) == -1) { diff --git a/test/zdtm/static/sigaltstack.c b/test/zdtm/static/sigaltstack.c index d324b0d37..f36d409f5 100644 --- a/test/zdtm/static/sigaltstack.c +++ b/test/zdtm/static/sigaltstack.c @@ -61,17 +61,17 @@ void thread_sigaction(int signo, siginfo_t *info, void *context) static void *thread_func(void *arg) { + struct sigaction sa = { + .sa_sigaction = thread_sigaction, + .sa_flags = SA_RESTART | SA_ONSTACK, + }; + sas_state[SAS_THRD_OLD] = (stack_t) { .ss_size = sizeof(stack_thread) - 8, .ss_sp = stack_thread, .ss_flags = 0, }; - struct sigaction sa = { - .sa_sigaction = thread_sigaction, - .sa_flags = SA_RESTART | SA_ONSTACK, - }; - sigemptyset(&sa.sa_mask); if (sigaction(SIGUSR2, &sa, NULL)) { @@ -103,17 +103,17 @@ int main(int argc, char *argv[]) { pthread_t thread; + struct sigaction sa = { + .sa_sigaction = leader_sigaction, + .sa_flags = SA_RESTART | SA_ONSTACK, + }; + sas_state[SAS_MAIN_OLD] = (stack_t) { .ss_size = sizeof(stack_main) - 8, .ss_sp = stack_main, .ss_flags = 0, }; - struct sigaction sa = { - .sa_sigaction = leader_sigaction, - .sa_flags = SA_RESTART | SA_ONSTACK, - }; - sigemptyset(&sa.sa_mask); test_init(argc, argv); diff --git a/test/zdtm/static/socket-tcp-syn-sent.c 
b/test/zdtm/static/socket-tcp-syn-sent.c index cf4c3bb46..755532a8a 100644 --- a/test/zdtm/static/socket-tcp-syn-sent.c +++ b/test/zdtm/static/socket-tcp-syn-sent.c @@ -37,7 +37,7 @@ int main(int argc, char **argv) { int fd, fd_s, sock, sk; union sockaddr_inet addr; - char cmd[4096]; + char c, cmd[4096]; test_init(argc, argv); @@ -113,7 +113,7 @@ int main(int argc, char **argv) fcntl(sock, F_SETFL, 0); - char c = 5; + c = 5; if (write(sock, &c, 1) != 1) { fail("Unable to send data"); return 1; diff --git a/test/zdtm/static/unlink_multiple_largefiles.c b/test/zdtm/static/unlink_multiple_largefiles.c index 7cf628606..2f9248c2f 100644 --- a/test/zdtm/static/unlink_multiple_largefiles.c +++ b/test/zdtm/static/unlink_multiple_largefiles.c @@ -30,10 +30,11 @@ void create_check_pattern(char *buf, size_t count, unsigned char seed) struct fiemap *read_fiemap(int fd) { - test_msg("Obtaining fiemap for fd %d\n", fd); struct fiemap *fiemap, *tmp; int extents_size; + test_msg("Obtaining fiemap for fd %d\n", fd); + fiemap = malloc(sizeof(struct fiemap)); if (fiemap == NULL) { pr_perror("Cannot allocate fiemap"); diff --git a/test/zdtm/transition/file_aio.c b/test/zdtm/transition/file_aio.c index a16010158..4a76c9390 100644 --- a/test/zdtm/transition/file_aio.c +++ b/test/zdtm/transition/file_aio.c @@ -17,7 +17,6 @@ const char *test_author = "Andrew Vagin "; int main(int argc, char **argv) { - test_init(argc, argv); char buf[BUF_SIZE]; int fd; struct aiocb aiocb; @@ -25,6 +24,8 @@ int main(int argc, char **argv) char tmpfname[256]="/tmp/file_aio.XXXXXX"; int ret; + test_init(argc, argv); + fd = mkstemp(tmpfname); if (fd == -1) { pr_perror("mkstemp() failed"); diff --git a/test/zdtm/transition/file_read.c b/test/zdtm/transition/file_read.c index 50dffd8c4..5d6e4dbba 100644 --- a/test/zdtm/transition/file_read.c +++ b/test/zdtm/transition/file_read.c @@ -158,9 +158,11 @@ static void chew_some_file(int num) rv = SEEK_FAILED; goto out_exit; case 1: - rv = FILE_CORRUPTED; + { int fd1; 
char str[PATH_MAX]; + + rv = FILE_CORRUPTED; // create standard file sprintf(str, "standard_%s.%d", filename, num); fd1 = open(str, O_WRONLY | O_CREAT | O_TRUNC, 0666); @@ -168,6 +170,7 @@ static void chew_some_file(int num) pr_perror("can't write %s", str); close(fd1); goto out_exit; + } } } rv = SUCCESS; diff --git a/test/zdtm/transition/maps008.c b/test/zdtm/transition/maps008.c index 5f6eb0887..7ed7c10a5 100644 --- a/test/zdtm/transition/maps008.c +++ b/test/zdtm/transition/maps008.c @@ -348,6 +348,7 @@ static int proc11_func(task_waiter_t *setup_waiter) void *mem3_old = mem3; size_t mem3_size_old = mem3_size; uint32_t crc_epoch = 0; + uint8_t *proc1_mem3; pstree->proc11 = getpid(); xmunmap(mem3, MEM3_START_CUT); @@ -382,7 +383,7 @@ static int proc11_func(task_waiter_t *setup_waiter) chk_proc_mem_eq(pstree->proc11, mem3, mem3_size, pstree->proc112, mem3, mem3_size + MEM3_END_CUT); - uint8_t *proc1_mem3 = mmap_proc_mem(pstree->proc1, + proc1_mem3 = mmap_proc_mem(pstree->proc1, (unsigned long)mem3_old, mem3_size_old); check_mem_eq(mem3, mem3_size, proc1_mem3 + MEM3_START_CUT, mem3_size); xmunmap(proc1_mem3, mem3_size_old); @@ -489,16 +490,17 @@ static void sigchld_hand(int signo, siginfo_t *info, void *ucontext) int main(int argc, char **argv) { - test_init(argc, argv); - - pstree = (struct pstree *)mmap_ashmem(PAGE_SIZE); - test_sync = (struct test_sync *)mmap_ashmem(sizeof(*test_sync)); - struct sigaction sa = { .sa_sigaction = sigchld_hand, .sa_flags = SA_RESTART | SA_SIGINFO | SA_NOCLDSTOP }; sigemptyset(&sa.sa_mask); + + test_init(argc, argv); + + pstree = (struct pstree *)mmap_ashmem(PAGE_SIZE); + test_sync = (struct test_sync *)mmap_ashmem(sizeof(*test_sync)); + if (sigaction(SIGCHLD, &sa, NULL)) { pr_perror("SIGCHLD handler setup"); exit(1); From 17c4a8b24507d1bd1a906aa4a9d5ea3054072141 Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Sat, 21 Dec 2019 18:13:06 +0000 Subject: [PATCH 0108/1854] style: Enforce kernel style -Wstrict-prototypes Include 
warnings that the kernel uses during compilation: -Wstrict-prototypes: enforces full declaration of functions. Previously, when declaring extern void func(), one can call func(123) and have no compilation error. This is dangerous. The correct declaration is extern void func(void). Signed-off-by: Nicolas Viennot [Generated a commit message from the pull request] Signed-off-by: Dmitry Safonov --- Makefile | 2 +- criu/config.c | 2 +- criu/cr-check.c | 10 ++++----- criu/cr-dump.c | 4 ++-- criu/cr-restore.c | 8 +++---- criu/cr-service.c | 4 ++-- criu/crtools.c | 2 +- criu/fault-injection.c | 2 +- criu/include/cr_options.h | 4 ++-- criu/include/lsm.h | 4 ++-- criu/include/mount.h | 2 +- criu/include/net.h | 8 +++---- criu/include/tls.h | 2 +- criu/kerndat.c | 6 ++--- criu/lsm.c | 2 +- criu/mount.c | 6 ++--- criu/namespaces.c | 2 +- criu/net.c | 10 ++++----- criu/pstree.c | 2 +- criu/seize.c | 2 +- criu/tls.c | 6 ++--- criu/util.c | 4 ++-- soccr/test/tcp-conn.c | 2 +- soccr/test/tcp-constructor.c | 2 +- test/others/unix-callback/unix-client.c | 2 +- test/others/unix-callback/unix-server.c | 2 +- test/zdtm/Makefile.inc | 2 +- test/zdtm/lib/test.c | 6 ++--- test/zdtm/static/apparmor.c | 4 ++-- .../static/child_subreaper_and_reparent.c | 6 ++--- .../static/child_subreaper_existing_child.c | 6 ++--- test/zdtm/static/dumpable02.c | 2 +- test/zdtm/static/fdt_shared.c | 4 ++-- test/zdtm/static/file_locks00.c | 2 +- test/zdtm/static/inotify_system.c | 2 +- test/zdtm/static/maps00.c | 7 +++--- test/zdtm/static/selinux00.c | 8 +++---- test/zdtm/static/selinux01.c | 10 ++++----- test/zdtm/static/session02.c | 8 +++---- test/zdtm/static/session03.c | 10 ++++----- test/zdtm/transition/netlink00.c | 22 +++++++++---------- 41 files changed, 101 insertions(+), 100 deletions(-) diff --git a/Makefile b/Makefile index 133390f17..00e563c11 100644 --- a/Makefile +++ b/Makefile @@ -100,7 +100,7 @@ export PROTOUFIX DEFINES DEFINES += -D_FILE_OFFSET_BITS=64 DEFINES += -D_GNU_SOURCE -WARNINGS := 
-Wall -Wformat-security -Wdeclaration-after-statement +WARNINGS := -Wall -Wformat-security -Wdeclaration-after-statement -Wstrict-prototypes CFLAGS-GCOV := --coverage -fno-exceptions -fno-inline -fprofile-update=atomic export CFLAGS-GCOV diff --git a/criu/config.c b/criu/config.c index e5d42efe4..73c62f5bb 100644 --- a/criu/config.c +++ b/criu/config.c @@ -853,7 +853,7 @@ bad_arg: return 1; } -int check_options() +int check_options(void) { if (opts.tcp_established_ok) pr_info("Will dump/restore TCP connections\n"); diff --git a/criu/cr-check.c b/criu/cr-check.c index 729b2dc38..17dd29b42 100644 --- a/criu/cr-check.c +++ b/criu/cr-check.c @@ -51,7 +51,7 @@ #include "restorer.h" #include "uffd.h" -static char *feature_name(int (*func)()); +static char *feature_name(int (*func)(void)); static int check_tty(void) { @@ -513,7 +513,7 @@ static int check_ipc(void) return -1; } -static int check_sigqueuinfo() +static int check_sigqueuinfo(void) { siginfo_t info = { .si_code = 1 }; @@ -960,7 +960,7 @@ static int clone_cb(void *_arg) { exit(0); } -static int check_clone_parent_vs_pid() +static int check_clone_parent_vs_pid(void) { struct clone_arg ca; pid_t pid; @@ -1447,7 +1447,7 @@ static int check_external_net_ns(void) struct feature_list { char *name; - int (*func)(); + int (*func)(void); }; static struct feature_list feature_list[] = { @@ -1517,7 +1517,7 @@ int check_add_feature(char *feat) return -1; } -static char *feature_name(int (*func)()) +static char *feature_name(int (*func)(void)) { struct feature_list *fl; diff --git a/criu/cr-dump.c b/criu/cr-dump.c index 4b5a01cfd..88323af92 100644 --- a/criu/cr-dump.c +++ b/criu/cr-dump.c @@ -1439,7 +1439,7 @@ err_cure_imgset: static int alarm_attempts = 0; -bool alarm_timeouted() { +bool alarm_timeouted(void) { return alarm_attempts > 0; } @@ -1456,7 +1456,7 @@ static void alarm_handler(int signo) BUG(); } -static int setup_alarm_handler() +static int setup_alarm_handler(void) { struct sigaction sa = { .sa_handler = 
alarm_handler, diff --git a/criu/cr-restore.c b/criu/cr-restore.c index b920ce262..687cd6c68 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -182,13 +182,13 @@ static int __restore_wait_inprogress_tasks(int participants) return 0; } -static int restore_wait_inprogress_tasks() +static int restore_wait_inprogress_tasks(void) { return __restore_wait_inprogress_tasks(0); } /* Wait all tasks except the current one */ -static int restore_wait_other_tasks() +static int restore_wait_other_tasks(void) { int participants, stage; @@ -1587,7 +1587,7 @@ static void restore_pgid(void) futex_set_and_wake(&rsti(current)->pgrp_set, 1); } -static int __legacy_mount_proc() +static int __legacy_mount_proc(void) { char proc_mountpoint[] = "/tmp/crtools-proc.XXXXXX"; int fd; @@ -1941,7 +1941,7 @@ static int catch_tasks(bool root_seized, enum trace_flags *flag) return 0; } -static int clear_breakpoints() +static int clear_breakpoints(void) { struct pstree_item *item; int ret = 0, i; diff --git a/criu/cr-service.c b/criu/cr-service.c index 549b3368b..279016bcd 100644 --- a/criu/cr-service.c +++ b/criu/cr-service.c @@ -1278,7 +1278,7 @@ static void reap_worker(int signo) } } -static int setup_sigchld_handler() +static int setup_sigchld_handler(void) { struct sigaction action; @@ -1295,7 +1295,7 @@ static int setup_sigchld_handler() return 0; } -static int restore_sigchld_handler() +static int restore_sigchld_handler(void) { struct sigaction action; diff --git a/criu/crtools.c b/criu/crtools.c index 700fad994..9b6e94809 100644 --- a/criu/crtools.c +++ b/criu/crtools.c @@ -47,7 +47,7 @@ #include "setproctitle.h" #include "sysctl.h" -void flush_early_log_to_stderr() __attribute__((destructor)); +void flush_early_log_to_stderr(void) __attribute__((destructor)); void flush_early_log_to_stderr(void) { diff --git a/criu/fault-injection.c b/criu/fault-injection.c index 4128814d5..4b0650008 100644 --- a/criu/fault-injection.c +++ b/criu/fault-injection.c @@ -3,7 +3,7 @@ enum faults 
fi_strategy; -int fault_injection_init() +int fault_injection_init(void) { char *val; int start; diff --git a/criu/include/cr_options.h b/criu/include/cr_options.h index 2c1451e86..c5af33186 100644 --- a/criu/include/cr_options.h +++ b/criu/include/cr_options.h @@ -158,7 +158,7 @@ extern struct cr_options opts; char *rpc_cfg_file; extern int parse_options(int argc, char **argv, bool *usage_error, bool *has_exec_cmd, int state); -extern int check_options(); -extern void init_opts(); +extern int check_options(void); +extern void init_opts(void); #endif /* __CR_OPTIONS_H__ */ diff --git a/criu/include/lsm.h b/criu/include/lsm.h index 3b8271282..a41915a4c 100644 --- a/criu/include/lsm.h +++ b/criu/include/lsm.h @@ -39,7 +39,7 @@ extern int lsm_check_opts(void); #ifdef CONFIG_HAS_SELINUX int dump_xattr_security_selinux(int fd, FdinfoEntry *e); int run_setsockcreatecon(FdinfoEntry *e); -int reset_setsockcreatecon(); +int reset_setsockcreatecon(void); #else static inline int dump_xattr_security_selinux(int fd, FdinfoEntry *e) { return 0; @@ -47,7 +47,7 @@ static inline int dump_xattr_security_selinux(int fd, FdinfoEntry *e) { static inline int run_setsockcreatecon(FdinfoEntry *e) { return 0; } -static inline int reset_setsockcreatecon() { +static inline int reset_setsockcreatecon(void) { return 0; } #endif diff --git a/criu/include/mount.h b/criu/include/mount.h index d9b375f5d..8bf19b266 100644 --- a/criu/include/mount.h +++ b/criu/include/mount.h @@ -96,7 +96,7 @@ extern int collect_binfmt_misc(void); static inline int collect_binfmt_misc(void) { return 0; } #endif -extern struct mount_info *mnt_entry_alloc(); +extern struct mount_info *mnt_entry_alloc(void); extern void mnt_entry_free(struct mount_info *mi); extern int __mntns_get_root_fd(pid_t pid); diff --git a/criu/include/net.h b/criu/include/net.h index 9976f6eb0..0a556f3da 100644 --- a/criu/include/net.h +++ b/criu/include/net.h @@ -31,7 +31,7 @@ extern int collect_net_namespaces(bool for_dump); extern int 
network_lock(void); extern void network_unlock(void); -extern int network_lock_internal(); +extern int network_lock_internal(void); extern struct ns_desc net_ns_desc; @@ -47,11 +47,11 @@ extern int move_veth_to_bridge(void); extern int kerndat_link_nsid(void); extern int net_get_nsid(int rtsk, int fd, int *nsid); -extern struct ns_id *net_get_root_ns(); +extern struct ns_id *net_get_root_ns(void); extern int kerndat_nsid(void); extern void check_has_netns_ioc(int fd, bool *kdat_val, const char *name); extern int net_set_ext(struct ns_id *ns); -extern struct ns_id *get_root_netns(); -extern int read_net_ns_img(); +extern struct ns_id *get_root_netns(void); +extern int read_net_ns_img(void); #endif /* __CR_NET_H__ */ diff --git a/criu/include/tls.h b/criu/include/tls.h index aa2517887..b48e4b480 100644 --- a/criu/include/tls.h +++ b/criu/include/tls.h @@ -4,7 +4,7 @@ # ifdef CONFIG_GNUTLS int tls_x509_init(int sockfd, bool is_server); -void tls_terminate_session(); +void tls_terminate_session(void); ssize_t tls_send(const void *buf, size_t len, int flags); ssize_t tls_recv(void *buf, size_t len, int flags); diff --git a/criu/kerndat.c b/criu/kerndat.c index b0dd83135..d1afde71d 100644 --- a/criu/kerndat.c +++ b/criu/kerndat.c @@ -364,7 +364,7 @@ no_dt: } /* The page frame number (PFN) is constant for the zero page */ -static int init_zero_page_pfn() +static int init_zero_page_pfn(void) { void *addr; int ret = 0; @@ -429,7 +429,7 @@ static int get_task_size(void) return 0; } -static int kerndat_fdinfo_has_lock() +static int kerndat_fdinfo_has_lock(void) { int fd, pfd = -1, exit_code = -1, len; char buf[PAGE_SIZE]; @@ -464,7 +464,7 @@ out: return exit_code; } -static int get_ipv6() +static int get_ipv6(void) { if (access("/proc/sys/net/ipv6", F_OK) < 0) { if (errno == ENOENT) { diff --git a/criu/lsm.c b/criu/lsm.c index 9d7e55c11..060f10259 100644 --- a/criu/lsm.c +++ b/criu/lsm.c @@ -133,7 +133,7 @@ static int selinux_get_sockcreate_label(pid_t pid, char **output) 
return 0; } -int reset_setsockcreatecon() +int reset_setsockcreatecon(void) { /* Currently this only works for SELinux. */ if (kdat.lsm != LSMTYPE__SELINUX) diff --git a/criu/mount.c b/criu/mount.c index 24a8516c6..180f2a62d 100644 --- a/criu/mount.c +++ b/criu/mount.c @@ -2140,7 +2140,7 @@ static int restore_ext_mount(struct mount_info *mi) static char mnt_clean_path[] = "/tmp/cr-tmpfs.XXXXXX"; -static int mount_clean_path() +static int mount_clean_path(void) { /* * To make a bind mount, we need to have access to a source directory, @@ -2167,7 +2167,7 @@ static int mount_clean_path() return 0; } -static int umount_clean_path() +static int umount_clean_path(void) { if (umount2(mnt_clean_path, MNT_DETACH)) { pr_perror("Unable to umount %s", mnt_clean_path); @@ -2659,7 +2659,7 @@ static int find_remap_mounts(struct mount_info *root) } /* Move remapped mounts to places where they have to be */ -static int fixup_remap_mounts() +static int fixup_remap_mounts(void) { struct mnt_remap_entry *r; diff --git a/criu/namespaces.c b/criu/namespaces.c index 57f6bdfef..21266df7c 100644 --- a/criu/namespaces.c +++ b/criu/namespaces.c @@ -976,7 +976,7 @@ err: return exit_code; } -void free_userns_maps() +void free_userns_maps(void) { if (userns_entry.n_uid_map > 0) { xfree(userns_entry.uid_map[0]); diff --git a/criu/net.c b/criu/net.c index 712837782..8e6cfaff1 100644 --- a/criu/net.c +++ b/criu/net.c @@ -1765,7 +1765,7 @@ static int __restore_links(struct ns_id *nsid, int *nrlinks, int *nrcreated) return 0; } -static int restore_links() +static int restore_links(void) { int nrcreated, nrlinks; struct ns_id *nsid; @@ -2080,7 +2080,7 @@ out: * iptables-restore is executed from a target userns and it may have not enough * rights to open /run/xtables.lock. Here we try to workaround this problem. 
*/ -static int prepare_xtable_lock() +static int prepare_xtable_lock(void) { int fd; @@ -2700,7 +2700,7 @@ err: return ret; } -int network_lock_internal() +int network_lock_internal(void) { char conf[] = "*filter\n" ":CRIU - [0:0]\n" @@ -2731,7 +2731,7 @@ int network_lock_internal() return ret; } -static int network_unlock_internal() +static int network_unlock_internal(void) { char conf[] = "*filter\n" ":CRIU - [0:0]\n" @@ -3284,7 +3284,7 @@ static int check_link_nsid(int rtsk, void *args) return do_rtnl_req(rtsk, &req, sizeof(req), check_one_link_nsid, NULL, NULL, args); } -int kerndat_link_nsid() +int kerndat_link_nsid(void) { int status; pid_t pid; diff --git a/criu/pstree.c b/criu/pstree.c index 92b4167aa..19cf5ad38 100644 --- a/criu/pstree.c +++ b/criu/pstree.c @@ -608,7 +608,7 @@ err: } #define RESERVED_PIDS 300 -static int get_free_pid() +static int get_free_pid(void) { static struct pid *prev, *next; diff --git a/criu/seize.c b/criu/seize.c index e1e6b8195..fd314666f 100644 --- a/criu/seize.c +++ b/criu/seize.c @@ -194,7 +194,7 @@ static int seize_cgroup_tree(char *root_path, const char *state) * A freezer cgroup can contain tasks which will not be dumped * and we need to wait them, because the are interrupted them by ptrace. 
*/ -static int freezer_wait_processes() +static int freezer_wait_processes(void) { int i; diff --git a/criu/tls.c b/criu/tls.c index db9cc4f5a..f7b94dee8 100644 --- a/criu/tls.c +++ b/criu/tls.c @@ -31,7 +31,7 @@ static gnutls_certificate_credentials_t x509_cred; static int tls_sk = -1; static int tls_sk_flags = 0; -void tls_terminate_session() +void tls_terminate_session(void) { int ret; @@ -227,7 +227,7 @@ static int tls_x509_verify_peer_cert(void) return 0; } -static int tls_handshake() +static int tls_handshake(void) { int ret = -1; while (ret != GNUTLS_E_SUCCESS) { @@ -241,7 +241,7 @@ static int tls_handshake() return 0; } -static int tls_x509_setup_creds() +static int tls_x509_setup_creds(void) { int ret; char *cacert = CRIU_CACERT; diff --git a/criu/util.c b/criu/util.c index 3bae18ab2..1646ce1c4 100644 --- a/criu/util.c +++ b/criu/util.c @@ -326,7 +326,7 @@ int close_pid_proc(void) return 0; } -void close_proc() +void close_proc(void) { close_pid_proc(); close_service_fd(PROC_FD_OFF); @@ -690,7 +690,7 @@ int cr_daemon(int nochdir, int noclose, int close_fd) return 0; } -int is_root_user() +int is_root_user(void) { if (geteuid() != 0) { pr_err("You need to be root to run this command\n"); diff --git a/soccr/test/tcp-conn.c b/soccr/test/tcp-conn.c index 1a1a5bb39..e31f58e7e 100644 --- a/soccr/test/tcp-conn.c +++ b/soccr/test/tcp-conn.c @@ -23,7 +23,7 @@ static void pr_printf(unsigned int level, const char *fmt, ...) 
va_end(args); } -int main() +int main(void) { union libsoccr_addr addr, dst; int srv, sock, clnt, rst; diff --git a/soccr/test/tcp-constructor.c b/soccr/test/tcp-constructor.c index 89f201000..973dbf10c 100644 --- a/soccr/test/tcp-constructor.c +++ b/soccr/test/tcp-constructor.c @@ -20,7 +20,7 @@ struct tcp { uint16_t wscale; }; -static void usage() +static void usage(void) { printf( "Usage: --addr ADDR -port PORT --seq SEQ --next --addr ADDR -port PORT --seq SEQ -- CMD ...\n" diff --git a/test/others/unix-callback/unix-client.c b/test/others/unix-callback/unix-client.c index 69808b53c..676c4adbc 100644 --- a/test/others/unix-callback/unix-client.c +++ b/test/others/unix-callback/unix-client.c @@ -86,7 +86,7 @@ static int check_sock(int i) return 0; } -int main() +int main(void) { int i, fd; sigset_t set; diff --git a/test/others/unix-callback/unix-server.c b/test/others/unix-callback/unix-server.c index 8f32f53dd..47bebd05d 100644 --- a/test/others/unix-callback/unix-server.c +++ b/test/others/unix-callback/unix-server.c @@ -19,7 +19,7 @@ struct ticket *tickets; #define SK_NAME "/tmp/criu.unix.callback.test" -int main() +int main(void) { int sk, ret, id; char buf[4096]; diff --git a/test/zdtm/Makefile.inc b/test/zdtm/Makefile.inc index 6958d128e..43763321f 100644 --- a/test/zdtm/Makefile.inc +++ b/test/zdtm/Makefile.inc @@ -38,7 +38,7 @@ ifeq ($(origin CC), default) CC := $(CROSS_COMPILE)$(HOSTCC) endif CFLAGS += -g -O2 -Wall -Werror -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0 -CFLAGS += -Wdeclaration-after-statement +CFLAGS += -Wdeclaration-after-statement -Wstrict-prototypes CFLAGS += $(USERCFLAGS) CFLAGS += -D_GNU_SOURCE CPPFLAGS += -iquote $(LIBDIR)/arch/$(ARCH)/include diff --git a/test/zdtm/lib/test.c b/test/zdtm/lib/test.c index a1bdfc1b4..630476de0 100644 --- a/test/zdtm/lib/test.c +++ b/test/zdtm/lib/test.c @@ -71,7 +71,7 @@ static void test_fini(void) unlinkat(cwd, pidfile, 0); } -static void setup_outfile() +static void setup_outfile(void) { if 
(!access(outfile, F_OK) || errno != ENOENT) { fprintf(stderr, "Output file %s appears to exist, aborting\n", @@ -93,7 +93,7 @@ static void setup_outfile() exit(1); } -static void redir_stdfds() +static void redir_stdfds(void) { int nullfd; @@ -346,7 +346,7 @@ void test_init(int argc, char **argv) srand48(time(NULL)); /* just in case we need it */ } -void test_daemon() +void test_daemon(void) { futex_set_and_wake(&test_shared_state->stage, TEST_RUNNING_STAGE); } diff --git a/test/zdtm/static/apparmor.c b/test/zdtm/static/apparmor.c index 15930c761..b3a4d7549 100644 --- a/test/zdtm/static/apparmor.c +++ b/test/zdtm/static/apparmor.c @@ -15,7 +15,7 @@ const char *test_author = "Tycho Andersen "; #define PROFILE "criu_test" -int setprofile() +int setprofile(void) { char profile[1024]; int fd, len; @@ -45,7 +45,7 @@ int setprofile() return 0; } -int checkprofile() +int checkprofile(void) { FILE *f; char path[PATH_MAX], profile[1024]; diff --git a/test/zdtm/static/child_subreaper_and_reparent.c b/test/zdtm/static/child_subreaper_and_reparent.c index 57943a67b..e3955d3d9 100644 --- a/test/zdtm/static/child_subreaper_and_reparent.c +++ b/test/zdtm/static/child_subreaper_and_reparent.c @@ -25,7 +25,7 @@ struct shared { int parent_after_cr; } *sh; -int orphan() +int orphan(void) { /* * Wait until reparented to the pidns init. 
(By waiting @@ -45,7 +45,7 @@ int orphan() return 0; } -int helper() +int helper(void) { int pid; @@ -59,7 +59,7 @@ int helper() return 0; } -int subreaper() +int subreaper(void) { int pid, ret, status; diff --git a/test/zdtm/static/child_subreaper_existing_child.c b/test/zdtm/static/child_subreaper_existing_child.c index 28e9dbb8a..8291aba08 100644 --- a/test/zdtm/static/child_subreaper_existing_child.c +++ b/test/zdtm/static/child_subreaper_existing_child.c @@ -24,7 +24,7 @@ struct shared { } *sh; -int orphan() +int orphan(void) { /* Return the control back to MAIN worker to do C/R */ futex_set_and_wake(&sh->fstate, TEST_CRIU); @@ -36,7 +36,7 @@ int orphan() return 0; } -int helper() +int helper(void) { int pid; @@ -52,7 +52,7 @@ int helper() return 0; } -int subreaper() +int subreaper(void) { int pid, ret, status; diff --git a/test/zdtm/static/dumpable02.c b/test/zdtm/static/dumpable02.c index 024371bd8..7e2eee2d1 100644 --- a/test/zdtm/static/dumpable02.c +++ b/test/zdtm/static/dumpable02.c @@ -13,7 +13,7 @@ const char *test_doc = "Check dumpable flag handling (non-dumpable case)"; const char *test_author = "Filipe Brandenburger "; -int dumpable_server() { +int dumpable_server(void) { char buf[256]; int ret; diff --git a/test/zdtm/static/fdt_shared.c b/test/zdtm/static/fdt_shared.c index 2111356f5..a84444af5 100644 --- a/test/zdtm/static/fdt_shared.c +++ b/test/zdtm/static/fdt_shared.c @@ -22,7 +22,7 @@ TEST_OPTION(filename, string, "file name", 1); #define CHILDREN 4 static int fork_pfd[2]; -static void forked() +static void forked(void) { char c = 0; @@ -32,7 +32,7 @@ static void forked() } } -static void wait_children() +static void wait_children(void) { int i; char c; diff --git a/test/zdtm/static/file_locks00.c b/test/zdtm/static/file_locks00.c index 59e19cfe1..fa98a31b3 100644 --- a/test/zdtm/static/file_locks00.c +++ b/test/zdtm/static/file_locks00.c @@ -101,7 +101,7 @@ static int check_write_lock(int fd, int whence, off_t offset, off_t len) return -1; } 
-static int check_file_locks() +static int check_file_locks(void) { int fd_0, fd_1; int ret0, ret1; diff --git a/test/zdtm/static/inotify_system.c b/test/zdtm/static/inotify_system.c index 59f47c41c..3e6b2ad48 100644 --- a/test/zdtm/static/inotify_system.c +++ b/test/zdtm/static/inotify_system.c @@ -68,7 +68,7 @@ typedef struct { int dir; } desc; -void do_wait() { +void do_wait(void) { test_daemon(); test_waitsig(); } diff --git a/test/zdtm/static/maps00.c b/test/zdtm/static/maps00.c index a6c68cd25..f2da9b975 100644 --- a/test/zdtm/static/maps00.c +++ b/test/zdtm/static/maps00.c @@ -123,7 +123,7 @@ static void segfault(int signo) * after test func should be placed check map, because size of test_func * is calculated as (check_map-test_func) */ -int test_func() +int test_func(void) { return 1; } @@ -176,8 +176,9 @@ static int check_map(struct map *map) memcpy(map->ptr,test_func, getpagesize()); } else { if (!(map->flag & MAP_ANONYMOUS)) { + uint8_t funlen = (uint8_t *)check_map - (uint8_t *)test_func; lseek(map->fd,0,SEEK_SET); - if (write(map->fd,test_func,check_map - test_func)fd,test_func,funlen)filename); return -1; } @@ -185,7 +186,7 @@ static int check_map(struct map *map) } if (!(map->flag & MAP_ANONYMOUS) || map->prot & PROT_WRITE) /* Function body has been copied into the mapping */ - ((int (*)())map->ptr)(); /* perform exec access */ + ((int (*)(void))map->ptr)(); /* perform exec access */ else /* No way to copy function body into mapping, * clear exec bit from effective protection diff --git a/test/zdtm/static/selinux00.c b/test/zdtm/static/selinux00.c index db8420eac..b5b3e3cc0 100644 --- a/test/zdtm/static/selinux00.c +++ b/test/zdtm/static/selinux00.c @@ -26,14 +26,14 @@ const char *test_author = "Adrian Reber "; */ char state; -int check_for_selinux() +int check_for_selinux(void) { if (access("/sys/fs/selinux", F_OK) == 0) return 0; return 1; } -int setprofile() +int setprofile(void) { int fd, len; @@ -54,7 +54,7 @@ int setprofile() return 0; } -int 
checkprofile() +int checkprofile(void) { int fd; char context[1024]; @@ -83,7 +83,7 @@ int checkprofile() return 0; } -int check_sockcreate() +int check_sockcreate(void) { char *output = NULL; FILE *f = fopen("/proc/self/attr/sockcreate", "r"); diff --git a/test/zdtm/static/selinux01.c b/test/zdtm/static/selinux01.c index cec5980e8..cbf145d2a 100644 --- a/test/zdtm/static/selinux01.c +++ b/test/zdtm/static/selinux01.c @@ -28,14 +28,14 @@ const char *test_author = "Adrian Reber "; */ char state; -int check_for_selinux() +int check_for_selinux(void) { if (access("/sys/fs/selinux", F_OK) == 0) return 0; return 1; } -int setprofile() +int setprofile(void) { int fd, len; @@ -56,7 +56,7 @@ int setprofile() return 0; } -int set_sockcreate() +int set_sockcreate(void) { int fd, len; @@ -77,7 +77,7 @@ int set_sockcreate() return 0; } -int check_sockcreate() +int check_sockcreate(void) { int fd; char context[1024]; @@ -106,7 +106,7 @@ int check_sockcreate() return 0; } -int check_sockcreate_empty() +int check_sockcreate_empty(void) { char *output = NULL; FILE *f = fopen("/proc/self/attr/sockcreate", "r"); diff --git a/test/zdtm/static/session02.c b/test/zdtm/static/session02.c index 37f245d2e..f5c81df16 100644 --- a/test/zdtm/static/session02.c +++ b/test/zdtm/static/session02.c @@ -25,7 +25,7 @@ struct process *processes; int nr_processes = 20; int current = 0; -static void cleanup() +static void cleanup(void) { int i; @@ -55,9 +55,9 @@ struct command int arg2; }; -static void handle_command(); +static void handle_command(void); -static void mainloop() +static void mainloop(void) { while (1) handle_command(); @@ -100,7 +100,7 @@ static int make_child(int id, int flags) return cid; } -static void handle_command() +static void handle_command(void) { int sk = processes[current].sks[0], ret, status = 0; struct command cmd; diff --git a/test/zdtm/static/session03.c b/test/zdtm/static/session03.c index 2b3c46c32..8ca16e410 100644 --- a/test/zdtm/static/session03.c +++ 
b/test/zdtm/static/session03.c @@ -36,7 +36,7 @@ static void sigchld_handler(int signal, siginfo_t *siginfo, void *data) waitpid(pid, NULL, WNOHANG); } -static void cleanup() +static void cleanup(void) { int i, ret; @@ -72,7 +72,7 @@ enum commands int cmd_weght[TEST_MAX] = {10, 3, 1, 10, 7}; int sum_weight = 0; -static int get_rnd_op() +static int get_rnd_op(void) { int i, m; if (sum_weight == 0) { @@ -97,9 +97,9 @@ struct command int arg2; }; -static void handle_command(); +static void handle_command(void); -static void mainloop() +static void mainloop(void) { while (1) handle_command(); @@ -142,7 +142,7 @@ static int make_child(int id, int flags) return cid; } -static void handle_command() +static void handle_command(void) { int sk = processes[current].sks[0], ret, status = 0; struct command cmd; diff --git a/test/zdtm/transition/netlink00.c b/test/zdtm/transition/netlink00.c index c9b2303e8..3504a48a1 100644 --- a/test/zdtm/transition/netlink00.c +++ b/test/zdtm/transition/netlink00.c @@ -56,12 +56,12 @@ struct rtmsg *rtp; int rtl; struct rtattr *rtap; -int send_request(); -int recv_reply(); -int form_request_add(); -int form_request_del(); -int read_reply(); -typedef int (*cmd_t)(); +int send_request(void); +int recv_reply(void); +int form_request_add(void); +int form_request_del(void); +int read_reply(void); +typedef int (*cmd_t)(void); #define CMD_NUM 2 cmd_t cmd[CMD_NUM]={form_request_add, form_request_del}; @@ -120,7 +120,7 @@ out: return 0; } -int send_request() +int send_request(void) { // create the remote address // to communicate @@ -145,7 +145,7 @@ int send_request() } return 0; } -int recv_reply() +int recv_reply(void) { char *p; // initialize the socket read buffer @@ -191,7 +191,7 @@ int recv_reply() return 0; } -int read_reply() +int read_reply(void) { //string to hold content of the route // table (i.e. 
one entry) @@ -250,7 +250,7 @@ int read_reply() #define NLMSG_TAIL(nmsg) \ ((struct rtattr *) (((void *) (nmsg)) + NLMSG_ALIGN((nmsg)->nlmsg_len))) -int form_request_del() +int form_request_del(void) { bzero(&req, sizeof(req)); req.nl.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)); @@ -272,7 +272,7 @@ int form_request_del() return 0; } -int form_request_add() +int form_request_add(void) { int ifcn = 1; //interface number From e1c4871759d6edb4d7c2d3129981060b873ec912 Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Mon, 11 Nov 2019 19:07:52 +0300 Subject: [PATCH 0109/1854] net: add nftables c/r Starting with CentOS 8, nft is used instead of iptables. CRIU has never supported nft rules, so all of them are flushed after c/r. Co-developed-by: Pavel Tikhomirov Signed-off-by: Pavel Tikhomirov Signed-off-by: Alexander Mikhalitsyn Signed-off-by: Alexander Mikhalitsyn Signed-off-by: Dmitry Safonov --- Makefile.config | 17 ++++ criu/image-desc.c | 1 + criu/include/image-desc.h | 1 + criu/include/magic.h | 1 + criu/net.c | 115 +++++++++++++++++++++++++++ scripts/build/Dockerfile.alpine | 1 + scripts/build/Dockerfile.fedora.tmpl | 2 + scripts/feature-tests.mak | 23 ++++++ 8 files changed, 161 insertions(+) diff --git a/Makefile.config b/Makefile.config index 81aae24f8..161365960 100644 --- a/Makefile.config +++ b/Makefile.config @@ -23,6 +23,23 @@ else $(info Note: Building without GnuTLS support) endif +ifeq ($(call pkg-config-check,libnftables),y) + LIB_NFTABLES := $(shell pkg-config --libs libnftables) + ifeq ($(call try-cc,$(FEATURE_TEST_NFTABLES_LIB_API_0),$(LIB_NFTABLES)),true) + LIBS_FEATURES += $(LIB_NFTABLES) + FEATURE_DEFINES += -DCONFIG_HAS_NFTABLES_LIB_API_0 + else ifeq ($(call try-cc,$(FEATURE_TEST_NFTABLES_LIB_API_1),$(LIB_NFTABLES)),true) + LIBS_FEATURES += $(LIB_NFTABLES) + FEATURE_DEFINES += -DCONFIG_HAS_NFTABLES_LIB_API_1 + else + $(warning Warn: you have libnftables installed but it has incompatible API) + $(warning Warn: Building without nftables
support) + endif +else + $(warning Warn: you have no libnftables installed) + $(warning Warn: Building without nftables support) +endif + export LIBS += $(LIBS_FEATURES) CONFIG_FILE = .config diff --git a/criu/image-desc.c b/criu/image-desc.c index 81cd07484..ae5d817fe 100644 --- a/criu/image-desc.c +++ b/criu/image-desc.c @@ -76,6 +76,7 @@ struct cr_fd_desc_tmpl imgset_template[CR_FD_MAX] = { FD_ENTRY_F(RULE, "rule-%u", O_NOBUF), FD_ENTRY_F(IPTABLES, "iptables-%u", O_NOBUF), FD_ENTRY_F(IP6TABLES, "ip6tables-%u", O_NOBUF), + FD_ENTRY_F(NFTABLES, "nftables-%u", O_NOBUF), FD_ENTRY_F(TMPFS_IMG, "tmpfs-%u.tar.gz", O_NOBUF), FD_ENTRY_F(TMPFS_DEV, "tmpfs-dev-%u.tar.gz", O_NOBUF), FD_ENTRY_F(AUTOFS, "autofs-%u", O_NOBUF), diff --git a/criu/include/image-desc.h b/criu/include/image-desc.h index fea80a719..6db8bf94f 100644 --- a/criu/include/image-desc.h +++ b/criu/include/image-desc.h @@ -42,6 +42,7 @@ enum { CR_FD_RULE, CR_FD_IPTABLES, CR_FD_IP6TABLES, + CR_FD_NFTABLES, CR_FD_NETNS, CR_FD_NETNF_CT, CR_FD_NETNF_EXP, diff --git a/criu/include/magic.h b/criu/include/magic.h index 05101f436..1a583f4ed 100644 --- a/criu/include/magic.h +++ b/criu/include/magic.h @@ -103,6 +103,7 @@ #define TMPFS_DEV_MAGIC RAW_IMAGE_MAGIC #define IPTABLES_MAGIC RAW_IMAGE_MAGIC #define IP6TABLES_MAGIC RAW_IMAGE_MAGIC +#define NFTABLES_MAGIC RAW_IMAGE_MAGIC #define NETNF_CT_MAGIC RAW_IMAGE_MAGIC #define NETNF_EXP_MAGIC RAW_IMAGE_MAGIC diff --git a/criu/net.c b/criu/net.c index 8e6cfaff1..762f9b547 100644 --- a/criu/net.c +++ b/criu/net.c @@ -17,6 +17,10 @@ #include #include +#if defined(CONFIG_HAS_NFTABLES_LIB_API_0) || defined(CONFIG_HAS_NFTABLES_LIB_API_1) +#include +#endif + #ifdef CONFIG_HAS_SELINUX #include #endif @@ -1897,6 +1901,55 @@ static inline int dump_iptables(struct cr_imgset *fds) return 0; } +#if defined(CONFIG_HAS_NFTABLES_LIB_API_0) || defined(CONFIG_HAS_NFTABLES_LIB_API_1) +static inline int dump_nftables(struct cr_imgset *fds) +{ + int ret = -1; + struct cr_img *img; + int 
img_fd; + FILE *fp; + struct nft_ctx *nft; + + nft = nft_ctx_new(NFT_CTX_DEFAULT); + if (!nft) + return -1; + + img = img_from_set(fds, CR_FD_NFTABLES); + img_fd = dup(img_raw_fd(img)); + if (img_fd < 0) { + pr_perror("dup() failed"); + goto nft_ctx_free_out; + } + + fp = fdopen(img_fd, "w"); + if (!fp) { + pr_perror("fdopen() failed"); + close(img_fd); + goto nft_ctx_free_out; + } + + nft_ctx_set_output(nft, fp); +#define DUMP_NFTABLES_CMD "list ruleset" +#if defined(CONFIG_HAS_NFTABLES_LIB_API_0) + if (nft_run_cmd_from_buffer(nft, DUMP_NFTABLES_CMD, strlen(DUMP_NFTABLES_CMD))) +#elif defined(CONFIG_HAS_NFTABLES_LIB_API_1) + if (nft_run_cmd_from_buffer(nft, DUMP_NFTABLES_CMD)) +#else + BUILD_BUG_ON(1); +#endif + goto fp_close_out; + + ret = 0; + +fp_close_out: + fclose(fp); +nft_ctx_free_out: + nft_ctx_free(nft); + + return ret; +} +#endif + static int dump_netns_conf(struct ns_id *ns, struct cr_imgset *fds) { void *buf, *o_buf; @@ -2149,6 +2202,60 @@ out: return ret; } +#if defined(CONFIG_HAS_NFTABLES_LIB_API_0) || defined(CONFIG_HAS_NFTABLES_LIB_API_1) +static inline int restore_nftables(int pid) +{ + int ret = -1; + struct cr_img *img; + struct nft_ctx *nft; + off_t img_data_size; + char *buf; + + img = open_image(CR_FD_NFTABLES, O_RSTR, pid); + if (img == NULL) + return -1; + if (empty_image(img)) { + /* Backward compatibility */ + pr_info("Skipping nft restore, no image"); + ret = 0; + goto image_close_out; + } + + if ((img_data_size = img_raw_size(img)) < 0) + goto image_close_out; + + if (read_img_str(img, &buf, img_data_size) < 0) + goto image_close_out; + + nft = nft_ctx_new(NFT_CTX_DEFAULT); + if (!nft) + goto buf_free_out; + + if (nft_ctx_buffer_output(nft) || nft_ctx_buffer_error(nft) || +#if defined(CONFIG_HAS_NFTABLES_LIB_API_0) + nft_run_cmd_from_buffer(nft, buf, strlen(buf))) +#elif defined(CONFIG_HAS_NFTABLES_LIB_API_1) + nft_run_cmd_from_buffer(nft, buf)) +#else + { + BUILD_BUG_ON(1); + } +#endif + goto nft_ctx_free_out; + + ret = 0; + 
+nft_ctx_free_out: + nft_ctx_free(nft); +buf_free_out: + xfree(buf); +image_close_out: + close_image(img); + + return ret; +} +#endif + int read_net_ns_img(void) { struct ns_id *ns; @@ -2380,6 +2487,10 @@ int dump_net_ns(struct ns_id *ns) ret = dump_rule(fds); if (!ret) ret = dump_iptables(fds); +#if defined(CONFIG_HAS_NFTABLES_LIB_API_0) || defined(CONFIG_HAS_NFTABLES_LIB_API_1) + if (!ret) + ret = dump_nftables(fds); +#endif if (!ret) ret = dump_netns_conf(ns, fds); } else if (ns->type != NS_ROOT) { @@ -2473,6 +2584,10 @@ static int prepare_net_ns_second_stage(struct ns_id *ns) ret = restore_rule(nsid); if (!ret) ret = restore_iptables(nsid); +#if defined(CONFIG_HAS_NFTABLES_LIB_API_0) || defined(CONFIG_HAS_NFTABLES_LIB_API_1) + if (!ret) + ret = restore_nftables(nsid); +#endif } if (!ret) diff --git a/scripts/build/Dockerfile.alpine b/scripts/build/Dockerfile.alpine index a1d1d9191..29a754058 100644 --- a/scripts/build/Dockerfile.alpine +++ b/scripts/build/Dockerfile.alpine @@ -14,6 +14,7 @@ RUN apk update && apk add \ libcap-dev \ libnet-dev \ libnl3-dev \ + nftables \ pkgconfig \ protobuf-c-dev \ protobuf-dev \ diff --git a/scripts/build/Dockerfile.fedora.tmpl b/scripts/build/Dockerfile.fedora.tmpl index 0500a8fc5..138588bce 100644 --- a/scripts/build/Dockerfile.fedora.tmpl +++ b/scripts/build/Dockerfile.fedora.tmpl @@ -10,6 +10,8 @@ RUN dnf install -y \ gnutls-devel \ iproute \ iptables \ + nftables \ + nftables-devel \ libaio-devel \ libasan \ libcap-devel \ diff --git a/scripts/feature-tests.mak b/scripts/feature-tests.mak index 39ddfd053..6f67c6035 100644 --- a/scripts/feature-tests.mak +++ b/scripts/feature-tests.mak @@ -147,4 +147,27 @@ int main(void) return 0; return 0; } + +endef + +define FEATURE_TEST_NFTABLES_LIB_API_0 + +#include + +int main(int argc, char **argv) +{ + return nft_run_cmd_from_buffer(nft_ctx_new(NFT_CTX_DEFAULT), \"cmd\", strlen(\"cmd\")); +} + +endef + +define FEATURE_TEST_NFTABLES_LIB_API_1 + +#include + +int main(int argc, char 
**argv) +{ + return nft_run_cmd_from_buffer(nft_ctx_new(NFT_CTX_DEFAULT), \"cmd\"); +} + endef From acb42456dc707a303b308fef67b2be92c81ab427 Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Mon, 11 Nov 2019 19:20:22 +0300 Subject: [PATCH 0110/1854] zdtm: nft tables preservation test Signed-off-by: Alexander Mikhalitsyn Signed-off-by: Alexander Mikhalitsyn [Added test_author to zdtm test] Signed-off-by: Dmitry Safonov --- scripts/build/Dockerfile.alpine | 1 + test/zdtm/static/Makefile | 1 + test/zdtm/static/netns-nft.c | 64 ++++++++++++++++++++++++++++ test/zdtm/static/netns-nft.checkskip | 3 ++ test/zdtm/static/netns-nft.desc | 5 +++ 5 files changed, 74 insertions(+) create mode 100644 test/zdtm/static/netns-nft.c create mode 100755 test/zdtm/static/netns-nft.checkskip create mode 100644 test/zdtm/static/netns-nft.desc diff --git a/scripts/build/Dockerfile.alpine b/scripts/build/Dockerfile.alpine index 29a754058..601a8693a 100644 --- a/scripts/build/Dockerfile.alpine +++ b/scripts/build/Dockerfile.alpine @@ -31,6 +31,7 @@ RUN apk add \ py-pip \ ip6tables \ iptables \ + nftables \ iproute2 \ tar \ bash \ diff --git a/test/zdtm/static/Makefile b/test/zdtm/static/Makefile index ea5d3c42e..19d93e315 100644 --- a/test/zdtm/static/Makefile +++ b/test/zdtm/static/Makefile @@ -288,6 +288,7 @@ TST_FILE = \ file_locks07 \ file_locks08 \ netns-nf \ + netns-nft \ maps_file_prot \ socket_close_data01 \ diff --git a/test/zdtm/static/netns-nft.c b/test/zdtm/static/netns-nft.c new file mode 100644 index 000000000..f4991afda --- /dev/null +++ b/test/zdtm/static/netns-nft.c @@ -0,0 +1,64 @@ +#include +#include +#include +#include + +#include "zdtmtst.h" + +const char *test_doc = "Check that nft rules (some) are kept"; +const char *test_author = "Alexander Mikhalitsyn "; + +char *filename; +TEST_OPTION(filename, string, "file name", 1); + +int main(int argc, char **argv) +{ + char cmd[128]; + + test_init(argc, argv); + + /* create nft table */ + if (system("nft add table 
inet netns-nft-zdtm-test")) { + pr_perror("Can't create nft table"); + return -1; + } + + /* create input chain in table */ + if (system("nft add chain inet netns-nft-zdtm-test input { type filter hook input priority 0 \\; }")) { + pr_perror("Can't create input chain in nft table"); + return -1; + } + + /* block ICMPv4 traffic */ + if (system("nft add rule inet netns-nft-zdtm-test input meta nfproto ipv4 icmp type { echo-request } reject")) { + pr_perror("Can't set input rule"); + return -1; + } + + /* save resulting nft table */ + sprintf(cmd, "nft list table inet netns-nft-zdtm-test > pre-%s", filename); + if (system(cmd)) { + pr_perror("Can't get nft table"); + return -1; + } + + test_daemon(); + test_waitsig(); + + /* get nft table */ + sprintf(cmd, "nft list table inet netns-nft-zdtm-test > post-%s", filename); + if (system(cmd)) { + fail("Can't get nft table"); + return -1; + } + + /* compare nft table before/after c/r */ + sprintf(cmd, "diff pre-%s post-%s", filename, filename); + if (system(cmd)) { + fail("nft table differ"); + return -1; + } + + pass(); + return 0; +} diff --git a/test/zdtm/static/netns-nft.checkskip b/test/zdtm/static/netns-nft.checkskip new file mode 100755 index 000000000..270cafeb5 --- /dev/null +++ b/test/zdtm/static/netns-nft.checkskip @@ -0,0 +1,3 @@ +#!/bin/bash + +test -f /usr/sbin/nft || exit 1 diff --git a/test/zdtm/static/netns-nft.desc b/test/zdtm/static/netns-nft.desc new file mode 100644 index 000000000..f53890a24 --- /dev/null +++ b/test/zdtm/static/netns-nft.desc @@ -0,0 +1,5 @@ +{ 'deps': [ '/bin/sh', + '/usr/sbin/nft', + '/usr/bin/diff'], + 'flags': 'suid', + 'flavor': 'ns uns'} From 7622b7a70eb7a00b2ba542f9118f4351b3df2538 Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Wed, 18 Dec 2019 23:32:32 +0000 Subject: [PATCH 0111/1854] files: fix ghost file error path Signed-off-by: Nicolas Viennot --- criu/files-reg.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/criu/files-reg.c 
b/criu/files-reg.c index 2f68bc03f..90fb7dd7f 100644 --- a/criu/files-reg.c +++ b/criu/files-reg.c @@ -844,10 +844,13 @@ static int dump_ghost_remap(char *path, const struct stat *st, gf->dev = phys_dev; gf->ino = st->st_ino; gf->id = ghost_file_ids++; - list_add_tail(&gf->list, &ghost_files); - if (dump_ghost_file(lfd, gf->id, st, phys_dev)) + if (dump_ghost_file(lfd, gf->id, st, phys_dev)) { + xfree(gf); return -1; + } + + list_add_tail(&gf->list, &ghost_files); dump_entry: rpe.orig_id = id; From 2ac43cd426badfc6d68582b203ace318e5f79427 Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Mon, 30 Dec 2019 20:27:40 +0000 Subject: [PATCH 0112/1854] python: Improve decoding of file flags Signed-off-by: Nicolas Viennot --- lib/py/images/pb2dict.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/lib/py/images/pb2dict.py b/lib/py/images/pb2dict.py index daaa7297e..6fce4be22 100644 --- a/lib/py/images/pb2dict.py +++ b/lib/py/images/pb2dict.py @@ -105,11 +105,22 @@ mmap_status_map = [ ] rfile_flags_map = [ - ('O_WRONLY', 0o1), - ('O_RDWR', 0o2), - ('O_APPEND', 0o2000), - ('O_DIRECT', 0o40000), - ('O_LARGEFILE', 0o100000), + ('O_WRONLY', 0o00000001), + ('O_RDWR', 0o00000002), + ('O_CREAT', 0o00000100), + ('O_EXCL', 0o00000200), + ('O_NOCTTY', 0o00000400), + ('O_TRUNC', 0o00001000), + ('O_APPEND', 0o00002000), + ('O_NONBLOCK', 0o00004000), + ('O_DSYNC', 0o00010000), + ('FASYNC', 0o00020000), + ('O_DIRECT', 0o00040000), + ('O_LARGEFILE', 0o00100000), + ('O_DIRECTORY', 0o00200000), + ('O_NOFOLLOW', 0o00400000), + ('O_NOATIME', 0o01000000), + ('O_CLOEXEC', 0o02000000), ] pmap_flags_map = [ From 8255caf27b5e2bb96af6affc161b8d0d3bbdccbe Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Mon, 30 Dec 2019 20:29:27 +0000 Subject: [PATCH 0113/1854] files: Remove O_CLOEXEC from file flags The kernel artificially adds the O_CLOEXEC flag when reading from the /proc/fdinfo/fd interface if FD_CLOEXEC is set on the file descriptor used to access 
the file. This commit removes the O_CLOEXEC flag in our file flags. To restore the proper FD_CLOEXEC value in each of the file descriptors, CRIU uses fcntl(F_GETFD) to retrieve the FD_CLOEXEC status, and restore it later with fcntl(F_SETFD). This is necessary because multiple file descriptors may point to the same open file. --- criu/files.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/criu/files.c b/criu/files.c index ffdaa459f..e26897870 100644 --- a/criu/files.c +++ b/criu/files.c @@ -382,7 +382,13 @@ static int fill_fd_params(struct pid *owner_pid, int fd, int lfd, p->fs_type = fsbuf.f_type; p->fd = fd; p->pos = fdinfo.pos; - p->flags = fdinfo.flags; + /* + * The kernel artificially adds the O_CLOEXEC flag on the file pointer + * flags by looking at the flags on the file descriptor (see kernel + * code fs/proc/fd.c). FD_CLOEXEC is a file descriptor property, which + * is saved in fd_flags. + */ + p->flags = fdinfo.flags & ~O_CLOEXEC; p->mnt_id = fdinfo.mnt_id; p->pid = owner_pid->real; p->fd_flags = opts->flags; From 75a74423801a83ef7657e67e0b016a76f741db11 Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Mon, 30 Dec 2019 20:21:03 +0000 Subject: [PATCH 0114/1854] files: Add FD_CLOEXEC test --- test/zdtm/static/Makefile | 1 + test/zdtm/static/file_cloexec.c | 63 +++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+) create mode 100644 test/zdtm/static/file_cloexec.c diff --git a/test/zdtm/static/Makefile b/test/zdtm/static/Makefile index 19d93e315..5ca05ee9e 100644 --- a/test/zdtm/static/Makefile +++ b/test/zdtm/static/Makefile @@ -122,6 +122,7 @@ TST_NOFILE := \ groups \ pdeath_sig \ file_fown \ + file_cloexec \ proc-self \ eventfs00 \ epoll \ diff --git a/test/zdtm/static/file_cloexec.c b/test/zdtm/static/file_cloexec.c new file mode 100644 index 000000000..b8eba39e5 --- /dev/null +++ b/test/zdtm/static/file_cloexec.c @@ -0,0 +1,63 @@ +#include +#include +#include +#include +#include +#include +#include +#include 
+#include + +#include "zdtmtst.h" + +const char *test_doc = "Check FD_CLOEXEC flag"; +const char *test_author = "Nicolas Viennot "; + +#define err(exitcode, msg, ...) ({ pr_perror(msg, ##__VA_ARGS__); exit(exitcode); }) + +static void assert_fd_flags(int fd, int mask, int value) +{ + int flags = fcntl(fd, F_GETFD); + if (flags == -1) + err(1, "Can't get fd flags"); + + if ((flags & mask) != value) { + fail("fd flags mismatch"); + exit(1); + } +} + +int main(int argc, char *argv[]) +{ + int fd1, fd2, fd3, fd4; + + test_init(argc, argv); + + fd1 = open("/", O_RDONLY | O_CLOEXEC); + if (fd1 < 0) + err(1, "Can't open()"); + + fd2 = open("/", O_RDONLY); + if (fd2 < 0) + err(1, "Can't open()"); + + fd3 = dup(fd1); + if (fd3 < 0) + err(1, "Can't dup()"); + + fd4 = fcntl(fd2, F_DUPFD_CLOEXEC, 0); + if (fd4 < 0) + err(1, "Can't dup()"); + + test_daemon(); + test_waitsig(); + + assert_fd_flags(fd1, FD_CLOEXEC, FD_CLOEXEC); + assert_fd_flags(fd2, FD_CLOEXEC, 0); + assert_fd_flags(fd3, FD_CLOEXEC, 0); + assert_fd_flags(fd4, FD_CLOEXEC, FD_CLOEXEC); + + pass(); + + return 0; +} From 8b5dea33f6bff8e8d35e709e026218caf7d4a2d8 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Thu, 9 Jan 2020 10:31:41 +0000 Subject: [PATCH 0115/1854] travis: switch alpine to python3 Now that Python 2 has officially reached its end of life also switch the Alpine based test to Python 3. Signed-off-by: Adrian Reber --- scripts/build/Dockerfile.alpine | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/scripts/build/Dockerfile.alpine b/scripts/build/Dockerfile.alpine index 601a8693a..5785102da 100644 --- a/scripts/build/Dockerfile.alpine +++ b/scripts/build/Dockerfile.alpine @@ -18,7 +18,7 @@ RUN apk update && apk add \ pkgconfig \ protobuf-c-dev \ protobuf-dev \ - python \ + python3 \ sudo COPY . 
/criu @@ -28,7 +28,6 @@ RUN mv .ccache /tmp && make mrproper && ccache -sz && \ date && make -j $(nproc) CC="$CC" && date && ccache -s RUN apk add \ - py-pip \ ip6tables \ iptables \ nftables \ @@ -37,10 +36,16 @@ RUN apk add \ bash \ go \ e2fsprogs \ + py-yaml \ + py3-flake8 \ asciidoctor # The rpc test cases are running as user #1000, let's add the user RUN adduser -u 1000 -D test -RUN pip install PyYAML future protobuf ipaddress junit_xml flake8 +RUN pip3 install protobuf junit_xml + +# For zdtm we need an unversioned python binary +RUN ln -s /usr/bin/python3 /usr/bin/python + RUN make -C test/zdtm From 1e9ff2aa03206102a7aeaf1d32f61056d3d05e46 Mon Sep 17 00:00:00 2001 From: Nidhi Gupta Date: Sun, 8 Dec 2019 10:14:40 +0530 Subject: [PATCH 0116/1854] Add Socket-based Java Functional Tests Signed-off-by: Nidhi Gupta --- scripts/build/Dockerfile.openj9-ubuntu | 1 + test/javaTests/README.md | 11 +- .../criu/java/tests/CheckpointRestore.java | 1 + .../src/org/criu/java/tests/Helper.java | 2 +- .../src/org/criu/java/tests/SocketHelper.java | 100 ++++++++ .../src/org/criu/java/tests/Sockets.java | 141 ++++++++++++ .../org/criu/java/tests/SocketsClient.java | 133 +++++++++++ .../org/criu/java/tests/SocketsConnect.java | 157 +++++++++++++ .../criu/java/tests/SocketsConnectClient.java | 130 +++++++++++ .../criu/java/tests/SocketsConnectServer.java | 151 ++++++++++++ .../src/org/criu/java/tests/SocketsData.java | 156 +++++++++++++ .../criu/java/tests/SocketsDataClient.java | 141 ++++++++++++ .../criu/java/tests/SocketsDataServer.java | 124 ++++++++++ .../org/criu/java/tests/SocketsListen.java | 153 +++++++++++++ .../criu/java/tests/SocketsListenClient.java | 136 +++++++++++ .../criu/java/tests/SocketsListenServer.java | 160 +++++++++++++ .../org/criu/java/tests/SocketsMultiple.java | 152 +++++++++++++ .../java/tests/SocketsMultipleClient.java | 174 ++++++++++++++ .../java/tests/SocketsMultipleServer.java | 215 ++++++++++++++++++ .../org/criu/java/tests/SocketsServer.java | 
142 ++++++++++++ test/javaTests/test.xml | 46 ++++ 21 files changed, 2424 insertions(+), 2 deletions(-) create mode 100644 test/javaTests/src/org/criu/java/tests/SocketHelper.java create mode 100644 test/javaTests/src/org/criu/java/tests/Sockets.java create mode 100644 test/javaTests/src/org/criu/java/tests/SocketsClient.java create mode 100644 test/javaTests/src/org/criu/java/tests/SocketsConnect.java create mode 100644 test/javaTests/src/org/criu/java/tests/SocketsConnectClient.java create mode 100644 test/javaTests/src/org/criu/java/tests/SocketsConnectServer.java create mode 100644 test/javaTests/src/org/criu/java/tests/SocketsData.java create mode 100644 test/javaTests/src/org/criu/java/tests/SocketsDataClient.java create mode 100644 test/javaTests/src/org/criu/java/tests/SocketsDataServer.java create mode 100644 test/javaTests/src/org/criu/java/tests/SocketsListen.java create mode 100644 test/javaTests/src/org/criu/java/tests/SocketsListenClient.java create mode 100644 test/javaTests/src/org/criu/java/tests/SocketsListenServer.java create mode 100644 test/javaTests/src/org/criu/java/tests/SocketsMultiple.java create mode 100644 test/javaTests/src/org/criu/java/tests/SocketsMultipleClient.java create mode 100644 test/javaTests/src/org/criu/java/tests/SocketsMultipleServer.java create mode 100644 test/javaTests/src/org/criu/java/tests/SocketsServer.java diff --git a/scripts/build/Dockerfile.openj9-ubuntu b/scripts/build/Dockerfile.openj9-ubuntu index 13d9080ff..f235cc004 100644 --- a/scripts/build/Dockerfile.openj9-ubuntu +++ b/scripts/build/Dockerfile.openj9-ubuntu @@ -18,6 +18,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends protobuf-c-comp make \ git \ pkg-config \ + iptables \ gcc \ maven diff --git a/test/javaTests/README.md b/test/javaTests/README.md index 670741677..4315b9b12 100644 --- a/test/javaTests/README.md +++ b/test/javaTests/README.md @@ -30,7 +30,16 @@ Here we test the File-Based Java APIs by checkpointing the application 
in the fo ## Memory mapping Java APIs Here we test the Memory Mapping APIs by checkpointing the application in following scenario and verifying the contents after restore: -- Memory-mapping a file and writing its content to another file. (MemoryMappings.java) +- Memory-mapping a file and writing its content to another file. (MemoryMappings.java) + +## Socket-based Java APIs + +Here we test the Socket-based APIs by checkpointing the application in the following scenarios and verifying the state after restore: +- Checkpointing the server process in the middle of data transfer. (Sockets.java) +- Checkpointing the server process after it has bound to a port but is not listening for client connections. (SocketsListen.java) +- Checkpointing the server process while it is listening for client connections, and no client has connected yet. (SocketsConnect.java) +- Checkpointing the server process when it has multiple clients in multiple states connected to it. (SocketsMultiple.java) +- Checkpointing the client process in the middle of data transfer. (SocketsData.java) ### Prerequisites for running the tests: - Maven diff --git a/test/javaTests/src/org/criu/java/tests/CheckpointRestore.java b/test/javaTests/src/org/criu/java/tests/CheckpointRestore.java index b848c9938..9d61e126f 100644 --- a/test/javaTests/src/org/criu/java/tests/CheckpointRestore.java +++ b/test/javaTests/src/org/criu/java/tests/CheckpointRestore.java @@ -154,6 +154,7 @@ public class CheckpointRestore { */ while (Helper.STATE_INIT == currentState) { currentState = mappedByteBuffer.getChar(Helper.MAPPED_INDEX); + Thread.sleep(100); } /* diff --git a/test/javaTests/src/org/criu/java/tests/Helper.java b/test/javaTests/src/org/criu/java/tests/Helper.java index fdf20bb52..9a1b33328 100644 --- a/test/javaTests/src/org/criu/java/tests/Helper.java +++ b/test/javaTests/src/org/criu/java/tests/Helper.java @@ -30,7 +30,7 @@ class Helper { * the pid to the pidFile.
* * @param testName Name of the java test - * @param pid Pid of the java test process + * @param pid Pid of the java test process * @param logger * @return 0 or 1 denoting whether the function was successful or not. * @throws IOException diff --git a/test/javaTests/src/org/criu/java/tests/SocketHelper.java b/test/javaTests/src/org/criu/java/tests/SocketHelper.java new file mode 100644 index 000000000..684125019 --- /dev/null +++ b/test/javaTests/src/org/criu/java/tests/SocketHelper.java @@ -0,0 +1,100 @@ +package org.criu.java.tests; + +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.nio.MappedByteBuffer; +import java.util.logging.FileHandler; +import java.util.logging.Level; +import java.util.logging.Logger; +import java.util.logging.SimpleFormatter; + +class SocketHelper { + + static char STATE_LISTEN = 'S'; + static char STATE_SUCCESS = 'Z'; + static String IP_ADDRESS = "127.0.0.1"; + + /** + * Creates a new log file, for the logger to log in. 
+ * + * @param testName Name of the server or client program + * @param parentTestName Name of the test + * @param logger Logger that gets the file handler attached and its level set to FINE + * @throws IOException If the log file handler cannot be created + */ + static void init(String testName, String parentTestName, Logger logger) throws IOException { + FileHandler handler = new FileHandler(Helper.OUTPUT_FOLDER_NAME + "/" + parentTestName + "/" + testName + ".log", false); + handler.setFormatter(new SimpleFormatter()); + handler.setLevel(Level.FINE); + logger.addHandler(handler); + logger.setLevel(Level.FINE); + } + + /** + * Writes the pid of the process to be checkpointed to the test's pid file, replacing any previous content + * + * @param parentTestName Name of the test + * @param pid Pid of the process to be checkpointed + * @throws IOException If the pid file cannot be opened or written + */ + static void writePid(String parentTestName, String pid) throws IOException { + File pidfile = new File(Helper.OUTPUT_FOLDER_NAME + "/" + parentTestName + "/" + parentTestName + Helper.PID_APPEND); + /* + * Overwriting pid to be checkpointed; try-with-resources closes the writer even when the write fails + */ + try (BufferedWriter pidwriter = new BufferedWriter(new FileWriter(pidfile))) { + pidwriter.write(pid + "\n"); + } + } + + /** + * Waits for the MappedByteBuffer to change state from STATE_CHECKPOINT to STATE_RESTORE + * + * @param socketMappedBuffer MappedByteBuffer between the client, server and the controller process.
+ * @param logger Logger used to report whether the restore state was reached + */ + static void socketWaitForRestore(MappedByteBuffer socketMappedBuffer, Logger logger) { + while (Helper.STATE_CHECKPOINT == socketMappedBuffer.getChar(Helper.MAPPED_INDEX)) { + /* + * Poll roughly once per millisecond instead of spinning on a CPU core. + */ + java.util.concurrent.locks.LockSupport.parkNanos(1000000L); + } + if (Helper.STATE_RESTORE != socketMappedBuffer.getChar(Helper.MAPPED_INDEX)) { + logger.log(Level.SEVERE, "Server socket was not in expected restore state " + socketMappedBuffer.getChar(Helper.MAPPED_INDEX)); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } else { + logger.log(Level.INFO, "Restored!!!"); + } + } + + /** + * Puts the MappedByteBuffer to Helper.STATE_CHECKPOINT and waits for CheckpointRestore.java to change its state to Helper.STATE_RESTORE. + * If any other state shows up, the buffer is set to Helper.STATE_FAIL, both processes are destroyed and the JVM exits with status 1. + * + * @param b MappedByteBuffer between the controller process and CheckpointRestore.java + * @param logger Logger to log the messages + * @param p1 First child process to destroy on failure (the callers in Sockets.java and SocketsConnect.java pass the server process) + * @param p2 Second child process to destroy on failure (those callers pass the client process) + */ + static void checkpointAndWait(MappedByteBuffer b, Logger logger, Process p1, Process p2) { + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_CHECKPOINT); + char c = b.getChar(Helper.MAPPED_INDEX); + while (Helper.STATE_CHECKPOINT == c) { + /* + * Poll roughly once per millisecond instead of spinning on a CPU core. + */ + java.util.concurrent.locks.LockSupport.parkNanos(1000000L); + c = b.getChar(Helper.MAPPED_INDEX); + } + if (Helper.STATE_TERMINATE == c) { + logger.log(Level.SEVERE, "Error during checkpoint-restore, Test terminated"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + p1.destroy(); + p2.destroy(); + System.exit(1); + } + if (Helper.STATE_RESTORE != c) { + logger.log(Level.SEVERE, "Error: Test state is not the expected Restored state"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + p1.destroy(); + p2.destroy(); + System.exit(1); + } + } +} diff --git a/test/javaTests/src/org/criu/java/tests/Sockets.java b/test/javaTests/src/org/criu/java/tests/Sockets.java new file mode 100644 index 000000000..94cc217c4 --- /dev/null +++ b/test/javaTests/src/org/criu/java/tests/Sockets.java @@ -0,0 +1,141 @@ +package org.criu.java.tests; + +import
java.io.File; +import java.io.PrintWriter; +import java.io.StringWriter; +import java.lang.management.ManagementFactory; +import java.lang.management.RuntimeMXBean; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.channels.FileChannel.MapMode; +import java.nio.file.StandardOpenOption; +import java.util.logging.Level; +import java.util.logging.Logger; + +class Sockets { + static String TESTNAME = "Sockets"; + + /** + * Runs the client and server processes and checkpoints the server process while it is in the middle of data transfer + * + * @param args Not used + */ + public static void main(String[] args) { + MappedByteBuffer b = null, socketMappedBuffer = null; + FileChannel channel; + String pid; + String port = "49200"; + Logger logger = null; + try { + logger = Logger.getLogger(Helper.PACKAGE_NAME + "." + TESTNAME); + + /* + * Mapped buffer 'b' to communicate between CheckpointRestore.java and this process. + */ + File f = new File(Helper.MEMORY_MAPPED_FILE_NAME); + channel = FileChannel.open(f.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE); + b = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE); + channel.close(); + + RuntimeMXBean bean = ManagementFactory.getRuntimeMXBean(); + pid = bean.getName(); + Helper.init(TESTNAME, pid, logger); + logger.log(Level.INFO, "Test init done; pid written to pid file; beginning with test"); + + if (Helper.STATE_INIT != b.getChar(Helper.MAPPED_INDEX)) { + logger.log(Level.SEVERE, "Error: Error in memory mapping, test is not in init state"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + logger.log(Level.INFO, "Creating socketBufferFile and setting the init value of buffer"); + + /* + * Socket Mapped Buffer to communicate between server process, client process and this process.
+ */ + File socketfile = new File(Helper.OUTPUT_FOLDER_NAME + "/" + TESTNAME + "/SocketsFile"); + channel = FileChannel.open(socketfile.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE); + socketMappedBuffer = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE); + channel.close(); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_INIT); + + logger.log(Level.INFO, "Starting server and client process"); + ProcessBuilder builder = new ProcessBuilder("java", "-cp", "target/classes", Helper.PACKAGE_NAME + "." + "SocketsServer", TESTNAME, port); + Process serverProcess = builder.start(); + logger.log(Level.INFO, "Server process started"); + builder = new ProcessBuilder("java", "-cp", "target/classes", Helper.PACKAGE_NAME + "." + "SocketsClient", TESTNAME, port); + Process clientProcess = builder.start(); + + while (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_INIT || socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == SocketHelper.STATE_LISTEN) { + ; + } + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_END) { + logger.log(Level.SEVERE, "Killing the server and client process"); + logger.log(Level.SEVERE, "Error took place in the client or server process; check their log for details"); + serverProcess.destroy(); + clientProcess.destroy(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != Helper.STATE_CHECKPOINT) { + logger.log(Level.SEVERE, "Killing the server and client process"); + logger.log(Level.SEVERE, "State is not the expected 'to be checkpointed' state"); + serverProcess.destroy(); + clientProcess.destroy(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_CHECKPOINT) { + logger.log(Level.INFO, "Going to checkpoint server process"); + SocketHelper.checkpointAndWait(b, logger, serverProcess, 
clientProcess); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_RESTORE); + logger.log(Level.INFO, "Process has been restored"); + } + /* + * Loop while test is running. + */ + while (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_RESTORE || socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == SocketHelper.STATE_SUCCESS) { + ; + } + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != Helper.STATE_FAIL && socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != Helper.STATE_PASS) { + logger.log(Level.SEVERE, "Killing the server and client process"); + logger.log(Level.SEVERE, "Received wrong message from the child process: not the expected finish message"); + logger.log(Level.SEVERE, "Check their log files for more details"); + clientProcess.destroy(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_FAIL) { + logger.log(Level.SEVERE, "Killing the server and client process"); + logger.log(Level.SEVERE, "Error in the client or server process: check their log for details"); + clientProcess.destroy(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + + /* + * Client process puts socketMappedBuffer to Pass state if the test passed. 
+ */ + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_PASS) { + logger.log(Level.INFO, Helper.PASS_MESSAGE); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_PASS); + } else { + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + + System.exit(0); + + } catch (Exception e) { + if (null != logger) { + StringWriter writer = new StringWriter(); + PrintWriter printWriter = new PrintWriter(writer); + e.printStackTrace(printWriter); + logger.log(Level.SEVERE, "Exception occured:" + e); + logger.log(Level.FINE, writer.toString()); + } + if (b != null) { + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + System.exit(5); + } + } +} diff --git a/test/javaTests/src/org/criu/java/tests/SocketsClient.java b/test/javaTests/src/org/criu/java/tests/SocketsClient.java new file mode 100644 index 000000000..1c8e7b9a1 --- /dev/null +++ b/test/javaTests/src/org/criu/java/tests/SocketsClient.java @@ -0,0 +1,133 @@ +package org.criu.java.tests; + +import java.io.*; +import java.net.Socket; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.channels.FileChannel.MapMode; +import java.nio.file.StandardOpenOption; +import java.util.logging.Level; +import java.util.logging.Logger; + +class SocketsClient { + static String TESTNAME = "SocketsClient"; + + public static void main(String[] args) { + MappedByteBuffer socketMappedBuffer = null; + FileChannel channel; + Socket socket = null; + Logger logger = null; + String msg1 = "Ch@ckM@$$@Ge!1", msg2 = "cH@C!m$SG!!2", + readMssg, msg3 = "@Ft@rCPM$$g3", msg4 = "Aft@rCPM$$g4"; + String parentTestName, portArg; + int port; + + try { + parentTestName = args[0]; + portArg = args[1]; + port = Integer.parseInt(portArg); + + /* + * Socket Mapped Buffer to communicate between server process, client process and the calling parent process. 
+ */ + File socketfile = new File(Helper.OUTPUT_FOLDER_NAME + "/" + parentTestName + "/SocketsFile"); + channel = FileChannel.open(socketfile.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE); + socketMappedBuffer = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE); + channel.close(); + + logger = Logger.getLogger(Helper.PACKAGE_NAME + "." + TESTNAME); + SocketHelper.init(TESTNAME, parentTestName, logger); + + logger.log(Level.INFO, "Begin"); + logger.log(Level.INFO, "Parent name: " + parentTestName); + + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != Helper.STATE_INIT && socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != SocketHelper.STATE_LISTEN) { + logger.log(Level.SEVERE, "Error: Socket-buffer not in expected Init state"); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + } + logger.log(Level.INFO, "Client socket sending req to server at IP: 127.0.0.1 port:" + port); + + /* + * Ensure client does not try to connect to port before server has bound itself. + */ + while (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_INIT) { + ; + } + /* + * Socket Buffer should be put in SocketHelper.STATE_LISTEN state by server process, just before + * it starts listening for client connections. 
+ */ + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != SocketHelper.STATE_LISTEN) { + logger.log(Level.SEVERE, "Error: Buffer does not contain the expected 'server bound to port and listening' state"); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + /* + * Ensure server has bound to port + */ + try { + Thread.sleep(10); + } catch (InterruptedException e) { + logger.log(Level.WARNING, "InterruptedException occurred!"); + } + + socket = new Socket(SocketHelper.IP_ADDRESS, port); + + PrintStream out = new PrintStream(socket.getOutputStream()); + BufferedReader br = new BufferedReader(new InputStreamReader(socket.getInputStream())); + + logger.log(Level.INFO, "Sending message to server " + msg1); + out.println(msg1); + + readMssg = br.readLine(); + logger.log(Level.INFO, "Message received from server " + readMssg); + if (!msg2.equals(readMssg)) { + logger.log(Level.SEVERE, "Error: wrong message received; message expected " + msg2); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + /* + * Stop here, as the msg4 check below does, so that a later state write cannot turn this failure into a pass. + */ + System.exit(1); + } + + logger.log(Level.INFO, "Sending message to server " + msg3); + out.println(msg3); + + readMssg = br.readLine(); + logger.log(Level.INFO, "Message received from server " + readMssg); + if (!msg4.equals(readMssg)) { + logger.log(Level.SEVERE, "Error: wrong message received; message expected " + msg4); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + + socket.close(); + /* + * Wait for server process to end and then check whether it ended successfully or not + * If it has finished properly the socketMappedBuffer will contain SocketHelper.STATE_SUCCESS + */ + logger.log(Level.INFO, "Waiting for server process to end...."); + while (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_RESTORE) { + ; + } + /* + * Check the server process has ended successfully, if it was a success put Mapped Buffer to pass state, else to failed state + */ + if
(socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == SocketHelper.STATE_SUCCESS) { + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_PASS); + } else { + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + logger.log(Level.INFO, "Test ends"); + + } catch (Exception exception) { + if (null != logger) { + StringWriter writer = new StringWriter(); + PrintWriter printWriter = new PrintWriter(writer); + exception.printStackTrace(printWriter); + logger.log(Level.SEVERE, "Exception occured:" + exception); + logger.log(Level.FINE, writer.toString()); + } + + if (socketMappedBuffer != null) { + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + } + } +} diff --git a/test/javaTests/src/org/criu/java/tests/SocketsConnect.java b/test/javaTests/src/org/criu/java/tests/SocketsConnect.java new file mode 100644 index 000000000..164c21089 --- /dev/null +++ b/test/javaTests/src/org/criu/java/tests/SocketsConnect.java @@ -0,0 +1,157 @@ +package org.criu.java.tests; + +import java.io.File; +import java.io.PrintWriter; +import java.io.StringWriter; +import java.lang.management.ManagementFactory; +import java.lang.management.RuntimeMXBean; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.channels.FileChannel.MapMode; +import java.nio.file.StandardOpenOption; +import java.util.logging.Level; +import java.util.logging.Logger; + +class SocketsConnect { + static String TESTNAME = "SocketsConnect"; + + /** + * Runs the client and server processes and checkpoints the server while it is listening for incoming client connection requests on a port but no client has connected yet + * + * @param args Not used + */ + public static void main(String[] args) { + MappedByteBuffer b = null, socketMappedBuffer = null; + FileChannel channel; + String pid; + String port = "49200"; + Logger logger = null; + try { + logger = Logger.getLogger(Helper.PACKAGE_NAME + "."
+ TESTNAME); + + /* + * Mapped buffer 'b' to communicate between CheckpointRestore.java and this process. + */ + File f = new File(Helper.MEMORY_MAPPED_FILE_NAME); + channel = FileChannel.open(f.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE); + b = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE); + channel.close(); + + RuntimeMXBean bean = ManagementFactory.getRuntimeMXBean(); + pid = bean.getName(); + Helper.init(TESTNAME, pid, logger); + logger.log(Level.INFO, "Test init done; pid written to pid file; beginning with test"); + + if (b.getChar(Helper.MAPPED_INDEX) != Helper.STATE_INIT) { + logger.log(Level.SEVERE, "Error: Error in memory mapping, test is not in init state"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + + /* + * Socket Mapped Buffer to communicate between server process, client process and this process. + */ + logger.log(Level.INFO, "Creating socketbufferfile and setting the init value of buffer"); + File socketfile = new File(Helper.OUTPUT_FOLDER_NAME + "/" + TESTNAME + "/SocketsConnectFile"); + channel = FileChannel.open(socketfile.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE); + socketMappedBuffer = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE); + channel.close(); + + /* + * Set socketMappedBuffer to init state. + */ + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_INIT); + logger.log(Level.INFO, "Starting server and client process"); + ProcessBuilder builder = new ProcessBuilder("java", "-cp", "target/classes", Helper.PACKAGE_NAME + "." + "SocketsConnectServer", TESTNAME, port); + Process serverProcess = builder.start(); + builder = new ProcessBuilder("java", "-cp", "target/classes", Helper.PACKAGE_NAME + "." 
+ "SocketsConnectClient", TESTNAME, port); + Process clientProcess = builder.start(); + + while (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_INIT) { + ; + } + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_END) { + logger.log(Level.SEVERE, "Killing the server process and client process"); + logger.log(Level.SEVERE, "Some error took place in the client or server process: check their log for details"); + serverProcess.destroy(); + clientProcess.destroy(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_FAIL) { + logger.log(Level.SEVERE, "Killing the server process and client process"); + logger.log(Level.SEVERE, "Exception occured in the client or server process: check their log for details"); + serverProcess.destroy(); + clientProcess.destroy(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != Helper.STATE_CHECKPOINT) { + logger.log(Level.SEVERE, "Killing the server process and client process"); + logger.log(Level.SEVERE, "State is not the expected 'to be checkpointed' state"); + serverProcess.destroy(); + clientProcess.destroy(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_CHECKPOINT) { + logger.log(Level.INFO, "Going to checkpoint server process"); + try { + Thread.sleep(10); + } catch (InterruptedException e) { + logger.log(Level.WARNING, "Thread was interrupted"); + } + SocketHelper.checkpointAndWait(b, logger, serverProcess, clientProcess); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_RESTORE); + logger.log(Level.INFO, "Process has been restored!"); + } + + while (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_RESTORE || socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == SocketHelper.STATE_LISTEN) { + ; + } + char 
bufchar = socketMappedBuffer.getChar(Helper.MAPPED_INDEX); + if (bufchar != Helper.STATE_FAIL && bufchar != Helper.STATE_PASS && bufchar != SocketHelper.STATE_SUCCESS) { + logger.log(Level.SEVERE, "Received wrong message from the child process: not the expected finish message"); + logger.log(Level.SEVERE, "Check their log files for more details"); + clientProcess.destroy(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_FAIL || socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_END) { + logger.log(Level.SEVERE, "Error in the client or server process: check their log for details"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + while (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == SocketHelper.STATE_SUCCESS) { + ; + } + + /* + * Client process puts socketMappedBuffer to 'P'-Pass state if the test passed. + * Send pass message to Checkpoint-restore.java + */ + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_PASS) { + logger.log(Level.INFO, Helper.PASS_MESSAGE); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_PASS); + } else { + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + + System.exit(0); + + } catch (Exception e) { + if (null != logger) { + StringWriter writer = new StringWriter(); + PrintWriter printWriter = new PrintWriter(writer); + e.printStackTrace(printWriter); + logger.log(Level.SEVERE, "Exception occured:" + e); + logger.log(Level.FINE, writer.toString()); + } + if (b != null) { + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + System.exit(5); + } + } +} diff --git a/test/javaTests/src/org/criu/java/tests/SocketsConnectClient.java b/test/javaTests/src/org/criu/java/tests/SocketsConnectClient.java new file mode 100644 index 000000000..ed1c7fab3 --- /dev/null +++ b/test/javaTests/src/org/criu/java/tests/SocketsConnectClient.java @@ -0,0 +1,130 @@ +package org.criu.java.tests; 
+ +import java.io.*; +import java.net.Socket; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.channels.FileChannel.MapMode; +import java.nio.file.StandardOpenOption; +import java.util.logging.Level; +import java.util.logging.Logger; + +class SocketsConnectClient { + static String TESTNAME = "SocketsConnectClient"; + + public static void main(String[] args) { + MappedByteBuffer socketMappedBuffer = null; + FileChannel channel; + Socket socket = null; + String parentTestName, portArg; + int port; + Logger logger = null; + try { + parentTestName = args[0]; + portArg = args[1]; + port = Integer.parseInt(portArg); + + String msg1 = "Ch@ckM@$$@Ge!1", msg2 = "cH@C!m$SG!!2", + readMssg, msg3 = "@Ft@rCPM$$g3", msg4 = "Aft@rCPM$$g4"; + + /* + * Socket Mapped Buffer to communicate between server process, client process and the calling parent process. + */ + File socketfile = new File(Helper.OUTPUT_FOLDER_NAME + "/" + parentTestName + "/SocketsConnectFile"); + channel = FileChannel.open(socketfile.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE); + socketMappedBuffer = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE); + channel.close(); + + logger = Logger.getLogger(Helper.PACKAGE_NAME + "." 
+ TESTNAME); + SocketHelper.init(TESTNAME, parentTestName, logger); + + logger.log(Level.INFO, "Begin"); + logger.log(Level.INFO, "Parent name: " + parentTestName); + + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != Helper.STATE_INIT && socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != Helper.STATE_CHECKPOINT && socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != Helper.STATE_RESTORE) { + logger.log(Level.SEVERE, "Error: Socket-buffer not in expected Init state"); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + logger.log(Level.INFO, "Waiting for CR"); + /* + * Wait for Checkpoint-Restore to occur + */ + while (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_INIT || socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_CHECKPOINT) { + ; + } + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != Helper.STATE_RESTORE) { + logger.log(Level.SEVERE, "Error:Buffer does not contain the expected restored state: " + socketMappedBuffer.getChar(Helper.MAPPED_INDEX)); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + + logger.log(Level.INFO, "Restored"); + logger.log(Level.INFO, "Client socket sending req to server at IP: 127.0.0.1 port:" + port); + + /* + * The server should have been listening for client connections when it was checkpointed, and it should continue to listen after restore.
+ */ + try { + socket = new Socket(SocketHelper.IP_ADDRESS, port); + } catch (Exception e) { + logger.log(Level.SEVERE, "Exception occured when connecting to port: " + e); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + PrintStream out = new PrintStream(socket.getOutputStream()); + BufferedReader br = new BufferedReader(new InputStreamReader(socket.getInputStream())); + + logger.log(Level.INFO, "Sending message to server " + msg1); + out.println(msg1); + + readMssg = br.readLine(); + logger.log(Level.INFO, "message received from server " + readMssg); + if (!msg2.equals(readMssg)) { + logger.log(Level.SEVERE, "wrong message received; Expected " + msg2); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + logger.log(Level.INFO, "Sending message to server " + msg3); + out.println(msg3); + + readMssg = br.readLine(); + logger.log(Level.INFO, "message received from server " + readMssg); + if (!msg4.equals(readMssg)) { + logger.log(Level.SEVERE, "wrong message received; Expected " + msg4); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + + socket.close(); + + /* + * Wait for server process to end. 
+ */ + while (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_RESTORE) { + ; + } + /* + * Check the server process has ended successfully, if it was a success put Mapped Buffer to pass state, else to failed state + */ + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == SocketHelper.STATE_SUCCESS) { + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_PASS); + } else { + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + } catch (Exception exception) { + if (null != logger) { + StringWriter writer = new StringWriter(); + PrintWriter printWriter = new PrintWriter(writer); + exception.printStackTrace(printWriter); + logger.log(Level.SEVERE, "Exception occured:" + exception); + logger.log(Level.FINE, writer.toString()); + } + + if (socketMappedBuffer != null) { + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + } + } +} diff --git a/test/javaTests/src/org/criu/java/tests/SocketsConnectServer.java b/test/javaTests/src/org/criu/java/tests/SocketsConnectServer.java new file mode 100644 index 000000000..1e4cf3aeb --- /dev/null +++ b/test/javaTests/src/org/criu/java/tests/SocketsConnectServer.java @@ -0,0 +1,151 @@ +package org.criu.java.tests; + +import java.io.*; +import java.lang.management.ManagementFactory; +import java.lang.management.RuntimeMXBean; +import java.net.ServerSocket; +import java.net.Socket; +import java.net.SocketException; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.channels.FileChannel.MapMode; +import java.nio.file.StandardOpenOption; +import java.util.logging.Level; +import java.util.logging.Logger; + +class SocketsConnectServer { + static String TESTNAME = "SocketsConnectServer"; + + public static void main(String[] args) { + MappedByteBuffer socketMappedBuffer = null; + FileChannel channel; + Socket socket = null; + String msg1 = "Ch@ckM@$$@Ge!1", msg2 = "cH@C!m$SG!!2", msg3 = "@Ft@rCPM$$g3", + msg4 = 
"Aft@rCPM$$g4", readMssg; + Logger logger = null; + String parentTestName, portArg; + int port; + try { + parentTestName = args[0]; + portArg = args[1]; + port = Integer.parseInt(portArg); + + /* + * Socket Mapped Buffer to communicate between server process, client process and the calling parent process. + */ + File socketfile = new File(Helper.OUTPUT_FOLDER_NAME + "/" + parentTestName + "/SocketsConnectFile"); + channel = FileChannel.open(socketfile.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE); + socketMappedBuffer = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE); + channel.close(); + + logger = Logger.getLogger(Helper.PACKAGE_NAME + "." + TESTNAME); + SocketHelper.init(TESTNAME, parentTestName, logger); + + RuntimeMXBean bean = ManagementFactory.getRuntimeMXBean(); + String pid = bean.getName(); + SocketHelper.writePid(parentTestName, pid); + + logger.log(Level.INFO, "Begin"); + logger.log(Level.INFO, "Parent name: " + parentTestName); + logger.log(Level.INFO, "Server pid: " + pid); + logger.log(Level.INFO, "socket buffer connection opened"); + + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != Helper.STATE_INIT) { + logger.log(Level.SEVERE, "Socket-buffer not in expected Init state"); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + } + + ServerSocket ser = new ServerSocket(port); + logger.log(Level.INFO, "Server will be listening on Port: " + port); + + /* + * Timeout after 7 sec if client does not connect + */ + try { + ser.setSoTimeout(7 * 1000); + + } catch (SocketException e) { + logger.log(Level.SEVERE, "Cannot set timeout!"); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + logger.log(Level.INFO, "Waiting for client to connect"); + logger.log(Level.INFO, "Going to checkpoint"); + + try { + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_FAIL || socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_END) { + 
ser.close(); + System.exit(1); + } + /* + * Checkpoint when server is listening for connections, and no client has connected to the server. + */ + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_CHECKPOINT); + socket = ser.accept(); + SocketHelper.socketWaitForRestore(socketMappedBuffer, logger); + + } catch (Exception e) { + logger.log(Level.SEVERE, "Timed out while waiting for client to connect\n" + e); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + /* + * No usable client connection is guaranteed here (socket may still be null), so stop instead of continuing into the message exchange. + */ + ser.close(); + System.exit(1); + } + + if (!ser.isBound()) { + logger.log(Level.SEVERE, "Server is not bound to a port"); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + + if (ser.getLocalPort() != port) { + logger.log(Level.SEVERE, "Server is not listening on correct port"); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + + BufferedReader br = new BufferedReader(new InputStreamReader(socket.getInputStream())); + PrintStream outstream = new PrintStream(socket.getOutputStream()); + + readMssg = br.readLine(); + logger.log(Level.INFO, "Read message 1: " + readMssg); + if (!msg1.equals(readMssg)) { + logger.log(Level.SEVERE, "Message 1 received was wrong,received: " + readMssg + " expected: " + msg1); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + logger.log(Level.INFO, "Sending message: " + msg2); + outstream.println(msg2); + + readMssg = br.readLine(); + logger.log(Level.INFO, "Read message 3: " + readMssg); + + if (!msg3.equals(readMssg)) { + logger.log(Level.SEVERE, "Message 3 received was wrong, received: " + readMssg + " expected: " + msg3); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + outstream.println(msg4); + logger.log(Level.INFO, "Sent message 4 " + msg4); + + socket.close(); + + /* + * Put Socket-MappedBuffer to state SocketHelper.STATE_SUCCESS telling the server process has ended successfully.
+ */ + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_FAIL || socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_END) { + System.exit(1); + } else { + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, SocketHelper.STATE_SUCCESS); + } + } catch (Exception exception) { + if (null != logger) { + StringWriter writer = new StringWriter(); + PrintWriter printWriter = new PrintWriter(writer); + exception.printStackTrace(printWriter); + logger.log(Level.SEVERE, "Exception occurred:" + exception); + logger.log(Level.FINE, writer.toString()); + } + + if (socketMappedBuffer != null) { + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + } + } +} diff --git a/test/javaTests/src/org/criu/java/tests/SocketsData.java b/test/javaTests/src/org/criu/java/tests/SocketsData.java new file mode 100644 index 000000000..67d8cef0e --- /dev/null +++ b/test/javaTests/src/org/criu/java/tests/SocketsData.java @@ -0,0 +1,156 @@ +package org.criu.java.tests; + +import java.io.File; +import java.io.PrintWriter; +import java.io.StringWriter; +import java.lang.management.ManagementFactory; +import java.lang.management.RuntimeMXBean; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.channels.FileChannel.MapMode; +import java.nio.file.StandardOpenOption; +import java.util.logging.Level; +import java.util.logging.Logger; + +class SocketsData { + static String TESTNAME = "SocketsData"; + + /** + * Runs the server and client processes, checkpoints the client process when its in the middle of data transfer + * + * @param args Not used + */ + public static void main(String[] args) { + MappedByteBuffer b = null, socketMappedBuffer = null; + FileChannel channel; + String pid; + Logger logger = null; + String port = "49200"; + try { + /* + * Mapped buffer 'b' to communicate between CheckpointRestore.java and this process. 
+ */ + File f = new File(Helper.MEMORY_MAPPED_FILE_NAME); + channel = FileChannel.open(f.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE); + b = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE); + channel.close(); + + logger = Logger.getLogger(Helper.PACKAGE_NAME + "." + TESTNAME); + + RuntimeMXBean bean = ManagementFactory.getRuntimeMXBean(); + pid = bean.getName(); + Helper.init(TESTNAME, pid, logger); + logger.log(Level.INFO, "Test init done; pid written to pid file; beginning with test"); + + if (b.getChar(Helper.MAPPED_INDEX) != Helper.STATE_INIT) { + logger.log(Level.SEVERE, "Error: Error in memory mapping, test is not in init state"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + + /* + * Socket Mapped Buffer to communicate between server process, client process and this process. + */ + logger.log(Level.INFO, "Creating socketbufferfile and setting the init value of buffer"); + File socketfile = new File(Helper.OUTPUT_FOLDER_NAME + "/" + TESTNAME + "/SocketsDataFile"); + channel = FileChannel.open(socketfile.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE); + socketMappedBuffer = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE); + channel.close(); + + /* + * Set socketMappedBuffer to init state. + */ + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_INIT); + logger.log(Level.INFO, "Starting server and client process"); + ProcessBuilder builder = new ProcessBuilder("java", "-cp", "target/classes", Helper.PACKAGE_NAME + "." + "SocketsDataServer", TESTNAME, port); + Process serverProcess = builder.start(); + builder = new ProcessBuilder("java", "-cp", "target/classes", Helper.PACKAGE_NAME + "." 
+ "SocketsDataClient", TESTNAME, port); + Process clientProcess = builder.start(); + + while (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_INIT || socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == SocketHelper.STATE_LISTEN) { + ; + } + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_END) { + logger.log(Level.SEVERE, "Killing the server process and client process"); + logger.log(Level.SEVERE, "Some error took place in the client or server process: check their log for details"); + serverProcess.destroy(); + clientProcess.destroy(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_FAIL) { + logger.log(Level.SEVERE, "Killing the server process and client process"); + logger.log(Level.SEVERE, "Exception occured in the client or server process: check their log for details"); + serverProcess.destroy(); + clientProcess.destroy(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != Helper.STATE_CHECKPOINT) { + logger.log(Level.SEVERE, "Killing the server process and client process"); + logger.log(Level.SEVERE, "State is not the expected 'to be checkpointed' state"); + serverProcess.destroy(); + clientProcess.destroy(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_CHECKPOINT) { + logger.log(Level.INFO, "Going to checkpoint client process"); + try { + Thread.sleep(10); + } catch (InterruptedException e) { + logger.log(Level.WARNING, "Thread was interrupted"); + } + SocketHelper.checkpointAndWait(b, logger, serverProcess, clientProcess); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_RESTORE); + logger.log(Level.INFO, "Process has been restored!"); + } + + while (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_RESTORE) { + ; + } + char 
bufchar = socketMappedBuffer.getChar(Helper.MAPPED_INDEX); + if (bufchar != Helper.STATE_FAIL && bufchar != Helper.STATE_PASS && bufchar != SocketHelper.STATE_SUCCESS) { + logger.log(Level.SEVERE, "Received wrong message from the child process: not the expected finish message"); + logger.log(Level.SEVERE, "Check their log files for more details"); + serverProcess.destroy(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_FAIL || socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_END) { + logger.log(Level.SEVERE, "Error in the client or server process: check their log for details"); + serverProcess.destroy(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + while (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == SocketHelper.STATE_SUCCESS) { + ; + } + + /* + * Client process puts socketMappedBuffer to STATE_PASS if the test passed. + */ + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_PASS) { + logger.log(Level.INFO, Helper.PASS_MESSAGE); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_PASS); + } else { + logger.log(Level.INFO, "Did not receive pass message from the client process"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + + } catch (Exception e) { + if (null != logger) { + StringWriter writer = new StringWriter(); + PrintWriter printWriter = new PrintWriter(writer); + e.printStackTrace(printWriter); + logger.log(Level.SEVERE, "Exception occured:" + e); + logger.log(Level.FINE, writer.toString()); + } + if (b != null) { + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + System.exit(5); + } + } +} diff --git a/test/javaTests/src/org/criu/java/tests/SocketsDataClient.java b/test/javaTests/src/org/criu/java/tests/SocketsDataClient.java new file mode 100644 index 000000000..49885a886 --- /dev/null +++ b/test/javaTests/src/org/criu/java/tests/SocketsDataClient.java @@ -0,0 +1,141 @@ 
package org.criu.java.tests;

import java.io.*;
import java.lang.management.ManagementFactory;
import java.lang.management.RuntimeMXBean;
import java.net.Socket;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.channels.FileChannel.MapMode;
import java.nio.file.StandardOpenOption;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * Client half of the SocketsData test. It connects to the server, exchanges two
 * messages, requests a checkpoint through the shared mapped buffer, and exchanges
 * two more messages once it has been restored.
 */
class SocketsDataClient {
    static String TESTNAME = "SocketsDataClient";

    /**
     * Reads the state character currently stored in the shared buffer. Every call
     * is a fresh read, because the other processes update the buffer concurrently.
     */
    private static char stateOf(MappedByteBuffer buffer) {
        return buffer.getChar(Helper.MAPPED_INDEX);
    }

    /**
     * Entry point. Every failure path writes a failure state into the shared
     * buffer and terminates the JVM with exit status 1.
     *
     * @param args args[0] is the parent test name (used to locate the shared
     *             state file), args[1] is the TCP port the server listens on
     */
    public static void main(String[] args) {
        MappedByteBuffer socketMappedBuffer = null;
        Socket socket = null;
        Logger logger = null;

        try {
            String parentTestName = args[0];
            String portArg = args[1];
            int port = Integer.parseInt(portArg);
            String msg1 = "Ch@ckM@$$@Ge!1", msg2 = "cH@C!m$SG!!2",
                    msg3 = "@Ft@rCPM$$g3", msg4 = "Aft@rCPM$$g4";
            String readMssg;

            /*
             * Socket Mapped Buffer to communicate between server process, client process and the calling parent process.
             */
            File socketfile = new File(Helper.OUTPUT_FOLDER_NAME + "/" + parentTestName + "/SocketsDataFile");
            FileChannel channel = FileChannel.open(socketfile.toPath(),
                    StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE);
            socketMappedBuffer = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE);
            channel.close();

            logger = Logger.getLogger(Helper.PACKAGE_NAME + "." + TESTNAME);
            SocketHelper.init(TESTNAME, parentTestName, logger);

            logger.log(Level.INFO, "Begin");
            logger.log(Level.INFO, "Parent name: " + parentTestName);

            RuntimeMXBean bean = ManagementFactory.getRuntimeMXBean();
            String pid = bean.getName();

            logger.log(Level.INFO, "Client pid: " + pid);
            SocketHelper.writePid(parentTestName, pid);

            if (stateOf(socketMappedBuffer) != Helper.STATE_INIT
                    && stateOf(socketMappedBuffer) != SocketHelper.STATE_LISTEN) {
                logger.log(Level.SEVERE, "Error: Socket-buffer not in expected Init state");
                socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_END);
                System.exit(1);
            }
            // Spin until the server leaves the init state.
            while (stateOf(socketMappedBuffer) == Helper.STATE_INIT) {
                /* busy wait */
            }
            /*
             * Socket Mapped Buffer should be in 'Server listening for connections' state
             */
            if (stateOf(socketMappedBuffer) != SocketHelper.STATE_LISTEN) {
                logger.log(Level.SEVERE, "socket-buffer not in expected state, current state: " + stateOf(socketMappedBuffer));
                socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_END);
                System.exit(1);
            }

            /*
             * The server starts listening on the port after putting the mapped buffer into SocketHelper.STATE_LISTEN.
             */
            logger.log(Level.INFO, "Client socket sending req to server at IP: 127.0.0.1 port:" + port);

            try {
                socket = new Socket(SocketHelper.IP_ADDRESS, port);
            } catch (IOException e) {
                logger.log(Level.SEVERE, "Exception occured when connecting to port: " + e);
                socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL);
                System.exit(1);
            }
            PrintStream out = new PrintStream(socket.getOutputStream());
            BufferedReader br = new BufferedReader(new InputStreamReader(socket.getInputStream()));

            logger.log(Level.INFO, "Sending message to server " + msg1);
            out.println(msg1);

            readMssg = br.readLine();
            logger.log(Level.INFO, "message received from server " + readMssg);
            if (!msg2.equals(readMssg)) {
                logger.log(Level.SEVERE, "wrong message received; Expected " + msg2);
                socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL);
                // Stop here: continuing would overwrite STATE_FAIL with
                // STATE_CHECKPOINT below and hide the failure from the parent.
                System.exit(1);
            }

            /*
             * Checkpoints and wait for Restore
             */
            logger.log(Level.INFO, "Going to checkpoint");
            socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_CHECKPOINT);
            SocketHelper.socketWaitForRestore(socketMappedBuffer, logger);

            logger.log(Level.INFO, "Sending message to server " + msg3);
            out.println(msg3);

            readMssg = br.readLine();
            logger.log(Level.INFO, "message received from server " + readMssg);
            if (!msg4.equals(readMssg)) {
                // The expected reply at this point is msg4, so report msg4.
                logger.log(Level.SEVERE, "wrong message received; Expected " + msg4);
                socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL);
                System.exit(1);
            }

            socket.close();
            /*
             * Wait for server process to end.
             */
            while (stateOf(socketMappedBuffer) == Helper.STATE_RESTORE) {
                /* busy wait */
            }
            /*
             * Check the server process has ended successfully, if it was a success put Mapped Buffer to pass state, else to failed state
             */
            if (stateOf(socketMappedBuffer) == SocketHelper.STATE_SUCCESS) {
                socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_PASS);
            } else {
                socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL);
            }
        } catch (Exception exception) {
            if (null != logger) {
                StringWriter writer = new StringWriter();
                PrintWriter printWriter = new PrintWriter(writer);
                exception.printStackTrace(printWriter);
                logger.log(Level.SEVERE, "Exception occured:" + exception);
                logger.log(Level.FINE, writer.toString());
            }
            if (socketMappedBuffer != null) {
                socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL);
            }
            // Exit non-zero even when the buffer could not be mapped.
            System.exit(1);
        }
    }
}
diff --git a/test/javaTests/src/org/criu/java/tests/SocketsDataServer.java b/test/javaTests/src/org/criu/java/tests/SocketsDataServer.java new file mode 100644 index 000000000..65fe92a9d --- /dev/null +++
b/test/javaTests/src/org/criu/java/tests/SocketsDataServer.java @@ -0,0 +1,124 @@
package org.criu.java.tests;

import java.io.*;
import java.net.ServerSocket;
import java.net.Socket;
import java.net.SocketException;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.channels.FileChannel.MapMode;
import java.nio.file.StandardOpenOption;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * Server half of the SocketsData test. It accepts one client connection and
 * answers the client's two messages, reporting progress through the shared
 * mapped buffer.
 */
class SocketsDataServer {
    static String TESTNAME = "SocketsDataServer";

    /** How long accept() waits for the client before timing out, in milliseconds. */
    private static final int ACCEPT_TIMEOUT_MS = 7 * 1000;

    /**
     * Reads the state character currently stored in the shared buffer. Every call
     * is a fresh read, because the other processes update the buffer concurrently.
     */
    private static char stateOf(MappedByteBuffer buffer) {
        return buffer.getChar(Helper.MAPPED_INDEX);
    }

    /**
     * Entry point. Every failure path writes a failure state into the shared
     * buffer and terminates the JVM with exit status 1.
     *
     * @param args args[0] is the parent test name (used to locate the shared
     *             state file), args[1] is the TCP port to listen on
     */
    public static void main(String[] args) {
        MappedByteBuffer socketMappedBuffer = null;
        Logger logger = null;
        String msg1 = "Ch@ckM@$$@Ge!1", msg2 = "cH@C!m$SG!!2",
                msg3 = "@Ft@rCPM$$g3", msg4 = "Aft@rCPM$$g4", readMssg;

        try {
            String parentTestName = args[0];
            String portArg = args[1];
            int port = Integer.parseInt(portArg);

            /*
             * Socket Mapped Buffer to communicate between server process, client process and the calling parent process.
             */
            File socketfile = new File(Helper.OUTPUT_FOLDER_NAME + "/" + parentTestName + "/SocketsDataFile");
            FileChannel channel = FileChannel.open(socketfile.toPath(),
                    StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE);
            socketMappedBuffer = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE);
            channel.close();

            logger = Logger.getLogger(Helper.PACKAGE_NAME + "." + TESTNAME);
            SocketHelper.init(TESTNAME, parentTestName, logger);

            logger.log(Level.INFO, "Begin");
            logger.log(Level.INFO, "Parent name: " + parentTestName);
            logger.log(Level.INFO, "socket buffer connection opened");

            if (stateOf(socketMappedBuffer) != Helper.STATE_INIT) {
                logger.log(Level.SEVERE, "Socket-buffer not in expected Init state");
                socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_END);
                System.exit(1);
            }

            // try-with-resources closes the listening socket and the accepted
            // connection on every path that leaves these blocks normally or by exception.
            try (ServerSocket ser = new ServerSocket(port)) {
                logger.log(Level.INFO, "Server will be listening on Port " + port);

                /*
                 * Wait for 7 seconds for client to connect, else throw a timeout exception
                 */
                try {
                    ser.setSoTimeout(ACCEPT_TIMEOUT_MS);
                } catch (SocketException e) {
                    logger.log(Level.SEVERE, "cannot set timeout");
                    socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL);
                    // Stop here: continuing would overwrite STATE_FAIL with
                    // STATE_LISTEN below and hide the failure.
                    System.exit(1);
                }

                logger.log(Level.INFO, "Waiting for client to connect");
                /*
                 * Put Socket Mapped Buffer to SocketHelper.STATE_LISTEN state - server has bound to port and
                 * begin listening for connections.
                 */
                socketMappedBuffer.putChar(Helper.MAPPED_INDEX, SocketHelper.STATE_LISTEN);
                try (Socket socket = ser.accept()) {
                    BufferedReader br = new BufferedReader(new InputStreamReader(socket.getInputStream()));
                    PrintStream outstream = new PrintStream(socket.getOutputStream());

                    readMssg = br.readLine();
                    logger.log(Level.INFO, "Read message 1: " + readMssg);

                    if (!msg1.equals(readMssg)) {
                        logger.log(Level.SEVERE, "Message 1 received was wrong:rec " + readMssg + " expected: " + msg1);
                        socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL);
                        System.exit(1);
                    }

                    logger.log(Level.INFO, "Sending message: " + msg2);
                    outstream.println(msg2);

                    readMssg = br.readLine();
                    logger.log(Level.INFO, "Read message 3: " + readMssg);

                    if (!msg3.equals(readMssg)) {
                        logger.log(Level.SEVERE, "Message 3 received was wrong:rec " + readMssg + " expected: " + msg3);
                        socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL);
                        System.exit(1);
                    }

                    outstream.println(msg4);
                    logger.log(Level.INFO, "Sent message 4 " + msg4);
                }
            }

            /*
             * Put Socket-MappedBuffer to state SocketHelper.STATE_SUCCESS telling the server process has ended successfully.
             */
            if (stateOf(socketMappedBuffer) == Helper.STATE_FAIL
                    || stateOf(socketMappedBuffer) == Helper.STATE_END) {
                System.exit(1);
            } else {
                socketMappedBuffer.putChar(Helper.MAPPED_INDEX, SocketHelper.STATE_SUCCESS);
            }
        } catch (Exception exception) {
            if (null != logger) {
                StringWriter writer = new StringWriter();
                PrintWriter printWriter = new PrintWriter(writer);
                exception.printStackTrace(printWriter);
                logger.log(Level.SEVERE, "Exception occurred:" + exception);
                logger.log(Level.FINE, writer.toString());
            }

            if (socketMappedBuffer != null) {
                socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL);
            }
            // Report the failure through the exit status too.
            System.exit(1);
        }
    }
}
diff --git a/test/javaTests/src/org/criu/java/tests/SocketsListen.java b/test/javaTests/src/org/criu/java/tests/SocketsListen.java new file mode 100644 index 000000000..3fad38549 --- /dev/null +++ b/test/javaTests/src/org/criu/java/tests/SocketsListen.java @@ -0,0 +1,153 @@ +package org.criu.java.tests; + +import java.io.File; +import java.io.PrintWriter; +import java.io.StringWriter; +import java.lang.management.ManagementFactory; +import java.lang.management.RuntimeMXBean; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.channels.FileChannel.MapMode; +import java.nio.file.StandardOpenOption; +import java.util.logging.Level; +import java.util.logging.Logger; + +class SocketsListen { + static String TESTNAME = "SocketsListen"; + + /** + * Runs the client and server process, checkpoints the server process when the server has bound to a port, but has not yet started listening + * + * @param args Not used + */ + public static void main(String[] args) { + MappedByteBuffer b = null, socketMappedBuffer = null; + FileChannel channel; + String pid; + String port = "49200"; + Logger logger = null; + try { + logger = Logger.getLogger(Helper.PACKAGE_NAME + "."
+ TESTNAME); + /* + * Mapped buffer 'b' to communicate between CheckpointRestore.java and this process. + */ + File f = new File(Helper.MEMORY_MAPPED_FILE_NAME); + channel = FileChannel.open(f.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE); + b = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE); + channel.close(); + + RuntimeMXBean bean = ManagementFactory.getRuntimeMXBean(); + pid = bean.getName(); + Helper.init(TESTNAME, pid, logger); + logger.log(Level.INFO, "Test init done; pid written to pid file; beginning with test"); + + + if (b.getChar(Helper.MAPPED_INDEX) != Helper.STATE_INIT) { + logger.log(Level.SEVERE, "Error: Error in memory mapping, test is not in init state"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + logger.log(Level.INFO, "Creating socketbufferfile and setting the init value of buffer"); + + /* + * Socket Mapped Buffer to communicate between server process, client process and this process. + */ + File socketfile = new File(Helper.OUTPUT_FOLDER_NAME + "/" + TESTNAME + "/SocketsListenFile"); + channel = FileChannel.open(socketfile.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE); + socketMappedBuffer = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE); + channel.close(); + + /* + * Set socketMappedBuffer to init state. + */ + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_INIT); + + logger.log(Level.INFO, "Starting server and client process"); + ProcessBuilder builder = new ProcessBuilder("java", "-cp", "target/classes", Helper.PACKAGE_NAME + "." + "SocketsListenServer", TESTNAME, port); + Process serverProcess = builder.start(); + builder = new ProcessBuilder("java", "-cp", "target/classes", Helper.PACKAGE_NAME + "." 
+ "SocketsListenClient", TESTNAME, port); + Process clientProcess = builder.start(); + + while (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_INIT) { + ; + } + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_END) { + logger.log(Level.SEVERE, "Killing the server process and client process"); + logger.log(Level.SEVERE, "Some error took place in the client or server process: check their log for details"); + serverProcess.destroy(); + clientProcess.destroy(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_FAIL) { + logger.log(Level.SEVERE, "Killing the server process and client process"); + logger.log(Level.SEVERE, "Exception occured in the client or server process: check their log for details"); + serverProcess.destroy(); + clientProcess.destroy(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != Helper.STATE_CHECKPOINT) { + logger.log(Level.SEVERE, "Killing the server process and client process"); + logger.log(Level.SEVERE, "State is not the expected 'to be checkpointed' state"); + serverProcess.destroy(); + clientProcess.destroy(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_CHECKPOINT) { + logger.log(Level.INFO, "Going to checkpoint server process"); + SocketHelper.checkpointAndWait(b, logger, serverProcess, clientProcess); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_RESTORE); + logger.log(Level.INFO, "Process has been restored!"); + } + + while (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_RESTORE || socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == SocketHelper.STATE_LISTEN) { + ; + } + char bufchar = socketMappedBuffer.getChar(Helper.MAPPED_INDEX); + if (bufchar != Helper.STATE_FAIL && bufchar != 
Helper.STATE_PASS && bufchar != SocketHelper.STATE_SUCCESS) { + logger.log(Level.SEVERE, "Received wrong message from the child process: not the expected finish message"); + logger.log(Level.SEVERE, "Check their log files for more details"); + clientProcess.destroy(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_FAIL || socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_END) { + logger.log(Level.SEVERE, "Error in the client or server process: check their log for details"); + clientProcess.destroy(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + while (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == SocketHelper.STATE_SUCCESS) { + ; + } + + /* + * Client process puts socketMappedBuffer to Helper.STATE_PASS-Pass state if the test passed. + */ + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_PASS) { + logger.log(Level.INFO, Helper.PASS_MESSAGE); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_PASS); + } else { + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + + System.exit(0); + + } catch (Exception e) { + if (null != logger) { + StringWriter writer = new StringWriter(); + PrintWriter printWriter = new PrintWriter(writer); + e.printStackTrace(printWriter); + logger.log(Level.SEVERE, "Exception occured:" + e); + logger.log(Level.FINE, writer.toString()); + } + if (b != null) { + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + System.exit(5); + } + } +} diff --git a/test/javaTests/src/org/criu/java/tests/SocketsListenClient.java b/test/javaTests/src/org/criu/java/tests/SocketsListenClient.java new file mode 100644 index 000000000..efcb3d545 --- /dev/null +++ b/test/javaTests/src/org/criu/java/tests/SocketsListenClient.java @@ -0,0 +1,136 @@ +package org.criu.java.tests; + +import java.io.*; +import java.net.Socket; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; 
+import java.nio.channels.FileChannel.MapMode; +import java.nio.file.StandardOpenOption; +import java.util.logging.Level; +import java.util.logging.Logger; + +class SocketsListenClient { + static String TESTNAME = "SocketsListenClient"; + + public static void main(String[] args) { + MappedByteBuffer socketMappedBuffer = null; + FileChannel channel; + Socket socket = null; + String parentTestName, portArg; + int port; + Logger logger = null; + try { + parentTestName = args[0]; + portArg = args[1]; + port = Integer.parseInt(portArg); + String msg1 = "Ch@ckM@$$@Ge!1", msg2 = "cH@C!m$SG!!2", readMssg, + msg3 = "@Ft@rCPM$$g3", msg4 = "Aft@rCPM$$g4"; + + /* + * Socket Mapped Buffer to communicate between server process, client process and the calling parent process. + */ + File socketfile = new File(Helper.OUTPUT_FOLDER_NAME + "/" + parentTestName + "/SocketsListenFile"); + channel = FileChannel.open(socketfile.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE); + socketMappedBuffer = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE); + channel.close(); + + logger = Logger.getLogger(Helper.PACKAGE_NAME + "." 
+ TESTNAME); + SocketHelper.init(TESTNAME, parentTestName, logger); + + logger.log(Level.INFO, "Begin"); + logger.log(Level.INFO, "Parent name: " + parentTestName); + + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != Helper.STATE_INIT && socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != Helper.STATE_CHECKPOINT && socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != Helper.STATE_RESTORE && socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != SocketHelper.STATE_LISTEN) { + logger.log(Level.SEVERE, "Error: Socket-buffer not in expected Init state"); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + logger.log(Level.INFO, "Waiting for CR"); + + while (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_INIT || socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_CHECKPOINT) { + ; + } + + logger.log(Level.INFO, "Restored"); + while (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_RESTORE) { + ; + } + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != SocketHelper.STATE_LISTEN) { + logger.log(Level.SEVERE, "Buffer does not contain the expected 'server bound to port' state" + socketMappedBuffer.getChar(Helper.MAPPED_INDEX)); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + /* + * Make the thread sleep to ensure server is listening on the port for client connections. 
+ */ + logger.log(Level.INFO, "Put thread to sleep"); + try { + Thread.sleep(10); + } catch (InterruptedException e) { + logger.log(Level.WARNING, "Thread was interuptedp"); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + + logger.log(Level.INFO, "Client socket sending req to server at IP: 127.0.0.1 port:" + port); + try { + socket = new Socket(SocketHelper.IP_ADDRESS, port); + } catch (Exception e) { + logger.log(Level.SEVERE, "Exception occured when connecting to port: " + e); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + PrintStream out = new PrintStream(socket.getOutputStream()); + BufferedReader br = new BufferedReader(new InputStreamReader(socket.getInputStream())); + + logger.log(Level.INFO, "Sending message to server " + msg1); + out.println(msg1); + + readMssg = br.readLine(); + logger.log(Level.INFO, "message received from server " + readMssg); + if (!msg2.equals(readMssg)) { + logger.log(Level.SEVERE, "wrong message received; Expected " + msg2); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + + logger.log(Level.INFO, "Sending message to server " + msg3); + out.println(msg3); + + readMssg = br.readLine(); + logger.log(Level.INFO, "message received from server " + readMssg); + if (!msg4.equals(readMssg)) { + logger.log(Level.SEVERE, "wrong message received; Expected " + msg4); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + + socket.close(); + + /* + * Wait for server process to end. 
+ */ + while (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == SocketHelper.STATE_LISTEN) { + ; + } + /* + * Check the server process has ended successfully, if it was a success put MappedBuffer to STATE_PASS, else to STATE_FAIL + */ + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == SocketHelper.STATE_SUCCESS) { + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_PASS); + } else { + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + } catch (Exception exception) { + if (null != logger) { + StringWriter writer = new StringWriter(); + PrintWriter printWriter = new PrintWriter(writer); + exception.printStackTrace(printWriter); + logger.log(Level.SEVERE, "Exception occured:" + exception); + logger.log(Level.FINE, writer.toString()); + } + + if (socketMappedBuffer != null) { + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + } + } +} diff --git a/test/javaTests/src/org/criu/java/tests/SocketsListenServer.java b/test/javaTests/src/org/criu/java/tests/SocketsListenServer.java new file mode 100644 index 000000000..46fef40ec --- /dev/null +++ b/test/javaTests/src/org/criu/java/tests/SocketsListenServer.java @@ -0,0 +1,160 @@ +package org.criu.java.tests; + +import java.io.*; +import java.lang.management.ManagementFactory; +import java.lang.management.RuntimeMXBean; +import java.net.ServerSocket; +import java.net.Socket; +import java.net.SocketException; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.channels.FileChannel.MapMode; +import java.nio.file.StandardOpenOption; +import java.util.logging.Level; +import java.util.logging.Logger; + +class SocketsListenServer { + static String TESTNAME = "SocketsListenServer"; + + public static void main(String[] args) { + MappedByteBuffer socketMappedBuffer = null; + FileChannel channel; + String parentTestName, portArg; + int port; + Logger logger = null; + Socket socket = null; + String readMssg, msg1 = 
"Ch@ckM@$$@Ge!1", msg2 = "cH@C!m$SG!!2", + msg3 = "@Ft@rCPM$$g3", msg4 = "Aft@rCPM$$g4"; + + try { + parentTestName = args[0]; + portArg = args[1]; + port = Integer.parseInt(portArg); + + /* + * Socket Mapped Buffer to communicate between server process, client process and the calling parent process. + */ + File socketfile = new File(Helper.OUTPUT_FOLDER_NAME + "/" + parentTestName + "/SocketsListenFile"); + channel = FileChannel.open(socketfile.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE); + socketMappedBuffer = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE); + channel.close(); + + logger = Logger.getLogger(Helper.PACKAGE_NAME + "." + TESTNAME); + SocketHelper.init(TESTNAME, parentTestName, logger); + + RuntimeMXBean bean = ManagementFactory.getRuntimeMXBean(); + String pid = bean.getName(); + SocketHelper.writePid(parentTestName, pid); + + logger.log(Level.INFO, "Begin"); + logger.log(Level.INFO, "Parent name: " + parentTestName); + logger.log(Level.INFO, "Server pid: " + pid); + logger.log(Level.INFO, "socket buffer connection opened"); + + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != Helper.STATE_INIT) { + logger.log(Level.SEVERE, "Socket-buffer not in expected Init state"); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + + logger.log(Level.INFO, "Server will be listening on Port " + port); + ServerSocket ser = new ServerSocket(port); + /* + * Server has bound to a port but is not listening yet! + */ + logger.log(Level.INFO, "Going to checkpoint"); + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_FAIL || socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_END) { + ser.close(); + System.exit(1); + } + /* + * Checkpoint and wait for Restore. 
+ */ + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_CHECKPOINT); + SocketHelper.socketWaitForRestore(socketMappedBuffer, logger); + + if (!ser.isBound()) { + logger.log(Level.SEVERE, "Server is not bound to a port"); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + + if (ser.getLocalPort() != port) { + logger.log(Level.SEVERE, "SServer is not listening on correct port"); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + /* + * Timeout after 5 sec if client does not connect + */ + try { + ser.setSoTimeout(5 * 1000); + + } catch (SocketException e) { + logger.log(Level.SEVERE, "cannot set timeout"); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + + try { + logger.log(Level.INFO, "Waiting for client to connect"); + /* + * Put Socket Mapped Buffer to SocketHelper.STATE_LISTEN state - server has bound to port and + * will begin listening for connections. + */ + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, SocketHelper.STATE_LISTEN); + socket = ser.accept(); + + } catch (Exception e) { + logger.log(Level.SEVERE, "Timed out while waiting for client to connect\n" + e); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + BufferedReader br = new BufferedReader(new InputStreamReader(socket.getInputStream())); + PrintStream outstream = new PrintStream(socket.getOutputStream()); + + readMssg = br.readLine(); + logger.log(Level.INFO, "Read message 1: " + readMssg); + if (!msg1.equals(readMssg)) { + logger.log(Level.SEVERE, "Message 1 received was wrong:rec " + readMssg + " expected: " + msg1); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + + logger.log(Level.INFO, "Sending message: " + msg2); + outstream.println(msg2); + + readMssg = br.readLine(); + logger.log(Level.INFO, "Read message 3: " + readMssg); + + if (!msg3.equals(readMssg)) { + 
logger.log(Level.SEVERE, "Message 3 received was wrong:rec " + readMssg + " expected: " + msg3); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + + outstream.println(msg4); + logger.log(Level.INFO, "Sending message: " + msg4); + + /* + * Put Socket-MappedBuffer to state SocketHelper.STATE_SUCCESS telling the server process has ended successfully. + */ + socket.close(); + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_FAIL || socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_END) { + System.exit(1); + } else { + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, SocketHelper.STATE_SUCCESS); + } + } catch (Exception exception) { + if (null != logger) { + StringWriter writer = new StringWriter(); + PrintWriter printWriter = new PrintWriter(writer); + exception.printStackTrace(printWriter); + logger.log(Level.SEVERE, "Exception occurred:" + exception); + logger.log(Level.FINE, writer.toString()); + } + if (socketMappedBuffer != null) { + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + } + } +} diff --git a/test/javaTests/src/org/criu/java/tests/SocketsMultiple.java b/test/javaTests/src/org/criu/java/tests/SocketsMultiple.java new file mode 100644 index 000000000..5e55c4274 --- /dev/null +++ b/test/javaTests/src/org/criu/java/tests/SocketsMultiple.java @@ -0,0 +1,152 @@ +package org.criu.java.tests; + +import java.io.File; +import java.io.PrintWriter; +import java.io.StringWriter; +import java.lang.management.ManagementFactory; +import java.lang.management.RuntimeMXBean; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.channels.FileChannel.MapMode; +import java.nio.file.StandardOpenOption; +import java.util.logging.Level; +import java.util.logging.Logger; + +class SocketsMultiple { + static String TESTNAME = "SocketsMultiple"; + + /** + * Runs the Client and Server Processes, Multiple clients connect to server Process, 
checkpoints the server process + * + * @param args Not used + */ + public static void main(String[] args) { + MappedByteBuffer b = null, socketMappedBuffer = null; + FileChannel channel; + String pid; + String port = "49200"; + Logger logger = null; + try { + /* + * Mapped buffer 'b' to communicate between CheckpointRestore.java and this process. + */ + File f = new File(Helper.MEMORY_MAPPED_FILE_NAME); + channel = FileChannel.open(f.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE); + b = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE); + channel.close(); + logger = Logger.getLogger(Helper.PACKAGE_NAME + "." + TESTNAME); + + RuntimeMXBean bean = ManagementFactory.getRuntimeMXBean(); + pid = bean.getName(); + Helper.init(TESTNAME, pid, logger); + logger.log(Level.INFO, "Test init done; pid written to pid file; beginning with test"); + + if (b.getChar(Helper.MAPPED_INDEX) != Helper.STATE_INIT) { + logger.log(Level.SEVERE, "Error: Error in memory mapping, test is not in init state"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + + /* + * Socket Mapped Buffer to communicate between server process, client process and this process. + */ + logger.log(Level.INFO, "Creating socketBufferFile and setting the init value of buffer"); + File socketfile = new File(Helper.OUTPUT_FOLDER_NAME + "/" + TESTNAME + "/SocketsMultipleFile"); + channel = FileChannel.open(socketfile.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE); + socketMappedBuffer = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE); + channel.close(); + + /* + * Set socketMappedBuffer to init state. + */ + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_INIT); + + logger.log(Level.INFO, "Starting server and client process"); + ProcessBuilder builder = new ProcessBuilder("java", "-cp", "target/classes", Helper.PACKAGE_NAME + "." 
+ "SocketsMultipleServer", TESTNAME, port); + Process serverProcess = builder.start(); + builder = new ProcessBuilder("java", "-cp", "target/classes", Helper.PACKAGE_NAME + "." + "SocketsMultipleClient", TESTNAME, port); + Process clientProcess = builder.start(); + + while (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_INIT || socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == SocketHelper.STATE_LISTEN) { + ; + } + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_END) { + logger.log(Level.SEVERE, "Killing the server process and client process"); + logger.log(Level.SEVERE, "Some error took place in the client or server process: check their log for details"); + serverProcess.destroy(); + clientProcess.destroy(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_FAIL) { + logger.log(Level.SEVERE, "Killing the server process and client process"); + logger.log(Level.SEVERE, "Exception occured in the client or server process: check their log for details"); + serverProcess.destroy(); + clientProcess.destroy(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != Helper.STATE_CHECKPOINT) { + logger.log(Level.SEVERE, "Killing the server process and client process"); + logger.log(Level.SEVERE, "State is not the expected 'to be checkpointed' state"); + serverProcess.destroy(); + clientProcess.destroy(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_CHECKPOINT) { + logger.log(Level.INFO, "Going to checkpoint server process"); + SocketHelper.checkpointAndWait(b, logger, serverProcess, clientProcess); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_RESTORE); + logger.log(Level.INFO, "Process has been restored!"); + } + + while 
(socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_RESTORE) { + ; + } + char bufchar = socketMappedBuffer.getChar(Helper.MAPPED_INDEX); + if (bufchar != Helper.STATE_FAIL && bufchar != Helper.STATE_PASS && bufchar != SocketHelper.STATE_SUCCESS) { + logger.log(Level.SEVERE, "Received wrong message from the child process: not the expected finish message"); + logger.log(Level.SEVERE, "Check their log files for more details"); + clientProcess.destroy(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_FAIL || socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_END) { + logger.log(Level.SEVERE, "Error in the client or server process: check their log for details"); + clientProcess.destroy(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + while (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == SocketHelper.STATE_SUCCESS) { + ; + } + + /* + * Client process puts socketMappedBuffer to STATE_PASS state if the test passed. 
+ */ + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_PASS) { + logger.log(Level.INFO, Helper.PASS_MESSAGE); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_PASS); + } else { + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + + System.exit(0); + + } catch (Exception e) { + if (null != logger) { + StringWriter writer = new StringWriter(); + PrintWriter printWriter = new PrintWriter(writer); + e.printStackTrace(printWriter); + logger.log(Level.SEVERE, "Exception occured:" + e); + logger.log(Level.FINE, writer.toString()); + } + if (b != null) { + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + System.exit(5); + } + } +} diff --git a/test/javaTests/src/org/criu/java/tests/SocketsMultipleClient.java b/test/javaTests/src/org/criu/java/tests/SocketsMultipleClient.java new file mode 100644 index 000000000..d97a946fd --- /dev/null +++ b/test/javaTests/src/org/criu/java/tests/SocketsMultipleClient.java @@ -0,0 +1,174 @@ +package org.criu.java.tests; + +import java.io.*; +import java.net.Socket; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.channels.FileChannel.MapMode; +import java.nio.file.StandardOpenOption; +import java.util.logging.Level; +import java.util.logging.Logger; + +class SocketsMultipleClient { + static String TESTNAME = "SocketsMultipleClient"; + + public static void main(String[] args) { + MappedByteBuffer socketMappedBuffer = null; + FileChannel channel; + String msg1 = "Message1", msg2 = "Message2", readMssg; + Socket socket1 = null, socket2 = null, socket3 = null, socket4 = null; + String parentTestName, portArg; + int port; + Logger logger = null; + + try { + parentTestName = args[0]; + portArg = args[1]; + port = Integer.parseInt(portArg); + + /* + * Socket Mapped Buffer to communicate between server process, client process and the calling parent process. 
+ */ + File socketfile = new File(Helper.OUTPUT_FOLDER_NAME + "/" + parentTestName + "/SocketsMultipleFile"); + channel = FileChannel.open(socketfile.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE); + socketMappedBuffer = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE); + channel.close(); + + logger = Logger.getLogger(Helper.PACKAGE_NAME + "." + TESTNAME); + SocketHelper.init(TESTNAME, parentTestName, logger); + + logger.log(Level.INFO, "Begin"); + logger.log(Level.INFO, "Parent name: " + parentTestName); + + while (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_INIT) { + ; + } + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != SocketHelper.STATE_LISTEN) { + logger.log(Level.SEVERE, "Error: Socket-buffer not in expected state"); + + } + try { + logger.log(Level.INFO, "client 1 connecting..."); + socket1 = new Socket(SocketHelper.IP_ADDRESS, port); + } catch (Exception e) { + logger.log(Level.SEVERE, "Exception when client connects to server: " + e); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + } + logger.log(Level.INFO, "Client 1 connected to server successfully"); + PrintStream out1 = new PrintStream(socket1.getOutputStream()); + BufferedReader br1 = new BufferedReader(new InputStreamReader(socket1.getInputStream())); + logger.log(Level.INFO, "Got input and output streams for socket1"); + try { + logger.log(Level.INFO, "client 2 connecting..."); + socket2 = new Socket(SocketHelper.IP_ADDRESS, port); + } catch (Exception e) { + logger.log(Level.SEVERE, "Exception when client connects to server: " + e); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + } + logger.log(Level.INFO, "Client 2 connected to server successfully"); + PrintStream out2 = new PrintStream(socket2.getOutputStream()); + BufferedReader br2 = new BufferedReader(new InputStreamReader(socket2.getInputStream())); + logger.log(Level.INFO, "Got input and output streams for 
socket2"); + + try { + logger.log(Level.INFO, "client 3 connecting..."); + socket3 = new Socket(SocketHelper.IP_ADDRESS, port); + } catch (Exception e) { + logger.log(Level.SEVERE, "Exception when client connects to server: " + e); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + } + logger.log(Level.INFO, "Client 3 connected to server successfully"); + PrintStream out3 = new PrintStream(socket3.getOutputStream()); + BufferedReader br3 = new BufferedReader(new InputStreamReader(socket3.getInputStream())); + logger.log(Level.INFO, "Got input and output streams for socket3"); + + out1.println(msg1); + + readMssg = br1.readLine(); + if (!msg2.equals(readMssg)) { + logger.log(Level.SEVERE, "wrong message received; Received: " + readMssg); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + } + socket1.close(); + + out2.println(msg1); + + /* + * Wait for Checkpoint-Restore + */ + while (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_INIT || socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == SocketHelper.STATE_LISTEN || socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_CHECKPOINT) { + ; + } + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != Helper.STATE_RESTORE) { + logger.log(Level.SEVERE, "Socket-mapped-buffer is not in restored state"); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + logger.log(Level.INFO, "Server is Restored!!"); + + out3.println(msg1); + readMssg = br2.readLine(); + if (!msg2.equals(readMssg)) { + logger.log(Level.SEVERE, "wrong message received by client 2; Received: " + readMssg); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + readMssg = br3.readLine(); + if (!msg2.equals(readMssg)) { + logger.log(Level.SEVERE, "wrong message received by client 3; Received: " + readMssg); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + 
socket2.close(); + socket3.close(); + + try { + logger.log(Level.INFO, "client 4 connecting..."); + socket4 = new Socket(SocketHelper.IP_ADDRESS, port); + } catch (Exception e) { + logger.log(Level.SEVERE, "Exception when client connects to server: " + e); + } + logger.log(Level.INFO, "Client 4 connected to server successfully"); + PrintStream out4 = new PrintStream(socket4.getOutputStream()); + BufferedReader br4 = new BufferedReader(new InputStreamReader(socket4.getInputStream())); + logger.log(Level.INFO, "Got input and output streams for socket4"); + + out4.println(msg1); + readMssg = br4.readLine(); + if (!msg2.equals(readMssg)) { + logger.log(Level.SEVERE, "wrong message received by client 4; Received: " + readMssg); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + + socket4.close(); + /* + * Wait for server process to end. + */ + while (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_RESTORE) { + ; + } + /* + * Check the server process has ended successfully, if it was a success put Mapped Buffer to STATE_PASS, else to STATE_FAIL + */ + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == SocketHelper.STATE_SUCCESS) { + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_PASS); + } else { + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + + } catch (Exception exception) { + if (null != logger) { + StringWriter writer = new StringWriter(); + PrintWriter printWriter = new PrintWriter(writer); + exception.printStackTrace(printWriter); + logger.log(Level.SEVERE, "Exception occured:" + exception); + logger.log(Level.FINE, writer.toString()); + } + + if (socketMappedBuffer != null) { + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + } + } +} diff --git a/test/javaTests/src/org/criu/java/tests/SocketsMultipleServer.java b/test/javaTests/src/org/criu/java/tests/SocketsMultipleServer.java new file mode 100644 index 000000000..a7e4d3b9e --- 
/dev/null +++ b/test/javaTests/src/org/criu/java/tests/SocketsMultipleServer.java @@ -0,0 +1,215 @@ +package org.criu.java.tests; + +import java.io.*; +import java.lang.management.ManagementFactory; +import java.lang.management.RuntimeMXBean; +import java.net.ServerSocket; +import java.net.Socket; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.channels.FileChannel.MapMode; +import java.nio.file.StandardOpenOption; +import java.util.logging.Level; +import java.util.logging.Logger; + +class SocketsMultipleServer { + static String TESTNAME = "SocketsMultipleServer"; + + public static void main(String[] args) { + MappedByteBuffer socketMappedBuffer = null; + FileChannel channel; + String parentTestName, portArg; + int port; + Logger logger = null; + try { + parentTestName = args[0]; + portArg = args[1]; + port = Integer.parseInt(portArg); + + /* + * Socket Mapped Buffer to communicate between server process, client process and the calling parent process. + */ + File socketfile = new File(Helper.OUTPUT_FOLDER_NAME + "/" + parentTestName + "/SocketsMultipleFile"); + channel = FileChannel.open(socketfile.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE); + socketMappedBuffer = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE); + channel.close(); + + logger = Logger.getLogger(Helper.PACKAGE_NAME + "." 
+ TESTNAME); + SocketHelper.init(TESTNAME, parentTestName, logger); + + RuntimeMXBean bean = ManagementFactory.getRuntimeMXBean(); + String pid = bean.getName(); + SocketHelper.writePid(parentTestName, pid); + + logger.log(Level.INFO, "Begin"); + logger.log(Level.INFO, "Parent name: " + parentTestName); + logger.log(Level.INFO, "Server pid: " + pid); + logger.log(Level.INFO, "socket buffer connection opened"); + + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != Helper.STATE_INIT) { + logger.log(Level.SEVERE, "Socket-buffer not in expected Init state"); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + + /* + * The array indexes 3, 5, 7 and 9 will map the state of client 1, 2, 3 and 4. + * Set these array indexes to init state. + */ + + socketMappedBuffer.putChar(3, Helper.STATE_INIT); + socketMappedBuffer.putChar(5, Helper.STATE_INIT); + socketMappedBuffer.putChar(7, Helper.STATE_INIT); + socketMappedBuffer.putChar(9, Helper.STATE_INIT); + + ServerSocket ser = new ServerSocket(port); + logger.log(Level.INFO, "Server will be listening on Port " + port); + + Socket[] sockets = new Socket[4]; + + /* + * Set the SocketMappedBuffer to S state-server will be listening for connections + */ + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, SocketHelper.STATE_LISTEN); + + for (int i = 1; i <= 4; i++) { + sockets[i - 1] = ser.accept(); + ServerThread serverThread = new ServerThread(sockets[i - 1], "ser-socket " + i, 2 * i + 1, logger, socketMappedBuffer); + serverThread.start(); + if (i == 3) { + logger.log(Level.INFO, "Connected to client: 3"); + /* + * Client 3 has connected, wait for thread 1 to finish and then checkpoint. 
+ */ + while (socketMappedBuffer.getChar(3) != Helper.STATE_FAIL && socketMappedBuffer.getChar(3) != Helper.STATE_PASS) { + ; + } + logger.log(Level.INFO, "Going to checkpoint"); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_CHECKPOINT); + SocketHelper.socketWaitForRestore(socketMappedBuffer, logger); + } + } + + /* + * Loop while any of the 4 thread is running + */ + while (socketMappedBuffer.getChar(3) == Helper.STATE_INIT || socketMappedBuffer.getChar(5) == Helper.STATE_INIT + || socketMappedBuffer.getChar(7) == Helper.STATE_INIT || socketMappedBuffer.getChar(9) == Helper.STATE_INIT) { + ; + } + + /* + * Check Socket Mapped Buffer for a thread that failed + */ + for (int i = 1; i <= 4; i++) { + if (socketMappedBuffer.getChar(i * 2 + 1) == Helper.STATE_FAIL) { + logger.log(Level.SEVERE, "Error in thread connected to client " + i); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + } + + /* + * Check the 1st Socket is closed + */ + if (!sockets[0].isClosed()) { + logger.log(Level.SEVERE, "socket 1 is not closed"); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + logger.log(Level.INFO, "Socket 1 is in expected closed state: " + sockets[0].isClosed()); + + /* + * Check all threads are in expected pass state + */ + for (int i = 1; i <= 4; i++) { + if (socketMappedBuffer.getChar(i * 2 + 1) != Helper.STATE_PASS) { + logger.log(Level.SEVERE, "Unexpected State of buffer: " + socketMappedBuffer.getChar(i * 2 + 1) + ", client: " + i); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + } + logger.log(Level.INFO, "Done"); + + /* + * Put Socket-MappedBuffer to state SocketHelper.STATE_SUCCESS telling the server process has ended successfully. 
+ */ + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_FAIL || socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_END) { + System.exit(1); + } else { + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, SocketHelper.STATE_SUCCESS); + } + + } catch (Exception exception) { + if (null != logger) { + StringWriter writer = new StringWriter(); + PrintWriter printWriter = new PrintWriter(writer); + exception.printStackTrace(printWriter); + logger.log(Level.SEVERE, "Exception occurred:" + exception); + logger.log(Level.FINE, writer.toString()); + } + + if (socketMappedBuffer != null) { + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + } + } +} + +class ServerThread extends Thread { + Socket socket = null; + String name; + int num; + MappedByteBuffer socketMappedBuffer; + Logger logger; + + ServerThread(Socket socket, String name, int num, Logger logger, MappedByteBuffer socketMappedBuffer) { + this.socket = socket; + this.name = name; + this.logger = logger; + this.num = num; + this.socketMappedBuffer = socketMappedBuffer; + } + + public void run() { + try { + String readMssg, msg1 = "Message1", msg2 = "Message2"; + BufferedReader br = new BufferedReader(new InputStreamReader(socket.getInputStream())); + PrintStream out = new PrintStream(socket.getOutputStream()); + readMssg = br.readLine(); + if (!msg1.equals(readMssg)) { + logger.log(Level.SEVERE, "Message read by thread " + name + " was not 'Message1', received Message: " + readMssg); + socket.close(); + socketMappedBuffer.putChar(num, Helper.STATE_FAIL); + } else { + logger.log(Level.INFO, name + " received correct message"); + out.println(msg2); + logger.log(Level.INFO, name + " has sent message"); + socket.close(); + socketMappedBuffer.putChar(num, Helper.STATE_PASS); + } + + } catch (Exception exception) { + if (null != logger) { + StringWriter writer = new StringWriter(); + PrintWriter printWriter = new PrintWriter(writer); + 
exception.printStackTrace(printWriter); + logger.log(Level.SEVERE, "Exception occurred in thread :" + name + " " + exception); + logger.log(Level.FINE, writer.toString()); + } + + try { + if (socket != null) { + socket.close(); + } + } catch (IOException e) { + ; + } + + /* + * If exception occurs fail the thread + */ + socketMappedBuffer.putChar(num, Helper.STATE_FAIL); + } + } +} diff --git a/test/javaTests/src/org/criu/java/tests/SocketsServer.java b/test/javaTests/src/org/criu/java/tests/SocketsServer.java new file mode 100644 index 000000000..051233443 --- /dev/null +++ b/test/javaTests/src/org/criu/java/tests/SocketsServer.java @@ -0,0 +1,142 @@ +package org.criu.java.tests; + +import java.io.*; +import java.lang.management.ManagementFactory; +import java.lang.management.RuntimeMXBean; +import java.net.ServerSocket; +import java.net.Socket; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.channels.FileChannel.MapMode; +import java.nio.file.StandardOpenOption; +import java.util.logging.Level; +import java.util.logging.Logger; + +class SocketsServer { + static String TESTNAME = "SocketsServer"; + + public static void main(String[] args) { + MappedByteBuffer socketMappedBuffer = null; + String msg1 = "Ch@ckM@$$@Ge!1", msg2 = "cH@C!m$SG!!2", + msg3 = "@Ft@rCPM$$g3", msg4 = "Aft@rCPM$$g4", readMssg; + FileChannel channel; + String parentTestName, portArg; + int port; + Logger logger = null; + + try { + parentTestName = args[0]; + portArg = args[1]; + port = Integer.parseInt(portArg); + + /* + * Socket Mapped Buffer to communicate between server process, client process and the calling parent process. 
+ */ + File socketfile = new File(Helper.OUTPUT_FOLDER_NAME + "/" + parentTestName + "/SocketsFile"); + channel = FileChannel.open(socketfile.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE); + socketMappedBuffer = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE); + channel.close(); + + logger = Logger.getLogger(Helper.PACKAGE_NAME + "." + TESTNAME); + + SocketHelper.init(TESTNAME, parentTestName, logger); + logger.log(Level.INFO, "Begin"); + logger.log(Level.INFO, "Parent name: " + parentTestName); + + RuntimeMXBean bean = ManagementFactory.getRuntimeMXBean(); + String pid = bean.getName(); + SocketHelper.writePid(parentTestName, pid); + + logger.log(Level.INFO, "Socket buffer mapped"); + + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != Helper.STATE_INIT) { + logger.log(Level.SEVERE, "Socket-buffer not in expected Init state"); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + } + + ServerSocket ser = new ServerSocket(port); + logger.log(Level.INFO, "Server will be listening on Port " + port); + + /* + * Timeout after 5 second if client does not connect + */ + ser.setSoTimeout(5 * 1000); + logger.log(Level.INFO, "Waiting for client to connect"); + Socket socket = null; + try { + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, SocketHelper.STATE_LISTEN); + socket = ser.accept(); + } catch (Exception e) { + logger.log(Level.SEVERE, "Timed out while waiting for client to connect"); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + } + BufferedReader br = new BufferedReader(new InputStreamReader(socket.getInputStream())); + PrintStream outstream = new PrintStream(socket.getOutputStream()); + + readMssg = br.readLine(); + logger.log(Level.INFO, "Read message 1: " + readMssg); + if (!msg1.equals(readMssg)) { + logger.log(Level.SEVERE, "Message 1 received was wrong:rec " + readMssg + " expected: " + msg1); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, 
Helper.STATE_END); + } + + logger.log(Level.INFO, "Sending message: " + msg2); + outstream.println(msg2); + + logger.log(Level.INFO, "Going to checkpoint"); + /* + * Put socket Mapped Buffer to 'to be checkpointed' state and wait for restore + */ + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_CHECKPOINT); + SocketHelper.socketWaitForRestore(socketMappedBuffer, logger); + + if (!ser.isBound()) { + logger.log(Level.SEVERE, "Server is not bound to a port"); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + + if (ser.getLocalPort() != port) { + logger.log(Level.SEVERE, "Server is not listening on correct port"); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + + readMssg = br.readLine(); + logger.log(Level.INFO, "Read message 3: " + readMssg); + + if (!msg3.equals(readMssg)) { + logger.log(Level.SEVERE, "Message 3 received was wrong:rec " + readMssg + " expected: " + msg3); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + socket.close(); + System.exit(1); + } + + outstream.println(msg4); + logger.log(Level.INFO, "Sent message 4 " + msg4); + + /* + * Put Socket-MappedBuffer to state SocketHelper.STATE_SUCCESS telling the server process has ended successfully. 
+ */ + socket.close(); + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_FAIL || socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_END) { + System.exit(1); + } else { + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, SocketHelper.STATE_SUCCESS); + } + + } catch (Exception exception) { + if (null != logger) { + StringWriter writer = new StringWriter(); + PrintWriter printWriter = new PrintWriter(writer); + exception.printStackTrace(printWriter); + logger.log(Level.SEVERE, "Exception occurred:" + exception); + logger.log(Level.FINE, writer.toString()); + } + + if (socketMappedBuffer != null) { + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + } + } +} diff --git a/test/javaTests/test.xml b/test/javaTests/test.xml index b73a31db2..4768bf193 100644 --- a/test/javaTests/test.xml +++ b/test/javaTests/test.xml @@ -40,4 +40,50 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + From 0c218746d587483346d5dcb7638b642392a4985f Mon Sep 17 00:00:00 2001 From: Nidhi Gupta Date: Thu, 9 Jan 2020 22:43:25 +0530 Subject: [PATCH 0117/1854] Switch open-j9 alpine tests to python3 Signed-off-by: Nidhi Gupta --- scripts/build/Dockerfile.openj9-alpine | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/build/Dockerfile.openj9-alpine b/scripts/build/Dockerfile.openj9-alpine index 43a993444..39ea4d08e 100644 --- a/scripts/build/Dockerfile.openj9-alpine +++ b/scripts/build/Dockerfile.openj9-alpine @@ -14,7 +14,7 @@ RUN apk update && apk add \ pkgconfig \ protobuf-c-dev \ protobuf-dev \ - python \ + python3 \ sudo \ maven \ ip6tables \ From f1abc9aa26421b0c0ea52a703590cd998e676b55 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Tue, 14 Jan 2020 14:47:18 +0100 Subject: [PATCH 0118/1854] ppc64le: remove register '1' from clobber list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Compiling 'criu-dev' on Fedora 31 gives two 
errors about wrong clobber lists: compel/include/uapi/compel/asm/sigframe.h:47:9: error: listing the stack pointer register ‘1’ in a clobber list is deprecated [-Werror=deprecated] criu/arch/ppc64/include/asm/restore.h:14:2: error: listing the stack pointer register ‘1’ in a clobber list is deprecated [-Werror=deprecated] There was also a bug report from Debian that CRIU does not build because of this. Each of these errors comes with the following note: note: the value of the stack pointer after an ‘asm’ statement must be the same as it was before the statement As far as I understand it, this should not be a problem in these cases, as the code never returns anyway. Running zdtm very rarely fails during 'zdtm/static/cgroup_ifpriomap' with a double free or corruption. This does not happen often, and I cannot verify whether it also happens without this patch, as CRIU does not build without it. Signed-off-by: Adrian Reber --- compel/arch/ppc64/src/lib/include/uapi/asm/sigframe.h | 2 +- criu/arch/ppc64/include/asm/restore.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/compel/arch/ppc64/src/lib/include/uapi/asm/sigframe.h b/compel/arch/ppc64/src/lib/include/uapi/asm/sigframe.h index 9467a1b99..5c98b199d 100644 --- a/compel/arch/ppc64/src/lib/include/uapi/asm/sigframe.h +++ b/compel/arch/ppc64/src/lib/include/uapi/asm/sigframe.h @@ -50,7 +50,7 @@ struct rt_sigframe { "sc \n" \ : \ : "r"(new_sp) \ - : "1", "memory") + : "memory") #if _CALL_ELF != 2 # error Only supporting ABIv2. 
diff --git a/criu/arch/ppc64/include/asm/restore.h b/criu/arch/ppc64/include/asm/restore.h index 8d4516090..f065ec3a0 100644 --- a/criu/arch/ppc64/include/asm/restore.h +++ b/criu/arch/ppc64/include/asm/restore.h @@ -21,7 +21,7 @@ : "r"(new_sp), \ "r"((unsigned long)restore_task_exec_start), \ "r"(task_args) \ - : "1", "3", "12") + : "3", "12") /* There is nothing to do since TLS is accessed through r13 */ #define core_get_tls(pcore, ptls) From 4232b270b8a0c866b9ee26e7b5f381cbf7cea56a Mon Sep 17 00:00:00 2001 From: Valeriy Vdovin Date: Fri, 10 Jan 2020 15:57:50 +0300 Subject: [PATCH 0119/1854] image: core -- Reserve start_time field To ensure consistency of the runtime environment, processes within a container need to see the same start time values over suspend/resume cycles. We introduce a new field to the core image structure to store the start time of a dumped process. Later the same value would be restored to a newly created task. In the future the feature is likely to be pulled here, so we reserve the field id in the protobuf descriptor. Signed-off-by: Valeriy Vdovin --- images/core.proto | 2 ++ 1 file changed, 2 insertions(+) diff --git a/images/core.proto b/images/core.proto index c3dba6f6d..e90522914 100644 --- a/images/core.proto +++ b/images/core.proto @@ -53,6 +53,8 @@ message task_core_entry { //optional int32 tty_pgrp = 17; optional bool child_subreaper = 18; + // Reserved for container relative start time + //optional uint64 start_time = 19; } message task_kobj_ids_entry { From 8fea2647b69fc0be0cac6a43639ed648cdd08db6 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Thu, 9 Jan 2020 18:54:50 +0000 Subject: [PATCH 0120/1854] travis: reduce the number of podman tests We are running each podman test loop 50 times. This takes more than 20 minutes in Travis. Reduce both test loops to only run 20 times. 
Signed-off-by: Adrian Reber --- scripts/travis/podman-test.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/travis/podman-test.sh b/scripts/travis/podman-test.sh index 5189477cd..825bca746 100755 --- a/scripts/travis/podman-test.sh +++ b/scripts/travis/podman-test.sh @@ -35,7 +35,7 @@ criu --version podman run --name cr -d docker.io/library/alpine /bin/sh -c 'i=0; while true; do echo $i; i=$(expr $i + 1); sleep 1; done' sleep 1 -for i in `seq 50`; do +for i in `seq 20`; do echo "Test $i for podman container checkpoint" podman exec cr ps axf podman logs cr @@ -48,7 +48,7 @@ for i in `seq 50`; do podman logs cr done -for i in `seq 50`; do +for i in `seq 20`; do echo "Test $i for podman container checkpoint --export" podman ps -a podman exec cr ps axf From ca02c47075b69c3387d03ae2a09ab9499d5bd27f Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Sun, 15 Dec 2019 20:38:46 +0000 Subject: [PATCH 0121/1854] kerndat: detect if system support clone3() with set_tid Linux kernel 5.4 extends clone3() with set_tid to allow processes to specify the PID of a newly created process. This introduces detection of the clone3() syscall and if set_tid is supported. This first implementation is X86_64 only. 
Signed-off-by: Adrian Reber --- .../arch/arm/plugins/std/syscalls/syscall.def | 1 + .../plugins/std/syscalls/syscall-ppc64.tbl | 1 + .../plugins/std/syscalls/syscall-s390.tbl | 1 + .../x86/plugins/std/syscalls/syscall_32.tbl | 1 + .../x86/plugins/std/syscalls/syscall_64.tbl | 1 + .../plugins/include/uapi/std/syscall-types.h | 1 + criu/cr-check.c | 12 ++++++ criu/include/kerndat.h | 1 + criu/include/sched.h | 33 +++++++++++++++ criu/kerndat.c | 41 +++++++++++++++++++ 10 files changed, 93 insertions(+) create mode 100644 criu/include/sched.h diff --git a/compel/arch/arm/plugins/std/syscalls/syscall.def b/compel/arch/arm/plugins/std/syscalls/syscall.def index d5bdc677e..f7ebc8527 100644 --- a/compel/arch/arm/plugins/std/syscalls/syscall.def +++ b/compel/arch/arm/plugins/std/syscalls/syscall.def @@ -115,3 +115,4 @@ ppoll 73 336 (struct pollfd *fds, unsigned int nfds, const struct timespec *t fsopen 430 430 (char *fsname, unsigned int flags) fsconfig 431 431 (int fd, unsigned int cmd, const char *key, const char *value, int aux) fsmount 432 432 (int fd, unsigned int flags, unsigned int attr_flags) +clone3 435 435 (struct clone_args *uargs, size_t size) diff --git a/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl b/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl index 4e283d5e9..1afaf1e70 100644 --- a/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl +++ b/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl @@ -111,3 +111,4 @@ __NR_ppoll 281 sys_ppoll (struct pollfd *fds, unsigned int nfds, const struct __NR_fsopen 430 sys_fsopen (char *fsname, unsigned int flags) __NR_fsconfig 431 sys_fsconfig (int fd, unsigned int cmd, const char *key, const char *value, int aux) __NR_fsmount 432 sys_fsmount (int fd, unsigned int flags, unsigned int attr_flags) +__NR_clone3 435 sys_clone3 (struct clone_args *uargs, size_t size) diff --git a/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl b/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl index 
fd48e3950..ae6fdb5f8 100644 --- a/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl +++ b/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl @@ -111,3 +111,4 @@ __NR_ppoll 302 sys_ppoll (struct pollfd *fds, unsigned int nfds, const struct __NR_fsopen 430 sys_fsopen (char *fsname, unsigned int flags) __NR_fsconfig 431 sys_fsconfig (int fd, unsigned int cmd, const char *key, const char *value, int aux) __NR_fsmount 432 sys_fsmount (int fd, unsigned int flags, unsigned int attr_flags) +__NR_clone3 435 sys_clone3 (struct clone_args *uargs, size_t size) diff --git a/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl b/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl index 038aeb4f7..7a487110d 100644 --- a/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl +++ b/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl @@ -99,3 +99,4 @@ __NR_ppoll 309 sys_ppoll (struct pollfd *fds, unsigned int nfds, const struct __NR_fsopen 430 sys_fsopen (char *fsname, unsigned int flags) __NR_fsconfig 431 sys_fsconfig (int fd, unsigned int cmd, const char *key, const char *value, int aux) __NR_fsmount 432 sys_fsmount (int fd, unsigned int flags, unsigned int attr_flags) +__NR_clone3 435 sys_clone3 (struct clone_args *uargs, size_t size) diff --git a/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl b/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl index 215f32026..6667c07db 100644 --- a/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl +++ b/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl @@ -110,3 +110,4 @@ __NR_ppoll 271 sys_ppoll (struct pollfd *fds, unsigned int nfds, const struc __NR_fsopen 430 sys_fsopen (char *fsname, unsigned int flags) __NR_fsconfig 431 sys_fsconfig (int fd, unsigned int cmd, const char *key, const char *value, int aux) __NR_fsmount 432 sys_fsmount (int fd, unsigned int flags, unsigned int attr_flags) +__NR_clone3 435 sys_clone3 (struct clone_args *uargs, size_t size) diff --git a/compel/plugins/include/uapi/std/syscall-types.h 
b/compel/plugins/include/uapi/std/syscall-types.h index 57865e741..031e773bb 100644 --- a/compel/plugins/include/uapi/std/syscall-types.h +++ b/compel/plugins/include/uapi/std/syscall-types.h @@ -39,6 +39,7 @@ struct msghdr; struct rusage; struct iocb; struct pollfd; +struct clone_args; typedef unsigned long aio_context_t; diff --git a/criu/cr-check.c b/criu/cr-check.c index 17dd29b42..80df3f7cd 100644 --- a/criu/cr-check.c +++ b/criu/cr-check.c @@ -1224,6 +1224,16 @@ static int check_uffd_noncoop(void) return 0; } +static int check_clone3_set_tid(void) +{ + if (!kdat.has_clone3_set_tid) { + pr_warn("clone3() with set_tid not supported\n"); + return -1; + } + + return 0; +} + static int check_can_map_vdso(void) { if (kdat_can_map_vdso() == 1) @@ -1373,6 +1383,7 @@ int cr_check(void) ret |= check_sk_netns(); ret |= check_kcmp_epoll(); ret |= check_net_diag_raw(); + ret |= check_clone3_set_tid(); } /* @@ -1476,6 +1487,7 @@ static struct feature_list feature_list[] = { { "link_nsid", check_link_nsid}, { "kcmp_epoll", check_kcmp_epoll}, { "external_net_ns", check_external_net_ns}, + { "clone3_set_tid", check_clone3_set_tid}, { NULL, NULL }, }; diff --git a/criu/include/kerndat.h b/criu/include/kerndat.h index 771195860..27c870bb8 100644 --- a/criu/include/kerndat.h +++ b/criu/include/kerndat.h @@ -66,6 +66,7 @@ struct kerndat_s { bool has_inotify_setnextwd; bool has_kcmp_epoll_tfd; bool has_fsopen; + bool has_clone3_set_tid; }; extern struct kerndat_s kdat; diff --git a/criu/include/sched.h b/criu/include/sched.h new file mode 100644 index 000000000..78f65e3b7 --- /dev/null +++ b/criu/include/sched.h @@ -0,0 +1,33 @@ +#ifndef __CR_SCHED_H__ +#define __CR_SCHED_H__ + +#include + +#ifndef ptr_to_u64 +#define ptr_to_u64(ptr) ((__u64)((uintptr_t)(ptr))) +#endif +#ifndef u64_to_ptr +#define u64_to_ptr(x) ((void *)(uintptr_t)x) +#endif + +/* + * This structure is needed by clone3(). The kernel + * calls it 'struct clone_args'. 
As CRIU will always + * need at least this part of the structure (VER1) + * to be able to test if clone3() with set_tid works, + * the structure is defined here as 'struct _clone_args'. + */ + +struct _clone_args { + __aligned_u64 flags; + __aligned_u64 pidfd; + __aligned_u64 child_tid; + __aligned_u64 parent_tid; + __aligned_u64 exit_signal; + __aligned_u64 stack; + __aligned_u64 stack_size; + __aligned_u64 tls; + __aligned_u64 set_tid; + __aligned_u64 set_tid_size; +}; +#endif /* __CR_SCHED_H__ */ diff --git a/criu/kerndat.c b/criu/kerndat.c index d1afde71d..0772828bc 100644 --- a/criu/kerndat.c +++ b/criu/kerndat.c @@ -41,6 +41,7 @@ #include "uffd.h" #include "vdso.h" #include "kcmp.h" +#include "sched.h" struct kerndat_s kdat = { }; @@ -986,6 +987,44 @@ static int kerndat_tun_netns(void) return check_tun_netns_cr(&kdat.tun_ns); } +static bool kerndat_has_clone3_set_tid(void) +{ + pid_t pid; + struct _clone_args args = {}; + +#ifndef CONFIG_X86_64 + /* + * Currently the CRIU PIE assembler clone3() wrapper is + * only implemented for X86_64. + */ + kdat.has_clone3_set_tid = false; + return 0; +#endif + + args.set_tid = -1; + /* + * On a system without clone3() this will return ENOSYS. + * On a system with clone3() but without set_tid this + * will return E2BIG. + * On a system with clone3() and set_tid it will return + * EINVAL. 
+ */ + pid = syscall(__NR_clone3, &args, sizeof(args)); + + if (pid == -1 && (errno == ENOSYS || errno == E2BIG)) { + kdat.has_clone3_set_tid = false; + return 0; + } + if (pid == -1 && errno == EINVAL) { + kdat.has_clone3_set_tid = true; + } else { + pr_perror("Unexpected error from clone3\n"); + return -1; + } + + return 0; +} + int kerndat_init(void) { int ret; @@ -1059,6 +1098,8 @@ int kerndat_init(void) ret = has_kcmp_epoll_tfd(); if (!ret) ret = kerndat_has_fsopen(); + if (!ret) + ret = kerndat_has_clone3_set_tid(); kerndat_lsm(); kerndat_mmap_min_addr(); From 97c03b97d00e27397ca2ea0f9b5569739e24ae27 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Mon, 16 Dec 2019 07:57:03 +0000 Subject: [PATCH 0122/1854] Add assembler wrapper for clone3() To create a new process/thread with a certain PID based on clone3() a new assembler wrapper is necessary as there is not glibc wrapper (yet). Signed-off-by: Adrian Reber --- criu/arch/aarch64/include/asm/restorer.h | 7 ++ criu/arch/arm/include/asm/restorer.h | 7 ++ criu/arch/ppc64/include/asm/restorer.h | 7 ++ criu/arch/s390/include/asm/restorer.h | 7 ++ criu/arch/x86/include/asm/restorer.h | 92 ++++++++++++++++++++++++ 5 files changed, 120 insertions(+) diff --git a/criu/arch/aarch64/include/asm/restorer.h b/criu/arch/aarch64/include/asm/restorer.h index f502cdcaf..2fe58915b 100644 --- a/criu/arch/aarch64/include/asm/restorer.h +++ b/criu/arch/aarch64/include/asm/restorer.h @@ -42,6 +42,13 @@ "r"(&thread_args[i]) \ : "x0", "x1", "x2", "x3", "x8", "memory") +#define RUN_CLONE3_RESTORE_FN(ret, clone_args, size, args, \ + clone_restore_fn) do { \ + pr_err("This architecture does not support clone3() with set_tid, yet!\n"); \ + pr_err("Not creating a process with PID: %d\n", ((pid_t *)u64_to_ptr(clone_args.set_tid))[0]); \ + ret = -1; \ +} while (0) + #define ARCH_FAIL_CORE_RESTORE \ asm volatile( \ "mov sp, %0 \n" \ diff --git a/criu/arch/arm/include/asm/restorer.h b/criu/arch/arm/include/asm/restorer.h index 
217d920e8..ad4b58f93 100644 --- a/criu/arch/arm/include/asm/restorer.h +++ b/criu/arch/arm/include/asm/restorer.h @@ -43,6 +43,13 @@ "r"(&thread_args[i]) \ : "r0", "r1", "r2", "r3", "r7", "memory") +#define RUN_CLONE3_RESTORE_FN(ret, clone_args, size, args, \ + clone_restore_fn) do { \ + pr_err("This architecture does not support clone3() with set_tid, yet!\n"); \ + pr_err("Not creating a process with PID: %d\n", ((pid_t *)u64_to_ptr(clone_args.set_tid))[0]); \ + ret = -1; \ +} while (0) + #define ARCH_FAIL_CORE_RESTORE \ asm volatile( \ "mov sp, %0 \n" \ diff --git a/criu/arch/ppc64/include/asm/restorer.h b/criu/arch/ppc64/include/asm/restorer.h index d48d833d6..19bc3ea36 100644 --- a/criu/arch/ppc64/include/asm/restorer.h +++ b/criu/arch/ppc64/include/asm/restorer.h @@ -48,6 +48,13 @@ "r"(&thread_args[i]) /* %6 */ \ : "memory","0","3","4","5","6","7","14","15") +#define RUN_CLONE3_RESTORE_FN(ret, clone_args, size, args, \ + clone_restore_fn) do { \ + pr_err("This architecture does not support clone3() with set_tid, yet!\n"); \ + pr_err("Not creating a process with PID: %d\n", ((pid_t *)u64_to_ptr(clone_args.set_tid))[0]); \ + ret = -1; \ +} while (0) + #define arch_map_vdso(map, compat) -1 int restore_gpregs(struct rt_sigframe *f, UserPpc64RegsEntry *r); diff --git a/criu/arch/s390/include/asm/restorer.h b/criu/arch/s390/include/asm/restorer.h index cfdefcab9..733f2de33 100644 --- a/criu/arch/s390/include/asm/restorer.h +++ b/criu/arch/s390/include/asm/restorer.h @@ -39,6 +39,13 @@ "d"(&thread_args[i]) \ : "0", "1", "2", "3", "4", "5", "6", "cc", "memory") +#define RUN_CLONE3_RESTORE_FN(ret, clone_args, size, args, \ + clone_restore_fn) do { \ + pr_err("This architecture does not support clone3() with set_tid, yet!\n"); \ + pr_err("Not creating a process with PID: %d\n", ((pid_t *)u64_to_ptr(clone_args.set_tid))[0]); \ + ret = -1; \ +} while (0) + #define arch_map_vdso(map, compat) -1 int restore_gpregs(struct rt_sigframe *f, UserS390RegsEntry *r); diff --git 
a/criu/arch/x86/include/asm/restorer.h b/criu/arch/x86/include/asm/restorer.h index 25559b57c..731477ec9 100644 --- a/criu/arch/x86/include/asm/restorer.h +++ b/criu/arch/x86/include/asm/restorer.h @@ -25,6 +25,21 @@ static inline int set_compat_robust_list(uint32_t head_ptr, uint32_t len) } #endif /* !CONFIG_COMPAT */ +/* + * Documentation copied from glibc sysdeps/unix/sysv/linux/x86_64/clone.S + * The kernel expects: + * rax: system call number + * rdi: flags + * rsi: child_stack + * rdx: TID field in parent + * r10: TID field in child + * r8: thread pointer + * + * int clone(unsigned long clone_flags, unsigned long newsp, + * int *parent_tidptr, int *child_tidptr, + * unsigned long tls); + */ + #define RUN_CLONE_RESTORE_FN(ret, clone_flags, new_sp, parent_tid, \ thread_args, clone_restore_fn) \ asm volatile( \ @@ -63,6 +78,83 @@ static inline int set_compat_robust_list(uint32_t head_ptr, uint32_t len) "g"(&thread_args[i]) \ : "rax", "rcx", "rdi", "rsi", "rdx", "r10", "r11", "memory") +/* int clone3(struct clone_args *args, size_t size) */ +#define RUN_CLONE3_RESTORE_FN(ret, clone_args, size, args, \ + clone_restore_fn) \ + asm volatile( \ + "clone3_emul: \n" \ + /* + * Prepare stack pointer for child process. The kernel does + * stack + stack_size before passing the stack pointer to the + * child process. As we have to put the function and the + * arguments for the new process on that stack we have handle + * the kernel's implicit stack + stack_size. + */ \ + "movq (%3), %%rsi /* new stack pointer */ \n" \ + /* Move the stack_size to %rax to use later as the offset */ \ + "movq %4, %%rax \n" \ + /* 16 bytes are needed on the stack for function and args */ \ + "subq $16, (%%rsi, %%rax) \n" \ + "movq %6, %%rdi /* thread args */ \n" \ + "movq %%rdi, 8(%%rsi, %%rax) \n" \ + "movq %5, %%rdi /* thread function */ \n" \ + "movq %%rdi, 0(%%rsi, %%rax) \n" \ + /* + * The stack address has been modified for the two + * elements above (child function, child arguments). 
+ * This modified stack needs to be stored back into the + * clone_args structure. + */ \ + "movq (%%rsi), %3 \n" \ + /* + * Do the actual clone3() syscall. First argument (%rdi) is + * the clone_args structure, second argument is the size + * of clone_args. + */ \ + "movq %1, %%rdi /* clone_args */ \n" \ + "movq %2, %%rsi /* size */ \n" \ + "movl $"__stringify(__NR_clone3)", %%eax \n" \ + "syscall \n" \ + /* + * If clone3() was successful and if we are in the child + * '0' is returned. Jump to the child function handler. + */ \ + "testq %%rax,%%rax \n" \ + "jz thread3_run \n" \ + /* Return the PID to the parent process. */ \ + "movq %%rax, %0 \n" \ + "jmp clone3_end \n" \ + \ + "thread3_run: /* Child process */ \n" \ + /* Clear the frame pointer */ \ + "xorq %%rbp, %%rbp \n" \ + /* Pop the child function from the stack */ \ + "popq %%rax \n" \ + /* Pop the child function arguments from the stack */ \ + "popq %%rdi \n" \ + /* Run the child function */ \ + "callq *%%rax \n" \ + /* + * If the child function is expected to return, this + * would be the place to handle the return code. In CRIU's + * case the child function is expected to not return + * and do exit() itself. + */ \ + \ + "clone3_end: \n" \ + : "=r"(ret) \ + /* + * This uses the "r" modifier for all parameters + * as clang complained if using "g". + */ \ + : "r"(&clone_args), \ + "r"(size), \ + "r"(&clone_args.stack), \ + "r"(clone_args.stack_size), \ + "r"(clone_restore_fn), \ + "r"(args) \ + : "rax", "rcx", "rdi", "rsi", "rdx", "r10", "r11", "memory") + #define ARCH_FAIL_CORE_RESTORE \ asm volatile( \ "movq %0, %%rsp \n" \ From a1ea8deb4c0824dc2178dbe116c9b10f81a608aa Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Mon, 16 Dec 2019 10:42:13 +0000 Subject: [PATCH 0123/1854] Use clone3() with set_tid to create processes With the in Linux Kernel 5.4 introduced clone3() with set_tid it is no longer necessary to write to to /proc/../ns_last_pid to influence the next PID number. 
clone3() can directly select a PID for the newly created process/thread. After checking for the availability of clone3() with set_tid and adding the assembler wrapper for clone3() in previous patches, this extends criu/pie/restorer.c and criu/clone-noasan.c to use the newly added assembler clone3() wrapper to create processes with a certain PID. This is a RFC and WIP, but I wanted to share it and run it through CI for feedback. As the CI will probably not use a 5.4 based kernel it should just keep on working as before. Signed-off-by: Adrian Reber --- criu/clone-noasan.c | 32 +++++++++++++++++++ criu/cr-restore.c | 64 +++++++++++++++++++++++-------------- criu/include/clone-noasan.h | 2 ++ criu/include/restorer.h | 1 + criu/include/rst_info.h | 1 + criu/pie/restorer.c | 64 ++++++++++++++++++++++++------------- 6 files changed, 117 insertions(+), 47 deletions(-) diff --git a/criu/clone-noasan.c b/criu/clone-noasan.c index 5f1858d4d..bcbc3e4bd 100644 --- a/criu/clone-noasan.c +++ b/criu/clone-noasan.c @@ -1,4 +1,10 @@ +#include #include +#include + +#include + +#include "sched.h" #include "common/compiler.h" #include "log.h" #include "common/bug.h" @@ -31,6 +37,7 @@ int clone_noasan(int (*fn)(void *), int flags, void *arg) { void *stack_ptr = (void *)round_down((unsigned long)&stack_ptr - 1024, 16); + BUG_ON((flags & CLONE_VM) && !(flags & CLONE_VFORK)); /* * Reserve some bytes for clone() internal needs @@ -38,3 +45,28 @@ int clone_noasan(int (*fn)(void *), int flags, void *arg) */ return clone(fn, stack_ptr, flags, arg); } + +int clone3_with_pid_noasan(int (*fn)(void *), void *arg, int flags, + int exit_signal, pid_t pid) +{ + struct _clone_args c_args = {}; + + BUG_ON(flags & CLONE_VM); + + /* + * Make sure no child signals are requested. clone3() uses + * exit_signal for that. 
+ */ + BUG_ON(flags & 0xff); + + pr_debug("Creating process using clone3()\n"); + + c_args.exit_signal = exit_signal; + c_args.flags = flags; + c_args.set_tid = ptr_to_u64(&pid); + c_args.set_tid_size = 1; + pid = syscall(__NR_clone3, &c_args, sizeof(c_args)); + if (pid == 0) + exit(fn(arg)); + return pid; +} diff --git a/criu/cr-restore.c b/criu/cr-restore.c index 687cd6c68..b4f8d9e75 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -1374,40 +1374,55 @@ static inline int fork_with_pid(struct pstree_item *item) if (!(ca.clone_flags & CLONE_NEWPID)) { char buf[32]; int len; - int fd; + int fd = -1; - fd = open_proc_rw(PROC_GEN, LAST_PID_PATH); - if (fd < 0) - goto err; + if (!kdat.has_clone3_set_tid) { + fd = open_proc_rw(PROC_GEN, LAST_PID_PATH); + if (fd < 0) + goto err; + } lock_last_pid(); - len = snprintf(buf, sizeof(buf), "%d", pid - 1); - if (write(fd, buf, len) != len) { - pr_perror("%d: Write %s to %s", pid, buf, LAST_PID_PATH); + if (!kdat.has_clone3_set_tid) { + len = snprintf(buf, sizeof(buf), "%d", pid - 1); + if (write(fd, buf, len) != len) { + pr_perror("%d: Write %s to %s", pid, buf, + LAST_PID_PATH); + close(fd); + goto err_unlock; + } close(fd); - goto err_unlock; } - close(fd); } else { BUG_ON(pid != INIT_PID); } - /* - * Some kernel modules, such as network packet generator - * run kernel thread upon net-namespace creattion taking - * the @pid we've been requeting via LAST_PID_PATH interface - * so that we can't restore a take with pid needed. - * - * Here is an idea -- unhare net namespace in callee instead. - */ - /* - * The cgroup namespace is also unshared explicitly in the - * move_in_cgroup(), so drop this flag here as well. 
- */ - close_pid_proc(); - ret = clone_noasan(restore_task_with_children, - (ca.clone_flags & ~(CLONE_NEWNET | CLONE_NEWCGROUP)) | SIGCHLD, &ca); + if (kdat.has_clone3_set_tid) { + ret = clone3_with_pid_noasan(restore_task_with_children, + &ca, (ca.clone_flags & + ~(CLONE_NEWNET | CLONE_NEWCGROUP)), + SIGCHLD, pid); + } else { + /* + * Some kernel modules, such as network packet generator + * run kernel thread upon net-namespace creation taking + * the @pid we've been requesting via LAST_PID_PATH interface + * so that we can't restore a take with pid needed. + * + * Here is an idea -- unshare net namespace in callee instead. + */ + /* + * The cgroup namespace is also unshared explicitly in the + * move_in_cgroup(), so drop this flag here as well. + */ + close_pid_proc(); + ret = clone_noasan(restore_task_with_children, + (ca.clone_flags & + ~(CLONE_NEWNET | CLONE_NEWCGROUP)) | SIGCHLD, + &ca); + } + if (ret < 0) { pr_perror("Can't fork for %d", pid); goto err_unlock; @@ -3588,6 +3603,7 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns task_args->vdso_maps_rt = vdso_maps_rt; task_args->vdso_rt_size = vdso_rt_size; task_args->can_map_vdso = kdat.can_map_vdso; + task_args->has_clone3_set_tid = kdat.has_clone3_set_tid; new_sp = restorer_stack(task_args->t->mz); diff --git a/criu/include/clone-noasan.h b/criu/include/clone-noasan.h index 8ef75fa73..0cfdaa1d9 100644 --- a/criu/include/clone-noasan.h +++ b/criu/include/clone-noasan.h @@ -2,5 +2,7 @@ #define __CR_CLONE_NOASAN_H__ int clone_noasan(int (*fn)(void *), int flags, void *arg); +int clone3_with_pid_noasan(int (*fn)(void *), void *arg, int flags, + int exit_signal, pid_t pid); #endif /* __CR_CLONE_NOASAN_H__ */ diff --git a/criu/include/restorer.h b/criu/include/restorer.h index b93807f5f..dfb4e6b71 100644 --- a/criu/include/restorer.h +++ b/criu/include/restorer.h @@ -221,6 +221,7 @@ struct task_restore_args { #endif int lsm_type; int child_subreaper; + bool has_clone3_set_tid; } 
__aligned(64); /* diff --git a/criu/include/rst_info.h b/criu/include/rst_info.h index 07c634f4a..3283849e4 100644 --- a/criu/include/rst_info.h +++ b/criu/include/rst_info.h @@ -4,6 +4,7 @@ #include "common/lock.h" #include "common/list.h" #include "vma.h" +#include "kerndat.h" struct task_entries { int nr_threads, nr_tasks, nr_helpers; diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c index 888eb8e65..7012b88a1 100644 --- a/criu/pie/restorer.c +++ b/criu/pie/restorer.c @@ -35,6 +35,7 @@ #include "sk-inet.h" #include "vma.h" #include "uffd.h" +#include "sched.h" #include "common/lock.h" #include "common/page.h" @@ -1771,16 +1772,19 @@ long __export_restore_task(struct task_restore_args *args) long clone_flags = CLONE_VM | CLONE_FILES | CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM | CLONE_FS; long last_pid_len; + pid_t thread_pid; long parent_tid; int i, fd = -1; - /* One level pid ns hierarhy */ - fd = sys_openat(args->proc_fd, LAST_PID_PATH, O_RDWR, 0); - if (fd < 0) { - pr_err("can't open last pid fd %d\n", fd); - goto core_restore_end; - } + if (!args->has_clone3_set_tid) { + /* One level pid ns hierarhy */ + fd = sys_openat(args->proc_fd, LAST_PID_PATH, O_RDWR, 0); + if (fd < 0) { + pr_err("can't open last pid fd %d\n", fd); + goto core_restore_end; + } + } mutex_lock(&task_entries_local->last_pid_mutex); for (i = 0; i < args->nr_threads; i++) { @@ -1791,24 +1795,38 @@ long __export_restore_task(struct task_restore_args *args) continue; new_sp = restorer_stack(thread_args[i].mz); - last_pid_len = std_vprint_num(last_pid_buf, sizeof(last_pid_buf), thread_args[i].pid - 1, &s); - sys_lseek(fd, 0, SEEK_SET); - ret = sys_write(fd, s, last_pid_len); - if (ret < 0) { - pr_err("Can't set last_pid %ld/%s\n", ret, last_pid_buf); - sys_close(fd); - mutex_unlock(&task_entries_local->last_pid_mutex); - goto core_restore_end; + if (args->has_clone3_set_tid) { + struct _clone_args c_args = {}; + thread_pid = thread_args[i].pid; + c_args.set_tid = 
ptr_to_u64(&thread_pid); + c_args.flags = clone_flags; + c_args.set_tid_size = 1; + /* The kernel does stack + stack_size. */ + c_args.stack = new_sp - RESTORE_STACK_SIZE; + c_args.stack_size = RESTORE_STACK_SIZE; + c_args.child_tid = ptr_to_u64(&thread_args[i].pid); + c_args.parent_tid = ptr_to_u64(&parent_tid); + pr_debug("Using clone3 to restore the process\n"); + RUN_CLONE3_RESTORE_FN(ret, c_args, sizeof(c_args), &thread_args[i], args->clone_restore_fn); + } else { + last_pid_len = std_vprint_num(last_pid_buf, sizeof(last_pid_buf), thread_args[i].pid - 1, &s); + sys_lseek(fd, 0, SEEK_SET); + ret = sys_write(fd, s, last_pid_len); + if (ret < 0) { + pr_err("Can't set last_pid %ld/%s\n", ret, last_pid_buf); + sys_close(fd); + mutex_unlock(&task_entries_local->last_pid_mutex); + goto core_restore_end; + } + + /* + * To achieve functionality like libc's clone() + * we need a pure assembly here, because clone()'ed + * thread will run with own stack and we must not + * have any additional instructions... oh, dear... + */ + RUN_CLONE_RESTORE_FN(ret, clone_flags, new_sp, parent_tid, thread_args, args->clone_restore_fn); } - - /* - * To achieve functionality like libc's clone() - * we need a pure assembly here, because clone()'ed - * thread will run with own stack and we must not - * have any additional instructions... oh, dear... 
- */ - - RUN_CLONE_RESTORE_FN(ret, clone_flags, new_sp, parent_tid, thread_args, args->clone_restore_fn); if (ret != thread_args[i].pid) { pr_err("Unable to create a thread: %ld\n", ret); mutex_unlock(&task_entries_local->last_pid_mutex); From 4c4f67a56be60300e734d92411e16e26928d3776 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Wed, 15 Jan 2020 14:27:09 +0100 Subject: [PATCH 0124/1854] s390x: remove stack pointer from clobber list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Just like on all other supported architectures gcc complains about the stack pointer register being part of the clobber list: error: listing the stack pointer register ‘15’ in a clobber list is deprecated [-Werror=deprecated] This removes the stack pointer from the clobber list. 'zdtm.py run -a' still runs without any errors after this change. Signed-off-by: Adrian Reber --- compel/arch/s390/src/lib/include/uapi/asm/sigframe.h | 2 +- criu/arch/s390/include/asm/restore.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/compel/arch/s390/src/lib/include/uapi/asm/sigframe.h b/compel/arch/s390/src/lib/include/uapi/asm/sigframe.h index b6b894473..c599ef3ab 100644 --- a/compel/arch/s390/src/lib/include/uapi/asm/sigframe.h +++ b/compel/arch/s390/src/lib/include/uapi/asm/sigframe.h @@ -66,7 +66,7 @@ struct rt_sigframe { "svc 0\n" \ : \ : "d" (new_sp) \ - : "15", "memory") + : "memory") #define RT_SIGFRAME_UC(rt_sigframe) (&rt_sigframe->uc) #define RT_SIGFRAME_REGIP(rt_sigframe) (rt_sigframe)->uc.uc_mcontext.regs.psw.addr diff --git a/criu/arch/s390/include/asm/restore.h b/criu/arch/s390/include/asm/restore.h index 6463d8e62..b77e36c77 100644 --- a/criu/arch/s390/include/asm/restore.h +++ b/criu/arch/s390/include/asm/restore.h @@ -18,7 +18,7 @@ : "d" (new_sp), \ "d"((unsigned long)restore_task_exec_start), \ "d" (task_args) \ - : "2", "14", "15", "memory") + : "2", "14", "memory") /* There is nothing to do since TLS is accessed through 
%a01 */ #define core_get_tls(pcore, ptls) From cbadd201cbd20b7e44b9f8edea932a9420f67230 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Thu, 16 Jan 2020 16:41:40 +0100 Subject: [PATCH 0125/1854] s390x: use clone3() if possible This adds the parasite clone3() with set_tid wrapper for s390x. In contrast to the x86_64 implementation the thread start address and arguments are not put on the thread stack but passed via r4 and r5. As those registers are caller-saved they still contain the correct value (thread start address and arguments) after returning from the syscall. Tested on 5.5.0-rc6. Signed-off-by: Adrian Reber --- criu/arch/s390/include/asm/restorer.h | 40 +++++++++++++++++++++++---- criu/kerndat.c | 4 +-- 2 files changed, 37 insertions(+), 7 deletions(-) diff --git a/criu/arch/s390/include/asm/restorer.h b/criu/arch/s390/include/asm/restorer.h index 733f2de33..2fc266535 100644 --- a/criu/arch/s390/include/asm/restorer.h +++ b/criu/arch/s390/include/asm/restorer.h @@ -40,11 +40,41 @@ : "0", "1", "2", "3", "4", "5", "6", "cc", "memory") #define RUN_CLONE3_RESTORE_FN(ret, clone_args, size, args, \ - clone_restore_fn) do { \ - pr_err("This architecture does not support clone3() with set_tid, yet!\n"); \ - pr_err("Not creating a process with PID: %d\n", ((pid_t *)u64_to_ptr(clone_args.set_tid))[0]); \ - ret = -1; \ -} while (0) + clone_restore_fn) \ + asm volatile( \ + /* + * clone3 only needs two arguments (r2, r3), this means + * we can use r4 and r5 for args and thread function. + * r4 and r5 are callee-saved and are not overwritten. + * No need to put these values on the child stack. + */ \ + "lgr %%r4,%4\n" /* Save args in %r4 */ \ + "lgr %%r5,%3\n" /* Save clone_restore_fn in %r5 */ \ + "lgr %%r2,%1\n" /* Parameter 1: clone_args */ \ + "lgr %%r3,%2\n" /* Parameter 2: size */ \ + /* + * On s390x a syscall is done sc . + * That only works for syscalls < 255. clone3 is 435, + * therefore it is necessary to load the syscall number + * into r1 and do 'svc 0'. 
+ */ \ + "lghi %%r1,"__stringify(__NR_clone3)"\n" \ + "svc 0\n" \ + "ltgr %0,%%r2\n" /* Set and check "ret" */ \ + "jnz 0f\n" /* ret != 0: Continue caller */ \ + "lgr %%r2,%%r4\n" /* Thread arguments taken from r4. */ \ + "lgr %%r1,%%r5\n" /* Thread function taken from r5. */ \ + "aghi %%r15,-160\n" /* Prepare stack frame */ \ + "xc 0(8,%%r15),0(%%r15)\n" \ + "basr %%r14,%%r1\n" /* Jump to clone_restore_fn() */ \ + "j .+2\n" /* BUG(): Force PGM check */ \ +"0:\n" /* Continue caller */ \ + : "=d"(ret) \ + : "a"(&clone_args), \ + "d"(size), \ + "d"(clone_restore_fn), \ + "d"(args) \ + : "0", "1", "2", "3", "4", "5", "cc", "memory") #define arch_map_vdso(map, compat) -1 diff --git a/criu/kerndat.c b/criu/kerndat.c index 0772828bc..2261cca60 100644 --- a/criu/kerndat.c +++ b/criu/kerndat.c @@ -992,10 +992,10 @@ static bool kerndat_has_clone3_set_tid(void) pid_t pid; struct _clone_args args = {}; -#ifndef CONFIG_X86_64 +#if !defined(CONFIG_X86_64) && !defined(CONFIG_S390) /* * Currently the CRIU PIE assembler clone3() wrapper is - * only implemented for X86_64. + * only implemented for X86_64 and S390X. */ kdat.has_clone3_set_tid = false; return 0; From 55c8ec62a53e2728b09ee93d36fd6dd36eeb0c49 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Tue, 21 Jan 2020 14:20:05 +0100 Subject: [PATCH 0126/1854] arm: remove stack pointer from clobber list Just like on all other supported architectures gcc complains about the stack pointer register being part of the clobber list. This removes the stack pointer from the clobber list. 
Signed-off-by: Adrian Reber --- criu/arch/arm/include/asm/restore.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/criu/arch/arm/include/asm/restore.h b/criu/arch/arm/include/asm/restore.h index 4c64d58ef..c3b64c5b7 100644 --- a/criu/arch/arm/include/asm/restore.h +++ b/criu/arch/arm/include/asm/restore.h @@ -16,7 +16,7 @@ : "r"(new_sp), \ "r"(restore_task_exec_start), \ "r"(task_args) \ - : "sp", "r0", "r1", "memory") + : "r0", "r1", "memory") static inline void core_get_tls(CoreEntry *pcore, tls_t *ptls) { From f6469493dd739d4c16c8230524fa5d5a88731b80 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Fri, 17 Jan 2020 13:35:48 +0100 Subject: [PATCH 0127/1854] ppc64le: use clone3() if possible This adds the parasite clone3() with set_tid wrapper for ppc64le. Signed-off-by: Adrian Reber --- criu/arch/ppc64/include/asm/restorer.h | 46 ++++++++++++++++++++++---- criu/kerndat.c | 4 +-- 2 files changed, 42 insertions(+), 8 deletions(-) diff --git a/criu/arch/ppc64/include/asm/restorer.h b/criu/arch/ppc64/include/asm/restorer.h index 19bc3ea36..c447eefea 100644 --- a/criu/arch/ppc64/include/asm/restorer.h +++ b/criu/arch/ppc64/include/asm/restorer.h @@ -48,12 +48,46 @@ "r"(&thread_args[i]) /* %6 */ \ : "memory","0","3","4","5","6","7","14","15") -#define RUN_CLONE3_RESTORE_FN(ret, clone_args, size, args, \ - clone_restore_fn) do { \ - pr_err("This architecture does not support clone3() with set_tid, yet!\n"); \ - pr_err("Not creating a process with PID: %d\n", ((pid_t *)u64_to_ptr(clone_args.set_tid))[0]); \ - ret = -1; \ -} while (0) +#define RUN_CLONE3_RESTORE_FN(ret, clone_args, size, args, \ + clone_restore_fn) \ +/* + * The clone3() function accepts following parameters: + * int clone3(struct clone_args *args, size_t size) + * + * Always consult the CLONE3 wrappers for other architectures + * for additional details. + * + * For PPC64LE the first parameter (clone_args) is passed in r3 and + * the second parameter (size) is passed in r4. 
+ * + * This clone3() wrapper is based on the clone() wrapper from above. + */ \ + asm volatile( \ + "clone3_emul: \n" \ + "/* Save fn, args across syscall. */ \n" \ + "mr 14, %3 /* clone_restore_fn in r14 */ \n" \ + "mr 15, %4 /* &thread_args[i] in r15 */ \n" \ + "mr 3, %1 /* clone_args */ \n" \ + "mr 4, %2 /* size */ \n" \ + "li 0,"__stringify(__NR_clone3)" \n" \ + "sc \n" \ + "/* Check for child process. */ \n" \ + "cmpdi cr1,3,0 \n" \ + "crandc cr1*4+eq,cr1*4+eq,cr0*4+so \n" \ + "bne- cr1,clone3_end \n" \ + "/* child */ \n" \ + "addi 14, 14, 8 /* jump over r2 fixup */ \n" \ + "mtctr 14 \n" \ + "mr 3,15 \n" \ + "bctr \n" \ + "clone3_end: \n" \ + "mr %0,3 \n" \ + : "=r"(ret) /* %0 */ \ + : "r"(&clone_args), /* %1 */ \ + "r"(size), /* %2 */ \ + "r"(clone_restore_fn), /* %3 */ \ + "r"(args) /* %4 */ \ + : "memory","0","3","4","5","14","15") #define arch_map_vdso(map, compat) -1 diff --git a/criu/kerndat.c b/criu/kerndat.c index 2261cca60..c1fc9259b 100644 --- a/criu/kerndat.c +++ b/criu/kerndat.c @@ -992,10 +992,10 @@ static bool kerndat_has_clone3_set_tid(void) pid_t pid; struct _clone_args args = {}; -#if !defined(CONFIG_X86_64) && !defined(CONFIG_S390) +#if !defined(CONFIG_X86_64) && !defined(CONFIG_S390) && !defined(CONFIG_PPC64) /* * Currently the CRIU PIE assembler clone3() wrapper is - * only implemented for X86_64 and S390X. + * only implemented for X86_64, S390X and PPC64LE. */ kdat.has_clone3_set_tid = false; return 0; From 3dabd38a8292872dcf4c5710449a4c8017304ac2 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Sat, 25 Jan 2020 13:25:21 +0100 Subject: [PATCH 0128/1854] clone3: handle clone3() with CLONE_PARENT clone3() explicitly blocks setting an exit_signal if CLONE_PARENT is specified. With clone() it also did not work, but there was no error message. The exit signal from the thread group leader is taken. 
Signed-off-by: Adrian Reber --- criu/clone-noasan.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/criu/clone-noasan.c b/criu/clone-noasan.c index bcbc3e4bd..a2190ba0a 100644 --- a/criu/clone-noasan.c +++ b/criu/clone-noasan.c @@ -61,7 +61,19 @@ int clone3_with_pid_noasan(int (*fn)(void *), void *arg, int flags, pr_debug("Creating process using clone3()\n"); - c_args.exit_signal = exit_signal; + /* + * clone3() explicitly blocks setting an exit_signal + * if CLONE_PARENT is specified. With clone() it also + * did not work, but there was no error message. The + * exit signal from the thread group leader is taken. + */ + if (!(flags & CLONE_PARENT)) { + if (exit_signal != SIGCHLD) { + pr_err("Exit signal not SIGCHLD\n"); + return -1; + } + c_args.exit_signal = exit_signal; + } c_args.flags = flags; c_args.set_tid = ptr_to_u64(&pid); c_args.set_tid_size = 1; From f991f235064807a59ad8e4f98e82f34767b05e18 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Sun, 19 Jan 2020 21:42:58 +0100 Subject: [PATCH 0129/1854] aarch64: use clone3() if possible This adds the parasite clone3() with set_tid wrapper for aarch64. Tested on Fedora 31 with 5.5.0-rc6. 
Signed-off-by: Adrian Reber --- criu/arch/aarch64/include/asm/restorer.h | 67 +++++++++++++++++++++--- criu/kerndat.c | 4 +- 2 files changed, 63 insertions(+), 8 deletions(-) diff --git a/criu/arch/aarch64/include/asm/restorer.h b/criu/arch/aarch64/include/asm/restorer.h index 2fe58915b..120fa8fb2 100644 --- a/criu/arch/aarch64/include/asm/restorer.h +++ b/criu/arch/aarch64/include/asm/restorer.h @@ -42,12 +42,67 @@ "r"(&thread_args[i]) \ : "x0", "x1", "x2", "x3", "x8", "memory") -#define RUN_CLONE3_RESTORE_FN(ret, clone_args, size, args, \ - clone_restore_fn) do { \ - pr_err("This architecture does not support clone3() with set_tid, yet!\n"); \ - pr_err("Not creating a process with PID: %d\n", ((pid_t *)u64_to_ptr(clone_args.set_tid))[0]); \ - ret = -1; \ -} while (0) +/* + * Based on sysdeps/unix/sysv/linux/aarch64/clone.S + * + * int clone(int (*fn)(void *arg), x0 + * void *child_stack, x1 + * int flags, x2 + * void *arg, x3 + * pid_t *ptid, x4 + * struct user_desc *tls, x5 + * pid_t *ctid); x6 + * + * int clone3(struct clone_args *args, x0 + * size_t size); x1 + * + * Always consult the CLONE3 wrappers for other architectures + * for additional details. + * + */ + +#define RUN_CLONE3_RESTORE_FN(ret, clone_args, size, args, \ + clone_restore_fn) \ + asm volatile( \ + /* In contrast to the clone() wrapper above this does not put + * the thread function and its arguments on the child stack, + * but uses registers to pass these parameters to the child process. + * Based on the glibc clone() wrapper at + * sysdeps/unix/sysv/linux/aarch64/clone.S. + */ \ + "clone3_emul: \n" \ + /* + * Based on the glibc clone() wrapper, which uses x10 and x11 + * to save the arguments for the child process, this does the same. + * x10 for the thread function and x11 for the thread arguments. 
+ */ \ + "mov x10, %3 /* clone_restore_fn */ \n" \ + "mov x11, %4 /* args */ \n" \ + "mov x0, %1 /* &clone_args */ \n" \ + "mov x1, %2 /* size */ \n" \ + /* Load syscall number */ \ + "mov x8, #"__stringify(__NR_clone3)" \n" \ + /* Do the syscall */ \ + "svc #0 \n" \ + \ + "cbz x0, clone3_thread_run \n" \ + \ + "mov %0, x0 \n" \ + "b clone3_end \n" \ + \ + "clone3_thread_run: \n" \ + /* Move args to x0 */ \ + "mov x0, x11 \n" \ + /* Jump to clone_restore_fn */ \ + "br x10 \n" \ + \ + "clone3_end: \n" \ + : "=r"(ret) \ + : "r"(&clone_args), \ + "r"(size), \ + "r"(clone_restore_fn), \ + "r"(args) \ + : "x0", "x1", "x8", "x10", "x11", "memory") #define ARCH_FAIL_CORE_RESTORE \ asm volatile( \ diff --git a/criu/kerndat.c b/criu/kerndat.c index c1fc9259b..4070e01d2 100644 --- a/criu/kerndat.c +++ b/criu/kerndat.c @@ -992,10 +992,10 @@ static bool kerndat_has_clone3_set_tid(void) pid_t pid; struct _clone_args args = {}; -#if !defined(CONFIG_X86_64) && !defined(CONFIG_S390) && !defined(CONFIG_PPC64) +#if !defined(CONFIG_X86_64) && !defined(CONFIG_S390) && !defined(CONFIG_PPC64) && !defined(CONFIG_AARCH64) /* * Currently the CRIU PIE assembler clone3() wrapper is - * only implemented for X86_64, S390X and PPC64LE. + * only implemented for X86_64, S390X, AARCH64 and PPC64LE. */ kdat.has_clone3_set_tid = false; return 0; From 0e291d26c9e0258f60ae8921f0e03c89f332dc31 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Wed, 22 Jan 2020 19:41:41 +0100 Subject: [PATCH 0130/1854] arm: use clone3() if it exists This is the last architecture specific change to make CRIU use clone3() with set_tid if available. Just as on all other architectures this adds a clone3() based assembler wrapper to be used in the restorer code. Tested on Fedora 31 with the same 5.5.0-rc6 kernel as on the other architectures. 
Signed-off-by: Adrian Reber --- criu/arch/arm/include/asm/restorer.h | 62 +++++++++++++++++++++++++--- criu/kerndat.c | 9 ---- 2 files changed, 56 insertions(+), 15 deletions(-) diff --git a/criu/arch/arm/include/asm/restorer.h b/criu/arch/arm/include/asm/restorer.h index ad4b58f93..13ed15b26 100644 --- a/criu/arch/arm/include/asm/restorer.h +++ b/criu/arch/arm/include/asm/restorer.h @@ -43,12 +43,62 @@ "r"(&thread_args[i]) \ : "r0", "r1", "r2", "r3", "r7", "memory") -#define RUN_CLONE3_RESTORE_FN(ret, clone_args, size, args, \ - clone_restore_fn) do { \ - pr_err("This architecture does not support clone3() with set_tid, yet!\n"); \ - pr_err("Not creating a process with PID: %d\n", ((pid_t *)u64_to_ptr(clone_args.set_tid))[0]); \ - ret = -1; \ -} while (0) + +/* + * The clone3() assembler wrapper is based on the clone() wrapper above + * and on code from the glibc wrapper at + * sysdeps/unix/sysv/linux/arm/clone.S + * + * For arm it is necessary to change the child stack as on x86_64 as + * it seems there are not registers which stay the same over a syscall + * like on s390x, ppc64le and aarch64. + * + * Changing the child stack means that this code has to deal with the + * kernel doing stack + stack_size implicitly. 
+ * + * int clone3(struct clone_args *args, size_t size) + */ + +#define RUN_CLONE3_RESTORE_FN(ret, clone_args, size, args, \ + clone_restore_fn) \ + asm volatile( \ + "clone3_emul: \n" \ + /* Load thread stack pointer */ \ + "ldr r1, [%3] \n" \ + /* Load thread stack size */ \ + "mov r2, %4 \n" \ + /* Goto to the end of stack */ \ + "add r1, r1, r2 \n" \ + /* Load thread function and arguments and push on stack */ \ + "mov r2, %6 /* args */ \n" \ + "str r2, [r1, #4] /* args */ \n" \ + "mov r2, %5 /* function */ \n" \ + "str r2, [r1] /* function */ \n" \ + "mov r0, %1 /* clone_args */ \n" \ + "mov r1, %2 /* size */ \n" \ + "mov r7, #"__stringify(__NR_clone3)" \n" \ + "svc #0 \n" \ + \ + "cmp r0, #0 \n" \ + "beq thread3_run \n" \ + \ + "mov %0, r0 \n" \ + "b clone3_end \n" \ + \ + "thread3_run: \n" \ + "pop { r1 } \n" \ + "pop { r0 } \n" \ + "bx r1 \n" \ + \ + "clone3_end: \n" \ + : "=r"(ret) \ + : "r"(&clone_args), \ + "r"(size), \ + "r"(&clone_args.stack), \ + "r"(clone_args.stack_size), \ + "r"(clone_restore_fn), \ + "r"(args) \ + : "r0", "r1", "r2", "r7", "memory") #define ARCH_FAIL_CORE_RESTORE \ asm volatile( \ diff --git a/criu/kerndat.c b/criu/kerndat.c index 4070e01d2..e0b5731d5 100644 --- a/criu/kerndat.c +++ b/criu/kerndat.c @@ -992,15 +992,6 @@ static bool kerndat_has_clone3_set_tid(void) pid_t pid; struct _clone_args args = {}; -#if !defined(CONFIG_X86_64) && !defined(CONFIG_S390) && !defined(CONFIG_PPC64) && !defined(CONFIG_AARCH64) - /* - * Currently the CRIU PIE assembler clone3() wrapper is - * only implemented for X86_64, S390X, AARCH64 and PPC64LE. - */ - kdat.has_clone3_set_tid = false; - return 0; -#endif - args.set_tid = -1; /* * On a system without clone3() this will return ENOSYS. 
From f6de8d4ea9a2d0ce8f9d3373ab04a3c080348a86 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Tue, 4 Feb 2020 14:43:59 +0000 Subject: [PATCH 0131/1854] travis: fix warning and errors from validation This fixes the validation errors from Travis: Build config validation root: deprecated key sudo (The key `sudo` has no effect anymore.) root: missing os, using the default linux root: key matrix is an alias for jobs, using jobs Signed-off-by: Adrian Reber --- .travis.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 25dd6a29b..7c36af006 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,5 +1,5 @@ language: c -sudo: required +os: linux dist: bionic cache: ccache services: @@ -12,7 +12,7 @@ env: - TR_ARCH=x86_64 - TR_ARCH=x86_64 CLANG=1 - TR_ARCH=openj9-test -matrix: +jobs: include: - os: linux arch: ppc64le From c98af78c58e2168d2322cd0ee15837468fd4ffb0 Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Tue, 14 Jan 2020 12:04:40 +0300 Subject: [PATCH 0132/1854] compel: add -ffreestanding to force gcc not to use builtin memcpy, memset This patch fixes the problem with SSE (xmm) register corruption on the amd64 architecture. The problem was that gcc generates a parasite blob that uses xmm registers, but we don't preserve these registers in CRIU when injecting the parasite. Also, gcc, even with the -nostdlib option, uses builtin memcpy and memset functions that are optimized for amd64 and involve SSE registers. It seems that the optimal solution is to use the -ffreestanding gcc option to compile the parasite. This option implies -fno-builtin; it is also designed for compiling OS kernels and other code that is meant to work in non-hosted environments, and it could prevent similar bugs in the future. To check whether your amd64 CRIU build is affected by this problem you can simply run objdump -dS criu/pie/parasite.o | grep xmm Output should be empty.
Reported-by: Diyu Zhou Signed-off-by: Alexander Mikhalitsyn Signed-off-by: Alexander Mikhalitsyn --- compel/src/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compel/src/main.c b/compel/src/main.c index 51bac099f..8b2c8bc8d 100644 --- a/compel/src/main.c +++ b/compel/src/main.c @@ -21,7 +21,7 @@ #define CFLAGS_DEFAULT_SET \ "-Wstrict-prototypes " \ - "-fno-stack-protector -nostdlib -fomit-frame-pointer " + "-fno-stack-protector -nostdlib -fomit-frame-pointer -ffreestanding " #define COMPEL_CFLAGS_PIE CFLAGS_DEFAULT_SET "-fpie" #define COMPEL_CFLAGS_NOPIC CFLAGS_DEFAULT_SET "-fno-pic" From 8477875dc29e82485318b1c0f8482735755d5265 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Thu, 30 Jan 2020 15:21:54 -0800 Subject: [PATCH 0133/1854] doc/Makefile: don't hide xmlto stderr In case asciidoc is installed and xmlto is not, make returns an error but there's no diagnostics shown, since "xmlto: command not found" goes to /dev/null. Remove the redirect. Signed-off-by: Kir Kolyshkin --- Documentation/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/Makefile b/Documentation/Makefile index cbc7ff2c8..5025e2b99 100644 --- a/Documentation/Makefile +++ b/Documentation/Makefile @@ -54,7 +54,7 @@ ifneq ($(USE_ASCIIDOCTOR),) $(Q) $(ASCIIDOC) -b manpage -d manpage -o $@ $< else $(Q) $(ASCIIDOC) -b docbook -d manpage -o $(patsubst %.1,%.xml,$@) $< - $(Q) $(XMLTO) man -m custom.xsl $(patsubst %.1,%.xml,$@) 2>/dev/null + $(Q) $(XMLTO) man -m custom.xsl $(patsubst %.1,%.xml,$@) endif %.8: %.txt $(FOOTER) custom.xsl @@ -63,7 +63,7 @@ ifneq ($(USE_ASCIIDOCTOR),) $(Q) $(ASCIIDOC) -b manpage -d manpage -o $@ $< else $(Q) $(ASCIIDOC) -b docbook -d manpage -o $(patsubst %.8,%.xml,$@) $< - $(Q) $(XMLTO) man -m custom.xsl $(patsubst %.8,%.xml,$@) 2>/dev/null + $(Q) $(XMLTO) man -m custom.xsl $(patsubst %.8,%.xml,$@) endif %.ps: %.1 From a15426a111eb50e2339607225b0ab0d1dc49e0ed Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin 
Date: Thu, 30 Jan 2020 15:27:07 -0800 Subject: [PATCH 0134/1854] criu(8): some minor rewording 1. Add a/the articles where I see them missing 2. s/Forbid/disable/ 3. s/crit/crit(1)/ as we're referring to a man page 4. Simplify some descriptions Signed-off-by: Kir Kolyshkin --- Documentation/criu.txt | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/Documentation/criu.txt b/Documentation/criu.txt index 133a094c0..64b33ce6d 100644 --- a/Documentation/criu.txt +++ b/Documentation/criu.txt @@ -24,8 +24,8 @@ on a different system, or both. OPTIONS ------- -Most of the true / false long options (the ones without arguments) can be -prefixed with *--no-* to negate the option (example: *--display-stats* +Most of the long flags can be +prefixed with *no-* to negate the option (example: *--display-stats* and *--no-display-stats*). Common options @@ -33,9 +33,8 @@ Common options Common options are applicable to any 'command'. *-v*[*v*...], *--verbosity*:: - Increase verbosity up from the default level. Multiple *v* can be used, - each increasing verbosity by one level. Using long option without argument - increases verbosity by one level. + Increase verbosity up from the default level. In case of short option, + multiple *v* can be used, each increasing verbosity by one. *-v*'num', *--verbosity*='num':: Set verbosity level to 'num'. The higher the level, the more output @@ -57,22 +56,22 @@ The following levels are available: Pass a specific configuration file to criu. *--no-default-config*:: - Forbid parsing of default configuration files. + Disable parsing of default configuration files. *--pidfile* 'file':: Write root task, service or page-server pid into a 'file'. *-o*, *--log-file* 'file':: - Write logging messages to 'file'. + Write logging messages to a 'file'. 
*--display-stats*:: - During dump as well as during restore *criu* collects information - like the time required to dump or restore the process or the + During dump, as well as during restore, *criu* collects some statistics, + like the time required to dump or restore the process, or the number of pages dumped or restored. This information is always - written to the files 'stats-dump' and 'stats-restore' and can - be easily displayed using *crit*. The option *--display-stats* - additionally prints out this information on the console at the end - of a dump or a restore. + saved to the *stats-dump* and *stats-restore* files, and can + be shown using *crit*(1). The option *--display-stats* + prints out this information on the console at the end + of a dump or restore operation. *-D*, *--images-dir* 'path':: Use 'path' as a base directory where to look for sets of image files. From 23374b779898470016b2a0e95af56b0766aa6b3e Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Thu, 30 Jan 2020 15:30:57 -0800 Subject: [PATCH 0135/1854] criu(8): fix for asciidoctor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 0493724c8eda3 added support for using asciidoctor (instead of asciidoc + xmlto) to generate man pages. For some reason, asciidoctor does not deal well with some complex formatting that we use for options such as --external, leading to literal ’ and ' appearing in the man page instead of italic formatting. For example: > --inherit-fd fd[’N']:’resource' (here both N and resource should be in italic). 
Asciidoctor documentation (asciidoctor --help syntax) tells: > == Text Formatting > > .Constrained (applied at word boundaries) > *strong importance* (aka bold) > _stress emphasis_ (aka italic) > `monospaced` (aka typewriter text) > "`double`" and '`single`' typographic quotes > +passthrough text+ (substitutions disabled) > `+literal text+` (monospaced with substitutions disabled) > > .Unconstrained (applied anywhere) > **C**reate+**R**ead+**U**pdate+**D**elete > fan__freakin__tastic > ``mono``culture so I had to carefully replace *bold* with **bold** and 'italic' with __italic__ to make it all work. Tested with both terminal and postscript output, with both asciidoctor and asciidoc+xmlto. TODO: figure out how to fix examples (literal multi-line text), since asciidoctor does not display it in monospaced font (this is only true for postscript/pdf output so low priority). Signed-off-by: Kir Kolyshkin --- Documentation/criu.txt | 51 +++++++++++++++++++++--------------------- 1 file changed, 26 insertions(+), 25 deletions(-) diff --git a/Documentation/criu.txt b/Documentation/criu.txt index 64b33ce6d..a6b9f7fae 100644 --- a/Documentation/criu.txt +++ b/Documentation/criu.txt @@ -36,8 +36,8 @@ Common options are applicable to any 'command'. Increase verbosity up from the default level. In case of short option, multiple *v* can be used, each increasing verbosity by one. -*-v*'num', *--verbosity*='num':: - Set verbosity level to 'num'. The higher the level, the more output +**-v**__num__, **--verbosity=**__num__:: + Set verbosity level to _num_. The higher the level, the more output is produced. + The following levels are available: @@ -184,7 +184,7 @@ In other words, do not use it unless really needed. *-s*, *--leave-stopped*:: Leave tasks in stopped state after checkpoint, instead of killing. -*--external* 'type'*[*'id'*]:*'value':: +*--external* __type__**[**__id__**]:**__value__:: Dump an instance of an external resource. 
The generic syntax is 'type' of resource, followed by resource 'id' (enclosed in literal square brackets), and optional 'value' (prepended by a literal colon). @@ -193,35 +193,36 @@ In other words, do not use it unless really needed. Note to restore external resources, either *--external* or *--inherit-fd* is used, depending on resource type. -*--external mnt[*'mountpoint'*]:*'name':: +*--external* **mnt[**__mountpoint__**]:**__name__:: Dump an external bind mount referenced by 'mountpoint', saving it to image under the identifier 'name'. -*--external mnt[]:*'flags':: +*--external* **mnt[]:**__flags__:: Dump all external bind mounts, autodetecting those. Optional 'flags' can contain *m* to also dump external master mounts, *s* to also dump external shared mounts (default behavior is to abort dumping if such mounts are found). If 'flags' are not provided, colon is optional. -*--external dev[*'major'*/*'minor'*]:*'name':: +*--external* **dev[**__major__**/**__minor__**]:**__name__:: Allow to dump a mount namespace having a real block device mounted. A block device is identified by its 'major' and 'minor' numbers, and *criu* saves its information to image under the identifier 'name'. -*--external file[*'mnt_id'*:*'inode'*]*:: +*--external* **file[**__mnt_id__**:**__inode__**]**:: Dump an external file, i.e. an opened file that is can not be resolved from the current mount namespace, which can not be dumped without using this option. The file is identified by 'mnt_id' (a field obtained from - */proc/*'pid'*/fdinfo/*'N') and 'inode' (as returned by *stat*(2)). + **/proc/**__pid__**/fdinfo/**__N__) and 'inode' (as returned by + *stat*(2)). -*--external tty[*'rdev'*:*'dev'*]*:: +*--external* **tty[**__rdev__**:**__dev__**]**:: Dump an external TTY, identified by *st_rdev* and *st_dev* fields returned by *stat*(2). 
-*--external unix[*'id'*]*:: +*--external* **unix[**__id__**]**:: Tell *criu* that one end of a pair of UNIX sockets (created by - *socketpair*(2)) with 'id' is OK to be disconnected. + *socketpair*(2)) with the given _id_ is OK to be disconnected. *--freeze-cgroup*:: Use cgroup freezer to collect processes. @@ -379,7 +380,7 @@ By default the option is set to *fpu* and *ins*. ~~~~~~~~~ Restores previously checkpointed processes. -*--inherit-fd* *fd[*'N'*]:*'resource':: +*--inherit-fd* **fd[**__N__**]:**__resource__:: Inherit a file descriptor. This option lets *criu* use an already opened file descriptor 'N' for restoring a file identified by 'resource'. This option can be used to restore an external resource dumped @@ -387,10 +388,10 @@ Restores previously checkpointed processes. + The 'resource' argument can be one of the following: + - - *tty[*'rdev'*:*'dev'*]* - - *pipe[*'inode'*]* - - *socket[*'inode'*]* - - *file[*'mnt_id'*:*'inode'*]* + - **tty[**__rdev__**:**__dev__**]** + - **pipe[**__inode__**]** + - **socket[**__inode__*]* + - **file[**__mnt_id__**:**__inode__**]** - 'path/to/file' + @@ -416,7 +417,7 @@ usually need to be escaped from shell. This option is required to restore a mount namespace. The directory 'path' must be a mount point and its parent must not be overmounted. -*--external* 'type'*[*'id'*]:*'value':: +*--external* __type__**[**__id__**]:**__value__:: Restore an instance of an external resource. The generic syntax is 'type' of resource, followed by resource 'id' (enclosed in literal square brackets), and optional 'value' (prepended by a literal colon). @@ -426,7 +427,7 @@ usually need to be escaped from shell. the help of *--external* *file*, *tty*, and *unix* options), option *--inherit-fd* should be used. -*--external mnt[*'name'*]:*'mountpoint':: +*--external* **mnt[**__name__**]:**__mountpoint__:: Restore an external bind mount referenced in the image by 'name', bind-mounting it from the host 'mountpoint' to a proper mount point. 
@@ -434,17 +435,17 @@ usually need to be escaped from shell. Restore all external bind mounts (dumped with the help of *--external mnt[]* auto-detection). -*--external dev[*'name'*]:*'/dev/path':: +*--external* **dev[**__name__**]:**__/dev/path__:: Restore an external mount device, identified in the image by 'name', using the existing block device '/dev/path'. -*--external veth[*'inner_dev'*]:*'outer_dev'*@*'bridge':: +*--external* **veth[**__inner_dev__**]:**__outer_dev__**@**__bridge__:: Set the outer VETH device name (corresponding to 'inner_dev' being - restored) to 'outer_dev'. If optional *@*'bridge' is specified, + restored) to 'outer_dev'. If optional **@**_bridge_ is specified, 'outer_dev' is added to that bridge. If the option is not used, 'outer_dev' will be autogenerated by the kernel. -*--external macvlan[*'inner_dev'*]:*'outer_dev':: +*--external* **macvlan[**__inner_dev__**]:**__outer_dev__:: When restoring an image that have a MacVLAN device in it, this option must be used to specify to which 'outer_dev' (an existing network device in CRIU namespace) the restored 'inner_dev' should be bound to. @@ -489,14 +490,14 @@ The 'mode' may be one of the following: *--tcp-close*:: Restore connected TCP sockets in closed state. -*--veth-pair* 'IN'*=*'OUT':: +*--veth-pair* __IN__**=**__OUT__:: Correspondence between outside and inside names of veth devices. *-l*, *--file-locks*:: Restore file locks from the image. -*--lsm-profile* 'type'*:*'name':: - Specify an LSM profile to be used during restore. The `type` can be +*--lsm-profile* __type__**:**__name__:: + Specify an LSM profile to be used during restore. The _type_ can be either *apparmor* or *selinux*. 
*--auto-dedup*:: From 56258da17619883631d0d3c96ad583bc697f953e Mon Sep 17 00:00:00 2001 From: Sergei Trofimovich Date: Sun, 2 Feb 2020 18:45:59 +0000 Subject: [PATCH 0136/1854] criu: fix build failure against gcc-10 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On gcc-10 (and gcc-9 -fno-common) build fails as: ``` ld: criu/arch/x86/crtools.o:criu/include/cr_options.h:159: multiple definition of `rpc_cfg_file'; criu/arch/x86/cpu.o:criu/include/cr_options.h:159: first defined here make[2]: *** [scripts/nmk/scripts/build.mk:164: criu/arch/x86/crtools.built-in.o] Error 1 ``` gcc-10 will change the default from -fcommon to -fno-common: https://gcc.gnu.org/PR85678. The error also happens if CFLAGS=-fno-common is passed explicitly. Reported-by: Toralf Förster Bug: https://bugs.gentoo.org/707942 Signed-off-by: Sergei Trofimovich --- criu/config.c | 1 + criu/include/cr_options.h | 2 +- criu/include/pstree.h | 2 +- criu/include/tun.h | 2 +- 4 files changed, 4 insertions(+), 3 deletions(-) diff --git a/criu/config.c b/criu/config.c index 73c62f5bb..b84b7da28 100644 --- a/criu/config.c +++ b/criu/config.c @@ -30,6 +30,7 @@ #include "common/xmalloc.h" struct cr_options opts; +char *rpc_cfg_file; static int count_elements(char **to_count) { diff --git a/criu/include/cr_options.h b/criu/include/cr_options.h index c5af33186..ba405182e 100644 --- a/criu/include/cr_options.h +++ b/criu/include/cr_options.h @@ -155,7 +155,7 @@ struct cr_options { }; extern struct cr_options opts; -char *rpc_cfg_file; +extern char *rpc_cfg_file; extern int parse_options(int argc, char **argv, bool *usage_error, bool *has_exec_cmd, int state); extern int check_options(void); diff --git a/criu/include/pstree.h b/criu/include/pstree.h index 7303c1fed..61ab0ce0e 100644 --- a/criu/include/pstree.h +++ b/criu/include/pstree.h @@ -42,7 +42,7 @@ enum { }; #define FDS_EVENT (1 << FDS_EVENT_BIT) -struct pstree_item *current; +extern struct pstree_item *current; struct
rst_info; /* See alloc_pstree_item() for details */ diff --git a/criu/include/tun.h b/criu/include/tun.h index ce0b266a6..b82c445a7 100644 --- a/criu/include/tun.h +++ b/criu/include/tun.h @@ -5,7 +5,7 @@ #define TUN_MINOR 200 #endif -struct ns_id *ns; +extern struct ns_id *ns; #include From f1714ccce714093170a2616474cfc7b33298c75e Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Tue, 4 Feb 2020 23:12:22 -0800 Subject: [PATCH 0137/1854] test/vdso: check the code path when there is no API to map vDSO Signed-off-by: Andrei Vagin --- criu/crtools.c | 3 +++ criu/include/fault-injection.h | 1 + test/jenkins/criu-fault.sh | 5 +++++ 3 files changed, 9 insertions(+) diff --git a/criu/crtools.c b/criu/crtools.c index 9b6e94809..3cd40e87d 100644 --- a/criu/crtools.c +++ b/criu/crtools.c @@ -176,6 +176,9 @@ int main(int argc, char *argv[], char *envp[]) if (kerndat_init()) return 1; + if (fault_injected(FI_CANNOT_MAP_VDSO)) + kdat.can_map_vdso = 0; + if (opts.deprecated_ok) pr_debug("DEPRECATED ON\n"); diff --git a/criu/include/fault-injection.h b/criu/include/fault-injection.h index 852d27166..31fe16178 100644 --- a/criu/include/fault-injection.h +++ b/criu/include/fault-injection.h @@ -17,6 +17,7 @@ enum faults { FI_NO_BREAKPOINTS = 130, FI_PARTIAL_PAGES = 131, FI_HUGE_ANON_SHMEM_ID = 132, + FI_CANNOT_MAP_VDSO = 133, FI_MAX, }; diff --git a/test/jenkins/criu-fault.sh b/test/jenkins/criu-fault.sh index ec6d26f89..4e3790e59 100755 --- a/test/jenkins/criu-fault.sh +++ b/test/jenkins/criu-fault.sh @@ -12,6 +12,10 @@ prep ./test/zdtm.py run -t zdtm/static/vdso01 --fault 127 || fail ./test/zdtm.py run -t zdtm/static/vdso-proxy --fault 127 --iters 3 || fail +if [ "${COMPAT_TEST}" != "y" ] ; then + ./test/zdtm.py run -t zdtm/static/vdso01 --fault 133 -f h || fail +fi + ./test/zdtm.py run -t zdtm/static/mntns_ghost --fault 2 --keep-going --report report || fail ./test/zdtm.py run -t zdtm/static/mntns_ghost --fault 4 --keep-going --report report || fail @@ -23,3 +27,4 @@ prep
./test/zdtm.py run -t zdtm/static/maps04 --fault 131 --keep-going --report report --pre 2:1 || fail ./test/zdtm.py run -t zdtm/transition/maps008 --fault 131 --keep-going --report report --pre 2:1 || fail ./test/zdtm.py run -t zdtm/static/maps01 --fault 132 -f h || fail + From 9bc9366c94d2c81f706d56d0227bf32a2425eef1 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Tue, 4 Feb 2020 23:13:43 -0800 Subject: [PATCH 0138/1854] vdso: use correct offsets to remap vdso and vvar mappings In the current version, the offsets of remapping vvar and vdso regions are mixed up. If vdso is before vvar, vvar has to be mapped with the vdso_size offset. if vvar is before vdso, vdso has to be mapped with the vvar_size offset. Signed-off-by: Andrei Vagin --- criu/pie/parasite-vdso.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/criu/pie/parasite-vdso.c b/criu/pie/parasite-vdso.c index 38da76680..3a1684d35 100644 --- a/criu/pie/parasite-vdso.c +++ b/criu/pie/parasite-vdso.c @@ -119,9 +119,9 @@ int vdso_do_park(struct vdso_maps *rt, unsigned long addr, unsigned long space) BUG_ON((vdso_size + vvar_size) < space); if (rt->sym.vdso_before_vvar) - return park_at(rt, addr, addr + vvar_size); + return park_at(rt, addr, addr + vdso_size); else - return park_at(rt, addr + vdso_size, addr); + return park_at(rt, addr + vvar_size, addr); } #ifndef CONFIG_COMPAT From 0f438ceeed27f3473a9ebda6c9e15d593ceeebde Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Wed, 5 Feb 2020 22:33:02 +0000 Subject: [PATCH 0139/1854] typo: fix missing space in error message Signed-off-by: Nicolas Viennot --- criu/sk-unix.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/criu/sk-unix.c b/criu/sk-unix.c index f43aa2124..048ff44ae 100644 --- a/criu/sk-unix.c +++ b/criu/sk-unix.c @@ -1877,7 +1877,7 @@ static int open_unixsk_standalone(struct unix_sk_info *ui, int *new_fd) !(opts.ext_unix_sk)) { pr_err("External socket found in image. 
" "Consider using the --" USK_EXT_PARAM - "option to allow restoring it.\n"); + " option to allow restoring it.\n"); return -1; } From 72ff29070816e57b408f0bd6b8f71ff50c2e9cd4 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 6 Feb 2020 18:01:00 +0000 Subject: [PATCH 0140/1854] criu: Make use strlcpy() to copy into allocated strings strncpy() with n == strlen(src) won't put NULL-terminator in dst. Signed-off-by: Dmitry Safonov --- criu/cr-restore.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/criu/cr-restore.c b/criu/cr-restore.c index b4f8d9e75..c1dfc44f1 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -75,6 +75,7 @@ #include "sk-queue.h" #include "sigframe.h" #include "fdstore.h" +#include "string.h" #include "parasite-syscall.h" #include "files-reg.h" @@ -3142,7 +3143,7 @@ rst_prep_creds_args(CredsEntry *ce, unsigned long *prev_pos) args = rst_mem_remap_ptr(this_pos, RM_PRIVATE); args->lsm_profile = lsm_profile; - strncpy(args->lsm_profile, rendered, lsm_profile_len); + strlcpy(args->lsm_profile, rendered, lsm_profile_len + 1); xfree(rendered); } } else { @@ -3176,7 +3177,7 @@ rst_prep_creds_args(CredsEntry *ce, unsigned long *prev_pos) args = rst_mem_remap_ptr(this_pos, RM_PRIVATE); args->lsm_sockcreate = lsm_sockcreate; - strncpy(args->lsm_sockcreate, rendered, lsm_sockcreate_len); + strlcpy(args->lsm_sockcreate, rendered, lsm_sockcreate_len + 1); xfree(rendered); } } else { From 99346a28247a3abeae094008fefa2edbc78fbb4d Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Fri, 24 Jan 2020 11:55:00 +0000 Subject: [PATCH 0141/1854] zdtm: Make test_{doc,author} weak variables Allows to override them in every test, optionally. 
Signed-off-by: Dmitry Safonov --- test/zdtm/lib/parseargs.c | 4 ++-- test/zdtm/lib/zdtmtst.h | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/test/zdtm/lib/parseargs.c b/test/zdtm/lib/parseargs.c index 7e411f6b6..d8aa4ed63 100644 --- a/test/zdtm/lib/parseargs.c +++ b/test/zdtm/lib/parseargs.c @@ -113,8 +113,8 @@ static void helpexit(void) exit(1); } -const char *test_doc; -const char *test_author; +const char __attribute__((weak)) *test_doc; +const char __attribute__((weak)) *test_author; static void prdoc(void) { diff --git a/test/zdtm/lib/zdtmtst.h b/test/zdtm/lib/zdtmtst.h index 1fbf795bf..2cd4bdd1d 100644 --- a/test/zdtm/lib/zdtmtst.h +++ b/test/zdtm/lib/zdtmtst.h @@ -155,6 +155,9 @@ struct zdtm_tcp_opts { int flags; }; +extern const char *test_author; +extern const char *test_doc; + extern int tcp_init_server_with_opts(int family, int *port, struct zdtm_tcp_opts *opts); extern pid_t sys_clone_unified(unsigned long flags, void *child_stack, void *parent_tid, void *child_tid, unsigned long newtls); From 0022c28468714a5329fd41ec12d744340b250cd4 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Tue, 21 Jan 2020 12:31:00 +0000 Subject: [PATCH 0142/1854] vdso: Add vdso_is_present() helper Use it in kerndat to check if the kernel provides vDSO. 
Signed-off-by: Dmitry Safonov --- criu/include/util-vdso.h | 5 +++++ criu/vdso.c | 10 ++++++++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/criu/include/util-vdso.h b/criu/include/util-vdso.h index 33b7411de..046cd96d7 100644 --- a/criu/include/util-vdso.h +++ b/criu/include/util-vdso.h @@ -41,6 +41,11 @@ struct vdso_maps { bool compatible; }; +static inline bool vdso_is_present(struct vdso_maps *m) +{ + return m->vdso_start != VDSO_BAD_ADDR; +} + #define VDSO_SYMBOL_INIT { .offset = VDSO_BAD_ADDR, } #define VDSO_SYMTABLE_INIT \ diff --git a/criu/vdso.c b/criu/vdso.c index 50b8b8dba..b8df2d7a6 100644 --- a/criu/vdso.c +++ b/criu/vdso.c @@ -611,6 +611,12 @@ int kerndat_vdso_fill_symtable(void) return -1; } + if (!vdso_is_present(&vdso_maps)) { + pr_debug("Kernel doesn't premap vDSO - probably CONFIG_VDSO is not set\n"); + kdat.vdso_sym = vdso_maps.sym; + return 0; + } + if (vdso_fill_self_symtable(&vdso_maps)) { pr_err("Failed to fill self vdso symtable\n"); return -1; @@ -643,7 +649,7 @@ int kerndat_vdso_preserves_hint(void) kdat.vdso_hint_reliable = 0; - if (vdso_maps.vdso_start == VDSO_BAD_ADDR) + if (!vdso_is_present(&vdso_maps)) return 0; child = fork(); @@ -693,7 +699,7 @@ int kerndat_vdso_preserves_hint(void) goto out_kill; } - if (vdso_maps_after.vdso_start != VDSO_BAD_ADDR) + if (vdso_is_present(&vdso_maps_after)) kdat.vdso_hint_reliable = 1; ret = 0; From a96a7ed87fece0eeb397b9dd4901f680fbf2b4f6 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Wed, 22 Jan 2020 14:00:27 +0000 Subject: [PATCH 0143/1854] vdso: Repair !CONFIG_VDSO Apparently, C/R is broken when CONFIG_VDSO is not set. Probably, I've broken it while adding arm vdso support. Or maybe some commits after. Repair it by adding checks into vdso_init_dump(), vdso_init_restore(). Also, don't try handling vDSO in restorer if it wasn't present in parent. And prevent summing VDSO_BAD_SIZE to {vdso,vvar}_rt_size. 
Reported-by: Adrian Reber Signed-off-by: Dmitry Safonov --- criu/cr-restore.c | 9 +++++--- criu/pie/parasite-vdso.c | 12 ++++++++++ criu/pie/restorer.c | 2 +- criu/vdso.c | 48 ++++++++++++++++++++++++++-------------- 4 files changed, 50 insertions(+), 21 deletions(-) diff --git a/criu/cr-restore.c b/criu/cr-restore.c index c1dfc44f1..03dbc850f 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -3373,10 +3373,13 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns vdso_maps_rt = vdso_maps; /* * Figure out how much memory runtime vdso and vvar will need. + * Check if vDSO or VVAR is not provided by kernel. */ - vdso_rt_size = vdso_maps_rt.sym.vdso_size; - if (vdso_rt_size && vdso_maps_rt.sym.vvar_size) - vdso_rt_size += ALIGN(vdso_maps_rt.sym.vvar_size, PAGE_SIZE); + if (vdso_maps_rt.sym.vdso_size != VDSO_BAD_SIZE) { + vdso_rt_size = vdso_maps_rt.sym.vdso_size; + if (vdso_maps_rt.sym.vvar_size != VVAR_BAD_SIZE) + vdso_rt_size += ALIGN(vdso_maps_rt.sym.vvar_size, PAGE_SIZE); + } task_args->bootstrap_len += vdso_rt_size; /* diff --git a/criu/pie/parasite-vdso.c b/criu/pie/parasite-vdso.c index 3a1684d35..3f5cb1431 100644 --- a/criu/pie/parasite-vdso.c +++ b/criu/pie/parasite-vdso.c @@ -292,6 +292,18 @@ int vdso_proxify(struct vdso_maps *rt, bool *added_proxy, return -1; } + /* + * We could still do something about it here.. + * 1. Hope that vDSO from images still works (might not be the case). + * 2. Try to map vDSO. + * But, hopefully no one intends to migrate application that uses + * vDSO to a dut where kernel doesn't provide it. + */ + if (!vdso_is_present(rt)) { + pr_err("vDSO isn't provided by kernel, but exists in images\n"); + return -1; + } + /* * vDSO mark overwrites Elf program header of proxy vDSO thus * it must never ever be greater in size. 
diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c index 7012b88a1..afe185f04 100644 --- a/criu/pie/restorer.c +++ b/criu/pie/restorer.c @@ -1454,7 +1454,7 @@ long __export_restore_task(struct task_restore_args *args) * it's presence in original task: vdso will be used for fast * getttimeofday() in restorer's log timings. */ - if (!args->can_map_vdso) { + if (!args->can_map_vdso && vdso_is_present(&args->vdso_maps_rt)) { /* It's already checked in kdat, but let's check again */ if (args->compatible_mode) { pr_err("Compatible mode without vdso map support\n"); diff --git a/criu/vdso.c b/criu/vdso.c index b8df2d7a6..19ba4765d 100644 --- a/criu/vdso.c +++ b/criu/vdso.c @@ -275,6 +275,10 @@ int parasite_fixup_vdso(struct parasite_ctl *ctl, pid_t pid, struct vma_area *vma; int fd = -1; + /* vDSO is not provided by kernel */ + if (kdat.vdso_sym.vdso_size == VDSO_BAD_SIZE) + return 0; + vcheck = get_vdso_check_type(ctl); if (vcheck == VDSO_CHECK_PFN) { BUG_ON(vdso_pfn == VDSO_BAD_PFN); @@ -534,21 +538,6 @@ out_unmap: } #endif /* CONFIG_COMPAT */ -int vdso_init_dump(void) -{ - if (vdso_parse_maps(PROC_SELF, &vdso_maps)) { - pr_err("Failed reading self/maps for filling vdso/vvar bounds\n"); - return -1; - } - - if (kdat.pmap != PM_FULL) - pr_info("VDSO detection turned off\n"); - else if (vaddr_to_pfn(-1, vdso_maps.vdso_start, &vdso_pfn)) - return -1; - - return 0; -} - /* * Check vdso/vvar sized read from maps to kdat values. 
* We do not read /proc/self/maps for compatible vdso as it's @@ -566,11 +555,36 @@ static int is_kdat_vdso_sym_valid(void) return true; } +int vdso_init_dump(void) +{ + if (vdso_parse_maps(PROC_SELF, &vdso_maps)) { + pr_err("Failed reading self/maps for filling vdso/vvar bounds\n"); + return -1; + } + + if (!is_kdat_vdso_sym_valid()) { + pr_err("Kdat sizes of vdso/vvar differ to maps file \n"); + return -1; + } + + if (kdat.vdso_sym.vdso_size == VDSO_BAD_SIZE) { + pr_debug("Kdat has empty vdso symtable - probably CONFIG_VDSO is not set\n"); + return 0; + } + + if (kdat.pmap != PM_FULL) + pr_info("VDSO detection turned off\n"); + else if (vaddr_to_pfn(-1, vdso_maps.vdso_start, &vdso_pfn)) + return -1; + + return 0; +} + int vdso_init_restore(void) { if (kdat.vdso_sym.vdso_size == VDSO_BAD_SIZE) { - pr_err("Kdat has empty vdso symtable\n"); - return -1; + pr_debug("Kdat has empty vdso symtable - probably CONFIG_VDSO is not set\n"); + return 0; } /* Already filled vdso_maps during kdat test */ From 9cb4067e132ba48eb36b0b8075043b13e96e8974 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Wed, 22 Jan 2020 14:05:47 +0000 Subject: [PATCH 0144/1854] vdso: Don't page-align vvar It's always page-aligned (as any VMA). 
Signed-off-by: Dmitry Safonov --- criu/cr-restore.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/criu/cr-restore.c b/criu/cr-restore.c index 03dbc850f..e5e8fc9c5 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -3378,7 +3378,7 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns if (vdso_maps_rt.sym.vdso_size != VDSO_BAD_SIZE) { vdso_rt_size = vdso_maps_rt.sym.vdso_size; if (vdso_maps_rt.sym.vvar_size != VVAR_BAD_SIZE) - vdso_rt_size += ALIGN(vdso_maps_rt.sym.vvar_size, PAGE_SIZE); + vdso_rt_size += vdso_maps_rt.sym.vvar_size; } task_args->bootstrap_len += vdso_rt_size; From 3a4c33c502b8ed685d1ffe81b15d11159d43848a Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Sat, 18 Jan 2020 14:28:03 +0000 Subject: [PATCH 0145/1854] zdtm: mntns_rw_ro_rw update error msg Signed-off-by: Radostin Stoyanov --- test/zdtm/static/mntns_rw_ro_rw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/zdtm/static/mntns_rw_ro_rw.c b/test/zdtm/static/mntns_rw_ro_rw.c index 7aed254b6..6179c4788 100644 --- a/test/zdtm/static/mntns_rw_ro_rw.c +++ b/test/zdtm/static/mntns_rw_ro_rw.c @@ -31,12 +31,12 @@ int main(int argc, char **argv) test_waitsig(); if (access("/proc/sys/net/ipv4/ip_forward", W_OK)) { - fail("Unable to access /proc/sys/net/core/wmem_max"); + fail("Unable to access /proc/sys/net/ipv4/ip_forward"); return 1; } if (access("/proc/sys/kernel/ns_last_pid", W_OK) != -1 || errno != EROFS) { - fail("Unable to access /proc/sys/kernel/pid_max"); + fail("Unable to access /proc/sys/kernel/ns_last_pid"); return 1; } From f5181b2767d03f17e72dd6f70c83ce394b750e68 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Sat, 8 Feb 2020 19:43:39 +0100 Subject: [PATCH 0146/1854] Travis: fix podman test case Podman changed the output of 'podman ps'. For the test only running containers are interesting. Adding the filter '-f status=running' only returns running containers as previously. 
Signed-off-by: Adrian Reber --- scripts/travis/podman-test.sh | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/scripts/travis/podman-test.sh b/scripts/travis/podman-test.sh index 825bca746..7490d5fe9 100755 --- a/scripts/travis/podman-test.sh +++ b/scripts/travis/podman-test.sh @@ -39,12 +39,12 @@ for i in `seq 20`; do echo "Test $i for podman container checkpoint" podman exec cr ps axf podman logs cr - [ `podman ps -f name=cr -q | wc -l` -eq "1" ] + [ `podman ps -f name=cr -q -f status=running | wc -l` -eq "1" ] podman container checkpoint cr - [ `podman ps -f name=cr -q | wc -l` -eq "0" ] + [ `podman ps -f name=cr -q -f status=running | wc -l` -eq "0" ] podman ps -a podman container restore cr - [ `podman ps -f name=cr -q | wc -l` -eq "1" ] + [ `podman ps -f name=cr -q -f status=running | wc -l` -eq "1" ] podman logs cr done @@ -53,16 +53,16 @@ for i in `seq 20`; do podman ps -a podman exec cr ps axf podman logs cr - [ `podman ps -f name=cr -q | wc -l` -eq "1" ] + [ `podman ps -f name=cr -q -f status=running | wc -l` -eq "1" ] podman container checkpoint -l --export /tmp/chkpt.tar.gz - [ `podman ps -f name=cr -q | wc -l` -eq "0" ] + [ `podman ps -f name=cr -q -f status=running | wc -l` -eq "0" ] podman ps -a podman rm -fa podman ps -a podman container restore --import /tmp/chkpt.tar.gz - [ `podman ps -f name=cr -q | wc -l` -eq "1" ] + [ `podman ps -f name=cr -q -f status=running | wc -l` -eq "1" ] podman container restore --name cr2 --import /tmp/chkpt.tar.gz - [ `podman ps -f name=cr2 -q | wc -l` -eq "1" ] + [ `podman ps -f name=cr2 -q -f status=running | wc -l` -eq "1" ] podman ps -a podman logs cr podman logs cr2 @@ -70,7 +70,7 @@ for i in `seq 20`; do podman rm -fa podman ps -a podman container restore --import /tmp/chkpt.tar.gz - [ `podman ps -f name=cr -q | wc -l` -eq "1" ] + [ `podman ps -f name=cr -q -f status=running | wc -l` -eq "1" ] podman ps -a rm -f /tmp/chkpt.tar.gz done From 
d68a68b8f478e12bd457eccfbf8e49b50ca95e86 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Thu, 6 Feb 2020 20:46:17 -0800 Subject: [PATCH 0147/1854] test/zdtm/inhfd: update dump options one each iteration This allows to run inhfd tests with many iterations of C/R. Signed-off-by: Andrei Vagin --- test/zdtm.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/test/zdtm.py b/test/zdtm.py index 16ff0b379..47c89a162 100755 --- a/test/zdtm.py +++ b/test/zdtm.py @@ -757,6 +757,11 @@ class inhfd_test: fcntl.fcntl(fd, fcntl.F_SETFD, fdflags) peer_file_name = self.__peer_file_names[i] ropts.extend(["--inherit-fd", "fd[%d]:%s" % (fd, peer_file_name)]) + self.__peer_file_names = [] + self.__dump_opts = [] + for _, peer_file in self.__files: + self.__peer_file_names.append(self.__fdtyp.filename(peer_file)) + self.__dump_opts += self.__fdtyp.dump_opts(peer_file) return ropts def print_output(self): From ff756cbb28c4ee10651ed80f38b8ef37ee74fc39 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Thu, 6 Feb 2020 21:20:20 -0800 Subject: [PATCH 0148/1854] python: sort imports 202 Additional newline in a group of imports. I100 Import statements are in the wrong order. 
Signed-off-by: Andrei Vagin --- lib/py/images/pb2dict.py | 13 ++++++------ test/inhfd/socket.py | 2 +- test/others/rpc/config_file.py | 7 ++++--- test/zdtm.py | 38 ++++++++++++++++++---------------- 4 files changed, 32 insertions(+), 28 deletions(-) diff --git a/lib/py/images/pb2dict.py b/lib/py/images/pb2dict.py index 6fce4be22..a89850a1d 100644 --- a/lib/py/images/pb2dict.py +++ b/lib/py/images/pb2dict.py @@ -1,12 +1,13 @@ -from google.protobuf.descriptor import FieldDescriptor as FD -import opts_pb2 -from ipaddress import IPv4Address, ip_address -from ipaddress import IPv6Address -import socket +import base64 import collections import os -import base64 import quopri +import socket +from ipaddress import IPv4Address, IPv6Address, ip_address + +from google.protobuf.descriptor import FieldDescriptor as FD + +import opts_pb2 if "encodebytes" not in dir(base64): base64.encodebytes = base64.encodestring diff --git a/test/inhfd/socket.py b/test/inhfd/socket.py index 9cea16ffb..7efe7faab 100755 --- a/test/inhfd/socket.py +++ b/test/inhfd/socket.py @@ -1,5 +1,5 @@ -import socket import os +import socket def create_fds(): diff --git a/test/others/rpc/config_file.py b/test/others/rpc/config_file.py index 7b07bc145..90c80fcae 100755 --- a/test/others/rpc/config_file.py +++ b/test/others/rpc/config_file.py @@ -1,11 +1,12 @@ #!/usr/bin/python +import argparse import os import sys -import rpc_pb2 as rpc -import argparse -from tempfile import mkstemp import time +from tempfile import mkstemp + +import rpc_pb2 as rpc from setup_swrk import setup_swrk diff --git a/test/zdtm.py b/test/zdtm.py index 47c89a162..3fc57ba55 100755 --- a/test/zdtm.py +++ b/test/zdtm.py @@ -1,31 +1,33 @@ #!/usr/bin/env python # vim: noet ts=8 sw=8 sts=8 from __future__ import absolute_import, division, print_function, unicode_literals -from builtins import (str, open, range, zip, int, input) import argparse -import glob -import os -import subprocess -import time -import tempfile -import shutil -import re 
-import stat -import signal import atexit -import sys -import linecache -import random -import string -import fcntl -import errno import datetime -import yaml -import struct +import errno +import fcntl +import glob +import linecache import mmap +import os +import random +import re +import shutil +import signal +import stat +import string +import struct +import subprocess +import sys +import tempfile +import time +from builtins import (input, int, open, range, str, zip) + import pycriu as crpc +import yaml + os.chdir(os.path.dirname(os.path.abspath(__file__))) prev_line = None From 872b795a5678d82a415419e139d680cbf81391ff Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Fri, 21 Feb 2020 18:48:41 +0300 Subject: [PATCH 0149/1854] Maintainers: Suggest the maintainers codex (#932) The guide is based on the one from the RunC project, but has some criu-related specifics. Signed-off-by: Pavel Emelyanov --- MAINTAINERS | 2 + MAINTAINERS_GUIDE.md | 136 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 138 insertions(+) create mode 100644 MAINTAINERS create mode 100644 MAINTAINERS_GUIDE.md diff --git a/MAINTAINERS b/MAINTAINERS new file mode 100644 index 000000000..fbd5d03e8 --- /dev/null +++ b/MAINTAINERS @@ -0,0 +1,2 @@ +Pavel Emelyanov (chief) +Andrey Vagin diff --git a/MAINTAINERS_GUIDE.md b/MAINTAINERS_GUIDE.md new file mode 100644 index 000000000..2830a3caa --- /dev/null +++ b/MAINTAINERS_GUIDE.md @@ -0,0 +1,136 @@ +## Introduction + +Dear maintainer. Thank you for investing the time and energy to help +make CRIU as useful as possible. Maintaining a project is difficult, +sometimes unrewarding work. Sure, you will contribute cool features +to the project, but most of your time will be spent reviewing patches, +cleaning things up, documenting, answering questions, justifying design +decisions - while everyone else will just have fun! But remember -- the +quality of the maintainers work is what distinguishes the good projects +from the great. 
So please be proud of your work, even the unglamorous +parts, and encourage a culture of appreciation and respect for *every* +aspect of improving the project -- not just the hot new features. + +Being a maintainer is a time consuming commitment and should not be +taken lightly. This document is a manual for maintainers old and new. +It explains what is expected of maintainers, how they should work, and +what tools are available to them. + +This is a living document - if you see something out of date or missing, +speak up! + +## What are a maintainer's responsibility? + +Part of a healthy project is to have active maintainers to support the +community in contributions and perform tasks to keep the project running. +It is every maintainer's responsibility to: + + * Keep the community a friendly place + * Deliver prompt feedback and decisions on pull requests and mailing + list threads + * Encourage other members to help each other, especially in cases the + maintainer is overloaded or feels the lack of needed expertise + * Make sure the changes made respects the philosophy, design and + roadmap of the project + +## How are decisions made? + +CRIU is an open-source project with an open design philosophy. This +means that the repository is the source of truth for EVERY aspect of the +project. *If it's part of the project, it's in the repo. It's in the +repo, it's part of the project.* + +All decisions affecting CRIU, big and small, follow the same 3 steps: + + * Submit a change. Anyone can do this + + * Discuss it. Anyone can and is encouraged to do this + + * Accept or decline it. Only maintainers do this + +*I'm a maintainer, should I make pull requests / send patches too?* + +Yes. Nobody should ever push to the repository directly. All changes +should be made through submitting (and accepting) the change. 
+ +### Two-steps decision making ### + +Since CRIU is extremely complex piece of software we try double hard +not to make mistakes, that would be hard to fix in the future. In order +to facilitate this, the "final" decision is made in two stages: + + * We definitely want to try something out + + * We think that the attempt was successful + +Respectively, new features get accepted first into the *criu-dev* branch and +after they have been validated they are merged into the *master* branch. Yet, +urgent bug fixes may land directly in the master branch. If a change in +the criu-dev branch is considered to be bad (whatever it means), then it +can be reverted without propagation to the master branch. Reverting from +the master branch is expected not to happen at all, but if such an +extraordinary case occurs, the impact of this step, especially the question +of backward compatibility, should be considered in the most careful manner. + +## Who decides what? + +All decisions can be expressed as changes to the repository (either in the +form of pull requests, or patches sent to the mailing list), and maintainers +make decisions by merging or rejecting them. Review and approval or +disagreement can be done by anyone and is denoted by adding a respective +comment in the pull request. However, merging the change into either branch +only happens after approvals from maintainers. + +In order for a patch to be merged into the criu-dev branch at least two +maintainers should accept it. In order for a patch to be merged into the +master branch the majority of maintainers should decide that (then prepare +a pull request, submit it, etc.). + +Overall the maintainer system works because of mutual respect across the +maintainers of the project. The maintainers trust one another to make +decisions in the best interests of the project. Sometimes maintainers +can disagree and this is part of a healthy project to represent the point +of views of various people. 
In the case where maintainers cannot find +agreement on a specific change the role of a Chief Maintainer comes into +play. + +### Chief maintainer + +The chief maintainer for the project is responsible for overall architecture +of the project to maintain conceptual integrity. Large decisions and +architecture changes should be reviewed by the chief maintainer. + +Also the chief maintainer has the veto power on any change submitted +to any branch. Naturally, a change in the criu-dev branch can be reverted +after a chief maintainer veto, a change in the master branch must be +carefully reviwed by the chief maintainer and vetoed in advance. + +### How are maintainers added (and removed)? + +The best maintainers have a vested interest in the project. Maintainers +are first and foremost contributors that have shown they are committed to +the long term success of the project. Contributors wanting to become +maintainers are expected to be deeply involved in contributing code, +patches review, and paying needed attention to the issues in the project. +Just contributing does not make you a maintainer, it is about building trust +with the current maintainers of the project and being a person that they can +rely on and trust to make decisions in the best interest of the project. + +When a contributor wants to become a maintainer or nominate someone as a +maintainer, one can submit a "nomination", which technically is the +respective modification to the `MAINTAINERS` file. When a maintainer feels +they is unable to perform the required duties, or someone else wants to draw +the community attention to this fact, one can submit a "(self-)removing" +change. + +The final vote to add or to remove a maintainer is to be approved by the +majority of current maintainers (with the chief maintainer having veto power +on that too). + +One might have noticed, that the chief maintainer (re-)assignment is not +regulated by this document. That's true :) However, this can be done. 
If +the community decides that the chief maintainer needs to be changed the +respective "decision making rules" are to be prepared, submitted and +accepted into this file first. + +Good luck! From 42db2c1563544fc4307c95b8cc2cba8ddfc51262 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Wed, 26 Feb 2020 20:35:12 +0200 Subject: [PATCH 0150/1854] MAINTAINERS: add Mike Signed-off-by: Mike Rapoport --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index fbd5d03e8..ed5bf25c1 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1,2 +1,3 @@ Pavel Emelyanov (chief) Andrey Vagin +Mike Rapoport From e19f4cf3b120b91384cdd87fb138a319857f8d8b Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Tue, 3 Mar 2020 11:46:08 -0800 Subject: [PATCH 0151/1854] MAINTAINERS: Add Dima and Adrian to maintainers Signed-off-by: Andrei Vagin --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index ed5bf25c1..5c28463a7 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1,3 +1,5 @@ Pavel Emelyanov (chief) Andrey Vagin Mike Rapoport +Dmitry Safonov <0x7f454c46@gmail.com> +Adrian Reber From 5dbc24b206cd365db7498dddcd03798c5d8ed4e4 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Mon, 25 Nov 2019 09:50:08 +0300 Subject: [PATCH 0152/1854] util: introduce the mount_detached_fs helper Signed-off-by: Andrei Vagin --- criu/util.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/criu/util.c b/criu/util.c index 1646ce1c4..a0a49c5a3 100644 --- a/criu/util.c +++ b/criu/util.c @@ -30,6 +30,8 @@ #include "linux/mount.h" +#include "linux/mount.h" + #include "kerndat.h" #include "page.h" #include "util.h" From c1e72aa936bd86fb4cd819e84791e8a9a5c4d572 Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Wed, 18 Dec 2019 23:32:32 +0000 Subject: [PATCH 0153/1854] memfd: add file support See "man memfd_create" for more information of what memfd is. This adds support for memfd open files, that are not memory mapped.
* We add a new kind of file: MEMFD. * We add two image types MEMFD_FILE, and MEMFD_INODE. MEMFD_FILE contains usual file information (e.g., position). MEMFD_INODE contains the memfd name, and a shmid identifier referring to the content. * We reuse the shmem facilities for dumping memfd content as it would be easier to support incremental checkpoints in the future. Signed-off-by: Nicolas Viennot --- Makefile.config | 2 +- criu/Makefile.crtools | 1 + criu/cr-restore.c | 2 + criu/files.c | 18 +- criu/image-desc.c | 1 + criu/include/image-desc.h | 2 + criu/include/magic.h | 1 + criu/include/memfd.h | 24 +++ criu/include/protobuf-desc.h | 2 + criu/include/shmem.h | 3 + criu/kerndat.c | 3 +- criu/memfd.c | 350 +++++++++++++++++++++++++++++++++++ criu/shmem.c | 66 ++++++- images/Makefile | 1 + images/fdinfo.proto | 3 + images/memfd.proto | 20 ++ lib/py/images/images.py | 2 + scripts/feature-tests.mak | 11 ++ 18 files changed, 503 insertions(+), 9 deletions(-) create mode 100644 criu/include/memfd.h create mode 100644 criu/memfd.c create mode 100644 images/memfd.proto diff --git a/Makefile.config b/Makefile.config index 161365960..98ba5d892 100644 --- a/Makefile.config +++ b/Makefile.config @@ -64,7 +64,7 @@ export DEFINES += $(FEATURE_DEFINES) export CFLAGS += $(FEATURE_DEFINES) FEATURES_LIST := TCP_REPAIR STRLCPY STRLCAT PTRACE_PEEKSIGINFO \ - SETPROCTITLE_INIT MEMFD TCP_REPAIR_WINDOW FSCONFIG + SETPROCTITLE_INIT MEMFD TCP_REPAIR_WINDOW FSCONFIG MEMFD_CREATE # $1 - config name define gen-feature-test diff --git a/criu/Makefile.crtools b/criu/Makefile.crtools index 4588ea5b8..1a6e0b5b5 100644 --- a/criu/Makefile.crtools +++ b/criu/Makefile.crtools @@ -37,6 +37,7 @@ obj-y += libnetlink.o obj-y += log.o obj-y += lsm.o obj-y += mem.o +obj-y += memfd.o obj-y += mount.o obj-y += filesystems.o obj-y += namespaces.o diff --git a/criu/cr-restore.c b/criu/cr-restore.c index e5e8fc9c5..13d1001c9 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -76,6 +76,7 @@ #include
"sigframe.h" #include "fdstore.h" #include "string.h" +#include "memfd.h" #include "parasite-syscall.h" #include "files-reg.h" @@ -289,6 +290,7 @@ static struct collect_image_info *cinfos_files[] = { &fanotify_cinfo, &fanotify_mark_cinfo, &ext_file_cinfo, + &memfd_cinfo, }; /* These images are required to restore namespaces */ diff --git a/criu/files.c b/criu/files.c index e26897870..ea86deaa3 100644 --- a/criu/files.c +++ b/criu/files.c @@ -34,6 +34,7 @@ #include "sk-packet.h" #include "mount.h" #include "signalfd.h" +#include "memfd.h" #include "namespaces.h" #include "tun.h" #include "timerfd.h" @@ -546,13 +547,17 @@ static int dump_one_file(struct pid *pid, int fd, int lfd, struct fd_opts *opts, return -1; p.link = &link; - if (link.name[1] == '/') - return do_dump_gen_file(&p, lfd, &regfile_dump_ops, e); - if (check_ns_proc(&link)) - return do_dump_gen_file(&p, lfd, &nsfile_dump_ops, e); + if (is_memfd(p.stat.st_dev, &link.name[1])) + ops = &memfd_dump_ops; + else if (link.name[1] == '/') + ops = &regfile_dump_ops; + else if (check_ns_proc(&link)) + ops = &nsfile_dump_ops; + else + return dump_unsupp_fd(&p, lfd, "reg", link.name + 1, e); - return dump_unsupp_fd(&p, lfd, "reg", link.name + 1, e); + return do_dump_gen_file(&p, lfd, ops, e); } if (S_ISFIFO(p.stat.st_mode)) { @@ -1721,6 +1726,9 @@ static int collect_one_file(void *o, ProtobufCMessage *base, struct cr_img *i) case FD_TYPES__TTY: ret = collect_one_file_entry(fe, fe->tty->id, &fe->tty->base, &tty_cinfo); break; + case FD_TYPES__MEMFD: + ret = collect_one_file_entry(fe, fe->memfd->id, &fe->memfd->base, &memfd_cinfo); + break; } return ret; diff --git a/criu/image-desc.c b/criu/image-desc.c index ae5d817fe..b538a76ea 100644 --- a/criu/image-desc.c +++ b/criu/image-desc.c @@ -66,6 +66,7 @@ struct cr_fd_desc_tmpl imgset_template[CR_FD_MAX] = { FD_ENTRY(FS, "fs-%u"), FD_ENTRY(REMAP_FPATH, "remap-fpath"), FD_ENTRY_F(GHOST_FILE, "ghost-file-%x", O_NOBUF), + FD_ENTRY_F(MEMFD_INODE, "memfd-%u", O_NOBUF),
FD_ENTRY(TCP_STREAM, "tcp-stream-%x"), FD_ENTRY(MNTS, "mountpoints-%u"), FD_ENTRY(NETDEV, "netdev-%u"), diff --git a/criu/include/image-desc.h b/criu/include/image-desc.h index 6db8bf94f..9ca9643a1 100644 --- a/criu/include/image-desc.h +++ b/criu/include/image-desc.h @@ -106,6 +106,8 @@ enum { CR_FD_FIFO, CR_FD_PIPES, CR_FD_TTY_FILES, + CR_FD_MEMFD_FILE, + CR_FD_MEMFD_INODE, CR_FD_AUTOFS, diff --git a/criu/include/magic.h b/criu/include/magic.h index 1a583f4ed..bdaca968d 100644 --- a/criu/include/magic.h +++ b/criu/include/magic.h @@ -94,6 +94,7 @@ #define BINFMT_MISC_MAGIC 0x67343323 /* Apatity */ #define AUTOFS_MAGIC 0x49353943 /* Sochi */ #define FILES_MAGIC 0x56303138 /* Toropets */ +#define MEMFD_INODE_MAGIC 0x48453499 /* Dnipro */ #define IFADDR_MAGIC RAW_IMAGE_MAGIC #define ROUTE_MAGIC RAW_IMAGE_MAGIC diff --git a/criu/include/memfd.h b/criu/include/memfd.h new file mode 100644 index 000000000..c1d7949cb --- /dev/null +++ b/criu/include/memfd.h @@ -0,0 +1,24 @@ +#ifndef __CR_MEMFD_H__ +#define __CR_MEMFD_H__ + +#include +#include "int.h" +#include "common/config.h" + +extern int is_memfd(dev_t dev, const char *path); +extern const struct fdtype_ops memfd_dump_ops; + +extern struct collect_image_info memfd_cinfo; + +#ifdef CONFIG_HAS_MEMFD_CREATE +# include +#else +# include +# include +static inline int memfd_create(const char *name, unsigned int flags) +{ + return syscall(SYS_memfd_create, name, flags); +} +#endif /* CONFIG_HAS_MEMFD_CREATE */ + +#endif /* __CR_MEMFD_H__ */ diff --git a/criu/include/protobuf-desc.h b/criu/include/protobuf-desc.h index 31f5b9a79..7e0385ef4 100644 --- a/criu/include/protobuf-desc.h +++ b/criu/include/protobuf-desc.h @@ -61,6 +61,8 @@ enum { PB_AUTOFS, PB_GHOST_CHUNK, PB_FILE, + PB_MEMFD_FILE, + PB_MEMFD_INODE, /* 60 */ /* PB_AUTOGEN_STOP */ diff --git a/criu/include/shmem.h b/criu/include/shmem.h index 04ab8d076..9afdb799a 100644 --- a/criu/include/shmem.h +++ b/criu/include/shmem.h @@ -13,8 +13,11 @@ extern int 
collect_sysv_shmem(unsigned long shmid, unsigned long size); extern int cr_dump_shmem(void); extern int add_shmem_area(pid_t pid, VmaEntry *vma, u64 *map); extern int fixup_sysv_shmems(void); +extern int dump_one_memfd_shmem(int fd, unsigned long shmid, unsigned long size); extern int dump_one_sysv_shmem(void *addr, unsigned long size, unsigned long shmid); extern int restore_sysv_shmem_content(void *addr, unsigned long size, unsigned long shmid); +extern int restore_memfd_shmem_content(int fd, unsigned long shmid, unsigned long size); + #define SYSV_SHMEM_SKIP_FD (0x7fffffff) diff --git a/criu/kerndat.c b/criu/kerndat.c index e0b5731d5..8ac83820b 100644 --- a/criu/kerndat.c +++ b/criu/kerndat.c @@ -42,6 +42,7 @@ #include "vdso.h" #include "kcmp.h" #include "sched.h" +#include "memfd.h" struct kerndat_s kdat = { }; @@ -409,7 +410,7 @@ static bool kerndat_has_memfd_create(void) { int ret; - ret = syscall(SYS_memfd_create, NULL, 0); + ret = memfd_create(NULL, 0); if (ret == -1 && errno == ENOSYS) kdat.has_memfd = false; diff --git a/criu/memfd.c b/criu/memfd.c new file mode 100644 index 000000000..bcef35e75 --- /dev/null +++ b/criu/memfd.c @@ -0,0 +1,350 @@ +#include + +#include "common/compiler.h" +#include "common/lock.h" +#include "memfd.h" +#include "fdinfo.h" +#include "imgset.h" +#include "image.h" +#include "util.h" +#include "log.h" +#include "files.h" +#include "fs-magic.h" +#include "kerndat.h" +#include "files-reg.h" +#include "rst-malloc.h" +#include "fdstore.h" +#include "file-ids.h" +#include "namespaces.h" +#include "shmem.h" + +#include "protobuf.h" +#include "images/memfd.pb-c.h" + +#define MEMFD_PREFIX "/memfd:" +#define MEMFD_PREFIX_LEN (sizeof(MEMFD_PREFIX)-1) + +struct memfd_inode { + struct list_head list; + u32 id; + union { + /* Only for dump */ + struct { + u32 dev; + u32 ino; + }; + /* Only for restore */ + struct { + mutex_t lock; + int fdstore_id; + }; + }; +}; + +static LIST_HEAD(memfd_inodes); + +/* + * Dump only + */ + +static u32 
memfd_inode_ids = 1; + +int is_memfd(dev_t dev, const char *path) +{ + /* + * TODO When MAP_HUGETLB is used, the file device is not shmem_dev, + * Note that other parts of CRIU have similar issues, see + * is_anon_shmem_map(). + */ + return dev == kdat.shmem_dev && + !strncmp(path, MEMFD_PREFIX, MEMFD_PREFIX_LEN); +} + +static int dump_memfd_inode(int fd, struct memfd_inode *inode, + const char *name, const struct stat *st) +{ + int ret = -1; + struct cr_img *img = NULL; + MemfdInodeEntry mie = MEMFD_INODE_ENTRY__INIT; + u32 shmid; + + /* + * shmids are chosen as the inode number of the corresponding mmaped + * file. See handle_vma() in proc_parse.c. + * It works for memfd too, because we share the same device as the + * shmem device. + */ + shmid = inode->ino; + + pr_info("Dumping memfd:%s contents (id %#x, shmid: %#x, size: %"PRIu64")\n", + name, inode->id, shmid, st->st_size); + + if (dump_one_memfd_shmem(fd, shmid, st->st_size) < 0) + goto out; + + img = open_image(CR_FD_MEMFD_INODE, O_DUMP, inode->id); + if (!img) + goto out; + + mie.uid = userns_uid(st->st_uid); + mie.gid = userns_gid(st->st_gid); + mie.name = (char *)name; + mie.size = st->st_size; + mie.shmid = shmid; + + if (pb_write_one(img, &mie, PB_MEMFD_INODE)) + goto out; + + ret = 0; + +out: + if (img) + close_image(img); + return ret; +} + +static struct memfd_inode *dump_unique_memfd_inode(int lfd, const char *name, const struct stat *st) +{ + struct memfd_inode *inode; + + list_for_each_entry(inode, &memfd_inodes, list) + if ((inode->dev == st->st_dev) && (inode->ino == st->st_ino)) + return inode; + + inode = xmalloc(sizeof(*inode)); + if (inode == NULL) + return NULL; + + inode->dev = st->st_dev; + inode->ino = st->st_ino; + inode->id = memfd_inode_ids++; + + if (dump_memfd_inode(lfd, inode, name, st)) { + xfree(inode); + return NULL; + } + + list_add_tail(&inode->list, &memfd_inodes); + + return inode; +} + +static int dump_one_memfd(int lfd, u32 id, const struct fd_parms *p) +{ + 
MemfdFileEntry mfe = MEMFD_FILE_ENTRY__INIT; + FileEntry fe = FILE_ENTRY__INIT; + struct memfd_inode *inode; + struct fd_link _link, *link; + const char *name; + + if (!p->link) { + if (fill_fdlink(lfd, p, &_link)) + return -1; + link = &_link; + } else + link = p->link; + + strip_deleted(link); + name = &link->name[1+MEMFD_PREFIX_LEN]; + + inode = dump_unique_memfd_inode(lfd, name, &p->stat); + if (!inode) + return -1; + + mfe.id = id; + mfe.flags = p->flags; + mfe.pos = p->pos; + mfe.fown = (FownEntry *)&p->fown; + mfe.inode_id = inode->id; + + fe.type = FD_TYPES__MEMFD; + fe.id = mfe.id; + fe.memfd = &mfe; + + return pb_write_one(img_from_set(glob_imgset, CR_FD_FILES), &fe, PB_FILE); +} + +const struct fdtype_ops memfd_dump_ops = { + .type = FD_TYPES__MEMFD, + .dump = dump_one_memfd, +}; + + +/* + * Restore only + */ + +struct memfd_info { + MemfdFileEntry *mfe; + struct file_desc d; + struct memfd_inode *inode; +}; + +static struct memfd_inode *memfd_alloc_inode(int id) +{ + struct memfd_inode *inode; + + list_for_each_entry(inode, &memfd_inodes, list) + if (inode->id == id) + return inode; + + inode = shmalloc(sizeof(*inode)); + if (!inode) + return NULL; + + inode->id = id; + mutex_init(&inode->lock); + inode->fdstore_id = -1; + + list_add_tail(&inode->list, &memfd_inodes); + return inode; +} + +extern int restore_memfd_shm(int fd, u64 id, u64 size); +static int memfd_open_inode_nocache(struct memfd_inode *inode) +{ + MemfdInodeEntry *mie = NULL; + struct cr_img *img = NULL; + int fd = -1; + int ret = -1; + int flags; + + img = open_image(CR_FD_MEMFD_INODE, O_RSTR, inode->id); + if (!img) + goto out; + + if (pb_read_one(img, &mie, PB_MEMFD_INODE) < 0) + goto out; + + fd = memfd_create(mie->name, 0); + if (fd < 0) { + pr_perror("Can't create memfd:%s", mie->name); + goto out; + } + + if (restore_memfd_shmem_content(fd, mie->shmid, mie->size)) + goto out; + + if (fchown(fd, mie->uid, mie->gid)) { + pr_perror("Can't change uid %d gid %d of memfd:%s", + 
(int)mie->uid, (int)mie->gid, mie->name); + goto out; + } + + inode->fdstore_id = fdstore_add(fd); + if (inode->fdstore_id < 0) + goto out; + + ret = fd; + fd = -1; + +out: + if (fd != -1) + close(fd); + if (img) + close_image(img); + if (mie) + memfd_inode_entry__free_unpacked(mie, NULL); + return ret; +} + +static int memfd_open_inode(struct memfd_inode *inode) +{ + int fd; + + if (inode->fdstore_id != -1) + return fdstore_get(inode->fdstore_id); + + mutex_lock(&inode->lock); + if (inode->fdstore_id != -1) + fd = fdstore_get(inode->fdstore_id); + else + fd = memfd_open_inode_nocache(inode); + mutex_unlock(&inode->lock); + + return fd; +} + +static int memfd_open(struct file_desc *d, u32 *fdflags) +{ + char lpath[PSFDS]; + struct memfd_info *mfi; + MemfdFileEntry *mfe; + int fd, _fd; + u32 flags; + + mfi = container_of(d, struct memfd_info, d); + mfe = mfi->mfe; + + pr_info("Restoring memfd id=%d\n", mfe->id); + + fd = memfd_open_inode(mfi->inode); + if (fd < 0) + goto err; + + /* Reopen the fd with original permissions */ + sprintf(lpath, "/proc/self/fd/%d", fd); + flags = fdflags ? *fdflags : mfe->flags; + /* + * Ideally we should call compat version open() to not force the + * O_LARGEFILE file flag with regular open(). It doesn't seem that + * important though. 
+ */ + _fd = open(lpath, flags); + if (_fd < 0) { + pr_perror("Can't reopen memfd id=%d", mfe->id); + goto err; + } + close(fd); + fd = _fd; + + if (restore_fown(fd, mfe->fown) < 0) + goto err; + + if (lseek(fd, mfe->pos, SEEK_SET) < 0) { + pr_perror("Can't restore file position of memfd id=%d", mfe->id); + goto err; + } + + return fd; + +err: + if (fd >= 0) + close(fd); + return -1; +} + +static int memfd_open_fe_fd(struct file_desc *fd, int *new_fd) +{ + int tmp; + + tmp = memfd_open(fd, NULL); + if (tmp < 0) + return -1; + *new_fd = tmp; + return 0; +} + +static struct file_desc_ops memfd_desc_ops = { + .type = FD_TYPES__MEMFD, + .open = memfd_open_fe_fd, +}; + +static int collect_one_memfd(void *o, ProtobufCMessage *msg, struct cr_img *i) +{ + struct memfd_info *info = o; + + info->mfe = pb_msg(msg, MemfdFileEntry); + info->inode = memfd_alloc_inode(info->mfe->inode_id); + if (!info->inode) + return -1; + + return file_desc_add(&info->d, info->mfe->id, &memfd_desc_ops); +} + +struct collect_image_info memfd_cinfo = { + .fd_type = CR_FD_MEMFD_FILE, + .pb_type = PB_MEMFD_FILE, + .priv_size = sizeof(struct memfd_info), + .collect = collect_one_memfd, +}; diff --git a/criu/shmem.c b/criu/shmem.c index cee47dba7..29383e79a 100644 --- a/criu/shmem.c +++ b/criu/shmem.c @@ -23,6 +23,7 @@ #include "types.h" #include "page.h" #include "util.h" +#include "memfd.h" #include "protobuf.h" #include "images/pagemap.pb-c.h" @@ -490,7 +491,7 @@ static int do_restore_shmem_content(void *addr, unsigned long size, unsigned lon return ret; } -static int restore_shmem_content(void *addr, struct shmem_info *si) +int restore_shmem_content(void *addr, struct shmem_info *si) { return do_restore_shmem_content(addr, si->size, si->shmid); } @@ -500,6 +501,41 @@ int restore_sysv_shmem_content(void *addr, unsigned long size, unsigned long shm return do_restore_shmem_content(addr, round_up(size, PAGE_SIZE), shmid); } +int restore_memfd_shmem_content(int fd, unsigned long shmid, unsigned long 
size) +{ + void *addr = NULL; + int ret = 1; + + if (size == 0) + return 0; + + if (ftruncate(fd, size) < 0) { + pr_perror("Can't resize shmem 0x%lx size=%ld", shmid, size); + goto out; + } + + addr = mmap(NULL, size, PROT_WRITE | PROT_READ, MAP_SHARED, fd, 0); + if (addr == MAP_FAILED) { + pr_perror("Can't mmap shmem 0x%lx size=%ld", shmid, size); + goto out; + } + + /* + * do_restore_shmem_content needs size to be page aligned. + */ + if (do_restore_shmem_content(addr, round_up(size, PAGE_SIZE), shmid) < 0) { + pr_err("Can't restore shmem content\n"); + goto out; + } + + ret = 0; + +out: + if (addr) + munmap(addr, size); + return ret; +} + static int open_shmem(int pid, struct vma_area *vma) { VmaEntry *vi = vma->e; @@ -532,7 +568,7 @@ static int open_shmem(int pid, struct vma_area *vma) flags = MAP_SHARED; if (kdat.has_memfd) { - f = syscall(SYS_memfd_create, "", 0); + f = memfd_create("", 0); if (f < 0) { pr_perror("Unable to create memfd"); goto err; @@ -779,6 +815,32 @@ err: return ret; } +int dump_one_memfd_shmem(int fd, unsigned long shmid, unsigned long size) +{ + int ret = -1; + void *addr; + struct shmem_info si; + + if (size == 0) + return 0; + + memset(&si, 0, sizeof(si)); + si.shmid = shmid; + si.size = size; + + addr = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0); + if (addr == MAP_FAILED) { + pr_perror("Can't mmap shmem 0x%lx", shmid); + goto err; + } + + ret = do_dump_one_shmem(fd, addr, &si); + + munmap(addr, size); +err: + return ret; +} + int dump_one_sysv_shmem(void *addr, unsigned long size, unsigned long shmid) { int fd, ret; diff --git a/images/Makefile b/images/Makefile index edaab0633..e7f0580cf 100644 --- a/images/Makefile +++ b/images/Makefile @@ -63,6 +63,7 @@ proto-obj-y += sysctl.o proto-obj-y += autofs.o proto-obj-y += macvlan.o proto-obj-y += sit.o +proto-obj-y += memfd.o CFLAGS += -iquote $(obj)/ diff --git a/images/fdinfo.proto b/images/fdinfo.proto index 77e375aa9..d966d5bc5 100644 --- a/images/fdinfo.proto +++ 
b/images/fdinfo.proto @@ -16,6 +16,7 @@ import "sk-unix.proto"; import "fifo.proto"; import "pipe.proto"; import "tty.proto"; +import "memfd.proto"; enum fd_types { UND = 0; @@ -36,6 +37,7 @@ enum fd_types { TUNF = 15; EXT = 16; TIMERFD = 17; + MEMFD = 18; /* Any number above the real used. Not stored to image */ CTL_TTY = 65534; @@ -70,4 +72,5 @@ message file_entry { optional fifo_entry fifo = 17; optional pipe_entry pipe = 18; optional tty_file_entry tty = 19; + optional memfd_file_entry memfd = 20; } diff --git a/images/memfd.proto b/images/memfd.proto new file mode 100644 index 000000000..8eccd6f4f --- /dev/null +++ b/images/memfd.proto @@ -0,0 +1,20 @@ +syntax = "proto2"; + +import "opts.proto"; +import "fown.proto"; + +message memfd_file_entry { + required uint32 id = 1; + required uint32 flags = 2 [(criu).flags = "rfile.flags"]; + required uint64 pos = 3; + required fown_entry fown = 4; + required uint32 inode_id = 5; +}; + +message memfd_inode_entry { + required string name = 1; + required uint32 uid = 2; + required uint32 gid = 3; + required uint64 size = 4; + required uint32 shmid = 5; +}; diff --git a/lib/py/images/images.py b/lib/py/images/images.py index 3eedfca69..dca080657 100644 --- a/lib/py/images/images.py +++ b/lib/py/images/images.py @@ -522,6 +522,8 @@ handlers = { 'AUTOFS': entry_handler(pb.autofs_entry), 'FILES': entry_handler(pb.file_entry), 'CPUINFO': entry_handler(pb.cpuinfo_entry), + 'MEMFD_FILE': entry_handler(pb.memfd_file_entry), + 'MEMFD_INODE': entry_handler(pb.memfd_inode_entry), } diff --git a/scripts/feature-tests.mak b/scripts/feature-tests.mak index 6f67c6035..21b390092 100644 --- a/scripts/feature-tests.mak +++ b/scripts/feature-tests.mak @@ -171,3 +171,14 @@ int main(int argc, char **argv) } endef + +define FEATURE_TEST_MEMFD_CREATE + +#include +#include + +int main(void) +{ + return memfd_create(NULL, 0); +} +endef From 875ac4d03f9034adb88eec5875d63d0561c48107 Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Wed, 18 Dec 
2019 23:32:32 +0000 Subject: [PATCH 0154/1854] files: increase path buffer size in inherited_fd() Prepare memfd to use inherited_fd(), needing long path names support. Signed-off-by: Nicolas Viennot --- criu/files.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/criu/files.c b/criu/files.c index ea86deaa3..789b08a4c 100644 --- a/criu/files.c +++ b/criu/files.c @@ -1608,7 +1608,7 @@ int inherit_fd_lookup_id(char *id) bool inherited_fd(struct file_desc *d, int *fd_p) { - char buf[32], *id_str; + char buf[PATH_MAX], *id_str; int i_fd; if (!d->ops->name) From b25684e24ae7643f2a8da73617c22a44dc9023ca Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Wed, 18 Dec 2019 23:32:32 +0000 Subject: [PATCH 0155/1854] memfd: add --inherit-fd support Upon file restore, inherited_fd() is called to check for a user-defined inherit-fd override. Note that the MEMFD_INODE image is read at each invocation (memfd name is not cached). Signed-off-by: Nicolas Viennot --- criu/crtools.c | 1 + criu/memfd.c | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/criu/crtools.c b/criu/crtools.c index 3cd40e87d..7f72dde27 100644 --- a/criu/crtools.c +++ b/criu/crtools.c @@ -392,6 +392,7 @@ usage: " pipe[inode]\n" " socket[inode]\n" " file[mnt_id:inode]\n" +" /memfd:name\n" " path/to/file\n" " --empty-ns net Create a namespace, but don't restore its properties\n" " (assuming it will be restored by action scripts)\n" diff --git a/criu/memfd.c b/criu/memfd.c index bcef35e75..36b3be8df 100644 --- a/criu/memfd.c +++ b/criu/memfd.c @@ -276,6 +276,9 @@ static int memfd_open(struct file_desc *d, u32 *fdflags) mfi = container_of(d, struct memfd_info, d); mfe = mfi->mfe; + if (inherited_fd(d, &fd)) + return fd; + pr_info("Restoring memfd id=%d\n", mfe->id); fd = memfd_open_inode(mfi->inode); @@ -325,9 +328,42 @@ static int memfd_open_fe_fd(struct file_desc *fd, int *new_fd) return 0; } +static char *memfd_d_name(struct file_desc *d, char *buf, size_t s) 
+{ + MemfdInodeEntry *mie = NULL; + struct cr_img *img = NULL; + struct memfd_info *mfi; + char *ret = NULL; + + mfi = container_of(d, struct memfd_info, d); + + img = open_image(CR_FD_MEMFD_INODE, O_RSTR, mfi->inode->id); + if (!img) + goto out; + + if (pb_read_one(img, &mie, PB_MEMFD_INODE) < 0) + goto out; + + if (snprintf(buf, s, "%s%s", MEMFD_PREFIX, mie->name) >= s) { + pr_err("Buffer too small for memfd name %s\n", mie->name); + goto out; + } + + ret = buf; + +out: + if (img) + close_image(img); + if (mie) + memfd_inode_entry__free_unpacked(mie, NULL); + + return ret; +} + static struct file_desc_ops memfd_desc_ops = { .type = FD_TYPES__MEMFD, .open = memfd_open_fe_fd, + .name = memfd_d_name, }; static int collect_one_memfd(void *o, ProtobufCMessage *msg, struct cr_img *i) From 29a1a88bcebaf9d83591077d2bec424da82c0e71 Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Wed, 18 Dec 2019 23:32:32 +0000 Subject: [PATCH 0156/1854] memfd: add memory mapping support * During checkpoint, we add a vma flag: VMA_AREA_MEMFD to denote memfd regions. * Even though memfd is backed by the shmem device, we use the file semantics of memfd (via /proc/map_files/) which we already have support for. 
Signed-off-by: Nicolas Viennot --- criu/cr-dump.c | 6 +++++- criu/files-reg.c | 11 +++++++++-- criu/include/image.h | 1 + criu/include/memfd.h | 6 ++++++ criu/memfd.c | 19 ++++++++++++++++++- criu/proc_parse.c | 35 +++++++++++++++++++++++++++++++++++ 6 files changed, 74 insertions(+), 4 deletions(-) diff --git a/criu/cr-dump.c b/criu/cr-dump.c index 88323af92..6aa114c2d 100644 --- a/criu/cr-dump.c +++ b/criu/cr-dump.c @@ -80,6 +80,7 @@ #include "fault-injection.h" #include "dump.h" #include "eventpoll.h" +#include "memfd.h" /* * Architectures can overwrite this function to restore register sets that @@ -414,7 +415,10 @@ static int dump_filemap(struct vma_area *vma_area, int fd) /* Flags will be set during restore in open_filmap() */ - ret = dump_one_reg_file_cond(fd, &id, &p); + if (vma->status & VMA_AREA_MEMFD) + ret = dump_one_memfd_cond(fd, &id, &p); + else + ret = dump_one_reg_file_cond(fd, &id, &p); vma->shmid = id; return ret; diff --git a/criu/files-reg.c b/criu/files-reg.c index 90fb7dd7f..b0dad78e6 100644 --- a/criu/files-reg.c +++ b/criu/files-reg.c @@ -35,6 +35,7 @@ #include "pstree.h" #include "fault-injection.h" #include "external.h" +#include "memfd.h" #include "protobuf.h" #include "util.h" @@ -1879,7 +1880,10 @@ static int open_filemap(int pid, struct vma_area *vma) flags = vma->e->fdflags; if (ctx.flags != flags || ctx.desc != vma->vmfd) { - ret = open_path(vma->vmfd, do_open_reg_noseek_flags, &flags); + if (vma->e->status & VMA_AREA_MEMFD) + ret = memfd_open(vma->vmfd, &flags); + else + ret = open_path(vma->vmfd, do_open_reg_noseek_flags, &flags); if (ret < 0) return ret; @@ -1909,7 +1913,10 @@ int collect_filemap(struct vma_area *vma) vma->e->fdflags = O_RDONLY; } - fd = collect_special_file(vma->e->shmid); + if (vma->e->status & VMA_AREA_MEMFD) + fd = collect_memfd(vma->e->shmid); + else + fd = collect_special_file(vma->e->shmid); if (!fd) return -1; diff --git a/criu/include/image.h b/criu/include/image.h index 2baa39496..1c7cc5471 100644 --- 
a/criu/include/image.h +++ b/criu/include/image.h @@ -83,6 +83,7 @@ #define VMA_AREA_SOCKET (1 << 11) #define VMA_AREA_VVAR (1 << 12) #define VMA_AREA_AIORING (1 << 13) +#define VMA_AREA_MEMFD (1 << 14) #define VMA_CLOSE (1 << 28) #define VMA_NO_PROT_WRITE (1 << 29) diff --git a/criu/include/memfd.h b/criu/include/memfd.h index c1d7949cb..0a9aeff2f 100644 --- a/criu/include/memfd.h +++ b/criu/include/memfd.h @@ -5,10 +5,16 @@ #include "int.h" #include "common/config.h" +struct fd_parms; +struct file_desc; + extern int is_memfd(dev_t dev, const char *path); +extern int dump_one_memfd_cond(int lfd, u32 *id, struct fd_parms *parms); extern const struct fdtype_ops memfd_dump_ops; +extern int memfd_open(struct file_desc *d, u32 *fdflags); extern struct collect_image_info memfd_cinfo; +extern struct file_desc *collect_memfd(u32 id); #ifdef CONFIG_HAS_MEMFD_CREATE # include diff --git a/criu/memfd.c b/criu/memfd.c index 36b3be8df..1cca96a32 100644 --- a/criu/memfd.c +++ b/criu/memfd.c @@ -164,6 +164,13 @@ static int dump_one_memfd(int lfd, u32 id, const struct fd_parms *p) return pb_write_one(img_from_set(glob_imgset, CR_FD_FILES), &fe, PB_FILE); } +int dump_one_memfd_cond(int lfd, u32 *id, struct fd_parms *parms) +{ + if (fd_id_generate_special(parms, id)) + return dump_one_memfd(lfd, *id, parms); + return 0; +} + const struct fdtype_ops memfd_dump_ops = { .type = FD_TYPES__MEMFD, .dump = dump_one_memfd, @@ -265,7 +272,7 @@ static int memfd_open_inode(struct memfd_inode *inode) return fd; } -static int memfd_open(struct file_desc *d, u32 *fdflags) +int memfd_open(struct file_desc *d, u32 *fdflags) { char lpath[PSFDS]; struct memfd_info *mfi; @@ -384,3 +391,13 @@ struct collect_image_info memfd_cinfo = { .priv_size = sizeof(struct memfd_info), .collect = collect_one_memfd, }; + +struct file_desc *collect_memfd(u32 id) { + struct file_desc *fdesc; + + fdesc = find_file_desc_raw(FD_TYPES__MEMFD, id); + if (fdesc == NULL) + pr_err("No entry for memfd %#x\n", id); + + return 
fdesc; +} diff --git a/criu/proc_parse.c b/criu/proc_parse.c index fa7644992..468afcdf3 100644 --- a/criu/proc_parse.c +++ b/criu/proc_parse.c @@ -41,6 +41,7 @@ #include "timerfd.h" #include "path.h" #include "fault-injection.h" +#include "memfd.h" #include "protobuf.h" #include "images/fdinfo.pb-c.h" @@ -303,6 +304,26 @@ static int vma_get_mapfile_user(const char *fname, struct vma_area *vma, } vfi_dev = makedev(vfi->dev_maj, vfi->dev_min); + + if (is_memfd(vfi_dev, fname)) { + struct fd_link link; + link.len = strlen(fname); + strlcpy(link.name, fname, sizeof(link.name)); + strip_deleted(&link); + + /* + * The error EPERM will be shown in the following pr_perror(). + * It comes from the previous open() call. + */ + pr_perror("Can't open mapped [%s]", link.name); + + /* + * TODO Perhaps we could do better than failing and dump the + * memory like what is being done in shmem.c + */ + return -1; + } + if (is_anon_shmem_map(vfi_dev)) { if (!(vma->e->flags & MAP_SHARED)) return -1; @@ -578,7 +599,20 @@ static int handle_vma(pid_t pid, struct vma_area *vma_area, /* * /dev/zero stands for anon-shared mapping * otherwise it's some file mapping. + * + * We treat memfd mappings as regular file mappings because + * their backing can be seen as files, which is easy to + * support. So even though memfd is an anonymous shmem, we + * treat it differently. + * Note: maybe we should revisit this as /proc/map_files/ + * may not always be accessible. 
*/ + + if (is_memfd(st_buf->st_dev, file_path)) { + vma_area->e->status |= VMA_AREA_MEMFD; + goto normal_file; + } + if (is_anon_shmem_map(st_buf->st_dev)) { if (!(vma_area->e->flags & MAP_SHARED)) goto err_bogus_mapping; @@ -594,6 +628,7 @@ static int handle_vma(pid_t pid, struct vma_area *vma_area, vma_area->e->shmid += FI_HUGE_ANON_SHMEM_ID_BASE; } } else { +normal_file: if (vma_area->e->flags & MAP_PRIVATE) vma_area->e->status |= VMA_FILE_PRIVATE; else From 56d8e2455fb86b885775db6c236cbb04ba403f4d Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Wed, 18 Dec 2019 23:32:32 +0000 Subject: [PATCH 0157/1854] memfd: add seals support See "man fcntl" for more information about seals. memfd are the only files that can be sealed, currently. For this reason, we dump the seal values in the MEMFD_INODE image. Restoring seals must be done carefully as the seal F_SEAL_FUTURE_WRITE prevents future write access. This means that any memory mapping with write access must be restored before restoring the seals. Signed-off-by: Nicolas Viennot --- criu/cr-restore.c | 4 +++ criu/include/fcntl.h | 8 ++++++ criu/include/memfd.h | 1 + criu/memfd.c | 59 +++++++++++++++++++++++++++++++++++++++- images/memfd.proto | 1 + lib/py/images/pb2dict.py | 9 ++++++ 6 files changed, 81 insertions(+), 1 deletion(-) diff --git a/criu/cr-restore.c b/criu/cr-restore.c index 13d1001c9..f50448cd2 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -2232,6 +2232,10 @@ skip_ns_bouncing: if (ret < 0) goto out_kill; + ret = apply_memfd_seals(); + if (ret < 0) + goto out_kill; + /* * Zombies die after CR_STATE_RESTORE which is switched * by root task, not by us. 
See comment before CR_STATE_FORKING diff --git a/criu/include/fcntl.h b/criu/include/fcntl.h index d9c5c5e7b..ea9d48c72 100644 --- a/criu/include/fcntl.h +++ b/criu/include/fcntl.h @@ -34,6 +34,14 @@ struct f_owner_ex { # define F_GETPIPE_SZ (F_LINUX_SPECIFIC_BASE + 8) #endif +#ifndef F_ADD_SEALS +# define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9) +#endif + +#ifndef F_GET_SEALS +# define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10) +#endif + #ifndef O_PATH # define O_PATH 010000000 #endif diff --git a/criu/include/memfd.h b/criu/include/memfd.h index 0a9aeff2f..2d8eda545 100644 --- a/criu/include/memfd.h +++ b/criu/include/memfd.h @@ -15,6 +15,7 @@ extern const struct fdtype_ops memfd_dump_ops; extern int memfd_open(struct file_desc *d, u32 *fdflags); extern struct collect_image_info memfd_cinfo; extern struct file_desc *collect_memfd(u32 id); +extern int apply_memfd_seals(void); #ifdef CONFIG_HAS_MEMFD_CREATE # include diff --git a/criu/memfd.c b/criu/memfd.c index 1cca96a32..d17c10fb7 100644 --- a/criu/memfd.c +++ b/criu/memfd.c @@ -1,4 +1,5 @@ #include +#include #include "common/compiler.h" #include "common/lock.h" @@ -24,6 +25,13 @@ #define MEMFD_PREFIX "/memfd:" #define MEMFD_PREFIX_LEN (sizeof(MEMFD_PREFIX)-1) +#define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */ +#define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */ +#define F_SEAL_GROW 0x0004 /* prevent file from growing */ +#define F_SEAL_WRITE 0x0008 /* prevent writes */ +/* Linux 5.1+ */ +#define F_SEAL_FUTURE_WRITE 0x0010 /* prevent future writes while mapped */ + struct memfd_inode { struct list_head list; u32 id; @@ -37,6 +45,7 @@ struct memfd_inode { struct { mutex_t lock; int fdstore_id; + unsigned int pending_seals; }; }; }; @@ -92,6 +101,10 @@ static int dump_memfd_inode(int fd, struct memfd_inode *inode, mie.size = st->st_size; mie.shmid = shmid; + mie.seals = fcntl(fd, F_GET_SEALS); + if (mie.seals == -1) + goto out; + if (pb_write_one(img, &mie, PB_MEMFD_INODE)) goto out; @@ 
-187,6 +200,8 @@ struct memfd_info { struct memfd_inode *inode; }; +static int memfd_open_inode(struct memfd_inode *inode); + static struct memfd_inode *memfd_alloc_inode(int id) { struct memfd_inode *inode; @@ -202,6 +217,7 @@ static struct memfd_inode *memfd_alloc_inode(int id) inode->id = id; mutex_init(&inode->lock); inode->fdstore_id = -1; + inode->pending_seals = 0; list_add_tail(&inode->list, &memfd_inodes); return inode; @@ -223,7 +239,16 @@ static int memfd_open_inode_nocache(struct memfd_inode *inode) if (pb_read_one(img, &mie, PB_MEMFD_INODE) < 0) goto out; - fd = memfd_create(mie->name, 0); + if (mie->seals == F_SEAL_SEAL) { + inode->pending_seals = 0; + flags = 0; + } else { + /* Seals are applied later due to F_SEAL_FUTURE_WRITE */ + inode->pending_seals = mie->seals; + flags = MFD_ALLOW_SEALING; + } + + fd = memfd_create(mie->name, flags); if (fd < 0) { pr_perror("Can't create memfd:%s", mie->name); goto out; @@ -401,3 +426,35 @@ struct file_desc *collect_memfd(u32 id) { return fdesc; } + +int apply_memfd_seals(void) +{ + /* + * We apply the seals after all the mappings are done because the seal + * F_SEAL_FUTURE_WRITE prevents future write access (added in + * Linux 5.1). Thus we must make sure all writable mappings are opened + * before applying this seal. 
+ */ + + int ret, fd; + struct memfd_inode *inode; + + list_for_each_entry(inode, &memfd_inodes, list) { + if (!inode->pending_seals) + continue; + + fd = memfd_open_inode(inode); + if (fd < 0) + return -1; + + ret = fcntl(fd, F_ADD_SEALS, inode->pending_seals); + close(fd); + + if (ret < 0) { + pr_perror("Cannot apply seals on memfd"); + return -1; + } + } + + return 0; +} diff --git a/images/memfd.proto b/images/memfd.proto index 8eccd6f4f..546ffc2ab 100644 --- a/images/memfd.proto +++ b/images/memfd.proto @@ -17,4 +17,5 @@ message memfd_inode_entry { required uint32 gid = 3; required uint64 size = 4; required uint32 shmid = 5; + required uint32 seals = 6 [(criu).flags = "seals.flags"]; }; diff --git a/lib/py/images/pb2dict.py b/lib/py/images/pb2dict.py index a89850a1d..40a6036cf 100644 --- a/lib/py/images/pb2dict.py +++ b/lib/py/images/pb2dict.py @@ -124,6 +124,14 @@ rfile_flags_map = [ ('O_CLOEXEC', 0o02000000), ] +seals_flags_map = [ + ('F_SEAL_SEAL', 0x0001), + ('F_SEAL_SHRINK', 0x0002), + ('F_SEAL_GROW', 0x0004), + ('F_SEAL_WRITE', 0x0008), + ('F_SEAL_FUTURE_WRITE', 0x0010), +] + pmap_flags_map = [ ('PE_PARENT', 1 << 0), ('PE_LAZY', 1 << 1), @@ -136,6 +144,7 @@ flags_maps = { 'mmap.status': mmap_status_map, 'rfile.flags': rfile_flags_map, 'pmap.flags': pmap_flags_map, + 'seals.flags': seals_flags_map, } gen_maps = { From b133c375ad2d21cf6a1a9e96e7dab3741c966fbe Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Tue, 4 Feb 2020 16:39:53 +0000 Subject: [PATCH 0158/1854] inhfd_test: add support for non-pair files File pairs naturally block on read() until the write() happens (or the writer is closed). This is not the case for regular files, so we take extra precautions for these. 
Also cleaned-up an extra my_file.close() Signed-off-by: Nicolas Viennot --- test/zdtm.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/test/zdtm.py b/test/zdtm.py index 3fc57ba55..4110b5142 100755 --- a/test/zdtm.py +++ b/test/zdtm.py @@ -691,9 +691,14 @@ class inhfd_test: i = 0 for _, peer_file in self.__files: msg = self.__get_message(i) - my_file.close() try: - data = peer_file.read(16) + # File pairs naturally block on read() until the write() + # happen (or the writer is closed). This is not the case for + # regular files, so we loop. + data = b'' + while not data: + data = peer_file.read(16) + time.sleep(0.1) except Exception as e: print("Unable to read a peer file: %s" % e) sys.exit(1) From 2dd105b8dfb23399e18ab4e3f7d13b00c19ad910 Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Fri, 20 Dec 2019 21:56:38 -0500 Subject: [PATCH 0159/1854] memfd: add tests Testing for all the memfd features, namely support for CR of: * the same fd shared by multiple processes * the same file shared by multiple processes * the memfd content * file flags and fd flags * mmaps, MAP_SHARED and MAP_PRIVATE * seals, excluding F_SEAL_FUTURE_WRITE because this feature only exists in recent kernels (5.1 and up) * inherited fd Signed-off-by: Nicolas Viennot --- test/inhfd/memfd.py | 28 +++++++++ test/inhfd/memfd.py.checkskip | 7 +++ test/inhfd/memfd.py.desc | 1 + test/zdtm/static/Makefile | 4 ++ test/zdtm/static/memfd00.c | 103 ++++++++++++++++++++++++++++++ test/zdtm/static/memfd01.c | 114 ++++++++++++++++++++++++++++++++++ test/zdtm/static/memfd02.c | 87 ++++++++++++++++++++++++++ test/zdtm/static/memfd03.c | 97 +++++++++++++++++++++++++++++ 8 files changed, 441 insertions(+) create mode 100755 test/inhfd/memfd.py create mode 100755 test/inhfd/memfd.py.checkskip create mode 100644 test/inhfd/memfd.py.desc create mode 100644 test/zdtm/static/memfd00.c create mode 100644 test/zdtm/static/memfd01.c create mode 100644 test/zdtm/static/memfd02.c create mode 
100644 test/zdtm/static/memfd03.c diff --git a/test/inhfd/memfd.py b/test/inhfd/memfd.py new file mode 100755 index 000000000..d9ce01e41 --- /dev/null +++ b/test/inhfd/memfd.py @@ -0,0 +1,28 @@ +import os +import ctypes +libc = ctypes.CDLL(None) + + +def memfd_create(name, flags): + return libc.memfd_create(name.encode('utf8'), flags) + + +def create_fds(): + def create_memfd_pair(name): + fd = memfd_create(name, 0) + fw = open('/proc/self/fd/{}'.format(fd), 'wb') + fr = open('/proc/self/fd/{}'.format(fd), 'rb') + os.close(fd) + return (fw, fr) + + return [create_memfd_pair("name{}".format(i)) for i in range(10)] + + +def filename(f): + name = os.readlink('/proc/self/fd/{}'.format(f.fileno())) + name = name.replace(' (deleted)', '') + return name + + +def dump_opts(sockf): + return [] diff --git a/test/inhfd/memfd.py.checkskip b/test/inhfd/memfd.py.checkskip new file mode 100755 index 000000000..252778969 --- /dev/null +++ b/test/inhfd/memfd.py.checkskip @@ -0,0 +1,7 @@ +#!/usr/bin/env python + +import ctypes +libc = ctypes.CDLL(None) + +# libc may not have memfd_create (e.g., centos on travis) +libc.memfd_create("test".encode('utf8'), 0) diff --git a/test/inhfd/memfd.py.desc b/test/inhfd/memfd.py.desc new file mode 100644 index 000000000..10666c823 --- /dev/null +++ b/test/inhfd/memfd.py.desc @@ -0,0 +1 @@ +{ 'flavor': 'h' } diff --git a/test/zdtm/static/Makefile b/test/zdtm/static/Makefile index 5ca05ee9e..5afd18cd6 100644 --- a/test/zdtm/static/Makefile +++ b/test/zdtm/static/Makefile @@ -220,6 +220,10 @@ TST_NOFILE := \ child_subreaper \ child_subreaper_existing_child \ child_subreaper_and_reparent \ + memfd00 \ + memfd01 \ + memfd02 \ + memfd03 \ # jobctl00 \ ifneq ($(ARCH),arm) diff --git a/test/zdtm/static/memfd00.c b/test/zdtm/static/memfd00.c new file mode 100644 index 000000000..6b56eca01 --- /dev/null +++ b/test/zdtm/static/memfd00.c @@ -0,0 +1,103 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include 
+#include + +#include "zdtmtst.h" + +const char *test_doc = "memfd file descriptor"; +const char *test_author = "Nicolas Viennot "; + +#define err(exitcode, msg, ...) ({ pr_perror(msg, ##__VA_ARGS__); exit(exitcode); }) + +static int _memfd_create(const char *name, unsigned int flags) +{ + return syscall(SYS_memfd_create, name, flags); +} + +int main(int argc, char *argv[]) +{ + int fd, fl_flags1, fl_flags2, fd_flags1, fd_flags2; + struct statfs statfs1, statfs2; + off_t pos1, pos2; + char buf[5]; + + test_init(argc, argv); + + fd = _memfd_create("somename", MFD_CLOEXEC); + if (fd < 0) + err(1, "Can't call memfd_create"); + + if (fcntl(fd, F_SETFL, O_APPEND) < 0) + err(1, "Can't get fl flags"); + + if ((fl_flags1 = fcntl(fd, F_GETFL)) == -1) + err(1, "Can't get fl flags"); + + if ((fd_flags1 = fcntl(fd, F_GETFD)) == -1) + err(1, "Can't get fd flags"); + + if (fstatfs(fd, &statfs1) < 0) + err(1, "statfs issue"); + + if (write(fd, "hello", 5) != 5) + err(1, "write error"); + + pos1 = 3; + if (lseek(fd, pos1, SEEK_SET) < 0) + err(1, "seek error"); + + test_daemon(); + test_waitsig(); + + if ((fl_flags2 = fcntl(fd, F_GETFL)) == -1) + err(1, "Can't get fl flags"); + + if (fl_flags1 != fl_flags2) { + fail("fl flags differs"); + return 1; + } + + if ((fd_flags2 = fcntl(fd, F_GETFD)) == -1) + err(1, "Can't get fd flags"); + + if (fd_flags1 != fd_flags2) { + fail("fd flags differs"); + return 1; + } + + if (fstatfs(fd, &statfs2) < 0) + err(1, "statfs issue"); + + if (statfs1.f_type != statfs2.f_type) { + fail("statfs.f_type differs"); + return 1; + } + + pos2 = lseek(fd, 0, SEEK_CUR); + if (pos1 != pos2) { + fail("position differs"); + return 1; + } + + if (pread(fd, buf, sizeof(buf), 0) != sizeof(buf)) { + fail("read problem"); + return 1; + } + + if (memcmp(buf, "hello", sizeof(buf))) { + fail("content mismatch"); + return 1; + } + + pass(); + + return 0; +} diff --git a/test/zdtm/static/memfd01.c b/test/zdtm/static/memfd01.c new file mode 100644 index 
000000000..7a7853642 --- /dev/null +++ b/test/zdtm/static/memfd01.c @@ -0,0 +1,114 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "zdtmtst.h" + +const char *test_doc = "memfd with different file pointer"; +const char *test_author = "Nicolas Viennot "; + +#define err(exitcode, msg, ...) ({ pr_perror(msg, ##__VA_ARGS__); exit(exitcode); }) + +static int _memfd_create(const char *name, unsigned int flags) +{ + return syscall(SYS_memfd_create, name, flags); +} + +int main(int argc, char *argv[]) +{ + pid_t pid, pid_child; + int fd, ret, status; + task_waiter_t t; + + test_init(argc, argv); + + task_waiter_init(&t); + + fd = _memfd_create("somename", MFD_CLOEXEC); + if (fd < 0) + err(1, "Can't call memfd_create"); + + pid = getpid(); + + pid_child = fork(); + if (pid_child < 0) + err(1, "Can't fork"); + + if (!pid_child) { + char fdpath[100]; + char buf[1]; + int fl_flags1, fl_flags2, fd_flags1, fd_flags2; + + snprintf(fdpath, sizeof(fdpath), "/proc/%d/fd/%d", pid, fd); + /* + * We pass O_LARGEFILE because in compat mode, our file + * descriptor does not get O_LARGEFILE automatically, but the + * restorer using non-compat open() is forced O_LARGEFILE. + * This creates a flag difference, which we don't want to deal + * with this at the moment. 
+ */ + fd = open(fdpath, O_RDONLY | O_LARGEFILE); + if (fd < 0) + err(1, "Can't open memfd via proc"); + + if ((fl_flags1 = fcntl(fd, F_GETFL)) == -1) + err(1, "Can't get fl flags"); + + if ((fd_flags1 = fcntl(fd, F_GETFD)) == -1) + err(1, "Can't get fd flags"); + + task_waiter_complete(&t, 1); + // checkpoint-restore happens here + task_waiter_wait4(&t, 2); + + if (read(fd, buf, 1) != 1) + err(1, "Can't read"); + + if ((fl_flags2 = fcntl(fd, F_GETFL)) == -1) + err(1, "Can't get fl flags"); + + if (fl_flags1 != fl_flags2) + err(1, "fl flags differs"); + + if ((fd_flags2 = fcntl(fd, F_GETFD)) == -1) + err(1, "Can't get fd flags"); + + if (fd_flags1 != fd_flags2) + err(1, "fd flags differs"); + + if (buf[0] != 'x') + err(1, "Read incorrect"); + + return 0; + } + + task_waiter_wait4(&t, 1); + + test_daemon(); + test_waitsig(); + + if (write(fd, "x", 1) != 1) + err(1, "Can't write"); + + task_waiter_complete(&t, 2); + + ret = wait(&status); + if (ret == -1 || !WIFEXITED(status) || WEXITSTATUS(status)) { + kill(pid, SIGKILL); + fail("child had issue"); + return 1; + } + + pass(); + + return 0; +} diff --git a/test/zdtm/static/memfd02.c b/test/zdtm/static/memfd02.c new file mode 100644 index 000000000..1843e9c9a --- /dev/null +++ b/test/zdtm/static/memfd02.c @@ -0,0 +1,87 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "zdtmtst.h" + +const char *test_doc = "memfd mmap"; +const char *test_author = "Nicolas Viennot "; + +#define err(exitcode, msg, ...) 
({ pr_perror(msg, ##__VA_ARGS__); exit(exitcode); }) + +static int _memfd_create(const char *name, unsigned int flags) +{ + return syscall(SYS_memfd_create, name, flags); +} + +int main(int argc, char *argv[]) +{ +#define LEN 6 + int fd; + void *addr_shared, *addr_private; + char buf[LEN]; + + test_init(argc, argv); + + fd = _memfd_create("somename", MFD_CLOEXEC); + if (fd < 0) + err(1, "Can't call memfd_create"); + + if (ftruncate(fd, LEN) < 0) + err(1, "Can't truncate"); + + addr_shared = mmap(NULL, LEN, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + if (addr_shared == MAP_FAILED) + err(1, "Can't mmap"); + + write(fd, "write1", LEN); + + addr_private = mmap(NULL, LEN, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); + if (addr_private == MAP_FAILED) + err(1, "Can't mmap"); + + test_daemon(); + test_waitsig(); + + if (memcmp(addr_shared, "write1", LEN)) { + fail("content mismatch (shared)"); + return 1; + } + + strcpy(addr_shared, "write2"); + + if (pread(fd, buf, LEN, 0) != LEN) { + fail("read problem"); + return 1; + } + + if (memcmp(buf, "write2", LEN)) { + fail("content mismatch (shared)"); + return 1; + } + + if (memcmp(addr_private, "write2", LEN)) { + fail("content mismatch (private)"); + return 1; + } + + strcpy(addr_private, "write3"); + + if (memcmp(addr_shared, "write2", LEN)) { + fail("content mismatch (shared)"); + return 1; + } + + pass(); + + return 0; +} diff --git a/test/zdtm/static/memfd03.c b/test/zdtm/static/memfd03.c new file mode 100644 index 000000000..faedf9383 --- /dev/null +++ b/test/zdtm/static/memfd03.c @@ -0,0 +1,97 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "zdtmtst.h" + +const char *test_doc = "memfd seals"; +const char *test_author = "Nicolas Viennot "; + +#define err(exitcode, msg, ...) 
({ pr_perror(msg, ##__VA_ARGS__); exit(exitcode); }) + +static int _memfd_create(const char *name, unsigned int flags) +{ + return syscall(SYS_memfd_create, name, flags); +} + + +#ifndef F_LINUX_SPECIFIC_BASE +# define F_LINUX_SPECIFIC_BASE 1024 +#endif + +#ifndef F_ADD_SEALS + #define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9) +#endif + +#ifndef F_GET_SEALS + #define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10) +#endif + + +#ifndef F_SEAL_SEAL +#define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */ +#define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */ +#define F_SEAL_GROW 0x0004 /* prevent file from growing */ +#define F_SEAL_WRITE 0x0008 /* prevent writes */ +#endif + +int main(int argc, char *argv[]) +{ +#define LEN 5 + int fd, fd2; + void *addr_write, *addr_read; + char fdpath[100]; + + test_init(argc, argv); + + fd = _memfd_create("somename", MFD_ALLOW_SEALING | MFD_CLOEXEC); + if (fd < 0) + err(1, "Can't call memfd_create"); + + if (write(fd, "hello", LEN) != LEN) + err(1, "Can't write"); + + if (fcntl(fd, F_ADD_SEALS, F_SEAL_WRITE) < 0) + err(1, "Can't add seals"); + + test_daemon(); + test_waitsig(); + + snprintf(fdpath, sizeof(fdpath), "/proc/self/fd/%d", fd); + fd2 = open(fdpath, O_RDWR); + if (fd2 < 0) + err(1, "Can't open memfd via proc"); + + if (fcntl(fd, F_GET_SEALS) != F_SEAL_WRITE) { + fail("Seals are different"); + return 1; + } + + addr_write = mmap(NULL, LEN, PROT_WRITE, MAP_SHARED, fd2, 0); + if (addr_write != MAP_FAILED) { + fail("Should not be able to get write access"); + return 1; + } + + addr_read = mmap(NULL, 1, PROT_READ, MAP_PRIVATE, fd2, 0); + if (addr_read == MAP_FAILED) + err(1, "Can't mmap"); + + if (memcmp(addr_read, "hello", LEN)) { + fail("Mapping has bad data"); + return 1; + } + + pass(); + + return 0; +} From ec116449544cd2f062b7523c2eadc6d791baa0ac Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 23 Jan 2020 16:39:28 +0000 Subject: [PATCH 0160/1854] criu: Use strlcpy() instead of strncpy() gcc8 in 
Fedora Rawhide has a new useful warning: > criu/img-remote.c: In function 'push_snapshot_id': > criu/img-remote.c:1099:2: error: 'strncpy' specified bound 4096 equals destination size [-Werror=stringop-truncation] > 1099 | strncpy(rn.snapshot_id, snapshot_id, PATH_MAX); > | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ From man 3 strncpy: > Warning: If there is no null byte among the first n bytes of src, > the string placed in dest will not be null-terminated. Signed-off-by: Dmitry Safonov --- criu/files-reg.c | 3 ++- criu/files.c | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/criu/files-reg.c b/criu/files-reg.c index b0dad78e6..c2a55aeb3 100644 --- a/criu/files-reg.c +++ b/criu/files-reg.c @@ -33,6 +33,7 @@ #include "namespaces.h" #include "proc_parse.h" #include "pstree.h" +#include "string.h" #include "fault-injection.h" #include "external.h" #include "memfd.h" @@ -457,7 +458,7 @@ static int open_remap_ghost(struct reg_file_info *rfi, gf->remap.rmnt_id = rfi->rfe->mnt_id; if (S_ISDIR(gfe->mode)) - strncpy(gf->remap.rpath, rfi->path, PATH_MAX); + strlcpy(gf->remap.rpath, rfi->path, PATH_MAX); else ghost_path(gf->remap.rpath, PATH_MAX, rfi, rpe); diff --git a/criu/files.c b/criu/files.c index 789b08a4c..f7963bf54 100644 --- a/criu/files.c +++ b/criu/files.c @@ -45,6 +45,7 @@ #include "autofs.h" #include "parasite.h" #include "parasite-syscall.h" +#include "string.h" #include "kerndat.h" #include "fdstore.h" @@ -291,8 +292,7 @@ static int fixup_overlayfs(struct fd_parms *p, struct fd_link *link) char buf[PATH_MAX]; int n; - strncpy(buf, link->name, PATH_MAX); - buf[PATH_MAX - 1] = 0; + strlcpy(buf, link->name, PATH_MAX); n = snprintf(link->name, PATH_MAX, "%s/%s", m->mountpoint, buf + 2); if (n >= PATH_MAX) { pr_err("Not enough space to replace %s\n", buf); From bc49927bbc28b41e4b2759d42dc24f1d66e22df3 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 6 Feb 2020 18:01:00 +0000 Subject: [PATCH 0161/1854] criu: Make use strlcpy() to 
copy into allocated strings strncpy() with n == strlen(src) won't put NULL-terminator in dst. Signed-off-by: Dmitry Safonov --- criu/cr-restore.c | 1 + 1 file changed, 1 insertion(+) diff --git a/criu/cr-restore.c b/criu/cr-restore.c index f50448cd2..85105a18e 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -77,6 +77,7 @@ #include "fdstore.h" #include "string.h" #include "memfd.h" +#include "string.h" #include "parasite-syscall.h" #include "files-reg.h" From 3eab205bae1d31f00922d1e717a1cd56c1cb7177 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Thu, 6 Feb 2020 21:20:20 -0800 Subject: [PATCH 0162/1854] python: sort imports 202 Additional newline in a group of imports. I100 Import statements are in the wrong order. Signed-off-by: Andrei Vagin --- test/inhfd/memfd.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/inhfd/memfd.py b/test/inhfd/memfd.py index d9ce01e41..b06e35068 100755 --- a/test/inhfd/memfd.py +++ b/test/inhfd/memfd.py @@ -1,5 +1,5 @@ -import os import ctypes +import os libc = ctypes.CDLL(None) From 38793699e7f0e97d14b6b17e83e8bc071c3ce283 Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Fri, 21 Feb 2020 12:14:38 +0300 Subject: [PATCH 0163/1854] test/jenkins: remove empty line at the end of file Signed-off-by: Pavel Tikhomirov --- test/jenkins/criu-fault.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/test/jenkins/criu-fault.sh b/test/jenkins/criu-fault.sh index 4e3790e59..c27dd3738 100755 --- a/test/jenkins/criu-fault.sh +++ b/test/jenkins/criu-fault.sh @@ -27,4 +27,3 @@ fi ./test/zdtm.py run -t zdtm/static/maps04 --fault 131 --keep-going --report report --pre 2:1 || fail ./test/zdtm.py run -t zdtm/transition/maps008 --fault 131 --keep-going --report report --pre 2:1 || fail ./test/zdtm.py run -t zdtm/static/maps01 --fault 132 -f h || fail - From cdd08cdff8692aad9d05dd83e2ab24379cd83393 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Wed, 26 Feb 2020 12:25:37 +0200 Subject: [PATCH 0164/1854] uffd: use 
userns_call() to execute ioctl(UFFDIO_API) In the recent kernels the userfaultfd support for FORK events is limited to CAP_SYS_PTRACE. That causes the following error when the ioctl(UFFDIO_API) is executed from non-privileged userns: Error (criu/uffd.c:273): uffd: Failed to get uffd API: Operation not permitted Wrapping the call to ioctl(UFFDIO_API) in userns_call() resolves the issue. Fixes: #964 Signed-off-by: Mike Rapoport --- criu/uffd.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/criu/uffd.c b/criu/uffd.c index c47b35b1f..99373c04d 100644 --- a/criu/uffd.c +++ b/criu/uffd.c @@ -40,6 +40,7 @@ #include "tls.h" #include "fdstore.h" #include "util.h" +#include "namespaces.h" #undef LOG_PREFIX #define LOG_PREFIX "uffd: " @@ -254,6 +255,13 @@ bool uffd_noncooperative(void) return (kdat.uffd_features & features) == features; } +static int uffd_api_ioctl(void *arg, int fd, pid_t pid) +{ + struct uffdio_api *uffdio_api = arg; + + return ioctl(fd, UFFDIO_API, uffdio_api); +} + int uffd_open(int flags, unsigned long *features) { struct uffdio_api uffdio_api = { 0 }; @@ -269,7 +277,8 @@ int uffd_open(int flags, unsigned long *features) if (features) uffdio_api.features = *features; - if (ioctl(uffd, UFFDIO_API, &uffdio_api)) { + if (userns_call(uffd_api_ioctl, 0, &uffdio_api, sizeof(uffdio_api), + uffd)) { pr_perror("Failed to get uffd API"); goto err; } From 48f3b6516b384f9c4f240aff76f671697198884b Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Tue, 18 Feb 2020 19:45:08 +0000 Subject: [PATCH 0165/1854] criu(8): Add documentation for --enable-fs This option was introduced with: https://github.com/checkpoint-restore/criu/commit/e2c38245c613df5e36dcf0253c7652f928e46abf v2: (comment from Pavel Tikhomirov) --enable-fs does not fit with --external dev[]:, see try_resolve_ext_mount, external dev mounts only determined for FSTYPE__UNSUPPORTED.
Signed-off-by: Radostin Stoyanov --- Documentation/criu.txt | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/Documentation/criu.txt b/Documentation/criu.txt index a6b9f7fae..0ac29103a 100644 --- a/Documentation/criu.txt +++ b/Documentation/criu.txt @@ -90,6 +90,19 @@ The following levels are available: *-L*, *--libdir* 'path':: Path to plugins directory. +*--enable-fs* ['fs'[,'fs'...]]:: + Specify a comma-separated list of filesystem names that should + be auto-detected. The value 'all' enables auto-detection for + all filesystems. ++ +Note: This option is not safe, use at your own risk. +Auto-detecting a filesystem mount assumes that the mountpoint can +be restored with *mount(src, mountpoint, flags, options)*. When used, +*dump* is expected to always succeed if a mountpoint is to be +auto-detected, however *restore* may fail (or do something wrong) +if the assumption for restore logic is incorrect. This option is +not compatable with *--external* *dev*. + *--action-script* 'script':: Add an external action script to be executed at certain stages. The environment variable *CRTOOLS_SCRIPT_ACTION* is available From bb032cc3e218c4aee5394642caa59be3909b259a Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Tue, 18 Feb 2020 19:53:36 +0000 Subject: [PATCH 0166/1854] criu(8): Convert tabs to spaces Signed-off-by: Radostin Stoyanov --- Documentation/criu.txt | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Documentation/criu.txt b/Documentation/criu.txt index 0ac29103a..ab63e461c 100644 --- a/Documentation/criu.txt +++ b/Documentation/criu.txt @@ -477,7 +477,7 @@ The 'mode' may be one of the following: *soft*::: Restore cgroup properties if only cgroup has been created by *criu*, otherwise do not restore properties. This is the - default if mode is unspecified. + default if mode is unspecified. *full*::: Always restore all cgroups and their properties. 
@@ -575,17 +575,17 @@ check* always checks Category 1 features unless *--feature* is specified which only checks a specified feature. *Category 1*::: Absolutely required. These are features like support for - */proc/PID/map_files*, *NETLINK_SOCK_DIAG* socket - monitoring, */proc/sys/kernel/ns_last_pid* etc. + */proc/PID/map_files*, *NETLINK_SOCK_DIAG* socket + monitoring, */proc/sys/kernel/ns_last_pid* etc. *Category 2*::: Required only for specific cases. These are features - like AIO remap, */dev/net/tun* and others that are only - required if a process being dumped or restored - is using those. + like AIO remap, */dev/net/tun* and others that are only + required if a process being dumped or restored + is using those. *Category 3*::: Experimental. These are features like *task-diag* that - are used for experimental purposes (mostly - during development). + are used for experimental purposes (mostly + during development). If there are no errors or warnings, *criu* prints "Looks good." and its exit code is 0. From 563c5e5e763949de7b1c48bd04c777db17d768e1 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Tue, 4 Feb 2020 09:10:49 +0000 Subject: [PATCH 0167/1854] seize: prepare for cgroupv2 freezer The cgroupv2 freezer does not return the same strings as v1. Instead of THAWED and FROZEN v2 returns 0 and 1 (strings). This prepares the seize code to use 0 and 1 everywhere and THAWED and FROZEN only for v1 specific code paths. 
Signed-off-by: Adrian Reber --- criu/seize.c | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/criu/seize.c b/criu/seize.c index fd314666f..14cd82417 100644 --- a/criu/seize.c +++ b/criu/seize.c @@ -30,7 +30,14 @@ static const char frozen[] = "FROZEN"; static const char freezing[] = "FREEZING"; static const char thawed[] = "THAWED"; -static const char *get_freezer_state(int fd) +enum freezer_state { + FREEZER_ERROR = -1, + THAWED, + FROZEN, + FREEZING +}; + +static enum freezer_state get_freezer_state(int fd) { char state[32]; int ret; @@ -52,15 +59,15 @@ static const char *get_freezer_state(int fd) pr_debug("freezer.state=%s\n", state); if (strcmp(state, frozen) == 0) - return frozen; + return FROZEN; else if (strcmp(state, freezing) == 0) - return freezing; + return FREEZING; else if (strcmp(state, thawed) == 0) - return thawed; + return THAWED; pr_err("Unknown freezer state: %s\n", state); err: - return NULL; + return FREEZER_ERROR; } static bool freezer_thawed; @@ -98,7 +105,7 @@ static int freezer_restore_state(void) static int processes_to_wait; static pid_t *processes_to_wait_pids; -static int seize_cgroup_tree(char *root_path, const char *state) +static int seize_cgroup_tree(char *root_path, enum freezer_state state) { DIR *dir; struct dirent *de; @@ -134,7 +141,7 @@ static int seize_cgroup_tree(char *root_path, const char *state) if (!compel_interrupt_task(pid)) { pr_debug("SEIZE %d: success\n", pid); processes_to_wait++; - } else if (state == frozen) { + } else if (state == FROZEN) { char buf[] = "/proc/XXXXXXXXXX/exe"; struct stat st; @@ -332,7 +339,7 @@ static int freeze_processes(void) { int fd, exit_code = -1; char path[PATH_MAX]; - const char *state = thawed; + enum freezer_state state = THAWED; static const unsigned long step_ms = 100; unsigned long nr_attempts = (opts.timeout * 1000000) / step_ms; @@ -361,11 +368,11 @@ static int freeze_processes(void) return -1; } state = get_freezer_state(fd); - 
if (!state) { + if (state == FREEZER_ERROR) { close(fd); return -1; } - if (state == thawed) { + if (state == THAWED) { freezer_thawed = true; lseek(fd, 0, SEEK_SET); @@ -384,12 +391,12 @@ static int freeze_processes(void) */ for (; i <= nr_attempts; i++) { state = get_freezer_state(fd); - if (!state) { + if (state == FREEZER_ERROR) { close(fd); return -1; } - if (state == frozen) + if (state == FROZEN) break; if (alarm_timeouted()) goto err; From 9f902e0c6b74ce2d7abf6a632d3abdf6b1370751 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Tue, 4 Feb 2020 09:53:51 +0000 Subject: [PATCH 0168/1854] seize: factor out opening and writing the freezer state More preparations for cgroupv2 freezer. Factor out the freezer state opening and writing to have one location in which to handle v1 and v2 differences. Signed-off-by: Adrian Reber --- criu/seize.c | 81 +++++++++++++++++++++++++++++++++++----------------- 1 file changed, 55 insertions(+), 26 deletions(-) diff --git a/criu/seize.c b/criu/seize.c index 14cd82417..b53707e44 100644 --- a/criu/seize.c +++ b/criu/seize.c @@ -20,6 +20,7 @@ #include "seccomp.h" #include "seize.h" #include "stats.h" +#include "string.h" #include "xmalloc.h" #include "util.h" #include @@ -77,13 +78,39 @@ const char *get_real_freezer_state(void) return freezer_thawed ?
thawed : frozen; } -static int freezer_restore_state(void) +static int freezer_write_state(int fd, enum freezer_state new_state) { - int fd; - char path[PATH_MAX]; + char state[32]; + int ret; - if (!opts.freeze_cgroup || freezer_thawed) - return 0; + if (new_state == THAWED) { + if (strlcpy(state, thawed, sizeof(state)) >= sizeof(state)) + return -1; + } else if (new_state == FROZEN) { + if (strlcpy(state, frozen, sizeof(state)) >= sizeof(state)) + return -1; + } else { + return -1; + } + + ret = lseek(fd, 0, SEEK_SET); + if (ret < 0) { + pr_perror("Unable to seek freezer FD"); + return -1; + } + if (write(fd, state, sizeof(state)) != sizeof(state)) { + pr_perror("Unable to %s tasks", + (new_state == THAWED) ? "thaw" : "freeze"); + return -1; + } + + return 0; +} + +static int freezer_open(void) +{ + char path[PATH_MAX]; + int fd; snprintf(path, sizeof(path), "%s/freezer.state", opts.freeze_cgroup); fd = open(path, O_RDWR); @@ -92,13 +119,24 @@ static int freezer_restore_state(void) return -1; } - if (write(fd, frozen, sizeof(frozen)) != sizeof(frozen)) { - pr_perror("Unable to freeze tasks"); - close(fd); + return fd; +} + +static int freezer_restore_state(void) +{ + int fd; + int ret; + + if (!opts.freeze_cgroup || freezer_thawed) + return 0; + + fd = freezer_open(); + if (fd < 0) return -1; - } + + ret = freezer_write_state(fd, FROZEN); close(fd); - return 0; + return ret; } /* A number of tasks in a freezer cgroup which are not going to be dumped */ @@ -338,7 +376,6 @@ static int log_unfrozen_stacks(char *root) static int freeze_processes(void) { int fd, exit_code = -1; - char path[PATH_MAX]; enum freezer_state state = THAWED; static const unsigned long step_ms = 100; @@ -361,12 +398,10 @@ static int freeze_processes(void) pr_debug("freezing processes: %lu attempts with %lu ms steps\n", nr_attempts, step_ms); - snprintf(path, sizeof(path), "%s/freezer.state", opts.freeze_cgroup); - fd = open(path, O_RDWR); - if (fd < 0) { - pr_perror("Unable to open %s", 
path); + fd = freezer_open(); + if (fd < 0) return -1; - } + state = get_freezer_state(fd); if (state == FREEZER_ERROR) { close(fd); @@ -375,9 +410,7 @@ static int freeze_processes(void) if (state == THAWED) { freezer_thawed = true; - lseek(fd, 0, SEEK_SET); - if (write(fd, frozen, sizeof(frozen)) != sizeof(frozen)) { - pr_perror("Unable to freeze tasks"); + if (freezer_write_state(fd, FROZEN)) { close(fd); return -1; } @@ -427,13 +460,9 @@ static int freeze_processes(void) } err: - if (exit_code == 0 || freezer_thawed) { - lseek(fd, 0, SEEK_SET); - if (write(fd, thawed, sizeof(thawed)) != sizeof(thawed)) { - pr_perror("Unable to thaw tasks"); - exit_code = -1; - } - } + if (exit_code == 0 || freezer_thawed) + exit_code = freezer_write_state(fd, THAWED); + if (close(fd)) { pr_perror("Unable to thaw tasks"); return -1; From 10416bcbcb3c2ab4732971083f99d9390f8d168b Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Tue, 4 Feb 2020 13:38:42 +0000 Subject: [PATCH 0169/1854] seize: support cgroup v2 freezer This adds support to checkpoint processes using the cgroup v2 freezer. Signed-off-by: Adrian Reber --- criu/seize.c | 150 +++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 133 insertions(+), 17 deletions(-) diff --git a/criu/seize.c b/criu/seize.c index b53707e44..0ba2d9b1d 100644 --- a/criu/seize.c +++ b/criu/seize.c @@ -38,7 +38,10 @@ enum freezer_state { FREEZING }; -static enum freezer_state get_freezer_state(int fd) +/* Track if we are running on cgroup v2 system. */ +static bool cgroup_v2 = false; + +static enum freezer_state get_freezer_v1_state(int fd) { char state[32]; int ret; @@ -71,6 +74,70 @@ err: return FREEZER_ERROR; } +static enum freezer_state get_freezer_v2_state(int fd) +{ + int exit_code = FREEZER_ERROR; + char path[PATH_MAX]; + FILE *event; + char state; + int ret; + + /* + * cgroupv2 freezer uses cgroup.freeze to control the state. The file + * can return 0 or 1. 1 means the cgroup is frozen; 0 means it is not + * frozen. 
Writing 1 to an unfrozen cgroup can freeze it. Freezing can + * take some time and if the cgroup has finished freezing can be + * seen in cgroup.events: frozen 0|1. + */ + + ret = lseek(fd, 0, SEEK_SET); + if (ret < 0) { + pr_perror("Unable to seek freezer FD"); + goto out; + } + ret = read(fd, &state, 1); + if (ret <= 0) { + pr_perror("Unable to read from freezer FD"); + goto out; + } + pr_debug("cgroup.freeze=%c\n", state); + if (state == '0') { + exit_code = THAWED; + goto out; + } + + snprintf(path, sizeof(path), "%s/cgroup.events", opts.freeze_cgroup); + event = fopen(path, "r"); + if (event == NULL) { + pr_perror("Unable to open %s", path); + goto out; + } + while (fgets(path, sizeof(path), event)) { + if (strncmp(path, "frozen", 6) != 0) { + continue; + } else if (strncmp(path, "frozen 0", 8) == 0) { + exit_code = FREEZING; + goto close; + } else if (strncmp(path, "frozen 1", 8) == 0) { + exit_code = FROZEN; + goto close; + } + } + + pr_err("Unknown freezer state: %c\n", state); +close: + fclose(event); +out: + return exit_code; +} + +static enum freezer_state get_freezer_state(int fd) +{ + if (cgroup_v2) + return get_freezer_v2_state(fd); + return get_freezer_v1_state(fd); +} + static bool freezer_thawed; const char *get_real_freezer_state(void) @@ -80,15 +147,23 @@ const char *get_real_freezer_state(void) static int freezer_write_state(int fd, enum freezer_state new_state) { - char state[32]; + char state[32] = {0}; int ret; if (new_state == THAWED) { - if (strlcpy(state, thawed, sizeof(state)) >= sizeof(state)) - return -1; + if (cgroup_v2) + state[0] = '0'; + else + if (strlcpy(state, thawed, sizeof(state)) >= + sizeof(state)) + return -1; } else if (new_state == FROZEN) { - if (strlcpy(state, frozen, sizeof(state)) >= sizeof(state)) - return -1; + if (cgroup_v2) + state[0] = '1'; + else + if (strlcpy(state, frozen, sizeof(state)) >= + sizeof(state)) + return -1; } else { return -1; } @@ -109,10 +184,13 @@ static int freezer_write_state(int fd, enum 
freezer_state new_state) static int freezer_open(void) { + const char freezer_v1[] = "freezer.state"; + const char freezer_v2[] = "cgroup.freeze"; char path[PATH_MAX]; int fd; - snprintf(path, sizeof(path), "%s/freezer.state", opts.freeze_cgroup); + snprintf(path, sizeof(path), "%s/%s", opts.freeze_cgroup, + cgroup_v2 ? freezer_v2 : freezer_v1); fd = open(path, O_RDWR); if (fd < 0) { pr_perror("Unable to open %s", path); @@ -139,6 +217,22 @@ static int freezer_restore_state(void) return ret; } +static FILE *freezer_open_thread_list(char *root_path) +{ + char path[PATH_MAX]; + FILE *f; + + snprintf(path, sizeof(path), "%s/%s", root_path, + cgroup_v2 ? "cgroup.threads" : "tasks"); + f = fopen(path, "r"); + if (f == NULL) { + pr_perror("Unable to open %s", path); + return NULL; + } + + return f; +} + /* A number of tasks in a freezer cgroup which are not going to be dumped */ static int processes_to_wait; static pid_t *processes_to_wait_pids; @@ -154,12 +248,10 @@ static int seize_cgroup_tree(char *root_path, enum freezer_state state) * New tasks can appear while a freezer state isn't * frozen, so we need to catch all new tasks. 
*/ - snprintf(path, sizeof(path), "%s/tasks", root_path); - f = fopen(path, "r"); - if (f == NULL) { - pr_perror("Unable to open %s", path); + f = freezer_open_thread_list(root_path); + if (f == NULL) return -1; - } + while (fgets(path, sizeof(path), f)) { pid_t pid; int ret; @@ -306,12 +398,10 @@ static int log_unfrozen_stacks(char *root) char path[PATH_MAX]; FILE *f; - snprintf(path, sizeof(path), "%s/tasks", root); - f = fopen(path, "r"); - if (f == NULL) { - pr_perror("Unable to open %s", path); + f = freezer_open_thread_list(root); + if (f == NULL) return -1; - } + while (fgets(path, sizeof(path), f)) { pid_t pid; int ret, stack; @@ -820,6 +910,27 @@ err_close: return -1; } +static int cgroup_version(void) +{ + char path[PATH_MAX]; + + snprintf(path, sizeof(path), "%s/freezer.state", opts.freeze_cgroup); + if (access(path, F_OK) == 0) { + cgroup_v2 = false; + return 0; + } + + snprintf(path, sizeof(path), "%s/cgroup.freeze", opts.freeze_cgroup); + if (access(path, F_OK) == 0) { + cgroup_v2 = true; + return 0; + } + + pr_err("Neither a cgroupv1 (freezer.state) or cgroupv2 (cgroup.freeze) control file found.\n"); + + return -1; +} + int collect_pstree(void) { pid_t pid = root_item->pid->real; @@ -835,6 +946,11 @@ int collect_pstree(void) */ alarm(opts.timeout); + if (opts.freeze_cgroup && cgroup_version()) + goto err; + + pr_debug("Detected cgroup V%d freezer\n", cgroup_v2 ? 2 : 1); + if (opts.freeze_cgroup && freeze_processes()) goto err; From 4129d3262ad2d2ac6875c2c86d565528969d8e72 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Wed, 5 Feb 2020 09:39:32 +0000 Subject: [PATCH 0170/1854] cgroup2: add minimal cgroup2 support The runc test cases are (sometimes) mounting a cgroup inside of the container. For these tests to succeed, let CRIU know that cgroup2 exists and how to restore such a mount. This does not fix any specific cgroup2 settings, it just enables CRIU to mount cgroup2 in the restored container. 
Signed-off-by: Adrian Reber --- criu/filesystems.c | 5 +++++ images/mnt.proto | 2 ++ 2 files changed, 7 insertions(+) diff --git a/criu/filesystems.c b/criu/filesystems.c index 1e4550b37..d76b18291 100644 --- a/criu/filesystems.c +++ b/criu/filesystems.c @@ -747,6 +747,11 @@ static struct fstype fstypes[] = { .code = FSTYPE__CGROUP, .parse = cgroup_parse, .sb_equal = cgroup_sb_equal, + }, { + .name = "cgroup2", + .code = FSTYPE__CGROUP2, + .parse = cgroup_parse, + .sb_equal = cgroup_sb_equal, }, { .name = "aufs", .code = FSTYPE__AUFS, diff --git a/images/mnt.proto b/images/mnt.proto index 4160acbf6..8983395ae 100644 --- a/images/mnt.proto +++ b/images/mnt.proto @@ -28,6 +28,8 @@ enum fstype { // RPC_PIPEFS = 20; // NFS = 21; // NFS4 = 22; + + CGROUP2 = 23; }; message mnt_entry { From ffe0896ed01790e62cd617cf01bc6a4076fa4e87 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Sun, 1 Mar 2020 04:26:12 +0300 Subject: [PATCH 0171/1854] fs: use __open_proc instead of open("/proc/...", ... ) Processes can run in a mount namespace without /proc. 
Reported-by: Mr Jenkins Signed-off-by: Andrei Vagin --- criu/files-reg.c | 4 +--- criu/memfd.c | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/criu/files-reg.c b/criu/files-reg.c index c2a55aeb3..0d0076666 100644 --- a/criu/files-reg.c +++ b/criu/files-reg.c @@ -778,14 +778,12 @@ static int dump_ghost_file(int _fd, u32 id, const struct stat *st, dev_t phys_de if (S_ISREG(st->st_mode)) { int fd, ret; - char lpath[PSFDS]; /* * Reopen file locally since it may have no read * permissions when drained */ - sprintf(lpath, "/proc/self/fd/%d", _fd); - fd = open(lpath, O_RDONLY); + fd = open_proc(PROC_SELF, "fd/%d", _fd); if (fd < 0) { pr_perror("Can't open ghost original file"); goto err_out; diff --git a/criu/memfd.c b/criu/memfd.c index d17c10fb7..30ccdf22c 100644 --- a/criu/memfd.c +++ b/criu/memfd.c @@ -299,7 +299,6 @@ static int memfd_open_inode(struct memfd_inode *inode) int memfd_open(struct file_desc *d, u32 *fdflags) { - char lpath[PSFDS]; struct memfd_info *mfi; MemfdFileEntry *mfe; int fd, _fd; @@ -318,14 +317,13 @@ int memfd_open(struct file_desc *d, u32 *fdflags) goto err; /* Reopen the fd with original permissions */ - sprintf(lpath, "/proc/self/fd/%d", fd); flags = fdflags ? *fdflags : mfe->flags; /* * Ideally we should call compat version open() to not force the * O_LARGEFILE file flag with regular open(). It doesn't seem that * important though. */ - _fd = open(lpath, flags); + _fd = __open_proc(getpid(), 0, flags, "fd/%d", fd); if (_fd < 0) { pr_perror("Can't reopen memfd id=%d", mfe->id); goto err; From fce196d88df8363666922ac2fa2d6e23bb774289 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Sat, 29 Feb 2020 09:51:33 +0300 Subject: [PATCH 0172/1854] memfd: don't corrupt a state of the dumped fd Right now, criu uses a dumped fd to dump content of a memfd "file". Here are two reasons why we should not do this: * a state of a dumped fd doesn't have to be changed, but now criu calls lseek on it. 
This can be worked around by using pread. * a dumped descriptor can be write-only. Reported-by: Mr Jenkins Cc: Nicolas Viennot Signed-off-by: Andrei Vagin --- criu/memfd.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/criu/memfd.c b/criu/memfd.c index 30ccdf22c..983e01b38 100644 --- a/criu/memfd.c +++ b/criu/memfd.c @@ -119,6 +119,7 @@ out: static struct memfd_inode *dump_unique_memfd_inode(int lfd, const char *name, const struct stat *st) { struct memfd_inode *inode; + int fd; list_for_each_entry(inode, &memfd_inodes, list) if ((inode->dev == st->st_dev) && (inode->ino == st->st_ino)) @@ -132,11 +133,19 @@ static struct memfd_inode *dump_unique_memfd_inode(int lfd, const char *name, co inode->ino = st->st_ino; inode->id = memfd_inode_ids++; - if (dump_memfd_inode(lfd, inode, name, st)) { + fd = open_proc(PROC_SELF, "fd/%d", lfd); + if (fd < 0) { xfree(inode); return NULL; } + if (dump_memfd_inode(fd, inode, name, st)) { + close(fd); + xfree(inode); + return NULL; + } + close(fd); + list_add_tail(&inode->list, &memfd_inodes); return inode; From 58fd63042c925c6422f5453b9a13147ebd2a6769 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Sun, 1 Mar 2020 01:04:20 +0300 Subject: [PATCH 0173/1854] zdtm/inhfd: force python to read new data from a file python 2.7 doesn't call the read system call again once it has read the file to the end. Reading with os.read() works around this problem. inhfd/memfd.py hangs due to this issue. Reported-by: Mr Jenkins Signed-off-by: Andrei Vagin --- test/zdtm.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/test/zdtm.py b/test/zdtm.py index 4110b5142..0bd7b84cc 100755 --- a/test/zdtm.py +++ b/test/zdtm.py @@ -697,7 +697,10 @@ class inhfd_test: # regular files, so we loop. data = b'' while not data: - data = peer_file.read(16) + # In python 2.7, peer_file.read() doesn't call the read + # system call again once it has read the file to the end. + # Reading with os.read() works around this problem.
+ data = os.read(peer_file.fileno(), 16) time.sleep(0.1) except Exception as e: print("Unable to read a peer file: %s" % e) From f167d1f4e9eb24dfbda077746f25cfdf8a2b59b9 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 7 Feb 2020 15:59:45 +0300 Subject: [PATCH 0174/1854] fown: Don't fail on dumping files opened with O_PATH O_PATH opened files are special: they have empty file operations in kernel space, so there is not that much we can do with them, even setting position is not allowed. The same applies to a signal number for owner settings. Signed-off-by: Cyrill Gorcunov Co-developed-by: Alexander Mikhalitsyn Signed-off-by: Alexander Mikhalitsyn (Virtuozzo) --- criu/files-reg.c | 16 +++++++--- criu/files.c | 5 ++- criu/pie/parasite.c | 75 +++++++++++++++++++++++++++++---------------- 3 files changed, 64 insertions(+), 32 deletions(-) diff --git a/criu/files-reg.c b/criu/files-reg.c index 0d0076666..d1d1ee5af 100644 --- a/criu/files-reg.c +++ b/criu/files-reg.c @@ -1776,11 +1776,17 @@ static int do_open_reg(int ns_root_fd, struct reg_file_info *rfi, void *arg) if (fd < 0) return fd; - if ((rfi->rfe->pos != -1ULL) && - lseek(fd, rfi->rfe->pos, SEEK_SET) < 0) { - pr_perror("Can't restore file pos"); - close(fd); - return -1; + /* + * O_PATH opened files carry empty fops in kernel, + * just ignore positioning at all.
+ */ + if (!(rfi->rfe->flags & O_PATH)) { + if (rfi->rfe->pos != -1ULL && + lseek(fd, rfi->rfe->pos, SEEK_SET) < 0) { + pr_perror("Can't restore file pos"); + close(fd); + return -1; + } } return fd; diff --git a/criu/files.c b/criu/files.c index f7963bf54..3f1d77931 100644 --- a/criu/files.c +++ b/criu/files.c @@ -399,7 +399,10 @@ static int fill_fd_params(struct pid *owner_pid, int fd, int lfd, pr_info("%d fdinfo %d: pos: %#16"PRIx64" flags: %16o/%#x\n", owner_pid->real, fd, p->pos, p->flags, (int)p->fd_flags); - ret = fcntl(lfd, F_GETSIG, 0); + if (p->flags & O_PATH) + ret = 0; + else + ret = fcntl(lfd, F_GETSIG, 0); if (ret < 0) { pr_perror("Can't get owner signum on %d", lfd); return -1; diff --git a/criu/pie/parasite.c b/criu/pie/parasite.c index 387a976da..64b5bbb3e 100644 --- a/criu/pie/parasite.c +++ b/criu/pie/parasite.c @@ -317,15 +317,60 @@ grps_err: return -1; } +static int fill_fds_fown(int fd, struct fd_opts *p) +{ + int flags, ret; + struct f_owner_ex owner_ex; + uint32_t v[2]; + + /* + * For O_PATH opened files there is no owner at all. + */ + flags = sys_fcntl(fd, F_GETFL, 0); + if (flags < 0) { + pr_err("fcntl(%d, F_GETFL) -> %d\n", fd, flags); + return -1; + } + if (flags & O_PATH) { + p->fown.pid = 0; + return 0; + } + + ret = sys_fcntl(fd, F_GETOWN_EX, (long)&owner_ex); + if (ret) { + pr_err("fcntl(%d, F_GETOWN_EX) -> %d\n", fd, ret); + return -1; + } + + /* + * Simple case -- nothing is changed. 
+ */ + if (owner_ex.pid == 0) { + p->fown.pid = 0; + return 0; + } + + ret = sys_fcntl(fd, F_GETOWNER_UIDS, (long)&v); + if (ret) { + pr_err("fcntl(%d, F_GETOWNER_UIDS) -> %d\n", fd, ret); + return -1; + } + + p->fown.uid = v[0]; + p->fown.euid = v[1]; + p->fown.pid_type = owner_ex.type; + p->fown.pid = owner_ex.pid; + + return 0; +} + static int fill_fds_opts(struct parasite_drain_fd *fds, struct fd_opts *opts) { int i; for (i = 0; i < fds->nr_fds; i++) { - int flags, fd = fds->fds[i], ret; + int flags, fd = fds->fds[i]; struct fd_opts *p = opts + i; - struct f_owner_ex owner_ex; - uint32_t v[2]; flags = sys_fcntl(fd, F_GETFD, 0); if (flags < 0) { @@ -335,30 +380,8 @@ static int fill_fds_opts(struct parasite_drain_fd *fds, struct fd_opts *opts) p->flags = (char)flags; - ret = sys_fcntl(fd, F_GETOWN_EX, (long)&owner_ex); - if (ret) { - pr_err("fcntl(%d, F_GETOWN_EX) -> %d\n", fd, ret); + if (fill_fds_fown(fd, p)) return -1; - } - - /* - * Simple case -- nothing is changed. - */ - if (owner_ex.pid == 0) { - p->fown.pid = 0; - continue; - } - - ret = sys_fcntl(fd, F_GETOWNER_UIDS, (long)&v); - if (ret) { - pr_err("fcntl(%d, F_GETOWNER_UIDS) -> %d\n", fd, ret); - return -1; - } - - p->fown.uid = v[0]; - p->fown.euid = v[1]; - p->fown.pid_type = owner_ex.type; - p->fown.pid = owner_ex.pid; } return 0; From 8b9c1f4c5bebd501f544cfe81534e4386f85246f Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Fri, 7 Feb 2020 15:59:55 +0300 Subject: [PATCH 0175/1854] zdtm: add a test for files opened with O_PATH With this test, without the patch ("fown: Don't fail on dumping files opened with O_PATH"), we trigger these errors: Error (criu/pie/parasite.c:340): fcntl(4, F_GETOWN_EX) -> -9 Error (criu/files.c:403): Can't get owner signum on 18: Bad file descriptor Error (criu/files-reg.c:1887): Can't restore file pos: Bad file descriptor Signed-off-by: Pavel Tikhomirov Signed-off-by: Alexander Mikhalitsyn (Virtuozzo) --- test/zdtm/static/Makefile | 1 + test/zdtm/static/opath_file.c | 95
+++++++++++++++++++++++++++++++++++ 2 files changed, 96 insertions(+) create mode 100644 test/zdtm/static/opath_file.c diff --git a/test/zdtm/static/Makefile b/test/zdtm/static/Makefile index 5afd18cd6..035b8fa9c 100644 --- a/test/zdtm/static/Makefile +++ b/test/zdtm/static/Makefile @@ -363,6 +363,7 @@ TST_DIR = \ private_bind_propagation \ ghost_on_rofs \ overmounted_file \ + opath_file \ TST_DIR_FILE = \ chroot \ diff --git a/test/zdtm/static/opath_file.c b/test/zdtm/static/opath_file.c new file mode 100644 index 000000000..602a5af27 --- /dev/null +++ b/test/zdtm/static/opath_file.c @@ -0,0 +1,95 @@ +#include +#include +#include +#include + +#include "zdtmtst.h" + +#define TEST_FILE "test_file" +#define BUF_SIZE 4096 +#define fdinfo_field(str, field) !strncmp(str, field":", sizeof(field)) +#define pr_debug(format, arg...) test_msg("DBG: %s:%d: " format, __FILE__, __LINE__, ## arg) + +const char *test_doc = "Check open file with O_PATH preserved"; +const char *test_author = "Pavel Tikhomirov "; + +char *dirname; +TEST_OPTION(dirname, string, "directory name", 1); + +struct fdinfo { + int flags; +}; + +static int parse_self_fdinfo(int fd, struct fdinfo *fi) +{ + char path[PATH_MAX], line[BUF_SIZE]; + FILE *file; + int ret = -1; + unsigned long long val; + + snprintf(path, sizeof(path), "/proc/self/fdinfo/%d", fd); + file = fopen(path, "r"); + if (!file) { + pr_perror("fopen"); + return -1; + } + + while (fgets(line, sizeof(line), file)) { + if (fdinfo_field(line, "flags")) { + if (sscanf(line, "%*s %llu", &val) != 1) { + pr_err("failed to read flags: %s", line); + goto fail; + } + pr_debug("Open flags = %llu\n", val); + fi->flags = val; + ret = 0; + break; + } + } +fail: + fclose(file); + return ret; +} + +int main(int argc, char **argv) +{ + char test_file[PATH_MAX]; + struct fdinfo fi; + int fd; + + test_init(argc, argv); + + if (mkdir(dirname, 0700)) { + pr_perror("can't make directory %s", dirname); + exit(1); + } + + snprintf(test_file, sizeof(test_file), 
"%s/%s", dirname, TEST_FILE); + fd = creat(test_file, 0644); + if (fd == -1) { + pr_perror("cat't create %s", test_file); + return 1; + } + close(fd); + + fd = open(test_file, O_PATH); + if (fd == -1) { + pr_perror("cat't open file %s with O_PATH", test_file); + return 1; + } + + test_daemon(); + test_waitsig(); + + if (parse_self_fdinfo(fd, &fi)) + return 1; + + if (!(fi.flags & O_PATH)) { + fail("File lost O_PATH open flag"); + return 1; + } + + close(fd); + pass(); + return 0; +} From 1936608ce42283b6c5aa007c883092bb2776af4b Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Fri, 7 Feb 2020 16:00:01 +0300 Subject: [PATCH 0176/1854] files: allow dumping opened symlinks To really open symlink file and not the regular file below it, one needs to do open with O_PATH|O_NOFOLLOW flags. Looks like systemd started to open /etc/localtime symlink this way sometimes, and before that nobody actually used this and thus we never supported this in CRIU. Error (criu/files-ext.c:96): Can't dump file 11 of that type [120777] (unknown /etc/localtime) Looks like it is quiet easy to support, as c/r of symlink file is almost the same as c/r of regular one. We need to only make fstatat not following links in check_path_remap. Also we need to take into account support of ghost symlinks. Signed-off-by: Alexander Mikhalitsyn (Virtuozzo) Co-developed-by: Pavel Tikhomirov Signed-off-by: Pavel Tikhomirov --- criu/files-reg.c | 88 +++++++++++++++++++++++++++++++++++++---- criu/files.c | 3 +- images/ghost-file.proto | 2 + 3 files changed, 84 insertions(+), 9 deletions(-) diff --git a/criu/files-reg.c b/criu/files-reg.c index d1d1ee5af..b53e9b080 100644 --- a/criu/files-reg.c +++ b/criu/files-reg.c @@ -282,19 +282,53 @@ static int mkreg_ghost(char *path, GhostFileEntry *gfe, struct cr_img *img) return ret; } +static int mklnk_ghost(char *path, GhostFileEntry *gfe) +{ + if (!gfe->symlnk_target) { + pr_err("Ghost symlink target is NULL for %s. 
Image from old CRIU?\n", path); + return -1; + } + + if (symlink(gfe->symlnk_target, path) < 0) { + /* + * ENOENT case is OK + * Take a look closer on create_ghost() function + */ + if (errno != ENOENT) + pr_perror("symlink(%s, %s) failed", gfe->symlnk_target, path); + return -1; + } + + return 0; +} + static int ghost_apply_metadata(const char *path, GhostFileEntry *gfe) { struct timeval tv[2]; int ret = -1; - if (chown(path, gfe->uid, gfe->gid) < 0) { - pr_perror("Can't reset user/group on ghost %s", path); - goto err; - } + if (S_ISLNK(gfe->mode)) { + if (lchown(path, gfe->uid, gfe->gid) < 0) { + pr_perror("Can't reset user/group on ghost %s", path); + goto err; + } - if (chmod(path, gfe->mode)) { - pr_perror("Can't set perms %o on ghost %s", gfe->mode, path); - goto err; + /* + * We have no lchmod() function, and fchmod() will fail on + * O_PATH | O_NOFOLLOW fd. Yes, we have fchmodat() + * function and flag AT_SYMLINK_NOFOLLOW described in + * man 2 fchmodat, but it is not currently implemented. 
%) + */ + } else { + if (chown(path, gfe->uid, gfe->gid) < 0) { + pr_perror("Can't reset user/group on ghost %s", path); + goto err; + } + + if (chmod(path, gfe->mode)) { + pr_perror("Can't set perms %o on ghost %s", gfe->mode, path); + goto err; + } } if (gfe->atim) { @@ -353,6 +387,9 @@ again: } else if (S_ISDIR(gfe->mode)) { if ((ret = mkdirpat(AT_FDCWD, path, gfe->mode)) < 0) msg = "Can't make ghost dir"; + } else if (S_ISLNK(gfe->mode)) { + if ((ret = mklnk_ghost(path, gfe)) < 0) + msg = "Can't create ghost symlink"; } else { if ((ret = mkreg_ghost(path, gfe, img)) < 0) msg = "Can't create ghost regfile"; @@ -740,6 +777,7 @@ static int dump_ghost_file(int _fd, u32 id, const struct stat *st, dev_t phys_de int exit_code = -1; GhostFileEntry gfe = GHOST_FILE_ENTRY__INIT; Timeval atim = TIMEVAL__INIT, mtim = TIMEVAL__INIT; + char pathbuf[PATH_MAX]; pr_info("Dumping ghost file contents (id %#x)\n", id); @@ -773,6 +811,36 @@ static int dump_ghost_file(int _fd, u32 id, const struct stat *st, dev_t phys_de gfe.size = st->st_size; } + /* + * We set gfe.symlnk_target only if we need to dump + * symlink content, otherwise we leave it NULL. + * It will be taken into account on restore in mklnk_ghost function. + */ + if (S_ISLNK(st->st_mode)) { + ssize_t ret; + + /* + * We assume that _fd opened with O_PATH | O_NOFOLLOW + * flags because S_ISLNK(st->st_mode). With current kernel version, + * it's looks like correct assumption in any case. 
+ */ + ret = readlinkat(_fd, "", pathbuf, sizeof(pathbuf) - 1); + if (ret < 0) { + pr_perror("Can't readlinkat"); + goto err_out; + } + + pathbuf[ret] = 0; + + if (ret != st->st_size) { + pr_err("Buffer for readlinkat is too small: ret %zd, st_size %"PRId64", buf %u %s\n", + ret, st->st_size, PATH_MAX, pathbuf); + goto err_out; + } + + gfe.symlnk_target = pathbuf; + } + if (pb_write_one(img, &gfe, PB_GHOST_FILE)) goto err_out; @@ -1116,6 +1184,7 @@ static int check_path_remap(struct fd_link *link, const struct fd_parms *parms, int ret, mntns_root; struct stat pst; const struct stat *ost = &parms->stat; + int flags = 0; if (parms->fs_type == PROC_SUPER_MAGIC) { /* The file points to /proc/pid/ where pid is a dead @@ -1212,7 +1281,10 @@ static int check_path_remap(struct fd_link *link, const struct fd_parms *parms, if (mntns_root < 0) return -1; - ret = fstatat(mntns_root, rpath, &pst, 0); + if (S_ISLNK(parms->stat.st_mode)) + flags = AT_SYMLINK_NOFOLLOW; + + ret = fstatat(mntns_root, rpath, &pst, flags); if (ret < 0) { /* * Linked file, but path is not accessible (unless any diff --git a/criu/files.c b/criu/files.c index 3f1d77931..f6ba39a30 100644 --- a/criu/files.c +++ b/criu/files.c @@ -545,7 +545,8 @@ static int dump_one_file(struct pid *pid, int fd, int lfd, struct fd_opts *opts, return do_dump_gen_file(&p, lfd, ops, e); } - if (S_ISREG(p.stat.st_mode) || S_ISDIR(p.stat.st_mode)) { + if (S_ISREG(p.stat.st_mode) || S_ISDIR(p.stat.st_mode) || + S_ISLNK(p.stat.st_mode)) { if (fill_fdlink(lfd, &p, &link)) return -1; diff --git a/images/ghost-file.proto b/images/ghost-file.proto index eda466451..0576089fd 100644 --- a/images/ghost-file.proto +++ b/images/ghost-file.proto @@ -15,6 +15,8 @@ message ghost_file_entry { optional timeval mtim = 8; optional bool chunks = 9; optional uint64 size = 10; + /* this field makes sense only when S_ISLNK(mode) */ + optional string symlnk_target = 11; } message ghost_chunk_entry { From 73e0ed3b8ae2a87d4232dd4a39ce3dab4edb1f24 Mon 
Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Fri, 7 Feb 2020 16:00:05 +0300 Subject: [PATCH 0177/1854] zdtm: add a test on open symlink migration Signed-off-by: Pavel Tikhomirov Co-Developed-by: Vitaly Ostrosablin Signed-off-by: Vitaly Ostrosablin Signed-off-by: Alexander Mikhalitsyn (Virtuozzo) --- test/zdtm/static/Makefile | 3 + test/zdtm/static/opath_file.c | 2 +- test/zdtm/static/symlink.c | 102 ++++++++++++++++++++++++++++++++++ test/zdtm/static/symlink01.c | 1 + 4 files changed, 107 insertions(+), 1 deletion(-) create mode 100644 test/zdtm/static/symlink.c create mode 120000 test/zdtm/static/symlink01.c diff --git a/test/zdtm/static/Makefile b/test/zdtm/static/Makefile index 035b8fa9c..ee69612c7 100644 --- a/test/zdtm/static/Makefile +++ b/test/zdtm/static/Makefile @@ -364,6 +364,8 @@ TST_DIR = \ ghost_on_rofs \ overmounted_file \ opath_file \ + symlink \ + symlink01 \ TST_DIR_FILE = \ chroot \ @@ -539,6 +541,7 @@ clone_fs: LDLIBS += -pthread # we have to explicitly specify both .o and .d for this case: netns_sub_veth.o netns_sub_veth.d: CPPFLAGS += $(call pkg-cflags, libnl-3.0) netns_sub_veth: LDLIBS += $(call pkg-libs, libnl-route-3.0 libnl-3.0) +symlink01: CFLAGS += -DZDTM_UNLINK_SYMLINK socket-tcp-fin-wait1: CFLAGS += -D ZDTM_TCP_FIN_WAIT1 socket-tcp-fin-wait2: CFLAGS += -D ZDTM_TCP_FIN_WAIT2 diff --git a/test/zdtm/static/opath_file.c b/test/zdtm/static/opath_file.c index 602a5af27..943f4eddb 100644 --- a/test/zdtm/static/opath_file.c +++ b/test/zdtm/static/opath_file.c @@ -36,7 +36,7 @@ static int parse_self_fdinfo(int fd, struct fdinfo *fi) while (fgets(line, sizeof(line), file)) { if (fdinfo_field(line, "flags")) { - if (sscanf(line, "%*s %llu", &val) != 1) { + if (sscanf(line, "%*s %llo", &val) != 1) { pr_err("failed to read flags: %s", line); goto fail; } diff --git a/test/zdtm/static/symlink.c b/test/zdtm/static/symlink.c new file mode 100644 index 000000000..074c80052 --- /dev/null +++ b/test/zdtm/static/symlink.c @@ -0,0 +1,102 @@ +#include 
+#include +#include +#include +#include +#include + +#include "zdtmtst.h" + +#define TEST_FILE "test_file" +#define TEST_SYMLINK "test_symlink" + +const char *test_doc = "Check open symlink preserved"; +const char *test_author = "Pavel Tikhomirov "; + +char *dirname; +TEST_OPTION(dirname, string, "directory name", 1); + +int main(int argc, char **argv) +{ + char test_symlink[PATH_MAX]; + char test_file[PATH_MAX]; + char pathbuf[PATH_MAX]; + struct stat stb, sta; + int ret, fd; + + test_init(argc, argv); + + if (mkdir(dirname, 0700)) { + pr_perror("can't make directory %s", dirname); + exit(1); + } + + snprintf(test_file, sizeof(test_file), "%s/%s", dirname, TEST_FILE); + ret = creat(test_file, 0644); + if (ret == -1) { + pr_perror("cat't create %s", test_file); + return 1; + } + close(ret); + + snprintf(test_symlink, sizeof(test_symlink), "%s/%s", dirname, TEST_SYMLINK); + ret = symlink(test_file, test_symlink); + if (ret == -1) { + pr_perror("cat't symlink to %s", test_symlink); + return 1; + } + + fd = open(test_symlink, O_PATH | O_NOFOLLOW); + if (fd == -1) { + pr_perror("cat't open symlink %s", test_symlink); + return 1; + } + + ret = fstat(fd, &sta); + if (ret == -1) { + pr_perror("cat't fstat %s", test_symlink); + return 1; + } + + if (!S_ISLNK(sta.st_mode)) { + pr_perror("file is not symlink %s", test_symlink); + return 1; + } + +#ifdef ZDTM_UNLINK_SYMLINK + if (unlink(test_symlink)) { + pr_perror("can't unlink symlink %s", test_symlink); + return 1; + } +#endif + + test_daemon(); + test_waitsig(); + + ret = fstat(fd, &stb); + if (ret == -1) { + fail("cat't fstat %s", test_symlink); + return 1; + } + + if (!S_ISLNK(stb.st_mode)) { + fail("file is not symlink %s", test_symlink); + return 1; + } + + ret = readlinkat(fd, "", pathbuf, sizeof(pathbuf) - 1); + if (ret < 0) { + fail("Can't readlinkat"); + return 1; + } + pathbuf[ret] = 0; + + if (strcmp(test_file, pathbuf)) { + fail("symlink points to %s but %s expected", pathbuf, test_file); + return 1; + } + + 
close(fd); + pass(); + return 0; +} diff --git a/test/zdtm/static/symlink01.c b/test/zdtm/static/symlink01.c new file mode 120000 index 000000000..e2d071ea4 --- /dev/null +++ b/test/zdtm/static/symlink01.c @@ -0,0 +1 @@ +symlink.c \ No newline at end of file From 065ff6f4151805fe50cc881ef506b6ae6407ec57 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Thu, 5 Mar 2020 08:30:23 +0300 Subject: [PATCH 0178/1854] zdtm/fifo_loop: don't try to write more than pipe size ... otherwise write() can block. Reported-by: Mr Jenkins Signed-off-by: Andrei Vagin --- test/zdtm/transition/fifo_loop.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/test/zdtm/transition/fifo_loop.c b/test/zdtm/transition/fifo_loop.c index 2e28320ba..b028c2fd5 100644 --- a/test/zdtm/transition/fifo_loop.c +++ b/test/zdtm/transition/fifo_loop.c @@ -39,6 +39,7 @@ int main(int argc, char **argv) int i; uint8_t buf[0x100000]; char *file_path; + int pipe_size; test_init(argc, argv); @@ -104,6 +105,13 @@ int main(int argc, char **argv) exit(1); } + pipe_size = fcntl(writefd, F_SETPIPE_SZ, sizeof(buf)); + if (pipe_size != sizeof(buf)) { + pr_perror("fcntl(writefd, F_GETPIPE_SZ) -> %d", pipe_size); + kill(0, SIGKILL); + exit(1); + } + file_path = path[i - 1]; readfd = open(file_path, O_RDONLY); if (readfd < 0) { @@ -138,13 +146,14 @@ int main(int argc, char **argv) for (p = rbuf, len = wlen; len > 0; p += rlen, len -= rlen) { rlen = read(readfd, p, len); + if (rlen < 0 && errno == EINTR) { + continue; + } + if (rlen <= 0) break; } - if (rlen < 0 && errno == EINTR) - continue; - if (len > 0) { fail("read failed: %m\n"); ret = 1; From 62ad2f6095b466bfade1af6fea60bcb0fa1505ec Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 5 Mar 2020 14:45:34 +0000 Subject: [PATCH 0179/1854] criu: Remove compel.h includes The plan is to remove "compel.h". That file only includes other headers (which may be not needed). 
If we aim for one-include-for-compel, we could instead paste all subheaders into "compel.h". Rather, I think it's worth to migrate to more fine-grained compel headers than follow the strategy 'one header to rule them all'. Further, the header creates problems for cross-compilation: it's included in files, those are used by host-compel. Which rightfully confuses compiler/linker as host's definitions for fpu regs/other platform details get drained into host's compel. As a first step - stop including "compel.h" in criu. Signed-off-by: Dmitry Safonov --- criu/aio.c | 2 +- criu/arch/aarch64/crtools.c | 2 +- criu/arch/arm/crtools.c | 3 +-- criu/arch/ppc64/crtools.c | 2 +- criu/arch/s390/crtools.c | 2 +- criu/arch/x86/crtools.c | 2 +- criu/arch/x86/sys-exec-tbl.c | 1 - criu/cr-restore.c | 1 - criu/include/proc_parse.h | 2 +- criu/kerndat.c | 1 - criu/mem.c | 2 +- criu/parasite-syscall.c | 2 -- criu/pie/pie-relocs.h | 2 -- criu/seize.c | 1 - criu/vdso.c | 1 - 15 files changed, 8 insertions(+), 18 deletions(-) diff --git a/criu/aio.c b/criu/aio.c index 45651f2d3..6ee65d5f4 100644 --- a/criu/aio.c +++ b/criu/aio.c @@ -11,7 +11,7 @@ #include "parasite.h" #include "parasite-syscall.h" #include "images/mm.pb-c.h" -#include +#include "compel/infect.h" #define NR_IOEVENTS_IN_NPAGES(npages) ((PAGE_SIZE * (npages) - sizeof(struct aio_ring)) / sizeof(struct io_event)) diff --git a/criu/arch/aarch64/crtools.c b/criu/arch/aarch64/crtools.c index f98743a23..76bd1fea7 100644 --- a/criu/arch/aarch64/crtools.c +++ b/criu/arch/aarch64/crtools.c @@ -19,7 +19,7 @@ #include "util.h" #include "cpu.h" #include "restorer.h" -#include +#include "compel/infect.h" #define assign_reg(dst, src, e) dst->e = (__typeof__(dst->e))(src)->e diff --git a/criu/arch/arm/crtools.c b/criu/arch/arm/crtools.c index c216cdc5c..840d489a6 100644 --- a/criu/arch/arm/crtools.c +++ b/criu/arch/arm/crtools.c @@ -18,8 +18,7 @@ #include "elf.h" #include "parasite-syscall.h" #include "restorer.h" - -#include +#include 
"compel/infect.h" #define assign_reg(dst, src, e) dst->e = (__typeof__(dst->e))((src)->ARM_##e) diff --git a/criu/arch/ppc64/crtools.c b/criu/arch/ppc64/crtools.c index 5a5966ad4..0d9f49c3f 100644 --- a/criu/arch/ppc64/crtools.c +++ b/criu/arch/ppc64/crtools.c @@ -17,7 +17,7 @@ #include "log.h" #include "util.h" #include "cpu.h" -#include +#include "compel/infect.h" #include "protobuf.h" #include "images/core.pb-c.h" diff --git a/criu/arch/s390/crtools.c b/criu/arch/s390/crtools.c index 238035b76..000b7779f 100644 --- a/criu/arch/s390/crtools.c +++ b/criu/arch/s390/crtools.c @@ -17,7 +17,7 @@ #include "log.h" #include "util.h" #include "cpu.h" -#include +#include "compel/infect.h" #include "protobuf.h" #include "images/core.pb-c.h" diff --git a/criu/arch/x86/crtools.c b/criu/arch/x86/crtools.c index e4073c27b..9c8beeedd 100644 --- a/criu/arch/x86/crtools.c +++ b/criu/arch/x86/crtools.c @@ -1,5 +1,5 @@ #include "compel/asm/fpu.h" -#include "compel/compel.h" +#include "compel/infect.h" #include "compel/plugins/std/syscall-codes.h" #include "cpu.h" #include "cr_options.h" diff --git a/criu/arch/x86/sys-exec-tbl.c b/criu/arch/x86/sys-exec-tbl.c index 608dc2510..225b8a153 100644 --- a/criu/arch/x86/sys-exec-tbl.c +++ b/criu/arch/x86/sys-exec-tbl.c @@ -1,4 +1,3 @@ -#include static struct syscall_exec_desc sc_exec_table_64[] = { #include "sys-exec-tbl-64.c" diff --git a/criu/cr-restore.c b/criu/cr-restore.c index 85105a18e..41146d4ad 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -67,7 +67,6 @@ #include "timerfd.h" #include "action-scripts.h" #include "shmem.h" -#include #include "aio.h" #include "lsm.h" #include "seccomp.h" diff --git a/criu/include/proc_parse.h b/criu/include/proc_parse.h index 96a097b3d..fd50ff47e 100644 --- a/criu/include/proc_parse.h +++ b/criu/include/proc_parse.h @@ -3,7 +3,7 @@ #include -#include +#include "compel/infect.h" #define PROC_TASK_COMM_LEN 32 #define PROC_TASK_COMM_LEN_FMT "(%31s" diff --git a/criu/kerndat.c b/criu/kerndat.c 
index 8ac83820b..2ad72c350 100644 --- a/criu/kerndat.c +++ b/criu/kerndat.c @@ -33,7 +33,6 @@ #include "net.h" #include "tun.h" #include -#include #include "netfilter.h" #include "fsnotify.h" #include "linux/userfaultfd.h" diff --git a/criu/mem.c b/criu/mem.c index 4e110c9e9..55022d94a 100644 --- a/criu/mem.c +++ b/criu/mem.c @@ -29,7 +29,7 @@ #include "pagemap-cache.h" #include "fault-injection.h" #include "prctl.h" -#include +#include "compel/infect-util.h" #include "protobuf.h" #include "images/pagemap.pb-c.h" diff --git a/criu/parasite-syscall.c b/criu/parasite-syscall.c index e5a8194e5..b649d1b51 100644 --- a/criu/parasite-syscall.c +++ b/criu/parasite-syscall.c @@ -45,8 +45,6 @@ #include "infect-rpc.h" #include "pie/parasite-blob.h" -#include - unsigned long get_exec_start(struct vm_area_list *vmas) { struct vma_area *vma_area; diff --git a/criu/pie/pie-relocs.h b/criu/pie/pie-relocs.h index 6797486c2..e36126be6 100644 --- a/criu/pie/pie-relocs.h +++ b/criu/pie/pie-relocs.h @@ -1,8 +1,6 @@ #ifndef __PIE_RELOCS_H__ #define __PIE_RELOCS_H__ -#include - #include "common/config.h" #include "common/compiler.h" diff --git a/criu/seize.c b/criu/seize.c index 0ba2d9b1d..f973806d9 100644 --- a/criu/seize.c +++ b/criu/seize.c @@ -23,7 +23,6 @@ #include "string.h" #include "xmalloc.h" #include "util.h" -#include #define NR_ATTEMPTS 5 diff --git a/criu/vdso.c b/criu/vdso.c index 19ba4765d..433a54728 100644 --- a/criu/vdso.c +++ b/criu/vdso.c @@ -20,7 +20,6 @@ #include "criu-log.h" #include "mem.h" #include "vma.h" -#include #include #ifdef LOG_PREFIX From 327554ee646ac8c7728981d0607dc42420c3a85a Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 5 Mar 2020 15:00:08 +0000 Subject: [PATCH 0180/1854] compel: Remove compel.h The file only includes other headers (which may be not needed). If we aim for one-include-for-compel, we could instead paste all subheaders into "compel.h". 
Rather, I think it's worth to migrate to more fine-grained compel headers than follow the strategy 'one header to rule them all'. Further, the header creates problems for cross-compilation: it's included in files, those are used by host-compel. Which rightfully confuses compiler/linker as host's definitions for fpu regs/other platform details get drained into host's compel. Signed-off-by: Dmitry Safonov --- Documentation/compel.txt | 2 +- compel/arch/aarch64/src/lib/handle-elf.c | 3 +-- compel/arch/arm/src/lib/handle-elf.c | 3 +-- compel/arch/arm/src/lib/infect.c | 1 + compel/arch/ppc64/src/lib/cpu.c | 1 + compel/arch/ppc64/src/lib/handle-elf.c | 3 +-- compel/arch/s390/src/lib/handle-elf.c | 3 +-- compel/arch/s390/src/lib/infect.c | 1 + compel/arch/x86/src/lib/handle-elf.c | 3 +-- compel/arch/x86/src/lib/infect.c | 1 + compel/include/log.h | 3 +-- compel/include/uapi/compel.h | 14 -------------- compel/src/lib/handle-elf.c | 6 ++---- compel/src/lib/log.c | 3 --- compel/src/main.c | 2 -- compel/test/fdspy/spy.c | 1 - compel/test/infect/spy.c | 1 - compel/test/rsys/spy.c | 2 -- include/common/scm.h | 2 ++ 19 files changed, 15 insertions(+), 40 deletions(-) delete mode 100644 compel/include/uapi/compel.h diff --git a/Documentation/compel.txt b/Documentation/compel.txt index 744a3b35d..6ccd20861 100644 --- a/Documentation/compel.txt +++ b/Documentation/compel.txt @@ -86,7 +86,7 @@ Infecting code ~~~~~~~~~~~~~~ The parasitic code is compiled and converted to a header using *compel*, and included here. 
-*#include * +*#include * *#include "parasite.h"* diff --git a/compel/arch/aarch64/src/lib/handle-elf.c b/compel/arch/aarch64/src/lib/handle-elf.c index 1c3686c48..1ee65ee2c 100644 --- a/compel/arch/aarch64/src/lib/handle-elf.c +++ b/compel/arch/aarch64/src/lib/handle-elf.c @@ -1,6 +1,5 @@ #include - -#include "uapi/compel.h" +#include #include "handle-elf.h" #include "piegen.h" diff --git a/compel/arch/arm/src/lib/handle-elf.c b/compel/arch/arm/src/lib/handle-elf.c index 8abf8dad1..5b8d00a6f 100644 --- a/compel/arch/arm/src/lib/handle-elf.c +++ b/compel/arch/arm/src/lib/handle-elf.c @@ -1,6 +1,5 @@ #include - -#include "uapi/compel.h" +#include #include "handle-elf.h" #include "piegen.h" diff --git a/compel/arch/arm/src/lib/infect.c b/compel/arch/arm/src/lib/infect.c index c17cb9c9b..0053bef58 100644 --- a/compel/arch/arm/src/lib/infect.c +++ b/compel/arch/arm/src/lib/infect.c @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include "common/page.h" diff --git a/compel/arch/ppc64/src/lib/cpu.c b/compel/arch/ppc64/src/lib/cpu.c index 338ab4891..7a3972790 100644 --- a/compel/arch/ppc64/src/lib/cpu.c +++ b/compel/arch/ppc64/src/lib/cpu.c @@ -2,6 +2,7 @@ #include #include #include +#include #include "compel-cpu.h" diff --git a/compel/arch/ppc64/src/lib/handle-elf.c b/compel/arch/ppc64/src/lib/handle-elf.c index 3d4020f59..f29fdc8a3 100644 --- a/compel/arch/ppc64/src/lib/handle-elf.c +++ b/compel/arch/ppc64/src/lib/handle-elf.c @@ -1,6 +1,5 @@ #include - -#include "uapi/compel.h" +#include #include "handle-elf.h" #include "piegen.h" diff --git a/compel/arch/s390/src/lib/handle-elf.c b/compel/arch/s390/src/lib/handle-elf.c index 01a8bf4c8..6ed382c92 100644 --- a/compel/arch/s390/src/lib/handle-elf.c +++ b/compel/arch/s390/src/lib/handle-elf.c @@ -1,6 +1,5 @@ #include - -#include "uapi/compel.h" +#include #include "handle-elf.h" #include "piegen.h" diff --git a/compel/arch/s390/src/lib/infect.c b/compel/arch/s390/src/lib/infect.c index 
7e7d24ce2..5a4675449 100644 --- a/compel/arch/s390/src/lib/infect.c +++ b/compel/arch/s390/src/lib/infect.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include diff --git a/compel/arch/x86/src/lib/handle-elf.c b/compel/arch/x86/src/lib/handle-elf.c index 62fb28f49..938999b2e 100644 --- a/compel/arch/x86/src/lib/handle-elf.c +++ b/compel/arch/x86/src/lib/handle-elf.c @@ -1,6 +1,5 @@ #include - -#include "uapi/compel.h" +#include #include "handle-elf.h" #include "piegen.h" diff --git a/compel/arch/x86/src/lib/infect.c b/compel/arch/x86/src/lib/infect.c index 11e7f4c91..9c4abb60c 100644 --- a/compel/arch/x86/src/lib/infect.c +++ b/compel/arch/x86/src/lib/infect.c @@ -3,6 +3,7 @@ #include #include #include +#include #include diff --git a/compel/include/log.h b/compel/include/log.h index 559f909ce..49e65bb50 100644 --- a/compel/include/log.h +++ b/compel/include/log.h @@ -1,8 +1,7 @@ #ifndef COMPEL_LOG_H__ #define COMPEL_LOG_H__ -#include "uapi/compel/compel.h" -#include "uapi/compel/loglevels.h" +#include "uapi/compel/log.h" #ifndef LOG_PREFIX # define LOG_PREFIX diff --git a/compel/include/uapi/compel.h b/compel/include/uapi/compel.h deleted file mode 100644 index 318a472da..000000000 --- a/compel/include/uapi/compel.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef UAPI_COMPEL_H__ -#define UAPI_COMPEL_H__ - -#include -#include - -#include -#include -#include -#include -#include -#include - -#endif /* UAPI_COMPEL_H__ */ diff --git a/compel/src/lib/handle-elf.c b/compel/src/lib/handle-elf.c index ca7c53b71..69d5104b6 100644 --- a/compel/src/lib/handle-elf.c +++ b/compel/src/lib/handle-elf.c @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include @@ -12,8 +12,6 @@ #include #include -#include "uapi/compel.h" - #include "handle-elf.h" #include "piegen.h" #include "log.h" @@ -228,7 +226,7 @@ int __handle_elf(void *mem, size_t size) } pr_out("/* Autogenerated from %s */\n", opts.input_filename); - pr_out("#include \n"); + pr_out("#include 
\n"); for (i = 0; i < symtab_hdr->sh_size / symtab_hdr->sh_entsize; i++) { Elf_Sym *sym = &symbols[i]; diff --git a/compel/src/lib/log.c b/compel/src/lib/log.c index d195343e4..c86be02c5 100644 --- a/compel/src/lib/log.c +++ b/compel/src/lib/log.c @@ -4,11 +4,8 @@ #include #include #include - #include -#include - #include "log.h" static unsigned int current_loglevel = COMPEL_DEFAULT_LOGLEVEL; diff --git a/compel/src/main.c b/compel/src/main.c index 8b2c8bc8d..36127c357 100644 --- a/compel/src/main.c +++ b/compel/src/main.c @@ -13,8 +13,6 @@ #include #include -#include "uapi/compel/compel.h" - #include "version.h" #include "piegen.h" #include "log.h" diff --git a/compel/test/fdspy/spy.c b/compel/test/fdspy/spy.c index 258e3ab75..1a373b6bb 100644 --- a/compel/test/fdspy/spy.c +++ b/compel/test/fdspy/spy.c @@ -5,7 +5,6 @@ #include #include -#include #include "parasite.h" #define PARASITE_CMD_GETFD PARASITE_USER_CMDS diff --git a/compel/test/infect/spy.c b/compel/test/infect/spy.c index a5aba7308..b5f8b2559 100644 --- a/compel/test/infect/spy.c +++ b/compel/test/infect/spy.c @@ -3,7 +3,6 @@ #include #include -#include #include "parasite.h" #define PARASITE_CMD_INC PARASITE_USER_CMDS diff --git a/compel/test/rsys/spy.c b/compel/test/rsys/spy.c index f5c999d5a..98654efcf 100644 --- a/compel/test/rsys/spy.c +++ b/compel/test/rsys/spy.c @@ -4,8 +4,6 @@ #include #include -#include - static void print_vmsg(unsigned int lvl, const char *fmt, va_list parms) { printf("\tLC%u: ", lvl); diff --git a/include/common/scm.h b/include/common/scm.h index ab27137b8..a8eb9ec4c 100644 --- a/include/common/scm.h +++ b/include/common/scm.h @@ -3,7 +3,9 @@ #include #include +#include #include +#include /* * Because of kernel doing kmalloc for user data passed From 18ac1540c4b64108b53fcd8fa3b3df256075e3f6 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 5 Mar 2020 15:04:15 +0000 Subject: [PATCH 0181/1854] travis: Add aarch64-cross test on amd64 Fixes: #924 Signed-off-by: Dmitry 
Safonov --- .travis.yml | 4 +++ scripts/build/Dockerfile.aarch64-cross | 45 ++++++++++++++++++++++++++ scripts/build/Makefile | 2 +- 3 files changed, 50 insertions(+), 1 deletion(-) create mode 100644 scripts/build/Dockerfile.aarch64-cross diff --git a/.travis.yml b/.travis.yml index 7c36af006..ffa82f15f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -86,6 +86,10 @@ jobs: arch: amd64 env: TR_ARCH=armv7-cross dist: bionic + - os: linux + arch: amd64 + env: TR_ARCH=aarch64-cross + dist: bionic allow_failures: - env: TR_ARCH=docker-test - env: TR_ARCH=docker-test DIST=xenial diff --git a/scripts/build/Dockerfile.aarch64-cross b/scripts/build/Dockerfile.aarch64-cross new file mode 100644 index 000000000..38229497a --- /dev/null +++ b/scripts/build/Dockerfile.aarch64-cross @@ -0,0 +1,45 @@ +FROM dockcross/base:latest + +# Add the cross compiler sources +RUN echo "deb http://ftp.us.debian.org/debian/ jessie main" >> /etc/apt/sources.list && \ + dpkg --add-architecture arm64 && \ + apt-get install emdebian-archive-keyring + +RUN apt-get update && apt-get install -y \ + crossbuild-essential-arm64 \ + libc6-dev-arm64-cross \ + libc6-arm64-cross \ + libbz2-dev:arm64 \ + libexpat1-dev:arm64 \ + ncurses-dev:arm64 \ + libssl-dev:arm64 \ + protobuf-c-compiler \ + protobuf-compiler \ + python-protobuf \ + libnl-3-dev:arm64 \ + libprotobuf-dev:arm64 \ + libnet-dev:arm64 \ + libprotobuf-c-dev:arm64 \ + libcap-dev:arm64 \ + libaio-dev:arm64 \ + libnl-route-3-dev:arm64 + +ENV CROSS_TRIPLE=aarch64-linux-gnu +ENV CROSS_COMPILE=${CROSS_TRIPLE}- \ + CROSS_ROOT=/usr/${CROSS_TRIPLE} \ + AS=/usr/bin/${CROSS_TRIPLE}-as \ + AR=/usr/bin/${CROSS_TRIPLE}-ar \ + CC=/usr/bin/${CROSS_TRIPLE}-gcc \ + CPP=/usr/bin/${CROSS_TRIPLE}-cpp \ + CXX=/usr/bin/${CROSS_TRIPLE}-g++ \ + LD=/usr/bin/${CROSS_TRIPLE}-ld \ + FC=/usr/bin/${CROSS_TRIPLE}-gfortran + +ENV PATH="${PATH}:${CROSS_ROOT}/bin" \ + PKG_CONFIG_PATH=/usr/lib/${CROSS_TRIPLE}/pkgconfig \ + ARCH=aarch64 + +COPY . 
/criu +WORKDIR /criu + +RUN make mrproper && date && make -j $(nproc) zdtm && date diff --git a/scripts/build/Makefile b/scripts/build/Makefile index d093ce76c..913a86d6c 100644 --- a/scripts/build/Makefile +++ b/scripts/build/Makefile @@ -2,7 +2,7 @@ ARCHES := x86_64 fedora-asan fedora-rawhide centos armv7hf TARGETS := $(ARCHES) alpine TARGETS_CLANG := $(addsuffix $(TARGETS),-clang) CONTAINER_RUNTIME := docker -TARGETS += armv7-cross +TARGETS += armv7-cross aarch64-cross all: $(TARGETS) $(TARGETS_CLANG) .PHONY: all From 1f74f8d77087ce06624cb54f2fa70afaf1380103 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 5 Mar 2020 16:43:48 +0000 Subject: [PATCH 0182/1854] travis: Use debian/buster as base for cross build tests Jessie is called 'oldoldstable', migrate to Buster. Suggested-by: Adrian Reber Signed-off-by: Dmitry Safonov --- scripts/build/Dockerfile.aarch64-cross | 2 +- scripts/build/Dockerfile.armv7-cross | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/build/Dockerfile.aarch64-cross b/scripts/build/Dockerfile.aarch64-cross index 38229497a..252e0f875 100644 --- a/scripts/build/Dockerfile.aarch64-cross +++ b/scripts/build/Dockerfile.aarch64-cross @@ -1,7 +1,7 @@ FROM dockcross/base:latest # Add the cross compiler sources -RUN echo "deb http://ftp.us.debian.org/debian/ jessie main" >> /etc/apt/sources.list && \ +RUN echo "deb http://ftp.us.debian.org/debian/ buster main" >> /etc/apt/sources.list && \ dpkg --add-architecture arm64 && \ apt-get install emdebian-archive-keyring diff --git a/scripts/build/Dockerfile.armv7-cross b/scripts/build/Dockerfile.armv7-cross index 434934aad..17a55561e 100644 --- a/scripts/build/Dockerfile.armv7-cross +++ b/scripts/build/Dockerfile.armv7-cross @@ -1,7 +1,7 @@ FROM dockcross/base:latest # Add the cross compiler sources -RUN echo "deb http://ftp.us.debian.org/debian/ jessie main" >> /etc/apt/sources.list && \ +RUN echo "deb http://ftp.us.debian.org/debian/ buster main" >> /etc/apt/sources.list 
&& \ dpkg --add-architecture armhf && \ apt-get install emdebian-archive-keyring From b9c8e957d8f198fb47ed9e73a5d5c3727ba4d4cc Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Tue, 10 Mar 2020 17:40:57 +0300 Subject: [PATCH 0183/1854] crit-recode: skip (not try to parse) nftables raw image We should ignore (not parse) images that has non-crtool format, that images has no magic number (RAW_IMAGE_MAGIC equals 0). nftables images has format compatible with `nft -f /proc/self/fd/0` input format. Reported-by: Mr Jenkins Signed-off-by: Alexander Mikhalitsyn (Virtuozzo) --- test/crit-recode.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/crit-recode.py b/test/crit-recode.py index a7dcc7272..adaf33733 100755 --- a/test/crit-recode.py +++ b/test/crit-recode.py @@ -47,6 +47,8 @@ for imgf in find.stdout.readlines(): continue if imgf_b.startswith(b'ip6tables-'): continue + if imgf_b.startswith(b'nftables-'): + continue if imgf_b.startswith(b'route-'): continue if imgf_b.startswith(b'route6-'): From c3ad4942d43524c617bb77345ce8912461b6f9aa Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Thu, 5 Mar 2020 20:46:16 +0200 Subject: [PATCH 0184/1854] travis: add ppc64-cross test on amd64 Signed-off-by: Mike Rapoport --- .travis.yml | 4 +++ scripts/build/Dockerfile.ppc64-cross | 45 ++++++++++++++++++++++++++++ scripts/build/Makefile | 2 +- 3 files changed, 50 insertions(+), 1 deletion(-) create mode 100644 scripts/build/Dockerfile.ppc64-cross diff --git a/.travis.yml b/.travis.yml index ffa82f15f..9928f16c2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -90,6 +90,10 @@ jobs: arch: amd64 env: TR_ARCH=aarch64-cross dist: bionic + - os: linux + arch: amd64 + env: TR_ARCH=ppc64-cross + dist: bionic allow_failures: - env: TR_ARCH=docker-test - env: TR_ARCH=docker-test DIST=xenial diff --git a/scripts/build/Dockerfile.ppc64-cross b/scripts/build/Dockerfile.ppc64-cross new file mode 100644 index 000000000..44061c558 --- /dev/null +++ 
b/scripts/build/Dockerfile.ppc64-cross @@ -0,0 +1,45 @@ +FROM dockcross/base:latest + +# Add the cross compiler sources +RUN echo "deb http://ftp.us.debian.org/debian/ buster main" >> /etc/apt/sources.list && \ + dpkg --add-architecture ppc64el && \ + apt-get install emdebian-archive-keyring + +RUN apt-get update && apt-get install -y \ + crossbuild-essential-ppc64el \ + libc6-dev-ppc64el-cross \ + libc6-ppc64el-cross \ + libbz2-dev:ppc64el \ + libexpat1-dev:ppc64el \ + ncurses-dev:ppc64el \ + libssl-dev:ppc64el \ + protobuf-c-compiler \ + protobuf-compiler \ + python-protobuf \ + libnl-3-dev:ppc64el \ + libprotobuf-dev:ppc64el \ + libnet-dev:ppc64el \ + libprotobuf-c-dev:ppc64el \ + libcap-dev:ppc64el \ + libaio-dev:ppc64el \ + libnl-route-3-dev:ppc64el + +ENV CROSS_TRIPLE=powerpc64le-linux-gnu +ENV CROSS_COMPILE=${CROSS_TRIPLE}- \ + CROSS_ROOT=/usr/${CROSS_TRIPLE} \ + AS=/usr/bin/${CROSS_TRIPLE}-as \ + AR=/usr/bin/${CROSS_TRIPLE}-ar \ + CC=/usr/bin/${CROSS_TRIPLE}-gcc \ + CPP=/usr/bin/${CROSS_TRIPLE}-cpp \ + CXX=/usr/bin/${CROSS_TRIPLE}-g++ \ + LD=/usr/bin/${CROSS_TRIPLE}-ld \ + FC=/usr/bin/${CROSS_TRIPLE}-gfortran + +ENV PATH="${PATH}:${CROSS_ROOT}/bin" \ + PKG_CONFIG_PATH=/usr/lib/${CROSS_TRIPLE}/pkgconfig \ + ARCH=ppc64 + +COPY . 
/criu +WORKDIR /criu + +RUN make mrproper && date && make -j $(nproc) zdtm && date diff --git a/scripts/build/Makefile b/scripts/build/Makefile index 913a86d6c..855539152 100644 --- a/scripts/build/Makefile +++ b/scripts/build/Makefile @@ -2,7 +2,7 @@ ARCHES := x86_64 fedora-asan fedora-rawhide centos armv7hf TARGETS := $(ARCHES) alpine TARGETS_CLANG := $(addsuffix $(TARGETS),-clang) CONTAINER_RUNTIME := docker -TARGETS += armv7-cross aarch64-cross +TARGETS += armv7-cross aarch64-cross ppc64-cross all: $(TARGETS) $(TARGETS_CLANG) .PHONY: all From d0d6f1ad108a6bd7eb0e2019aaca7689bff45275 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Sat, 21 Mar 2020 10:35:42 -0700 Subject: [PATCH 0185/1854] mailmap: update my email Signed-off-by: Andrei Vagin --- .mailmap | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/.mailmap b/.mailmap index d8c3f594d..6f046b972 100644 --- a/.mailmap +++ b/.mailmap @@ -1,6 +1,8 @@ Stanislav Kinsbursky Pavel Emelyanov -Andrey Vagin -Andrey Vagin -Andrey Vagin Andrew Vagin +Andrei Vagin +Andrei Vagin +Andrei Vagin +Andrei Vagin +Andrei Vagin Cyrill Gorcunov From f42ae70c75802787e980715a7ca895eb2b390d06 Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Fri, 20 Mar 2020 23:12:59 +0000 Subject: [PATCH 0186/1854] make: use cflags/ldflags for config.h detection mechanism The config.h detection scripts should use the provided CFLAGS/LDFLAGS as it tries to link libnl, libnet, and others. 
Signed-off-by: Nicolas Viennot --- scripts/nmk/scripts/utils.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/nmk/scripts/utils.mk b/scripts/nmk/scripts/utils.mk index 0cf216bc0..b9790615c 100644 --- a/scripts/nmk/scripts/utils.mk +++ b/scripts/nmk/scripts/utils.mk @@ -3,7 +3,7 @@ ifndef ____nmk_defined__utils # # Usage: option := $(call try-compile,language,source-to-build,cc-options,cc-defines) try-compile = $(shell sh -c 'echo "$(2)" | \ - $(CC) $(4) -x $(1) - $(3) -o /dev/null > /dev/null 2>&1 && \ + $(CC) $(CFLAGS) $(LDFLAGS) $(4) -x $(1) - $(3) -o /dev/null > /dev/null 2>&1 && \ echo true || echo false') # From fb65ab2b1a47558c2fe92a635630d53b971e5876 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Sun, 15 Mar 2020 10:44:14 +0300 Subject: [PATCH 0187/1854] mem: dump shared memory file descriptors Any shared memory mapping can be opened via /proc/self/map_files/. Such file descriptors look like memfd file descriptors, so they can be dumped in the same way.
Signed-off-by: Andrei Vagin --- criu/files.c | 2 +- criu/include/memfd.h | 2 +- criu/memfd.c | 11 +++++++---- criu/proc_parse.c | 38 ++++++++++---------------------------- 4 files changed, 19 insertions(+), 34 deletions(-) diff --git a/criu/files.c b/criu/files.c index f6ba39a30..a1fd26764 100644 --- a/criu/files.c +++ b/criu/files.c @@ -552,7 +552,7 @@ static int dump_one_file(struct pid *pid, int fd, int lfd, struct fd_opts *opts, p.link = &link; - if (is_memfd(p.stat.st_dev, &link.name[1])) + if (is_memfd(p.stat.st_dev)) ops = &memfd_dump_ops; else if (link.name[1] == '/') ops = ®file_dump_ops; diff --git a/criu/include/memfd.h b/criu/include/memfd.h index 2d8eda545..4189766fd 100644 --- a/criu/include/memfd.h +++ b/criu/include/memfd.h @@ -8,7 +8,7 @@ struct fd_parms; struct file_desc; -extern int is_memfd(dev_t dev, const char *path); +extern int is_memfd(dev_t dev); extern int dump_one_memfd_cond(int lfd, u32 *id, struct fd_parms *parms); extern const struct fdtype_ops memfd_dump_ops; diff --git a/criu/memfd.c b/criu/memfd.c index 983e01b38..bca6900cb 100644 --- a/criu/memfd.c +++ b/criu/memfd.c @@ -58,15 +58,14 @@ static LIST_HEAD(memfd_inodes); static u32 memfd_inode_ids = 1; -int is_memfd(dev_t dev, const char *path) +int is_memfd(dev_t dev) { /* * TODO When MAP_HUGETLB is used, the file device is not shmem_dev, * Note that other parts of CRIU have similar issues, see * is_anon_shmem_map(). */ - return dev == kdat.shmem_dev && - !strncmp(path, MEMFD_PREFIX, MEMFD_PREFIX_LEN); + return dev == kdat.shmem_dev; } static int dump_memfd_inode(int fd, struct memfd_inode *inode, @@ -167,7 +166,11 @@ static int dump_one_memfd(int lfd, u32 id, const struct fd_parms *p) link = p->link; strip_deleted(link); - name = &link->name[1+MEMFD_PREFIX_LEN]; + /* link->name is always started with "." which has to be skipped. 
*/ + if (strncmp(link->name + 1, MEMFD_PREFIX, MEMFD_PREFIX_LEN) == 0) + name = &link->name[1 + MEMFD_PREFIX_LEN]; + else + name = link->name + 1; inode = dump_unique_memfd_inode(lfd, name, &p->stat); if (!inode) diff --git a/criu/proc_parse.c b/criu/proc_parse.c index 468afcdf3..980342870 100644 --- a/criu/proc_parse.c +++ b/criu/proc_parse.c @@ -305,7 +305,7 @@ static int vma_get_mapfile_user(const char *fname, struct vma_area *vma, vfi_dev = makedev(vfi->dev_maj, vfi->dev_min); - if (is_memfd(vfi_dev, fname)) { + if (is_memfd(vfi_dev)) { struct fd_link link; link.len = strlen(fname); strlcpy(link.name, fname, sizeof(link.name)); @@ -596,39 +596,21 @@ static int handle_vma(pid_t pid, struct vma_area *vma_area, goto err; } - /* - * /dev/zero stands for anon-shared mapping - * otherwise it's some file mapping. - * - * We treat memfd mappings as regular file mappings because - * their backing can be seen as files, which is easy to - * support. So even though memfd is an anonymous shmem, we - * treat it differently. - * Note: maybe we should revisit this as /proc/map_files/ - * may not always be accessible. 
- */ - - if (is_memfd(st_buf->st_dev, file_path)) { - vma_area->e->status |= VMA_AREA_MEMFD; - goto normal_file; - } - - if (is_anon_shmem_map(st_buf->st_dev)) { - if (!(vma_area->e->flags & MAP_SHARED)) - goto err_bogus_mapping; + if (is_anon_shmem_map(st_buf->st_dev) && !strncmp(file_path, "/SYSV", 5)) { vma_area->e->flags |= MAP_ANONYMOUS; vma_area->e->status |= VMA_ANON_SHARED; vma_area->e->shmid = st_buf->st_ino; - - if (!strncmp(file_path, "/SYSV", 5)) { - pr_info("path: %s\n", file_path); - vma_area->e->status |= VMA_AREA_SYSVIPC; - } else { + if (!(vma_area->e->flags & MAP_SHARED)) + goto err_bogus_mapping; + pr_info("path: %s\n", file_path); + vma_area->e->status |= VMA_AREA_SYSVIPC; + } else { + if (is_anon_shmem_map(st_buf->st_dev)) { + vma_area->e->status |= VMA_AREA_MEMFD; if (fault_injected(FI_HUGE_ANON_SHMEM_ID)) vma_area->e->shmid += FI_HUGE_ANON_SHMEM_ID_BASE; } - } else { -normal_file: + if (vma_area->e->flags & MAP_PRIVATE) vma_area->e->status |= VMA_FILE_PRIVATE; else From 10b1d46f674ec458cd1a006eb1b0546bf5a7135c Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Thu, 19 Mar 2020 09:37:18 +0300 Subject: [PATCH 0188/1854] mem/vma: set VMA_FILE_{PRIVATE,SHARED} if a vma file is borrowed Here is a fast path when two consequent vma-s share the same file. But one of these vma-s can map a file with MAP_SHARED, but another one can map it with MAP_PRIVATE and we need to take this into account. 
--- criu/proc_parse.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/criu/proc_parse.c b/criu/proc_parse.c index 980342870..60aba8788 100644 --- a/criu/proc_parse.c +++ b/criu/proc_parse.c @@ -584,6 +584,14 @@ static int handle_vma(pid_t pid, struct vma_area *vma_area, vma_area->e->shmid = prev->e->shmid; vma_area->vmst = prev->vmst; vma_area->mnt_id = prev->mnt_id; + + if (!(vma_area->e->status & VMA_AREA_SYSVIPC)) { + vma_area->e->status &= ~(VMA_FILE_PRIVATE | VMA_FILE_SHARED); + if (vma_area->e->flags & MAP_PRIVATE) + vma_area->e->status |= VMA_FILE_PRIVATE; + else + vma_area->e->status |= VMA_FILE_SHARED; + } } else if (*vm_file_fd >= 0) { struct stat *st_buf = vma_area->vmst; From c40c09cbbf03afc058a761314fcdb14a3f69cb53 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Sun, 15 Mar 2020 10:53:51 +0300 Subject: [PATCH 0189/1854] test/zdtmp: add a test to C/R shared memory file descriptors Any shared memory region can be openned via /proc/self/map_files. Signed-off-by: Andrei Vagin --- test/zdtm/static/Makefile | 2 + test/zdtm/static/shmemfd-priv.c | 84 ++++++++++++++++++++++ test/zdtm/static/shmemfd-priv.desc | 1 + test/zdtm/static/shmemfd.c | 107 +++++++++++++++++++++++++++++ test/zdtm/static/shmemfd.desc | 1 + 5 files changed, 195 insertions(+) create mode 100644 test/zdtm/static/shmemfd-priv.c create mode 100644 test/zdtm/static/shmemfd-priv.desc create mode 100644 test/zdtm/static/shmemfd.c create mode 100644 test/zdtm/static/shmemfd.desc diff --git a/test/zdtm/static/Makefile b/test/zdtm/static/Makefile index ee69612c7..a8e4107d3 100644 --- a/test/zdtm/static/Makefile +++ b/test/zdtm/static/Makefile @@ -224,6 +224,8 @@ TST_NOFILE := \ memfd01 \ memfd02 \ memfd03 \ + shmemfd \ + shmemfd-priv \ # jobctl00 \ ifneq ($(ARCH),arm) diff --git a/test/zdtm/static/shmemfd-priv.c b/test/zdtm/static/shmemfd-priv.c new file mode 100644 index 000000000..bbdb46905 --- /dev/null +++ b/test/zdtm/static/shmemfd-priv.c @@ -0,0 +1,84 @@ +#include +#include + 
+#include +#include +#include +#include +#include + +#include "zdtmtst.h" + +const char *test_doc = "Test C/R of shared memory file descriptors"; +const char *test_author = "Andrei Vagin "; + +#define err(exitcode, msg, ...) ({ pr_perror(msg, ##__VA_ARGS__); exit(exitcode); }) + +int main(int argc, char *argv[]) +{ + void *addr, *priv_addr, *addr2; + char path[4096]; + int fd; + + test_init(argc, argv); + + addr = mmap(NULL, 5 * PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_SHARED, -1, 0); + if (addr == MAP_FAILED) { + pr_perror("mmap"); + return 1; + } + + *(int *) addr = 1; + *(int *) (addr + PAGE_SIZE) = 11; + *(int *) (addr + 2 * PAGE_SIZE) = 111; + + snprintf(path, sizeof(path), "/proc/self/map_files/%lx-%lx", + (long)addr, (long)addr + 5 * PAGE_SIZE); + fd = open(path, O_RDWR | O_LARGEFILE); + if (fd < 0) + err(1, "Can't open %s", path); + + priv_addr = mmap(NULL, 5 * PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_FILE | MAP_PRIVATE, fd, PAGE_SIZE); + if (priv_addr == MAP_FAILED) { + pr_perror("mmap"); + return 1; + } + + addr2 = mmap(NULL, 5 * PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_FILE | MAP_SHARED, fd, 2 * PAGE_SIZE); + if (addr2 == MAP_FAILED) { + pr_perror("mmap"); + return 1; + } + + *(int *) (priv_addr + PAGE_SIZE) = 22; + + test_daemon(); + test_waitsig(); + + if (*(int *) (priv_addr + PAGE_SIZE) != 22) { + fail("the second page of the private mapping is corrupted"); + return 1; + } + if (*(int *) (priv_addr) != 11) { + fail("the first page of the private mapping is corrupted"); + return 1; + } + if (*(int *) (addr2) != 111) { + fail("the first page of the second shared mapping is corrupted"); + return 1; + } + *(int *) (addr2) = 333; + if (*(int *) (addr + 2 * PAGE_SIZE) != 333) { + fail("the first page of the second shared mapping isn't shared"); + return 1; + } + *(int *) (addr + 3 * PAGE_SIZE) = 444; + if (*(int *) (priv_addr + 2 * PAGE_SIZE) != 444) { + fail("the third page of the private mapping is corrupted"); + return 1; + } + + pass(); + + 
return 0; +} diff --git a/test/zdtm/static/shmemfd-priv.desc b/test/zdtm/static/shmemfd-priv.desc new file mode 100644 index 000000000..d969725f6 --- /dev/null +++ b/test/zdtm/static/shmemfd-priv.desc @@ -0,0 +1 @@ +{'flavor': 'h ns', 'flags': 'suid'} diff --git a/test/zdtm/static/shmemfd.c b/test/zdtm/static/shmemfd.c new file mode 100644 index 000000000..b65faa2e1 --- /dev/null +++ b/test/zdtm/static/shmemfd.c @@ -0,0 +1,107 @@ +#include +#include + +#include +#include +#include +#include +#include + +#include "zdtmtst.h" + +const char *test_doc = "Test C/R of shared memory file descriptors"; +const char *test_author = "Andrei Vagin "; + +#define err(exitcode, msg, ...) ({ pr_perror(msg, ##__VA_ARGS__); exit(exitcode); }) + +int main(int argc, char *argv[]) +{ + int fd, fl_flags1, fl_flags2, fd_flags1, fd_flags2; + struct statfs statfs1, statfs2; + off_t pos1, pos2; + char path[4096]; + char buf[5]; + void *addr; + + test_init(argc, argv); + + addr = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_SHARED, -1, 0); + if (addr == MAP_FAILED) { + pr_perror("mmap"); + return 1; + } + + snprintf(path, sizeof(path), "/proc/self/map_files/%lx-%lx", + (long)addr, (long)addr + PAGE_SIZE); + fd = open(path, O_RDWR | O_LARGEFILE); + if (fd < 0) + err(1, "Can't open %s", path); + ftruncate(fd, 0); + munmap(addr, PAGE_SIZE); + + if (fcntl(fd, F_SETFL, O_APPEND) < 0) + err(1, "Can't get fl flags"); + + if ((fl_flags1 = fcntl(fd, F_GETFL)) == -1) + err(1, "Can't get fl flags"); + + if ((fd_flags1 = fcntl(fd, F_GETFD)) == -1) + err(1, "Can't get fd flags"); + + if (fstatfs(fd, &statfs1) < 0) + err(1, "statfs issue"); + + if (write(fd, "hello", 5) != 5) + err(1, "write error"); + + pos1 = 3; + if (lseek(fd, pos1, SEEK_SET) < 0) + err(1, "seek error"); + + test_daemon(); + test_waitsig(); + + if ((fl_flags2 = fcntl(fd, F_GETFL)) == -1) + err(1, "Can't get fl flags"); + + if (fl_flags1 != fl_flags2) { + fail("fl flags differs %x %x", fl_flags1, fl_flags2); + return 
1; + } + + if ((fd_flags2 = fcntl(fd, F_GETFD)) == -1) + err(1, "Can't get fd flags"); + + if (fd_flags1 != fd_flags2) { + fail("fd flags differs"); + return 1; + } + + if (fstatfs(fd, &statfs2) < 0) + err(1, "statfs issue"); + + if (statfs1.f_type != statfs2.f_type) { + fail("statfs.f_type differs"); + return 1; + } + + pos2 = lseek(fd, 0, SEEK_CUR); + if (pos1 != pos2) { + fail("position differs"); + return 1; + } + + if (pread(fd, buf, sizeof(buf), 0) != sizeof(buf)) { + fail("read problem"); + return 1; + } + + if (memcmp(buf, "hello", sizeof(buf))) { + fail("content mismatch"); + return 1; + } + + pass(); + + return 0; +} diff --git a/test/zdtm/static/shmemfd.desc b/test/zdtm/static/shmemfd.desc new file mode 100644 index 000000000..d969725f6 --- /dev/null +++ b/test/zdtm/static/shmemfd.desc @@ -0,0 +1 @@ +{'flavor': 'h ns', 'flags': 'suid'} From 691b4a4e7ee980778d8f13eaebddf9b04063942a Mon Sep 17 00:00:00 2001 From: Valeriy Vdovin Date: Mon, 3 Feb 2020 15:08:26 +0300 Subject: [PATCH 0190/1854] zdtm: Implemented get_current_dir_name wrapper that checks for 'x' permissions Any filesystem syscall, that needs to navigate to inode by it's absolute path performs successive lookup operations for each part of the path. Lookup operation includes access rights check. Usually but not always zdtm tests processes fall under 'other' access category. Also, usually directories don't have 'x' bit set for other. In case when bit 'x' is not set and user-ID and group-ID of a process relate it to 'other', test's will not succeed in performing these syscalls which are most of filesystem api, that has const char *path as part of it arguments (open, openat, mkdir, bind, etc). The observable behavior of that is that zdtm tests fail at file creation ops on one system and pass on the other. The above is not immediately clear to the developer by just looking at failed test's logs. 
Investigation of that is also not quick for a developer due to the complex structure of zdtm runtime where nested clones with NAMESPACE flags take place alongside bind-mounts. As an additional note: 'get_current_dir_name' is documented as returning EACCES in case when some part of the path lacks read/list permissions. But in fact it's not always so. Practice shows that test processes can get false success on this operation only to fail on a later call to something like mkdir/mknod/bind with a given path in arguments. 'get_cwd_check_perm' is a wrapper around 'get_current_dir_name'. It also checks for permissions on the given filepath and logs the error. This directs the developer towards the right investigation path or even eliminates the need for investigation completely. Signed-off-by: Valeriy Vdovin --- test/zdtm/lib/fs.c | 24 ++++++++++++++++++++++++ test/zdtm/lib/fs.h | 24 ++++++++++++++++++++++++ 2 files changed, 48 insertions(+) diff --git a/test/zdtm/lib/fs.c b/test/zdtm/lib/fs.c index 0decfc37b..e82011ec8 100644 --- a/test/zdtm/lib/fs.c +++ b/test/zdtm/lib/fs.c @@ -94,3 +94,27 @@ err: mnt_info_free(&m); goto out; } + +int get_cwd_check_perm(char **result) +{ + char *cwd; + *result = 0; + cwd = get_current_dir_name(); + if (!cwd) { + pr_perror("failed to get current directory"); + return -1; + } + + if (access(cwd, X_OK)) { + pr_err("access check for bit X for current dir path '%s' " + "failed for uid:%d,gid:%d, error: %d(%s).
" + "Bit 'x' should be set in all path components of " + "this directory\n", + cwd, getuid(), getgid(), errno, strerror(errno) + ); + return -1; + } + + *result = cwd; + return 0; +} diff --git a/test/zdtm/lib/fs.h b/test/zdtm/lib/fs.h index 972b15aba..af7a665fb 100644 --- a/test/zdtm/lib/fs.h +++ b/test/zdtm/lib/fs.h @@ -50,4 +50,28 @@ extern mnt_info_t *mnt_info_alloc(void); extern void mnt_info_free(mnt_info_t **m); extern mnt_info_t *get_cwd_mnt_info(void); +/* + * get_cwd_check_perm is called to check that cwd is actually usable for a calling + * process. + * + * Example output of a stat command on a '/root' path shows file access bits: + * > stat /root + * File: ‘/root’ + * ... + * Access: (0550/dr-xr-x---) Uid: ( 0/root) Gid: ( 0/root) + * ^- no 'x' bit for other + * + * Here we can see that '/root' dir (that often can be part of cwd path) does not + * allow non-root user and non-root group to list contents of this directory. + * Calling process matching 'other' access category may succeed getting cwd path, but will + * fail performing further filesystem operations based on this path with confusing errors. + * + * This function calls get_current_dir_name and explicitly checks that bit 'x' is enabled for + * a calling process and logs the error. + * + * If check passes, stores get_current_dir's result in *result and returns 0 + * If check fails, stores 0 in *result and returns -1 + */ +extern int get_cwd_check_perm(char **result); + #endif /* ZDTM_FS_H_ */ From fa705e418b4e7c2bce0925ad9f8689cd40b0c00d Mon Sep 17 00:00:00 2001 From: Valeriy Vdovin Date: Mon, 3 Feb 2020 15:27:40 +0300 Subject: [PATCH 0191/1854] zdtm: Use safe helper function to initialize unix socket sockaddr structure The helper function removes code duplication from tests that want to initialize unix socket address to an absolute file path, derived from current working directory of the test + relative filename of a resulting socket. 
Because the former code used cwd = get_current_dir_name() as part of absolute filename generation, the resulting filepath could later cause failure of bind systcall due to unchecked permissions and introduce confusing permission errors. Signed-off-by: Valeriy Vdovin --- test/zdtm/lib/Makefile | 2 +- test/zdtm/lib/unix.c | 19 ++++++++++++++++ test/zdtm/lib/zdtmtst.h | 3 +++ test/zdtm/static/del_standalone_un.c | 17 +------------- test/zdtm/static/deleted_unix_sock.c | 19 ++-------------- test/zdtm/static/sk-unix01.c | 33 ++++++---------------------- 6 files changed, 33 insertions(+), 60 deletions(-) create mode 100644 test/zdtm/lib/unix.c diff --git a/test/zdtm/lib/Makefile b/test/zdtm/lib/Makefile index b87f36e8f..89ca90933 100644 --- a/test/zdtm/lib/Makefile +++ b/test/zdtm/lib/Makefile @@ -4,7 +4,7 @@ CFLAGS += $(USERCFLAGS) LIB := libzdtmtst.a -LIBSRC := datagen.c msg.c parseargs.c test.c streamutil.c lock.c ns.c tcp.c fs.c sysctl.c +LIBSRC := datagen.c msg.c parseargs.c test.c streamutil.c lock.c ns.c tcp.c unix.c fs.c sysctl.c LIBOBJ := $(LIBSRC:%.c=%.o) BIN := groups diff --git a/test/zdtm/lib/unix.c b/test/zdtm/lib/unix.c new file mode 100644 index 000000000..c36846cad --- /dev/null +++ b/test/zdtm/lib/unix.c @@ -0,0 +1,19 @@ +#include +#include +#include "zdtmtst.h" +#include "fs.h" + +int unix_fill_sock_name(struct sockaddr_un *name, char *relFilename) +{ + char *cwd; + + if (get_cwd_check_perm(&cwd)) { + pr_err("failed to get current working directory with valid permissions.\n"); + return -1; + } + + name->sun_family = AF_LOCAL; + ssprintf(name->sun_path, "%s/%s", cwd, relFilename); + return 0; +} + diff --git a/test/zdtm/lib/zdtmtst.h b/test/zdtm/lib/zdtmtst.h index 2cd4bdd1d..6eec26647 100644 --- a/test/zdtm/lib/zdtmtst.h +++ b/test/zdtm/lib/zdtmtst.h @@ -149,6 +149,9 @@ extern int tcp_init_server(int family, int *port); extern int tcp_accept_server(int sock); extern int tcp_init_client(int family, char *servIP, unsigned short servPort); +struct 
sockaddr_un; +extern int unix_fill_sock_name(struct sockaddr_un *name, char *relFilename); + struct zdtm_tcp_opts { bool reuseaddr; bool reuseport; diff --git a/test/zdtm/static/del_standalone_un.c b/test/zdtm/static/del_standalone_un.c index d8200068b..5426fc786 100644 --- a/test/zdtm/static/del_standalone_un.c +++ b/test/zdtm/static/del_standalone_un.c @@ -16,19 +16,6 @@ const char *test_author = "Tycho Andersen "; char *dirname; TEST_OPTION(dirname, string, "directory name", 1); -static int fill_sock_name(struct sockaddr_un *name, const char *filename) -{ - char *cwd; - - cwd = get_current_dir_name(); - if (strlen(filename) + strlen(cwd) + 1 >= sizeof(name->sun_path)) - return -1; - - name->sun_family = AF_LOCAL; - ssprintf(name->sun_path, "%s/%s", cwd, filename); - return 0; -} - static int bind_and_listen(struct sockaddr_un *addr) { int sk; @@ -71,10 +58,8 @@ int main(int argc, char **argv) goto out; } - if (fill_sock_name(&addr, filename) < 0) { - pr_err("filename \"%s\" is too long\n", filename); + if (unix_fill_sock_name(&addr, filename)) goto out; - } sk1 = bind_and_listen(&addr); if (sk1 < 0) diff --git a/test/zdtm/static/deleted_unix_sock.c b/test/zdtm/static/deleted_unix_sock.c index bcc33f3de..4d328e996 100644 --- a/test/zdtm/static/deleted_unix_sock.c +++ b/test/zdtm/static/deleted_unix_sock.c @@ -17,28 +17,13 @@ const char *test_author = "Roman Kagan "; char *filename; TEST_OPTION(filename, string, "file name", 1); -static int fill_sock_name(struct sockaddr_un *name, const char *filename) -{ - char *cwd; - - cwd = get_current_dir_name(); - if (strlen(filename) + strlen(cwd) + 1 >= sizeof(name->sun_path)) - return -1; - - name->sun_family = AF_LOCAL; - sprintf(name->sun_path, "%s/%s", cwd, filename); - return 0; -} - static int setup_srv_sock(void) { struct sockaddr_un name; int sock; - if (fill_sock_name(&name, filename) < 0) { - pr_perror("filename \"%s\" is too long", filename); + if (unix_fill_sock_name(&name, filename)) return -1; - } sock = 
socket(PF_LOCAL, SOCK_STREAM, 0); if (sock < 0) { @@ -67,7 +52,7 @@ static int setup_clnt_sock(void) struct sockaddr_un name; int sock; - if (fill_sock_name(&name, filename) < 0) + if (unix_fill_sock_name(&name, filename)) return -1; sock = socket(PF_LOCAL, SOCK_STREAM, 0); diff --git a/test/zdtm/static/sk-unix01.c b/test/zdtm/static/sk-unix01.c index 2bceef79a..0e9006a15 100644 --- a/test/zdtm/static/sk-unix01.c +++ b/test/zdtm/static/sk-unix01.c @@ -24,22 +24,6 @@ const char *test_author = "Cyrill Gorcunov "; char *dirname; TEST_OPTION(dirname, string, "directory name", 1); -static int fill_sock_name(struct sockaddr_un *name, const char *filename) -{ - char *cwd; - - cwd = get_current_dir_name(); - if (strlen(filename) + strlen(cwd) + 1 >= sizeof(name->sun_path)) { - pr_err("Name %s/%s is too long for socket\n", - cwd, filename); - return -1; - } - - name->sun_family = AF_LOCAL; - ssprintf(name->sun_path, "%s/%s", cwd, filename); - return 0; -} - static int sk_alloc_bind(int type, struct sockaddr_un *addr) { int sk; @@ -155,10 +139,9 @@ int main(int argc, char **argv) */ ssprintf(filename, "%s/%s", subdir_dg, "sk-dt"); - if (fill_sock_name(&addr, filename) < 0) { - pr_err("%s is too long for socket\n", filename); + if (unix_fill_sock_name(&addr, filename)) return 1; - } + unlink(addr.sun_path); sk_dgram[0] = sk_alloc_bind(SOCK_DGRAM, &addr); @@ -184,10 +167,9 @@ int main(int argc, char **argv) test_msg("sk-dt: alloc/connect/unlink %d %s\n", sk_dgram[3], addr.sun_path); ssprintf(filename, "%s/%s", dirname, "sole"); - if (fill_sock_name(&addr, filename) < 0) { - pr_err("%s is too long for socket\n", filename); + if (unix_fill_sock_name(&addr, filename)) return 1; - } + unlink(addr.sun_path); sk_dgram[4] = sk_alloc_bind(SOCK_DGRAM, &addr); @@ -237,7 +219,7 @@ int main(int argc, char **argv) sk_dgram_pair[0], sk_dgram_pair[1]); ssprintf(filename, "%s/%s", subdir_dg, "sk-dtp"); - if (fill_sock_name(&addr, filename) < 0) { + if (unix_fill_sock_name(&addr, filename)) { 
pr_err("%s is too long for socket\n", filename); return 1; } @@ -270,10 +252,9 @@ int main(int argc, char **argv) * - delete socket on fs */ ssprintf(filename, "%s/%s", subdir_st, "sk-st"); - if (fill_sock_name(&addr, filename) < 0) { - pr_err("%s is too long for socket\n", filename); + if (unix_fill_sock_name(&addr, filename)) return 1; - } + unlink(addr.sun_path); sk_st[0] = sk_alloc_bind(SOCK_STREAM, &addr); From 2b376168efd751856ebef192e764ebd0037e7174 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Sat, 21 Mar 2020 21:58:55 +0300 Subject: [PATCH 0192/1854] pipe: restore pipe size even if a pipe is empty Without this patch, pipe size is restored only if a pipe has queued data. Reported-by: Mr Jenkins Signed-off-by: Andrei Vagin --- criu/pipes.c | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/criu/pipes.c b/criu/pipes.c index cb5da71de..d74329161 100644 --- a/criu/pipes.c +++ b/criu/pipes.c @@ -160,24 +160,24 @@ int restore_pipe_data(int img_type, int pfd, u32 id, struct pipe_data_rst **hash return 0; } - if (!pd->pde->bytes) - goto out; - - if (!pd->data) { - pr_err("Double data restore occurred on %#x\n", id); - return -1; - } - if (pd->pde->has_size) { pr_info("Restoring size %#x for %#x\n", pd->pde->size, pd->pde->pipe_id); ret = fcntl(pfd, F_SETPIPE_SZ, pd->pde->size); if (ret < 0) { pr_perror("Can't restore pipe size"); - goto err; + return -1; } } + if (!pd->pde->bytes) + return 0; + + if (!pd->data) { + pr_err("Double data restore occurred on %#x\n", id); + return -1; + } + iov.iov_base = pd->data; iov.iov_len = pd->pde->bytes; @@ -185,14 +185,13 @@ int restore_pipe_data(int img_type, int pfd, u32 id, struct pipe_data_rst **hash ret = vmsplice(pfd, &iov, 1, SPLICE_F_GIFT | SPLICE_F_NONBLOCK); if (ret < 0) { pr_perror("%#x: Error splicing data", id); - goto err; + return -1; } if (ret == 0 || ret > iov.iov_len /* sanity */) { pr_err("%#x: Wanted to restore %zu bytes, but got %d\n", id, iov.iov_len, ret); 
- ret = -1; - goto err; + return -1; } iov.iov_base += ret; @@ -211,10 +210,7 @@ int restore_pipe_data(int img_type, int pfd, u32 id, struct pipe_data_rst **hash munmap(pd->data, pd->pde->bytes); pd->data = NULL; -out: - ret = 0; -err: - return ret; + return 0; } static int userns_reopen(void *_arg, int fd, pid_t pid) From 1ad209b9c2b780fe2d5b043c3ffe29634629252c Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Sat, 21 Mar 2020 22:08:38 +0300 Subject: [PATCH 0193/1854] test/pipe03: check that pipe size is restored Create two pipes with and without queued data. Signed-off-by: Andrei Vagin --- test/zdtm/static/pipe03.c | 37 ++++++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/test/zdtm/static/pipe03.c b/test/zdtm/static/pipe03.c index a8721e934..d649007b7 100644 --- a/test/zdtm/static/pipe03.c +++ b/test/zdtm/static/pipe03.c @@ -13,27 +13,28 @@ const char *test_author = "Andrei Vagin "; int main(int argc, char **argv) { - int p[2], i; + int p[2][2], i; uint8_t buf[BUF_SIZE]; uint32_t crc; test_init(argc, argv); - if (pipe2(p, O_NONBLOCK)) { - pr_perror("pipe"); - return 1; - } - - if (fcntl(p[1], F_SETPIPE_SZ, DATA_SIZE) == -1) { - pr_perror("Unable to change a pipe size"); - return 1; + for (i = 0; i < 2; i++) { + if (pipe2(p[i], O_NONBLOCK)) { + pr_perror("pipe"); + return 1; + } + if (fcntl(p[i][1], F_SETPIPE_SZ, DATA_SIZE) == -1) { + pr_perror("Unable to change a pipe size"); + return 1; + } } crc = ~0; datagen(buf, BUF_SIZE, &crc); for (i = 0; i < DATA_SIZE / BUF_SIZE; i++) { - if (write(p[1], buf, BUF_SIZE) != BUF_SIZE) { + if (write(p[0][1], buf, BUF_SIZE) != BUF_SIZE) { pr_perror("write"); return 1; } @@ -43,12 +44,26 @@ int main(int argc, char **argv) test_waitsig(); for (i = 0; i < DATA_SIZE / BUF_SIZE; i++) { - if (read(p[0], buf, BUF_SIZE) != BUF_SIZE) { + if (read(p[0][0], buf, BUF_SIZE) != BUF_SIZE) { pr_perror("read"); return 1; } } + for (i = 0; i < 2; i++) { + int size; + + size = fcntl(p[i][1], 
F_GETPIPE_SZ); + if (size < 0) { + pr_perror("Unable to get a pipe size"); + return 1; + } + if (size != DATA_SIZE) { + fail("%d: size %d expected %d", i, size, DATA_SIZE); + return 1; + } + } + pass(); return 0; } From 5f28b692a0c972ddefb1ca4d1d0ef003dec4f617 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Sat, 21 Mar 2020 22:11:28 +0300 Subject: [PATCH 0194/1854] test/fifo_loop: change sizes of all fifo-s to fit a test buffer This test doesn't expect that the write operation will block. Signed-off-by: Andrei Vagin --- test/zdtm/transition/fifo_loop.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/test/zdtm/transition/fifo_loop.c b/test/zdtm/transition/fifo_loop.c index b028c2fd5..b06592586 100644 --- a/test/zdtm/transition/fifo_loop.c +++ b/test/zdtm/transition/fifo_loop.c @@ -84,6 +84,14 @@ int main(int argc, char **argv) ret = errno; return ret; } + + pipe_size = fcntl(writefd, F_SETPIPE_SZ, sizeof(buf)); + if (pipe_size != sizeof(buf)) { + pr_perror("fcntl(writefd, F_SETPIPE_SZ) -> %d", pipe_size); + kill(0, SIGKILL); + exit(1); + } + signal(SIGPIPE, SIG_IGN); if (pipe_in2out(readfd, writefd, buf, sizeof(buf)) < 0) /* pass errno as exit code to the parent */ @@ -107,7 +115,7 @@ int main(int argc, char **argv) pipe_size = fcntl(writefd, F_SETPIPE_SZ, sizeof(buf)); if (pipe_size != sizeof(buf)) { - pr_perror("fcntl(writefd, F_GETPIPE_SZ) -> %d", pipe_size); + pr_perror("fcntl(writefd, F_SETPIPE_SZ) -> %d", pipe_size); kill(0, SIGKILL); exit(1); } From 1ad8657ddb3d383874cc07cd3cf456cac7977db6 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Tue, 24 Mar 2020 19:31:42 +0300 Subject: [PATCH 0195/1854] config/nftables: include string.h for strlen Fixes: 9433b7b9db3e ("make: use cflags/ldflags for config.h detection mechanism") Signed-off-by: Andrei Vagin --- scripts/feature-tests.mak | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/feature-tests.mak b/scripts/feature-tests.mak index 21b390092..8df20afb7 100644 --- 
a/scripts/feature-tests.mak +++ b/scripts/feature-tests.mak @@ -152,6 +152,8 @@ endef define FEATURE_TEST_NFTABLES_LIB_API_0 +#include + #include int main(int argc, char **argv) From cc362b432e2d2e3ec68628fb33b117fe3e89f9c2 Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Fri, 20 Mar 2020 17:34:57 +0300 Subject: [PATCH 0196/1854] namespaces: fix error handling in dump_user_ns Fix n_xid_map leaks on error path and remove useless exit_code. Fixes: 6e1726f8 ("userns: set uid and gid before entering into userns") Signed-off-by: Pavel Tikhomirov --- criu/namespaces.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/criu/namespaces.c b/criu/namespaces.c index 21266df7c..2db805b2f 100644 --- a/criu/namespaces.c +++ b/criu/namespaces.c @@ -938,9 +938,9 @@ static int check_user_ns(int pid) int dump_user_ns(pid_t pid, int ns_id) { - int ret, exit_code = -1; UsernsEntry *e = &userns_entry; struct cr_img *img; + int ret; ret = parse_id_map(pid, "uid_map", &e->uid_map); if (ret < 0) @@ -953,7 +953,7 @@ int dump_user_ns(pid_t pid, int ns_id) e->n_gid_map = ret; if (check_user_ns(pid)) - return -1; + goto err; img = open_image(CR_FD_USERNS, O_DUMP, ns_id); if (!img) @@ -973,7 +973,7 @@ err: xfree(e->gid_map[0]); xfree(e->gid_map); } - return exit_code; + return -1; } void free_userns_maps(void) From 967797a8676c8b3b7cd8954892b113c6765af25a Mon Sep 17 00:00:00 2001 From: Byeonggon Lee Date: Sun, 15 Mar 2020 16:32:15 +0900 Subject: [PATCH 0197/1854] Add build directory to gitignore After running make install, the build directory is generated but not ignored in gitignore. So this commit adds the build directory to gitignore. 
Signed-off-by: Byeonggon Lee --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index c231104af..23cd703be 100644 --- a/.gitignore +++ b/.gitignore @@ -42,3 +42,4 @@ lib/.crit-setup.files compel/include/asm include/common/asm include/common/config.h +build/ From e3a5d0975240f9e9b6b6d7a096af6b4bbad36737 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Mon, 23 Mar 2020 07:37:00 +0300 Subject: [PATCH 0198/1854] memfd: save all memfd inodes in one image Per-object image is acceptable if we expect to have 1-3 objects per-container. If we expect to have more objects, it is better to save them all into one image. There are a number of reasons for this: * We need fewer system calls to read all objects from one image. * It is faster to save or move one image. Signed-off-by: Andrei Vagin --- criu/cr-restore.c | 3 ++ criu/image-desc.c | 2 +- criu/include/image-desc.h | 2 +- criu/include/memfd.h | 2 + criu/memfd.c | 90 +++++++++++++++++---------------------- images/memfd.proto | 1 + 6 files changed, 46 insertions(+), 54 deletions(-) diff --git a/criu/cr-restore.c b/criu/cr-restore.c index 41146d4ad..1d3092f2f 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -233,6 +233,9 @@ static int restore_finish_ns_stage(int from, int to) static int crtools_prepare_shared(void) { + if (prepare_memfd_inodes()) + return -1; + if (prepare_files()) return -1; diff --git a/criu/image-desc.c b/criu/image-desc.c index b538a76ea..ac627a829 100644 --- a/criu/image-desc.c +++ b/criu/image-desc.c @@ -66,7 +66,7 @@ struct cr_fd_desc_tmpl imgset_template[CR_FD_MAX] = { FD_ENTRY(FS, "fs-%u"), FD_ENTRY(REMAP_FPATH, "remap-fpath"), FD_ENTRY_F(GHOST_FILE, "ghost-file-%x", O_NOBUF), - FD_ENTRY_F(MEMFD_INODE, "memfd-%u", O_NOBUF), + FD_ENTRY_F(MEMFD_INODE, "memfd", O_NOBUF), FD_ENTRY(TCP_STREAM, "tcp-stream-%x"), FD_ENTRY(MNTS, "mountpoints-%u"), FD_ENTRY(NETDEV, "netdev-%u"), diff --git a/criu/include/image-desc.h b/criu/include/image-desc.h index 
9ca9643a1..ce6ef1529 100644 --- a/criu/include/image-desc.h +++ b/criu/include/image-desc.h @@ -65,6 +65,7 @@ enum { CR_FD_CGROUP, CR_FD_FILE_LOCKS, CR_FD_SECCOMP, + CR_FD_MEMFD_INODE, _CR_FD_GLOB_TO, CR_FD_TMPFS_IMG, @@ -107,7 +108,6 @@ enum { CR_FD_PIPES, CR_FD_TTY_FILES, CR_FD_MEMFD_FILE, - CR_FD_MEMFD_INODE, CR_FD_AUTOFS, diff --git a/criu/include/memfd.h b/criu/include/memfd.h index 4189766fd..3074a5c0f 100644 --- a/criu/include/memfd.h +++ b/criu/include/memfd.h @@ -17,6 +17,8 @@ extern struct collect_image_info memfd_cinfo; extern struct file_desc *collect_memfd(u32 id); extern int apply_memfd_seals(void); +extern int prepare_memfd_inodes(void); + #ifdef CONFIG_HAS_MEMFD_CREATE # include #else diff --git a/criu/memfd.c b/criu/memfd.c index bca6900cb..2158e925b 100644 --- a/criu/memfd.c +++ b/criu/memfd.c @@ -43,9 +43,10 @@ struct memfd_inode { }; /* Only for restore */ struct { - mutex_t lock; - int fdstore_id; - unsigned int pending_seals; + mutex_t lock; + int fdstore_id; + unsigned int pending_seals; + MemfdInodeEntry *mie; }; }; }; @@ -71,9 +72,8 @@ int is_memfd(dev_t dev) static int dump_memfd_inode(int fd, struct memfd_inode *inode, const char *name, const struct stat *st) { - int ret = -1; - struct cr_img *img = NULL; MemfdInodeEntry mie = MEMFD_INODE_ENTRY__INIT; + int ret = -1; u32 shmid; /* @@ -90,10 +90,7 @@ static int dump_memfd_inode(int fd, struct memfd_inode *inode, if (dump_one_memfd_shmem(fd, shmid, st->st_size) < 0) goto out; - img = open_image(CR_FD_MEMFD_INODE, O_DUMP, inode->id); - if (!img) - goto out; - + mie.inode_id = inode->id; mie.uid = userns_uid(st->st_uid); mie.gid = userns_gid(st->st_gid); mie.name = (char *)name; @@ -104,14 +101,12 @@ static int dump_memfd_inode(int fd, struct memfd_inode *inode, if (mie.seals == -1) goto out; - if (pb_write_one(img, &mie, PB_MEMFD_INODE)) + if (pb_write_one(img_from_set(glob_imgset, CR_FD_MEMFD_INODE), &mie, PB_MEMFD_INODE)) goto out; ret = 0; out: - if (img) - close_image(img); return ret; } 
@@ -212,8 +207,6 @@ struct memfd_info { struct memfd_inode *inode; }; -static int memfd_open_inode(struct memfd_inode *inode); - static struct memfd_inode *memfd_alloc_inode(int id) { struct memfd_inode *inode; @@ -222,35 +215,47 @@ static struct memfd_inode *memfd_alloc_inode(int id) if (inode->id == id) return inode; - inode = shmalloc(sizeof(*inode)); - if (!inode) - return NULL; + pr_err("Unable to find the %d memfd inode\n", id); + return NULL; +} - inode->id = id; +static int collect_one_memfd_inode(void *o, ProtobufCMessage *base, struct cr_img *i) +{ + MemfdInodeEntry *mie = pb_msg(base, MemfdInodeEntry); + struct memfd_inode *inode = o; + + inode->mie = mie; + inode->id = mie->inode_id; mutex_init(&inode->lock); inode->fdstore_id = -1; inode->pending_seals = 0; list_add_tail(&inode->list, &memfd_inodes); - return inode; + + return 0; +} + +static struct collect_image_info memfd_inode_cinfo = { + .fd_type = CR_FD_MEMFD_INODE, + .pb_type = PB_MEMFD_INODE, + .priv_size = sizeof(struct memfd_inode), + .collect = collect_one_memfd_inode, + .flags = COLLECT_SHARED | COLLECT_NOFREE, +}; + +int prepare_memfd_inodes(void) +{ + return collect_image(&memfd_inode_cinfo); } -extern int restore_memfd_shm(int fd, u64 id, u64 size); static int memfd_open_inode_nocache(struct memfd_inode *inode) { MemfdInodeEntry *mie = NULL; - struct cr_img *img = NULL; int fd = -1; int ret = -1; int flags; - img = open_image(CR_FD_MEMFD_INODE, O_RSTR, inode->id); - if (!img) - goto out; - - if (pb_read_one(img, &mie, PB_MEMFD_INODE) < 0) - goto out; - + mie = inode->mie; if (mie->seals == F_SEAL_SEAL) { inode->pending_seals = 0; flags = 0; @@ -285,10 +290,6 @@ static int memfd_open_inode_nocache(struct memfd_inode *inode) out: if (fd != -1) close(fd); - if (img) - close_image(img); - if (mie) - memfd_inode_entry__free_unpacked(mie, NULL); return ret; } @@ -373,33 +374,17 @@ static int memfd_open_fe_fd(struct file_desc *fd, int *new_fd) static char *memfd_d_name(struct file_desc *d, char 
*buf, size_t s) { MemfdInodeEntry *mie = NULL; - struct cr_img *img = NULL; struct memfd_info *mfi; - char *ret = NULL; mfi = container_of(d, struct memfd_info, d); - img = open_image(CR_FD_MEMFD_INODE, O_RSTR, mfi->inode->id); - if (!img) - goto out; - - if (pb_read_one(img, &mie, PB_MEMFD_INODE) < 0) - goto out; - + mie = mfi->inode->mie; if (snprintf(buf, s, "%s%s", MEMFD_PREFIX, mie->name) >= s) { pr_err("Buffer too small for memfd name %s\n", mie->name); - goto out; + return NULL; } - ret = buf; - -out: - if (img) - close_image(img); - if (mie) - memfd_inode_entry__free_unpacked(mie, NULL); - - return ret; + return buf; } static struct file_desc_ops memfd_desc_ops = { @@ -427,7 +412,8 @@ struct collect_image_info memfd_cinfo = { .collect = collect_one_memfd, }; -struct file_desc *collect_memfd(u32 id) { +struct file_desc *collect_memfd(u32 id) +{ struct file_desc *fdesc; fdesc = find_file_desc_raw(FD_TYPES__MEMFD, id); diff --git a/images/memfd.proto b/images/memfd.proto index 546ffc2ab..ad5373d10 100644 --- a/images/memfd.proto +++ b/images/memfd.proto @@ -18,4 +18,5 @@ message memfd_inode_entry { required uint64 size = 4; required uint32 shmid = 5; required uint32 seals = 6 [(criu).flags = "seals.flags"]; + required uint64 inode_id = 7; }; From 8c36865c84666c73424b7a0fdb9f460557465ff2 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Wed, 25 Mar 2020 20:14:24 +0300 Subject: [PATCH 0199/1854] memfd: split the struct memfd_inode The struct memfd_inode has a union for dump and restore parts. The only common parts are the list_head node, and the inode id. 
Suggested-by: Nicolas Viennot Signed-off-by: Andrei Vagin --- criu/memfd.c | 58 ++++++++++++++++++++++++---------------------------- 1 file changed, 27 insertions(+), 31 deletions(-) diff --git a/criu/memfd.c b/criu/memfd.c index 2158e925b..4419b4bf5 100644 --- a/criu/memfd.c +++ b/criu/memfd.c @@ -32,23 +32,19 @@ /* Linux 5.1+ */ #define F_SEAL_FUTURE_WRITE 0x0010 /* prevent future writes while mapped */ -struct memfd_inode { +struct memfd_dump_inode { struct list_head list; u32 id; - union { - /* Only for dump */ - struct { - u32 dev; - u32 ino; - }; - /* Only for restore */ - struct { - mutex_t lock; - int fdstore_id; - unsigned int pending_seals; - MemfdInodeEntry *mie; - }; - }; + u32 dev; + u32 ino; +}; + +struct memfd_restore_inode { + struct list_head list; + mutex_t lock; + int fdstore_id; + unsigned int pending_seals; + MemfdInodeEntry *mie; }; static LIST_HEAD(memfd_inodes); @@ -69,7 +65,7 @@ int is_memfd(dev_t dev) return dev == kdat.shmem_dev; } -static int dump_memfd_inode(int fd, struct memfd_inode *inode, +static int dump_memfd_inode(int fd, struct memfd_dump_inode *inode, const char *name, const struct stat *st) { MemfdInodeEntry mie = MEMFD_INODE_ENTRY__INIT; @@ -110,9 +106,10 @@ out: return ret; } -static struct memfd_inode *dump_unique_memfd_inode(int lfd, const char *name, const struct stat *st) +static struct memfd_dump_inode * +dump_unique_memfd_inode(int lfd, const char *name, const struct stat *st) { - struct memfd_inode *inode; + struct memfd_dump_inode *inode; int fd; list_for_each_entry(inode, &memfd_inodes, list) @@ -149,7 +146,7 @@ static int dump_one_memfd(int lfd, u32 id, const struct fd_parms *p) { MemfdFileEntry mfe = MEMFD_FILE_ENTRY__INIT; FileEntry fe = FILE_ENTRY__INIT; - struct memfd_inode *inode; + struct memfd_dump_inode *inode; struct fd_link _link, *link; const char *name; @@ -202,17 +199,17 @@ const struct fdtype_ops memfd_dump_ops = { */ struct memfd_info { - MemfdFileEntry *mfe; - struct file_desc d; - struct 
memfd_inode *inode; + MemfdFileEntry *mfe; + struct file_desc d; + struct memfd_restore_inode *inode; }; -static struct memfd_inode *memfd_alloc_inode(int id) +static struct memfd_restore_inode *memfd_alloc_inode(int id) { - struct memfd_inode *inode; + struct memfd_restore_inode *inode; list_for_each_entry(inode, &memfd_inodes, list) - if (inode->id == id) + if (inode->mie->inode_id == id) return inode; pr_err("Unable to find the %d memfd inode\n", id); @@ -222,10 +219,9 @@ static struct memfd_inode *memfd_alloc_inode(int id) static int collect_one_memfd_inode(void *o, ProtobufCMessage *base, struct cr_img *i) { MemfdInodeEntry *mie = pb_msg(base, MemfdInodeEntry); - struct memfd_inode *inode = o; + struct memfd_restore_inode *inode = o; inode->mie = mie; - inode->id = mie->inode_id; mutex_init(&inode->lock); inode->fdstore_id = -1; inode->pending_seals = 0; @@ -238,7 +234,7 @@ static int collect_one_memfd_inode(void *o, ProtobufCMessage *base, struct cr_im static struct collect_image_info memfd_inode_cinfo = { .fd_type = CR_FD_MEMFD_INODE, .pb_type = PB_MEMFD_INODE, - .priv_size = sizeof(struct memfd_inode), + .priv_size = sizeof(struct memfd_restore_inode), .collect = collect_one_memfd_inode, .flags = COLLECT_SHARED | COLLECT_NOFREE, }; @@ -248,7 +244,7 @@ int prepare_memfd_inodes(void) return collect_image(&memfd_inode_cinfo); } -static int memfd_open_inode_nocache(struct memfd_inode *inode) +static int memfd_open_inode_nocache(struct memfd_restore_inode *inode) { MemfdInodeEntry *mie = NULL; int fd = -1; @@ -293,7 +289,7 @@ out: return ret; } -static int memfd_open_inode(struct memfd_inode *inode) +static int memfd_open_inode(struct memfd_restore_inode *inode) { int fd; @@ -433,7 +429,7 @@ int apply_memfd_seals(void) */ int ret, fd; - struct memfd_inode *inode; + struct memfd_restore_inode *inode; list_for_each_entry(inode, &memfd_inodes, list) { if (!inode->pending_seals) From e3fb52e375d2fdd7160395220cf52eb25dfc8c09 Mon Sep 17 00:00:00 2001 From: Pavel 
Tikhomirov Date: Mon, 30 Mar 2020 14:16:30 +0300 Subject: [PATCH 0200/1854] remove header include statements duplicates Revert "util: introduce the mount_detached_fs helper" This reverts commit 5dbc24b206cd365db7498dddcd03798c5d8ed4e4. Revert "criu: Make use strlcpy() to copy into allocated strings" This reverts commit bc49927bbc28b41e4b2759d42dc24f1d66e22df3. Fixes for https://github.com/checkpoint-restore/criu/pull/1003 Signed-off-by: Pavel Tikhomirov --- criu/cr-restore.c | 1 - criu/util.c | 2 -- 2 files changed, 3 deletions(-) diff --git a/criu/cr-restore.c b/criu/cr-restore.c index 1d3092f2f..74be1a5ca 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -76,7 +76,6 @@ #include "fdstore.h" #include "string.h" #include "memfd.h" -#include "string.h" #include "parasite-syscall.h" #include "files-reg.h" diff --git a/criu/util.c b/criu/util.c index a0a49c5a3..1646ce1c4 100644 --- a/criu/util.c +++ b/criu/util.c @@ -30,8 +30,6 @@ #include "linux/mount.h" -#include "linux/mount.h" - #include "kerndat.h" #include "page.h" #include "util.h" From 0e9b42acf96d2c5fc3a6174ae6e4f2ad8a64c272 Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Tue, 25 Feb 2020 11:31:07 +0300 Subject: [PATCH 0201/1854] MAINTAINERS: Add Pavel (myself) to maintainers Hope I have enough experience in the project to be nominated. I want to help with review and will try to do my best in it. Signed-off-by: Pavel Tikhomirov --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 5c28463a7..bb153f1ab 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3,3 +3,4 @@ Andrey Vagin Mike Rapoport Dmitry Safonov <0x7f454c46@gmail.com> Adrian Reber +Pavel Tikhomirov From 4127ef4ab769dc4417c22d0ce0a4ddaaca4193b4 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Wed, 14 Aug 2019 07:40:40 +0300 Subject: [PATCH 0202/1854] criu: Add support for time namespaces The time namespace allows for per-namespace offsets to the system monotonic and boot-time clocks. 
C/R of time namespaces is very straightforward. On dump, criu enters a target time namespace and dumps the current clock values; then, on restore, criu creates a new namespace and restores the clock values. Signed-off-by: Andrei Vagin --- criu/Makefile.crtools | 1 + criu/cr-check.c | 12 ++++ criu/cr-restore.c | 10 ++- criu/image-desc.c | 1 + criu/include/image-desc.h | 1 + criu/include/kerndat.h | 1 + criu/include/magic.h | 1 + criu/include/namespaces.h | 9 ++- criu/include/proc_parse.h | 2 + criu/include/protobuf-desc.h | 1 + criu/include/timens.h | 9 +++ criu/kerndat.c | 17 +++++ criu/namespaces.c | 24 +++++++ criu/proc_parse.c | 38 ++++++++++ criu/protobuf-desc.c | 1 + criu/pstree.c | 2 + criu/timens.c | 130 +++++++++++++++++++++++++++++++++++ criu/util.c | 2 + images/Makefile | 1 + images/core.proto | 1 + images/timens.proto | 10 +++ lib/py/images/images.py | 1 + 22 files changed, 272 insertions(+), 3 deletions(-) create mode 100644 criu/include/timens.h create mode 100644 criu/timens.c create mode 100644 images/timens.proto diff --git a/criu/Makefile.crtools b/criu/Makefile.crtools index 1a6e0b5b5..5c25b8928 100644 --- a/criu/Makefile.crtools +++ b/criu/Makefile.crtools @@ -87,6 +87,7 @@ obj-y += config.o obj-y += servicefd.o obj-y += pie-util-vdso.o obj-y += vdso.o +obj-y += timens.o obj-$(CONFIG_COMPAT) += pie-util-vdso-elf32.o CFLAGS_pie-util-vdso-elf32.o += -DCONFIG_VDSO_32 obj-$(CONFIG_COMPAT) += vdso-compat.o diff --git a/criu/cr-check.c b/criu/cr-check.c index 80df3f7cd..b790c2ffb 100644 --- a/criu/cr-check.c +++ b/criu/cr-check.c @@ -1266,6 +1266,16 @@ static int check_kcmp_epoll(void) return 0; } +static int check_time_namespace(void) +{ + if (!kdat.has_timens) { + pr_err("Time namespaces are not supported\n"); + return -1; + } + + return 0; +} + static int check_net_diag_raw(void) { check_sock_diag(); @@ -1384,6 +1394,7 @@ int cr_check(void) ret |= check_kcmp_epoll(); ret |= check_net_diag_raw(); ret |= check_clone3_set_tid(); + ret |= 
check_time_namespace(); } /* @@ -1486,6 +1497,7 @@ static struct feature_list feature_list[] = { { "nsid", check_nsid }, { "link_nsid", check_link_nsid}, { "kcmp_epoll", check_kcmp_epoll}, + { "timens", check_time_namespace}, { "external_net_ns", check_external_net_ns}, { "clone3_set_tid", check_clone3_set_tid}, { NULL, NULL }, diff --git a/criu/cr-restore.c b/criu/cr-restore.c index 74be1a5ca..ce6e667d7 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -76,6 +76,7 @@ #include "fdstore.h" #include "string.h" #include "memfd.h" +#include "timens.h" #include "parasite-syscall.h" #include "files-reg.h" @@ -1406,7 +1407,7 @@ static inline int fork_with_pid(struct pstree_item *item) if (kdat.has_clone3_set_tid) { ret = clone3_with_pid_noasan(restore_task_with_children, &ca, (ca.clone_flags & - ~(CLONE_NEWNET | CLONE_NEWCGROUP)), + ~(CLONE_NEWNET | CLONE_NEWCGROUP | CLONE_NEWTIME)), SIGCHLD, pid); } else { /* @@ -1424,7 +1425,7 @@ static inline int fork_with_pid(struct pstree_item *item) close_pid_proc(); ret = clone_noasan(restore_task_with_children, (ca.clone_flags & - ~(CLONE_NEWNET | CLONE_NEWCGROUP)) | SIGCHLD, + ~(CLONE_NEWNET | CLONE_NEWCGROUP | CLONE_NEWTIME)) | SIGCHLD, &ca); } @@ -1745,6 +1746,11 @@ static int restore_task_with_children(void *_arg) } } + if (root_ns_mask & CLONE_NEWTIME) { + if (prepare_timens(current->ids->time_ns_id)) + goto err; + } + /* Wait prepare_userns */ if (restore_finish_ns_stage(CR_STATE_ROOT_TASK, CR_STATE_PREPARE_NAMESPACES) < 0) goto err; diff --git a/criu/image-desc.c b/criu/image-desc.c index ac627a829..617b95355 100644 --- a/criu/image-desc.c +++ b/criu/image-desc.c @@ -102,6 +102,7 @@ struct cr_fd_desc_tmpl imgset_template[CR_FD_MAX] = { FD_ENTRY(NETNF_CT, "netns-ct-%u"), FD_ENTRY(NETNF_EXP, "netns-exp-%u"), FD_ENTRY(FILES, "files"), + FD_ENTRY(TIMENS, "timens-%u"), [CR_FD_STATS] = { .fmt = "stats-%s", diff --git a/criu/include/image-desc.h b/criu/include/image-desc.h index ce6ef1529..6283a576d 100644 --- 
a/criu/include/image-desc.h +++ b/criu/include/image-desc.h @@ -26,6 +26,7 @@ enum { CR_FD_UTSNS, CR_FD_MNTS, CR_FD_USERNS, + CR_FD_TIMENS, _CR_FD_IPCNS_FROM, CR_FD_IPC_VAR, diff --git a/criu/include/kerndat.h b/criu/include/kerndat.h index 27c870bb8..ad5f7d324 100644 --- a/criu/include/kerndat.h +++ b/criu/include/kerndat.h @@ -67,6 +67,7 @@ struct kerndat_s { bool has_kcmp_epoll_tfd; bool has_fsopen; bool has_clone3_set_tid; + bool has_timens; }; extern struct kerndat_s kdat; diff --git a/criu/include/magic.h b/criu/include/magic.h index bdaca968d..d078ec422 100644 --- a/criu/include/magic.h +++ b/criu/include/magic.h @@ -95,6 +95,7 @@ #define AUTOFS_MAGIC 0x49353943 /* Sochi */ #define FILES_MAGIC 0x56303138 /* Toropets */ #define MEMFD_INODE_MAGIC 0x48453499 /* Dnipro */ +#define TIMENS_MAGIC 0x43114433 /* Beslan */ #define IFADDR_MAGIC RAW_IMAGE_MAGIC #define ROUTE_MAGIC RAW_IMAGE_MAGIC diff --git a/criu/include/namespaces.h b/criu/include/namespaces.h index a9a970a9b..e570aa0ab 100644 --- a/criu/include/namespaces.h +++ b/criu/include/namespaces.h @@ -34,7 +34,13 @@ #define CLONE_NEWCGROUP 0x02000000 #endif -#define CLONE_ALLNS (CLONE_NEWPID | CLONE_NEWNET | CLONE_NEWIPC | CLONE_NEWUTS | CLONE_NEWNS | CLONE_NEWUSER | CLONE_NEWCGROUP) +#ifndef CLONE_NEWTIME +#define CLONE_NEWTIME 0x00000080 +#endif + +#define CLONE_ALLNS (CLONE_NEWPID | CLONE_NEWNET | CLONE_NEWIPC | \ + CLONE_NEWUTS | CLONE_NEWNS | CLONE_NEWUSER | \ + CLONE_NEWCGROUP | CLONE_NEWTIME) /* Nested namespaces are supported only for these types */ #define CLONE_SUBNS (CLONE_NEWNS | CLONE_NEWNET) @@ -146,6 +152,7 @@ extern bool check_ns_proc(struct fd_link *link); extern struct ns_desc pid_ns_desc; extern struct ns_desc user_ns_desc; +extern struct ns_desc time_ns_desc; extern unsigned long root_ns_mask; extern const struct fdtype_ops nsfile_dump_ops; diff --git a/criu/include/proc_parse.h b/criu/include/proc_parse.h index fd50ff47e..25a57df6c 100644 --- a/criu/include/proc_parse.h +++ 
b/criu/include/proc_parse.h @@ -102,4 +102,6 @@ extern bool is_vma_range_fmt(char *line); extern void parse_vmflags(char *buf, u32 *flags, u64 *madv, int *io_pf); extern int parse_uptime(uint64_t *upt); +extern int parse_timens_offsets(struct timespec *boff, struct timespec *moff); + #endif /* __CR_PROC_PARSE_H__ */ diff --git a/criu/include/protobuf-desc.h b/criu/include/protobuf-desc.h index 7e0385ef4..ee4135d65 100644 --- a/criu/include/protobuf-desc.h +++ b/criu/include/protobuf-desc.h @@ -63,6 +63,7 @@ enum { PB_FILE, PB_MEMFD_FILE, PB_MEMFD_INODE, /* 60 */ + PB_TIMENS, /* PB_AUTOGEN_STOP */ diff --git a/criu/include/timens.h b/criu/include/timens.h new file mode 100644 index 000000000..22a4a5220 --- /dev/null +++ b/criu/include/timens.h @@ -0,0 +1,9 @@ +#ifndef __CR_TIME_NS_H__ +#define __CR_TIME_NS_H__ + +extern int dump_time_ns(int ns_id); +extern int prepare_timens(int pid); + +extern struct ns_desc time_ns_desc; + +#endif /* __CR_TIME_NS_H__ */ diff --git a/criu/kerndat.c b/criu/kerndat.c index 2ad72c350..0c6910da9 100644 --- a/criu/kerndat.c +++ b/criu/kerndat.c @@ -783,6 +783,21 @@ out: return ret; } +static int has_time_namespace(void) +{ + if (access("/proc/self/timens_offsets", F_OK) < 0) { + if (errno == ENOENT) { + pr_debug("Time namespaces are not supported.\n"); + kdat.has_timens = false; + return 0; + } + pr_perror("Unable to access /proc/self/timens_offsets"); + return -1; + } + kdat.has_timens = true; + return 0; +} + int __attribute__((weak)) kdat_x86_has_ptrace_fpu_xsave_bug(void) { return 0; @@ -1091,6 +1106,8 @@ int kerndat_init(void) ret = kerndat_has_fsopen(); if (!ret) ret = kerndat_has_clone3_set_tid(); + if (!ret) + ret = has_time_namespace(); kerndat_lsm(); kerndat_mmap_min_addr(); diff --git a/criu/namespaces.c b/criu/namespaces.c index 2db805b2f..e376feaca 100644 --- a/criu/namespaces.c +++ b/criu/namespaces.c @@ -20,6 +20,7 @@ #include "imgset.h" #include "uts_ns.h" #include "ipc_ns.h" +#include "timens.h" #include "mount.h" 
#include "pstree.h" #include "namespaces.h" @@ -39,6 +40,7 @@ static struct ns_desc *ns_desc_array[] = { &pid_ns_desc, &user_ns_desc, &mnt_ns_desc, + &time_ns_desc, &cgroup_ns_desc, }; @@ -157,6 +159,9 @@ int join_ns_add(const char *type, char *ns_file, char *extra_opts) } else if (!strncmp(type, "uts", 4)) { jn->nd = &uts_ns_desc; join_ns_flags |= CLONE_NEWUTS; + } else if (!strncmp(type, "time", 5)) { + jn->nd = &time_ns_desc; + join_ns_flags |= CLONE_NEWTIME; } else if (!strncmp(type, "ipc", 4)) { jn->nd = &ipc_ns_desc; join_ns_flags |= CLONE_NEWIPC; @@ -568,6 +573,10 @@ static int open_ns_fd(struct file_desc *d, int *new_fd) item = t; nd = &cgroup_ns_desc; break; + } else if (ids->time_ns_id == nfi->nfe->ns_id) { + item = t; + nd = &time_ns_desc; + break; } } @@ -671,6 +680,13 @@ int dump_task_ns_ids(struct pstree_item *item) return -1; } + ids->has_time_ns_id = true; + ids->time_ns_id = get_ns_id(pid, &time_ns_desc, NULL); + if (!ids->time_ns_id) { + pr_err("Can't make timens id\n"); + return -1; + } + ids->has_mnt_ns_id = true; ids->mnt_ns_id = get_ns_id(pid, &mnt_ns_desc, NULL); if (!ids->mnt_ns_id) { @@ -914,6 +930,9 @@ static int check_user_ns(int pid) if ((root_ns_mask & CLONE_NEWUTS) && switch_ns(pid, &uts_ns_desc, NULL)) exit(1); + if ((root_ns_mask & CLONE_NEWTIME) && + switch_ns(pid, &time_ns_desc, NULL)) + exit(1); if ((root_ns_mask & CLONE_NEWIPC) && switch_ns(pid, &ipc_ns_desc, NULL)) exit(1); @@ -1002,6 +1021,11 @@ static int do_dump_namespaces(struct ns_id *ns) ns->id, ns->ns_pid); ret = dump_uts_ns(ns->id); break; + case CLONE_NEWTIME: + pr_info("Dump TIME namespace %d via %d\n", + ns->id, ns->ns_pid); + ret = dump_time_ns(ns->id); + break; case CLONE_NEWIPC: pr_info("Dump IPC namespace %d via %d\n", ns->id, ns->ns_pid); diff --git a/criu/proc_parse.c b/criu/proc_parse.c index 60aba8788..c73fa9776 100644 --- a/criu/proc_parse.c +++ b/criu/proc_parse.c @@ -1472,6 +1472,44 @@ static bool should_skip_mount(const char *mountpoint) return false; } 
+int parse_timens_offsets(struct timespec *boff, struct timespec *moff) +{ + int exit_code = -1; + FILE *f; + + f = fopen_proc(PROC_SELF, "timens_offsets"); + if (!f) { + pr_perror("Unable to open /proc/self/timens_offsets"); + goto out; + } + while (fgets(buf, BUF_SIZE, f)) { + int64_t sec, nsec; + int clockid; + + if (sscanf(buf, "%d %"PRId64" %"PRId64"\n", &clockid, &sec, &nsec) != 3) { + pr_err("Unable to parse: %s\n", buf); + goto out; + } + switch (clockid) { + case CLOCK_MONOTONIC: + moff->tv_sec = sec; + moff->tv_nsec = nsec; + break; + case CLOCK_BOOTTIME: + boff->tv_sec = sec; + boff->tv_nsec = nsec; + break; + default: + pr_err("Unknown clockid: %d\n", clockid); + goto out; + } + } + exit_code = 0; +out: + fclose(f); + return exit_code; +} + struct mount_info *parse_mountinfo(pid_t pid, struct ns_id *nsid, bool for_dump) { struct mount_info *list = NULL; diff --git a/criu/protobuf-desc.c b/criu/protobuf-desc.c index 41c208037..2ee81e5db 100644 --- a/criu/protobuf-desc.c +++ b/criu/protobuf-desc.c @@ -37,6 +37,7 @@ #include "images/creds.pb-c.h" #include "images/timer.pb-c.h" #include "images/utsns.pb-c.h" +#include "images/timens.pb-c.h" #include "images/ipc-var.pb-c.h" #include "images/ipc-shm.pb-c.h" #include "images/ipc-msg.pb-c.h" diff --git a/criu/pstree.c b/criu/pstree.c index 19cf5ad38..d0e81bfad 100644 --- a/criu/pstree.c +++ b/criu/pstree.c @@ -814,6 +814,8 @@ static unsigned long get_clone_mask(TaskKobjIdsEntry *i, mask |= CLONE_NEWIPC; if (i->uts_ns_id != p->uts_ns_id) mask |= CLONE_NEWUTS; + if (i->time_ns_id != p->time_ns_id) + mask |= CLONE_NEWTIME; if (i->mnt_ns_id != p->mnt_ns_id) mask |= CLONE_NEWNS; if (i->user_ns_id != p->user_ns_id) diff --git a/criu/timens.c b/criu/timens.c new file mode 100644 index 000000000..79ba6a2ce --- /dev/null +++ b/criu/timens.c @@ -0,0 +1,130 @@ +#include +#include + +#include "types.h" +#include "proc_parse.h" +#include "namespaces.h" +#include "timens.h" + +#include "protobuf.h" +#include 
"images/timens.pb-c.h" + +int dump_time_ns(int ns_id) +{ + struct cr_img *img; + TimensEntry te = TIMENS_ENTRY__INIT; + Timespec b = TIMESPEC__INIT, m = TIMESPEC__INIT; + struct timespec ts; + int ret; + + img = open_image(CR_FD_TIMENS, O_DUMP, ns_id); + if (!img) + return -1; + + clock_gettime(CLOCK_MONOTONIC, &ts); + te.monotonic = &m; + te.monotonic->tv_sec = ts.tv_sec; + te.monotonic->tv_nsec = ts.tv_nsec; + clock_gettime(CLOCK_BOOTTIME, &ts); + te.boottime = &b; + te.boottime->tv_sec = ts.tv_sec; + te.boottime->tv_nsec = ts.tv_nsec; + + ret = pb_write_one(img, &te, PB_TIMENS); + close_image(img); + + return ret < 0 ? -1 : 0; +} + +static void normalize_timespec(struct timespec *ts) +{ + while (ts->tv_nsec >= NSEC_PER_SEC) { + ts->tv_nsec -= NSEC_PER_SEC; + ++ts->tv_sec; + } + while (ts->tv_nsec < 0) { + ts->tv_nsec += NSEC_PER_SEC; + --ts->tv_sec; + } +} + + +int prepare_timens(int id) +{ + int exit_code = -1; + int ret, fd = -1; + struct cr_img *img; + TimensEntry *te; + struct timespec ts; + struct timespec prev_moff = {}, prev_boff = {}; + + img = open_image(CR_FD_TIMENS, O_RSTR, id); + if (!img) + return -1; + + ret = pb_read_one(img, &te, PB_TIMENS); + close_image(img); + if (ret < 0) + goto err; + + if (unshare(CLONE_NEWTIME)) { + pr_perror("Unable to create a new time namespace"); + return -1; + } + + if (parse_timens_offsets(&prev_boff, &prev_moff)) + goto err; + + fd = open_proc_rw(PROC_SELF, "timens_offsets"); + if (fd < 0) + goto err; + + clock_gettime(CLOCK_MONOTONIC, &ts); + ts.tv_sec = ts.tv_sec - prev_moff.tv_sec; + ts.tv_nsec = ts.tv_nsec - prev_moff.tv_nsec; + + ts.tv_sec = te->monotonic->tv_sec - ts.tv_sec; + ts.tv_nsec = te->monotonic->tv_nsec - ts.tv_nsec; + normalize_timespec(&ts); + + pr_debug("timens: %d %ld %ld\n", + CLOCK_MONOTONIC, ts.tv_sec, ts.tv_nsec); + if (dprintf(fd, "%d %ld %ld\n", + CLOCK_MONOTONIC, ts.tv_sec, ts.tv_nsec) < 0) { + pr_perror("Unable to set a monotonic clock offset"); + goto err; + } + + 
clock_gettime(CLOCK_BOOTTIME, &ts); + + ts.tv_sec = ts.tv_sec - prev_boff.tv_sec; + ts.tv_nsec = ts.tv_nsec - prev_boff.tv_nsec; + + ts.tv_sec = te->boottime->tv_sec - ts.tv_sec; + ts.tv_nsec = te->boottime->tv_nsec - ts.tv_nsec; + normalize_timespec(&ts); + + pr_debug("timens: %d %ld %ld\n", + CLOCK_BOOTTIME, ts.tv_sec, ts.tv_nsec); + if (dprintf(fd, "%d %ld %ld\n", + CLOCK_BOOTTIME, ts.tv_sec, ts.tv_nsec) < 0) { + pr_perror("Unable to set a boottime clock offset"); + goto err; + } + + timens_entry__free_unpacked(te, NULL); + close_safe(&fd); + + fd = open_proc(PROC_SELF, "ns/time_for_children"); + if (fd < 0) { + pr_perror("Unable to open ns/time_for_children"); + goto err; + } + if (switch_ns_by_fd(fd, &time_ns_desc, NULL)) + goto err; + exit_code = 0; +err: + close_safe(&fd); + return exit_code; +} +struct ns_desc time_ns_desc = NS_DESC_ENTRY(CLONE_NEWTIME, "time"); diff --git a/criu/util.c b/criu/util.c index 1646ce1c4..6f6a6dde7 100644 --- a/criu/util.c +++ b/criu/util.c @@ -967,6 +967,8 @@ const char *ns_to_string(unsigned int ns) return "user"; case CLONE_NEWUTS: return "uts"; + case CLONE_NEWTIME: + return "time"; default: return NULL; } diff --git a/images/Makefile b/images/Makefile index e7f0580cf..5ddd37664 100644 --- a/images/Makefile +++ b/images/Makefile @@ -64,6 +64,7 @@ proto-obj-y += autofs.o proto-obj-y += macvlan.o proto-obj-y += sit.o proto-obj-y += memfd.o +proto-obj-y += timens.o CFLAGS += -iquote $(obj)/ diff --git a/images/core.proto b/images/core.proto index e90522914..22c2a9f1f 100644 --- a/images/core.proto +++ b/images/core.proto @@ -70,6 +70,7 @@ message task_kobj_ids_entry { optional uint32 mnt_ns_id = 9; optional uint32 user_ns_id = 10; optional uint32 cgroup_ns_id = 11; + optional uint32 time_ns_id = 12; } message thread_sas_entry { diff --git a/images/timens.proto b/images/timens.proto new file mode 100644 index 000000000..a8272609b --- /dev/null +++ b/images/timens.proto @@ -0,0 +1,10 @@ +syntax = "proto2"; + +message timespec { + 
required uint64 tv_sec = 1; + required uint64 tv_nsec = 2; +} +message timens_entry { + required timespec monotonic = 1; + required timespec boottime = 2; +} diff --git a/lib/py/images/images.py b/lib/py/images/images.py index dca080657..ca6f207bb 100644 --- a/lib/py/images/images.py +++ b/lib/py/images/images.py @@ -466,6 +466,7 @@ handlers = { 'IDS': entry_handler(pb.task_kobj_ids_entry), 'CREDS': entry_handler(pb.creds_entry), 'UTSNS': entry_handler(pb.utsns_entry), + 'TIMENS': entry_handler(pb.timens_entry), 'IPC_VAR': entry_handler(pb.ipc_var_entry), 'FS': entry_handler(pb.fs_entry), 'GHOST_FILE': ghost_file_handler(), From ddba4af608d546a968e9558758718bead9c638c5 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Sat, 28 Mar 2020 22:14:40 +0300 Subject: [PATCH 0203/1854] namespace: fail if ns/time_for_children isn't equal to ns/time This case isn't supported right now. Signed-off-by: Andrei Vagin --- criu/include/timens.h | 1 + criu/namespaces.c | 20 ++++++++++++++++---- criu/timens.c | 2 ++ 3 files changed, 19 insertions(+), 4 deletions(-) diff --git a/criu/include/timens.h b/criu/include/timens.h index 22a4a5220..0567c5828 100644 --- a/criu/include/timens.h +++ b/criu/include/timens.h @@ -5,5 +5,6 @@ extern int dump_time_ns(int ns_id); extern int prepare_timens(int pid); extern struct ns_desc time_ns_desc; +extern struct ns_desc time_for_children_ns_desc; #endif /* __CR_TIME_NS_H__ */ diff --git a/criu/namespaces.c b/criu/namespaces.c index e376feaca..89d97c7bc 100644 --- a/criu/namespaces.c +++ b/criu/namespaces.c @@ -341,7 +341,7 @@ struct ns_id *lookup_ns_by_kid(unsigned int kid, struct ns_desc *nd) struct ns_id *nsid; for (nsid = ns_ids; nsid != NULL; nsid = nsid->next) - if (nsid->kid == kid && nsid->nd == nd) + if (nsid->kid == kid && nsid->nd->cflag == nd->cflag) return nsid; return NULL; @@ -447,7 +447,7 @@ static unsigned int __get_ns_id(int pid, struct ns_desc *nd, protobuf_c_boolean { int proc_dir; unsigned int kid; - char ns_path[10]; + char 
ns_path[32]; struct stat st; proc_dir = open_pid_proc(pid); @@ -680,12 +680,24 @@ int dump_task_ns_ids(struct pstree_item *item) return -1; } - ids->has_time_ns_id = true; - ids->time_ns_id = get_ns_id(pid, &time_ns_desc, NULL); + ids->time_ns_id = get_ns_id(pid, &time_ns_desc, &ids->has_time_ns_id); if (!ids->time_ns_id) { pr_err("Can't make timens id\n"); return -1; } + if (ids->has_time_ns_id) { + unsigned int id; + protobuf_c_boolean supported; + id = get_ns_id(pid, &time_for_children_ns_desc, &supported); + if (!supported || !id) { + pr_err("Can't make timens id\n"); + return -1; + } + if (id != ids->time_ns_id) { + pr_err("Can't dump nested time namespace for %d\n", pid); + return -1; + } + } ids->has_mnt_ns_id = true; ids->mnt_ns_id = get_ns_id(pid, &mnt_ns_desc, NULL); diff --git a/criu/timens.c b/criu/timens.c index 79ba6a2ce..764f8c9e0 100644 --- a/criu/timens.c +++ b/criu/timens.c @@ -128,3 +128,5 @@ err: return exit_code; } struct ns_desc time_ns_desc = NS_DESC_ENTRY(CLONE_NEWTIME, "time"); +struct ns_desc time_for_children_ns_desc = + NS_DESC_ENTRY(CLONE_NEWTIME, "time_for_children"); From 3fd0fa4bdc7d325bb244ef01873255e4ebcbb403 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Wed, 14 Aug 2019 07:44:02 +0300 Subject: [PATCH 0204/1854] zdtm: add support for time namespaces For ns and uns flavors, tests run in separate time namespaces. 
Signed-off-by: Andrei Vagin --- test/zdtm/lib/ns.c | 68 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/test/zdtm/lib/ns.c b/test/zdtm/lib/ns.c index 3099f7495..0054a3040 100644 --- a/test/zdtm/lib/ns.c +++ b/test/zdtm/lib/ns.c @@ -15,6 +15,8 @@ #include #include #include +#include +#include #include "zdtmtst.h" #include "ns.h" @@ -207,6 +209,39 @@ write_out: write(STDERR_FILENO, buf, MIN(len, sizeof(buf))); } +#ifndef CLONE_NEWTIME +#define CLONE_NEWTIME 0x00000080 /* New time namespace */ +#endif + +static inline int _settime(clockid_t clk_id, time_t offset) +{ + int fd, len; + char buf[4096]; + + if (clk_id == CLOCK_MONOTONIC_COARSE || clk_id == CLOCK_MONOTONIC_RAW) + clk_id = CLOCK_MONOTONIC; + + len = snprintf(buf, sizeof(buf), "%d %ld 0", clk_id, offset); + + fd = open("/proc/self/timens_offsets", O_WRONLY); + if (fd < 0) { + fprintf(stderr, "open(/proc/self/timens_offsets): %m"); + return -1; + } + + if (write(fd, buf, len) != len) { + fprintf(stderr, "write(/proc/self/timens_offsets): %m"); + return -1; + } + + if (close(fd)) { + fprintf(stderr, "close(/proc/self/timens_offsets): %m"); + return -1; + } + + return 0; +} + #define STATUS_FD 255 static int ns_exec(void *_arg) { @@ -218,6 +253,7 @@ static int ns_exec(void *_arg) setsid(); + prctl(PR_SET_DUMPABLE, 1, 0, 0, 0); ret = dup2(args->status_pipe[1], STATUS_FD); if (ret < 0) { fprintf(stderr, "dup2() failed: %m\n"); @@ -236,6 +272,35 @@ static int ns_exec(void *_arg) return -1; } +static int create_timens(void) +{ + int fd; + + if (unshare(CLONE_NEWTIME)) { + if (errno == EINVAL) { + fprintf(stderr, "timens isn't supported\n"); + return 0; + } else { + fprintf(stderr, "unshare(CLONE_NEWTIME) failed: %m"); + exit(1); + } + } + + if (_settime(CLOCK_MONOTONIC, 10 * 24 * 60 * 60)) + exit(1); + if (_settime(CLOCK_BOOTTIME, 20 * 24 * 60 * 60)) + exit(1); + + fd = open("/proc/self/ns/time_for_children", O_RDONLY); + if (fd < 0) + exit(1); + if (setns(fd, 0)) + 
exit(1); + close(fd); + + return 0; +} + int ns_init(int argc, char **argv) { struct sigaction sa = { @@ -253,6 +318,9 @@ int ns_init(int argc, char **argv) exit(1); } + if (create_timens()) + exit(1); + if (init_notify()) { fprintf(stderr, "Can't init pre-dump notification: %m"); exit(1); From f1655fd5402f7827415cddbd796e5e44ed33cbc9 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Wed, 14 Aug 2019 07:45:34 +0300 Subject: [PATCH 0205/1854] zdtm: add a new test to check c/r of time namespaces This test checks that monotonic and boottime don't jump after C/R. In ns and uns flavors, the test is started in a separate time namespace with big offsets, so if criu will restore a time namespace incorrectly the test will detect the big delta of clocks values before and after C/R. Signed-off-by: Andrei Vagin --- test/zdtm/static/Makefile | 1 + test/zdtm/static/time.c | 47 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+) create mode 100644 test/zdtm/static/time.c diff --git a/test/zdtm/static/Makefile b/test/zdtm/static/Makefile index a8e4107d3..1b7542574 100644 --- a/test/zdtm/static/Makefile +++ b/test/zdtm/static/Makefile @@ -226,6 +226,7 @@ TST_NOFILE := \ memfd03 \ shmemfd \ shmemfd-priv \ + time \ # jobctl00 \ ifneq ($(ARCH),arm) diff --git a/test/zdtm/static/time.c b/test/zdtm/static/time.c new file mode 100644 index 000000000..d37e2a8f8 --- /dev/null +++ b/test/zdtm/static/time.c @@ -0,0 +1,47 @@ +#include +#include +#include +#include + +#include "zdtmtst.h" + +const char *test_doc = "Check monotonic and boot clocks"; +const char *test_author = "Andrei Vagin b + 60 * 60 * NSEC_PER_SEC) { + fail("%d: %lld %lld", clocks[i], a, b); + return 1; + } + } + + pass(); + + return 0; +} From 0d8c0562f9d8e67bae04f1b1aea08e485edf7340 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Thu, 15 Aug 2019 06:51:15 +0300 Subject: [PATCH 0206/1854] zdtm_ct: run each test in a new time namespace Signed-off-by: Andrei Vagin --- test/zdtm_ct.c | 65 
++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/test/zdtm_ct.c b/test/zdtm_ct.c index bc88dadde..5495d61eb 100644 --- a/test/zdtm_ct.c +++ b/test/zdtm_ct.c @@ -5,6 +5,69 @@ #include #include #include +#include +#include +#include +#include + +#ifndef CLONE_NEWTIME +#define CLONE_NEWTIME 0x00000080 /* New time namespace */ +#endif + +static inline int _settime(clockid_t clk_id, time_t offset) +{ + int fd, len; + char buf[4096]; + + if (clk_id == CLOCK_MONOTONIC_COARSE || clk_id == CLOCK_MONOTONIC_RAW) + clk_id = CLOCK_MONOTONIC; + + len = snprintf(buf, sizeof(buf), "%d %ld 0", clk_id, offset); + + fd = open("/proc/self/timens_offsets", O_WRONLY); + if (fd < 0) { + fprintf(stderr, "/proc/self/timens_offsets: %m"); + return -1; + } + + if (write(fd, buf, len) != len) { + fprintf(stderr, "/proc/self/timens_offsets: %m"); + return -1; + } + + close(fd); + + return 0; +} + +static int create_timens() +{ + int fd; + + if (unshare(CLONE_NEWTIME)) { + if (errno == EINVAL) { + fprintf(stderr, "timens isn't supported\n"); + return 0; + } else { + fprintf(stderr, "unshare(CLONE_NEWTIME) failed: %m"); + exit(1); + } + } + + if (_settime(CLOCK_MONOTONIC, 110 * 24 * 60 * 60)) + exit(1); + if (_settime(CLOCK_BOOTTIME, 40 * 24 * 60 * 60)) + exit(1); + + fd = open("/proc/self/ns/time_for_children", O_RDONLY); + if (fd < 0) + exit(1); + if (setns(fd, 0)) + exit(1); + close(fd); + + return 0; +} int main(int argc, char **argv) { @@ -20,6 +83,8 @@ int main(int argc, char **argv) return 1; pid = fork(); if (pid == 0) { + if (create_timens()) + exit(1); if (mount(NULL, "/", NULL, MS_REC | MS_SLAVE, NULL)) { fprintf(stderr, "mount(/, S_REC | MS_SLAVE)): %m"); return 1; From 73438d34bb5bfba5f0ac063c699f66454c722c51 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Thu, 26 Mar 2020 10:55:13 +0300 Subject: [PATCH 0207/1854] test: check that C/R of nested time namespaces fails Signed-off-by: Andrei Vagin --- test/zdtm/static/Makefile | 2 + 
test/zdtm/static/timens_for_kids.c | 36 ++++++++++++++ test/zdtm/static/timens_for_kids.desc | 1 + test/zdtm/static/timens_nested.c | 67 +++++++++++++++++++++++++++ test/zdtm/static/timens_nested.desc | 1 + 5 files changed, 107 insertions(+) create mode 100644 test/zdtm/static/timens_for_kids.c create mode 100644 test/zdtm/static/timens_for_kids.desc create mode 100644 test/zdtm/static/timens_nested.c create mode 100644 test/zdtm/static/timens_nested.desc diff --git a/test/zdtm/static/Makefile b/test/zdtm/static/Makefile index 1b7542574..7d72673c3 100644 --- a/test/zdtm/static/Makefile +++ b/test/zdtm/static/Makefile @@ -227,6 +227,8 @@ TST_NOFILE := \ shmemfd \ shmemfd-priv \ time \ + timens_nested \ + timens_for_kids \ # jobctl00 \ ifneq ($(ARCH),arm) diff --git a/test/zdtm/static/timens_for_kids.c b/test/zdtm/static/timens_for_kids.c new file mode 100644 index 000000000..72543486b --- /dev/null +++ b/test/zdtm/static/timens_for_kids.c @@ -0,0 +1,36 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "zdtmtst.h" + +const char *test_doc = "Check nested time namespaces"; +const char *test_author = "Andrei Vagin +#include +#include +#include +#include +#include +#include +#include +#include + +#include "zdtmtst.h" + +const char *test_doc = "Check nested time namespaces"; +const char *test_author = "Andrei Vagin Date: Mon, 23 Mar 2020 03:26:00 +0300 Subject: [PATCH 0208/1854] timens: restore processes in a new timens to restore clocks After restoring processes, we have to be sure that monotonic and boottime clocks will not go backward. For this, we can restore processes in a new time namespace and set proper offsets for the clocks. In this patch, criu dumps clock values even when processes are running in the host time namespace and on restore, criu creates a new time namespace, sets dumped clock values and restores processes. 
Signed-off-by: Andrei Vagin --- criu/cr-dump.c | 7 +++++++ criu/cr-restore.c | 3 +++ criu/timens.c | 5 +++++ 3 files changed, 15 insertions(+) diff --git a/criu/cr-dump.c b/criu/cr-dump.c index 6aa114c2d..a38e47d12 100644 --- a/criu/cr-dump.c +++ b/criu/cr-dump.c @@ -81,6 +81,7 @@ #include "dump.h" #include "eventpoll.h" #include "memfd.h" +#include "timens.h" /* * Architectures can overwrite this function to restore register sets that @@ -1921,6 +1922,12 @@ int cr_dump_tasks(pid_t pid) goto err; } + if ((root_ns_mask & CLONE_NEWTIME) == 0) { + ret = dump_time_ns(0); + if (ret) + goto err; + } + ret = dump_cgroups(); if (ret) goto err; diff --git a/criu/cr-restore.c b/criu/cr-restore.c index ce6e667d7..ed4b95b91 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -1749,6 +1749,9 @@ static int restore_task_with_children(void *_arg) if (root_ns_mask & CLONE_NEWTIME) { if (prepare_timens(current->ids->time_ns_id)) goto err; + } else if (kdat.has_timens) { + if (prepare_timens(0)) + goto err; } /* Wait prepare_userns */ diff --git a/criu/timens.c b/criu/timens.c index 764f8c9e0..f3b50fdff 100644 --- a/criu/timens.c +++ b/criu/timens.c @@ -62,6 +62,11 @@ int prepare_timens(int id) if (!img) return -1; + if (id == 0 && empty_image(img)) { + pr_warn("Clocks values have not been dumped\n"); + return 0; + } + ret = pb_read_one(img, &te, PB_TIMENS); close_image(img); if (ret < 0) From 698f3a4dbd8754a5c8bf1bf8f682d176f3bd24fb Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Thu, 26 Mar 2020 20:03:34 +0300 Subject: [PATCH 0209/1854] zdtm: limit the line length for ps by 160 symbols By default, this limit is 80 symbols and this isn't enough: 4730 pts/0 S+ 0:00 \_ ./zdtm_ct zdtm.py 7535 4731 pts/0 S+ 0:00 | \_ python zdtm.py 7536 4839 pts/0 S+ 0:00 | \_ python zdtm.p 7537 4861 pts/0 S+ 0:00 | \_ make --no 7538 4882 pts/0 S+ 0:00 | \_ ./mnt 7539 4883 ? Ss 0:00 | \_ . 
Signed-off-by: Andrei Vagin --- test/zdtm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/zdtm.py b/test/zdtm.py index 0bd7b84cc..ac8d7bee0 100755 --- a/test/zdtm.py +++ b/test/zdtm.py @@ -1885,7 +1885,7 @@ class Launcher: pid, status = os.waitpid(0, flags) except OSError as e: if e.errno == errno.EINTR: - subprocess.Popen(["ps", "axf"]).wait() + subprocess.Popen(["ps", "axf", "--width", "160"]).wait() continue signal.alarm(0) raise e From 067a20c815c5a632eee63469bcc9d99af73a9c79 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Sat, 28 Mar 2020 22:13:17 +0300 Subject: [PATCH 0210/1854] zdtm: fail if test with the crfail tag passes Signed-off-by: Andrei Vagin --- test/zdtm.py | 2 ++ test/zdtm/static/unhashed_proc.desc | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/test/zdtm.py b/test/zdtm.py index ac8d7bee0..5e42c769e 100755 --- a/test/zdtm.py +++ b/test/zdtm.py @@ -1735,6 +1735,8 @@ def do_run_test(tname, tdesc, flavs, opts): t.stop() cr_api.fini() try_run_hook(t, ["--clean"]) + if t.blocking(): + raise test_fail_exc("unexpected success") except test_fail_exc as e: print_sep("Test %s FAIL at %s" % (tname, e.step), '#') t.print_output() diff --git a/test/zdtm/static/unhashed_proc.desc b/test/zdtm/static/unhashed_proc.desc index 847e3b27c..de1915b23 100644 --- a/test/zdtm/static/unhashed_proc.desc +++ b/test/zdtm/static/unhashed_proc.desc @@ -1 +1 @@ -{'flags': 'crfail', 'opts' : '--link-remap'} +{'opts' : '--link-remap'} From bb0b4219efbda66995887b09fb3d6d81dc314031 Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Tue, 31 Mar 2020 14:16:30 +0000 Subject: [PATCH 0211/1854] img: fix image_name() when image is empty When an image is opened but errored with a ENOENT error, the image is still valid. Later on, do_pb_read_one() can fail and will invoke image_name(). The image fd is EMPTY_IMG_FD (-404). read_fd_link fails. 
Signed-off-by: Nicolas Viennot --- criu/protobuf.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/criu/protobuf.c b/criu/protobuf.c index e68d42b5c..4accc5ee0 100644 --- a/criu/protobuf.c +++ b/criu/protobuf.c @@ -25,8 +25,13 @@ static char *image_name(struct cr_img *img) int fd = img->_x.fd; static char image_path[PATH_MAX]; - if (read_fd_link(fd, image_path, sizeof(image_path)) > 0) + if (lazy_image(img)) + return img->path; + else if (empty_image(img)) + return "(empty-image)"; + else if (fd >= 0 && read_fd_link(fd, image_path, sizeof(image_path)) > 0) return image_path; + return NULL; } From 4d34f84bb6957f00d3440a428ecd80dd869212be Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Fri, 10 Apr 2020 21:10:27 +0000 Subject: [PATCH 0212/1854] img: relocate a PATH_MAX buffer from the bss section to the stack Reducing our memory footprint by 4K. Improved-by: Andrei Vagin Signed-off-by: Nicolas Viennot --- criu/protobuf.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/criu/protobuf.c b/criu/protobuf.c index 4accc5ee0..206223ca5 100644 --- a/criu/protobuf.c +++ b/criu/protobuf.c @@ -20,16 +20,16 @@ #include "protobuf.h" #include "util.h" -static char *image_name(struct cr_img *img) +#define image_name(img, buf) __image_name(img, buf, sizeof(buf)) +static char *__image_name(struct cr_img *img, char *image_path, size_t image_path_size) { int fd = img->_x.fd; - static char image_path[PATH_MAX]; if (lazy_image(img)) return img->path; else if (empty_image(img)) return "(empty-image)"; - else if (fd >= 0 && read_fd_link(fd, image_path, sizeof(image_path)) > 0) + else if (fd >= 0 && read_fd_link(fd, image_path, image_path_size) > 0) return image_path; return NULL; @@ -48,6 +48,7 @@ static char *image_name(struct cr_img *img) int do_pb_read_one(struct cr_img *img, void **pobj, int type, bool eof) { + char img_name_buf[PATH_MAX]; u8 local[PB_PKOBJ_LOCAL_SIZE]; void *buf = (void *)&local; u32 size; @@ -55,7 
+56,7 @@ int do_pb_read_one(struct cr_img *img, void **pobj, int type, bool eof) if (!cr_pb_descs[type].pb_desc) { pr_err("Wrong object requested %d on %s\n", - type, image_name(img)); + type, image_name(img, img_name_buf)); return -1; } @@ -70,13 +71,13 @@ int do_pb_read_one(struct cr_img *img, void **pobj, int type, bool eof) return 0; } else { pr_err("Unexpected EOF on %s\n", - image_name(img)); + image_name(img, img_name_buf)); return -1; } } else if (ret < sizeof(size)) { pr_perror("Read %d bytes while %d expected on %s", ret, (int)sizeof(size), - image_name(img)); + image_name(img, img_name_buf)); return -1; } @@ -90,11 +91,11 @@ int do_pb_read_one(struct cr_img *img, void **pobj, int type, bool eof) ret = bread(&img->_x, buf, size); if (ret < 0) { pr_perror("Can't read %d bytes from file %s", - size, image_name(img)); + size, image_name(img, img_name_buf)); goto err; } else if (ret != size) { pr_perror("Read %d bytes while %d expected from %s", - ret, size, image_name(img)); + ret, size, image_name(img, img_name_buf)); ret = -1; goto err; } @@ -103,7 +104,7 @@ int do_pb_read_one(struct cr_img *img, void **pobj, int type, bool eof) if (!*pobj) { ret = -1; pr_err("Failed unpacking object %p from %s\n", - pobj, image_name(img)); + pobj, image_name(img, img_name_buf)); goto err; } From 6b9faabf39e14bbc23fe3174f308a5f9f870113f Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Sat, 28 Mar 2020 00:18:48 +0000 Subject: [PATCH 0213/1854] mem: avoid re-opening CR_FD_PAGES when not needed This commit introduces an optimization when rsti(t)->vma_io is empty. This optimization allows streaming a non-seekable image as CR_FD_PAGES is not reopened. 
Signed-off-by: Nicolas Viennot --- criu/mem.c | 14 ++++++++++++++ criu/pie/restorer.c | 3 ++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/criu/mem.c b/criu/mem.c index 55022d94a..15aa0cbdb 100644 --- a/criu/mem.c +++ b/criu/mem.c @@ -1403,6 +1403,20 @@ static int prepare_vma_ios(struct pstree_item *t, struct task_restore_args *ta) { struct cr_img *pages; + /* + * We optimize the case when rsti(t)->vma_io is empty. + * + * This is useful for remote images, where all VMAs are premapped + * (pr->pieok is false). This avoids re-opening the CR_FD_PAGES file, + * which could no longer be available. + */ + if (list_empty(&rsti(t)->vma_io)) { + ta->vma_ios = NULL; + ta->vma_ios_n = 0; + ta->vma_ios_fd = -1; + return 0; + } + /* * If auto-dedup is on we need RDWR mode to be able to punch holes in * the input files (in restorer.c) diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c index afe185f04..b3d7e2b5c 100644 --- a/criu/pie/restorer.c +++ b/criu/pie/restorer.c @@ -1602,7 +1602,8 @@ long __export_restore_task(struct task_restore_args *args) rio = ((void *)rio) + RIO_SIZE(rio->nr_iovs); } - sys_close(args->vma_ios_fd); + if (args->vma_ios_fd != -1) + sys_close(args->vma_ios_fd); /* * Proxify vDSO. From d1fa1734ee53404f8a06d82a5732dc1daff8d756 Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Wed, 4 Mar 2020 18:26:37 +0300 Subject: [PATCH 0214/1854] autofs: fix integer overflow in mount options parsing In real life cases pipe_ino param could be larger than INT_MAX, but in autofs_parse() function we are using atoi function, that uses 4 byte integers. It's a bug. 
Example of mount info from real case: (00.508286) type autofs source /etc/auto.misc mnt_id 2824 s_dev 0x4b9 / @ ./misc flags 0x300000 options fd=5,pipe_ino=3480845226,pgrp=95929,timeout=300, minproto=5,maxproto=5,indirect 3480845226 > 2147483647 (32-bit wide signed int max value) => we have a problem It causes a error: (03.195915) Error (criu/pipes.c:529): The packetized mode for pipes is not supported yet Signed-off-by: Alexander Mikhalitsyn (Virtuozzo) --- criu/autofs.c | 42 +++++++++++++++++++++++++++++++++++------- 1 file changed, 35 insertions(+), 7 deletions(-) diff --git a/criu/autofs.c b/criu/autofs.c index a2dc60ffc..175b8900e 100644 --- a/criu/autofs.c +++ b/criu/autofs.c @@ -62,25 +62,53 @@ int autofs_parse(struct mount_info *pm) { long pipe_ino = AUTOFS_OPT_UNKNOWN; char **opts; - int nr_opts, i; + int nr_opts, i, ret; split(pm->options, ',', &opts, &nr_opts); if (!opts) return -1; + for (i = 0; i < nr_opts; i++) { if (!strncmp(opts[i], "pipe_ino=", strlen("pipe_ino="))) - pipe_ino = atoi(opts[i] + strlen("pipe_ino=")); + if (xatol(opts[i] + strlen("pipe_ino="), &pipe_ino)) { + pr_err("pipe_ino (%s) mount option parse failed\n", opts[i] + strlen("pipe_ino=")); + ret = -1; + goto free; + } + } + + /* + * We must inform user about bug if pipe_ino is greater than UINT32_MAX, + * because it means that something changed in Linux Kernel virtual fs + * inode numbers generation mechanism. What we have at the moment: + * 1. struct inode i_ino field (include/linux/fs.h in Linux kernel) + * has unsigned long type. + * 2. get_next_ino() function (fs/inode.c), that used for generating inode + * numbers on virtual filesystems (pipefs, debugfs for instance) + * has unsigned int as return type. + * So, it means that ATM it is safe to keep uint32 type for pipe_id field + * in pipe-data.proto. 
+ */ + if (pipe_ino > UINT32_MAX) { + pr_err("overflow: pipe_ino > UINT32_MAX\n"); + ret = -1; + goto free; } - for (i = 0; i < nr_opts; i++) - xfree(opts[i]); - free(opts); if (pipe_ino == AUTOFS_OPT_UNKNOWN) { pr_warn("Failed to find pipe_ino option (old kernel?)\n"); - return 0; + ret = 0; + goto free; } - return autofs_gather_pipe(pipe_ino); + ret = autofs_gather_pipe(pipe_ino); + +free: + for (i = 0; i < nr_opts; i++) + xfree(opts[i]); + xfree(opts); + + return ret; } static int autofs_check_fd_stat(struct stat *stat, int prgp, int fd, From 62088c721f08aaec8b63de3904304a3a31dcefea Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Tue, 14 Apr 2020 10:04:28 +0300 Subject: [PATCH 0215/1854] criu: put statement continuation on the same line as the closing bracket We should follow Linux Kernel Coding Style: ... the closing brace is empty on a line of its own, except in the cases where it is followed by a continuation of the same statement, ie ... an else in an if-statement ... https://www.kernel.org/doc/html/v4.10/process/coding-style.html#placing-braces-and-spaces Automatically fixing with: :!git grep --files-with-matches "^\s*else[^{]*{" | xargs :argadd :argdo :%s/}\s*\n\s*\(else[^{]*{\)/} \1/g | update Signed-off-by: Pavel Tikhomirov --- compel/arch/ppc64/src/lib/infect.c | 6 ++---- compel/src/main.c | 9 +++------ criu/arch/ppc64/crtools.c | 3 +-- criu/cr-dump.c | 3 +-- test/zdtm/static/fifo_wronly.c | 3 +-- test/zdtm/static/inotify_system.c | 3 +-- test/zdtm/static/ptrace_sig.c | 3 +-- test/zdtm/static/vsx.c | 3 +-- test/zdtm/transition/epoll.c | 3 +-- 9 files changed, 12 insertions(+), 24 deletions(-) diff --git a/compel/arch/ppc64/src/lib/infect.c b/compel/arch/ppc64/src/lib/infect.c index defed3d85..637acd46d 100644 --- a/compel/arch/ppc64/src/lib/infect.c +++ b/compel/arch/ppc64/src/lib/infect.c @@ -222,8 +222,7 @@ static int get_altivec_regs(pid_t pid, user_fpregs_struct_t *fp) return -1; } pr_debug("Altivec not supported\n"); - } - else { + } else { 
pr_debug("Dumping Altivec registers\n"); fp->flags |= USER_FPREGS_FL_ALTIVEC; } @@ -251,8 +250,7 @@ static int get_vsx_regs(pid_t pid, user_fpregs_struct_t *fp) return -1; } pr_debug("VSX register's dump not supported.\n"); - } - else { + } else { pr_debug("Dumping VSX registers\n"); fp->flags |= USER_FPREGS_FL_VSX; } diff --git a/compel/src/main.c b/compel/src/main.c index 36127c357..9fc3a924c 100644 --- a/compel/src/main.c +++ b/compel/src/main.c @@ -181,8 +181,7 @@ static void print_ldflags(bool compat) if (uninst_root) { printf("%s/arch/%s/scripts/compel-pack%s.lds.S\n", uninst_root, flags.arch, compat_str); - } - else { + } else { printf("%s/compel/scripts/compel-pack%s.lds.S\n", LIBEXECDIR, compat_str); @@ -222,8 +221,7 @@ static int print_libs(bool is_static) return 1; } printf("%s/%s\n", uninst_root, STATIC_LIB); - } - else { + } else { printf("%s/%s\n", LIBDIR, (is_static) ? STATIC_LIB : DYN_LIB); } @@ -255,8 +253,7 @@ static char *gen_prefix(const char *path) for (i = len - 1; i >= 0; i--) { if (!p1 && path[i] == '.') { p2 = path + i - 1; - } - else if (!p1 && path[i] == '/') { + } else if (!p1 && path[i] == '/') { p1 = path + i + 1; break; } diff --git a/criu/arch/ppc64/crtools.c b/criu/arch/ppc64/crtools.c index 0d9f49c3f..631150c3e 100644 --- a/criu/arch/ppc64/crtools.c +++ b/criu/arch/ppc64/crtools.c @@ -374,8 +374,7 @@ static int __copy_task_regs(user_regs_struct_t *regs, fpstate = &(core->ti_ppc64->tmstate->fpstate); vrstate = &(core->ti_ppc64->tmstate->vrstate); vsxstate = &(core->ti_ppc64->tmstate->vsxstate); - } - else { + } else { gpregs = core->ti_ppc64->gpregs; fpstate = &(core->ti_ppc64->fpstate); vrstate = &(core->ti_ppc64->vrstate); diff --git a/criu/cr-dump.c b/criu/cr-dump.c index a38e47d12..745998afc 100644 --- a/criu/cr-dump.c +++ b/criu/cr-dump.c @@ -1529,8 +1529,7 @@ static int cr_pre_dump_finish(int status) timing_stop(TIME_MEMWRITE); ret = page_xfer_predump_pages(item->pid->real, &xfer, mem_pp); - } - else { + } else { ret = 
page_xfer_dump_pages(&xfer, mem_pp); } diff --git a/test/zdtm/static/fifo_wronly.c b/test/zdtm/static/fifo_wronly.c index 2fbd69e6b..78fc7c8f7 100644 --- a/test/zdtm/static/fifo_wronly.c +++ b/test/zdtm/static/fifo_wronly.c @@ -55,8 +55,7 @@ int main(int argc, char **argv) pr_perror("read error %s", filename); chret = errno; return chret; - } - else if (res == 0) { + } else if (res == 0) { pr_perror("read(%d, rbuf, 7) return 0", fd1); return 1; } diff --git a/test/zdtm/static/inotify_system.c b/test/zdtm/static/inotify_system.c index 3e6b2ad48..f8af3dca2 100644 --- a/test/zdtm/static/inotify_system.c +++ b/test/zdtm/static/inotify_system.c @@ -280,8 +280,7 @@ int errors(int exp_len, int len, char *etalon_buf, char *buf) { fail("Incorrect length of field name."); error++; break; - } - else if (event->len && strncmp(event->name, exp_event->name, event->len)) { + } else if (event->len && strncmp(event->name, exp_event->name, event->len)) { fail("Handled file name %s, expected %s", event->name, exp_event->name); diff --git a/test/zdtm/static/ptrace_sig.c b/test/zdtm/static/ptrace_sig.c index f71517717..b70f8f1b0 100644 --- a/test/zdtm/static/ptrace_sig.c +++ b/test/zdtm/static/ptrace_sig.c @@ -74,8 +74,7 @@ int main(int argc, char ** argv) if (cpid < 0) { pr_perror("fork failed"); return 1; - } - else if (cpid == 0) { + } else if (cpid == 0) { close(child_pipe[0]); return child(child_pipe[1]); } diff --git a/test/zdtm/static/vsx.c b/test/zdtm/static/vsx.c index be02cfe10..e7d81b12c 100644 --- a/test/zdtm/static/vsx.c +++ b/test/zdtm/static/vsx.c @@ -388,8 +388,7 @@ int main(int argc, char *argv[]) test_msg("Data mismatch\n"); fail(); } - } - else { + } else { test_msg("The CPU is missing some features.\n"); fail(); } diff --git a/test/zdtm/transition/epoll.c b/test/zdtm/transition/epoll.c index 4eac5214c..6ab436889 100644 --- a/test/zdtm/transition/epoll.c +++ b/test/zdtm/transition/epoll.c @@ -181,8 +181,7 @@ int main(int argc, char **argv) fail("waitpid error: 
%m\n"); counter++; continue; - } - else { + } else { rv = WEXITSTATUS(rv); if (rv < MAX_EXIT_CODE && rv > SUCCESS) { fail("Child failed: %s (%d)\n", From ef7ef9cfa0c0ae4a2777c72b9facf994068b817e Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Tue, 14 Apr 2020 18:03:38 +0000 Subject: [PATCH 0216/1854] kerndat: remove duplicate call to kerndat_socket_netns() kerndat_socket_netns() is called twice. We keep the latter to avoid changing the behavior. Signed-off-by: Nicolas Viennot --- criu/kerndat.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/criu/kerndat.c b/criu/kerndat.c index 0c6910da9..0421997af 100644 --- a/criu/kerndat.c +++ b/criu/kerndat.c @@ -1068,8 +1068,6 @@ int kerndat_init(void) ret = kerndat_tcp_repair(); if (!ret) ret = kerndat_compat_restore(); - if (!ret) - ret = kerndat_socket_netns(); if (!ret) ret = kerndat_tun_netns(); if (!ret) From 2c2fdd3334078f5eefea4f82f0df0fccfc8a9238 Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Mon, 13 Apr 2020 17:20:34 +0000 Subject: [PATCH 0217/1854] parasite-msg: %u is not implemented for parasite code Changed all the %u into %d. Ideally, we should implement the %u format for parasite code. 
Signed-off-by: Nicolas Viennot --- criu/pie/parasite.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/criu/pie/parasite.c b/criu/pie/parasite.c index 64b5bbb3e..d83978317 100644 --- a/criu/pie/parasite.c +++ b/criu/pie/parasite.c @@ -53,7 +53,7 @@ static int mprotect_vmas(struct parasite_dump_pages_args *args) vma = vmas + i; ret = sys_mprotect((void *)vma->start, vma->len, vma->prot | args->add_prot); if (ret) { - pr_err("mprotect(%08lx, %lu) failed with code %d\n", + pr_err("mprotect(%08lx, %ld) failed with code %d\n", vma->start, vma->len, ret); break; } @@ -102,7 +102,7 @@ static int dump_pages(struct parasite_dump_pages_args *args) } if (spliced_bytes != args->nr_pages * PAGE_SIZE) { sys_close(p); - pr_err("Can't splice all pages to pipe (%lu/%d)\n", spliced_bytes, args->nr_pages); + pr_err("Can't splice all pages to pipe (%ld/%d)\n", spliced_bytes, args->nr_pages); return -1; } From 42b5700b72c0bebbef113554c064827a3ab40b18 Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Wed, 15 Apr 2020 11:34:47 +0300 Subject: [PATCH 0218/1854] kerndat remove duplicate call to kerndat_nsid() Func kerndat_nsid() is called twice. v2: leave kerndat_nsid call near kerndat_link_nsid Signed-off-by: Pavel Tikhomirov --- criu/kerndat.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/criu/kerndat.c b/criu/kerndat.c index 0421997af..0b6d53bc7 100644 --- a/criu/kerndat.c +++ b/criu/kerndat.c @@ -1092,8 +1092,6 @@ int kerndat_init(void) ret = kerndat_vdso_preserves_hint(); if (!ret) ret = kerndat_socket_netns(); - if (!ret) - ret = kerndat_nsid(); if (!ret) ret = kerndat_x86_has_ptrace_fpu_xsave_bug(); if (!ret) From 7dc89376b85ddea408d43527ff42f4e86ea77a41 Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Wed, 1 Apr 2020 10:43:25 +0300 Subject: [PATCH 0219/1854] pstree: improve error handling in read_pstree_image First don't free pstree_item as they are allocated with shmalloc on restore. Second always pstree_entry__free_unpacked PstreeEntry. 
Third remove all breaks replacing them with implict goto err, so that it would be easier to understand that we are on error path. Forth split out code for reading one pstree item in separate function. Sadly there is no much use in xfree-ing pi->threads because in case of an error we still have ->threads unfreed from previous entries anyway. But at least some cleanup can be done here. Signed-off-by: Pavel Tikhomirov --- criu/pstree.c | 197 ++++++++++++++++++++++++++------------------------ 1 file changed, 103 insertions(+), 94 deletions(-) diff --git a/criu/pstree.c b/criu/pstree.c index d0e81bfad..5de367688 100644 --- a/criu/pstree.c +++ b/criu/pstree.c @@ -498,11 +498,109 @@ static int read_pstree_ids(struct pstree_item *pi) return 0; } +/* + * Returns <0 on error, 0 on eof and >0 on successful read + */ +static int read_one_pstree_item(struct cr_img *img, pid_t *pid_max) +{ + struct pstree_item *pi; + PstreeEntry *e; + int ret, i; + + ret = pb_read_one_eof(img, &e, PB_PSTREE); + if (ret <= 0) + return ret; + + ret = -1; + pi = lookup_create_item(e->pid); + if (pi == NULL) + goto err; + BUG_ON(pi->pid->state != TASK_UNDEF); + + /* + * All pids should be added in the tree to be able to find + * free pid-s for helpers. pstree_item for these pid-s will + * be initialized when we meet PstreeEntry with this pid or + * we will create helpers for them. 
+ */ + if (lookup_create_item(e->pgid) == NULL) + goto err; + if (lookup_create_item(e->sid) == NULL) + goto err; + + pi->pid->ns[0].virt = e->pid; + if (e->pid > *pid_max) + *pid_max = e->pid; + pi->pgid = e->pgid; + if (e->pgid > *pid_max) + *pid_max = e->pgid; + pi->sid = e->sid; + if (e->sid > *pid_max) + *pid_max = e->sid; + pi->pid->state = TASK_ALIVE; + + if (e->ppid == 0) { + if (root_item) { + pr_err("Parent missed on non-root task " + "with pid %d, image corruption!\n", e->pid); + goto err; + } + root_item = pi; + pi->parent = NULL; + } else { + struct pid *pid; + struct pstree_item *parent; + + pid = pstree_pid_by_virt(e->ppid); + if (!pid || pid->state == TASK_UNDEF || pid->state == TASK_THREAD) { + pr_err("Can't find a parent for %d\n", vpid(pi)); + goto err; + } + + parent = pid->item; + pi->parent = parent; + list_add(&pi->sibling, &parent->children); + } + + pi->nr_threads = e->n_threads; + pi->threads = xmalloc(e->n_threads * sizeof(struct pid)); + if (!pi->threads) + goto err; + + for (i = 0; i < e->n_threads; i++) { + struct pid *node; + pi->threads[i].real = -1; + pi->threads[i].ns[0].virt = e->threads[i]; + pi->threads[i].state = TASK_THREAD; + pi->threads[i].item = NULL; + if (i == 0) + continue; /* A thread leader is in a tree already */ + node = lookup_create_pid(pi->threads[i].ns[0].virt, &pi->threads[i]); + + BUG_ON(node == NULL); + if (node != &pi->threads[i]) { + pr_err("Unexpected task %d in a tree %d\n", e->threads[i], i); + goto err; + } + } + + task_entries->nr_threads += e->n_threads; + task_entries->nr_tasks++; + + /* note: we don't fail if we have empty ids */ + if (read_pstree_ids(pi) < 0) + goto err; + + ret = 1; +err: + pstree_entry__free_unpacked(e, NULL); + return ret; +} + static int read_pstree_image(pid_t *pid_max) { - int ret = 0, i; struct cr_img *img; - struct pstree_item *pi; + int ret; pr_info("Reading image tree\n"); @@ -510,99 +608,10 @@ static int read_pstree_image(pid_t *pid_max) if (!img) return -1; - while (1) { 
- PstreeEntry *e; + do { + ret = read_one_pstree_item(img, pid_max); + } while (ret > 0); - ret = pb_read_one_eof(img, &e, PB_PSTREE); - if (ret <= 0) - break; - - ret = -1; - pi = lookup_create_item(e->pid); - if (pi == NULL) - break; - BUG_ON(pi->pid->state != TASK_UNDEF); - - /* - * All pids should be added in the tree to be able to find - * free pid-s for helpers. pstree_item for these pid-s will - * be initialized when we meet PstreeEntry with this pid or - * we will create helpers for them. - */ - if (lookup_create_item(e->pgid) == NULL) - break; - if (lookup_create_item(e->sid) == NULL) - break; - - pi->pid->ns[0].virt = e->pid; - if (e->pid > *pid_max) - *pid_max = e->pid; - pi->pgid = e->pgid; - if (e->pgid > *pid_max) - *pid_max = e->pgid; - pi->sid = e->sid; - if (e->sid > *pid_max) - *pid_max = e->sid; - pi->pid->state = TASK_ALIVE; - - if (e->ppid == 0) { - if (root_item) { - pr_err("Parent missed on non-root task " - "with pid %d, image corruption!\n", e->pid); - goto err; - } - root_item = pi; - pi->parent = NULL; - } else { - struct pid *pid; - struct pstree_item *parent; - - pid = pstree_pid_by_virt(e->ppid); - if (!pid || pid->state == TASK_UNDEF || pid->state == TASK_THREAD) { - pr_err("Can't find a parent for %d\n", vpid(pi)); - pstree_entry__free_unpacked(e, NULL); - xfree(pi); - goto err; - } - - parent = pid->item; - pi->parent = parent; - list_add(&pi->sibling, &parent->children); - } - - pi->nr_threads = e->n_threads; - pi->threads = xmalloc(e->n_threads * sizeof(struct pid)); - if (!pi->threads) - break; - - for (i = 0; i < e->n_threads; i++) { - struct pid *node; - pi->threads[i].real = -1; - pi->threads[i].ns[0].virt = e->threads[i]; - pi->threads[i].state = TASK_THREAD; - pi->threads[i].item = NULL; - if (i == 0) - continue; /* A thread leader is in a tree already */ - node = lookup_create_pid(pi->threads[i].ns[0].virt, &pi->threads[i]); - - BUG_ON(node == NULL); - if (node != &pi->threads[i]) { - pr_err("Unexpected task %d in a tree 
%d\n", e->threads[i], i); - return -1; - } - } - - task_entries->nr_threads += e->n_threads; - task_entries->nr_tasks++; - - pstree_entry__free_unpacked(e, NULL); - - ret = read_pstree_ids(pi); - if (ret < 0) - goto err; - } - -err: close_image(img); return ret; } From c83a0aae2c71b66abf613c8ef5fe97b7311e1987 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Sat, 11 Apr 2020 06:37:38 +0300 Subject: [PATCH 0220/1854] proc: parse clock symbolic names in /proc/pid/timens_offsets Clock IDs in this file has been replaced by clock symbolic names. Now it looks like this: $ cat /proc/774/timens_offsets monotonic 864000 0 boottime 1728000 0 Signed-off-by: Andrei Vagin --- criu/proc_parse.c | 22 ++++++++++++---------- criu/timens.c | 6 ++---- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/criu/proc_parse.c b/criu/proc_parse.c index c73fa9776..4a22700aa 100644 --- a/criu/proc_parse.c +++ b/criu/proc_parse.c @@ -1484,25 +1484,27 @@ int parse_timens_offsets(struct timespec *boff, struct timespec *moff) } while (fgets(buf, BUF_SIZE, f)) { int64_t sec, nsec; - int clockid; + char clockid[10]; - if (sscanf(buf, "%d %"PRId64" %"PRId64"\n", &clockid, &sec, &nsec) != 3) { + if (sscanf(buf, "%9s %"PRId64" %"PRId64"\n", clockid, &sec, &nsec) != 3) { pr_err("Unable to parse: %s\n", buf); goto out; } - switch (clockid) { - case CLOCK_MONOTONIC: + clockid[sizeof(clockid) - 1] = 0; + if (strcmp(clockid, "monotonic") == 0 || + strcmp(clockid, __stringify(CLOCK_MONOTONIC)) == 0) { moff->tv_sec = sec; moff->tv_nsec = nsec; - break; - case CLOCK_BOOTTIME: + continue; + } + if (strcmp(clockid, "boottime") == 0 || + strcmp(clockid, __stringify(CLOCK_BOOTTIME)) == 0) { boff->tv_sec = sec; boff->tv_nsec = nsec; - break; - default: - pr_err("Unknown clockid: %d\n", clockid); - goto out; + continue; } + pr_err("Unknown clockid: %s\n", clockid); + goto out; } exit_code = 0; out: diff --git a/criu/timens.c b/criu/timens.c index f3b50fdff..2a7e95284 100644 --- a/criu/timens.c +++ 
b/criu/timens.c @@ -92,8 +92,7 @@ int prepare_timens(int id) ts.tv_nsec = te->monotonic->tv_nsec - ts.tv_nsec; normalize_timespec(&ts); - pr_debug("timens: %d %ld %ld\n", - CLOCK_MONOTONIC, ts.tv_sec, ts.tv_nsec); + pr_debug("timens: monotonic %ld %ld\n", ts.tv_sec, ts.tv_nsec); if (dprintf(fd, "%d %ld %ld\n", CLOCK_MONOTONIC, ts.tv_sec, ts.tv_nsec) < 0) { pr_perror("Unable to set a monotonic clock offset"); @@ -109,8 +108,7 @@ int prepare_timens(int id) ts.tv_nsec = te->boottime->tv_nsec - ts.tv_nsec; normalize_timespec(&ts); - pr_debug("timens: %d %ld %ld\n", - CLOCK_BOOTTIME, ts.tv_sec, ts.tv_nsec); + pr_debug("timens: boottime %ld %ld\n", ts.tv_sec, ts.tv_nsec); if (dprintf(fd, "%d %ld %ld\n", CLOCK_BOOTTIME, ts.tv_sec, ts.tv_nsec) < 0) { pr_perror("Unable to set a boottime clock offset"); From 5c5e7695a51318b17e3d982df8231ac83971641c Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Mon, 20 Apr 2020 15:45:51 -0700 Subject: [PATCH 0221/1854] get_clean_mount: demote an error to a warning When testing runc checkpointing, I frequently see the following error: > Error (criu/mount.c:1107): mnt: Can't create a temporary directory: Read-only file system This happens because container root is read-only mount. The error here is not actually fatal since it is handled later in ns_open_mountpoint() (at least since [1] is fixed), but it is shown as error in runc integration tests. Since it is not fatal, let's demote it to a warning to avoid confusion. 
[1] https://github.com/checkpoint-restore/criu/issues/520 Signed-off-by: Kir Kolyshkin --- criu/mount.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/criu/mount.c b/criu/mount.c index 180f2a62d..89b8cff59 100644 --- a/criu/mount.c +++ b/criu/mount.c @@ -1104,7 +1104,7 @@ static char *get_clean_mnt(struct mount_info *mi, char *mnt_path_tmp, char *mnt_ if (mnt_path == NULL && errno == ENOENT) mnt_path = mkdtemp(mnt_path_root); if (mnt_path == NULL) { - pr_perror("Can't create a temporary directory"); + pr_warn("Can't create a temporary directory: %s\n", strerror(errno)); return NULL; } From 95ead14874244f3c12e5970a74d1f4dd2433d652 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 29 Apr 2020 16:31:49 +0300 Subject: [PATCH 0222/1854] =?UTF-8?q?criu:=20Version=20=CF=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The long-tempting release with lots of new features on board. We finally have the time namespace support, great improvement of the pre-dump memory consumption, new clone3 support and many more. Signed-off-by: Pavel Emelyanov --- Makefile.versions | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile.versions b/Makefile.versions index f3adcb0a6..3ccc48185 100644 --- a/Makefile.versions +++ b/Makefile.versions @@ -1,10 +1,10 @@ # # CRIU version.
CRIU_VERSION_MAJOR := 3 -CRIU_VERSION_MINOR := 13 +CRIU_VERSION_MINOR := 14 CRIU_VERSION_SUBLEVEL := CRIU_VERSION_EXTRA := -CRIU_VERSION_NAME := Silicon Willet +CRIU_VERSION_NAME := Platinum Peacock CRIU_VERSION := $(CRIU_VERSION_MAJOR)$(if $(CRIU_VERSION_MINOR),.$(CRIU_VERSION_MINOR))$(if $(CRIU_VERSION_SUBLEVEL),.$(CRIU_VERSION_SUBLEVEL))$(if $(CRIU_VERSION_EXTRA),.$(CRIU_VERSION_EXTRA)) export CRIU_VERSION_MAJOR CRIU_VERSION_MINOR CRIU_VERSION_SUBLEVEL From f2edc1e1999e7c495af404fc6c38b82c391854ec Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Wed, 6 May 2020 13:13:38 +0000 Subject: [PATCH 0223/1854] Update certificates for failing tls based tests When using zdtm.py with --tls it started to fail as the certificates seem to have expired. Following commands have been used to re-generate the certificate: # Generate CA key and certificate echo -ne "ca\ncert_signing_key" > temp certtool --generate-privkey > cakey.pem certtool --generate-self-signed \ --template temp \ --load-privkey cakey.pem \ --outfile cacert.pem # Generate server key and certificate echo -ne "cn=$HOSTNAME\nencryption_key\nsigning_key" > temp certtool --generate-privkey > key.pem certtool --generate-certificate \ --template temp \ --load-privkey key.pem \ --load-ca-certificate cacert.pem \ --load-ca-privkey cakey.pem \ --outfile cert.pem rm temp cakey.pem Without this tests will fail in Travis. 
Signed-off-by: Adrian Reber --- test/pki/cacert.pem | 42 +++--- test/pki/cert.pem | 44 +++---- test/pki/key.pem | 310 ++++++++++++++++++++++---------------------- 3 files changed, 198 insertions(+), 198 deletions(-) diff --git a/test/pki/cacert.pem b/test/pki/cacert.pem index 2f8706616..65afd2aa7 100644 --- a/test/pki/cacert.pem +++ b/test/pki/cacert.pem @@ -1,23 +1,23 @@ -----BEGIN CERTIFICATE----- -MIID0TCCAjmgAwIBAgIUWzgmx9p7y7mkrNptGX9+0acjpa4wDQYJKoZIhvcNAQEL -BQAwADAeFw0xOTA1MDYxMjAzMDJaFw0yMDA1MDUxMjAzMDJaMAAwggGiMA0GCSqG -SIb3DQEBAQUAA4IBjwAwggGKAoIBgQD0p0lJUlq917GmJuCBeP2eLNd1/MUg1ojy -s7rrpinPYtLZqqquUhp32lfQtt3uJLjkhTrseZd86zWi3SMZlGs8zGGmKfqg0vaG -BXIgpEIr5C0wU9995kL9A6LS+eFZR6vJQETO5T22tjponoqEPOXeU8VaiC9jNipC -uFJT0wyC0bKIo+TUn573kxsGMt8jMOv0tc/okUlH16UAsYrmN7kWzgkWTJPddB7S -v5a9ibpPkbh+wrIGK5A6V5hTZ8U1wz2bE6/Xp+qjsD2R3jeU6f1tDvc8FZilabQy -Rmbxggucl1G3Ulo6Nvor1lhog72eZlHZujzf/5e/aMiZ7Br6plZ1/WTwtNgoCw6A -rgpLDraasQohiK6opYs2rr7uuiQxPLLVWE/RryXwUEoPXzxaf782XtXxkB0UhGvz -y2JBxCVPn7uUGuyEYywjTjI2UFvsMcXnMiQ4WaAfMbAmrBWM7EQ4b7VpD2c+OZkQ -J/AJeg85/ovTAtHPjhPP+0a9hnirktkCAwEAAaNDMEEwDwYDVR0TAQH/BAUwAwEB -/zAPBgNVHQ8BAf8EBQMDBwQAMB0GA1UdDgQWBBQOg6AA8Qu/m/O/II5spzYsTnsn -pjANBgkqhkiG9w0BAQsFAAOCAYEA1KKtw+ILKOg1AhGwgPsJXAWZoSIt7hdLaJ3P -WGyTWHLKKoJiGlLj3bSsJcMmMO+UwHBH9lmXrOWe/xcOvob2H+7dXbddQ0pX1wzK -KJKzSG35FZ2BfuSn5oEDtRsHnN2Ikc5MYz1a+F4w2tVL/Hcxld+oSAodDlCbGoe+ -0MkI5f1KhdAw00l/5IO7xPOcThjHw+nB5/cZTQ+l4zLWCWaXkor4IAEq/plPcdX1 -uoLSj3JruLz7/ts/EgG+ARAzXQrJ+LM2hdPB1NiaVxFq7MSWM6FybUdmMYgbP5s4 -RMNqI/M+bU9K5LRySDaiPhDXUoVULuqG1a23GQwXLOjF0JbrUQewfAaTO7TaPFh1 -lr25j9Fc9/gcXZjvLl+CEIv6P/haGOwySCTCks0F5bDehbLjZStPmugcnJflXdBn -lzoejlw2rePojQMlffQsaRGmmhj0beU4WQBfGACcZQB8GFNxQB8aynf0CK7Dvvb0 -9c9y4k0gHL7RxeLoQfq+smzKm+Eo +MIID0TCCAjmgAwIBAgIUF0WMpaJUi0+wXbOKQ6P41ZeRwdQwDQYJKoZIhvcNAQEL +BQAwADAeFw0yMDA1MDYxMzEwMzNaFw0yMTA1MDYxMzEwMzNaMAAwggGiMA0GCSqG +SIb3DQEBAQUAA4IBjwAwggGKAoIBgQDMPLu3o8RMlWlblK03GynmvNZDQKW0ZTYg 
+dE/Dlr/rVKo8KMAm6KbmpUJq8HUtaP/9Epf2eY9+LNNpBFKJuURxD23ObNdbU6o6 +hI4LRQVYsX6FB+6DrIXfD61zxebdpPWlCyoEd43firfwMoeGyarqyrZE+UXkR4fF +LhVm/4kQ8qRZte+GAIVp0SVMlNyhQM5AmTZPAO3iYRhZObkVFUTWnwjdxKOx1VhU +vRdhwI4N5x9EGiq6Lzc0iABxyIZ1mHOYhxDQv5gL4CECUZgzVxJp6DVP/v2w4254 +JeKXOPUm6YHxXxzJyT73mdz0/7VpqOZa5yiZKRBtHoWyovuTzu8jS6TgvjNrmAoO +kyu13jTlBDFUfVKBDoRQMhjt6wpMkUItYvVRq/RYLvXGU7VCzUHoVdBxNNO+MrZ2 +4ebN5l5/CSrOeaa0EylLLlkW7Q0JqmZJpZfK8/AnMyp71AkHTps5LZv/sLbPWwh1 +3XTZA1e+k9lS1Z480HjtNKurL526k8ECAwEAAaNDMEEwDwYDVR0TAQH/BAUwAwEB +/zAPBgNVHQ8BAf8EBQMDBwQAMB0GA1UdDgQWBBR5rZcu6Unhc0ZuuflmHEOa9bBu +1zANBgkqhkiG9w0BAQsFAAOCAYEAehfgQ1y3GTJ3LPQQOgn3AB/Sf+fGSNecZsTO +UxGFxbNGvl7UeFCS8Z1/h57AeIXSvE+BGfMvtq9OSl6t+3w9RfbIdzWzYYILoAVM +t0FkwrjLtVIlUSWD+Aia01ESjw+3ENceGcuo9jVyAI3MMkGftFc7U1UyNB9HY//x +79uUavmWioc3odooC0FOosIzV2KHCUPAnpN7TtNlefe4JisMa6WGXAk9CNU9t0wG +JLHK3E0LrtEreIMBmK6zyxH3OwMi1x/3sFSoon09/fsOLBbtUY+401a+nT+vMTKK +KpVvAC4tsEyFH0vyk68c6Z4VKO5aqoAtaQJZk2A2z1cBAz4aArgx1glkaNjd7sZn +lSEhnpP7uVUyYU9fyQ46zpIZm38gx6+95XL2gCQnEIUEeOCXMonXaavMqRx5zL8z +hihMSoLhE6Wx/tzxKlqz0GN8E0CTlpB1MmvCvOkaHzSq7Yc19Bez8jr6PNfZVsFv +s9a4qqhGKaLz7HKpG0863oCO4dU1 -----END CERTIFICATE----- diff --git a/test/pki/cert.pem b/test/pki/cert.pem index a0946ee41..f5a0452b5 100644 --- a/test/pki/cert.pem +++ b/test/pki/cert.pem @@ -1,24 +1,24 @@ -----BEGIN CERTIFICATE----- -MIIEAzCCAmugAwIBAgIUKV6zLC//OJDnmOYBuIG1Gvmv+V4wDQYJKoZIhvcNAQEL -BQAwADAeFw0xOTA1MDYxMjAzMDJaFw0yMDA1MDUxMjAzMDJaMBQxEjAQBgNVBAMT -CWxvY2FsaG9zdDCCAaIwDQYJKoZIhvcNAQEBBQADggGPADCCAYoCggGBANX1nv4J -U8+TEb2bWej5O2nOowpw2zSYTDAQ1oyAvV3P99Y6GZCuVZ1uT/7DWat0uRpcdmNi -HvownkO4VmDZdVqgiK1eHzY5YBJ7hBVDs3tpWNuN7eJPjnskNmJqKQ6l9rxYl/au -781T+tdtHp1ATtToMgVJxWaUx5lrpEJdmYc8Y6GpAA42D+rI3o4Sll3mI5rPCk16 -QY5dT2lnL2HuCKzM2bjWat6b3lMpfNz3A/blU9E/462Zxr/yKK/0yy3SBZhYzrrQ -1/erjIpm4I0sakHIOexM1AQliFiowFzVvr/paiXApWGOcuBJVIbmPI/bEGuTh0nr -3pmiF0YrkDCRhargElYcz64KQ9IxPFCKcKjkMnFPjTStZ7rcMyqKvGczqFaM5a6c 
-9gIn2ieUrVZ38yvtI5Lo/uxZ5IjXqB1Fdg4xi2tyf9WMHKy2tydBr9bTjfQRXfNT -/Zm3woDXOYsHzj+Sf6ntLVCkO1fnczw03fPRV03/uVRa5mPGyyj9xdPBqwIDAQAB -o2EwXzAMBgNVHRMBAf8EAjAAMA8GA1UdDwEB/wQFAwMHoAAwHQYDVR0OBBYEFEtF -ELehnIjLzoh/W51TGm2B00QAMB8GA1UdIwQYMBaAFA6DoADxC7+b878gjmynNixO -eyemMA0GCSqGSIb3DQEBCwUAA4IBgQA17NZCaiCgD9P4GPWgdVNkWUrmc8itJuIB -z3c9RdJGduxb9W/D7IW//D3hIblOPorNi3+08kO/IRMGah874MDCprMNppk94WGj -Kgqi/rsxq+rT6bcZXxMrcOIg0j2EvTPIgPh7trd8nHVWxNT/hvFClDtBJ2ssL2Tz -76EA7smDCUsfdzFJ2Xvk95fSTL49nfT2j9N/YoLaBQtCIxWAVZHKiCF2K+yXufHz -B/9UlXwsPJfqxM75dYWXFEqvhNf08YRHT1e1GRrybNGrNKF864KbLsnASdK4N5wu -sK9vZJ7VkLDQz+YpZkbm+UgOYK/BY3M8IX+F+WngV+43fr6Wh89TSgD7acEBvQTm -q1y9FipRvz0my7fwBh6UlYDja6/3yw6/YfN7uMFGsOOSgpNDCrMLqesf8l1HdQUF -VaVJyDjgFswV9KykAeJK2KU8QI7TGHv9soW60sr97DgUtCh4a6OPXLt79Ji3RSNw -MbU54JnpnfmMAj/0suDymdrJWv8EJKc= +MIIEAjCCAmqgAwIBAgIURFKv2lJVvIEfr7yjE7pK0BdK9W8wDQYJKoZIhvcNAQEL +BQAwADAeFw0yMDA1MDYxMzExMDVaFw0yMTA1MDYxMzExMDVaMBMxETAPBgNVBAMT +CGZlZG9yYTAxMIIBojANBgkqhkiG9w0BAQEFAAOCAY8AMIIBigKCAYEAtS2SivLa +AfsZ1X9xun/6i/1UNSxG5kPvVkfTPWOwSgCYyqn7u2vZsIsvcurqHQBEg1SWF4XA +dcFeCxdDN8em6ISfSOUFL9TSQrZ5eFQYcVTX9IFXDWXbFSC8AgJpYhrPf2O0vxQN +QFD4MpNXrLpEkc3vLrg/JhySC1HAYfO/nuJV3ZnAxJZCIv4upT5j0VjzfPw6WR/O +UUlcmLnh0+qMLv7G2dRLjdAsV1U8k7eykk+z9qXeKAjgFBEEgLTVbHR1zbrUpw42 +DYR7SQpEJ8wrJ9qHdI4H/9MnPu8O75kfgauQV4s/oIgWmVeHMDxpGaLEKgcYVE2F +/HlMfFvCrrZKKXi1/k35rG8sg6cP/bRuRZlgO0xFTrW0QKmWoBLqyqVMvifdkT0E +BWZ4eZWTDn2Rr4HMe7ov54QmKXaXp2VdOGzO1Efv9KvDildb5aEqBrVz+tU+C/Cd +g5p1d2g2m1Uk9wjcYbXOWx9fd9Ur/fxJuHtauxOW4gzFLcTOgvUgVmSXAgMBAAGj +YTBfMAwGA1UdEwEB/wQCMAAwDwYDVR0PAQH/BAUDAwegADAdBgNVHQ4EFgQUh+RA +Kg+4YVs/x9A0hxJlkDxG7IQwHwYDVR0jBBgwFoAUea2XLulJ4XNGbrn5ZhxDmvWw +btcwDQYJKoZIhvcNAQELBQADggGBAMLv1btjFDwxds0XlPH792ECXgYOXPC8cJAx +MJ+qdop/nNW8iUUoOjwy1f9jXBjz1bHKJ+XnyTz4rhLWIHVnhsiLMQ+ljHgTtTjY +3K6Lzo9+KMG/WDRajID8Sk2/B3jnCjgdzb6C4TZQ1mxnhSD7Cj/VitkVLP3D2nYx +55bXqAHAtSkW965LAxILSzt04a8MM9ZxNzyruGVI+jPF0OSgDNcJpcwUw8w8V1lj 
+c5TC9qqRlokFGphR45vblTw3GaXuZ5TTLOdix0QOhl52H2BtCrvTWJLREC5VJ87C +2FjXIz5zj1WZ3CDZVbpFhTiZy/chHJNaiiDWe6QhD40VvKlo/netp+rxwrpGsFkC +mtyRKrutdKi0vDvI6sKu13gLxetL+Bd6XWZ+XRsA/687QE+epxoh8sTvIt9j0z5r +0yIv/0eTKVZLQ6cAXSoR1g1GwTsWf0eBRgePdl4MCXVAs8+X1hTqnrJomiUdm93C +d2+QakSAhreCtlqMz/AULryv3KgaEg== -----END CERTIFICATE----- diff --git a/test/pki/key.pem b/test/pki/key.pem index eda1aa761..7acde5181 100644 --- a/test/pki/key.pem +++ b/test/pki/key.pem @@ -3,180 +3,180 @@ Public Key Info: Key Security Level: High (3072 bits) modulus: - 00:d5:f5:9e:fe:09:53:cf:93:11:bd:9b:59:e8:f9:3b - 69:ce:a3:0a:70:db:34:98:4c:30:10:d6:8c:80:bd:5d - cf:f7:d6:3a:19:90:ae:55:9d:6e:4f:fe:c3:59:ab:74 - b9:1a:5c:76:63:62:1e:fa:30:9e:43:b8:56:60:d9:75 - 5a:a0:88:ad:5e:1f:36:39:60:12:7b:84:15:43:b3:7b - 69:58:db:8d:ed:e2:4f:8e:7b:24:36:62:6a:29:0e:a5 - f6:bc:58:97:f6:ae:ef:cd:53:fa:d7:6d:1e:9d:40:4e - d4:e8:32:05:49:c5:66:94:c7:99:6b:a4:42:5d:99:87 - 3c:63:a1:a9:00:0e:36:0f:ea:c8:de:8e:12:96:5d:e6 - 23:9a:cf:0a:4d:7a:41:8e:5d:4f:69:67:2f:61:ee:08 - ac:cc:d9:b8:d6:6a:de:9b:de:53:29:7c:dc:f7:03:f6 - e5:53:d1:3f:e3:ad:99:c6:bf:f2:28:af:f4:cb:2d:d2 - 05:98:58:ce:ba:d0:d7:f7:ab:8c:8a:66:e0:8d:2c:6a - 41:c8:39:ec:4c:d4:04:25:88:58:a8:c0:5c:d5:be:bf - e9:6a:25:c0:a5:61:8e:72:e0:49:54:86:e6:3c:8f:db - 10:6b:93:87:49:eb:de:99:a2:17:46:2b:90:30:91:85 - aa:e0:12:56:1c:cf:ae:0a:43:d2:31:3c:50:8a:70:a8 - e4:32:71:4f:8d:34:ad:67:ba:dc:33:2a:8a:bc:67:33 - a8:56:8c:e5:ae:9c:f6:02:27:da:27:94:ad:56:77:f3 - 2b:ed:23:92:e8:fe:ec:59:e4:88:d7:a8:1d:45:76:0e - 31:8b:6b:72:7f:d5:8c:1c:ac:b6:b7:27:41:af:d6:d3 - 8d:f4:11:5d:f3:53:fd:99:b7:c2:80:d7:39:8b:07:ce - 3f:92:7f:a9:ed:2d:50:a4:3b:57:e7:73:3c:34:dd:f3 - d1:57:4d:ff:b9:54:5a:e6:63:c6:cb:28:fd:c5:d3:c1 - ab: + 00:b5:2d:92:8a:f2:da:01:fb:19:d5:7f:71:ba:7f:fa + 8b:fd:54:35:2c:46:e6:43:ef:56:47:d3:3d:63:b0:4a + 00:98:ca:a9:fb:bb:6b:d9:b0:8b:2f:72:ea:ea:1d:00 + 44:83:54:96:17:85:c0:75:c1:5e:0b:17:43:37:c7:a6 + e8:84:9f:48:e5:05:2f:d4:d2:42:b6:79:78:54:18:71 + 
54:d7:f4:81:57:0d:65:db:15:20:bc:02:02:69:62:1a + cf:7f:63:b4:bf:14:0d:40:50:f8:32:93:57:ac:ba:44 + 91:cd:ef:2e:b8:3f:26:1c:92:0b:51:c0:61:f3:bf:9e + e2:55:dd:99:c0:c4:96:42:22:fe:2e:a5:3e:63:d1:58 + f3:7c:fc:3a:59:1f:ce:51:49:5c:98:b9:e1:d3:ea:8c + 2e:fe:c6:d9:d4:4b:8d:d0:2c:57:55:3c:93:b7:b2:92 + 4f:b3:f6:a5:de:28:08:e0:14:11:04:80:b4:d5:6c:74 + 75:cd:ba:d4:a7:0e:36:0d:84:7b:49:0a:44:27:cc:2b + 27:da:87:74:8e:07:ff:d3:27:3e:ef:0e:ef:99:1f:81 + ab:90:57:8b:3f:a0:88:16:99:57:87:30:3c:69:19:a2 + c4:2a:07:18:54:4d:85:fc:79:4c:7c:5b:c2:ae:b6:4a + 29:78:b5:fe:4d:f9:ac:6f:2c:83:a7:0f:fd:b4:6e:45 + 99:60:3b:4c:45:4e:b5:b4:40:a9:96:a0:12:ea:ca:a5 + 4c:be:27:dd:91:3d:04:05:66:78:79:95:93:0e:7d:91 + af:81:cc:7b:ba:2f:e7:84:26:29:76:97:a7:65:5d:38 + 6c:ce:d4:47:ef:f4:ab:c3:8a:57:5b:e5:a1:2a:06:b5 + 73:fa:d5:3e:0b:f0:9d:83:9a:75:77:68:36:9b:55:24 + f7:08:dc:61:b5:ce:5b:1f:5f:77:d5:2b:fd:fc:49:b8 + 7b:5a:bb:13:96:e2:0c:c5:2d:c4:ce:82:f5:20:56:64 + 97: public exponent: 01:00:01: private exponent: - 1e:38:b0:79:7f:85:c8:17:24:f5:5c:41:29:e8:32:5d - 32:a3:d2:f0:b7:f5:c8:e1:52:14:be:c9:5f:d1:df:b3 - 65:75:6c:05:7a:6b:35:8a:a4:2f:46:73:ff:71:79:6e - 3f:eb:f9:88:f6:2e:1b:f6:cc:14:12:b0:98:c3:7e:91 - 0b:85:e2:bf:1d:b7:82:09:30:f3:23:68:01:85:13:94 - 80:c9:9a:55:94:96:da:30:48:a0:29:ec:86:da:1b:d5 - 2b:2b:74:63:92:b8:2a:8f:87:29:f0:ae:d7:55:63:0d - 2d:b3:0b:0e:2d:84:dc:d5:08:b5:ac:a0:f7:29:9d:71 - 89:3d:27:6a:eb:96:f5:4e:9b:8a:dc:14:82:0a:c7:5c - 16:1c:d2:7e:b9:1b:13:69:d8:b2:b1:b1:7e:aa:a9:ad - 06:ce:66:0e:5b:50:10:42:2a:0a:fd:29:14:f7:09:63 - c1:20:18:5f:27:81:46:12:8c:b8:f4:89:a6:3d:55:a1 - d4:64:fc:f2:db:d7:9c:f5:be:f7:9d:88:5c:6d:36:a4 - 4b:ea:c5:e3:ea:32:81:6b:f3:47:b5:35:d5:c4:1a:b2 - ae:12:9d:19:a3:ec:a4:af:41:7e:5e:34:9d:f5:bc:b9 - 1f:a3:c2:32:b4:fc:95:a7:7a:54:04:e2:d6:4e:10:2f - 66:68:8b:3b:20:ea:05:db:2e:72:01:11:e7:7c:f8:72 - 0f:60:be:f1:27:19:ad:3a:6f:e9:70:56:3a:86:6e:46 - 0d:e3:55:31:66:77:09:84:48:b9:25:4b:c3:26:70:12 - ca:a4:5f:c6:3d:6a:e5:db:4d:63:04:b8:09:07:c9:30 
- 85:08:9d:77:40:26:60:da:10:c2:53:d2:00:0d:9e:d9 - d5:71:06:30:eb:fb:f7:3f:82:1f:b3:9a:f3:4d:24:86 - 2e:94:fd:06:9e:dc:26:68:fa:64:c3:f9:fa:08:c4:b2 - ec:7a:f5:55:c5:10:b5:e2:2d:de:ba:04:30:10:5b:99 - + 66:7a:51:72:30:6c:e9:43:eb:57:dc:4f:2a:ab:2a:bf + 35:da:04:7d:77:d8:d4:c2:32:6e:4b:e8:64:53:99:77 + fe:f4:5a:f3:5d:0e:62:c2:3d:e7:e7:bb:42:12:87:4d + 39:6f:85:b9:e6:58:77:02:99:bb:f2:3b:7d:f8:7a:ca + f8:00:d4:8d:5d:c2:b1:41:00:a5:0f:cd:e4:db:32:77 + 47:f3:2f:99:5a:a5:ab:26:a7:2e:50:80:a6:b4:eb:ef + 43:e1:d4:a1:63:a9:1f:20:ed:52:46:0f:08:4f:0f:6b + dd:2c:95:a3:77:6c:8a:2a:7d:26:8f:87:98:04:61:cd + 29:be:3f:32:4e:bf:a2:c7:02:38:c6:14:a5:07:0b:a6 + 25:a2:d1:2a:0a:18:7b:d7:df:fd:2e:de:b2:2b:ef:80 + c1:71:1e:52:bb:2d:8a:7b:1a:26:c7:2d:d2:70:b9:db + 45:32:94:9d:34:cd:d8:58:e1:4b:47:b5:81:26:68:34 + 2a:32:02:0b:e6:95:3a:d2:d4:d6:e4:c7:bf:8e:04:4e + fc:75:e9:6d:f4:a4:a8:6b:50:76:7c:10:07:81:96:56 + 54:16:d7:39:8a:5f:51:79:1f:96:49:cb:d7:8b:0d:51 + 5d:4a:e8:22:fb:d1:92:a4:a2:02:65:24:0a:62:33:84 + a9:df:a9:4f:40:1a:95:1d:98:ea:0c:23:05:c2:db:1e + 4d:c5:f2:e9:63:6b:de:70:67:3a:a2:f4:72:c1:97:ae + d0:6f:78:82:2b:a1:6f:2c:d7:90:a3:4f:d6:f9:d6:80 + 11:d4:93:8e:e0:06:19:f6:d9:33:72:5d:d8:16:a2:ef + e1:a9:00:de:bc:7e:98:aa:97:45:b8:81:20:01:5d:0b + 10:3d:69:a9:ba:7d:2c:1f:e0:17:0e:bc:ee:97:95:61 + a7:6b:10:94:27:ad:ca:c1:93:3b:fa:dc:8d:3d:58:cc + fb:04:3f:7f:40:d6:6c:e8:83:a1:be:50:cd:46:73:11 + prime1: - 00:fb:d1:47:9d:9e:73:f8:1e:09:21:fd:89:16:05:56 - af:a5:cf:52:d5:cd:f7:26:18:d1:84:3a:36:65:0b:a2 - cd:f9:b8:99:c0:c7:ef:00:c9:2f:c9:92:1a:1d:3d:86 - 58:3b:b1:be:d4:8c:c6:1b:df:ba:ee:87:aa:d1:22:47 - 18:bd:de:01:0f:0d:cb:ac:d0:48:a4:f4:93:e2:a6:cb - b5:b7:f5:f5:72:dd:ec:ac:13:e8:3d:62:23:54:ac:52 - ff:ee:9a:e1:7f:b0:ae:3b:41:38:d8:39:2b:40:ef:25 - 81:50:b0:98:db:f8:40:16:6e:1a:41:79:22:90:58:99 - 80:c2:0d:ba:b5:d3:54:ec:28:33:e4:b0:58:ea:de:61 - a1:b7:30:0b:9d:dc:73:62:c2:07:d3:75:91:48:49:dd - be:cf:b2:90:95:8f:29:6c:6f:f6:68:cb:cf:d5:24:a3 - 
d7:37:81:1b:34:3b:af:9a:48:52:af:53:7c:f7:32:a2 - 3f: + 00:da:9b:62:22:f8:48:3e:9a:4f:ef:e5:b0:f1:e3:5e + ec:21:3d:3b:8f:ec:f1:d8:f6:fd:a7:2f:69:2a:cd:79 + a1:4b:6b:39:36:e9:c3:a9:5b:f9:59:50:71:6b:72:b0 + 8d:13:ca:93:b1:4d:4b:55:3a:69:b2:84:8d:4e:18:77 + 68:ae:f6:d8:ec:43:ef:c8:21:51:b9:cb:86:b7:62:ab + 5e:90:77:ac:e6:85:a9:e9:f7:19:f7:26:24:4c:18:81 + 53:85:42:cd:35:5c:1d:ae:70:0a:59:b0:44:a2:50:bc + 68:3a:bd:c2:53:7f:2e:ab:04:2d:85:ee:7d:8a:0c:db + cc:85:5a:b5:f8:6f:f8:92:53:0c:93:00:f7:ff:84:61 + d0:67:f4:10:b4:bf:9a:ba:35:df:05:79:d7:78:42:fb + 48:c4:db:35:27:4f:18:cf:8d:da:26:13:36:84:42:5d + 50:c4:9d:38:15:b5:6a:ca:3a:ab:f1:f9:b0:26:1a:54 + 99: prime2: - 00:d9:83:5e:be:0a:ea:0b:d9:66:63:56:3b:9e:44:aa - 46:6d:8d:6c:10:81:4b:de:19:5d:2c:16:7e:30:7c:ad - 23:9a:89:53:cc:18:e8:e8:51:2b:79:35:d0:67:7d:9e - 8f:be:ea:63:5e:14:c0:6b:ba:02:6c:4a:da:07:70:9d - 14:fa:be:1e:40:47:50:6f:f2:5a:87:9e:b6:b1:b8:55 - 2c:b6:a2:e3:b0:24:ba:ea:9b:55:87:8b:4b:cf:40:4a - 25:b4:89:cf:9e:76:ca:79:4a:f4:74:b7:ee:cf:6c:8f - cb:e3:3d:9e:86:3b:44:b7:70:ec:05:0c:68:ce:d6:c3 - a2:ec:e6:11:d6:2f:f7:80:26:a9:5c:aa:b9:a6:33:84 - a9:00:43:cf:72:07:8a:91:59:a2:b1:de:79:07:6b:81 - 67:a5:c2:4b:fd:29:8a:1a:96:66:57:66:d4:37:9a:98 - 69:d1:19:24:53:b1:a4:54:68:1e:8c:2b:b4:93:19:ed - 95: + 00:d4:2b:34:e6:a1:68:c9:c8:7a:22:5c:21:34:0f:67 + 4b:6a:78:d5:0e:63:be:4b:83:a3:ac:28:b6:37:80:c0 + 79:30:ec:0c:87:6d:c1:f2:d1:f8:bd:8d:3c:bb:20:81 + d9:dd:6b:25:0d:0c:e4:15:39:11:06:31:06:84:2c:8d + 12:73:04:b3:cf:fd:57:03:ca:65:3b:f7:e3:e0:6f:37 + 4c:b2:ef:c4:a3:cb:8b:54:f7:35:73:54:e4:f9:62:bd + 1e:7a:c1:76:b1:f0:cc:d9:d0:fc:c7:83:59:07:3b:55 + 1f:a9:88:7e:e9:27:bb:e8:5a:a0:57:de:1d:f4:56:8c + a9:34:9b:0a:43:d5:a1:2a:97:80:27:07:ee:57:20:29 + f7:08:02:78:c7:fc:9b:c9:28:64:a5:63:a4:a6:a2:65 + 48:fd:6d:42:b0:60:59:13:f7:f1:cd:78:09:94:66:42 + 36:6a:ee:7c:40:c5:f1:2f:f9:7e:ca:f7:b5:02:95:10 + af: coefficient: - 00:90:9a:7f:6f:14:a8:bc:79:3f:25:e5:62:f9:5d:29 - 78:a4:78:8e:7a:e4:8a:62:8a:7f:9c:ae:75:95:fe:ee - 
1a:99:53:40:01:76:29:7d:48:85:28:a2:2a:9f:0f:10 - 8c:19:6a:36:6b:e1:ac:a2:07:b9:72:5c:b9:a6:20:bb - 8f:cb:f5:ea:dd:3f:0e:ab:9d:c1:57:7e:7b:96:f9:da - b0:52:3c:3f:62:94:e7:5c:04:9e:ac:60:cd:4d:ec:7e - 68:d3:fb:2a:b4:02:f0:0e:be:37:bc:2a:f8:6e:8d:31 - b5:38:67:00:9e:67:9f:71:d0:88:36:32:69:4b:20:73 - eb:a1:d9:bc:72:c2:7e:39:1a:36:cc:c1:45:a2:14:37 - e6:ca:db:4d:0b:5b:68:a4:ff:b7:7b:b1:db:2f:70:27 - a1:6c:31:3f:c0:c3:23:04:b0:7a:e2:0d:21:ba:5a:80 - 52:c1:a1:2b:57:72:20:b6:ed:b1:e8:3b:95:88:81:90 - 5d: + 00:c2:99:5b:b5:1c:59:73:c7:70:78:75:aa:67:4d:92 + d1:27:b6:47:be:e2:71:39:31:f7:5d:be:79:bd:22:b2 + 34:80:b0:a5:39:ab:b2:53:2a:28:f9:4a:34:20:b6:ea + 25:d5:df:34:ad:d0:b3:26:ed:ba:f3:0c:07:95:34:50 + ae:48:40:a7:5b:f7:8c:e4:c6:d6:a4:1f:18:07:2a:ea + 01:38:90:d5:ca:89:19:3d:8e:c2:40:05:e5:09:a8:30 + 78:6c:e9:e6:1d:6c:5b:22:a9:24:d0:07:41:95:0e:82 + f6:19:e4:6c:c1:96:ae:c3:5d:84:a9:02:e5:7c:d6:b9 + f7:94:0f:b9:5a:41:87:db:03:17:9b:39:b2:e9:bb:f5 + 3f:0d:91:6d:d2:32:fe:ef:60:19:3c:15:48:c1:a8:e7 + 4c:b8:bd:dc:31:43:49:df:05:be:c5:3b:6a:5d:68:91 + 6d:e6:47:0b:a4:27:74:44:42:12:31:02:ad:aa:7e:e8 + 0c: exp1: - 00:ef:ce:66:20:01:44:b9:35:89:46:f8:56:33:45:54 - 3f:23:6d:23:9a:7e:71:6d:b3:56:db:50:40:7a:cb:b0 - f7:ec:67:52:ec:96:b9:d1:8a:c6:5a:74:2b:30:4b:66 - 03:e2:9d:2b:78:e8:b2:c4:da:b3:fe:f1:ed:c7:09:98 - a1:44:37:05:d5:1b:33:2a:58:93:c5:9b:30:b6:38:57 - 68:af:4e:a8:b7:02:06:9f:fc:b9:3e:b3:95:a7:ce:0f - a0:b0:ce:88:0e:7c:e7:ff:7f:e6:2d:6b:8b:f8:63:85 - d8:f7:49:a5:d8:5d:3a:52:e1:f9:58:fe:8d:de:de:b1 - 18:40:34:a8:e8:fc:df:33:a2:39:81:00:3b:3d:38:17 - cb:d4:53:09:cd:04:a2:51:9b:2b:ae:c1:98:60:3a:0f - d4:e5:a0:4c:36:51:46:86:80:bd:2d:21:62:c3:bd:07 - d6:2d:82:62:b0:c4:62:3f:4f:be:86:3e:c0:93:fc:81 - 2b: + 00:a6:15:8d:5e:a2:21:49:26:b5:fa:be:18:4b:fe:01 + bd:06:97:dd:eb:c0:0a:12:5b:bc:64:cb:79:6c:22:85 + f9:0c:32:9f:5a:60:09:de:5e:d9:37:89:0d:52:a3:e5 + 0c:99:ef:bb:7b:e6:0c:88:e9:03:2a:b4:d8:22:70:26 + 30:6b:55:71:83:37:2e:32:6f:56:07:01:61:0d:6c:b2 + 
63:fb:00:61:65:16:41:fc:56:56:ec:d5:96:98:15:ba + 13:b9:58:02:4e:3a:f8:f5:f1:7b:7b:9e:96:d6:76:de + 5d:95:db:5b:8b:52:42:23:7d:de:14:36:18:3c:cc:fe + 25:09:5f:dc:86:ae:93:3c:a0:4a:1a:59:8f:11:1b:03 + b1:71:79:15:44:2d:9a:21:45:12:76:b7:96:03:71:68 + 36:66:11:60:f5:c7:7b:43:1d:33:92:df:df:f8:65:ef + 3d:90:d3:1d:ac:28:93:c4:a7:04:c1:a7:80:b3:c2:27 + 69: exp2: - 11:e4:73:93:b0:74:26:3b:60:e7:c4:fd:2c:7c:bb:81 - 05:9b:ff:8a:b0:08:1c:a1:fb:7f:17:ee:93:70:7e:11 - 92:b1:bf:39:e7:c6:a8:ed:9c:64:e1:1f:5e:93:ff:ca - 15:4b:54:97:35:9f:ca:7c:c7:9c:3e:e0:06:82:a5:f9 - 46:d3:02:cc:08:d1:be:13:b2:8c:bb:6a:8d:dd:fa:eb - ad:ae:62:8a:67:cb:14:67:68:b6:b8:a7:a8:c9:c2:0f - ad:f5:34:25:f5:e1:9b:ee:a5:83:40:6a:1d:97:f1:90 - 35:06:29:97:23:22:f8:f0:0a:0a:34:46:1e:d5:9d:cc - 36:2e:8a:c3:12:b9:0a:4a:a3:dd:e2:91:58:f1:9d:f5 - 04:f7:8f:05:f3:46:db:c4:02:d5:1c:d6:d9:dc:67:0d - ae:9d:f8:00:40:3d:83:08:62:2c:c8:61:a6:9d:49:f2 - 52:67:fe:0c:00:6d:e3:1f:99:7b:b0:50:af:55:0f:ad - + 3a:11:ad:43:e6:2f:78:f2:be:c2:c2:b6:6f:ba:3b:8a + 3f:94:dc:b3:38:87:6f:c1:92:bd:5e:d3:28:73:bb:ba + 2f:b2:9b:67:41:9a:10:ac:79:48:df:ec:1c:47:34:62 + fd:a0:02:9f:04:c5:34:2d:cf:44:03:8d:06:05:ef:82 + 7a:2a:72:50:5d:c5:40:0b:58:13:c6:af:fe:d0:51:b1 + 53:54:1e:5c:ba:2b:e3:50:59:b7:bc:27:83:3e:0c:06 + d8:90:34:bd:54:eb:ac:3d:ef:c1:67:68:a9:7b:0a:bc + 44:b9:50:0a:de:4e:26:b5:0f:27:9d:6a:53:62:90:d3 + 0a:cb:d8:4e:62:9c:de:bc:62:5d:cc:64:e6:41:ae:5a + a7:3d:8d:f3:26:67:38:f2:2e:41:b9:f8:e9:86:8e:f4 + 20:30:94:4d:13:c1:ad:9e:71:f6:cb:80:cb:b6:05:2f + 41:ab:0e:24:e8:48:2f:da:38:62:b2:d3:3e:e6:af:8f + Public Key PIN: - pin-sha256:EiqPFBPoLKkCzVlK8KoKYGQT/LSo7/0iLg/I7nKt1/0= + pin-sha256:6j7MphUbNRjXh9x/BogWeu4m7+ON7aYmCyFxQsSMsec= Public Key ID: - sha256:122a8f1413e82ca902cd594af0aa0a606413fcb4a8effd222e0fc8ee72add7fd - sha1:4b4510b7a19c88cbce887f5b9d531a6d81d34400 + sha256:ea3ecca6151b3518d787dc7f0688167aee26efe38deda6260b217142c48cb1e7 + sha1:87e4402a0fb8615b3fc7d034871265903c46ec84 -----BEGIN RSA PRIVATE KEY----- 
-MIIG5AIBAAKCAYEA1fWe/glTz5MRvZtZ6Pk7ac6jCnDbNJhMMBDWjIC9Xc/31joZ -kK5VnW5P/sNZq3S5Glx2Y2Ie+jCeQ7hWYNl1WqCIrV4fNjlgEnuEFUOze2lY243t -4k+OeyQ2YmopDqX2vFiX9q7vzVP6120enUBO1OgyBUnFZpTHmWukQl2ZhzxjoakA -DjYP6sjejhKWXeYjms8KTXpBjl1PaWcvYe4IrMzZuNZq3pveUyl83PcD9uVT0T/j -rZnGv/Ior/TLLdIFmFjOutDX96uMimbgjSxqQcg57EzUBCWIWKjAXNW+v+lqJcCl -YY5y4ElUhuY8j9sQa5OHSevemaIXRiuQMJGFquASVhzPrgpD0jE8UIpwqOQycU+N -NK1nutwzKoq8ZzOoVozlrpz2AifaJ5StVnfzK+0jkuj+7FnkiNeoHUV2DjGLa3J/ -1YwcrLa3J0Gv1tON9BFd81P9mbfCgNc5iwfOP5J/qe0tUKQ7V+dzPDTd89FXTf+5 -VFrmY8bLKP3F08GrAgMBAAECggGAHjiweX+FyBck9VxBKegyXTKj0vC39cjhUhS+ -yV/R37NldWwFems1iqQvRnP/cXluP+v5iPYuG/bMFBKwmMN+kQuF4r8dt4IJMPMj -aAGFE5SAyZpVlJbaMEigKeyG2hvVKyt0Y5K4Ko+HKfCu11VjDS2zCw4thNzVCLWs -oPcpnXGJPSdq65b1TpuK3BSCCsdcFhzSfrkbE2nYsrGxfqqprQbOZg5bUBBCKgr9 -KRT3CWPBIBhfJ4FGEoy49ImmPVWh1GT88tvXnPW+952IXG02pEvqxePqMoFr80e1 -NdXEGrKuEp0Zo+ykr0F+XjSd9by5H6PCMrT8lad6VATi1k4QL2Zoizsg6gXbLnIB -Eed8+HIPYL7xJxmtOm/pcFY6hm5GDeNVMWZ3CYRIuSVLwyZwEsqkX8Y9auXbTWME -uAkHyTCFCJ13QCZg2hDCU9IADZ7Z1XEGMOv79z+CH7Oa800khi6U/Qae3CZo+mTD -+foIxLLsevVVxRC14i3eugQwEFuZAoHBAPvRR52ec/geCSH9iRYFVq+lz1LVzfcm -GNGEOjZlC6LN+biZwMfvAMkvyZIaHT2GWDuxvtSMxhvfuu6HqtEiRxi93gEPDcus -0Eik9JPipsu1t/X1ct3srBPoPWIjVKxS/+6a4X+wrjtBONg5K0DvJYFQsJjb+EAW -bhpBeSKQWJmAwg26tdNU7Cgz5LBY6t5hobcwC53cc2LCB9N1kUhJ3b7PspCVjyls -b/Zoy8/VJKPXN4EbNDuvmkhSr1N89zKiPwKBwQDZg16+CuoL2WZjVjueRKpGbY1s -EIFL3hldLBZ+MHytI5qJU8wY6OhRK3k10Gd9no++6mNeFMBrugJsStoHcJ0U+r4e -QEdQb/Jah562sbhVLLai47AkuuqbVYeLS89ASiW0ic+edsp5SvR0t+7PbI/L4z2e -hjtEt3DsBQxoztbDouzmEdYv94AmqVyquaYzhKkAQ89yB4qRWaKx3nkHa4FnpcJL -/SmKGpZmV2bUN5qYadEZJFOxpFRoHowrtJMZ7ZUCgcEA785mIAFEuTWJRvhWM0VU -PyNtI5p+cW2zVttQQHrLsPfsZ1LslrnRisZadCswS2YD4p0reOiyxNqz/vHtxwmY -oUQ3BdUbMypYk8WbMLY4V2ivTqi3Agaf/Lk+s5Wnzg+gsM6IDnzn/3/mLWuL+GOF -2PdJpdhdOlLh+Vj+jd7esRhANKjo/N8zojmBADs9OBfL1FMJzQSiUZsrrsGYYDoP -1OWgTDZRRoaAvS0hYsO9B9YtgmKwxGI/T76GPsCT/IErAoHAEeRzk7B0Jjtg58T9 -LHy7gQWb/4qwCByh+38X7pNwfhGSsb8558ao7Zxk4R9ek//KFUtUlzWfynzHnD7g 
-BoKl+UbTAswI0b4Tsoy7ao3d+uutrmKKZ8sUZ2i2uKeoycIPrfU0JfXhm+6lg0Bq -HZfxkDUGKZcjIvjwCgo0Rh7Vncw2LorDErkKSqPd4pFY8Z31BPePBfNG28QC1RzW -2dxnDa6d+ABAPYMIYizIYaadSfJSZ/4MAG3jH5l7sFCvVQ+tAoHBAJCaf28UqLx5 -PyXlYvldKXikeI565Ipiin+crnWV/u4amVNAAXYpfUiFKKIqnw8QjBlqNmvhrKIH -uXJcuaYgu4/L9erdPw6rncFXfnuW+dqwUjw/YpTnXASerGDNTex+aNP7KrQC8A6+ -N7wq+G6NMbU4ZwCeZ59x0Ig2MmlLIHProdm8csJ+ORo2zMFFohQ35srbTQtbaKT/ -t3ux2y9wJ6FsMT/AwyMEsHriDSG6WoBSwaErV3Igtu2x6DuViIGQXQ== +MIIG5AIBAAKCAYEAtS2SivLaAfsZ1X9xun/6i/1UNSxG5kPvVkfTPWOwSgCYyqn7 +u2vZsIsvcurqHQBEg1SWF4XAdcFeCxdDN8em6ISfSOUFL9TSQrZ5eFQYcVTX9IFX +DWXbFSC8AgJpYhrPf2O0vxQNQFD4MpNXrLpEkc3vLrg/JhySC1HAYfO/nuJV3ZnA +xJZCIv4upT5j0VjzfPw6WR/OUUlcmLnh0+qMLv7G2dRLjdAsV1U8k7eykk+z9qXe +KAjgFBEEgLTVbHR1zbrUpw42DYR7SQpEJ8wrJ9qHdI4H/9MnPu8O75kfgauQV4s/ +oIgWmVeHMDxpGaLEKgcYVE2F/HlMfFvCrrZKKXi1/k35rG8sg6cP/bRuRZlgO0xF +TrW0QKmWoBLqyqVMvifdkT0EBWZ4eZWTDn2Rr4HMe7ov54QmKXaXp2VdOGzO1Efv +9KvDildb5aEqBrVz+tU+C/Cdg5p1d2g2m1Uk9wjcYbXOWx9fd9Ur/fxJuHtauxOW +4gzFLcTOgvUgVmSXAgMBAAECggGAZnpRcjBs6UPrV9xPKqsqvzXaBH132NTCMm5L +6GRTmXf+9FrzXQ5iwj3n57tCEodNOW+FueZYdwKZu/I7ffh6yvgA1I1dwrFBAKUP +zeTbMndH8y+ZWqWrJqcuUICmtOvvQ+HUoWOpHyDtUkYPCE8Pa90slaN3bIoqfSaP +h5gEYc0pvj8yTr+ixwI4xhSlBwumJaLRKgoYe9ff/S7esivvgMFxHlK7LYp7GibH +LdJwudtFMpSdNM3YWOFLR7WBJmg0KjICC+aVOtLU1uTHv44ETvx16W30pKhrUHZ8 +EAeBllZUFtc5il9ReR+WScvXiw1RXUroIvvRkqSiAmUkCmIzhKnfqU9AGpUdmOoM +IwXC2x5NxfLpY2vecGc6ovRywZeu0G94giuhbyzXkKNP1vnWgBHUk47gBhn22TNy +XdgWou/hqQDevH6YqpdFuIEgAV0LED1pqbp9LB/gFw687peVYadrEJQnrcrBkzv6 +3I09WMz7BD9/QNZs6IOhvlDNRnMRAoHBANqbYiL4SD6aT+/lsPHjXuwhPTuP7PHY +9v2nL2kqzXmhS2s5NunDqVv5WVBxa3KwjRPKk7FNS1U6abKEjU4Yd2iu9tjsQ+/I +IVG5y4a3YqtekHes5oWp6fcZ9yYkTBiBU4VCzTVcHa5wClmwRKJQvGg6vcJTfy6r +BC2F7n2KDNvMhVq1+G/4klMMkwD3/4Rh0Gf0ELS/mro13wV513hC+0jE2zUnTxjP +jdomEzaEQl1QxJ04FbVqyjqr8fmwJhpUmQKBwQDUKzTmoWjJyHoiXCE0D2dLanjV +DmO+S4OjrCi2N4DAeTDsDIdtwfLR+L2NPLsggdndayUNDOQVOREGMQaELI0ScwSz +z/1XA8plO/fj4G83TLLvxKPLi1T3NXNU5PlivR56wXax8MzZ0PzHg1kHO1UfqYh+ 
+6Se76FqgV94d9FaMqTSbCkPVoSqXgCcH7lcgKfcIAnjH/JvJKGSlY6SmomVI/W1C +sGBZE/fxzXgJlGZCNmrufEDF8S/5fsr3tQKVEK8CgcEAphWNXqIhSSa1+r4YS/4B +vQaX3evAChJbvGTLeWwihfkMMp9aYAneXtk3iQ1So+UMme+7e+YMiOkDKrTYInAm +MGtVcYM3LjJvVgcBYQ1ssmP7AGFlFkH8Vlbs1ZaYFboTuVgCTjr49fF7e56W1nbe +XZXbW4tSQiN93hQ2GDzM/iUJX9yGrpM8oEoaWY8RGwOxcXkVRC2aIUUSdreWA3Fo +NmYRYPXHe0MdM5Lf3/hl7z2Q0x2sKJPEpwTBp4CzwidpAoHAOhGtQ+YvePK+wsK2 +b7o7ij+U3LM4h2/Bkr1e0yhzu7ovsptnQZoQrHlI3+wcRzRi/aACnwTFNC3PRAON +BgXvgnoqclBdxUALWBPGr/7QUbFTVB5cuivjUFm3vCeDPgwG2JA0vVTrrD3vwWdo +qXsKvES5UAreTia1DyedalNikNMKy9hOYpzevGJdzGTmQa5apz2N8yZnOPIuQbn4 +6YaO9CAwlE0Twa2ecfbLgMu2BS9Bqw4k6Egv2jhistM+5q+PAoHBAMKZW7UcWXPH +cHh1qmdNktEntke+4nE5Mfddvnm9IrI0gLClOauyUyoo+Uo0ILbqJdXfNK3Qsybt +uvMMB5U0UK5IQKdb94zkxtakHxgHKuoBOJDVyokZPY7CQAXlCagweGzp5h1sWyKp +JNAHQZUOgvYZ5GzBlq7DXYSpAuV81rn3lA+5WkGH2wMXmzmy6bv1Pw2RbdIy/u9g +GTwVSMGo50y4vdwxQ0nfBb7FO2pdaJFt5kcLpCd0REISMQKtqn7oDA== -----END RSA PRIVATE KEY----- From d23d1fc0f9483e5b99abd31d92369fa8cbb05150 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Fri, 5 Jun 2020 07:56:39 +0200 Subject: [PATCH 0224/1854] travis: fix alpine builds With the latest version of the alpine container image it seems that alpine changed a few package names. This adapts the alpine container to solve the travis failures. 
Signed-off-by: Adrian Reber --- scripts/build/Dockerfile.alpine | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/scripts/build/Dockerfile.alpine b/scripts/build/Dockerfile.alpine index 5785102da..dbf3c2bf1 100644 --- a/scripts/build/Dockerfile.alpine +++ b/scripts/build/Dockerfile.alpine @@ -15,9 +15,12 @@ RUN apk update && apk add \ libnet-dev \ libnl3-dev \ nftables \ + nftables-dev \ pkgconfig \ protobuf-c-dev \ protobuf-dev \ + py3-pip \ + py3-protobuf \ python3 \ sudo @@ -43,7 +46,7 @@ RUN apk add \ # The rpc test cases are running as user #1000, let's add the user RUN adduser -u 1000 -D test -RUN pip3 install protobuf junit_xml +RUN pip3 install junit_xml # For zdtm we need an unversioned python binary RUN ln -s /usr/bin/python3 /usr/bin/python From be2ded15ee1cff6c0bd5733dda57bb17ea85bcd9 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Fri, 5 Jun 2020 07:55:18 +0200 Subject: [PATCH 0225/1854] test: fix flake8 errors The newest version of flake8 reports errors that variable names like 'l' should not be used, because they are hard to read. This changes 'l' to 'line' to make flake8 happy.
Signed-off-by: Adrian Reber --- test/zdtm.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/zdtm.py b/test/zdtm.py index 5e42c769e..b111fa383 100755 --- a/test/zdtm.py +++ b/test/zdtm.py @@ -2047,11 +2047,11 @@ def grep_errors(fname): print_next = False before = [] with open(fname, errors='replace') as fd: - for l in fd: - before.append(l) + for line in fd: + before.append(line) if len(before) > 5: before.pop(0) - if "Error" in l or "Warn" in l: + if "Error" in line or "Warn" in line: if first: print_fname(fname, 'log') print_sep("grep Error", "-", 60) @@ -2061,7 +2061,7 @@ def grep_errors(fname): before = [] else: if print_next: - print_next = print_error(l) + print_next = print_error(line) before = [] if not first: print_sep("ERROR OVER", "-", 60) From 01cab14dfad656eb1d5f7e6a611a49a87c83b34e Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Fri, 22 Feb 2019 18:04:32 +0000 Subject: [PATCH 0226/1854] util: Fix addr casting for IPv4/IPv6 in autobind When saddr.ss_family is AF_INET6 we should cast &saddr to (struct sockaddr_in6 *). 
Signed-off-by: Radostin Stoyanov Signed-off-by: Andrei Vagin --- criu/util.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/criu/util.c b/criu/util.c index 6f6a6dde7..06c594ca9 100644 --- a/criu/util.c +++ b/criu/util.c @@ -1131,9 +1131,9 @@ int setup_tcp_server(char *type, char *addr, unsigned short *port) } if (saddr.ss_family == AF_INET6) { - (*port) = ntohs(((struct sockaddr_in *)&saddr)->sin_port); - } else if (saddr.ss_family == AF_INET) { (*port) = ntohs(((struct sockaddr_in6 *)&saddr)->sin6_port); + } else if (saddr.ss_family == AF_INET) { + (*port) = ntohs(((struct sockaddr_in *)&saddr)->sin_port); } pr_info("Using %u port\n", (*port)); From 0708cbd8839267c30e8dcd8bec63f5bc2d2d36d0 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Thu, 13 Sep 2018 18:28:10 +0100 Subject: [PATCH 0227/1854] remote: Use tmp file buffer when restore ip dump When CRIU calls the ip tool on restore, it passes the fd of remote socket by replacing the STDIN before execvp. The stdin is used by the ip tool to receive input. However, the ip tool calls ftell(stdin) which fails with "Illegal seek" since UNIX sockets do not support file positioning operations. To resolve this issue, read the received content from the UNIX socket and store it into temporary file, then replace STDIN with the fd of this tmp file. 
# python test/zdtm.py run -t zdtm/static/env00 --remote -f ns === Run 1/1 ================ zdtm/static/env00 ========================= Run zdtm/static/env00 in ns ========================== Start test ./env00 --pidfile=env00.pid --outfile=env00.out --envname=ENV_00_TEST Adding image cache Adding image proxy Run criu dump Run criu restore =[log]=> dump/zdtm/static/env00/31/1/restore.log ------------------------ grep Error ------------------------ RTNETLINK answers: File exists (00.229895) 1: do_open_remote_image RDONLY path=route-9.img snapshot_id=dump/zdtm/static/env00/31/1 (00.230316) 1: Running ip route restore Failed to restore: ftell: Illegal seek (00.232757) 1: Error (criu/util.c:712): exited, status=255 (00.232777) 1: Error (criu/net.c:1479): IP tool failed on route restore (00.232803) 1: Error (criu/net.c:2153): Can't create net_ns (00.255091) Error (criu/cr-restore.c:1177): 105 killed by signal 9: Killed (00.255307) Error (criu/mount.c:2960): mnt: Can't remove the directory /tmp/.criu.mntns.dTd7ak: No such file or directory (00.255339) Error (criu/cr-restore.c:2119): Restoring FAILED. 
------------------------ ERROR OVER ------------------------ ################# Test zdtm/static/env00 FAIL at CRIU restore ################## ##################################### FAIL ##################################### Fixes #311 Signed-off-by: Radostin Stoyanov Signed-off-by: Andrei Vagin --- criu/net.c | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/criu/net.c b/criu/net.c index 762f9b547..27e7c7e33 100644 --- a/criu/net.c +++ b/criu/net.c @@ -2068,19 +2068,46 @@ out: static int restore_ip_dump(int type, int pid, char *cmd) { - int ret = -1; + int ret = -1, sockfd, n, written; + FILE *tmp_file; struct cr_img *img; + char buf[1024]; img = open_image(type, O_RSTR, pid); if (empty_image(img)) { close_image(img); return 0; } + sockfd = img_raw_fd(img); + tmp_file = tmpfile(); + if (!tmp_file) { + pr_perror("Failed to open tmpfile"); + return -1; + } + + while ((n = read(sockfd, buf, 1024)) > 0) { + written = fwrite(buf, sizeof(char), n, tmp_file); + if (written < n) { + pr_perror("Failed to write to tmpfile " + "[written: %d; total: %d]", written, n); + return -1; + } + } + + if (fseek(tmp_file, 0, SEEK_SET)) { + pr_perror("Failed to set file position to beginning of tmpfile"); + return -1; + } + if (img) { - ret = run_ip_tool(cmd, "restore", NULL, NULL, img_raw_fd(img), -1, 0); + ret = run_ip_tool(cmd, "restore", NULL, NULL, fileno(tmp_file), -1, 0); close_image(img); } + if(fclose(tmp_file)) { + pr_perror("Failed to close tmpfile"); + } + return ret; } From 51c3f8a908b1738d7d268afc5bb0dd9a59f3d8a5 Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Thu, 11 Jun 2020 21:01:24 +0000 Subject: [PATCH 0228/1854] pipes: loop over splice() when dumping a pipe's data Instead of erroring, we should loop until we get the desired number of bytes written, like regular I/O loops. 
Signed-off-by: Nicolas Viennot --- criu/pipes.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/criu/pipes.c b/criu/pipes.c index d74329161..5787fdc53 100644 --- a/criu/pipes.c +++ b/criu/pipes.c @@ -463,18 +463,15 @@ int dump_one_pipe_data(struct pipe_data_dump *pd, int lfd, const struct fd_parms if (pb_write_one(img, &pde, PB_PIPE_DATA)) goto err_close; - if (bytes) { + while (bytes > 0) { int wrote; - wrote = splice(steal_pipe[0], NULL, img_raw_fd(img), NULL, bytes, 0); if (wrote < 0) { pr_perror("Can't push pipe data"); goto err_close; - } else if (wrote != bytes) { - pr_err("%#x: Wanted to write %d bytes, but wrote %d\n", - pipe_id(p), bytes, wrote); - goto err_close; - } + } else if (wrote == 0) + break; + bytes -= wrote; } ret = 0; From 7d79a58f4daa9ddda015fc0aaca2a60d156d12cc Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Thu, 21 May 2020 17:40:17 +0000 Subject: [PATCH 0229/1854] img-streamer: introduction of criu-image-streamer This adds the ability to stream images with criu-image-streamer The workflow is the following: 1) criu-image-streamer is started, and starts listening on a UNIX socket. 2) CRIU is started. img_streamer_init() is invoked, which connects to the socket. During dump/restore operations, instead of using local disk to open an image file, img_streamer_open() is called to provide a UNIX pipe that is sent over the UNIX socket. 3) Once the operation is done, img_streamer_finish() is called, and the UNIX socket is disconnected. 
criu-image-streamer can be found at: https://github.com/checkpoint-restore/criu-image-streamer Signed-off-by: Nicolas Viennot --- Documentation/criu.txt | 5 + criu/Makefile.crtools | 1 + criu/config.c | 1 + criu/cr-dump.c | 2 + criu/cr-restore.c | 4 + criu/cr-service.c | 9 +- criu/crtools.c | 34 ++++- criu/files-reg.c | 10 +- criu/image.c | 32 +++-- criu/img-streamer.c | 232 +++++++++++++++++++++++++++++++++++ criu/include/cr_options.h | 1 + criu/include/image.h | 2 +- criu/include/img-streamer.h | 8 ++ criu/include/protobuf-desc.h | 4 +- criu/include/servicefd.h | 1 + criu/mem.c | 6 +- criu/page-xfer.c | 8 ++ criu/pagemap.c | 63 +++++++++- criu/protobuf-desc.c | 1 + criu/util.c | 14 ++- images/Makefile | 1 + images/img-streamer.proto | 16 +++ 22 files changed, 429 insertions(+), 26 deletions(-) create mode 100644 criu/img-streamer.c create mode 100644 criu/include/img-streamer.h create mode 100644 images/img-streamer.proto diff --git a/Documentation/criu.txt b/Documentation/criu.txt index ab63e461c..4e9b4132e 100644 --- a/Documentation/criu.txt +++ b/Documentation/criu.txt @@ -76,6 +76,11 @@ The following levels are available: *-D*, *--images-dir* 'path':: Use 'path' as a base directory where to look for sets of image files. +*--stream*:: + dump/restore images using criu-image-streamer. + See https://github.com/checkpoint-restore/criu-image-streamer for detailed + usage. + *--prev-images-dir* 'path':: Use 'path' as a parent directory where to look for sets of image files. This option makes sense in case of incremental dumps. 
diff --git a/criu/Makefile.crtools b/criu/Makefile.crtools index 5c25b8928..dc92c2ea2 100644 --- a/criu/Makefile.crtools +++ b/criu/Makefile.crtools @@ -29,6 +29,7 @@ obj-y += files-reg.o obj-y += fsnotify.o obj-y += image-desc.o obj-y += image.o +obj-y += img-streamer.o obj-y += ipc_ns.o obj-y += irmap.o obj-y += kcmp-ids.o diff --git a/criu/config.c b/criu/config.c index b84b7da28..e78b534a9 100644 --- a/criu/config.c +++ b/criu/config.c @@ -510,6 +510,7 @@ int parse_options(int argc, char **argv, bool *usage_error, BOOL_OPT(SK_CLOSE_PARAM, &opts.tcp_close), { "verbosity", optional_argument, 0, 'v' }, { "ps-socket", required_argument, 0, 1091}, + BOOL_OPT("stream", &opts.stream), { "config", required_argument, 0, 1089}, { "no-default-config", no_argument, 0, 1090}, { "tls-cacert", required_argument, 0, 1092}, diff --git a/criu/cr-dump.c b/criu/cr-dump.c index 745998afc..2b4c9ae82 100644 --- a/criu/cr-dump.c +++ b/criu/cr-dump.c @@ -82,6 +82,7 @@ #include "eventpoll.h" #include "memfd.h" #include "timens.h" +#include "img-streamer.h" /* * Architectures can overwrite this function to restore register sets that @@ -1759,6 +1760,7 @@ static int cr_dump_finish(int ret) free_userns_maps(); close_service_fd(CR_PROC_FD_OFF); + close_image_dir(); if (ret) { pr_err("Dumping FAILED.\n"); diff --git a/criu/cr-restore.c b/criu/cr-restore.c index ed4b95b91..f572f79a0 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -29,6 +29,7 @@ #include "cr_options.h" #include "servicefd.h" #include "image.h" +#include "img-streamer.h" #include "util.h" #include "util-pie.h" #include "criu-log.h" @@ -2355,6 +2356,9 @@ skip_ns_bouncing: pr_info("Restore finished successfully. 
Tasks resumed.\n"); write_stats(RESTORE_STATS); + /* This has the effect of dismissing the image streamer */ + close_image_dir(); + ret = run_scripts(ACT_POST_RESUME); if (ret != 0) pr_err("Post-resume script ret code %d\n", ret); diff --git a/criu/cr-service.c b/criu/cr-service.c index 279016bcd..53eadb1bc 100644 --- a/criu/cr-service.c +++ b/criu/cr-service.c @@ -343,7 +343,14 @@ static int setup_opts_from_req(int sk, CriuOpts *req) if (req->parent_img) SET_CHAR_OPTS(img_parent, req->parent_img); - if (open_image_dir(images_dir_path) < 0) { + /* + * Image streaming is not supported with CRIU's service feature as + * the streamer must be started for each dump/restore operation. + * It is unclear how to do that with RPC, so we punt for now. + * This explains why we provide the argument mode=-1 instead of + * O_RSTR or O_DUMP. + */ + if (open_image_dir(images_dir_path, -1) < 0) { pr_perror("Can't open images directory"); goto err; } diff --git a/criu/crtools.c b/criu/crtools.c index 7f72dde27..ad61fa9bb 100644 --- a/criu/crtools.c +++ b/criu/crtools.c @@ -54,6 +54,20 @@ void flush_early_log_to_stderr(void) flush_early_log_buffer(STDERR_FILENO); } +static int image_dir_mode(char *argv[], int optind) +{ + if (!strcmp(argv[optind], "dump") || + !strcmp(argv[optind], "pre-dump") || + (!strcmp(argv[optind], "cpuinfo") && !strcmp(argv[optind + 1], "dump"))) + return O_DUMP; + + if (!strcmp(argv[optind], "restore") || + (!strcmp(argv[optind], "cpuinfo") && !strcmp(argv[optind + 1], "restore"))) + return O_RSTR; + + return -1; +} + int main(int argc, char *argv[], char *envp[]) { int ret = -1; @@ -148,13 +162,30 @@ int main(int argc, char *argv[], char *envp[]) } } + if (opts.stream && image_dir_mode(argv, optind) == -1) { + pr_err("--stream cannot be used with the %s command\n", argv[optind]); + goto usage; + } + /* We must not open imgs dir, if service is called */ if (strcmp(argv[optind], "service")) { - ret = open_image_dir(opts.imgs_dir); + ret = 
open_image_dir(opts.imgs_dir, image_dir_mode(argv, optind)); if (ret < 0) return 1; } + /* + * The kernel might send us lethal signals when writing to a pipe + * which reader has disappeared. We deal with write() failures on our + * own, and prefer not to get killed. So we ignore SIGPIPEs. + * + * Pipes are used in various places: + * 1) Receiving application page data + * 2) Transmitting data to the image streamer + * 3) Emitting logs (potentially to a pipe). + */ + signal(SIGPIPE, SIG_IGN); + /* * When a process group becomes an orphan, * its processes are sent a SIGHUP signal @@ -322,6 +353,7 @@ usage: " this requires running a second instance of criu\n" " in lazy-pages mode: 'criu lazy-pages -D DIR'\n" " --lazy-pages and lazy-pages mode require userfaultfd\n" +" --stream dump/restore images using criu-image-streamer\n" "\n" "* External resources support:\n" " --external RES dump objects from this list as external resources:\n" diff --git a/criu/files-reg.c b/criu/files-reg.c index b53e9b080..7e84addf2 100644 --- a/criu/files-reg.c +++ b/criu/files-reg.c @@ -154,7 +154,6 @@ static int trim_last_parent(char *path) static int copy_chunk_from_file(int fd, int img, off_t off, size_t len) { - char *buf = NULL; int ret; while (len > 0) { @@ -167,7 +166,6 @@ static int copy_chunk_from_file(int fd, int img, off_t off, size_t len) len -= ret; } - xfree(buf); return 0; } @@ -213,7 +211,6 @@ static int copy_file_to_chunks(int fd, struct cr_img *img, size_t file_size) static int copy_chunk_to_file(int img, int fd, off_t off, size_t len) { - char *buf = NULL; int ret; while (len > 0) { @@ -221,7 +218,11 @@ static int copy_chunk_to_file(int img, int fd, off_t off, size_t len) pr_perror("Can't seek file"); return -1; } - ret = sendfile(fd, img, NULL, len); + + if (opts.stream) + ret = splice(img, NULL, fd, NULL, len, SPLICE_F_MOVE); + else + ret = sendfile(fd, img, NULL, len); if (ret < 0) { pr_perror("Can't send data"); return -1; @@ -231,7 +232,6 @@ static int 
copy_chunk_to_file(int img, int fd, off_t off, size_t len) len -= ret; } - xfree(buf); return 0; } diff --git a/criu/image.c b/criu/image.c index 0225788b0..2bbb4dd02 100644 --- a/criu/image.c +++ b/criu/image.c @@ -17,6 +17,7 @@ #include "images/inventory.pb-c.h" #include "images/pagemap.pb-c.h" #include "proc_parse.h" +#include "img-streamer.h" #include "namespaces.h" bool ns_per_id = false; @@ -415,13 +416,16 @@ static int do_open_image(struct cr_img *img, int dfd, int type, unsigned long of flags = oflags & ~(O_NOBUF | O_SERVICE | O_FORCE_LOCAL); - /* - * For pages images dedup we need to open images read-write on - * restore, that may require proper capabilities, so we ask - * usernsd to do it for us - */ - if (root_ns_mask & CLONE_NEWUSER && - type == CR_FD_PAGES && oflags & O_RDWR) { + if (opts.stream && !(oflags & O_FORCE_LOCAL)) { + ret = img_streamer_open(path, flags); + errno = EIO; /* errno value is meaningless, only the ret value is meaningful */ + } else if (root_ns_mask & CLONE_NEWUSER && + type == CR_FD_PAGES && oflags & O_RDWR) { + /* + * For pages images dedup we need to open images read-write on + * restore, that may require proper capabilities, so we ask + * usernsd to do it for us + */ struct openat_args pa = { .flags = flags, .err = 0, @@ -520,7 +524,12 @@ struct cr_img *img_from_fd(int fd) return img; } -int open_image_dir(char *dir) +/* + * `mode` should be O_RSTR or O_DUMP depending on the intent. + * This is used when opts.stream is enabled for picking the right streamer + * socket name. `mode` is ignored when opts.stream is not enabled. 
+ */ +int open_image_dir(char *dir, int mode) { int fd, ret; @@ -535,7 +544,10 @@ int open_image_dir(char *dir) return -1; fd = ret; - if (opts.img_parent) { + if (opts.stream) { + if (img_streamer_init(dir, mode) < 0) + goto err; + } else if (opts.img_parent) { ret = symlinkat(opts.img_parent, fd, CR_PARENT_LINK); if (ret < 0 && errno != EEXIST) { pr_perror("Can't link parent snapshot"); @@ -556,6 +568,8 @@ err: void close_image_dir(void) { + if (opts.stream) + img_streamer_finish(); close_service_fd(IMG_FD_OFF); } diff --git a/criu/img-streamer.c b/criu/img-streamer.c new file mode 100644 index 000000000..e31b17dd9 --- /dev/null +++ b/criu/img-streamer.c @@ -0,0 +1,232 @@ +#include +#include +#include +#include + +#include "cr_options.h" +#include "img-streamer.h" +#include "image.h" +#include "images/img-streamer.pb-c.h" +#include "protobuf.h" +#include "servicefd.h" +#include "rst-malloc.h" +#include "common/scm.h" +#include "common/lock.h" + +/* + * We use different path names for the dump and restore sockets because: + * 1) The user may want to perform both at the same time (akin to live + * migration). Specifying the same images-dir is convenient. + * 2) It fails quickly when the user mixes up the streamer and CRIU operations. + * (e.g., streamer is in capture mode, while CRIU is in restore mode). + */ +#define IMG_STREAMER_CAPTURE_SOCKET_NAME "streamer-capture.sock" +#define IMG_STREAMER_SERVE_SOCKET_NAME "streamer-serve.sock" + +/* All requests go through the same socket connection. We must synchronize */ +static mutex_t *img_streamer_fd_lock; + +/* Either O_DUMP or O_RSTR */ +static int img_streamer_mode; + +static const char *socket_name_for_mode(int mode) +{ + switch (mode) { + case O_DUMP: return IMG_STREAMER_CAPTURE_SOCKET_NAME; + case O_RSTR: return IMG_STREAMER_SERVE_SOCKET_NAME; + default: BUG(); return NULL; + } +} + +/* + * img_streamer_init() connects to the image streamer socket. + * mode should be either O_DUMP or O_RSTR.
+ */ +int img_streamer_init(const char *image_dir, int mode) +{ + struct sockaddr_un addr; + int sockfd; + + img_streamer_mode = mode; + + sockfd = socket(AF_UNIX, SOCK_STREAM, 0); + if (sockfd < 0) { + pr_perror("Unable to instantiate UNIX socket"); + return -1; + } + + memset(&addr, 0, sizeof(addr)); + addr.sun_family = AF_UNIX; + snprintf(addr.sun_path, sizeof(addr.sun_path), "%s/%s", + image_dir, socket_name_for_mode(mode)); + + if (connect(sockfd, (struct sockaddr *)&addr, sizeof(addr)) < 0) { + pr_perror("Unable to connect to image streamer socket: %s", addr.sun_path); + goto err; + } + + img_streamer_fd_lock = shmalloc(sizeof(*img_streamer_fd_lock)); + if (!img_streamer_fd_lock) { + pr_err("Failed to allocate memory\n"); + goto err; + } + mutex_init(img_streamer_fd_lock); + + if (install_service_fd(IMG_STREAMER_FD_OFF, sockfd) < 0) + goto err; + + return 0; + +err: + close(sockfd); + return -1; +} + +/* + * img_streamer_finish() indicates that no more files will be opened. + * In other words, img_streamer_open() will no longer be called. + */ +void img_streamer_finish(void) +{ + if (get_service_fd(IMG_STREAMER_FD_OFF) >= 0) { + pr_info("Dismissing the image streamer\n"); + close_service_fd(IMG_STREAMER_FD_OFF); + } +} + +/* + * The regular protobuf APIs pb_write_one() and pb_read_one() operate over a + * `struct cr_img` object. Sadly, we don't have such object. We just have a + * file descriptor. The following pb_write_one_fd() and pb_read_one_fd() + * provide a protobuf API over a file descriptor. The implementation is a bit + * of a hack, but should be fine. At some point we can revisit to have a + * proper protobuf API over fds. 
+ */ +static int pb_write_one_fd(int fd, void *obj, int type) +{ + int ret; + struct cr_img img; + memset(&img, 0, sizeof(img)); + + img._x.fd = fd; + ret = pb_write_one(&img, obj, type); + if (ret < 0) + pr_perror("Failed to communicate with the image streamer"); + return ret; +} + +static int pb_read_one_fd(int fd, void **pobj, int type) +{ + int ret; + struct cr_img img; + memset(&img, 0, sizeof(img)); + + img._x.fd = fd; + ret = pb_read_one(&img, pobj, type); + if (ret < 0) + pr_perror("Failed to communicate with the image streamer"); + return ret; +} + +static int send_file_request(char *filename) +{ + ImgStreamerRequestEntry req = IMG_STREAMER_REQUEST_ENTRY__INIT; + req.filename = filename; + return pb_write_one_fd(get_service_fd(IMG_STREAMER_FD_OFF), + &req, PB_IMG_STREAMER_REQUEST); +} + +static int recv_file_reply(bool *exists) +{ + ImgStreamerReplyEntry *reply; + int ret = pb_read_one_fd(get_service_fd(IMG_STREAMER_FD_OFF), + (void **)&reply, PB_IMG_STREAMER_REPLY); + if (ret < 0) + return ret; + + *exists = reply->exists; + free(reply); + + return 0; +} + +/* + * Using a pipe for image file transfers allows the data to be spliced by the + * image streamer, greatly improving performance. + * Transfer rates of up to 15GB/s can be seen with this technique. + */ +#define READ_PIPE 0 /* index of the read pipe returned by pipe() */ +#define WRITE_PIPE 1 +static int establish_streamer_file_pipe(void) +{ + /* + * If the other end of the pipe closes, the kernel will want to kill + * us with a SIGPIPE. This signal must be ignored, which we do in + * crtools.c:main() with signal(SIGPIPE, SIG_IGN). + */ + int ret = -1; + int criu_pipe_direction = img_streamer_mode == O_DUMP ?
WRITE_PIPE : READ_PIPE; + int streamer_pipe_direction = 1 - criu_pipe_direction; + int fds[2]; + + if (pipe(fds) < 0) { + pr_perror("Unable to create pipe"); + return -1; + } + + if (send_fd(get_service_fd(IMG_STREAMER_FD_OFF), + NULL, 0, fds[streamer_pipe_direction]) < 0) + close(fds[criu_pipe_direction]); + else + ret = fds[criu_pipe_direction]; + + close(fds[streamer_pipe_direction]); + + return ret; +} + +static int _img_streamer_open(char *filename) +{ + if (send_file_request(filename) < 0) + return -1; + + if (img_streamer_mode == O_RSTR) { + /* The streamer replies whether the file exists */ + bool exists; + if (recv_file_reply(&exists) < 0) + return -1; + + if (!exists) + return -ENOENT; + } + + /* + * When the image streamer encounters a fatal error, it won't report + * errors via protobufs. Instead, CRIU will get a broken pipe error + * when trying to access a streaming pipe. This behavior is similar to + * what would happen if we were connecting criu and criu-image-streamer + * via a shell pipe. + */ + + return establish_streamer_file_pipe(); +} + +/* + * Opens an image file via a UNIX pipe with the image streamer. + * + * Return: + * A file descriptor on success + * -ENOENT when the file was not found. + * -1 on any other error.
+ */ +int img_streamer_open(char *filename, int flags) +{ + int ret; + + BUG_ON(flags != img_streamer_mode); + + mutex_lock(img_streamer_fd_lock); + ret = _img_streamer_open(filename); + mutex_unlock(img_streamer_fd_lock); + return ret; +} diff --git a/criu/include/cr_options.h b/criu/include/cr_options.h index ba405182e..d5655212d 100644 --- a/criu/include/cr_options.h +++ b/criu/include/cr_options.h @@ -143,6 +143,7 @@ struct cr_options { int weak_sysctls; int status_fd; bool orphan_pts_master; + int stream; pid_t tree_id; int log_level; char *imgs_dir; diff --git a/criu/include/image.h b/criu/include/image.h index 1c7cc5471..62c8d7ba0 100644 --- a/criu/include/image.h +++ b/criu/include/image.h @@ -145,7 +145,7 @@ static inline int img_raw_fd(struct cr_img *img) extern off_t img_raw_size(struct cr_img *img); -extern int open_image_dir(char *dir); +extern int open_image_dir(char *dir, int mode); extern void close_image_dir(void); extern struct cr_img *open_image_at(int dfd, int type, unsigned long flags, ...); diff --git a/criu/include/img-streamer.h b/criu/include/img-streamer.h new file mode 100644 index 000000000..0c380c915 --- /dev/null +++ b/criu/include/img-streamer.h @@ -0,0 +1,8 @@ +#ifndef IMAGE_STREAMER_H +#define IMAGE_STREAMER_H + +extern int img_streamer_init(const char *image_dir, int mode); +extern void img_streamer_finish(void); +extern int img_streamer_open(char *filename, int flags); + +#endif /* IMAGE_STREAMER_H */ diff --git a/criu/include/protobuf-desc.h b/criu/include/protobuf-desc.h index ee4135d65..43d961731 100644 --- a/criu/include/protobuf-desc.h +++ b/criu/include/protobuf-desc.h @@ -62,8 +62,10 @@ enum { PB_GHOST_CHUNK, PB_FILE, PB_MEMFD_FILE, - PB_MEMFD_INODE, /* 60 */ + PB_MEMFD_INODE, PB_TIMENS, + PB_IMG_STREAMER_REQUEST, + PB_IMG_STREAMER_REPLY, /* PB_AUTOGEN_STOP */ diff --git a/criu/include/servicefd.h b/criu/include/servicefd.h index 986c46af5..c11f89d37 100644 --- a/criu/include/servicefd.h +++ b/criu/include/servicefd.h @@ 
-14,6 +14,7 @@ enum sfd_type { LOG_FD_OFF, IMG_FD_OFF, + IMG_STREAMER_FD_OFF, PROC_FD_OFF, /* fd with /proc for all proc_ calls */ PROC_PID_FD_OFF, CR_PROC_FD_OFF, /* some other's proc fd: diff --git a/criu/mem.c b/criu/mem.c index 15aa0cbdb..167838b98 100644 --- a/criu/mem.c +++ b/criu/mem.c @@ -1406,9 +1406,9 @@ static int prepare_vma_ios(struct pstree_item *t, struct task_restore_args *ta) /* * We optimize the case when rsti(t)->vma_io is empty. * - * This is useful for for remote images, where all VMAs are premapped - * (pr->pieok is false). This avoids re-opening the CR_FD_PAGES file, - * which could be no longer be available. + * This is useful when using the image streamer, where all VMAs are + * premapped (pr->pieok is false). This avoids re-opening the + * CR_FD_PAGES file, which may be readable only once. */ if (list_empty(&rsti(t)->vma_io)) { ta->vma_ios = NULL; diff --git a/criu/page-xfer.c b/criu/page-xfer.c index 9affc2706..db8e5bec2 100644 --- a/criu/page-xfer.c +++ b/criu/page-xfer.c @@ -382,6 +382,10 @@ static int open_page_local_xfer(struct page_xfer *xfer, int fd_type, unsigned lo int pfd; int pr_flags = (fd_type == CR_FD_PAGEMAP) ? 
PR_TASK : PR_SHMEM; + /* Image streaming lacks support for incremental images */ + if (opts.stream) + goto out; + pfd = openat(get_service_fd(IMG_FD_OFF), CR_PARENT_LINK, O_RDONLY); if (pfd < 0 && errno == ENOENT) goto out; @@ -928,6 +932,10 @@ int check_parent_local_xfer(int fd_type, unsigned long img_id) struct stat st; int ret, pfd; + /* Image streaming lacks support for incremental images */ + if (opts.stream) + return 0; + pfd = openat(get_service_fd(IMG_FD_OFF), CR_PARENT_LINK, O_RDONLY); if (pfd < 0 && errno == ENOENT) return 0; diff --git a/criu/pagemap.c b/criu/pagemap.c index 05f6b82b8..f1e1be91f 100644 --- a/criu/pagemap.c +++ b/criu/pagemap.c @@ -406,6 +406,49 @@ static int maybe_read_page_local(struct page_read *pr, unsigned long vaddr, return ret; } +/* + * We cannot use maybe_read_page_local() for streaming images as it uses + * pread(), seeking in the file. Instead, we use this custom page reader. + */ +static int maybe_read_page_img_streamer(struct page_read *pr, unsigned long vaddr, + int nr, void *buf, unsigned flags) +{ + unsigned long len = nr * PAGE_SIZE; + int fd = img_raw_fd(pr->pi); + int ret; + size_t curr = 0; + + pr_debug("\tpr%lu-%u Read page from self %lx/%"PRIx64"\n", + pr->img_id, pr->id, pr->cvaddr, pr->pi_off); + + /* We can't seek. 
The requested address better match */ + BUG_ON(pr->cvaddr != vaddr); + + while (1) { + ret = read(fd, buf + curr, len - curr); + if (ret == 0) { + pr_err("Reached EOF unexpectedly while reading page from image\n"); + return -1; + } else if (ret < 0) { + pr_perror("Can't read mapping page %d", ret); + return -1; + } + curr += ret; + if (curr == len) + break; + } + + if (opts.auto_dedup) + pr_warn_once("Can't dedup when streaming images\n"); + + /* ret still holds the last read() byte count (> 0) here; report 0 or the io_complete result */ + ret = pr->io_complete ? pr->io_complete(pr, vaddr, nr) : 0; + + pr->pi_off += len; + + return ret; +} + static int read_page_complete(unsigned long img_id, unsigned long vaddr, int nr_pages, void *priv) { int ret = 0; @@ -601,6 +644,10 @@ static int try_open_parent(int dfd, unsigned long id, struct page_read *pr, int int pfd, ret; struct page_read *parent = NULL; + /* Image streaming lacks support for incremental images */ + if (opts.stream) + goto out; + pfd = openat(dfd, CR_PARENT_LINK, O_RDONLY); if (pfd < 0 && errno == ENOENT) goto out; @@ -657,7 +704,19 @@ static int init_pagemaps(struct page_read *pr) off_t fsize; int nr_pmes, nr_realloc; - fsize = img_raw_size(pr->pmi); + if (opts.stream) { + /* + * TODO - There is no easy way to estimate the size of the + * pagemap that is still to be read from the pipe. Possible + * solution is to ask the image streamer for the size of the + * image. 1024 is a wild guess (more space is allocated if + * needed). 
+ */ + fsize = 1024; + } else { + fsize = img_raw_size(pr->pmi); + } + if (fsize < 0) return -1; @@ -781,6 +840,8 @@ int open_page_read_at(int dfd, unsigned long img_id, struct page_read *pr, int p if (remote) pr->maybe_read_page = maybe_read_page_remote; + else if (opts.stream) + pr->maybe_read_page = maybe_read_page_img_streamer; else { pr->maybe_read_page = maybe_read_page_local; if (!pr->parent && !opts.lazy_pages) diff --git a/criu/protobuf-desc.c b/criu/protobuf-desc.c index 2ee81e5db..13655264a 100644 --- a/criu/protobuf-desc.c +++ b/criu/protobuf-desc.c @@ -63,6 +63,7 @@ #include "images/seccomp.pb-c.h" #include "images/binfmt-misc.pb-c.h" #include "images/autofs.pb-c.h" +#include "images/img-streamer.pb-c.h" struct cr_pb_message_desc cr_pb_descs[PB_MAX]; diff --git a/criu/util.c b/criu/util.c index 06c594ca9..b30dbc86c 100644 --- a/criu/util.c +++ b/criu/util.c @@ -423,13 +423,19 @@ int copy_file(int fd_in, int fd_out, size_t bytes) { ssize_t written = 0; size_t chunk = bytes ? bytes : 4096; + ssize_t ret; while (1) { - ssize_t ret; - - ret = sendfile(fd_out, fd_in, NULL, chunk); + /* + * When fd_out is a pipe, sendfile() returns -EINVAL, so we + * fall back to splice(). Not sure why. 
+ */ + if (opts.stream) + ret = splice(fd_in, NULL, fd_out, NULL, chunk, SPLICE_F_MOVE); + else + ret = sendfile(fd_out, fd_in, NULL, chunk); if (ret < 0) { - pr_perror("Can't send data to ghost file"); + pr_perror("Can't transfer data to ghost file from image"); return -1; } diff --git a/images/Makefile b/images/Makefile index 5ddd37664..bc67278e6 100644 --- a/images/Makefile +++ b/images/Makefile @@ -65,6 +65,7 @@ proto-obj-y += macvlan.o proto-obj-y += sit.o proto-obj-y += memfd.o proto-obj-y += timens.o +proto-obj-y += img-streamer.o CFLAGS += -iquote $(obj)/ diff --git a/images/img-streamer.proto b/images/img-streamer.proto new file mode 100644 index 000000000..d1bd4cc19 --- /dev/null +++ b/images/img-streamer.proto @@ -0,0 +1,16 @@ +syntax = "proto2"; + +// This message is sent from CRIU to the streamer. +// * During dump, it communicates the name of the file that is about to be sent +// to the streamer. +// * During restore, CRIU requests image files from the streamer. The message is +// used to communicate the name of the desired file. +message img_streamer_request_entry { + required string filename = 1; +} + +// This message is sent from the streamer to CRIU. It is only used during +// restore to report whether the requested file exists. +message img_streamer_reply_entry { + required bool exists = 1; +} From 27ab533cbec281f60726c2ec3a26f74ca661e814 Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Wed, 10 Jun 2020 20:25:36 +0000 Subject: [PATCH 0230/1854] tests: run tests with criu-image-streamer with --stream One can pass --stream to zdtm.py for testing criu with image streaming. criu-image-streamer should be installed in ../criu-image-streamer relative to the criu project directory. However, any path will do, provided that criu-image-streamer can be found in the PATH environment variable. Added a few tests to run on travis-ci to make sure streaming works. We run the tests that are most likely to fail. However, it would be good to run all tests with `--stream -a` once in a while. 
Signed-off-by: Nicolas Viennot --- .travis.yml | 1 + scripts/install-criu-image-streamer.sh | 14 ++++ scripts/travis/travis-tests | 15 ++++ test/zdtm.py | 106 ++++++++++++++++++++++++- 4 files changed, 133 insertions(+), 3 deletions(-) create mode 100755 scripts/install-criu-image-streamer.sh diff --git a/.travis.yml b/.travis.yml index 9928f16c2..69a505193 100644 --- a/.travis.yml +++ b/.travis.yml @@ -94,6 +94,7 @@ jobs: arch: amd64 env: TR_ARCH=ppc64-cross dist: bionic + - env: TR_ARCH=local STREAM_TEST=1 allow_failures: - env: TR_ARCH=docker-test - env: TR_ARCH=docker-test DIST=xenial diff --git a/scripts/install-criu-image-streamer.sh b/scripts/install-criu-image-streamer.sh new file mode 100755 index 000000000..e4f368602 --- /dev/null +++ b/scripts/install-criu-image-streamer.sh @@ -0,0 +1,14 @@ +#!/bin/bash +set -eux + +# Install Rust toolchain +curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y + +# Clone criu-image-streamer in a sibling directory of the criu project directory +cd $(dirname "$0")/../../ +# TODO change dev branch to master once PR is merged +git clone --depth=1 https://github.com/checkpoint-restore/criu-image-streamer.git -b dev + +# Compile +cd criu-image-streamer +make BUILD=debug # debug build compiles faster than release mode (2x faster) diff --git a/scripts/travis/travis-tests b/scripts/travis/travis-tests index 4cb842c97..311236d7d 100755 --- a/scripts/travis/travis-tests +++ b/scripts/travis/travis-tests @@ -61,6 +61,13 @@ travis_prep () { chmod a+x $HOME } +test_stream() { + # We must test CRIU features that dump content into an image file to ensure + # streaming compatibility. 
+ STREAM_TEST_PATTERN='.*(ghost|fifo|unlink|memfd|shmem|socket_queue).*' + ./test/zdtm.py run --stream -p 2 --keep-going -T $STREAM_TEST_PATTERN $ZDTM_OPTS +} + travis_prep export GCOV @@ -132,6 +139,14 @@ chmod 0777 test/ chmod 0777 test/zdtm/static chmod 0777 test/zdtm/transition +# We run streaming tests separately to improve test completion times, +# hence the exit 0. +if [ "${STREAM_TEST}" = "1" ]; then + ./scripts/install-criu-image-streamer.sh + test_stream + exit 0 +fi + ./test/zdtm.py run -a -p 2 --keep-going $ZDTM_OPTS KERN_MAJ=`uname -r | cut -d. -f1` diff --git a/test/zdtm.py b/test/zdtm.py index b111fa383..49387e43e 100755 --- a/test/zdtm.py +++ b/test/zdtm.py @@ -30,6 +30,9 @@ import yaml os.chdir(os.path.dirname(os.path.abspath(__file__))) +# File to store content of streamed images +STREAMED_IMG_FILE_NAME = "img.criu" + prev_line = None @@ -1024,6 +1027,7 @@ class criu: self.__mdedup = bool(opts['noauto_dedup']) self.__user = bool(opts['user']) self.__leave_stopped = bool(opts['stop']) + self.__stream = bool(opts['stream']) self.__criu = (opts['rpc'] and criu_rpc or criu_cli) self.__show_stats = bool(opts['show_stats']) self.__lazy_pages_p = None @@ -1208,11 +1212,19 @@ class criu: stats_written = int(stent['shpages_written']) + int( stent['pages_written']) + if self.__stream: + p = self.spawn_criu_image_streamer("extract") + p.wait() + real_written = 0 for f in os.listdir(self.__ddir()): if f.startswith('pages-'): real_written += os.path.getsize(os.path.join(self.__ddir(), f)) + if self.__stream: + # make sure the extracted image is not usable. 
+ os.unlink(os.path.join(self.__ddir(), "inventory.img")) + r_pages = real_written / mmap.PAGESIZE r_off = real_written % mmap.PAGESIZE if (stats_written != r_pages) or (r_off != 0): @@ -1220,6 +1232,57 @@ class criu: (stats_written, r_pages, r_off)) raise test_fail_exc("page counts mismatch") + # action can be "capture", "extract", or "serve" + def spawn_criu_image_streamer(self, action): + print("Run criu-image-streamer in {} mode".format(action)) + + progress_r, progress_w = os.pipe() + # We fcntl() on both file descriptors due to some potential differences + # with python2 and python3. + fcntl.fcntl(progress_r, fcntl.F_SETFD, fcntl.FD_CLOEXEC) + fcntl.fcntl(progress_w, fcntl.F_SETFD, 0) + + # We use cat because the streamer requires to work with pipes. + if action == 'capture': + cmd = ["criu-image-streamer", + "--images-dir '{images_dir}'", + "--progress-fd {progress_fd}", + action, + "| cat > {img_file}"] + else: + cmd = ["cat {img_file} |", + "criu-image-streamer", + "--images-dir '{images_dir}'", + "--progress-fd {progress_fd}", + action] + + # * As we are using a shell pipe command, we want to use pipefail. + # Otherwise, failures stay unnoticed. For this, we use bash as sh + # doesn't support that feature. + # * We use close_fds=False because we want the child to inherit the progress pipe + p = subprocess.Popen(["bash", "-c", "set -o pipefail; " + " ".join(cmd).format( + progress_fd=progress_w, + images_dir=self.__ddir(), + img_file=os.path.join(self.__ddir(), STREAMED_IMG_FILE_NAME) + )], close_fds=False) + + os.close(progress_w) + progress = os.fdopen(progress_r, "r") + + if action == 'serve' or action == 'extract': + # Consume image statistics + progress.readline() + + if action == 'capture' or action == 'serve': + # The streamer socket is ready for consumption once we receive the + # socket-init message. 
+ if progress.readline().strip() != "socket-init": + p.kill() + raise test_fail_exc( + "criu-image-streamer is not starting (exit_code=%d)" % p.wait()) + + return p + def dump(self, action, opts=[]): self.__iter += 1 os.mkdir(self.__ddir()) @@ -1249,6 +1312,10 @@ class criu: a_opts += self.__test.getdopts() + if self.__stream: + streamer_p = self.spawn_criu_image_streamer("capture") + a_opts += ["--stream"] + if self.__dedup: a_opts += ["--auto-dedup"] @@ -1273,6 +1340,11 @@ class criu: self.__dump_process = self.__criu_act(action, opts=a_opts + opts, nowait=nowait) + if self.__stream: + ret = streamer_p.wait() + if ret: + raise test_fail_exc("criu-image-streamer exited with %d" % ret) + if self.__mdedup and self.__iter > 1: self.__criu_act("dedup", opts=[]) @@ -1303,6 +1375,10 @@ class criu: r_opts += ['--empty-ns', 'net'] r_opts += ['--action-script', os.getcwd() + '/empty-netns-prep.sh'] + if self.__stream: + streamer_p = self.spawn_criu_image_streamer("serve") + r_opts += ["--stream"] + if self.__dedup: r_opts += ["--auto-dedup"] @@ -1336,6 +1412,11 @@ class criu: r_opts += ['--leave-stopped'] self.__criu_act("restore", opts=r_opts + ["--restore-detached"]) + if self.__stream: + ret = streamer_p.wait() + if ret: + raise test_fail_exc("criu-image-streamer exited with %d" % ret) + self.show_stats("restore") if self.__leave_stopped: @@ -1344,6 +1425,13 @@ class criu: @staticmethod def check(feature): + if feature == 'stream': + try: + p = subprocess.Popen(["criu-image-streamer", "--version"]) + return p.wait() == 0 + except Exception: + return False + return criu_cli.run( "check", ["--no-default-config", "-v0", "--feature", feature], opts['criu_bin']) == 0 @@ -1852,7 +1940,7 @@ class Launcher: 'stop', 'empty_ns', 'fault', 'keep_img', 'report', 'snaps', 'sat', 'script', 'rpc', 'lazy_pages', 'join_ns', 'dedup', 'sbs', 'freezecg', 'user', 'dry_run', 'noauto_dedup', - 'remote_lazy_pages', 'show_stats', 'lazy_migrate', + 'remote_lazy_pages', 'show_stats', 
'lazy_migrate', 'stream', 'tls', 'criu_bin', 'crit_bin', 'pre_dump_mode') arg = repr((name, desc, flavor, {d: self.__opts[d] for d in nd})) @@ -2138,6 +2226,15 @@ def run_tests(opts): "[WARNING] Non-cooperative UFFD is missing, some tests might spuriously fail" ) + if opts['stream']: + streamer_dir = os.path.realpath(opts['criu_image_streamer_dir']) + os.environ['PATH'] = "{}:{}".format(streamer_dir, os.environ['PATH']) + if not criu.check('stream'): + raise RuntimeError(( + "Streaming tests need the criu-image-streamer binary to be accessible in the {} directory. " + + "Specify --criu-image-streamer-dir or modify PATH to provide an alternate location") + .format(streamer_dir)) + launcher = Launcher(opts, len(torun)) try: for t in torun: @@ -2460,8 +2557,8 @@ rp.add_argument("--rpc", rp.add_argument("--page-server", help="Use page server dump", action='store_true') -rp.add_argument("--remote", - help="Use remote option for diskless C/R", +rp.add_argument("--stream", + help="Use criu-image-streamer", action='store_true') rp.add_argument("-p", "--parallel", help="Run test in parallel") rp.add_argument("--dry-run", @@ -2500,6 +2597,9 @@ rp.add_argument("--criu-bin", rp.add_argument("--crit-bin", help="Path to crit binary", default='../crit/crit') +rp.add_argument("--criu-image-streamer-dir", + help="Directory where the criu-image-streamer binary is located", + default="../../criu-image-streamer") rp.add_argument("--pre-dump-mode", help="Use splice or read mode of pre-dumping", choices=['splice', 'read'], From 8c538ca10dc4385baf911b9b1747ff2287aff5fb Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 6 Jul 2017 12:38:48 +0300 Subject: [PATCH 0231/1854] page-read: Warn about async read w/o completion cb Acked-by: Mike Rapoport Signed-off-by: Pavel Emelyanov Signed-off-by: Andrei Vagin --- criu/pagemap.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/criu/pagemap.c b/criu/pagemap.c index f1e1be91f..58f2850ff 100644 --- a/criu/pagemap.c +++ b/criu/pagemap.c @@ 
-462,6 +462,8 @@ static int read_page_complete(unsigned long img_id, unsigned long vaddr, int nr_ if (pr->io_complete) ret = pr->io_complete(pr, vaddr, nr_pages); + else + pr_warn_once("Remote page read w/o io_complete!\n"); return ret; } From eb732bcf0d7621e3b7a74797e8b4f04b40f2b183 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Sun, 24 Mar 2019 14:26:10 +0000 Subject: [PATCH 0232/1854] util: Remove deprecated print_data() routine The print_data() function was part of the deprecated (and removed) 'show' action, and it was moved in util.c with the following commit: a501b4804b3c95e1d83d64dd10ed95c37f0378bb The 'show' action has been deprecated since 1.6, let's finally drop it. The print_data() routine is kept for yet another (to be deprecated too) feature called 'criu exec'. The criu exec feature was removed with: 909590a3558560655c1ce5b72215efbb325999ca Remove criu exec code It's now obsoleted by compel library. Maybe-TODO: Add compel tool exec action? Therefore, now we can drop print_data() as well. Signed-off-by: Radostin Stoyanov --- criu/include/util.h | 2 -- criu/util.c | 75 +-------------------------------------------- 2 files changed, 1 insertion(+), 76 deletions(-) diff --git a/criu/include/util.h b/criu/include/util.h index 45bebf673..d67f6d39d 100644 --- a/criu/include/util.h +++ b/criu/include/util.h @@ -290,8 +290,6 @@ char *xstrcat(char *str, const char *fmt, ...) char *xsprintf(const char *fmt, ...) 
__attribute__ ((__format__ (__printf__, 1, 2))); -void print_data(unsigned long addr, unsigned char *data, size_t size); - int setup_tcp_server(char *type, char *addr, unsigned short *port); int run_tcp_server(bool daemon_mode, int *ask, int cfd, int sk); int setup_tcp_client(char *hostname); diff --git a/criu/util.c b/criu/util.c index b30dbc86c..517f0fc25 100644 --- a/criu/util.c +++ b/criu/util.c @@ -26,7 +26,6 @@ #include #include #include -#include #include "linux/mount.h" @@ -994,85 +993,13 @@ void tcp_nodelay(int sk, bool on) pr_perror("Unable to restore TCP_NODELAY (%d)", val); } -static inline void pr_xsym(unsigned char *data, size_t len, int pos) -{ - char sym; - - if (pos < len) - sym = data[pos]; - else - sym = ' '; - - pr_msg("%c", isprint(sym) ? sym : '.'); -} - -static inline void pr_xdigi(unsigned char *data, size_t len, int pos) -{ - if (pos < len) - pr_msg("%02x ", data[pos]); - else - pr_msg(" "); -} - -static int nice_width_for(unsigned long addr) -{ - int ret = 3; - - while (addr) { - addr >>= 4; - ret++; - } - - return ret; -} - -void print_data(unsigned long addr, unsigned char *data, size_t size) -{ - int i, j, addr_len; - unsigned zero_line = 0; - - addr_len = nice_width_for(addr + size); - - for (i = 0; i < size; i += 16) { - if (*(u64 *)(data + i) == 0 && *(u64 *)(data + i + 8) == 0) { - if (zero_line == 0) - zero_line = 1; - else { - if (zero_line == 1) { - pr_msg("*\n"); - zero_line = 2; - } - - continue; - } - } else - zero_line = 0; - - pr_msg("%#0*lx: ", addr_len, addr + i); - for (j = 0; j < 8; j++) - pr_xdigi(data, size, i + j); - pr_msg(" "); - for (j = 8; j < 16; j++) - pr_xdigi(data, size, i + j); - - pr_msg(" |"); - for (j = 0; j < 8; j++) - pr_xsym(data, size, i + j); - pr_msg(" "); - for (j = 8; j < 16; j++) - pr_xsym(data, size, i + j); - - pr_msg("|\n"); - } -} - static int get_sockaddr_in(struct sockaddr_storage *addr, char *host, unsigned short port) { memset(addr, 0, sizeof(*addr)); if (!host) { - ((struct sockaddr_in 
*)addr)->sin_addr.s_addr = INADDR_ANY; + ((struct sockaddr_in *)addr)->sin_addr.s_addr = INADDR_ANY; addr->ss_family = AF_INET; } else if (inet_pton(AF_INET, host, &((struct sockaddr_in *)addr)->sin_addr)) { addr->ss_family = AF_INET; From 8be1d457d71c93ea0a592bdc2dc8a982cd3e6fa9 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Wed, 15 May 2019 09:47:17 +0000 Subject: [PATCH 0233/1854] net: fix coverity RESOURCE_LEAK criu-3.12/criu/net.c:2043: overwrite_var: Overwriting "img" in "img = open_image_at(-1, CR_FD_IP6TABLES, 0UL, pid)" leaks the storage that "img" points to. Signed-off-by: Adrian Reber --- criu/net.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/criu/net.c b/criu/net.c index 27e7c7e33..86fba2ddc 100644 --- a/criu/net.c +++ b/criu/net.c @@ -2090,13 +2090,13 @@ static int restore_ip_dump(int type, int pid, char *cmd) if (written < n) { pr_perror("Failed to write to tmpfile " "[written: %d; total: %d]", written, n); - return -1; + goto close; } } if (fseek(tmp_file, 0, SEEK_SET)) { pr_perror("Failed to set file position to beginning of tmpfile"); - return -1; + goto close; } if (img) { @@ -2104,6 +2104,7 @@ static int restore_ip_dump(int type, int pid, char *cmd) close_image(img); } +close: if(fclose(tmp_file)) { pr_perror("Failed to close tmpfile"); } @@ -2208,6 +2209,7 @@ static inline int restore_iptables(int pid) return -1; if (empty_image(img)) { ret = 0; + close_image(img); goto ipt6; } From ba0d6dbac1bd237319817ac0f34653a78686be1f Mon Sep 17 00:00:00 2001 From: Guoyun Sun Date: Wed, 8 Apr 2020 10:13:22 +0800 Subject: [PATCH 0234/1854] mips:compel/arch/mips: Add architecture support to compel tool and libraries This patch only adds the support but does not enable it for building. 
Signed-off-by: Guoyun Sun --- .../arch/mips/plugins/include/asm/prologue.h | 36 ++ .../mips/plugins/include/asm/syscall-types.h | 36 ++ compel/arch/mips/plugins/include/features.h | 6 + compel/arch/mips/plugins/std/memcpy.S | 23 ++ compel/arch/mips/plugins/std/parasite-head.S | 20 ++ .../plugins/std/syscalls/Makefile.syscalls | 117 +++++++ .../std/syscalls/syscall-common-mips-64.S | 12 + .../mips/plugins/std/syscalls/syscall_64.tbl | 115 +++++++ .../mips/scripts/compel-pack-compat.lds.S | 4 + compel/arch/mips/scripts/compel-pack.lds.S | 37 +++ compel/arch/mips/src/lib/cpu.c | 36 ++ compel/arch/mips/src/lib/handle-elf-host.c | 1 + compel/arch/mips/src/lib/handle-elf.c | 23 ++ compel/arch/mips/src/lib/include/handle-elf.h | 8 + compel/arch/mips/src/lib/include/ldsodefs.h | 147 +++++++++ compel/arch/mips/src/lib/include/syscall.h | 7 + .../mips/src/lib/include/uapi/asm/.gitignore | 0 .../src/lib/include/uapi/asm/breakpoints.h | 6 + .../arch/mips/src/lib/include/uapi/asm/cpu.h | 5 + .../arch/mips/src/lib/include/uapi/asm/fpu.h | 4 + .../src/lib/include/uapi/asm/infect-types.h | 66 ++++ .../mips/src/lib/include/uapi/asm/sigframe.h | 63 ++++ .../mips/src/lib/include/uapi/asm/siginfo.h | 124 +++++++ compel/arch/mips/src/lib/infect.c | 310 ++++++++++++++++++ include/common/arch/mips/asm/atomic.h | 148 +++++++++ include/common/arch/mips/asm/bitops.h | 41 +++ include/common/arch/mips/asm/bitsperlong.h | 6 + include/common/arch/mips/asm/cmpxchg.h | 67 ++++ include/common/arch/mips/asm/fls64.h | 38 +++ include/common/arch/mips/asm/linkage.h | 58 ++++ include/common/arch/mips/asm/page.h | 39 +++ include/common/arch/mips/asm/utils.h | 24 ++ 32 files changed, 1627 insertions(+) create mode 100755 compel/arch/mips/plugins/include/asm/prologue.h create mode 100755 compel/arch/mips/plugins/include/asm/syscall-types.h create mode 100755 compel/arch/mips/plugins/include/features.h create mode 100755 compel/arch/mips/plugins/std/memcpy.S create mode 100755 
compel/arch/mips/plugins/std/parasite-head.S create mode 100755 compel/arch/mips/plugins/std/syscalls/Makefile.syscalls create mode 100755 compel/arch/mips/plugins/std/syscalls/syscall-common-mips-64.S create mode 100755 compel/arch/mips/plugins/std/syscalls/syscall_64.tbl create mode 100755 compel/arch/mips/scripts/compel-pack-compat.lds.S create mode 100755 compel/arch/mips/scripts/compel-pack.lds.S create mode 100755 compel/arch/mips/src/lib/cpu.c create mode 120000 compel/arch/mips/src/lib/handle-elf-host.c create mode 100755 compel/arch/mips/src/lib/handle-elf.c create mode 100755 compel/arch/mips/src/lib/include/handle-elf.h create mode 100755 compel/arch/mips/src/lib/include/ldsodefs.h create mode 100755 compel/arch/mips/src/lib/include/syscall.h create mode 100755 compel/arch/mips/src/lib/include/uapi/asm/.gitignore create mode 100755 compel/arch/mips/src/lib/include/uapi/asm/breakpoints.h create mode 100755 compel/arch/mips/src/lib/include/uapi/asm/cpu.h create mode 100644 compel/arch/mips/src/lib/include/uapi/asm/fpu.h create mode 100755 compel/arch/mips/src/lib/include/uapi/asm/infect-types.h create mode 100755 compel/arch/mips/src/lib/include/uapi/asm/sigframe.h create mode 100755 compel/arch/mips/src/lib/include/uapi/asm/siginfo.h create mode 100755 compel/arch/mips/src/lib/infect.c create mode 100755 include/common/arch/mips/asm/atomic.h create mode 100644 include/common/arch/mips/asm/bitops.h create mode 100755 include/common/arch/mips/asm/bitsperlong.h create mode 100755 include/common/arch/mips/asm/cmpxchg.h create mode 100644 include/common/arch/mips/asm/fls64.h create mode 100644 include/common/arch/mips/asm/linkage.h create mode 100755 include/common/arch/mips/asm/page.h create mode 100644 include/common/arch/mips/asm/utils.h diff --git a/compel/arch/mips/plugins/include/asm/prologue.h b/compel/arch/mips/plugins/include/asm/prologue.h new file mode 100755 index 000000000..9d812eec9 --- /dev/null +++ 
b/compel/arch/mips/plugins/include/asm/prologue.h @@ -0,0 +1,36 @@ +#ifndef __ASM_PROLOGUE_H__ +#define __ASM_PROLOGUE_H__ + +#ifndef __ASSEMBLY__ + +#include +#include +#include + +#include + +#define sys_recv(sockfd, ubuf, size, flags) \ + sys_recvfrom(sockfd, ubuf, size, flags, NULL, NULL) + +typedef struct prologue_init_args { + struct sockaddr_un ctl_sock_addr; + unsigned int ctl_sock_addr_len; + + unsigned int arg_s; + void *arg_p; + + void *sigframe; +} prologue_init_args_t; + +#endif /* __ASSEMBLY__ */ + +/* + * Reserve enough space for sigframe. + * + * FIXME It should rather be taken from the sigframe header. + */ +#define PROLOGUE_SGFRAME_SIZE 4096 + +#define PROLOGUE_INIT_ARGS_SIZE 1024 + +#endif /* __ASM_PROLOGUE_H__ */ diff --git a/compel/arch/mips/plugins/include/asm/syscall-types.h b/compel/arch/mips/plugins/include/asm/syscall-types.h new file mode 100755 index 000000000..64daf2c7a --- /dev/null +++ b/compel/arch/mips/plugins/include/asm/syscall-types.h @@ -0,0 +1,36 @@ +#ifndef COMPEL_ARCH_SYSCALL_TYPES_H__ +#define COMPEL_ARCH_SYSCALL_TYPES_H__ + +/* Types for sigaction, sigprocmask syscalls */ +typedef void rt_signalfn_t(int, siginfo_t *, void *); +typedef rt_signalfn_t *rt_sighandler_t; + +typedef void rt_restorefn_t(void); +typedef rt_restorefn_t *rt_sigrestore_t; + +#define SA_RESTORER 0x04000000 + +/** refer to linux-3.10/arch/mips/include/uapi/asm/signal.h*/ +#define _KNSIG 128 +#define _NSIG_BPW 64 + +#define _KNSIG_WORDS (_KNSIG / _NSIG_BPW) + +/* + * Note: as k_rtsigset_t is the same size for 32-bit and 64-bit, + * sig is defined as uint64_t rather than (unsigned long), in case + * we are ever going to support native 32-bit compilation. 
+ */ + +typedef struct { + uint64_t sig[_KNSIG_WORDS]; +} k_rtsigset_t; + +typedef struct { + rt_sighandler_t rt_sa_handler; + unsigned long rt_sa_flags; + rt_sigrestore_t rt_sa_restorer; + k_rtsigset_t rt_sa_mask; +} rt_sigaction_t; + +#endif /* COMPEL_ARCH_SYSCALL_TYPES_H__ */ diff --git a/compel/arch/mips/plugins/include/features.h b/compel/arch/mips/plugins/include/features.h new file mode 100755 index 000000000..0f35725fa --- /dev/null +++ b/compel/arch/mips/plugins/include/features.h @@ -0,0 +1,6 @@ +#ifndef __COMPEL_ARCH_FEATURES_H +#define __COMPEL_ARCH_FEATURES_H + +#define ARCH_HAS_MEMCPY + +#endif /* __COMPEL_ARCH_FEATURES_H */ diff --git a/compel/arch/mips/plugins/std/memcpy.S b/compel/arch/mips/plugins/std/memcpy.S new file mode 100755 index 000000000..262267f7f --- /dev/null +++ b/compel/arch/mips/plugins/std/memcpy.S @@ -0,0 +1,23 @@ + +#include "common/asm/linkage.h" + + .section .head.text, "ax" +ENTRY(memcpy) + .set noreorder + dadd v0,zero,a0 + daddiu t1,zero,0 +loop: + beq t1,a2,exit + nop + lb t2,0(a1) + sb t2,0(a0) + daddiu t1,t1,1 + daddiu a0,a0,1 + daddiu a1,a1,1 + j loop + nop +exit: + jr ra + nop +END(memcpy) + diff --git a/compel/arch/mips/plugins/std/parasite-head.S b/compel/arch/mips/plugins/std/parasite-head.S new file mode 100755 index 000000000..38e87f823 --- /dev/null +++ b/compel/arch/mips/plugins/std/parasite-head.S @@ -0,0 +1,20 @@ + +#include "common/asm/linkage.h" + + + .section .head.text, "ax" +ENTRY(__export_parasite_head_start) + .set push + .set noreorder + lw a0, __export_parasite_cmd + dla a1, __export_parasite_args + jal parasite_service + nop + .byte 0x0d, 0x00, 0x00, 0x00 //break + .set pop +// .byte 0x40,0x01,0x00,0x00 //pause + +__export_parasite_cmd: + .long 0 +END(__export_parasite_head_start) + diff --git a/compel/arch/mips/plugins/std/syscalls/Makefile.syscalls b/compel/arch/mips/plugins/std/syscalls/Makefile.syscalls new file mode 100755 index 000000000..ef75f9e95 --- /dev/null +++ 
b/compel/arch/mips/plugins/std/syscalls/Makefile.syscalls @@ -0,0 +1,117 @@ +std-lib-y += ./$(PLUGIN_ARCH_DIR)/std/syscalls-64.o +sys-proto-types := $(obj)/include/uapi/std/syscall-types.h +sys-proto-generic := $(obj)/include/uapi/std/syscall.h +sys-codes-generic := $(obj)/include/uapi/std/syscall-codes.h +sys-codes = $(obj)/include/uapi/std/syscall-codes-$(1).h +sys-proto = $(obj)/include/uapi/std/syscall-$(1).h +sys-def = $(PLUGIN_ARCH_DIR)/std/syscalls/syscall_$(1).tbl +sys-asm = $(PLUGIN_ARCH_DIR)/std/syscalls-$(1).S +sys-asm-common-name = std/syscalls/syscall-common-mips-$(1).S +sys-asm-common = $(PLUGIN_ARCH_DIR)/$(sys-asm-common-name) +sys-asm-types := $(obj)/include/uapi/std/asm/syscall-types.h +sys-exec-tbl = $(PLUGIN_ARCH_DIR)/std/sys-exec-tbl-$(1).c + +sys-bits := 64 + +AV := $$$$ + +define gen-rule-sys-codes +$(sys-codes): $(sys-def) $(sys-proto-types) + $(call msg-gen, $$@) + $(Q) echo "/* Autogenerated, don't edit */" > $$@ + $(Q) echo "#ifndef ASM_SYSCALL_CODES_H_$(1)__" >> $$@ + $(Q) echo "#define ASM_SYSCALL_CODES_H_$(1)__" >> $$@ + $(Q) cat $$< | awk '/^__NR/{SYSN=$(AV)1; \ + sub("^__NR", "SYS", SYSN); \ + print "\n#ifndef ", $(AV)1; \ + print "#define", $(AV)1, $(AV)2; \ + print "#endif"; \ + print "\n#ifndef ", SYSN; \ + print "#define ", SYSN, $(AV)1; \ + print "#endif";}' >> $$@ + $(Q) echo "#endif /* ASM_SYSCALL_CODES_H_$(1)__ */" >> $$@ +endef + +define gen-rule-sys-proto +$(sys-proto): $(sys-def) $(sys-proto-types) + $(call msg-gen, $$@) + $(Q) echo "/* Autogenerated, don't edit */" > $$@ + $(Q) echo "#ifndef ASM_SYSCALL_PROTO_H_$(1)__" >> $$@ + $(Q) echo "#define ASM_SYSCALL_PROTO_H_$(1)__" >> $$@ + $(Q) echo '#include ' >> $$@ + $(Q) echo '#include ' >> $$@ +ifeq ($(1),32) + $(Q) echo '#include "asm/syscall32.h"' >> $$@ +endif + $(Q) cat $$< | awk '/^__NR/{print "extern long", $(AV)3, \ + substr($(AV)0, index($(AV)0,$(AV)4)), ";"}' >> $$@ + $(Q) echo "#endif /* ASM_SYSCALL_PROTO_H_$(1)__ */" >> $$@ +endef + +define gen-rule-sys-asm 
+$(sys-asm): $(sys-def) $(sys-asm-common) $(sys-codes) $(sys-proto) $(sys-proto-types) + $(call msg-gen, $$@) + $(Q) echo "/* Autogenerated, don't edit */" > $$@ + $(Q) echo '#include ' >> $$@ + $(Q) echo '#include "$(sys-asm-common-name)"' >> $$@ + $(Q) cat $$< | awk '/^__NR/{print "SYSCALL(", $(AV)3, ",", $(AV)2, ")"}' >> $$@ +endef + +define gen-rule-sys-exec-tbl +$(sys-exec-tbl): $(sys-def) $(sys-codes) $(sys-proto) $(sys-proto-generic) $(sys-proto-types) + $(call msg-gen, $$@) + $(Q) echo "/* Autogenerated, don't edit */" > $$@ + $(Q) cat $$< | awk '/^__NR/{print \ + "SYSCALL(", substr($(AV)3, 5), ",", $(AV)2, ")"}' >> $$@ +endef + +$(sys-codes-generic): $(sys-proto-types) + $(call msg-gen, $@) + $(Q) echo "/* Autogenerated, don't edit */" > $@ + $(Q) echo "#ifndef __ASM_CR_SYSCALL_CODES_H__" >> $@ + $(Q) echo "#define __ASM_CR_SYSCALL_CODES_H__" >> $@ + $(Q) echo '#include ' >> $@ + $(Q) cat $< | awk '/^__NR/{NR32=$$1; \ + sub("^__NR", "__NR32", NR32); \ + print "\n#ifndef ", NR32; \ + print "#define ", NR32, $$2; \ + print "#endif";}' >> $@ + $(Q) echo "#endif /* __ASM_CR_SYSCALL_CODES_H__ */" >> $@ +mrproper-y += $(sys-codes-generic) + +$(sys-proto-generic): $(strip $(call map,sys-proto,$(sys-bits))) $(sys-proto-types) + $(call msg-gen, $@) + $(Q) echo "/* Autogenerated, don't edit */" > $@ + $(Q) echo "#ifndef __ASM_CR_SYSCALL_PROTO_H__" >> $@ + $(Q) echo "#define __ASM_CR_SYSCALL_PROTO_H__" >> $@ + $(Q) echo "" >> $@ + $(Q) echo '#include ' >> $@ + $(Q) echo "" >> $@ + $(Q) echo "#endif /* __ASM_CR_SYSCALL_PROTO_H__ */" >> $@ +mrproper-y += $(sys-proto-generic) + +define gen-rule-sys-exec-tbl +$(sys-exec-tbl): $(sys-def) $(sys-codes) $(sys-proto) $(sys-proto-generic) + $(call msg-gen, $$@) + $(Q) echo "/* Autogenerated, don't edit */" > $$@ + $(Q) cat $$< | awk '/^__NR/{print \ + "SYSCALL(", substr($(AV)3, 5), ",", $(AV)2, ")"}' >> $$@ +endef + +$(eval $(call map,gen-rule-sys-codes,$(sys-bits))) +$(eval $(call map,gen-rule-sys-proto,$(sys-bits))) +$(eval 
$(call map,gen-rule-sys-asm,$(sys-bits))) +$(eval $(call map,gen-rule-sys-exec-tbl,$(sys-bits))) + +$(sys-asm-types): $(PLUGIN_ARCH_DIR)/include/asm/syscall-types.h + $(call msg-gen, $@) + $(Q) ln -s ../../../../../../$(PLUGIN_ARCH_DIR)/include/asm/syscall-types.h $(sys-asm-types) + +std-headers-deps += $(call sys-codes,$(sys-bits)) +std-headers-deps += $(call sys-proto,$(sys-bits)) +std-headers-deps += $(call sys-asm,$(sys-bits)) +std-headers-deps += $(call sys-exec-tbl,$(sys-bits)) +std-headers-deps += $(sys-codes-generic) +std-headers-deps += $(sys-proto-generic) +std-headers-deps += $(sys-asm-types) +mrproper-y += $(std-headers-deps) diff --git a/compel/arch/mips/plugins/std/syscalls/syscall-common-mips-64.S b/compel/arch/mips/plugins/std/syscalls/syscall-common-mips-64.S new file mode 100755 index 000000000..3478488da --- /dev/null +++ b/compel/arch/mips/plugins/std/syscalls/syscall-common-mips-64.S @@ -0,0 +1,12 @@ +#include "common/asm/linkage.h" + +#define SYSCALL(name, opcode) \ + ENTRY(name); \ + li v0, opcode; \ + syscall; \ + jr ra; \ + nop; \ + END(name) + +ENTRY(__cr_restore_rt) +END(__cr_restore_rt) diff --git a/compel/arch/mips/plugins/std/syscalls/syscall_64.tbl b/compel/arch/mips/plugins/std/syscalls/syscall_64.tbl new file mode 100755 index 000000000..47c08fc4c --- /dev/null +++ b/compel/arch/mips/plugins/std/syscalls/syscall_64.tbl @@ -0,0 +1,115 @@ +# +# System calls table, please make sure the table consists only of the syscalls +# really used somewhere in the project. +# from kernel/linux-3.10.84/arch/mips/include/uapi/asm/unistd.h Linux 64-bit syscalls are in the range from 5000 to 5999.
+# +# __NR_name code name arguments +# ------------------------------------------------------------------------------------------------------------------------------------------------------------- +__NR_read 5000 sys_read (int fd, void *buf, unsigned long count) +__NR_write 5001 sys_write (int fd, const void *buf, unsigned long count) +__NR_open 5002 sys_open (const char *filename, unsigned long flags, unsigned long mode) +__NR_close 5003 sys_close (int fd) +__NR_lseek 5008 sys_lseek (int fd, unsigned long offset, unsigned long origin) +__NR_mmap 5009 sys_mmap (void *addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long offset) +__NR_mprotect 5010 sys_mprotect (const void *addr, unsigned long len, unsigned long prot) +__NR_munmap 5011 sys_munmap (void *addr, unsigned long len) +__NR_brk 5012 sys_brk (void *addr) +__NR_rt_sigaction 5013 sys_sigaction (int signum, const rt_sigaction_t *act, rt_sigaction_t *oldact, size_t sigsetsize) +__NR_rt_sigprocmask 5014 sys_sigprocmask (int how, k_rtsigset_t *set, k_rtsigset_t *old, size_t sigsetsize) +__NR_rt_sigreturn 5211 sys_rt_sigreturn (void) +__NR_ioctl 5015 sys_ioctl (unsigned int fd, unsigned int cmd, unsigned long arg) +__NR_pread64 5016 sys_pread (unsigned int fd, char *buf, size_t count, loff_t pos) +__NR_mremap 5024 sys_mremap (unsigned long addr, unsigned long old_len, unsigned long new_len, unsigned long flags, unsigned long new_addr) +__NR_mincore 5026 sys_mincore (void *addr, unsigned long size, unsigned char *vec) +__NR_madvise 5027 sys_madvise (unsigned long start, size_t len, int behavior) +__NR_shmat 5029 sys_shmat (int shmid, void *shmaddr, int shmflag) +__NR_dup2 5032 sys_dup2 (int oldfd, int newfd) +__NR_nanosleep 5034 sys_nanosleep (struct timespec *req, struct timespec *rem) +__NR_getitimer 5035 sys_getitimer (int which, const struct itimerval *val) +__NR_setitimer 5036 sys_setitimer (int which, const struct itimerval *val, struct itimerval *old) +__NR_getpid 
5038 sys_getpid (void) +__NR_socket 5040 sys_socket (int domain, int type, int protocol) +__NR_connect 5041 sys_connect (int sockfd, struct sockaddr *addr, int addrlen) +__NR_sendto 5043 sys_sendto (int sockfd, void *buff, size_t len, unsigned int flags, struct sockaddr *addr, int addr_len) +__NR_recvfrom 5044 sys_recvfrom (int sockfd, void *ubuf, size_t size, unsigned int flags, struct sockaddr *addr, int *addr_len) +__NR_sendmsg 5045 sys_sendmsg (int sockfd, const struct msghdr *msg, int flags) +__NR_recvmsg 5046 sys_recvmsg (int sockfd, struct msghdr *msg, int flags) +__NR_shutdown 5047 sys_shutdown (int sockfd, int how) +__NR_bind 5048 sys_bind (int sockfd, const struct sockaddr *addr, int addrlen) +__NR_setsockopt 5053 sys_setsockopt (int sockfd, int level, int optname, const void *optval, socklen_t optlen) +__NR_getsockopt 5054 sys_getsockopt (int sockfd, int level, int optname, const void *optval, socklen_t *optlen) +__NR_clone 5055 sys_clone (unsigned long flags, void *child_stack, void *parent_tid, unsigned long newtls, void *child_tid) +__NR_exit 5058 sys_exit (unsigned long error_code) +__NR_wait4 5059 sys_wait4 (int pid, int *status, int options, struct rusage *ru) +__NR_kill 5060 sys_kill (long pid, int sig) +__NR_fcntl 5070 sys_fcntl (int fd, int type, long arg) +__NR_flock 5071 sys_flock (int fd, unsigned long cmd) +__NR_mkdir 5081 sys_mkdir (const char *name, int mode) +__NR_rmdir 5082 sys_rmdir (const char *name) +__NR_unlink 5085 sys_unlink (char *pathname) +__NR_umask 5093 sys_umask (int mask) +__NR_gettimeofday 5094 sys_gettimeofday (struct timeval *tv, struct timezone *tz) +__NR_ptrace 5099 sys_ptrace (long request, pid_t pid, void *addr, void *data) +__NR_getgroups 5113 sys_getgroups (int gsize, unsigned int *groups) +__NR_setgroups 5114 sys_setgroups (int gsize, unsigned int *groups) +__NR_setresuid 5115 sys_setresuid (int uid, int euid, int suid) +__NR_getresuid 5116 sys_getresuid (int *uid, int *euid, int *suid) +__NR_setresgid 5117 
sys_setresgid (int gid, int egid, int sgid) +__NR_getresgid 5118 sys_getresgid (int *gid, int *egid, int *sgid) +__NR_getpgid 5119 sys_getpgid (pid_t pid) +__NR_setfsuid 5120 sys_setfsuid (int fsuid) +__NR_setfsgid 5121 sys_setfsgid (int fsgid) +__NR_getsid 5122 sys_getsid (void) +__NR_capget 5123 sys_capget (struct cap_header *h, struct cap_data *d) +__NR_capset 5124 sys_capset (struct cap_header *h, struct cap_data *d) +__NR_rt_sigqueueinfo 5127 sys_rt_sigqueueinfo (pid_t pid, int sig, siginfo_t *info) +__NR_sigaltstack 5129 sys_sigaltstack (const void *uss, void *uoss) +__NR_personality 5132 sys_personality (unsigned int personality) +__NR_setpriority 5138 sys_setpriority (int which, int who, int nice) +__NR_sched_setscheduler 5141 sys_sched_setscheduler (int pid, int policy, struct sched_param *p) +__NR_prctl 5153 sys_prctl (int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5) +__NR_setrlimit 5155 sys_setrlimit (int resource, struct krlimit *rlim) +__NR_mount 5160 sys_mount (char *dev_name, char *dir_name, char *type, unsigned long flags, void *data) +__NR_umount2 5161 sys_umount2 (char *name, int flags) +__NR_gettid 5178 sys_gettid (void) +__NR_futex 5194 sys_futex (uint32_t *uaddr, int op, uint32_t val, struct timespec *utime, uint32_t *uaddr2, uint32_t val3) +__NR_cacheflush 5197 sys_cacheflush (char *addr, int nbytes, int cache) +__NR_io_setup 5200 sys_io_setup (unsigned nr_events, aio_context_t *ctx) +__NR_io_getevents 5202 sys_io_getevents (aio_context_t ctx, long min_nr, long nr, struct io_event *evs, struct timespec *tmo) +__NR_io_submit 5203 sys_io_submit (aio_context_t ctx, long nr, struct iocb **iocbpp) +__NR_set_tid_address 5212 sys_set_tid_address (int *tid_addr) +__NR_restart_syscall 5213 sys_restart_syscall (void) +__NR_sys_timer_create 5216 sys_timer_create (clockid_t which_clock, struct sigevent *timer_event_spec, kernel_timer_t *created_timer_id) +__NR_sys_timer_settime 5217 sys_timer_settime
(kernel_timer_t timer_id, int flags, const struct itimerspec *new_setting, struct itimerspec *old_setting) +__NR_sys_timer_gettime 5218 sys_timer_gettime (int timer_id, const struct itimerspec *setting) +__NR_sys_timer_getoverrun 5219 sys_timer_getoverrun (int timer_id) +__NR_sys_timer_delete 5220 sys_timer_delete (kernel_timer_t timer_id) +__NR_clock_gettime 5222 sys_clock_gettime (const clockid_t which_clock, const struct timespec *tp) +__NR_exit_group 5205 sys_exit_group (int error_code) +__NR_set_thread_area 5242 sys_set_thread_area (unsigned long *addr) +__NR_openat 5247 sys_openat (int dfd, const char *filename, int flags, int mode) +__NR_waitid 5237 sys_waitid (int which, pid_t pid, struct siginfo *infop, int options, struct rusage *ru) +__NR_readlinkat 5257 sys_readlinkat (int fd, const char *path, char *buf, int bufsize) +__NR_ppoll 5261 sys_ppoll (struct pollfd *fds, unsigned int nfds, const struct timespec *tmo, const sigset_t *sigmask, size_t sigsetsize) +__NR_set_robust_list 5268 sys_set_robust_list (struct robust_list_head *head, size_t len) +__NR_get_robust_list 5269 sys_get_robust_list (int pid, struct robust_list_head **head_ptr, size_t *len_ptr) +__NR_fallocate 5279 sys_fallocate (int fd, int mode, loff_t offset, loff_t len) +__NR_seccomp 5312 sys_seccomp (unsigned int op, unsigned int flags, const char *uargs) +__NR_vmsplice 5266 sys_vmsplice (int fd, const struct iovec *iov, unsigned long nr_segs, unsigned int flags) +__NR_timerfd_settime 5282 sys_timerfd_settime (int ufd, int flags, const struct itimerspec *utmr, struct itimerspec *otmr) +__NR_signalfd4 5283 sys_signalfd (int fd, k_rtsigset_t *mask, size_t sizemask, int flags) +__NR_preadv 5289 sys_preadv_raw (int fd, struct iovec *iov, unsigned long nr, unsigned long pos_l, unsigned long pos_h) +__NR_rt_tgsigqueueinfo 5291 sys_rt_tgsigqueueinfo (pid_t tgid, pid_t pid, int sig, siginfo_t *info) +__NR_fanotify_init 5295 sys_fanotify_init (unsigned int flags, unsigned int event_f_flags) 
+__NR_fanotify_mark 5296 sys_fanotify_mark (int fanotify_fd, unsigned int flags, uint64_t mask, int dfd, const char *pathname) +__NR_open_by_handle_at 5299 sys_open_by_handle_at (int mountdirfd, struct file_handle *handle, int flags) +__NR_setns 5303 sys_setns (int fd, int nstype) +__NR_kcmp 5306 sys_kcmp (pid_t pid1, pid_t pid2, int type, unsigned long idx1, unsigned long idx2) +__NR_memfd_create 5314 sys_memfd_create (const char *name, unsigned int flags) +__NR_userfaultfd 5317 sys_userfaultfd (int flags) + +##TODO for kernel +__NR_fsopen 5430 sys_fsopen (char *fsname, unsigned int flags) +__NR_fsconfig 5431 sys_fsconfig (int fd, unsigned int cmd, const char *key, const char *value, int aux) +__NR_fsmount 5432 sys_fsmount (int fd, unsigned int flags, unsigned int attr_flags) +__NR_clone3 5435 sys_clone3 (struct clone_args *uargs, size_t size) \ No newline at end of file diff --git a/compel/arch/mips/scripts/compel-pack-compat.lds.S b/compel/arch/mips/scripts/compel-pack-compat.lds.S new file mode 100755 index 000000000..0ae585e0d --- /dev/null +++ b/compel/arch/mips/scripts/compel-pack-compat.lds.S @@ -0,0 +1,4 @@ +OUTPUT_ARCH(mips) +EXTERN(__export_parasite_head_start) +ASSERT(0,"Compatible PIEs are unsupported on mips") + diff --git a/compel/arch/mips/scripts/compel-pack.lds.S b/compel/arch/mips/scripts/compel-pack.lds.S new file mode 100755 index 000000000..cadb19aef --- /dev/null +++ b/compel/arch/mips/scripts/compel-pack.lds.S @@ -0,0 +1,37 @@ +OUTPUT_ARCH(mips) +EXTERN(__export_parasite_head_start) + +SECTIONS +{ + .text : { + *(.head.text) + ASSERT(DEFINED(__export_parasite_head_start), + "Symbol __export_parasite_head_start is missing"); + *(.text*) + *(.compel.exit) + *(.compel.init) + /* .rodata section*/ + *(.rodata*) + *(.got*) + /* .data section */ + *(.data*) + *(.bss*) + *(.sbss*) + *(.toc*) + } + + /DISCARD/ : { /* input sections to discard */ + *(.debug*) + *(.pdr) + *(.comment*) + *(.note*) + *(.group*) + *(.eh_frame*) + *(.MIPS.options) +
*(.gnu.attributes) + } + +/* Parasite args must be at least 4-byte aligned, as they contain a futex; we align to 32 bytes here. */ +. = ALIGN(32); +__export_parasite_args = .; +} diff --git a/compel/arch/mips/src/lib/cpu.c b/compel/arch/mips/src/lib/cpu.c new file mode 100755 index 000000000..4b071cd16 --- /dev/null +++ b/compel/arch/mips/src/lib/cpu.c @@ -0,0 +1,36 @@ +#include +#include + +#include "compel-cpu.h" +#include "common/bitops.h" +#include "common/compiler.h" +#include "log.h" + +#undef LOG_PREFIX +#define LOG_PREFIX "cpu: " + +static compel_cpuinfo_t rt_info; +static bool rt_info_done = false; + +void compel_set_cpu_cap(compel_cpuinfo_t *c, unsigned int feature){ } + +void compel_clear_cpu_cap(compel_cpuinfo_t *c, unsigned int feature){ } + +int compel_test_cpu_cap(compel_cpuinfo_t *c, unsigned int feature) +{ + return 0; +} + +int compel_cpuid(compel_cpuinfo_t *c){ + return 0; +} + +bool compel_cpu_has_feature(unsigned int feature) +{ + if (!rt_info_done) { + compel_cpuid(&rt_info); + rt_info_done = true; + } + + return compel_test_cpu_cap(&rt_info, feature); +} diff --git a/compel/arch/mips/src/lib/handle-elf-host.c b/compel/arch/mips/src/lib/handle-elf-host.c new file mode 120000 index 000000000..fe4611886 --- /dev/null +++ b/compel/arch/mips/src/lib/handle-elf-host.c @@ -0,0 +1 @@ +handle-elf.c \ No newline at end of file diff --git a/compel/arch/mips/src/lib/handle-elf.c b/compel/arch/mips/src/lib/handle-elf.c new file mode 100755 index 000000000..199bb1581 --- /dev/null +++ b/compel/arch/mips/src/lib/handle-elf.c @@ -0,0 +1,23 @@ +#include +#include + +#include "handle-elf.h" +#include "piegen.h" +#include "log.h" + +static const unsigned char __maybe_unused +elf_ident_64_le[EI_NIDENT] = { + 0x7f, 0x45, 0x4c, 0x46, 0x02, 0x01, 0x01, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +}; + +extern int __handle_elf(void *mem, size_t size); + +int handle_binary(void *mem, size_t size) +{ + if (memcmp(mem, elf_ident_64_le, sizeof(elf_ident_64_le)) == 0) + return
__handle_elf(mem, size); + + pr_err("Unsupported Elf format detected\n"); + return -EINVAL; +} diff --git a/compel/arch/mips/src/lib/include/handle-elf.h b/compel/arch/mips/src/lib/include/handle-elf.h new file mode 100755 index 000000000..f28188136 --- /dev/null +++ b/compel/arch/mips/src/lib/include/handle-elf.h @@ -0,0 +1,8 @@ +#ifndef COMPEL_HANDLE_ELF_H__ +#define COMPEL_HANDLE_ELF_H__ + +#include "elf64-types.h" + +#define arch_is_machine_supported(e_machine) (e_machine == EM_MIPS) + +#endif /* COMPEL_HANDLE_ELF_H__ */ diff --git a/compel/arch/mips/src/lib/include/ldsodefs.h b/compel/arch/mips/src/lib/include/ldsodefs.h new file mode 100755 index 000000000..6e2d4f549 --- /dev/null +++ b/compel/arch/mips/src/lib/include/ldsodefs.h @@ -0,0 +1,147 @@ +/* Run-time dynamic linker data structures for loaded ELF shared objects. + Copyright (C) 2000-2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + . 
*/ + +#ifndef _MIPS_LDSODEFS_H +#define _MIPS_LDSODEFS_H 1 + +#include + +struct La_mips_32_regs; +struct La_mips_32_retval; +struct La_mips_64_regs; +struct La_mips_64_retval; + +#define ARCH_PLTENTER_MEMBERS \ + Elf32_Addr (*mips_o32_gnu_pltenter) (Elf32_Sym *, unsigned int, \ + uintptr_t *, uintptr_t *, \ + struct La_mips_32_regs *, \ + unsigned int *, const char *name, \ + long int *framesizep); \ + Elf32_Addr (*mips_n32_gnu_pltenter) (Elf32_Sym *, unsigned int, \ + uintptr_t *, uintptr_t *, \ + struct La_mips_64_regs *, \ + unsigned int *, const char *name, \ + long int *framesizep); \ + Elf64_Addr (*mips_n64_gnu_pltenter) (Elf64_Sym *, unsigned int, \ + uintptr_t *, uintptr_t *, \ + struct La_mips_64_regs *, \ + unsigned int *, const char *name, \ + long int *framesizep); + +#define ARCH_PLTEXIT_MEMBERS \ + unsigned int (*mips_o32_gnu_pltexit) (Elf32_Sym *, unsigned int, \ + uintptr_t *, uintptr_t *, \ + const struct La_mips_32_regs *, \ + struct La_mips_32_retval *, \ + const char *); \ + unsigned int (*mips_n32_gnu_pltexit) (Elf32_Sym *, unsigned int, \ + uintptr_t *, uintptr_t *, \ + const struct La_mips_64_regs *, \ + struct La_mips_64_retval *, \ + const char *); \ + unsigned int (*mips_n64_gnu_pltexit) (Elf64_Sym *, unsigned int, \ + uintptr_t *, uintptr_t *, \ + const struct La_mips_64_regs *, \ + struct La_mips_64_retval *, \ + const char *); + +/* The MIPS ABI specifies that the dynamic section has to be read-only. */ + +/* The 64-bit MIPS ELF ABI uses an unusual reloc format. Each + relocation entry specifies up to three actual relocations, all at + the same address. The first relocation which required a symbol + uses the symbol in the r_sym field. The second relocation which + requires a symbol uses the symbol in the r_ssym field. If all + three relocations require a symbol, the third one uses a zero + value. + + We define these structures in internal headers because we're not + sure we want to make them part of the ABI yet. 
Eventually, some of + this may move into elf/elf.h. */ + +/* An entry in a 64 bit SHT_REL section. */ + +typedef struct +{ + Elf32_Word r_sym; /* Symbol index */ + unsigned char r_ssym; /* Special symbol for 2nd relocation */ + unsigned char r_type3; /* 3rd relocation type */ + unsigned char r_type2; /* 2nd relocation type */ + unsigned char r_type1; /* 1st relocation type */ +} _Elf64_Mips_R_Info; + +typedef union +{ + Elf64_Xword r_info_number; + _Elf64_Mips_R_Info r_info_fields; +} _Elf64_Mips_R_Info_union; + +typedef struct +{ + Elf64_Addr r_offset; /* Address */ + _Elf64_Mips_R_Info_union r_info; /* Relocation type and symbol index */ +} Elf64_Mips_Rel; + +typedef struct +{ + Elf64_Addr r_offset; /* Address */ + _Elf64_Mips_R_Info_union r_info; /* Relocation type and symbol index */ + Elf64_Sxword r_addend; /* Addend */ +} Elf64_Mips_Rela; + +#define ELF64_MIPS_R_SYM(i) \ + ((__extension__ (_Elf64_Mips_R_Info_union)(i)).r_info_fields.r_sym) +#define ELF64_MIPS_R_TYPE(i) \ + (((_Elf64_Mips_R_Info_union)(i)).r_info_fields.r_type1 \ + | ((Elf32_Word)(__extension__ (_Elf64_Mips_R_Info_union)(i) \ + ).r_info_fields.r_type2 << 8) \ + | ((Elf32_Word)(__extension__ (_Elf64_Mips_R_Info_union)(i) \ + ).r_info_fields.r_type3 << 16) \ + | ((Elf32_Word)(__extension__ (_Elf64_Mips_R_Info_union)(i) \ + ).r_info_fields.r_ssym << 24)) +#define ELF64_MIPS_R_INFO(sym, type) \ + (__extension__ (_Elf64_Mips_R_Info_union) \ + (__extension__ (_Elf64_Mips_R_Info) \ + { (sym), ELF64_MIPS_R_SSYM (type), \ + ELF64_MIPS_R_TYPE3 (type), \ + ELF64_MIPS_R_TYPE2 (type), \ + ELF64_MIPS_R_TYPE1 (type) \ + }).r_info_number) +/* These macros decompose the value returned by ELF64_MIPS_R_TYPE, and + compose it back into a value that it can be used as an argument to + ELF64_MIPS_R_INFO. 
*/ +#define ELF64_MIPS_R_SSYM(i) (((i) >> 24) & 0xff) +#define ELF64_MIPS_R_TYPE3(i) (((i) >> 16) & 0xff) +#define ELF64_MIPS_R_TYPE2(i) (((i) >> 8) & 0xff) +#define ELF64_MIPS_R_TYPE1(i) ((i) & 0xff) +#define ELF64_MIPS_R_TYPEENC(type1, type2, type3, ssym) \ + ((type1) \ + | ((Elf32_Word)(type2) << 8) \ + | ((Elf32_Word)(type3) << 16) \ + | ((Elf32_Word)(ssym) << 24)) + +#undef ELF64_R_SYM +#define ELF64_R_SYM(i) ELF64_MIPS_R_SYM (i) +#undef ELF64_R_TYPE + +/*fixme*/ +#define ELF64_R_TYPE(i) (ELF64_MIPS_R_TYPE (i) & 0x00ff) +#undef ELF64_R_INFO +#define ELF64_R_INFO(sym, type) ELF64_MIPS_R_INFO ((sym), (type)) + +#endif diff --git a/compel/arch/mips/src/lib/include/syscall.h b/compel/arch/mips/src/lib/include/syscall.h new file mode 100755 index 000000000..704080172 --- /dev/null +++ b/compel/arch/mips/src/lib/include/syscall.h @@ -0,0 +1,7 @@ +#ifndef __COMPEL_SYSCALL_H__ +#define __COMPEL_SYSCALL_H__ + +#ifndef SIGSTKFLT +#define SIGSTKFLT 16 +#endif +#endif diff --git a/compel/arch/mips/src/lib/include/uapi/asm/.gitignore b/compel/arch/mips/src/lib/include/uapi/asm/.gitignore new file mode 100755 index 000000000..e69de29bb diff --git a/compel/arch/mips/src/lib/include/uapi/asm/breakpoints.h b/compel/arch/mips/src/lib/include/uapi/asm/breakpoints.h new file mode 100755 index 000000000..21eb1309f --- /dev/null +++ b/compel/arch/mips/src/lib/include/uapi/asm/breakpoints.h @@ -0,0 +1,6 @@ +#ifndef __COMPEL_BREAKPOINTS_H__ +#define __COMPEL_BREAKPOINTS_H__ +#define ARCH_SI_TRAP TRAP_BRKPT +extern int ptrace_set_breakpoint(pid_t pid, void *addr); +extern int ptrace_flush_breakpoints(pid_t pid); +#endif diff --git a/compel/arch/mips/src/lib/include/uapi/asm/cpu.h b/compel/arch/mips/src/lib/include/uapi/asm/cpu.h new file mode 100755 index 000000000..329b9529b --- /dev/null +++ b/compel/arch/mips/src/lib/include/uapi/asm/cpu.h @@ -0,0 +1,5 @@ +#ifndef __CR_ASM_CPU_H__ +#define __CR_ASM_CPU_H__ + +typedef struct { } compel_cpuinfo_t; +#endif /* __CR_ASM_CPU_H__ */ diff 
--git a/compel/arch/mips/src/lib/include/uapi/asm/fpu.h b/compel/arch/mips/src/lib/include/uapi/asm/fpu.h new file mode 100644 index 000000000..7f476d541 --- /dev/null +++ b/compel/arch/mips/src/lib/include/uapi/asm/fpu.h @@ -0,0 +1,4 @@ +#ifndef __CR_ASM_FPU_H__ +#define __CR_ASM_FPU_H__ + +#endif /* __CR_ASM_FPU_H__ */ diff --git a/compel/arch/mips/src/lib/include/uapi/asm/infect-types.h b/compel/arch/mips/src/lib/include/uapi/asm/infect-types.h new file mode 100755 index 000000000..423880821 --- /dev/null +++ b/compel/arch/mips/src/lib/include/uapi/asm/infect-types.h @@ -0,0 +1,66 @@ +#ifndef UAPI_COMPEL_ASM_TYPES_H__ +#define UAPI_COMPEL_ASM_TYPES_H__ + +#include +#include +#include +#include +#include +#define SIGMAX 64 +#define SIGMAX_OLD 31 + +/* + + * Copied from the Linux kernel header arch/mips/include/asm/ptrace.h + * + * A thread MIPS CPU context + **/ +typedef struct { + /* Saved main processor registers. */ + __u64 regs[32]; + + /* Saved special registers. */ + __u64 lo; + __u64 hi; + __u64 cp0_epc; + __u64 cp0_badvaddr; + __u64 cp0_status; + __u64 cp0_cause; +} user_regs_struct_t; + +/* from linux-3.10/arch/mips/kernel/ptrace.c */ +typedef struct { + /* Saved fpu registers. */ + __u64 regs[32]; + + __u32 fpu_fcr31; + __u32 fpu_id; + + +} user_fpregs_struct_t; + +#define MIPS_a0 regs[4] //arguments a0-a3 +#define MIPS_t0 regs[8] //temporaries t0-t7 +#define MIPS_v0 regs[2] +#define MIPS_v1 regs[3] +#define MIPS_sp regs[29] +#define MIPS_ra regs[31] + + +#define NATIVE_MAGIC 0x0A +#define COMPAT_MAGIC 0x0C +static inline bool user_regs_native(user_regs_struct_t *pregs) +{ + return true; +} + + +#define REG_RES(regs) ((regs).MIPS_v0) +#define REG_IP(regs) ((regs).cp0_epc) +#define REG_SP(regs) ((regs).MIPS_sp) +#define REG_SYSCALL_NR(regs) ((regs).MIPS_v0) + +//#define __NR(syscall, compat) ((compat) ? 
__NR32_##syscall : __NR_##syscall) +#define __NR(syscall, compat) __NR_##syscall + +#endif /* UAPI_COMPEL_ASM_TYPES_H__ */ diff --git a/compel/arch/mips/src/lib/include/uapi/asm/sigframe.h b/compel/arch/mips/src/lib/include/uapi/asm/sigframe.h new file mode 100755 index 000000000..ed6a959fc --- /dev/null +++ b/compel/arch/mips/src/lib/include/uapi/asm/sigframe.h @@ -0,0 +1,63 @@ +#ifndef UAPI_COMPEL_ASM_SIGFRAME_H__ +#define UAPI_COMPEL_ASM_SIGFRAME_H__ + +#include +#include + +#include +#include + +#include +#define u32 __u32 + +/* sigcontext defined in /usr/include/asm/sigcontext.h*/ +#define rt_sigcontext sigcontext + + +#include + +/* refer to linux-3.10/include/uapi/asm-generic/ucontext.h */ +struct k_ucontext{ + unsigned long uc_flags; + struct k_ucontext *uc_link; + stack_t uc_stack; + struct sigcontext uc_mcontext; + k_rtsigset_t uc_sigmask; +}; + +/* Copy from the kernel source arch/mips/kernel/signal.c */ +struct rt_sigframe { + u32 rs_ass[4]; /* argument save space for o32 */ + u32 rs_pad[2]; /* Was: signal trampoline */ + siginfo_t rs_info; + struct k_ucontext rs_uc; +}; + + +#define RT_SIGFRAME_UC(rt_sigframe) (&rt_sigframe->rs_uc) +#define RT_SIGFRAME_UC_SIGMASK(rt_sigframe) ((k_rtsigset_t *)(void *)&rt_sigframe->rs_uc.uc_sigmask) +#define RT_SIGFRAME_REGIP(rt_sigframe) ((long unsigned int)0x00) +#define RT_SIGFRAME_FPU(rt_sigframe) +#define RT_SIGFRAME_HAS_FPU(rt_sigframe) 1 + + +#define RT_SIGFRAME_OFFSET(rt_sigframe) 0 + + +#define ARCH_RT_SIGRETURN(new_sp, rt_sigframe) \ + asm volatile( \ + "move $29, %0 \n" \ + "li $2, "__stringify(__NR_rt_sigreturn)" \n" \ + "syscall \n" \ + : \ + : "r"(new_sp) \ + : "$29","$2","memory") + +int sigreturn_prep_fpu_frame(struct rt_sigframe *sigframe, + struct rt_sigframe *rsigframe); + +#define rt_sigframe_erase_sigset(sigframe) \ + memset(&sigframe->rs_uc.uc_sigmask, 0, sizeof(k_rtsigset_t)) +#define rt_sigframe_copy_sigset(sigframe, from) \ + memcpy(&sigframe->rs_uc.uc_sigmask, from, sizeof(k_rtsigset_t)) 
+#endif /* UAPI_COMPEL_ASM_SIGFRAME_H__ */ diff --git a/compel/arch/mips/src/lib/include/uapi/asm/siginfo.h b/compel/arch/mips/src/lib/include/uapi/asm/siginfo.h new file mode 100755 index 000000000..519aea1a6 --- /dev/null +++ b/compel/arch/mips/src/lib/include/uapi/asm/siginfo.h @@ -0,0 +1,124 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1998, 1999, 2001, 2003 Ralf Baechle + * Copyright (C) 2000, 2001 Silicon Graphics, Inc. + */ +#ifndef _UAPI_ASM_SIGINFO_H +#define _UAPI_ASM_SIGINFO_H + + +#define __ARCH_SIGEV_PREAMBLE_SIZE (sizeof(long) + 2*sizeof(int)) +#undef __ARCH_SI_TRAPNO /* exception code needs to fill this ... */ + +#define HAVE_ARCH_SIGINFO_T + +/* + * Careful to keep union _sifields from shifting ... + */ + +#define __ARCH_SI_PREAMBLE_SIZE (4 * sizeof(int)) + +#define __ARCH_SIGSYS + +#define SI_MAX_SIZE 128 +#define SI_PAD_SIZE ((SI_MAX_SIZE - __ARCH_SI_PREAMBLE_SIZE) / sizeof(int)) +#define __ARCH_SI_UID_T __kernel_uid32_t + +#ifndef __ARCH_SI_UID_T +#define __ARCH_SI_UID_T __kernel_uid32_t +#endif + +#ifndef __ARCH_SI_BAND_T +#define __ARCH_SI_BAND_T long +#endif + +#ifndef __ARCH_SI_CLOCK_T +#define __ARCH_SI_CLOCK_T __kernel_clock_t +#endif + +#ifndef __ARCH_SI_ATTRIBUTES +#define __ARCH_SI_ATTRIBUTES +#endif + +typedef struct siginfo { + int si_signo; + int si_errno; + int si_code; + + union { + int _pad[SI_PAD_SIZE]; + + /* kill() */ + struct { + __kernel_pid_t _pid; /* sender's pid */ + __ARCH_SI_UID_T _uid; /* sender's uid */ + } _kill; + + /* POSIX.1b timers */ + struct { + __kernel_timer_t _tid; /* timer id */ + int _overrun; /* overrun count */ + char _pad[sizeof( __ARCH_SI_UID_T) - sizeof(int)]; + sigval_t _sigval; /* same as below */ + int _sys_private; /* not to be passed to user */ + } _timer; + + /* POSIX.1b signals */ + struct { + __kernel_pid_t _pid; /* sender's pid */ + 
__ARCH_SI_UID_T _uid; /* sender's uid */ + sigval_t _sigval; + } _rt; + + /* SIGCHLD */ + struct { + __kernel_pid_t _pid; /* which child */ + __ARCH_SI_UID_T _uid; /* sender's uid */ + int _status; /* exit code */ + __ARCH_SI_CLOCK_T _utime; + __ARCH_SI_CLOCK_T _stime; + } _sigchld; + + /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */ + struct { + void *_addr; /* faulting insn/memory ref. */ +#ifdef __ARCH_SI_TRAPNO + int _trapno; /* TRAP # which caused the signal */ +#endif + short _addr_lsb; /* LSB of the reported address */ +#ifndef __GENKSYMS__ + struct { + void *_lower; + void *_upper; + } _addr_bnd; +#endif + } _sigfault; + + /* SIGPOLL */ + struct { + __ARCH_SI_BAND_T _band; /* POLL_IN, POLL_OUT, POLL_MSG */ + int _fd; + } _sigpoll; + + /* SIGSYS */ + struct { + void *_call_addr; /* calling user insn */ + int _syscall; /* triggering system call number */ + unsigned int _arch; /* AUDIT_ARCH_* of syscall */ + } _sigsys; + } _sifields; +} __ARCH_SI_ATTRIBUTES siginfo_t; + +/* + * si_code values + * Again these have been chosen to be IRIX compatible. 
+ */ +#undef SI_ASYNCIO +#undef SI_TIMER +#undef SI_MESGQ +#define SI_ASYNCIO -2 /* sent by AIO completion */ + +#endif /* _UAPI_ASM_SIGINFO_H */ diff --git a/compel/arch/mips/src/lib/infect.c b/compel/arch/mips/src/lib/infect.c new file mode 100755 index 000000000..a7dcea55a --- /dev/null +++ b/compel/arch/mips/src/lib/infect.c @@ -0,0 +1,310 @@ +#include +#include +#include +#include +#include +#include +#include "errno.h" +#include +#include +#include "common/err.h" +#include "common/page.h" +#include "asm/infect-types.h" +#include "ptrace.h" +#include "infect.h" +#include "infect-priv.h" +#include "log.h" +#include "common/bug.h" +/* + * Injected syscall instruction + * mips64el is Little Endian + */ +const char code_syscall[] = { + 0x0c, 0x00, 0x00, 0x00, /* syscall */ + 0x0d, 0x00, 0x00, 0x00 /* break */ +}; + +/* 10-byte legacy floating point register */ +struct fpreg { + uint16_t significand[4]; + uint16_t exponent; +}; + +/* 16-byte floating point register */ +struct fpxreg { + uint16_t significand[4]; + uint16_t exponent; + uint16_t padding[3]; +}; + + +int sigreturn_prep_regs_plain(struct rt_sigframe *sigframe, + user_regs_struct_t *regs, + user_fpregs_struct_t *fpregs) +{ + sigframe->rs_uc.uc_mcontext.sc_regs[0] = regs->regs[0]; + sigframe->rs_uc.uc_mcontext.sc_regs[1] = regs->regs[1]; + sigframe->rs_uc.uc_mcontext.sc_regs[2] = regs->regs[2]; + sigframe->rs_uc.uc_mcontext.sc_regs[3] = regs->regs[3]; + sigframe->rs_uc.uc_mcontext.sc_regs[4] = regs->regs[4]; + sigframe->rs_uc.uc_mcontext.sc_regs[5] = regs->regs[5]; + sigframe->rs_uc.uc_mcontext.sc_regs[6] = regs->regs[6]; + sigframe->rs_uc.uc_mcontext.sc_regs[7] = regs->regs[7]; + sigframe->rs_uc.uc_mcontext.sc_regs[8] = regs->regs[8]; + sigframe->rs_uc.uc_mcontext.sc_regs[9] = regs->regs[9]; + sigframe->rs_uc.uc_mcontext.sc_regs[10] = regs->regs[10]; + sigframe->rs_uc.uc_mcontext.sc_regs[11] = regs->regs[11]; + sigframe->rs_uc.uc_mcontext.sc_regs[12] = regs->regs[12]; + 
sigframe->rs_uc.uc_mcontext.sc_regs[13] = regs->regs[13]; + sigframe->rs_uc.uc_mcontext.sc_regs[14] = regs->regs[14]; + sigframe->rs_uc.uc_mcontext.sc_regs[15] = regs->regs[15]; + sigframe->rs_uc.uc_mcontext.sc_regs[16] = regs->regs[16]; + sigframe->rs_uc.uc_mcontext.sc_regs[17] = regs->regs[17]; + sigframe->rs_uc.uc_mcontext.sc_regs[18] = regs->regs[18]; + sigframe->rs_uc.uc_mcontext.sc_regs[19] = regs->regs[19]; + sigframe->rs_uc.uc_mcontext.sc_regs[20] = regs->regs[20]; + sigframe->rs_uc.uc_mcontext.sc_regs[21] = regs->regs[21]; + sigframe->rs_uc.uc_mcontext.sc_regs[22] = regs->regs[22]; + sigframe->rs_uc.uc_mcontext.sc_regs[23] = regs->regs[23]; + sigframe->rs_uc.uc_mcontext.sc_regs[24] = regs->regs[24]; + sigframe->rs_uc.uc_mcontext.sc_regs[25] = regs->regs[25]; + sigframe->rs_uc.uc_mcontext.sc_regs[26] = regs->regs[26]; + sigframe->rs_uc.uc_mcontext.sc_regs[27] = regs->regs[27]; + sigframe->rs_uc.uc_mcontext.sc_regs[28] = regs->regs[28]; + sigframe->rs_uc.uc_mcontext.sc_regs[29] = regs->regs[29]; + sigframe->rs_uc.uc_mcontext.sc_regs[30] = regs->regs[30]; + sigframe->rs_uc.uc_mcontext.sc_regs[31] = regs->regs[31]; + sigframe->rs_uc.uc_mcontext.sc_mdlo = regs->lo; + sigframe->rs_uc.uc_mcontext.sc_mdhi = regs->hi; + sigframe->rs_uc.uc_mcontext.sc_pc = regs->cp0_epc; + + sigframe->rs_uc.uc_mcontext.sc_fpregs[0] = fpregs->regs[0]; + sigframe->rs_uc.uc_mcontext.sc_fpregs[1] = fpregs->regs[1]; + sigframe->rs_uc.uc_mcontext.sc_fpregs[2] = fpregs->regs[2]; + sigframe->rs_uc.uc_mcontext.sc_fpregs[3] = fpregs->regs[3]; + sigframe->rs_uc.uc_mcontext.sc_fpregs[4] = fpregs->regs[4]; + sigframe->rs_uc.uc_mcontext.sc_fpregs[5] = fpregs->regs[5]; + sigframe->rs_uc.uc_mcontext.sc_fpregs[6] = fpregs->regs[6]; + sigframe->rs_uc.uc_mcontext.sc_fpregs[7] = fpregs->regs[7]; + sigframe->rs_uc.uc_mcontext.sc_fpregs[8] = fpregs->regs[8]; + sigframe->rs_uc.uc_mcontext.sc_fpregs[9] = fpregs->regs[9]; + sigframe->rs_uc.uc_mcontext.sc_fpregs[10] = fpregs->regs[10]; + 
sigframe->rs_uc.uc_mcontext.sc_fpregs[11] = fpregs->regs[11]; + sigframe->rs_uc.uc_mcontext.sc_fpregs[12] = fpregs->regs[12]; + sigframe->rs_uc.uc_mcontext.sc_fpregs[13] = fpregs->regs[13]; + sigframe->rs_uc.uc_mcontext.sc_fpregs[14] = fpregs->regs[14]; + sigframe->rs_uc.uc_mcontext.sc_fpregs[15] = fpregs->regs[15]; + sigframe->rs_uc.uc_mcontext.sc_fpregs[16] = fpregs->regs[16]; + sigframe->rs_uc.uc_mcontext.sc_fpregs[17] = fpregs->regs[17]; + sigframe->rs_uc.uc_mcontext.sc_fpregs[18] = fpregs->regs[18]; + sigframe->rs_uc.uc_mcontext.sc_fpregs[19] = fpregs->regs[19]; + sigframe->rs_uc.uc_mcontext.sc_fpregs[20] = fpregs->regs[20]; + sigframe->rs_uc.uc_mcontext.sc_fpregs[21] = fpregs->regs[21]; + sigframe->rs_uc.uc_mcontext.sc_fpregs[22] = fpregs->regs[22]; + sigframe->rs_uc.uc_mcontext.sc_fpregs[23] = fpregs->regs[23]; + sigframe->rs_uc.uc_mcontext.sc_fpregs[24] = fpregs->regs[24]; + sigframe->rs_uc.uc_mcontext.sc_fpregs[25] = fpregs->regs[25]; + sigframe->rs_uc.uc_mcontext.sc_fpregs[26] = fpregs->regs[26]; + sigframe->rs_uc.uc_mcontext.sc_fpregs[27] = fpregs->regs[27]; + sigframe->rs_uc.uc_mcontext.sc_fpregs[28] = fpregs->regs[28]; + sigframe->rs_uc.uc_mcontext.sc_fpregs[29] = fpregs->regs[29]; + sigframe->rs_uc.uc_mcontext.sc_fpregs[30] = fpregs->regs[30]; + sigframe->rs_uc.uc_mcontext.sc_fpregs[31] = fpregs->regs[31]; + + return 0; +} + +int sigreturn_prep_fpu_frame_plain(struct rt_sigframe *sigframe, + struct rt_sigframe *rsigframe) +{ + return 0; +} + +int get_task_regs(pid_t pid, user_regs_struct_t *regs, save_regs_t save, + void *arg, __maybe_unused unsigned long flags) +{ + user_fpregs_struct_t xsave = { }, *xs = NULL; + int ret = -1; + + if (ptrace(PTRACE_GETFPREGS, pid, NULL, &xsave)) { + pr_perror("Can't obtain FPU registers for %d", pid); + return ret; + } + + /*Restart the system call*/ + if (regs->regs[0]){ + switch ((long)(int)regs->regs[2]) { + case ERESTARTNOHAND: + case ERESTARTSYS: + case ERESTARTNOINTR: + regs->regs[2] = regs->regs[0]; + 
regs->regs[7] = regs->regs[26]; + regs->cp0_epc -= 4; + break; + case ERESTART_RESTARTBLOCK: + regs->regs[2] = __NR_restart_syscall; + regs->regs[7] = regs->regs[26]; + regs->cp0_epc -= 4; + break; + } + regs->regs[0] = 0; + } + + xs = &xsave; + ret = save(arg, regs, xs); + return ret; +} + +int compel_syscall(struct parasite_ctl *ctl, int nr, long *ret, + unsigned long arg1, + unsigned long arg2, + unsigned long arg3, + unsigned long arg4, + unsigned long arg5, + unsigned long arg6) +{ + /*refer to glibc-2.20/sysdeps/unix/sysv/linux/mips/mips64/syscall.S*/ + user_regs_struct_t regs = ctl->orig.regs; + int err; + + regs.regs[2] = (unsigned long)nr; //syscall_number will be in v0 + regs.regs[4] = arg1; + regs.regs[5] = arg2; + regs.regs[6] = arg3; + regs.regs[7] = arg4; + regs.regs[8] = arg5; + regs.regs[9] = arg6; + + err = compel_execute_syscall(ctl, &regs, code_syscall); + *ret = regs.regs[2]; + + return err; +} + +void *remote_mmap(struct parasite_ctl *ctl, + void *addr, size_t length, int prot, + int flags, int fd, off_t offset) +{ + long map; + int err; + + err = compel_syscall(ctl, __NR_mmap, &map, + (unsigned long)addr, length, prot, flags, fd, offset >> PAGE_SHIFT); + + if (err < 0 || IS_ERR_VALUE(map)) { + pr_err("remote mmap() failed: %s\n", strerror(-map)); + return NULL; + } + + return (void *)map; +} + +/* + * regs must be inited when calling this function from original context + */ +void parasite_setup_regs(unsigned long new_ip, void *stack, user_regs_struct_t *regs) +{ + regs->cp0_epc = new_ip; + if (stack){ + /* regs[29] is sp */ + regs->regs[29] = (unsigned long)stack; + } +} + +bool arch_can_dump_task(struct parasite_ctl *ctl) +{ + return true; +} + +int arch_fetch_sas(struct parasite_ctl *ctl, struct rt_sigframe *s) +{ + long ret; + int err; + + err = compel_syscall(ctl, __NR_sigaltstack, + &ret, 0, (unsigned long)&s->rs_uc.uc_stack, + 0, 0, 0, 0); + return err ?
err : ret; +} + + +int ptrace_set_breakpoint(pid_t pid, void *addr) +{ + return 0; +} + +int ptrace_flush_breakpoints(pid_t pid) +{ + return 0; +} + +/*refer to kernel linux-3.10/arch/mips/include/asm/processor.h*/ +#define TASK_SIZE32 0x7fff8000UL +#define TASK_SIZE64 0x10000000000UL +#define TASK_SIZE TASK_SIZE64 + +unsigned long compel_task_size(void) { return TASK_SIZE; } + +/* + * Get task registers (overwrites weak function) + * + */ +int ptrace_get_regs(int pid, user_regs_struct_t *regs) +{ + return ptrace(PTRACE_GETREGS, pid, NULL, regs); +} + +/* + * Set task registers (overwrites weak function) + */ +int ptrace_set_regs(int pid, user_regs_struct_t *regs) +{ + return ptrace(PTRACE_SETREGS, pid, NULL, regs); +} + +void compel_relocs_apply_mips(void *mem, void *vbase, compel_reloc_t *elf_relocs, size_t nr_relocs) +{ + size_t i, j; + + /* + * mips rebasing :load time relocation + * parasite.built-in.o and restorer.built-in.o is ELF 64-bit LSB relocatable for mips. + * so we have to relocate some type for R_MIPS_26 R_MIPS_HIGHEST R_MIPS_HIGHER R_MIPS_HI16 and R_MIPS_LO16 in there. 
+ * for mips64el .if toload/store data or jump instruct ,need to relocation R_TYPE + */ + for (i = 0, j = 0; i < nr_relocs; i++) { + if (elf_relocs[i].type & COMPEL_TYPE_MIPS_26) { + int *where = (mem + elf_relocs[i].offset); + *where = *where | ((elf_relocs[i].addend + ((unsigned long)vbase & 0x00fffffff) /*low 28 bit*/)>>2); + + } else if (elf_relocs[i].type & COMPEL_TYPE_MIPS_64) { + unsigned long *where = (mem + elf_relocs[i].offset); + *where = elf_relocs[i].addend + (unsigned long)vbase; + + } else if (elf_relocs[i].type & COMPEL_TYPE_MIPS_HI16) { + /* refer to binutils mips.cc */ + int *where = (mem + elf_relocs[i].offset); + int v_lo16 = (unsigned long)vbase &0x00ffff; + + if ((v_lo16+elf_relocs[i].value+elf_relocs[i].addend) >= 0x8000){ + *where = *where | ((((unsigned long)vbase>>16) &0xffff)+0x1); + } else { + *where = *where | ((((unsigned long)vbase>>16) &0xffff)); + } + + } else if (elf_relocs[i].type & COMPEL_TYPE_MIPS_LO16) { + int *where = (mem + elf_relocs[i].offset); + int v_lo16 = (unsigned long)vbase &0x00ffff; + *where = *where | ((v_lo16 + elf_relocs[i].addend) & 0xffff); + + } else if (elf_relocs[i].type & COMPEL_TYPE_MIPS_HIGHER) { + int *where = (mem + elf_relocs[i].offset); + *where = *where | ((( (unsigned long)vbase + (uint64_t) 0x80008000) >> 32) & 0xffff); + + } else if (elf_relocs[i].type & COMPEL_TYPE_MIPS_HIGHEST) { + int *where = (mem + elf_relocs[i].offset); + *where = *where | ((( (unsigned long)vbase + (uint64_t) 0x800080008000llu) >> 48) & 0xffff); + } else + BUG(); + } +} diff --git a/include/common/arch/mips/asm/atomic.h b/include/common/arch/mips/asm/atomic.h new file mode 100755 index 000000000..600e3a70f --- /dev/null +++ b/include/common/arch/mips/asm/atomic.h @@ -0,0 +1,148 @@ +#ifndef __CR_ATOMIC_H__ +#define __CR_ATOMIC_H__ + +#include +#include "common/compiler.h" +#include "common/arch/mips/asm/utils.h" +#include "common/arch/mips/asm/cmpxchg.h" + +/* + * atomic_read - read atomic variable + * @v: pointer of type 
atomic_t + * + * Atomically reads the value of @v. + */ +#define atomic_read(v) (*(volatile int *)&(v)->counter) + +/* + * atomic_set - set atomic variable + * @v: pointer of type atomic_t + * @i: required value + * + * Atomically sets the value of @v to @i. + */ +#define atomic_set(v, i) ((v)->counter = (i)) +/* + * atomic_add - add integer to atomic variable + * @i: integer value to add + * @v: pointer of type atomic_t + * + * Atomically adds @i to @v. + */ + +static __inline__ void atomic_add(int i, atomic_t * v) +{ + int temp; + + do { + __asm__ __volatile__( + " .set mips3 \n" + " ll %0, %1 # atomic_add \n" + " addu %0, %2 \n" + " sc %0, %1 \n" + " .set mips0 \n" + : "=&r" (temp), "+m" (v->counter) + : "Ir" (i)); + } while (unlikely(!temp)); +} + +/* + * atomic_sub - subtract the atomic variable + * @i: integer value to subtract + * @v: pointer of type atomic_t + * + * Atomically subtracts @i from @v. + */ +static __inline__ void atomic_sub(int i, atomic_t * v) +{ + int temp; + + do { + __asm__ __volatile__( + " .set mips3 \n" + " ll %0, %1 # atomic_sub \n" + " subu %0, %2 \n" + " sc %0, %1 \n" + " .set mips0 \n" + : "=&r" (temp), "+m" (v->counter) + : "Ir" (i)); + } while (unlikely(!temp)); +} + +/* + * Same as above, but return the result value + */ +static __inline__ int atomic_add_return(int i, atomic_t * v) +{ + int result; + + smp_mb__before_llsc(); + + + int temp; + + do { + __asm__ __volatile__( + " .set mips3 \n" + " ll %1, %2 # atomic_add_return \n" + " addu %0, %1, %3 \n" + " sc %0, %2 \n" + " .set mips0 \n" + : "=&r" (result), "=&r" (temp), "+m" (v->counter) + : "Ir" (i)); + } while (unlikely(!result)); + + result = temp + i; + + smp_llsc_mb(); + + return result; +} + +static __inline__ int atomic_sub_return(int i, atomic_t * v) +{ + int result; + + smp_mb__before_llsc(); + + int temp; + + do { + __asm__ __volatile__( + " .set mips3 \n" + " ll %1, %2 # atomic_sub_return \n" + " subu %0, %1, %3 \n" + " sc %0, %2 \n" + " .set mips0 \n" + : "=&r" 
(result), "=&r" (temp), "+m" (v->counter) + : "Ir" (i)); + } while (unlikely(!result)); + + result = temp - i; + + smp_llsc_mb(); + + return result; +} + +#define atomic_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n))) +#define atomic_dec_return(v) atomic_sub_return(1, (v)) +#define atomic_inc_return(v) atomic_add_return(1, (v)) + +/* + * atomic_inc - increment atomic variable + * @v: pointer of type atomic_t + * + * Atomically increments @v by 1. + */ +#define atomic_inc( v) atomic_add(1, (v)) + +/* + * atomic_dec - decrement atomic variable + * @v: pointer of type atomic_t + * + * Atomically decrements @v by 1. + */ +#define atomic_dec(v) atomic_sub(1, (v)) + +#endif /* __CR_ATOMIC_H__ */ diff --git a/include/common/arch/mips/asm/bitops.h b/include/common/arch/mips/asm/bitops.h new file mode 100644 index 000000000..874845e45 --- /dev/null +++ b/include/common/arch/mips/asm/bitops.h @@ -0,0 +1,41 @@ +#ifndef _LINUX_BITOPS_H +#define _LINUX_BITOPS_H +#include +#include "common/compiler.h" +#include "common/asm-generic/bitops.h" + +/** + * test_and_set_bit - Set a bit and return its old value + * @nr: Bit to set + * @addr: Address to count from + * + * This operation is atomic and cannot be reordered. + * It also implies a memory barrier.
+ */ + +static inline int test_and_set_bit(unsigned long nr, + volatile unsigned long *addr) +{ + unsigned long *m = ((unsigned long *) addr) + (nr >> 6); + unsigned long temp = 0; + unsigned long res; + int bit = nr & 63UL; + + do { + __asm__ __volatile__( + " .set mips3 \n" + " lld %0, %1 # test_and_set_bit \n" + " or %2, %0, %3 \n" + " scd %2, %1 \n" + " .set mips0 \n" + : "=&r" (temp), "+m" (*m), "=&r" (res) + : "r" (1UL << bit) + : "memory"); + } while (unlikely(!res)); + + res = temp & (1UL << bit); + + return res != 0; +} + +#endif diff --git a/include/common/arch/mips/asm/bitsperlong.h b/include/common/arch/mips/asm/bitsperlong.h new file mode 100755 index 000000000..31aece3b6 --- /dev/null +++ b/include/common/arch/mips/asm/bitsperlong.h @@ -0,0 +1,6 @@ +#ifndef __CR_BITSPERLONG_H__ +#define __CR_BITSPERLONG_H__ + +# define BITS_PER_LONG 64 + +#endif /* __CR_BITSPERLONG_H__ */ diff --git a/include/common/arch/mips/asm/cmpxchg.h b/include/common/arch/mips/asm/cmpxchg.h new file mode 100755 index 000000000..bdc41390b --- /dev/null +++ b/include/common/arch/mips/asm/cmpxchg.h @@ -0,0 +1,67 @@ +#ifndef __CR_CMPXCHG_H__ +#define __CR_CMPXCHG_H__ + +#define __cmpxchg_asm(ld, st, m, old, new) \ +({ \ + __typeof(*(m)) __ret; \ + \ + if (kernel_uses_llsc) { \ + __asm__ __volatile__( \ + " .set push \n" \ + " .set noat \n" \ + " .set mips3 \n" \ + "1: " ld " %0, %2 # __cmpxchg_asm \n" \ + " bne %0, %z3, 2f \n" \ + " .set mips0 \n" \ + " move $1, %z4 \n" \ + " .set mips3 \n" \ + " " st " $1, %1 \n" \ + " beqz $1, 1b \n" \ + " .set pop \n" \ + "2: \n" \ + : "=&r" (__ret), "=R" (*m) \ + : "R" (*m), "Jr" (old), "Jr" (new) \ + : "memory"); \ + } else { \ + } \ + \ + __ret; \ +}) +/* + * This function doesn't exist, so you'll get a linker error + * if something tries to do an invalid cmpxchg(). 
+ */ +extern void __cmpxchg_called_with_bad_pointer(void); + +#define __cmpxchg(ptr, old, new, pre_barrier, post_barrier) \ +({ \ + __typeof__(ptr) __ptr = (ptr); \ + __typeof__(*(ptr)) __old = (old); \ + __typeof__(*(ptr)) __new = (new); \ + __typeof__(*(ptr)) __res = 0; \ + \ + pre_barrier; \ + \ + switch (sizeof(*(__ptr))) { \ + case 4: \ + __res = __cmpxchg_asm("ll", "sc", __ptr, __old, __new); \ + break; \ + case 8: \ + if (sizeof(long) == 8) { \ + __res = __cmpxchg_asm("lld", "scd", __ptr, \ + __old, __new); \ + break; \ + } \ + default: \ + __cmpxchg_called_with_bad_pointer(); \ + break; \ + } \ + \ + post_barrier; \ + \ + __res; \ +}) + +#define cmpxchg(ptr, old, new) __cmpxchg(ptr, old, new, smp_mb__before_llsc(), smp_llsc_mb()) + +#endif /* __CR_CMPXCHG_H__ */ diff --git a/include/common/arch/mips/asm/fls64.h b/include/common/arch/mips/asm/fls64.h new file mode 100644 index 000000000..fdae28513 --- /dev/null +++ b/include/common/arch/mips/asm/fls64.h @@ -0,0 +1,38 @@ +#ifndef _ASM_GENERIC_BITOPS_FLS64_H_ +#define _ASM_GENERIC_BITOPS_FLS64_H_ + +#include + +/** + * fls64 - find last set bit in a 64-bit word + * @x: the word to search + * + * This is defined in a similar way as the libc and compiler builtin + * ffsll, but returns the position of the most significant set bit. + * + * fls64(value) returns 0 if value is 0 or the position of the last + * set bit if value is nonzero. The last (most significant) bit is + * at position 64. 
+ */ +#include "common/arch/mips/asm/bitops.h" +#if BITS_PER_LONG == 32 +static __always_inline int fls64(__u64 x) +{ + __u32 h = x >> 32; + if (h) + return fls(h) + 32; + return fls(x); +} +#elif BITS_PER_LONG == 64 +extern unsigned long __fls(unsigned long word); +static __always_inline int fls64(__u64 x) +{ + if (x == 0) + return 0; + return __fls(x) + 1; +} +#else +#error BITS_PER_LONG not 32 or 64 +#endif + +#endif /* _ASM_GENERIC_BITOPS_FLS64_H_ */ diff --git a/include/common/arch/mips/asm/linkage.h b/include/common/arch/mips/asm/linkage.h new file mode 100644 index 000000000..8f2426889 --- /dev/null +++ b/include/common/arch/mips/asm/linkage.h @@ -0,0 +1,58 @@ +#ifndef __CR_LINKAGE_H__ +#define __CR_LINKAGE_H__ + +#define zero $0 /* wired zero */ +#define AT $1 /* assembler temp - uppercase because of ".set at" */ +#define v0 $2 +#define v1 $3 + +#define a0 $4 +#define a1 $5 +#define a2 $6 +#define a3 $7 +#define a4 $8 +#define a5 $9 +#define a6 $10 +#define a7 $11 +#define t0 $12 +#define t1 $13 +#define t2 $14 +#define t3 $15 + +#define s0 $16 /* callee saved */ +#define s1 $17 +#define s2 $18 +#define s3 $19 +#define s4 $20 +#define s5 $21 +#define s6 $22 +#define s7 $23 +#define t8 $24 /* caller saved */ +#define t9 $25 +#define jp $25 /* PIC jump register */ +#define k0 $26 /* kernel scratch */ +#define k1 $27 +#define gp $28 /* global pointer */ +#define sp $29 /* stack pointer */ +#define fp $30 /* frame pointer */ +#define s8 $30 /* same like fp! */ +#define ra $31 /* return address */ + +#define __ALIGN .align 8 +#define __ALIGN_STR ".align 8" + +#define GLOBAL(name) \ + .globl name; \ + name: + +#define ENTRY(name) \ + .globl name; \ + __ALIGN; \ + .type name, @function; \ + name: + +#define END(sym) \ + .size sym, . 
- sym + + +#endif /* __CR_LINKAGE_H__ */ diff --git a/include/common/arch/mips/asm/page.h b/include/common/arch/mips/asm/page.h new file mode 100755 index 000000000..bf27420f7 --- /dev/null +++ b/include/common/arch/mips/asm/page.h @@ -0,0 +1,39 @@ +#ifndef __CR_ASM_PAGE_H__ +#define __CR_ASM_PAGE_H__ + +#define ARCH_HAS_LONG_PAGES + +#ifndef CR_NOGLIBC +#include /* ffsl() */ +#include /* _SC_PAGESIZE */ + +static unsigned __page_size; +static unsigned __page_shift; + +static inline unsigned page_size(void) +{ + if (!__page_size) + __page_size = sysconf(_SC_PAGESIZE); + return __page_size; +} + +static inline unsigned page_shift(void) +{ + if (!__page_shift) + __page_shift = (ffsl(page_size()) - 1); + return __page_shift; +} + +#define PAGE_SIZE page_size() +#define PAGE_SHIFT page_shift() +#define PAGE_MASK (~(PAGE_SIZE - 1)) + +#define PAGE_PFN(addr) ((addr) / PAGE_SIZE) +#else /* CR_NOGLIBC */ + +extern unsigned page_size(void); +#define PAGE_SIZE page_size() + +#endif /* CR_NOGLIBC */ + +#endif /* __CR_ASM_PAGE_H__ */ diff --git a/include/common/arch/mips/asm/utils.h b/include/common/arch/mips/asm/utils.h new file mode 100644 index 000000000..6415bf485 --- /dev/null +++ b/include/common/arch/mips/asm/utils.h @@ -0,0 +1,24 @@ +#ifndef __UTILS_H__ +#define __UTILS_H__ + + +# define kernel_uses_llsc 1 + +typedef struct { + int counter; +}atomic_t; + + +/* + * FIXME: detect with compel_cpu_has_feature() if LL/SC implicitly + * provide a memory barrier. 
+*/ +#define __WEAK_LLSC_MB " sync \n" + +#define smp_llsc_mb() __asm__ __volatile__(__WEAK_LLSC_MB : : :"memory") + +#define smp_mb__before_llsc() smp_llsc_mb() +#define smp_mb__before_atomic() smp_mb__before_llsc() +#define smp_mb__after_atomic() smp_llsc_mb() + +#endif /* __UTILS_H__ */ From e7d13b368db95c57d0a20ea016d6b0517e19d3b8 Mon Sep 17 00:00:00 2001 From: Guoyun Sun Date: Wed, 8 Apr 2020 10:21:11 +0800 Subject: [PATCH 0235/1854] mips:compel: Enable mips in compel/ Signed-off-by: Guoyun Sun --- compel/include/uapi/handle-elf.h | 9 ++++- compel/include/uapi/infect.h | 1 + compel/plugins/Makefile | 14 +++++++ compel/src/lib/handle-elf.c | 65 +++++++++++++++++++++++++++++++- compel/src/lib/infect.c | 5 ++- compel/src/main.c | 3 ++ 6 files changed, 93 insertions(+), 4 deletions(-) diff --git a/compel/include/uapi/handle-elf.h b/compel/include/uapi/handle-elf.h index ddeecb0d5..ba40be57f 100644 --- a/compel/include/uapi/handle-elf.h +++ b/compel/include/uapi/handle-elf.h @@ -4,7 +4,14 @@ #define COMPEL_TYPE_INT (1u << 0) #define COMPEL_TYPE_LONG (1u << 1) #define COMPEL_TYPE_GOTPCREL (1u << 2) - +#ifdef CONFIG_MIPS +#define COMPEL_TYPE_MIPS_26 (1u << 3) +#define COMPEL_TYPE_MIPS_HI16 (1u << 4) +#define COMPEL_TYPE_MIPS_LO16 (1u << 5) +#define COMPEL_TYPE_MIPS_HIGHER (1u << 6) +#define COMPEL_TYPE_MIPS_HIGHEST (1u << 7) +#define COMPEL_TYPE_MIPS_64 (1u << 8) +#endif typedef struct { unsigned int offset; unsigned int type; diff --git a/compel/include/uapi/infect.h b/compel/include/uapi/infect.h index dd672bc1c..4f14c7270 100644 --- a/compel/include/uapi/infect.h +++ b/compel/include/uapi/infect.h @@ -165,6 +165,7 @@ extern struct parasite_blob_desc *compel_parasite_blob_desc(struct parasite_ctl extern int __must_check compel_get_thread_regs(struct parasite_thread_ctl *, save_regs_t, void *); extern void compel_relocs_apply(void *mem, void *vbase, size_t size, compel_reloc_t *elf_relocs, size_t nr_relocs); +extern void compel_relocs_apply_mips(void *mem, void 
*vbase, compel_reloc_t *elf_relocs, size_t nr_relocs); extern unsigned long compel_task_size(void); diff --git a/compel/plugins/Makefile b/compel/plugins/Makefile index 197ff1b24..e5fa781ac 100644 --- a/compel/plugins/Makefile +++ b/compel/plugins/Makefile @@ -16,7 +16,12 @@ asflags-y += -I compel/include/uapi # General compel includes ccflags-y += -iquote compel/include + +ifeq ($(ARCH),mips) +ccflags-y += -mno-abicalls -fno-pic -fno-stack-protector +else ccflags-y += -fpie -fno-stack-protector +endif # General compel/plugins includes ccflags-y += -iquote $(obj)/include @@ -28,7 +33,12 @@ asflags-y += -iquote $(PLUGIN_ARCH_DIR)/include asflags-y += -iquote $(PLUGIN_ARCH_DIR) # General flags for assembly +ifeq ($(ARCH),mips) +asflags-y += -mno-abicalls -fno-pic -Wstrict-prototypes +else asflags-y += -fpie -Wstrict-prototypes +endif + asflags-y += -nostdlib -fomit-frame-pointer asflags-y += -fno-stack-protector ldflags-y += -z noexecstack @@ -57,6 +67,10 @@ ifeq ($(ARCH),x86) std-lib-y += ./$(PLUGIN_ARCH_DIR)/std/memcpy.o endif +ifeq ($(ARCH),mips) + std-lib-y += ./$(PLUGIN_ARCH_DIR)/std/memcpy.o +endif + ifeq ($(ARCH),ppc64) std-lib-y += ./$(PLUGIN_ARCH_DIR)/std/memcpy.o std-lib-y += ./$(PLUGIN_ARCH_DIR)/std/memcmp.o diff --git a/compel/src/lib/handle-elf.c b/compel/src/lib/handle-elf.c index 69d5104b6..e97d75026 100644 --- a/compel/src/lib/handle-elf.c +++ b/compel/src/lib/handle-elf.c @@ -16,8 +16,9 @@ #include "piegen.h" #include "log.h" -piegen_opt_t opts = {}; - +#ifdef CONFIG_MIPS +#include "ldsodefs.h" +#endif /* Check if pointer is out-of-bound */ static bool __ptr_oob(const uintptr_t ptr, const uintptr_t start, const size_t size) { @@ -403,6 +404,66 @@ int __handle_elf(void *mem, size_t size) #endif switch (ELF_R_TYPE(r->rel.r_info)) { +#ifdef CONFIG_MIPS + case R_MIPS_PC16: + /* s+a-p relative */ + *((int32_t *)where) = *((int32_t *)where) | ((value32 + addend32 - place)>>2); + break; + + case R_MIPS_26: + /* local : (((A << 2) | (P & 0xf0000000) + S) >> 
2 + * external : (sign-extend(A << 2) + S) >> 2 + */ + + if (((unsigned)ELF_ST_BIND(sym->st_info) == 0x1) + || ((unsigned)ELF_ST_BIND(sym->st_info) == 0x2)){ + /* bind type local is 0x0 ,global is 0x1,WEAK is 0x2 */ + addend32 = value32; + } + pr_out(" { .offset = 0x%-8x, .type = COMPEL_TYPE_MIPS_26, " + ".addend = %-8d, .value = 0x%-16x, }, /* R_MIPS_26 */\n", + (unsigned int)place, addend32, value32); + break; + + case R_MIPS_32: + /* S+A */ + break; + + case R_MIPS_64: + pr_out(" { .offset = 0x%-8x, .type = COMPEL_TYPE_MIPS_64, " + ".addend = %-8ld, .value = 0x%-16lx, }, /* R_MIPS_64 */\n", + (unsigned int)place, (long)addend64, (long)value64); + break; + + case R_MIPS_HIGHEST: + pr_out(" { .offset = 0x%-8x, .type = COMPEL_TYPE_MIPS_HIGHEST, " + ".addend = %-8d, .value = 0x%-16x, }, /* R_MIPS_HIGHEST */\n", + (unsigned int)place, addend32, value32); + break; + + case R_MIPS_HIGHER: + pr_out(" { .offset = 0x%-8x, .type = COMPEL_TYPE_MIPS_HIGHER, " + ".addend = %-8d, .value = 0x%-16x, }, /* R_MIPS_HIGHER */\n", + (unsigned int)place, addend32, value32); + break; + + case R_MIPS_HI16: + pr_out(" { .offset = 0x%-8x, .type = COMPEL_TYPE_MIPS_HI16, " + ".addend = %-8d, .value = 0x%-16x, }, /* R_MIPS_HI16 */\n", + (unsigned int)place, addend32, value32); + break; + + case R_MIPS_LO16: + if((unsigned)ELF_ST_BIND(sym->st_info) == 0x1){ + /* bind type local is 0x0 ,global is 0x1 */ + addend32 = value32; + } + pr_out(" { .offset = 0x%-8x, .type = COMPEL_TYPE_MIPS_LO16, " + ".addend = %-8d, .value = 0x%-16x, }, /* R_MIPS_LO16 */\n", + (unsigned int)place, addend32, value32); + break; + +#endif #ifdef ELF_PPC64 case R_PPC64_REL24: /* Update PC relative offset, linker has not done this yet */ diff --git a/compel/src/lib/infect.c b/compel/src/lib/infect.c index 3fad85ed3..296ffcbd0 100644 --- a/compel/src/lib/infect.c +++ b/compel/src/lib/infect.c @@ -817,7 +817,9 @@ err_cure: void compel_relocs_apply(void *mem, void *vbase, size_t size, compel_reloc_t *elf_relocs, size_t
nr_relocs) { size_t i, j; - +#ifdef CONFIG_MIPS + compel_relocs_apply_mips(mem, vbase, elf_relocs, nr_relocs); +#else for (i = 0, j = 0; i < nr_relocs; i++) { if (elf_relocs[i].type & COMPEL_TYPE_LONG) { long *where = mem + elf_relocs[i].offset; @@ -840,6 +842,7 @@ void compel_relocs_apply(void *mem, void *vbase, size_t size, compel_reloc_t *el } else BUG(); } +#endif } static int compel_map_exchange(struct parasite_ctl *ctl, unsigned long size) diff --git a/compel/src/main.c b/compel/src/main.c index 9fc3a924c..c5f6e57ed 100644 --- a/compel/src/main.c +++ b/compel/src/main.c @@ -53,11 +53,14 @@ static const flags_t flags = { #elif defined CONFIG_S390 .arch = "s390", .cflags = COMPEL_CFLAGS_PIE, +#elif defined CONFIG_MIPS + .arch = "mips", #else #error "CONFIG_ not defined, or unsupported ARCH" #endif }; +piegen_opt_t opts = {}; const char *uninst_root; static int piegen(void) From 158e8f8fe637117f00bb6ecbad84d6491970d84d Mon Sep 17 00:00:00 2001 From: Guoyun Sun Date: Wed, 8 Apr 2020 10:23:16 +0800 Subject: [PATCH 0236/1854] mips:proto: Add mips to protocol buffer files Signed-off-by: Guoyun Sun --- images/Makefile | 1 + images/core-mips.proto | 90 ++++++++++++++++++++++++++++++++++++++++++ images/core.proto | 5 +++ images/sa.proto | 1 + 4 files changed, 97 insertions(+) create mode 100755 images/core-mips.proto diff --git a/images/Makefile b/images/Makefile index bc67278e6..5458e4679 100644 --- a/images/Makefile +++ b/images/Makefile @@ -1,6 +1,7 @@ proto-obj-y += stats.o proto-obj-y += core.o proto-obj-y += core-x86.o +proto-obj-y += core-mips.o proto-obj-y += core-arm.o proto-obj-y += core-aarch64.o proto-obj-y += core-ppc64.o diff --git a/images/core-mips.proto b/images/core-mips.proto new file mode 100755 index 000000000..6391b1e86 --- /dev/null +++ b/images/core-mips.proto @@ -0,0 +1,90 @@ +syntax = "proto2"; + +import "opts.proto"; + +message user_mips_regs_entry { + required uint64 r0 = 1; + required uint64 r1 = 2; + required uint64 r2 = 3; + required 
uint64 r3 = 4; + required uint64 r4 = 5; + required uint64 r5 = 6; + required uint64 r6 = 7; + required uint64 r7 = 8; + required uint64 r8 = 9; + required uint64 r9 = 10; + required uint64 r10 = 11; + required uint64 r11 = 12; + required uint64 r12 = 13; + required uint64 r13 = 14; + required uint64 r14 = 15; + required uint64 r15 = 16; + required uint64 r16 = 17; + required uint64 r17 = 18; + required uint64 r18 = 19; + required uint64 r19 = 20; + required uint64 r20 = 21; + required uint64 r21 = 22; + required uint64 r22 = 23; + required uint64 r23 = 24; + required uint64 r24 = 25; + required uint64 r25 = 26; + required uint64 r26 = 27; + required uint64 r27 = 28; + required uint64 r28 = 29; + required uint64 r29 = 30; + required uint64 r30 = 31; + required uint64 r31 = 32; + required uint64 lo = 33; + required uint64 hi = 34; + required uint64 cp0_epc = 35; + required uint64 cp0_badvaddr = 36; + required uint64 cp0_status = 37; + required uint64 cp0_cause = 38; +} + +message user_mips_fpregs_entry { + required uint64 r0 = 1; + required uint64 r1 = 2; + required uint64 r2 = 3; + required uint64 r3 = 4; + required uint64 r4 = 5; + required uint64 r5 = 6; + required uint64 r6 = 7; + required uint64 r7 = 8; + required uint64 r8 = 9; + required uint64 r9 = 10; + required uint64 r10 = 11; + required uint64 r11 = 12; + required uint64 r12 = 13; + required uint64 r13 = 14; + required uint64 r14 = 15; + required uint64 r15 = 16; + required uint64 r16 = 17; + required uint64 r17 = 18; + required uint64 r18 = 19; + required uint64 r19 = 20; + required uint64 r20 = 21; + required uint64 r21 = 22; + required uint64 r22 = 23; + required uint64 r23 = 24; + required uint64 r24 = 25; + required uint64 r25 = 26; + required uint64 r26 = 27; + required uint64 r27 = 28; + required uint64 r28 = 29; + required uint64 r29 = 30; + required uint64 r30 = 31; + required uint64 r31 = 32; + required uint64 lo = 33; + required uint64 hi = 34; + required uint32 fpu_fcr31 = 35; + required 
uint32 fpu_id = 36; +} + +message thread_info_mips { + required uint64 clear_tid_addr = 1[(criu).hex = true]; + required uint64 tls = 2; + required user_mips_regs_entry gpregs = 3[(criu).hex = true]; + required user_mips_fpregs_entry fpregs = 4[(criu).hex = true]; +} diff --git a/images/core.proto b/images/core.proto index 22c2a9f1f..9e9e39388 100644 --- a/images/core.proto +++ b/images/core.proto @@ -5,6 +5,7 @@ import "core-arm.proto"; import "core-aarch64.proto"; import "core-ppc64.proto"; import "core-s390.proto"; +import "core-mips.proto"; import "rlimit.proto"; import "timer.proto"; @@ -55,6 +56,7 @@ message task_core_entry { optional bool child_subreaper = 18; // Reserved for container relative start time //optional uint64 start_time = 19; + optional uint64 blk_sigset_extended = 20[(criu).hex = true]; } message task_kobj_ids_entry { @@ -96,6 +98,7 @@ message thread_core_entry { optional uint32 seccomp_filter = 12; optional string comm = 13; + optional uint64 blk_sigset_extended = 14; } message task_rlimits_entry { @@ -110,6 +113,7 @@ message core_entry { AARCH64 = 3; PPC64 = 4; S390 = 5; + MIPS = 6; } required march mtype = 1; @@ -118,6 +122,7 @@ message core_entry { optional thread_info_aarch64 ti_aarch64 = 8; optional thread_info_ppc64 ti_ppc64 = 9; optional thread_info_s390 ti_s390 = 10; + optional thread_info_mips ti_mips = 11; optional task_core_entry tc = 3; optional task_kobj_ids_entry ids = 4; diff --git a/images/sa.proto b/images/sa.proto index 3bce0c4ff..07fd4ffd3 100644 --- a/images/sa.proto +++ b/images/sa.proto @@ -8,4 +8,5 @@ message sa_entry { required uint64 restorer = 3 [(criu).hex = true]; required uint64 mask = 4 [(criu).hex = true]; optional bool compat_sigaction = 5; + optional uint64 mask_extended = 6 [(criu).hex = true]; } From d325b7b775cd2e51651c00137be0df5bd3d2d74f Mon Sep 17 00:00:00 2001 From: Guoyun Sun Date: Wed, 8 Apr 2020 10:24:45 +0800 Subject: [PATCH 0237/1854] mips:criu/arch/mips: Add mips parts to criu Signed-off-by: 
Guoyun Sun --- criu/arch/mips/Makefile | 14 + criu/arch/mips/cpu.c | 53 ++++ criu/arch/mips/crtools.c | 252 ++++++++++++++++++ criu/arch/mips/include/asm/dump.h | 14 + criu/arch/mips/include/asm/int.h | 6 + criu/arch/mips/include/asm/kerndat.h | 7 + criu/arch/mips/include/asm/parasite-syscall.h | 8 + criu/arch/mips/include/asm/parasite.h | 9 + criu/arch/mips/include/asm/restore.h | 29 ++ criu/arch/mips/include/asm/restorer.h | 79 ++++++ criu/arch/mips/include/asm/syscall32.h | 17 ++ criu/arch/mips/include/asm/types.h | 31 +++ criu/arch/mips/include/asm/vdso.h | 23 ++ criu/arch/mips/restorer.c | 17 ++ criu/arch/mips/sigaction_compat.c | 19 ++ criu/arch/mips/sigframe.c | 13 + criu/arch/mips/vdso-pie.c | 21 ++ 17 files changed, 612 insertions(+) create mode 100755 criu/arch/mips/Makefile create mode 100755 criu/arch/mips/cpu.c create mode 100755 criu/arch/mips/crtools.c create mode 100755 criu/arch/mips/include/asm/dump.h create mode 100755 criu/arch/mips/include/asm/int.h create mode 100644 criu/arch/mips/include/asm/kerndat.h create mode 100755 criu/arch/mips/include/asm/parasite-syscall.h create mode 100755 criu/arch/mips/include/asm/parasite.h create mode 100755 criu/arch/mips/include/asm/restore.h create mode 100755 criu/arch/mips/include/asm/restorer.h create mode 100755 criu/arch/mips/include/asm/syscall32.h create mode 100755 criu/arch/mips/include/asm/types.h create mode 100755 criu/arch/mips/include/asm/vdso.h create mode 100755 criu/arch/mips/restorer.c create mode 100755 criu/arch/mips/sigaction_compat.c create mode 100755 criu/arch/mips/sigframe.c create mode 100755 criu/arch/mips/vdso-pie.c diff --git a/criu/arch/mips/Makefile b/criu/arch/mips/Makefile new file mode 100755 index 000000000..4bd99eb7e --- /dev/null +++ b/criu/arch/mips/Makefile @@ -0,0 +1,14 @@ +builtin-name := crtools.built-in.o + +ccflags-y += -iquote $(obj)/include +ccflags-y += -iquote criu/include -iquote include +ccflags-y += $(COMPEL_UAPI_INCLUDES) + +asflags-y += 
-Wstrict-prototypes +asflags-y += -D__ASSEMBLY__ -nostdlib -fomit-frame-pointer +asflags-y += -iquote $(obj)/include +ldflags-y += -r -z noexecstack + +obj-y += cpu.o +obj-y += crtools.o +obj-y += sigframe.o diff --git a/criu/arch/mips/cpu.c b/criu/arch/mips/cpu.c new file mode 100755 index 000000000..484698e4f --- /dev/null +++ b/criu/arch/mips/cpu.c @@ -0,0 +1,53 @@ +#include +#include +#include +#include +#include +#include + +#include "bitops.h" +#include "asm/types.h" +#include "asm/cpu.h" +#include +#include + +#include "common/compiler.h" +#include "cr_options.h" +#include "image.h" +#include "util.h" +#include "log.h" +#include "cpu.h" +#include "protobuf.h" +#include "images/cpuinfo.pb-c.h" + +#undef LOG_PREFIX +#define LOG_PREFIX "cpu: " + +int cpu_init(void) +{ + return 0; +} + +int cpu_dump_cpuinfo(void) +{ + return 0; +} + +int cpu_validate_cpuinfo(void) +{ + return 0; +} + +int cpuinfo_dump(void) +{ + if (cpu_init()) + return -1; + if (cpu_dump_cpuinfo()) + return -1; + return 0; +} + +int cpuinfo_check(void) +{ + return 0; +} diff --git a/criu/arch/mips/crtools.c b/criu/arch/mips/crtools.c new file mode 100755 index 000000000..329ae8edb --- /dev/null +++ b/criu/arch/mips/crtools.c @@ -0,0 +1,252 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "types.h" +#include "log.h" +#include "asm/parasite-syscall.h" +#include "asm/restorer.h" +#include +#include "asm/dump.h" +#include "cr_options.h" +#include "common/compiler.h" +#include "restorer.h" +#include "parasite-syscall.h" +#include "util.h" +#include "cpu.h" +#include +#include "kerndat.h" + +#include "protobuf.h" +#include "images/core.pb-c.h" +#include "images/creds.pb-c.h" + + +int save_task_regs(void *x, user_regs_struct_t *regs, user_fpregs_struct_t *fpregs) +{ + CoreEntry *core = x; + + /* Save the MIPS CPU state */ + core->ti_mips->gpregs->r0 = regs->regs[0]; + core->ti_mips->gpregs->r1 = regs->regs[1]; + core->ti_mips->gpregs->r2 = 
regs->regs[2]; + core->ti_mips->gpregs->r3 = regs->regs[3]; + core->ti_mips->gpregs->r4 = regs->regs[4]; + core->ti_mips->gpregs->r5 = regs->regs[5]; + core->ti_mips->gpregs->r6 = regs->regs[6]; + core->ti_mips->gpregs->r7 = regs->regs[7]; + core->ti_mips->gpregs->r8 = regs->regs[8]; + core->ti_mips->gpregs->r9 = regs->regs[9]; + core->ti_mips->gpregs->r10 = regs->regs[10]; + core->ti_mips->gpregs->r11 = regs->regs[11]; + core->ti_mips->gpregs->r12 = regs->regs[12]; + core->ti_mips->gpregs->r13 = regs->regs[13]; + core->ti_mips->gpregs->r14 = regs->regs[14]; + core->ti_mips->gpregs->r15 = regs->regs[15]; + core->ti_mips->gpregs->r16 = regs->regs[16]; + core->ti_mips->gpregs->r17 = regs->regs[17]; + core->ti_mips->gpregs->r18 = regs->regs[18]; + core->ti_mips->gpregs->r19 = regs->regs[19]; + core->ti_mips->gpregs->r20 = regs->regs[20]; + core->ti_mips->gpregs->r21 = regs->regs[21]; + core->ti_mips->gpregs->r22 = regs->regs[22]; + core->ti_mips->gpregs->r23 = regs->regs[23]; + core->ti_mips->gpregs->r24 = regs->regs[24]; + core->ti_mips->gpregs->r25 = regs->regs[25]; + core->ti_mips->gpregs->r26 = regs->regs[26]; + core->ti_mips->gpregs->r27 = regs->regs[27]; + core->ti_mips->gpregs->r28 = regs->regs[28]; + core->ti_mips->gpregs->r29 = regs->regs[29]; + core->ti_mips->gpregs->r30 = regs->regs[30]; + core->ti_mips->gpregs->r31 = regs->regs[31]; + + core->ti_mips->gpregs->lo = regs->lo; + core->ti_mips->gpregs->hi = regs->hi; + core->ti_mips->gpregs->cp0_epc = regs->cp0_epc; + core->ti_mips->gpregs->cp0_badvaddr = regs->cp0_badvaddr; + core->ti_mips->gpregs->cp0_status = regs->cp0_status; + core->ti_mips->gpregs->cp0_cause = regs->cp0_cause; + + core->ti_mips->fpregs->r0 = fpregs->regs[0]; + core->ti_mips->fpregs->r1 = fpregs->regs[1]; + core->ti_mips->fpregs->r2 = fpregs->regs[2]; + core->ti_mips->fpregs->r3 = fpregs->regs[3]; + core->ti_mips->fpregs->r4 = fpregs->regs[4]; + core->ti_mips->fpregs->r5 = fpregs->regs[5]; + core->ti_mips->fpregs->r6 = fpregs->regs[6]; + 
core->ti_mips->fpregs->r7 = fpregs->regs[7]; + core->ti_mips->fpregs->r8 = fpregs->regs[8]; + core->ti_mips->fpregs->r9 = fpregs->regs[9]; + core->ti_mips->fpregs->r10 = fpregs->regs[10]; + core->ti_mips->fpregs->r11 = fpregs->regs[11]; + core->ti_mips->fpregs->r12 = fpregs->regs[12]; + core->ti_mips->fpregs->r13 = fpregs->regs[13]; + core->ti_mips->fpregs->r14 = fpregs->regs[14]; + core->ti_mips->fpregs->r15 = fpregs->regs[15]; + core->ti_mips->fpregs->r16 = fpregs->regs[16]; + core->ti_mips->fpregs->r17 = fpregs->regs[17]; + core->ti_mips->fpregs->r18 = fpregs->regs[18]; + core->ti_mips->fpregs->r19 = fpregs->regs[19]; + core->ti_mips->fpregs->r20 = fpregs->regs[20]; + core->ti_mips->fpregs->r21 = fpregs->regs[21]; + core->ti_mips->fpregs->r22 = fpregs->regs[22]; + core->ti_mips->fpregs->r23 = fpregs->regs[23]; + core->ti_mips->fpregs->r24 = fpregs->regs[24]; + core->ti_mips->fpregs->r25 = fpregs->regs[25]; + core->ti_mips->fpregs->r26 = fpregs->regs[26]; + core->ti_mips->fpregs->r27 = fpregs->regs[27]; + core->ti_mips->fpregs->r28 = fpregs->regs[28]; + core->ti_mips->fpregs->r29 = fpregs->regs[29]; + core->ti_mips->fpregs->r30 = fpregs->regs[30]; + core->ti_mips->fpregs->r31 = fpregs->regs[31]; + core->ti_mips->fpregs->fpu_fcr31 = fpregs->fpu_fcr31; + core->ti_mips->fpregs->fpu_id = fpregs->fpu_id; + + return 0; +} + +int arch_alloc_thread_info(CoreEntry *core) +{ + ThreadInfoMips *ti_mips; + UserMipsRegsEntry *gpregs; + UserMipsFpregsEntry *fpregs; + + ti_mips = xmalloc(sizeof(*ti_mips)); + if (!ti_mips) + goto err; + + thread_info_mips__init(ti_mips); + core->ti_mips = ti_mips; + + gpregs = xmalloc(sizeof(*gpregs)); + if (!gpregs){ + xfree(ti_mips); + goto err; + } + + user_mips_regs_entry__init(gpregs); + ti_mips->gpregs = gpregs; + + fpregs = xmalloc(sizeof(*fpregs)); + if (!fpregs){ + xfree(ti_mips); + xfree(gpregs); + goto err; + } + + user_mips_fpregs_entry__init(fpregs); + ti_mips->fpregs = fpregs; + + return 0; +err: + return -1; +} + +void 
arch_free_thread_info(CoreEntry *core) +{ + if (!core->ti_mips) + return; + + if (core->ti_mips->gpregs) + xfree(core->ti_mips->gpregs); + + if (core->ti_mips->fpregs) + xfree(core->ti_mips->fpregs); + + xfree(core->ti_mips); +} + +int restore_fpu(struct rt_sigframe *sigframe, CoreEntry *core) +{ + struct rt_sigframe *f = sigframe; + UserMipsFpregsEntry *r = core->ti_mips->fpregs; + + f->rs_uc.uc_mcontext.sc_fpregs[0] = r->r0; + f->rs_uc.uc_mcontext.sc_fpregs[1] = r->r1; + f->rs_uc.uc_mcontext.sc_fpregs[2] = r->r2; + f->rs_uc.uc_mcontext.sc_fpregs[3] = r->r3; + f->rs_uc.uc_mcontext.sc_fpregs[4] = r->r4; + f->rs_uc.uc_mcontext.sc_fpregs[5] = r->r5; + f->rs_uc.uc_mcontext.sc_fpregs[6] = r->r6; + f->rs_uc.uc_mcontext.sc_fpregs[7] = r->r7; + f->rs_uc.uc_mcontext.sc_fpregs[8] = r->r8; + f->rs_uc.uc_mcontext.sc_fpregs[9] = r->r9; + f->rs_uc.uc_mcontext.sc_fpregs[10] = r->r10; + f->rs_uc.uc_mcontext.sc_fpregs[11] = r->r11; + f->rs_uc.uc_mcontext.sc_fpregs[12] = r->r12; + f->rs_uc.uc_mcontext.sc_fpregs[13] = r->r13; + f->rs_uc.uc_mcontext.sc_fpregs[14] = r->r14; + f->rs_uc.uc_mcontext.sc_fpregs[15] = r->r15; + f->rs_uc.uc_mcontext.sc_fpregs[16] = r->r16; + f->rs_uc.uc_mcontext.sc_fpregs[17] = r->r17; + f->rs_uc.uc_mcontext.sc_fpregs[18] = r->r18; + f->rs_uc.uc_mcontext.sc_fpregs[19] = r->r19; + f->rs_uc.uc_mcontext.sc_fpregs[20] = r->r20; + f->rs_uc.uc_mcontext.sc_fpregs[21] = r->r21; + f->rs_uc.uc_mcontext.sc_fpregs[22] = r->r22; + f->rs_uc.uc_mcontext.sc_fpregs[23] = r->r23; + f->rs_uc.uc_mcontext.sc_fpregs[24] = r->r24; + f->rs_uc.uc_mcontext.sc_fpregs[25] = r->r25; + f->rs_uc.uc_mcontext.sc_fpregs[26] = r->r26; + f->rs_uc.uc_mcontext.sc_fpregs[27] = r->r27; + f->rs_uc.uc_mcontext.sc_fpregs[28] = r->r28; + f->rs_uc.uc_mcontext.sc_fpregs[29] = r->r29; + f->rs_uc.uc_mcontext.sc_fpregs[30] = r->r30; + f->rs_uc.uc_mcontext.sc_fpregs[31] = r->r31; + + return 0; +} + + +int restore_gpregs(struct rt_sigframe *f, UserMipsRegsEntry *r) +{ + f->rs_uc.uc_mcontext.sc_regs[0] = 
r->r0; + f->rs_uc.uc_mcontext.sc_regs[1] = r->r1; + f->rs_uc.uc_mcontext.sc_regs[2] = r->r2; + f->rs_uc.uc_mcontext.sc_regs[3] = r->r3; + f->rs_uc.uc_mcontext.sc_regs[4] = r->r4; + f->rs_uc.uc_mcontext.sc_regs[5] = r->r5; + f->rs_uc.uc_mcontext.sc_regs[6] = r->r6; + f->rs_uc.uc_mcontext.sc_regs[7] = r->r7; + f->rs_uc.uc_mcontext.sc_regs[8] = r->r8; + f->rs_uc.uc_mcontext.sc_regs[9] = r->r9; + f->rs_uc.uc_mcontext.sc_regs[10] = r->r10; + f->rs_uc.uc_mcontext.sc_regs[11] = r->r11; + f->rs_uc.uc_mcontext.sc_regs[12] = r->r12; + f->rs_uc.uc_mcontext.sc_regs[13] = r->r13; + f->rs_uc.uc_mcontext.sc_regs[14] = r->r14; + f->rs_uc.uc_mcontext.sc_regs[15] = r->r15; + f->rs_uc.uc_mcontext.sc_regs[16] = r->r16; + f->rs_uc.uc_mcontext.sc_regs[17] = r->r17; + f->rs_uc.uc_mcontext.sc_regs[18] = r->r18; + f->rs_uc.uc_mcontext.sc_regs[19] = r->r19; + f->rs_uc.uc_mcontext.sc_regs[20] = r->r20; + f->rs_uc.uc_mcontext.sc_regs[21] = r->r21; + f->rs_uc.uc_mcontext.sc_regs[22] = r->r22; + f->rs_uc.uc_mcontext.sc_regs[23] = r->r23; + f->rs_uc.uc_mcontext.sc_regs[24] = r->r24; + f->rs_uc.uc_mcontext.sc_regs[25] = r->r25; + f->rs_uc.uc_mcontext.sc_regs[26] = r->r26; + f->rs_uc.uc_mcontext.sc_regs[27] = r->r27; + f->rs_uc.uc_mcontext.sc_regs[28] = r->r28; + f->rs_uc.uc_mcontext.sc_regs[29] = r->r29; + f->rs_uc.uc_mcontext.sc_regs[30] = r->r30; + f->rs_uc.uc_mcontext.sc_regs[31] = r->r31; + + f->rs_uc.uc_mcontext.sc_mdlo = r->lo; + f->rs_uc.uc_mcontext.sc_mdhi = r->hi; + f->rs_uc.uc_mcontext.sc_pc = r->cp0_epc; + + return 0; +} + +int get_task_futex_robust_list_compat(pid_t pid, ThreadCoreEntry *info) +{ + return 0; +} diff --git a/criu/arch/mips/include/asm/dump.h b/criu/arch/mips/include/asm/dump.h new file mode 100755 index 000000000..58015833d --- /dev/null +++ b/criu/arch/mips/include/asm/dump.h @@ -0,0 +1,14 @@ +#ifndef __CR_ASM_DUMP_H__ +#define __CR_ASM_DUMP_H__ + +extern int save_task_regs(void *, user_regs_struct_t *, user_fpregs_struct_t *); +extern int 
arch_alloc_thread_info(CoreEntry *core); +extern void arch_free_thread_info(CoreEntry *core); +extern int get_task_futex_robust_list_compat(pid_t pid, ThreadCoreEntry *info); + +static inline void core_put_tls(CoreEntry *core, tls_t tls) +{ + core->ti_mips->tls = tls; +} + +#endif diff --git a/criu/arch/mips/include/asm/int.h b/criu/arch/mips/include/asm/int.h new file mode 100755 index 000000000..642804e9b --- /dev/null +++ b/criu/arch/mips/include/asm/int.h @@ -0,0 +1,6 @@ +#ifndef __CR_ASM_INT_H__ +#define __CR_ASM_INT_H__ + +#include "asm-generic/int.h" + +#endif /* __CR_ASM_INT_H__ */ diff --git a/criu/arch/mips/include/asm/kerndat.h b/criu/arch/mips/include/asm/kerndat.h new file mode 100644 index 000000000..60956b573 --- /dev/null +++ b/criu/arch/mips/include/asm/kerndat.h @@ -0,0 +1,7 @@ +#ifndef __CR_ASM_KERNDAT_H__ +#define __CR_ASM_KERNDAT_H__ + +#define kdat_compatible_cr() 0 +#define kdat_can_map_vdso() 0 + +#endif /* __CR_ASM_KERNDAT_H__ */ diff --git a/criu/arch/mips/include/asm/parasite-syscall.h b/criu/arch/mips/include/asm/parasite-syscall.h new file mode 100755 index 000000000..a2b5e75ff --- /dev/null +++ b/criu/arch/mips/include/asm/parasite-syscall.h @@ -0,0 +1,8 @@ +#ifndef __CR_ASM_PARASITE_SYSCALL_H__ +#define __CR_ASM_PARASITE_SYSCALL_H__ + +#include "asm/types.h" + +struct parasite_ctl; + +#endif diff --git a/criu/arch/mips/include/asm/parasite.h b/criu/arch/mips/include/asm/parasite.h new file mode 100755 index 000000000..39882dd21 --- /dev/null +++ b/criu/arch/mips/include/asm/parasite.h @@ -0,0 +1,9 @@ +#ifndef __ASM_PARASITE_H__ +#define __ASM_PARASITE_H__ + +static inline void arch_get_tls(tls_t *ptls) +{ + asm("rdhwr %0, $29" : "=r"(*ptls)); +} + +#endif diff --git a/criu/arch/mips/include/asm/restore.h b/criu/arch/mips/include/asm/restore.h new file mode 100755 index 000000000..0cb9aa8ed --- /dev/null +++ b/criu/arch/mips/include/asm/restore.h @@ -0,0 +1,29 @@ +#ifndef __CR_ASM_RESTORE_H__ +#define __CR_ASM_RESTORE_H__ + +#include 
"asm/restorer.h" +#include "images/core.pb-c.h" + +#define JUMP_TO_RESTORER_BLOB(new_sp, restore_task_exec_start, \ + task_args) \ + asm volatile( \ + "move $4, %0 \n" \ + "move $25, %1 \n" \ + "move $5, %2 \n" \ + "move $29, $5 \n" \ + "jalr $25 \n" \ + "nop \n" \ + : \ + :"r"(task_args),"r"(restore_task_exec_start), \ + "g"(new_sp) \ + : "$29", "$25", "$4","$5") + +static inline void core_get_tls(CoreEntry *pcore, tls_t *ptls) +{ + *ptls = pcore->ti_mips->tls; +} + + +int restore_fpu(struct rt_sigframe *sigframe, CoreEntry *core); + +#endif diff --git a/criu/arch/mips/include/asm/restorer.h b/criu/arch/mips/include/asm/restorer.h new file mode 100755 index 000000000..1a33cd884 --- /dev/null +++ b/criu/arch/mips/include/asm/restorer.h @@ -0,0 +1,79 @@ +#ifndef __CR_ASM_RESTORER_H__ +#define __CR_ASM_RESTORER_H__ + +#include "asm/types.h" +#include +#include "images/core.pb-c.h" +#include +#include + +static inline void restore_tls(tls_t *ptls) { + asm volatile( + "move $4, %0 \n" + "li $2, "__stringify(__NR_set_thread_area)" \n" + "syscall \n" + : + : "r"(*ptls) + : "$4","$2","memory"); +} +static inline int arch_compat_rt_sigaction(void *stack, int sig, void *act) +{ + return -1; +} +static inline int set_compat_robust_list(uint32_t head_ptr, uint32_t len) +{ + return -1; +} + +#define RUN_CLONE_RESTORE_FN(ret, clone_flags, new_sp, parent_tid, \ + thread_args, clone_restore_fn) \ + asm volatile( \ + "ld $5,%2 \n" /* a1 = new_sp */ \ + "dsubu $5,32 \n" \ + "sd %5,0($5) \n" \ + "sd %6,8($5) \n" \ + "sd %1,16($5) \n" \ + "move $4,%1 \n" /* a0=flags */ \ + "move $6,%3 \n" /* a2=parent_tid */ \ + "li $7,0 \n" /* a3 = tls is 0 */ \ + "move $8,%4 \n" /* a4 = child_tid */ \ + "li $2, "__stringify(__NR_clone)" \n" \ + "syscall \n" /* syscall */ \ + "sync \n" \ + "bnez $7,err \n" \ + "nop \n" \ + "beqz $2,thread_start \n" \ + "nop \n" \ + "move %0,$2 \n" \ + "b end \n" \ + "err:break \n" \ + "thread_start: \n" \ + "ld $25,0($29) \n" \ + "ld $4,8($29) \n" \ + "jal $25 \n" \ 
+ "nop \n" \ + "end: \n" \ + : "=r"(ret) \ + : "r"(clone_flags), \ + "m"(new_sp), \ + "r"(&parent_tid), \ + "r"(&thread_args[i].pid), \ + "r"(clone_restore_fn), \ + "r"(&thread_args[i]) \ + :"$2","$4","$5","$6","$7","$8","$25","$29","memory") + +#define RUN_CLONE3_RESTORE_FN(ret, clone_args, size, args, \ + clone_restore_fn) do { \ + pr_err("This architecture does not support clone3() with set_tid, yet!\n"); \ + ret = -1; \ +} while (0) + +#define kdat_compatible_cr() 0 +#define arch_map_vdso(map, compat) -1 + +static inline void *alloc_compat_syscall_stack(void) { return NULL; } +static inline void free_compat_syscall_stack(void *stack32) { } +int restore_gpregs(struct rt_sigframe *f, UserMipsRegsEntry *r); +int restore_nonsigframe_gpregs(UserMipsRegsEntry *r); + +#endif diff --git a/criu/arch/mips/include/asm/syscall32.h b/criu/arch/mips/include/asm/syscall32.h new file mode 100755 index 000000000..a6e298217 --- /dev/null +++ b/criu/arch/mips/include/asm/syscall32.h @@ -0,0 +1,17 @@ +#ifndef __CR_SYSCALL32_H__ +#define __CR_SYSCALL32_H__ + +extern long sys_socket(int domain, int type, int protocol); +extern long sys_connect(int sockfd, struct sockaddr *addr, int addrlen); +extern long sys_sendto(int sockfd, void *buff, size_t len, unsigned int flags, struct sockaddr *addr, int addr_len); +extern long sys_recvfrom(int sockfd, void *ubuf, size_t size, unsigned int flags, struct sockaddr *addr, int *addr_len); +extern long sys_sendmsg(int sockfd, const struct msghdr *msg, int flags); +extern long sys_recvmsg(int sockfd, struct msghdr *msg, int flags); +extern long sys_shutdown(int sockfd, int how); +extern long sys_bind(int sockfd, const struct sockaddr *addr, int addrlen); +extern long sys_setsockopt(int sockfd, int level, int optname, const void *optval, unsigned int optlen); +extern long sys_getsockopt(int sockfd, int level, int optname, const void *optval, unsigned int *optlen); +extern long sys_shmat(int shmid, void *shmaddr, int shmflag); +extern long 
sys_pread(unsigned int fd, char *ubuf, u32 count, u64 pos); + +#endif /* __CR_SYSCALL32_H__ */ diff --git a/criu/arch/mips/include/asm/types.h b/criu/arch/mips/include/asm/types.h new file mode 100755 index 000000000..8366e0540 --- /dev/null +++ b/criu/arch/mips/include/asm/types.h @@ -0,0 +1,31 @@ +#ifndef __CR_ASM_TYPES_H__ +#define __CR_ASM_TYPES_H__ + +#include +#include + +#include "page.h" +#include "bitops.h" +#include "asm/int.h" + +#include + +#include "images/core.pb-c.h" + +#define core_is_compat(core) false + +#define CORE_ENTRY__MARCH CORE_ENTRY__MARCH__MIPS + +#define CORE_THREAD_ARCH_INFO(core) core->ti_mips + +typedef UserMipsRegsEntry UserRegsEntry; + +static inline u64 encode_pointer(void *p) { return (u64)p; } +static inline void *decode_pointer(u64 v) { return (void*)v; } + + +#define AT_VECTOR_SIZE 44 +typedef uint64_t auxv_t; +typedef unsigned long tls_t; + +#endif /* __CR_ASM_TYPES_H__ */ diff --git a/criu/arch/mips/include/asm/vdso.h b/criu/arch/mips/include/asm/vdso.h new file mode 100755 index 000000000..0e5da159e --- /dev/null +++ b/criu/arch/mips/include/asm/vdso.h @@ -0,0 +1,23 @@ +#ifndef __CR_ASM_VDSO_H__ +#define __CR_ASM_VDSO_H__ + +#include "asm/int.h" +#include "asm-generic/vdso.h" + +/* This definition is used in pie/util-vdso.c to initialize the vdso symbol + * name string table 'vdso_symbols' + */ + +/* + * This is a minimal amount of symbols + * we should support at the moment. 
+ */ +#define VDSO_SYMBOL_MAX 3 +#define VDSO_SYMBOL_GTOD 0 +#define ARCH_VDSO_SYMBOLS \ + "__vdso_clock_gettime", \ + "__vdso_gettimeofday", \ + "__vdso_clock_getres" + + +#endif /* __CR_ASM_VDSO_H__ */ diff --git a/criu/arch/mips/restorer.c b/criu/arch/mips/restorer.c new file mode 100755 index 000000000..2e196b60c --- /dev/null +++ b/criu/arch/mips/restorer.c @@ -0,0 +1,17 @@ +#include + +#include "types.h" +#include "restorer.h" +#include "asm/restorer.h" +#include + +#include +#include +#include +#include "log.h" +#include "cpu.h" + +int restore_nonsigframe_gpregs(UserMipsRegsEntry *r) +{ + return 0; +} diff --git a/criu/arch/mips/sigaction_compat.c b/criu/arch/mips/sigaction_compat.c new file mode 100755 index 000000000..d3e45f082 --- /dev/null +++ b/criu/arch/mips/sigaction_compat.c @@ -0,0 +1,19 @@ +#include "log.h" +#include "asm/restorer.h" +#include +#include "asm/compat.h" +#include + +#ifdef CR_NOGLIBC +# include +#endif + +#include "cpu.h" + +extern char restore_rt_sigaction; + +int arch_compat_rt_sigaction(void *stack32, int sig, rt_sigaction_t_compat *act) +{ + return 0; +} + diff --git a/criu/arch/mips/sigframe.c b/criu/arch/mips/sigframe.c new file mode 100755 index 000000000..1e39102f0 --- /dev/null +++ b/criu/arch/mips/sigframe.c @@ -0,0 +1,13 @@ +#include +#include + +#include "asm/sigframe.h" +#include "asm/types.h" + +#include "log.h" +#include +int sigreturn_prep_fpu_frame(struct rt_sigframe *sigframe, + struct rt_sigframe *rsigframe) +{ + return 0; +} diff --git a/criu/arch/mips/vdso-pie.c b/criu/arch/mips/vdso-pie.c new file mode 100755 index 000000000..737e5538b --- /dev/null +++ b/criu/arch/mips/vdso-pie.c @@ -0,0 +1,21 @@ +#include +#include "asm/types.h" + +#include +#include +#include "parasite-vdso.h" +#include "log.h" +#include "common/bug.h" + +#ifdef LOG_PREFIX +# undef LOG_PREFIX +#endif +#define LOG_PREFIX "vdso: " + +int vdso_redirect_calls(unsigned long base_to, unsigned long base_from, + struct vdso_symtable *sto, struct 
vdso_symtable *sfrom, + bool compat_vdso) +{ + pr_err("Vdso proxification isn't implemented on mips\n"); + return -1; +} From afe90627e276e9e68cdf4872cae8fa3c5c637914 Mon Sep 17 00:00:00 2001 From: Guoyun Sun Date: Wed, 8 Apr 2020 10:29:21 +0800 Subject: [PATCH 0238/1854] mips:criu: Enable mips in criu Signed-off-by: Guoyun Sun --- Makefile | 10 +++++++++- criu/cr-restore.c | 9 ++++++++- criu/kerndat.c | 9 +++++++++ criu/parasite-syscall.c | 17 +++++++++++++++++ criu/pie/Makefile | 4 ++++ criu/pie/Makefile.library | 4 ++++ 6 files changed, 51 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 00e563c11..e72dd1428 100644 --- a/Makefile +++ b/Makefile @@ -19,7 +19,7 @@ endif # # Supported Architectures -ifneq ($(filter-out x86 arm aarch64 ppc64 s390,$(ARCH)),) +ifneq ($(filter-out x86 arm aarch64 ppc64 s390 mips,$(ARCH)),) $(error "The architecture $(ARCH) isn't supported") endif @@ -76,6 +76,10 @@ ifeq ($(ARCH),x86) DEFINES := -DCONFIG_X86_64 endif +ifeq ($(ARCH),mips) + DEFINES := -DCONFIG_MIPS +endif + # # CFLAGS_PIE: # @@ -105,6 +109,10 @@ WARNINGS := -Wall -Wformat-security -Wdeclaration-after-statement -Wstrict-prot CFLAGS-GCOV := --coverage -fno-exceptions -fno-inline -fprofile-update=atomic export CFLAGS-GCOV +ifeq ($(ARCH),mips) +WARNINGS := -rdynamic +endif + ifneq ($(GCOV),) LDFLAGS += -lgcov CFLAGS += $(CFLAGS-GCOV) diff --git a/criu/cr-restore.c b/criu/cr-restore.c index f572f79a0..99b36e0d4 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -461,9 +461,16 @@ static int restore_native_sigaction(int sig, SaEntry *e) ASSIGN_TYPED(act.rt_sa_handler, decode_pointer(e->sigaction)); ASSIGN_TYPED(act.rt_sa_flags, e->flags); ASSIGN_TYPED(act.rt_sa_restorer, decode_pointer(e->restorer)); +#ifdef CONFIG_MIPS + e->has_mask_extended = 1; + BUILD_BUG_ON(sizeof(e->mask)* 2 != sizeof(act.rt_sa_mask.sig)); + + memcpy(&(act.rt_sa_mask.sig[0]), &e->mask, sizeof(act.rt_sa_mask.sig[0])); + memcpy(&(act.rt_sa_mask.sig[1]), &e->mask_extended, 
sizeof(act.rt_sa_mask.sig[1])); +#else BUILD_BUG_ON(sizeof(e->mask) != sizeof(act.rt_sa_mask.sig)); memcpy(act.rt_sa_mask.sig, &e->mask, sizeof(act.rt_sa_mask.sig)); - +#endif if (sig == SIGCHLD) { sigchld_act = act; return 0; diff --git a/criu/kerndat.c b/criu/kerndat.c index 0b6d53bc7..831f9f72a 100644 --- a/criu/kerndat.c +++ b/criu/kerndat.c @@ -1007,6 +1007,15 @@ static bool kerndat_has_clone3_set_tid(void) pid_t pid; struct _clone_args args = {}; +#if defined(CONFIG_MIPS) + /* + * Currently the CRIU PIE assembler clone3() wrapper is + * not implemented for MIPS. + */ + kdat.has_clone3_set_tid = false; + return 0; +#endif + args.set_tid = -1; /* * On a system without clone3() this will return ENOSYS. diff --git a/criu/parasite-syscall.c b/criu/parasite-syscall.c index b649d1b51..5f9de152a 100644 --- a/criu/parasite-syscall.c +++ b/criu/parasite-syscall.c @@ -179,7 +179,12 @@ int parasite_dump_thread_seized(struct parasite_thread_ctl *tctl, pc->cap_last_cap = kdat.last_cap; tc->has_blk_sigset = true; +#ifdef CONFIG_MIPS + memcpy(&tc->blk_sigset, (unsigned long *)compel_thread_sigmask(tctl), sizeof(tc->blk_sigset)); + memcpy(&tc->blk_sigset_extended, (unsigned long *)compel_thread_sigmask(tctl)+1, sizeof(tc->blk_sigset)); +#else memcpy(&tc->blk_sigset, compel_thread_sigmask(tctl), sizeof(k_rtsigset_t)); +#endif ret = compel_get_thread_regs(tctl, save_task_regs, core); if (ret) { pr_err("Can't obtain regs for thread %d\n", pid); @@ -240,8 +245,15 @@ int parasite_dump_sigacts_seized(struct parasite_ctl *ctl, struct pstree_item *i ASSIGN_TYPED(sa->sigaction, encode_pointer(args->sas[i].rt_sa_handler)); ASSIGN_TYPED(sa->flags, args->sas[i].rt_sa_flags); ASSIGN_TYPED(sa->restorer, encode_pointer(args->sas[i].rt_sa_restorer)); +#ifdef CONFIG_MIPS + sa->has_mask_extended = 1; + BUILD_BUG_ON(sizeof(sa->mask) * 2 != sizeof(args->sas[0].rt_sa_mask.sig)); + memcpy(&sa->mask, &(args->sas[i].rt_sa_mask.sig[0]), sizeof(sa->mask)); + memcpy(&sa->mask_extended, 
&(args->sas[i].rt_sa_mask.sig[1]), sizeof(sa->mask)); +#else BUILD_BUG_ON(sizeof(sa->mask) != sizeof(args->sas[0].rt_sa_mask.sig)); memcpy(&sa->mask, args->sas[i].rt_sa_mask.sig, sizeof(sa->mask)); +#endif sa->has_compat_sigaction = true; sa->compat_sigaction = !compel_mode_native(ctl); @@ -569,7 +581,12 @@ struct parasite_ctl *parasite_infect_seized(pid_t pid, struct pstree_item *item, } parasite_args_size = PARASITE_ARG_SIZE_MIN; /* reset for next task */ +#ifdef CONFIG_MIPS + memcpy(&item->core[0]->tc->blk_sigset, (unsigned long *)compel_task_sigmask(ctl), sizeof(item->core[0]->tc->blk_sigset)); + memcpy(&item->core[0]->tc->blk_sigset_extended, (unsigned long *)compel_task_sigmask(ctl)+1, sizeof(item->core[0]->tc->blk_sigset)); +#else memcpy(&item->core[0]->tc->blk_sigset, compel_task_sigmask(ctl), sizeof(k_rtsigset_t)); +#endif dmpi(item)->parasite_ctl = ctl; return ctl; diff --git a/criu/pie/Makefile b/criu/pie/Makefile index a30747ac3..265dcf82b 100644 --- a/criu/pie/Makefile +++ b/criu/pie/Makefile @@ -14,6 +14,10 @@ ifneq ($(filter-out clean mrproper,$(MAKECMDGOALS)),) compel_plugins := $(shell $(COMPEL_BIN) plugins) endif +ifeq ($(ARCH),mips) + ccflags-y += -mno-abicalls -fno-pic +endif + LDS := compel/arch/$(ARCH)/scripts/compel-pack.lds.S restorer-obj-y += parasite-vdso.o ./$(ARCH_DIR)/vdso-pie.o diff --git a/criu/pie/Makefile.library b/criu/pie/Makefile.library index de75b11d4..da2a2fab3 100644 --- a/criu/pie/Makefile.library +++ b/criu/pie/Makefile.library @@ -23,3 +23,7 @@ endif CFLAGS := $(filter-out -pg $(CFLAGS-GCOV) $(CFLAGS-ASAN),$(CFLAGS)) CFLAGS += $(CFLAGS_PIE) + +ifeq ($(ARCH),mips) +CFLAGS += -fno-stack-protector -DCR_NOGLIBC -mno-abicalls -fno-pic +endif From b5c34c74c5055301821c2acbe4cf8aad646da558 Mon Sep 17 00:00:00 2001 From: Guoyun Sun Date: Wed, 8 Apr 2020 10:49:47 +0800 Subject: [PATCH 0239/1854] mips:support docker-cross compile Signed-off-by: Guoyun Sun --- .travis.yml | 4 + scripts/build/Dockerfile.mips64el-cross | 44 ++++++ 
scripts/build/Makefile | 4 +- test/zdtm/lib/arch/mips/include/asm/atomic.h | 136 +++++++++++++++++++ test/zdtm/lib/test.c | 2 +- test/zdtm/static/pthread01.c | 5 + 6 files changed, 193 insertions(+), 2 deletions(-) create mode 100644 scripts/build/Dockerfile.mips64el-cross create mode 100644 test/zdtm/lib/arch/mips/include/asm/atomic.h diff --git a/.travis.yml b/.travis.yml index 69a505193..8c126b47f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -95,6 +95,10 @@ jobs: env: TR_ARCH=ppc64-cross dist: bionic - env: TR_ARCH=local STREAM_TEST=1 + - os: linux + arch: amd64 + env: TR_ARCH=mips64el-cross + dist: bionic allow_failures: - env: TR_ARCH=docker-test - env: TR_ARCH=docker-test DIST=xenial diff --git a/scripts/build/Dockerfile.mips64el-cross b/scripts/build/Dockerfile.mips64el-cross new file mode 100644 index 000000000..1ba936105 --- /dev/null +++ b/scripts/build/Dockerfile.mips64el-cross @@ -0,0 +1,44 @@ +FROM dockcross/base:latest + +# Add the cross compiler sources +RUN echo "deb http://ftp.us.debian.org/debian/ buster main" >> /etc/apt/sources.list && \ + dpkg --add-architecture mips64el && \ + apt-get install emdebian-archive-keyring + +RUN apt-get update && apt-get install -y \ + crossbuild-essential-mips64el \ + libbz2-dev:mips64el \ + libexpat1-dev:mips64el \ + ncurses-dev:mips64el \ + libssl-dev:mips64el \ + protobuf-c-compiler \ + protobuf-compiler \ + python-protobuf \ + libnl-3-dev:mips64el \ + libprotobuf-dev:mips64el \ + libnet-dev:mips64el \ + libprotobuf-c-dev:mips64el \ + libcap-dev:mips64el \ + libaio-dev:mips64el \ + libnl-route-3-dev:mips64el + +ENV CROSS_TRIPLE=mips64el-linux-gnuabi64 +ENV CROSS_COMPILE=${CROSS_TRIPLE}- \ + CROSS_ROOT=/usr/${CROSS_TRIPLE} \ + AS=/usr/bin/${CROSS_TRIPLE}-as \ + AR=/usr/bin/${CROSS_TRIPLE}-ar \ + CC=/usr/bin/${CROSS_TRIPLE}-gcc \ + CPP=/usr/bin/${CROSS_TRIPLE}-cpp \ + CXX=/usr/bin/${CROSS_TRIPLE}-g++ \ + LD=/usr/bin/${CROSS_TRIPLE}-ld \ + FC=/usr/bin/${CROSS_TRIPLE}-gfortran + +ENV 
PATH="${PATH}:${CROSS_ROOT}/bin" \ + PKG_CONFIG_PATH=/usr/lib/${CROSS_TRIPLE}/pkgconfig \ + ARCH=mips \ + SUBARCH=mips + +COPY . /criu +WORKDIR /criu + +RUN make mrproper && date && make -j $(nproc) zdtm && date diff --git a/scripts/build/Makefile b/scripts/build/Makefile index 855539152..974d1455f 100644 --- a/scripts/build/Makefile +++ b/scripts/build/Makefile @@ -2,7 +2,9 @@ ARCHES := x86_64 fedora-asan fedora-rawhide centos armv7hf TARGETS := $(ARCHES) alpine TARGETS_CLANG := $(addsuffix $(TARGETS),-clang) CONTAINER_RUNTIME := docker -TARGETS += armv7-cross aarch64-cross ppc64-cross + +TARGETS += armv7-cross aarch64-cross ppc64-cross mips64el-cross + all: $(TARGETS) $(TARGETS_CLANG) .PHONY: all diff --git a/test/zdtm/lib/arch/mips/include/asm/atomic.h b/test/zdtm/lib/arch/mips/include/asm/atomic.h new file mode 100644 index 000000000..acf4c03cd --- /dev/null +++ b/test/zdtm/lib/arch/mips/include/asm/atomic.h @@ -0,0 +1,136 @@ +#ifndef __CR_ATOMIC_H__ +#define __CR_ATOMIC_H__ + +//#include +//#include "common/compiler.h" +//#include "common/arch/mips/asm/utils.h" +//#include "common/arch/mips/asm/cmpxchg.h" + +typedef uint32_t atomic_t; +/* typedef struct { */ +/* int counter; */ +/* }atomic_t; */ + +#define __WEAK_LLSC_MB " sync \n" + +#define smp_llsc_mb() __asm__ __volatile__(__WEAK_LLSC_MB : : :"memory") + +#define smp_mb__before_llsc() smp_llsc_mb() +#define smp_mb__before_atomic() smp_mb__before_llsc() +#define smp_mb__after_atomic() smp_llsc_mb() + +#define likely(x) __builtin_expect(!!(x), 1) +#define unlikely(x) __builtin_expect(!!(x), 0) + +#define atomic_get(v) (*(volatile int *)v) +#define atomic_set(v, i) ((*v) = (i)) + +//#define atomic_get atomic_read + +/* + * atomic_add - add integer to atomic variable + * @i: integer value to add + * @v: pointer of type atomic_t + * + * Atomically adds @i to @v. 
+ */ + +static __inline__ void atomic_add(int i, atomic_t * v) +{ + int temp; + + do { + __asm__ __volatile__( + " .set mips3 \n" + " ll %0, %1 # atomic_add \n" + " addu %0, %2 \n" + " sc %0, %1 \n" + " .set mips0 \n" + : "=&r" (temp), "+m" (*v) + : "Ir" (i)); + } while (unlikely(!temp)); +} + +/* + * atomic_sub - subtract the atomic variable + * @i: integer value to subtract + * @v: pointer of type atomic_t + * + * Atomically subtracts @i from @v. + */ +static __inline__ void atomic_sub(int i, atomic_t * v) +{ + int temp; + + do { + __asm__ __volatile__( + " .set mips3 \n" + " ll %0, %1 # atomic_sub \n" + " subu %0, %2 \n" + " sc %0, %1 \n" + " .set mips0 \n" + : "=&r" (temp), "+m" (*v) + : "Ir" (i)); + } while (unlikely(!temp)); +} + +/* + * Same as above, but return the result value + */ +static __inline__ int atomic_add_return(int i, atomic_t * v) +{ + int result; + int temp; + + smp_mb__before_llsc(); + + do { + __asm__ __volatile__( + " .set mips3 \n" + " ll %1, %2 # atomic_add_return \n" + " addu %0, %1, %3 \n" + " sc %0, %2 \n" + " .set mips0 \n" + : "=&r" (result), "=&r" (temp), "+m" (*v) + : "Ir" (i)); + } while (unlikely(!result)); + + result = temp + i; + + smp_llsc_mb(); + + return result; +} + +static __inline__ int atomic_sub_return(int i, atomic_t * v) +{ + int result; + int temp; + + smp_mb__before_llsc(); + + do { + __asm__ __volatile__( + " .set mips3 \n" + " ll %1, %2 # atomic_sub_return \n" + " subu %0, %1, %3 \n" + " sc %0, %2 \n" + " .set mips0 \n" + : "=&r" (result), "=&r" (temp), "+m" (*v) + : "Ir" (i)); + } while (unlikely(!result)); + + result = temp - i; + + smp_llsc_mb(); + + return result; +} + +#define atomic_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n))) +#define atomic_dec_return(v) atomic_sub_return(1, (v)) +#define atomic_inc_return(v) atomic_add_return(1, (v)) + +static inline unsigned int atomic_inc(atomic_t *v) { return atomic_add_return(1, v) - 1; } +static inline unsigned int atomic_dec(atomic_t *v) { return 
atomic_sub_return(1, v) + 1; } +#endif /* __CR_ATOMIC_H__ */ diff --git a/test/zdtm/lib/test.c b/test/zdtm/lib/test.c index 630476de0..e031357ac 100644 --- a/test/zdtm/lib/test.c +++ b/test/zdtm/lib/test.c @@ -403,7 +403,7 @@ pid_t sys_clone_unified(unsigned long flags, void *child_stack, void *parent_tid { #ifdef __x86_64__ return (pid_t)syscall(__NR_clone, flags, child_stack, parent_tid, child_tid, newtls); -#elif (__i386__ || __arm__ || __aarch64__ ||__powerpc64__) +#elif (__i386__ || __arm__ || __aarch64__ ||__powerpc64__ || __mips__) return (pid_t)syscall(__NR_clone, flags, child_stack, parent_tid, newtls, child_tid); #elif __s390x__ return (pid_t)syscall(__NR_clone, child_stack, flags, parent_tid, child_tid, newtls); diff --git a/test/zdtm/static/pthread01.c b/test/zdtm/static/pthread01.c index 1e84463ee..bdd7c59d8 100644 --- a/test/zdtm/static/pthread01.c +++ b/test/zdtm/static/pthread01.c @@ -27,6 +27,11 @@ static __thread struct tls_data_s { static task_waiter_t t1; static task_waiter_t t2; +#ifdef CONFIG_MIPS +#ifndef SIGSTKFLT +#define SIGSTKFLT 16 +#endif +#endif static char *decode_signal(const sigset_t *s, char *buf) { buf[0] = '\0'; From 40169b950eff23347975acc0de0b9316f08d175b Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Thu, 26 Mar 2020 06:26:48 +0000 Subject: [PATCH 0240/1854] style: fix typos Oddly, one of the test had a typo which should be fatal. 
Signed-off-by: Nicolas Viennot --- criu/config.c | 2 +- test/others/ext-tty/run.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/criu/config.c b/criu/config.c index e78b534a9..eb303fd77 100644 --- a/criu/config.c +++ b/criu/config.c @@ -876,7 +876,7 @@ int check_options(void) } if (!opts.restore_detach && opts.restore_sibling) { - pr_err("--restore-sibling only makes sense with --restore-detach\n"); + pr_err("--restore-sibling only makes sense with --restore-detached\n"); return 1; } diff --git a/test/others/ext-tty/run.py b/test/others/ext-tty/run.py index 2c0bacc84..8109033cb 100755 --- a/test/others/ext-tty/run.py +++ b/test/others/ext-tty/run.py @@ -29,7 +29,7 @@ ttyid = "fd[%d]:tty[%x:%x]" % (slave, st.st_rdev, st.st_dev) ret = subprocess.Popen([ "../../../criu/criu", "restore", "-v4", "--inherit-fd", ttyid, - "--restore-sibling", "--restore-detach" + "--restore-sibling", "--restore-detached" ]).wait() if ret: sys.exit(ret) From d38851c9bd003d9c5b2b1c804291f518db681938 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Sat, 18 Apr 2020 22:28:24 +0300 Subject: [PATCH 0241/1854] test/jenkins: use bash to run shell scripts We permanently have issues like this: ./test/jenkins/criu-iter.sh: 3: source: not found It looks like a good idea to use one shell to run our jenkins scripts. 
Signed-off-by: Andrei Vagin --- test/jenkins/criu-btrfs.sh | 2 ++ test/jenkins/criu-by-id.sh | 2 ++ test/jenkins/criu-dedup.sh | 2 ++ test/jenkins/criu-dump.sh | 2 ++ test/jenkins/criu-fault.sh | 1 + test/jenkins/criu-fcg.sh | 2 ++ test/jenkins/criu-groups.sh | 2 ++ test/jenkins/criu-inhfd.sh | 2 ++ test/jenkins/criu-iter.sh | 2 ++ test/jenkins/criu-join-ns.sh | 2 ++ test/jenkins/criu-lazy-common.sh | 2 ++ test/jenkins/criu-lazy-migration.sh | 2 ++ test/jenkins/criu-lazy-pages.sh | 2 ++ test/jenkins/criu-other.sh | 2 ++ test/jenkins/criu-overlay.sh | 2 ++ test/jenkins/criu-pre-dump.sh | 2 ++ test/jenkins/criu-remote-lazy-pages.sh | 2 ++ test/jenkins/criu-sibling.sh | 2 ++ test/jenkins/criu-snap.sh | 2 ++ test/jenkins/criu-stop.sh | 2 ++ test/jenkins/criu-user.sh | 2 ++ test/jenkins/criu.sh | 2 ++ 22 files changed, 43 insertions(+) diff --git a/test/jenkins/criu-btrfs.sh b/test/jenkins/criu-btrfs.sh index e749ad906..e456f1c34 100644 --- a/test/jenkins/criu-btrfs.sh +++ b/test/jenkins/criu-btrfs.sh @@ -1,3 +1,5 @@ +#!/bin/bash + # This is a job which is executed on btrfs source `dirname $0`/criu-lib.sh && diff --git a/test/jenkins/criu-by-id.sh b/test/jenkins/criu-by-id.sh index 2381e73f7..c041ed358 100644 --- a/test/jenkins/criu-by-id.sh +++ b/test/jenkins/criu-by-id.sh @@ -1,3 +1,5 @@ +#!/bin/bash + echo 950000 > /sys/fs/cgroup/cpu,cpuacct/system/cpu.rt_runtime_us echo 950000 > /sys/fs/cgroup/cpu,cpuacct/system/jenkins.service/cpu.rt_runtime_us git checkout -f ${TEST_COMMIT} diff --git a/test/jenkins/criu-dedup.sh b/test/jenkins/criu-dedup.sh index e75ef5f82..0041496d8 100755 --- a/test/jenkins/criu-dedup.sh +++ b/test/jenkins/criu-dedup.sh @@ -1,3 +1,5 @@ +#!/bin/bash + # Check auto-deduplication of pagemaps set -e source `dirname $0`/criu-lib.sh diff --git a/test/jenkins/criu-dump.sh b/test/jenkins/criu-dump.sh index 381cf7a98..4c49532b2 100755 --- a/test/jenkins/criu-dump.sh +++ b/test/jenkins/criu-dump.sh @@ -1,3 +1,5 @@ +#!/bin/bash + # Check that dump is not 
destructive set -e source `dirname $0`/criu-lib.sh diff --git a/test/jenkins/criu-fault.sh b/test/jenkins/criu-fault.sh index c27dd3738..f871a140b 100755 --- a/test/jenkins/criu-fault.sh +++ b/test/jenkins/criu-fault.sh @@ -1,4 +1,5 @@ #!/bin/bash + # Check known fault injections set -e source `dirname $0`/criu-lib.sh diff --git a/test/jenkins/criu-fcg.sh b/test/jenkins/criu-fcg.sh index 938a72f26..ca5054f5e 100755 --- a/test/jenkins/criu-fcg.sh +++ b/test/jenkins/criu-fcg.sh @@ -1,3 +1,5 @@ +#!/bin/bash + # Test how freeze cgroup works set -e source `dirname $0`/criu-lib.sh diff --git a/test/jenkins/criu-groups.sh b/test/jenkins/criu-groups.sh index 508d20aa6..b5bea4eab 100755 --- a/test/jenkins/criu-groups.sh +++ b/test/jenkins/criu-groups.sh @@ -1,3 +1,5 @@ +#!/bin/bash + # Make one regular C/R cycle over randomly-generated groups set -e source `dirname $0`/criu-lib.sh diff --git a/test/jenkins/criu-inhfd.sh b/test/jenkins/criu-inhfd.sh index a59dcda6e..8f44ba13a 100755 --- a/test/jenkins/criu-inhfd.sh +++ b/test/jenkins/criu-inhfd.sh @@ -1,3 +1,5 @@ +#!/bin/bash + # Check known fault injections set -e source `dirname $0`/criu-lib.sh diff --git a/test/jenkins/criu-iter.sh b/test/jenkins/criu-iter.sh index d414b0575..304aa43db 100755 --- a/test/jenkins/criu-iter.sh +++ b/test/jenkins/criu-iter.sh @@ -1,3 +1,5 @@ +#!/bin/bash + # Make 3 iteration of dump/restore for each test set -e source `dirname $0`/criu-lib.sh diff --git a/test/jenkins/criu-join-ns.sh b/test/jenkins/criu-join-ns.sh index 39ef182f0..241c29034 100755 --- a/test/jenkins/criu-join-ns.sh +++ b/test/jenkins/criu-join-ns.sh @@ -1,3 +1,5 @@ +#!/bin/bash + # Make one regular C/R cycle set -e source `dirname $0`/criu-lib.sh diff --git a/test/jenkins/criu-lazy-common.sh b/test/jenkins/criu-lazy-common.sh index 7fdab40dd..a8ff9e51b 100644 --- a/test/jenkins/criu-lazy-common.sh +++ b/test/jenkins/criu-lazy-common.sh @@ -1,3 +1,5 @@ +#!/bin/bash + KERN_MAJ=`uname -r | cut -d. 
-f1` KERN_MIN=`uname -r | cut -d. -f2` if [ $KERN_MAJ -ge "4" ] && [ $KERN_MIN -ge "11" ]; then diff --git a/test/jenkins/criu-lazy-migration.sh b/test/jenkins/criu-lazy-migration.sh index 30e3c0375..02a212e0d 100755 --- a/test/jenkins/criu-lazy-migration.sh +++ b/test/jenkins/criu-lazy-migration.sh @@ -1,3 +1,5 @@ +#!/bin/bash + # Check lazy-pages set -e source `dirname $0`/criu-lib.sh diff --git a/test/jenkins/criu-lazy-pages.sh b/test/jenkins/criu-lazy-pages.sh index a3ee9a4ec..9ef721739 100755 --- a/test/jenkins/criu-lazy-pages.sh +++ b/test/jenkins/criu-lazy-pages.sh @@ -1,3 +1,5 @@ +#!/bin/bash + # Check lazy-pages set -e source `dirname $0`/criu-lib.sh diff --git a/test/jenkins/criu-other.sh b/test/jenkins/criu-other.sh index c6c231c86..bb68f912a 100755 --- a/test/jenkins/criu-other.sh +++ b/test/jenkins/criu-other.sh @@ -1,3 +1,5 @@ +#!/bin/bash + source `dirname $0`/criu-lib.sh && prep && make -C test other && diff --git a/test/jenkins/criu-overlay.sh b/test/jenkins/criu-overlay.sh index 5ef7682ac..de80007a3 100755 --- a/test/jenkins/criu-overlay.sh +++ b/test/jenkins/criu-overlay.sh @@ -1,3 +1,5 @@ +#!/bin/bash + # Make one regular C/R cycle set -e source `dirname $0`/criu-lib.sh diff --git a/test/jenkins/criu-pre-dump.sh b/test/jenkins/criu-pre-dump.sh index 95f4d8549..137f7c23f 100755 --- a/test/jenkins/criu-pre-dump.sh +++ b/test/jenkins/criu-pre-dump.sh @@ -1,3 +1,5 @@ +#!/bin/bash + # Check 3 pre-dump-s before dump (with and w/o page server) set -e source `dirname $0`/criu-lib.sh diff --git a/test/jenkins/criu-remote-lazy-pages.sh b/test/jenkins/criu-remote-lazy-pages.sh index ea0d17f0e..1c677e333 100755 --- a/test/jenkins/criu-remote-lazy-pages.sh +++ b/test/jenkins/criu-remote-lazy-pages.sh @@ -1,3 +1,5 @@ +#!/bin/bash + # Check remote-lazy-pages set -e source `dirname $0`/criu-lib.sh diff --git a/test/jenkins/criu-sibling.sh b/test/jenkins/criu-sibling.sh index 93f070330..d59b38970 100755 --- a/test/jenkins/criu-sibling.sh +++ 
b/test/jenkins/criu-sibling.sh @@ -1,3 +1,5 @@ +#!/bin/bash + # Make 3 iteration of dump/restore for each test set -e source `dirname $0`/criu-lib.sh diff --git a/test/jenkins/criu-snap.sh b/test/jenkins/criu-snap.sh index d28ba45d9..b08c57f52 100755 --- a/test/jenkins/criu-snap.sh +++ b/test/jenkins/criu-snap.sh @@ -1,3 +1,5 @@ +#!/bin/bash + # Check snapshots set -e source `dirname $0`/criu-lib.sh diff --git a/test/jenkins/criu-stop.sh b/test/jenkins/criu-stop.sh index d92519d68..64da2ee8a 100644 --- a/test/jenkins/criu-stop.sh +++ b/test/jenkins/criu-stop.sh @@ -1,3 +1,5 @@ +#!/bin/bash + # Check --leave-stopped option set -e source `dirname $0`/criu-lib.sh diff --git a/test/jenkins/criu-user.sh b/test/jenkins/criu-user.sh index d89ede203..f4ec52fc6 100755 --- a/test/jenkins/criu-user.sh +++ b/test/jenkins/criu-user.sh @@ -1,3 +1,5 @@ +#!/bin/bash + # Make 3 iteration of dump/restore for each test set -e source `dirname $0`/criu-lib.sh diff --git a/test/jenkins/criu.sh b/test/jenkins/criu.sh index 19d545c3c..0ee750b08 100755 --- a/test/jenkins/criu.sh +++ b/test/jenkins/criu.sh @@ -1,3 +1,5 @@ +#!/bin/bash + # Make one regular C/R cycle set -e source `dirname $0`/criu-lib.sh From be1394122112381eca55faf5b1a7b2e2b51bd383 Mon Sep 17 00:00:00 2001 From: Guoyun Sun Date: Fri, 17 Apr 2020 11:54:31 +0000 Subject: [PATCH 0242/1854] mips: implement arch_shmat() MIPS CPUs with VIPT caches also have aliasing issues, just like ARMv6. To overcome this issue, page coloring with 0x40000 alignment (SHMLBA) was introduced for shared mappings in the kernel. https://github.com/torvalds/linux/blob/master/arch/mips/include/asm/shmparam.h With this change, the zdtm tests ipc.c, shm.c, shm-unaligned.c and shm-mp.c pass.
Signed-off-by: Guoyun Sun --- criu/arch/mips/include/asm/restorer.h | 4 ++++ criu/arch/mips/restorer.c | 32 +++++++++++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/criu/arch/mips/include/asm/restorer.h b/criu/arch/mips/include/asm/restorer.h index 1a33cd884..d916377f4 100755 --- a/criu/arch/mips/include/asm/restorer.h +++ b/criu/arch/mips/include/asm/restorer.h @@ -76,4 +76,8 @@ static inline void free_compat_syscall_stack(void *stack32) { } int restore_gpregs(struct rt_sigframe *f, UserMipsRegsEntry *r); int restore_nonsigframe_gpregs(UserMipsRegsEntry *r); +#define ARCH_HAS_SHMAT_HOOK +unsigned long arch_shmat(int shmid, void *shmaddr, + int shmflg, unsigned long size); + #endif diff --git a/criu/arch/mips/restorer.c b/criu/arch/mips/restorer.c index 2e196b60c..e3a1e4a44 100755 --- a/criu/arch/mips/restorer.c +++ b/criu/arch/mips/restorer.c @@ -15,3 +15,35 @@ int restore_nonsigframe_gpregs(UserMipsRegsEntry *r) { return 0; } + +#define SHMLBA 0x40000 +unsigned long arch_shmat(int shmid, void *shmaddr, + int shmflg, unsigned long size) +{ + unsigned long smap; + + /* SHMLBA-aligned, direct call shmat() */ + if (!((unsigned long)shmaddr & (SHMLBA - 1))) + return sys_shmat(shmid, shmaddr, shmflg); + + smap = sys_shmat(shmid, NULL, shmflg); + if (IS_ERR_VALUE(smap)) { + pr_err("shmat() with NULL shmaddr failed: %d\n", (int)smap); + return smap; + } + + /* We're lucky! 
*/ + if (smap == (unsigned long)shmaddr) + return smap; + + /* Warn ALOUD */ + pr_warn("Restoring shmem %p unaligned to SHMLBA.\n", shmaddr); + pr_warn("Make sure that you don't migrate shmem from non-VIPT cached CPU to VIPT cached \n"); + pr_warn("Otherwise YOU HAVE A CHANCE OF DATA CORRUPTIONS in writeable shmem\n"); + + smap = sys_mremap(smap, size, size, + MREMAP_FIXED | MREMAP_MAYMOVE, (unsigned long)shmaddr); + if (IS_ERR_VALUE(smap)) + pr_err("mremap() for shmem failed: %d\n", (int)smap); + return smap; +} From 277b0b69fac7afa6cbde51e7c99e2756029d0d6c Mon Sep 17 00:00:00 2001 From: Guoyun Sun Date: Wed, 22 Apr 2020 15:43:04 +0800 Subject: [PATCH 0243/1854] mips: fix failure when running zdtm test pthread01.c k_rtsigset_t is 16 bytes on the MIPS architecture, not 8 bytes, so blk_sigset_extended is added to TaskCoreEntry and ThreadCoreEntry: the extra 8 bytes are dumped in parasite-syscall.c and restored in cr-restore.c. Signed-off-by: Guoyun Sun --- criu/cr-restore.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/criu/cr-restore.c b/criu/cr-restore.c index 99b36e0d4..ec00bf71b 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -3551,8 +3551,12 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns for (i = 0; i < current->nr_threads; i++) { CoreEntry *tcore; struct rt_sigframe *sigframe; +#ifdef CONFIG_MIPS + k_rtsigset_t mips_blkset; +#else k_rtsigset_t *blkset = NULL; +#endif thread_args[i].pid = current->threads[i].ns[0].virt; thread_args[i].siginfo_n = siginfo_priv_nr[i]; thread_args[i].siginfo = task_args->siginfo; @@ -3563,11 +3567,22 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns if (thread_args[i].pid == pid) { task_args->t = thread_args + i; tcore = core; +#ifdef CONFIG_MIPS + mips_blkset.sig[0] = tcore->tc->blk_sigset; + mips_blkset.sig[1] = tcore->tc->blk_sigset_extended; +#else blkset = (void *)&tcore->tc->blk_sigset; +#endif }
else { tcore = current->core[i]; - if (tcore->thread_core->has_blk_sigset) + if (tcore->thread_core->has_blk_sigset) { +#ifdef CONFIG_MIPS + mips_blkset.sig[0] = tcore->thread_core->blk_sigset; + mips_blkset.sig[1] = tcore->thread_core->blk_sigset_extended; +#else blkset = (void *)&tcore->thread_core->blk_sigset; +#endif + } } if ((tcore->tc || tcore->ids) && thread_args[i].pid != pid) { @@ -3607,7 +3622,11 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns thread_args[i].mz = mz + i; sigframe = (struct rt_sigframe *)&mz[i].rt_sigframe; +#ifdef CONFIG_MIPS + if (construct_sigframe(sigframe, sigframe, &mips_blkset, tcore)) +#else if (construct_sigframe(sigframe, sigframe, blkset, tcore)) +#endif goto err; if (tcore->thread_core->comm) From 8364b09407a969c1af68cd3e477449fa60d2518e Mon Sep 17 00:00:00 2001 From: Josh Abraham Date: Mon, 27 Apr 2020 13:40:46 -0400 Subject: [PATCH 0244/1854] soccr/test: Fix error logging in libsoccr tcp-test Signed-off-by: Joshua Abraham --- soccr/test/tcp-conn.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/soccr/test/tcp-conn.c b/soccr/test/tcp-conn.c index e31f58e7e..cdd75129a 100644 --- a/soccr/test/tcp-conn.c +++ b/soccr/test/tcp-conn.c @@ -101,12 +101,12 @@ int main(void) /* Start testing */ dst_let = sizeof(addr); if (getsockname(sock, (struct sockaddr *) &addr, &dst_let)) { - pr_perror("connect"); + pr_perror("getsockname"); return 1; } dst_let = sizeof(addr); if (getpeername(sock, (struct sockaddr *) &dst, &dst_let)) { - pr_perror("connect"); + pr_perror("getpeername"); return 1; } From 5bd776da382fb0838830d80f4bec6c0aaec8bfbb Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Mon, 11 May 2020 02:38:14 -0700 Subject: [PATCH 0245/1854] Remove dupe of "deprecated stuff on" msg A similar one is already printed in check_options(). Before this patch: > $ ./criu/criu -vvvvvv --deprecated --log-file=/dev/stdout xxx > (00.000000) Turn deprecated stuff ON > ... 
> (00.029680) DEPRECATED ON > (00.029687) Error (criu/crtools.c:284): unknown command: xxx Signed-off-by: Kir Kolyshkin --- criu/crtools.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/criu/crtools.c b/criu/crtools.c index ad61fa9bb..76172f350 100644 --- a/criu/crtools.c +++ b/criu/crtools.c @@ -210,9 +210,6 @@ int main(int argc, char *argv[], char *envp[]) if (fault_injected(FI_CANNOT_MAP_VDSO)) kdat.can_map_vdso = 0; - if (opts.deprecated_ok) - pr_debug("DEPRECATED ON\n"); - if (!list_empty(&opts.inherit_fds)) { if (strcmp(argv[optind], "restore")) { pr_err("--inherit-fd is restore-only option\n"); From 8452be93cf24b0dffe257ddab892a9c95d44c91b Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Wed, 13 May 2020 15:48:28 +0000 Subject: [PATCH 0246/1854] travis: use bionic almost everywhere A few tests were still running on xenial because at some point they were hanging. This switches now all tests to bionic except one docker test which still uses xenial to test with overlayfs. Signed-off-by: Adrian Reber --- .travis.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.travis.yml b/.travis.yml index 8c126b47f..b28bd64f9 100644 --- a/.travis.yml +++ b/.travis.yml @@ -51,7 +51,7 @@ jobs: - os: linux arch: amd64 env: TR_ARCH=fedora-rawhide - dist: xenial # test hangs on bionic + dist: bionic - os: linux arch: amd64 env: TR_ARCH=podman-test @@ -69,19 +69,19 @@ jobs: - os: linux arch: amd64 env: TR_ARCH=alpine CLANG=1 - dist: xenial # test hangs on bionic + dist: bionic - os: linux arch: amd64 env: TR_ARCH=alpine - dist: xenial # test hangs on bionic + dist: bionic - os: linux arch: amd64 env: TR_ARCH=centos - dist: xenial # test hangs on bionic + dist: bionic - os: linux arch: amd64 env: TR_ARCH=fedora-asan - dist: xenial # test hangs on bionic + dist: bionic - os: linux arch: amd64 env: TR_ARCH=armv7-cross From 00b8257d9f31876e35e08e6d556f6f67d76f3908 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Wed, 13 May 2020 17:57:03 +0200 
Subject: [PATCH 0247/1854] tests: move cross compilation to github actions This moves the cross compilation tests to github actions, to slightly reduce the number of Travis tests and run them in parallel on github actions. Signed-off-by: Adrian Reber --- .github/workflows/cross-compile.yml | 23 +++++++++++++++++++++++ .travis.yml | 16 ---------------- 2 files changed, 23 insertions(+), 16 deletions(-) create mode 100644 .github/workflows/cross-compile.yml diff --git a/.github/workflows/cross-compile.yml b/.github/workflows/cross-compile.yml new file mode 100644 index 000000000..9545d3df6 --- /dev/null +++ b/.github/workflows/cross-compile.yml @@ -0,0 +1,23 @@ +name: Cross Compile Tests + +on: + push: + branches: [ criu-dev ] + pull_request: + branches: [ criu-dev ] + schedule: + - cron: '55 5 * * *' + +jobs: + build: + + runs-on: ubuntu-latest + strategy: + matrix: + target: [armv7-cross, aarch64-cross, ppc64-cross, mips64el-cross] + + steps: + - uses: actions/checkout@v2 + - name: Run Cross Compilation Targets + run: > + sudo make -C scripts/travis ${{ matrix.target }} diff --git a/.travis.yml b/.travis.yml index b28bd64f9..e71afa0a3 100644 --- a/.travis.yml +++ b/.travis.yml @@ -82,23 +82,7 @@ jobs: arch: amd64 env: TR_ARCH=fedora-asan dist: bionic - - os: linux - arch: amd64 - env: TR_ARCH=armv7-cross - dist: bionic - - os: linux - arch: amd64 - env: TR_ARCH=aarch64-cross - dist: bionic - - os: linux - arch: amd64 - env: TR_ARCH=ppc64-cross - dist: bionic - env: TR_ARCH=local STREAM_TEST=1 - - os: linux - arch: amd64 - env: TR_ARCH=mips64el-cross - dist: bionic allow_failures: - env: TR_ARCH=docker-test - env: TR_ARCH=docker-test DIST=xenial From 00a44031e220919c9df6a1e25db8f32fb141d741 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Fri, 15 May 2020 17:10:58 -0700 Subject: [PATCH 0248/1854] cr-service: fix wording in debug messages The message "Overwriting RPC settings with values from " is misleading, giving the impression that file is being read and 
consumed. It really puzzled me, since that file didn't exist. What it needs to say is "Would overwrite", i.e. if a file with that name is present, it would be used. Also, add an actual "Parsing file ..." message so it will be clear which files are being used. Signed-off-by: Kir Kolyshkin --- criu/config.c | 2 ++ criu/cr-service.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/criu/config.c b/criu/config.c index eb303fd77..904addf3a 100644 --- a/criu/config.c +++ b/criu/config.c @@ -126,6 +126,8 @@ static char ** parse_config(char *filepath) if (!configfile) return NULL; + pr_debug("Parsing config file %s\n", filepath); + configuration = xmalloc(config_size * sizeof(char *)); if (configuration == NULL) { fclose(configfile); diff --git a/criu/cr-service.c b/criu/cr-service.c index 53eadb1bc..6dc2379d6 100644 --- a/criu/cr-service.c +++ b/criu/cr-service.c @@ -405,7 +405,7 @@ static int setup_opts_from_req(int sk, CriuOpts *req) } if (req->config_file) { - pr_debug("Overwriting RPC settings with values from %s\n", req->config_file); + pr_debug("Would overwrite RPC settings with values from %s\n", req->config_file); } if (kerndat_init()) From f6d1b498dc22a66865e1c8899ac5dc2b81363ce1 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Fri, 15 May 2020 17:13:33 -0700 Subject: [PATCH 0249/1854] cr-service: spell out an error While working on runc checkpointing, I incorrectly closed status_fd prematurely, and received an error from CRIU, but it was non-descriptive. Do print the error from open().
Signed-off-by: Kir Kolyshkin --- criu/cr-service.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/criu/cr-service.c b/criu/cr-service.c index 6dc2379d6..7c2ff9835 100644 --- a/criu/cr-service.c +++ b/criu/cr-service.c @@ -680,8 +680,10 @@ static int setup_opts_from_req(int sk, CriuOpts *req) if (req->has_status_fd) { sprintf(status_fd, "/proc/%d/fd/%d", ids.pid, req->status_fd); opts.status_fd = open(status_fd, O_WRONLY); - if (opts.status_fd < 0) + if (opts.status_fd < 0) { + pr_perror("Can't reopen status fd %s", status_fd); goto err; + } } if (req->orphan_pts_master) From ae4fd07ca5c09482fa5a78f3ede0d31a4b5ff63e Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Thu, 20 Feb 2020 21:10:23 +0000 Subject: [PATCH 0250/1854] libcriu: Add orphan pts master The orphan pts master option was introduced with commit [1] to enable checkpoint/restore of containers with a pty pair used as a console. [1] https://github.com/checkpoint-restore/criu/commit/6afe523d97d59e6bf29621b8aa0e6a4332f710fc Signed-off-by: Radostin Stoyanov --- lib/c/criu.c | 11 +++++++++++ lib/c/criu.h | 2 ++ 2 files changed, 13 insertions(+) diff --git a/lib/c/criu.c b/lib/c/criu.c index 1d0a235f4..2ac18ade9 100644 --- a/lib/c/criu.c +++ b/lib/c/criu.c @@ -541,6 +541,17 @@ void criu_set_shell_job(bool shell_job) criu_local_set_shell_job(global_opts, shell_job); } +void criu_local_set_orphan_pts_master(criu_opts *opts, bool orphan_pts_master) +{ + opts->rpc->has_orphan_pts_master = true; + opts->rpc->orphan_pts_master = orphan_pts_master; +} + +void criu_set_orphan_pts_master(bool orphan_pts_master) +{ + criu_local_set_orphan_pts_master(global_opts, orphan_pts_master); +} + void criu_local_set_file_locks(criu_opts *opts, bool file_locks) { opts->rpc->has_file_locks = true; diff --git a/lib/c/criu.h b/lib/c/criu.h index 22db0fdcf..3a9204f5b 100644 --- a/lib/c/criu.h +++ b/lib/c/criu.h @@ -72,6 +72,7 @@ void criu_set_tcp_close(bool tcp_close); void criu_set_weak_sysctls(bool val); 
void criu_set_evasive_devices(bool evasive_devices); void criu_set_shell_job(bool shell_job); +void criu_set_orphan_pts_master(bool orphan_pts_master); void criu_set_file_locks(bool file_locks); void criu_set_track_mem(bool track_mem); void criu_set_auto_dedup(bool auto_dedup); @@ -185,6 +186,7 @@ void criu_local_set_tcp_close(criu_opts *opts, bool tcp_close); void criu_local_set_weak_sysctls(criu_opts *opts, bool val); void criu_local_set_evasive_devices(criu_opts *opts, bool evasive_devices); void criu_local_set_shell_job(criu_opts *opts, bool shell_job); +void criu_local_set_orphan_pts_master(criu_opts *opts, bool orphan_pts_master); void criu_local_set_file_locks(criu_opts *opts, bool file_locks); void criu_local_set_track_mem(criu_opts *opts, bool track_mem); void criu_local_set_auto_dedup(criu_opts *opts, bool auto_dedup); From 4ac9a3c904ace0414521afc05e1ba3287a95f248 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Fri, 21 Feb 2020 12:23:01 +0000 Subject: [PATCH 0251/1854] libcriu: Use spaces around '=' Signed-off-by: Radostin Stoyanov --- lib/c/criu.c | 94 ++++++++++++++++++++++++++-------------------------- 1 file changed, 47 insertions(+), 47 deletions(-) diff --git a/lib/c/criu.c b/lib/c/criu.c index 2ac18ade9..7daac7dbf 100644 --- a/lib/c/criu.c +++ b/lib/c/criu.c @@ -256,11 +256,11 @@ int criu_local_init_opts(criu_opts **o) return -1; } - opts->rpc = rpc; - opts->notify = NULL; + opts->rpc = rpc; + opts->notify = NULL; - opts->service_comm = CRIU_COMM_BIN; - opts->service_binary = strdup(CR_DEFAULT_SERVICE_BIN); + opts->service_comm = CRIU_COMM_BIN; + opts->service_binary = strdup(CR_DEFAULT_SERVICE_BIN); if(opts->service_binary == NULL) { perror("Can't allocate memory for criu service setting"); @@ -303,8 +303,8 @@ int criu_notify_pid(criu_notify_arg_t na) void criu_local_set_pid(criu_opts *opts, int pid) { - opts->rpc->has_pid = true; - opts->rpc->pid = pid; + opts->rpc->has_pid = true; + opts->rpc->pid = pid; } void criu_set_pid(int pid) @@ 
-408,8 +408,8 @@ void criu_set_work_dir_fd(int fd) void criu_local_set_leave_running(criu_opts *opts, bool leave_running) { - opts->rpc->has_leave_running = true; - opts->rpc->leave_running = leave_running; + opts->rpc->has_leave_running = true; + opts->rpc->leave_running = leave_running; } void criu_set_leave_running(bool leave_running) @@ -419,8 +419,8 @@ void criu_set_leave_running(bool leave_running) void criu_local_set_ext_unix_sk(criu_opts *opts, bool ext_unix_sk) { - opts->rpc->has_ext_unix_sk = true; - opts->rpc->ext_unix_sk = ext_unix_sk; + opts->rpc->has_ext_unix_sk = true; + opts->rpc->ext_unix_sk = ext_unix_sk; } void criu_set_ext_unix_sk(bool ext_unix_sk) @@ -477,8 +477,8 @@ int criu_add_unix_sk(unsigned int inode) void criu_local_set_tcp_established(criu_opts *opts, bool tcp_established) { - opts->rpc->has_tcp_established = true; - opts->rpc->tcp_established = tcp_established; + opts->rpc->has_tcp_established = true; + opts->rpc->tcp_established = tcp_established; } void criu_set_tcp_established(bool tcp_established) @@ -488,8 +488,8 @@ void criu_set_tcp_established(bool tcp_established) void criu_local_set_tcp_skip_in_flight(criu_opts *opts, bool tcp_skip_in_flight) { - opts->rpc->has_tcp_skip_in_flight = true; - opts->rpc->tcp_skip_in_flight = tcp_skip_in_flight; + opts->rpc->has_tcp_skip_in_flight = true; + opts->rpc->tcp_skip_in_flight = tcp_skip_in_flight; } void criu_set_tcp_skip_in_flight(bool tcp_skip_in_flight) @@ -499,8 +499,8 @@ void criu_set_tcp_skip_in_flight(bool tcp_skip_in_flight) void criu_local_set_tcp_close(criu_opts *opts, bool tcp_close) { - opts->rpc->has_tcp_close = true; - opts->rpc->tcp_close = tcp_close; + opts->rpc->has_tcp_close = true; + opts->rpc->tcp_close = tcp_close; } void criu_set_tcp_close(bool tcp_close) @@ -511,7 +511,7 @@ void criu_set_tcp_close(bool tcp_close) void criu_local_set_weak_sysctls(criu_opts *opts, bool val) { opts->rpc->has_weak_sysctls = true; - opts->rpc->weak_sysctls = val; + 
opts->rpc->weak_sysctls = val; } void criu_set_weak_sysctls(bool val) @@ -521,8 +521,8 @@ void criu_set_weak_sysctls(bool val) void criu_local_set_evasive_devices(criu_opts *opts, bool evasive_devices) { - opts->rpc->has_evasive_devices = true; - opts->rpc->evasive_devices = evasive_devices; + opts->rpc->has_evasive_devices = true; + opts->rpc->evasive_devices = evasive_devices; } void criu_set_evasive_devices(bool evasive_devices) @@ -532,8 +532,8 @@ void criu_set_evasive_devices(bool evasive_devices) void criu_local_set_shell_job(criu_opts *opts, bool shell_job) { - opts->rpc->has_shell_job = true; - opts->rpc->shell_job = shell_job; + opts->rpc->has_shell_job = true; + opts->rpc->shell_job = shell_job; } void criu_set_shell_job(bool shell_job) @@ -554,8 +554,8 @@ void criu_set_orphan_pts_master(bool orphan_pts_master) void criu_local_set_file_locks(criu_opts *opts, bool file_locks) { - opts->rpc->has_file_locks = true; - opts->rpc->file_locks = file_locks; + opts->rpc->has_file_locks = true; + opts->rpc->file_locks = file_locks; } void criu_set_file_locks(bool file_locks) @@ -565,8 +565,8 @@ void criu_set_file_locks(bool file_locks) void criu_local_set_log_level(criu_opts *opts, int log_level) { - opts->rpc->has_log_level = true; - opts->rpc->log_level = log_level; + opts->rpc->has_log_level = true; + opts->rpc->log_level = log_level; } void criu_set_log_level(int log_level) @@ -697,8 +697,8 @@ int criu_set_log_file(const char *log_file) void criu_local_set_cpu_cap(criu_opts *opts, unsigned int cap) { - opts->rpc->has_cpu_cap = true; - opts->rpc->cpu_cap = cap; + opts->rpc->has_cpu_cap = true; + opts->rpc->cpu_cap = cap; } void criu_set_cpu_cap(unsigned int cap) @@ -1410,7 +1410,7 @@ exit: static int send_req_and_recv_resp(criu_opts *opts, CriuReq *req, CriuResp **resp) { int fd; - int ret = 0; + int ret = 0; bool d = false; if (req->type == CRIU_REQ_TYPE__DUMP && req->opts->has_pid == false) @@ -1431,12 +1431,12 @@ static int send_req_and_recv_resp(criu_opts 
*opts, CriuReq *req, CriuResp **resp int criu_local_check(criu_opts *opts) { int ret = -1; - CriuReq req = CRIU_REQ__INIT; - CriuResp *resp = NULL; + CriuReq req = CRIU_REQ__INIT; + CriuResp *resp = NULL; saved_errno = 0; - req.type = CRIU_REQ_TYPE__CHECK; + req.type = CRIU_REQ_TYPE__CHECK; ret = send_req_and_recv_resp(opts, &req, &resp); if (ret) @@ -1463,13 +1463,13 @@ int criu_check(void) int criu_local_dump(criu_opts *opts) { int ret = -1; - CriuReq req = CRIU_REQ__INIT; - CriuResp *resp = NULL; + CriuReq req = CRIU_REQ__INIT; + CriuResp *resp = NULL; saved_errno = 0; - req.type = CRIU_REQ_TYPE__DUMP; - req.opts = opts->rpc; + req.type = CRIU_REQ_TYPE__DUMP; + req.opts = opts->rpc; ret = send_req_and_recv_resp(opts, &req, &resp); if (ret) @@ -1502,13 +1502,13 @@ int criu_dump(void) int criu_local_dump_iters(criu_opts *opts, int (*more)(criu_predump_info pi)) { int ret = -1, fd = -1, uret; - CriuReq req = CRIU_REQ__INIT; - CriuResp *resp = NULL; + CriuReq req = CRIU_REQ__INIT; + CriuResp *resp = NULL; saved_errno = 0; - req.type = CRIU_REQ_TYPE__PRE_DUMP; - req.opts = opts->rpc; + req.type = CRIU_REQ_TYPE__PRE_DUMP; + req.opts = opts->rpc; ret = -EINVAL; /* @@ -1573,13 +1573,13 @@ int criu_dump_iters(int (*more)(criu_predump_info pi)) int criu_local_restore(criu_opts *opts) { int ret = -1; - CriuReq req = CRIU_REQ__INIT; - CriuResp *resp = NULL; + CriuReq req = CRIU_REQ__INIT; + CriuResp *resp = NULL; saved_errno = 0; - req.type = CRIU_REQ_TYPE__RESTORE; - req.opts = opts->rpc; + req.type = CRIU_REQ_TYPE__RESTORE; + req.opts = opts->rpc; ret = send_req_and_recv_resp(opts, &req, &resp); if (ret) @@ -1612,8 +1612,8 @@ int criu_local_restore_child(criu_opts *opts) enum criu_service_comm saved_comm; const char *saved_comm_data; bool save_comm; - CriuReq req = CRIU_REQ__INIT; - CriuResp *resp = NULL; + CriuReq req = CRIU_REQ__INIT; + CriuResp *resp = NULL; /* * restore_child is not possible with criu running as a system @@ -1644,8 +1644,8 @@ int 
criu_local_restore_child(criu_opts *opts) saved_errno = 0; - req.type = CRIU_REQ_TYPE__RESTORE; - req.opts = opts->rpc; + req.type = CRIU_REQ_TYPE__RESTORE; + req.opts = opts->rpc; req.opts->has_rst_sibling = true; req.opts->rst_sibling = true; From f3341025207ba960e36140bb24d63098cdd69a57 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Mon, 24 Feb 2020 18:30:59 +0000 Subject: [PATCH 0252/1854] libcriu: Add space between 'if' and parenthesis Signed-off-by: Radostin Stoyanov --- lib/c/criu.c | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/lib/c/criu.c b/lib/c/criu.c index 7daac7dbf..de57a65dc 100644 --- a/lib/c/criu.c +++ b/lib/c/criu.c @@ -58,7 +58,7 @@ int criu_local_set_service_address(criu_opts *opts, const char *path) } else { opts->service_address = strdup(CR_DEFAULT_SERVICE_ADDRESS); } - if(opts->service_address == NULL) { + if (opts->service_address == NULL) { return -ENOMEM; } return 0; @@ -90,7 +90,7 @@ int criu_local_set_service_binary(criu_opts *opts, const char *path) } else { opts->service_binary = strdup(CR_DEFAULT_SERVICE_BIN); } - if(opts->service_binary == NULL) { + if (opts->service_binary == NULL) { return -ENOMEM; } return 0; @@ -118,7 +118,7 @@ void criu_local_free_opts(criu_opts *opts) } opts->rpc->n_exec_cmd = 0; - if(opts->rpc->unix_sk_ino) { + if (opts->rpc->unix_sk_ino) { for (i = 0; i < opts->rpc->n_unix_sk_ino; i++) { free(opts->rpc->unix_sk_ino[i]); } @@ -126,7 +126,7 @@ void criu_local_free_opts(criu_opts *opts) } opts->rpc->n_unix_sk_ino = 0; - if(opts->rpc->ext_mnt) { + if (opts->rpc->ext_mnt) { for (i = 0; i < opts->rpc->n_ext_mnt; i++) { if (opts->rpc->ext_mnt[i]) { free(opts->rpc->ext_mnt[i]->val); @@ -138,7 +138,7 @@ void criu_local_free_opts(criu_opts *opts) } opts->rpc->n_ext_mnt = 0; - if(opts->rpc->cg_root) { + if (opts->rpc->cg_root) { for (i = 0; i < opts->rpc->n_cg_root; i++) { if (opts->rpc->cg_root[i]) { free(opts->rpc->cg_root[i]->ctrl); @@ -150,7 +150,7 
@@ void criu_local_free_opts(criu_opts *opts) } opts->rpc->n_cg_root = 0; - if(opts->rpc->veths) { + if (opts->rpc->veths) { for (i = 0; i < opts->rpc->n_veths; i++) { if (opts->rpc->veths[i]) { free(opts->rpc->veths[i]->if_in); @@ -162,7 +162,7 @@ void criu_local_free_opts(criu_opts *opts) } opts->rpc->n_veths = 0; - if(opts->rpc->enable_fs) { + if (opts->rpc->enable_fs) { for (i = 0; i < opts->rpc->n_enable_fs; i++) { free(opts->rpc->enable_fs[i]); } @@ -170,7 +170,7 @@ void criu_local_free_opts(criu_opts *opts) } opts->rpc->n_enable_fs = 0; - if(opts->rpc->skip_mnt) { + if (opts->rpc->skip_mnt) { for (i = 0; i < opts->rpc->n_skip_mnt; i++) { free(opts->rpc->skip_mnt[i]); } @@ -178,7 +178,7 @@ void criu_local_free_opts(criu_opts *opts) } opts->rpc->n_skip_mnt = 0; - if(opts->rpc->irmap_scan_paths) { + if (opts->rpc->irmap_scan_paths) { for (i = 0; i < opts->rpc->n_irmap_scan_paths; i++) { free(opts->rpc->irmap_scan_paths[i]); } @@ -186,7 +186,7 @@ void criu_local_free_opts(criu_opts *opts) } opts->rpc->n_irmap_scan_paths = 0; - if(opts->rpc->cgroup_dump_controller) { + if (opts->rpc->cgroup_dump_controller) { for (i = 0; i < opts->rpc->n_cgroup_dump_controller; i++) { free(opts->rpc->cgroup_dump_controller[i]); } @@ -194,7 +194,7 @@ void criu_local_free_opts(criu_opts *opts) } opts->rpc->n_cgroup_dump_controller = 0; - if(opts->rpc->inherit_fd) { + if (opts->rpc->inherit_fd) { for (i = 0; i < opts->rpc->n_inherit_fd; i++) { if (opts->rpc->inherit_fd[i]) { free(opts->rpc->inherit_fd[i]->key); @@ -205,7 +205,7 @@ void criu_local_free_opts(criu_opts *opts) } opts->rpc->n_inherit_fd = 0; - if(opts->rpc->external) { + if (opts->rpc->external) { for (i = 0; i < opts->rpc->n_external; i++) { free(opts->rpc->external[i]); } @@ -213,7 +213,7 @@ void criu_local_free_opts(criu_opts *opts) } opts->rpc->n_external = 0; - if(opts->rpc->ps) { + if (opts->rpc->ps) { free(opts->rpc->ps->address); free(opts->rpc->ps); } @@ -262,7 +262,7 @@ int criu_local_init_opts(criu_opts **o) 
opts->service_comm = CRIU_COMM_BIN; opts->service_binary = strdup(CR_DEFAULT_SERVICE_BIN); - if(opts->service_binary == NULL) { + if (opts->service_binary == NULL) { perror("Can't allocate memory for criu service setting"); criu_local_free_opts(opts); return -1; @@ -325,7 +325,7 @@ void criu_set_images_dir_fd(int fd) int criu_local_set_parent_images(criu_opts *opts, const char *path) { opts->rpc->parent_img = strdup(path); - if(opts->rpc->parent_img == NULL) { + if (opts->rpc->parent_img == NULL) { return -ENOMEM; } return 0; @@ -577,7 +577,7 @@ void criu_set_log_level(int log_level) int criu_local_set_root(criu_opts *opts, const char *root) { opts->rpc->root = strdup(root); - if(opts->rpc->root == NULL) { + if (opts->rpc->root == NULL) { return -ENOMEM; } return 0; @@ -613,7 +613,7 @@ void criu_set_manage_cgroups_mode(enum criu_cg_mode mode) int criu_local_set_freeze_cgroup(criu_opts *opts, const char *name) { opts->rpc->freeze_cgroup = strdup(name); - if(opts->rpc->freeze_cgroup == NULL) { + if (opts->rpc->freeze_cgroup == NULL) { return -ENOMEM; } return 0; @@ -627,7 +627,7 @@ int criu_set_freeze_cgroup(const char *name) int criu_local_set_lsm_profile(criu_opts *opts, const char *name) { opts->rpc->lsm_profile = strdup(name); - if(opts->rpc->lsm_profile == NULL) { + if (opts->rpc->lsm_profile == NULL) { return -ENOMEM; } return 0; @@ -684,7 +684,7 @@ void criu_set_ext_masters(bool val) int criu_local_set_log_file(criu_opts *opts, const char *log_file) { opts->rpc->log_file = strdup(log_file); - if(opts->rpc->log_file == NULL) { + if (opts->rpc->log_file == NULL) { return -ENOMEM; } return 0; From 64347398c10b5911d0f1acd2db5c856b4b1fe464 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Wed, 20 May 2020 11:57:22 +0000 Subject: [PATCH 0253/1854] coverity: fix RESOURCE_LEAK criu/timens.c: 67 7. criu-3.14/criu/timens.c:67: leaked_storage: Variable "img" going out of scope leaks the storage it points to. 
65| if (id == 0 && empty_image(img)) { 66| pr_warn("Clocks values have not been dumped\n"); 67|-> return 0; 68| } Signed-off-by: Adrian Reber --- criu/timens.c | 1 + 1 file changed, 1 insertion(+) diff --git a/criu/timens.c b/criu/timens.c index 2a7e95284..f81808abf 100644 --- a/criu/timens.c +++ b/criu/timens.c @@ -64,6 +64,7 @@ int prepare_timens(int id) if (id == 0 && empty_image(img)) { pr_warn("Clocks values have not been dumped\n"); + close_image(img); return 0; } From b4c51ea492c98011472dd28a7cb47bffcfb4ad20 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Wed, 20 May 2020 12:19:36 +0000 Subject: [PATCH 0254/1854] coverity: fix FORWARD_NULL in criu/proc_parse.c: 1481 8. criu-3.14/criu/proc_parse.c:1511: var_deref_model: Passing null pointer "f" to "fclose", which dereferences it. 1509| exit_code = 0; 1510| out: 1511|-> fclose(f); 1512| return exit_code; 1513| } Signed-off-by: Adrian Reber --- criu/proc_parse.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/criu/proc_parse.c b/criu/proc_parse.c index 4a22700aa..d1ccd9281 100644 --- a/criu/proc_parse.c +++ b/criu/proc_parse.c @@ -1480,7 +1480,7 @@ int parse_timens_offsets(struct timespec *boff, struct timespec *moff) f = fopen_proc(PROC_SELF, "timens_offsets"); if (!f) { pr_perror("Unable to open /proc/self/timens_offsets"); - goto out; + return exit_code; } while (fgets(buf, BUF_SIZE, f)) { int64_t sec, nsec; From e34f5dd3a351dc2e475fa235c25ed115ac996644 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Wed, 20 May 2020 12:38:55 +0000 Subject: [PATCH 0255/1854] clang: Branch condition evaluates to a garbage value criu-3.14/criu/namespaces.c:692:7: warning: Branch condition evaluates to a garbage value criu-3.14/criu/namespaces.c:690:3: note: 'supported' declared without an initial value protobuf_c_boolean supported; ^~~~~~~~~~~~~~~~~~~~~~~~~~~~ criu-3.14/criu/namespaces.c:691:8: note: Calling 'get_ns_id' id = get_ns_id(pid, &time_for_children_ns_desc, &supported); 
^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ criu-3.14/criu/namespaces.c:479:9: note: Calling '__get_ns_id' return __get_ns_id(pid, nd, supported, NULL); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ criu-3.14/criu/namespaces.c:454:6: note: Assuming 'proc_dir' is < 0 if (proc_dir < 0) ^~~~~~~~~~~~ criu-3.14/criu/namespaces.c:454:2: note: Taking true branch if (proc_dir < 0) ^ criu-3.14/criu/namespaces.c:455:3: note: Returning without writing to '*supported' return 0; ^ criu-3.14/criu/namespaces.c:479:9: note: Returning from '__get_ns_id' return __get_ns_id(pid, nd, supported, NULL); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ criu-3.14/criu/namespaces.c:479:2: note: Returning without writing to '*supported' return __get_ns_id(pid, nd, supported, NULL); ^ criu-3.14/criu/namespaces.c:691:8: note: Returning from 'get_ns_id' id = get_ns_id(pid, &time_for_children_ns_desc, &supported); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ criu-3.14/criu/namespaces.c:692:7: note: Branch condition evaluates to a garbage value if (!supported || !id) { ^~~~~~~~~~ 690| protobuf_c_boolean supported; 691| id = get_ns_id(pid, &time_for_children_ns_desc, &supported); 692|-> if (!supported || !id) { 693| pr_err("Can't make timens id\n"); 694| Signed-off-by: Adrian Reber --- criu/namespaces.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/criu/namespaces.c b/criu/namespaces.c index 89d97c7bc..04f242505 100644 --- a/criu/namespaces.c +++ b/criu/namespaces.c @@ -687,7 +687,7 @@ int dump_task_ns_ids(struct pstree_item *item) } if (ids->has_time_ns_id) { unsigned int id; - protobuf_c_boolean supported; + protobuf_c_boolean supported = false; id = get_ns_id(pid, &time_for_children_ns_desc, &supported); if (!supported || !id) { pr_err("Can't make timens id\n"); From faf6dbf33e04eb8a0907f44b2787022b14a840e0 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Fri, 15 May 2020 18:21:25 -0700 Subject: [PATCH 0256/1854] close_service_fd: rename to status_ready The name 
close_service_fd() is misleading, as it not just closes the status_fd, but also writes to it. On a high level, though, it signals the other side that we are ready, so rename to status_ready. Signed-off-by: Kir Kolyshkin --- criu/cr-service.c | 2 +- criu/include/util.h | 2 +- criu/uffd.c | 2 +- criu/util.c | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/criu/cr-service.c b/criu/cr-service.c index 7c2ff9835..7201b549a 100644 --- a/criu/cr-service.c +++ b/criu/cr-service.c @@ -1394,7 +1394,7 @@ int cr_service(bool daemon_mode) if (setup_sigchld_handler()) goto err; - if (close_status_fd()) + if (status_ready()) goto err; while (1) { diff --git a/criu/include/util.h b/criu/include/util.h index d67f6d39d..1b22d9e0b 100644 --- a/criu/include/util.h +++ b/criu/include/util.h @@ -177,7 +177,7 @@ extern int cr_system(int in, int out, int err, char *cmd, char *const argv[], un extern int cr_system_userns(int in, int out, int err, char *cmd, char *const argv[], unsigned flags, int userns_pid); extern int cr_daemon(int nochdir, int noclose, int close_fd); -extern int close_status_fd(void); +extern int status_ready(void); extern int is_root_user(void); extern void set_proc_self_fd(int fd); diff --git a/criu/uffd.c b/criu/uffd.c index 99373c04d..33b34ba25 100644 --- a/criu/uffd.c +++ b/criu/uffd.c @@ -1456,7 +1456,7 @@ int cr_lazy_pages(bool daemon) } } - if (close_status_fd()) + if (status_ready()) return -1; /* diff --git a/criu/util.c b/criu/util.c index 517f0fc25..0a60fa105 100644 --- a/criu/util.c +++ b/criu/util.c @@ -643,7 +643,7 @@ out: return ret; } -int close_status_fd(void) +int status_ready(void) { char c = 0; @@ -1105,7 +1105,7 @@ int run_tcp_server(bool daemon_mode, int *ask, int cfd, int sk) } } - if (close_status_fd()) + if (status_ready()) return -1; if (sk >= 0) { From 62c03530c9d6e0a1012a589ed1a26c2612113238 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Fri, 15 May 2020 18:12:30 -0700 Subject: [PATCH 0257/1854] swrk: send 
notification instead of using status fd When we use swrk, we have a mechanism to send notifications over RPC. It is cleaner and more straightforward than sending \0 to status fd. For now, both mechanisms are supported, although status fd request option is now deprecated, so a warning is logged in case it's used. Guess we can remove it in a few years. Signed-off-by: Kir Kolyshkin --- criu/action-scripts.c | 1 + criu/cr-service.c | 2 ++ criu/include/action-scripts.h | 1 + criu/util.c | 4 ++++ 4 files changed, 8 insertions(+) diff --git a/criu/action-scripts.c b/criu/action-scripts.c index 2f7617c0f..5337efa64 100644 --- a/criu/action-scripts.c +++ b/criu/action-scripts.c @@ -29,6 +29,7 @@ static const char *action_names[ACT_MAX] = { [ ACT_PRE_RESUME ] = "pre-resume", [ ACT_POST_RESUME ] = "post-resume", [ ACT_ORPHAN_PTS_MASTER ] = "orphan-pts-master", + [ ACT_STATUS_READY ] = "status-ready", }; struct script { diff --git a/criu/cr-service.c b/criu/cr-service.c index 7201b549a..56be6bcd3 100644 --- a/criu/cr-service.c +++ b/criu/cr-service.c @@ -678,6 +678,8 @@ static int setup_opts_from_req(int sk, CriuOpts *req) } if (req->has_status_fd) { + pr_warn("status_fd is obsoleted; use status-ready notification instead\n"); + sprintf(status_fd, "/proc/%d/fd/%d", ids.pid, req->status_fd); opts.status_fd = open(status_fd, O_WRONLY); if (opts.status_fd < 0) { diff --git a/criu/include/action-scripts.h b/criu/include/action-scripts.h index 40b09b160..c2e8850aa 100644 --- a/criu/include/action-scripts.h +++ b/criu/include/action-scripts.h @@ -15,6 +15,7 @@ enum script_actions { ACT_POST_RESUME, ACT_PRE_RESUME, ACT_ORPHAN_PTS_MASTER, + ACT_STATUS_READY, ACT_MAX }; diff --git a/criu/util.c b/criu/util.c index 0a60fa105..4c1f3b4ca 100644 --- a/criu/util.c +++ b/criu/util.c @@ -45,6 +45,7 @@ #include "pstree.h" #include "cr-errno.h" +#include "action-scripts.h" #define VMA_OPT_LEN 128 @@ -647,6 +648,9 @@ int status_ready(void) { char c = 0; + if (run_scripts(ACT_STATUS_READY)) + 
return -1; + if (opts.status_fd < 0) return 0; From e57e74a18df0b7bbfb2fa556941fc7c8715e57d1 Mon Sep 17 00:00:00 2001 From: ZeyadYasser Date: Thu, 16 Apr 2020 15:58:18 +0200 Subject: [PATCH 0258/1854] criu: optimize find_unix_sk_by_ino() Fixes: #339 Replaced the linear search with a hashtable lookup. Signed-off-by: Zeyad Yasser --- criu/files.c | 1 + criu/include/sockets.h | 2 ++ criu/sk-unix.c | 19 ++++++++++++++++++- 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/criu/files.c b/criu/files.c index a1fd26764..2cfc9040e 100644 --- a/criu/files.c +++ b/criu/files.c @@ -1749,5 +1749,6 @@ struct collect_image_info files_cinfo = { int prepare_files(void) { init_fdesc_hash(); + init_sk_info_hash(); return collect_image(&files_cinfo); } diff --git a/criu/include/sockets.h b/criu/include/sockets.h index cd98d18e0..e971f3efd 100644 --- a/criu/include/sockets.h +++ b/criu/include/sockets.h @@ -62,6 +62,8 @@ extern int unix_sk_id_add(unsigned int ino); extern int unix_sk_ids_parse(char *optarg); extern int unix_prepare_root_shared(void); +extern void init_sk_info_hash(void); + extern int do_dump_opt(int sk, int level, int name, void *val, int len); #define dump_opt(s, l, n, f) do_dump_opt(s, l, n, f, sizeof(*f)) extern int do_restore_opt(int sk, int level, int name, void *val, int len); diff --git a/criu/sk-unix.c b/criu/sk-unix.c index 048ff44ae..cbcf1f66c 100644 --- a/criu/sk-unix.c +++ b/criu/sk-unix.c @@ -903,6 +903,7 @@ struct unix_sk_info { struct unix_sk_info *peer; struct pprep_head peer_resolve; /* XXX : union with the above? 
*/ struct file_desc d; + struct hlist_node hash; /* To lookup socket by ino */ struct list_head connected; /* List of sockets, connected to me */ struct list_head node; /* To link in peer's connected list */ struct list_head scm_fles; @@ -934,11 +935,25 @@ struct scm_fle { #define USK_PAIR_SLAVE 0x2 #define USK_GHOST_FDSTORE 0x4 /* bound but removed address */ +#define SK_INFO_HASH_SIZE 32 + +static struct hlist_head sk_info_hash[SK_INFO_HASH_SIZE]; + +void init_sk_info_hash(void) +{ + int i; + + for (i = 0; i < SK_INFO_HASH_SIZE; i++) + INIT_HLIST_HEAD(&sk_info_hash[i]); +} + static struct unix_sk_info *find_unix_sk_by_ino(int ino) { struct unix_sk_info *ui; + struct hlist_head *chain; - list_for_each_entry(ui, &unix_sockets, list) { + chain = &sk_info_hash[ino % SK_INFO_HASH_SIZE]; + hlist_for_each_entry(ui, chain, hash) { if (ui->ue->ino == ino) return ui; } @@ -2044,6 +2059,7 @@ static int init_unix_sk_info(struct unix_sk_info *ui, UnixSkEntry *ue) INIT_LIST_HEAD(&ui->node); INIT_LIST_HEAD(&ui->scm_fles); INIT_LIST_HEAD(&ui->ghost_node); + INIT_HLIST_NODE(&ui->hash); return 0; } @@ -2135,6 +2151,7 @@ static int collect_one_unixsk(void *o, ProtobufCMessage *base, struct cr_img *i) list_add_tail(&ui->ghost_node, &unix_ghost_addr); } + hlist_add_head(&ui->hash, &sk_info_hash[ui->ue->ino % SK_INFO_HASH_SIZE]); list_add_tail(&ui->list, &unix_sockets); return file_desc_add(&ui->d, ui->ue->id, &unix_desc_ops); } From 55f71b8667043fb8241bc500a96c20644d478eba Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Thu, 23 Apr 2020 14:13:15 +0000 Subject: [PATCH 0259/1854] lib/c: add criu_get_version() Although the CRIU version is exported in macros in version.h it only contains the CRIU version of libcriu during build time. As it is possible that CRIU is upgraded since the last time something was built against libcriu, this adds functions to query the actual CRIU binary about its version. 
Signed-off-by: Adrian Reber --- lib/c/criu.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++++ lib/c/criu.h | 32 ++++++++++++++++++++++++++ 2 files changed, 97 insertions(+) diff --git a/lib/c/criu.c b/lib/c/criu.c index de57a65dc..d052f8d1f 100644 --- a/lib/c/criu.c +++ b/lib/c/criu.c @@ -1668,3 +1668,68 @@ int criu_restore_child(void) { return criu_local_restore_child(global_opts); } + +int criu_local_get_version(criu_opts *opts) +{ + int ret = -1; + CriuReq req = CRIU_REQ__INIT; + CriuResp *resp = NULL; + + saved_errno = 0; + + req.type = CRIU_REQ_TYPE__VERSION; + req.opts = opts->rpc; + + ret = send_req_and_recv_resp(opts, &req, &resp); + if (ret) + goto exit; + + if (resp->success) { + ret = resp->version->major_number * 10000; + ret += resp->version->minor_number * 100; + if (resp->version->has_sublevel) + ret += resp->version->sublevel; + if (resp->version->gitid) { + /* Taken from runc: a git release -> minor + 1 */ + ret -= (ret % 100); + ret += 100; + } + } else { + ret = -EBADE; + } + +exit: + if (resp) + criu_resp__free_unpacked(resp, NULL); + + swrk_wait(opts); + + errno = saved_errno; + + return ret; +} + +int criu_get_version(void) +{ + return criu_local_get_version(global_opts); +} + +int criu_local_check_version(criu_opts *opts, int minimum) +{ + int version; + + version = criu_local_get_version(opts); + + if (version < 0) + return version; + + if (minimum <= version) + return 1; + + return 0; +} + +int criu_check_version(int minimum) +{ + return criu_local_check_version(global_opts, minimum); +} diff --git a/lib/c/criu.h b/lib/c/criu.h index 3a9204f5b..49f7a7005 100644 --- a/lib/c/criu.h +++ b/lib/c/criu.h @@ -158,6 +158,35 @@ int criu_restore_child(void); typedef void *criu_predump_info; int criu_dump_iters(int (*more)(criu_predump_info pi)); +/* + * Get the version of the actual binary used for RPC. 
+ * + * As this library is just forwarding all tasks to an + * independent (of this library) CRIU binary, the actual + * version of the CRIU binary can be different than the + * hardcoded values in the library (version.h). + * To be able to easily check the version of the CRIU binary + * the function criu_get_version() returns the version + * in the following format: + * + * (major * 10000) + (minor * 100) + sublevel + * + * If the CRIU binary has been built from a git checkout + * minor will be increased by one. + */ +int criu_get_version(void); + +/* + * Check if the version of the CRIU binary is at least + * 'minimum'. Version has to be in the same format as + * described for criu_get_version(). + * + * Returns 1 if CRIU is at least 'minimum'. + * Returns 0 if CRIU is too old. + * Returns < 0 if there was an error. + */ +int criu_check_version(int minimum); + /* * Same as the list above, but lets you have your very own options * structure and lets you set individual options in it. @@ -229,6 +258,9 @@ int criu_local_restore(criu_opts *opts); int criu_local_restore_child(criu_opts *opts); int criu_local_dump_iters(criu_opts *opts, int (*more)(criu_predump_info pi)); +int criu_local_get_version(criu_opts *opts); +int criu_local_check_version(criu_opts *opts, int minimum); + #ifdef __GNUG__ } #endif From 047ecd3a15f83b15535943c2c87e0c55b4866dd9 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Thu, 23 Apr 2020 14:28:00 +0000 Subject: [PATCH 0260/1854] test/others/libcriu: test version library calls This adds the previously added libcriu version functions to the libcriu tests. 
Signed-off-by: Adrian Reber --- test/others/libcriu/lib.c | 7 ++++++- test/others/libcriu/lib.h | 1 + test/others/libcriu/test_self.c | 13 +++++++++++++ 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/test/others/libcriu/lib.c b/test/others/libcriu/lib.c index 33aa4090d..0c7929cda 100644 --- a/test/others/libcriu/lib.c +++ b/test/others/libcriu/lib.c @@ -2,6 +2,8 @@ #include #include +#include "criu.h" + void what_err_ret_mean(int ret) { /* NOTE: errno is set by libcriu */ @@ -44,4 +46,7 @@ int chk_exit(int status, int want) return 1; } - +int get_version() +{ + printf("Using a CRIU binary with version %d\n", criu_get_version()); +} diff --git a/test/others/libcriu/lib.h b/test/others/libcriu/lib.h index 67b784bff..6fdf8aef2 100644 --- a/test/others/libcriu/lib.h +++ b/test/others/libcriu/lib.h @@ -1,2 +1,3 @@ void what_err_ret_mean(int ret); int chk_exit(int status, int want); +int get_version(void); diff --git a/test/others/libcriu/test_self.c b/test/others/libcriu/test_self.c index c9d2a2e64..374a4b545 100644 --- a/test/others/libcriu/test_self.c +++ b/test/others/libcriu/test_self.c @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -23,6 +24,18 @@ int main(int argc, char *argv[]) criu_init_opts(); criu_set_service_binary(argv[1]); + + get_version(); + if (!criu_check_version(31400)) { + printf("CRIU version check failed. CRIU too old\n"); + return 1; + } + + if (criu_check_version(INT_MAX)) { + printf("CRIU version check failed. CRIU too new.\n"); + return 1; + } + criu_set_images_dir_fd(fd); criu_set_log_level(4); From d72428b7c4254c2e3587d8d84f16626302e7e111 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Thu, 23 Apr 2020 09:11:48 +0000 Subject: [PATCH 0261/1854] Also report clone3() errors correctly Without clone3() CRIU was able to detect a process with a wrong PID only in the already created child process. With clone3() this error can happen before the process is created. 
In the case of EEXIST this error will now be correctly forwarded to an RPC client. This was detected by running test/others/libcriu on a clone3() system. Signed-off-by: Adrian Reber --- criu/clone-noasan.c | 1 + criu/cr-restore.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/criu/clone-noasan.c b/criu/clone-noasan.c index a2190ba0a..35c40c21d 100644 --- a/criu/clone-noasan.c +++ b/criu/clone-noasan.c @@ -70,6 +70,7 @@ int clone3_with_pid_noasan(int (*fn)(void *), void *arg, int flags, if (!(flags & CLONE_PARENT)) { if (exit_signal != SIGCHLD) { pr_err("Exit signal not SIGCHLD\n"); + errno = EINVAL; return -1; } c_args.exit_signal = exit_signal; diff --git a/criu/cr-restore.c b/criu/cr-restore.c index ec00bf71b..e44ba308d 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -1439,6 +1439,8 @@ static inline int fork_with_pid(struct pstree_item *item) if (ret < 0) { pr_perror("Can't fork for %d", pid); + if (errno == EEXIST) + set_cr_errno(EEXIST); goto err_unlock; } From cbf099400a24debe4eaf830bd81138bb73a46a00 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Sat, 8 Feb 2020 16:58:36 +0100 Subject: [PATCH 0262/1854] Travis: use Vagrant to run VMs This adds the minimal configuration to run Fedora 31 based VMs on Travis. This can be used to test cgroupv2 based tests, tests with vdso=off and probably much more which requires booting a newer kernel. As an example this builds CRIU on Fedora 31 and reconfigures it to boot without VDSO support and runs one single test. 
Signed-off-by: Adrian Reber --- .travis.yml | 1 + scripts/travis/Makefile | 8 ++++++ scripts/travis/vagrant.sh | 53 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 62 insertions(+) create mode 100755 scripts/travis/vagrant.sh diff --git a/.travis.yml b/.travis.yml index e71afa0a3..8ada90193 100644 --- a/.travis.yml +++ b/.travis.yml @@ -12,6 +12,7 @@ env: - TR_ARCH=x86_64 - TR_ARCH=x86_64 CLANG=1 - TR_ARCH=openj9-test + - TR_ARCH=vagrant-fedora-no-vdso jobs: include: - os: linux diff --git a/scripts/travis/Makefile b/scripts/travis/Makefile index 17abb703a..1af60fe8d 100644 --- a/scripts/travis/Makefile +++ b/scripts/travis/Makefile @@ -64,5 +64,13 @@ podman-test: openj9-test: restart-docker ./openj9-test.sh +setup-vagrant: + ./vagrant.sh setup + +vagrant-fedora-no-vdso: setup-vagrant + ./vagrant.sh fedora-no-vdso + +.PHONY: setup-vagrant vagrant-fedora-no-vdso + %: $(MAKE) -C ../build $@$(target-suffix) diff --git a/scripts/travis/vagrant.sh b/scripts/travis/vagrant.sh new file mode 100755 index 000000000..943a8b9a3 --- /dev/null +++ b/scripts/travis/vagrant.sh @@ -0,0 +1,53 @@ +#!/bin/bash + +# This script is used to run vagrant based tests on Travis. +# This script is started via sudo from .travis.yml + +set -e +set -x + +VAGRANT_VERSION=2.2.7 +FEDORA_VERSION=31 +FEDORA_BOX_VERSION=31.20191023.0 + +setup() { + apt-get -qq update + # Load the kvm modules for vagrant to use qemu + modprobe kvm kvm_intel + + # Tar up the git checkout to have vagrant rsync it to the VM + tar cf criu.tar ../../../criu + wget https://releases.hashicorp.com/vagrant/${VAGRANT_VERSION}/vagrant_${VAGRANT_VERSION}_$(uname -m).deb -O /tmp/vagrant.deb && \ + dpkg -i /tmp/vagrant.deb + + apt-get -qq install -y libvirt-bin libvirt-dev qemu-utils qemu + systemctl restart libvirt-bin + vagrant plugin install vagrant-libvirt + vagrant init fedora/${FEDORA_VERSION}-cloud-base --box-version ${FEDORA_BOX_VERSION} + # The default libvirt Vagrant VM uses 512MB. 
+ # Travis VMs should have around 7.5GB. + # Increasing it to 4GB should work. + sed -i Vagrantfile -e 's,^end$, config.vm.provider :libvirt do |libvirt|'"\n"' libvirt.memory = 4096;end'"\n"'end,g' + vagrant up --provider=libvirt + mkdir -p /root/.ssh + vagrant ssh-config >> /root/.ssh/config + ssh default sudo dnf install -y gcc git gnutls-devel nftables-devel libaio-devel \ + libasan libcap-devel libnet-devel libnl3-devel make protobuf-c-devel \ + protobuf-devel python3-flake8 python3-future python3-protobuf \ + python3-junit_xml rubygem-asciidoctor iptables libselinux-devel + # Disable sssd to avoid zdtm test failures in pty04 due to sssd socket + ssh default sudo systemctl mask sssd + ssh default cat /proc/cmdline +} + +fedora-no-vdso() { + ssh default sudo grubby --update-kernel ALL --args="vdso=0" + vagrant reload + ssh default cat /proc/cmdline + ssh default 'cd /vagrant; tar xf criu.tar; cd criu; make -j 4' + # Excluding the VDSO test as we are running without VDSO + # Excluding two cgroup tests which seem to fail because of cgroup2 + ssh default 'cd /vagrant/criu/test; sudo ./zdtm.py run -a -x zdtm/static/cgroup04 -x zdtm/static/cgroup_ifpriomap -x zdtm/static/vdso01 --keep-going' +} + +$1 From 1d9438aefbd1609ee765f7e958b88883f402a662 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Tue, 28 Apr 2020 19:00:42 -0700 Subject: [PATCH 0263/1854] criu swrk: fix usage, allow common options TL;DR: this makes possible -v with criu swrk, and removes showing usage which is useless in swrk mode. 1. Since criu swrk command is not described in usage, there is no sense in showing it. Instead, show a one-line hint about how to use it. 2. In case some global options (like -v) are used, argv[1] might not point to "swrk". Use optind to point to a correct non-option argument. 3. While at it, also error out in case we have extra arguments. 
Signed-off-by: Kir Kolyshkin --- criu/crtools.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/criu/crtools.c b/criu/crtools.c index 76172f350..b696898e7 100644 --- a/criu/crtools.c +++ b/criu/crtools.c @@ -100,12 +100,18 @@ int main(int argc, char *argv[], char *envp[]) return 1; if (ret == 2) goto usage; + if (optind >= argc) { + pr_err("command is required\n"); + goto usage; + } log_set_loglevel(opts.log_level); - if (!strcmp(argv[1], "swrk")) { - if (argc < 3) - goto usage; + if (optind < argc && !strcmp(argv[optind], "swrk")) { + if (argc != optind+2) { + fprintf(stderr, "Usage: criu swrk \n"); + return 1; + } /* * This is to start criu service worker from libcriu calls. * The usage is "criu swrk " and is not for CLI/scripts. @@ -113,7 +119,7 @@ int main(int argc, char *argv[], char *envp[]) * corresponding lib call change. */ opts.swrk_restore = true; - return cr_service_work(atoi(argv[2])); + return cr_service_work(atoi(argv[optind+1])); } if (check_options()) @@ -125,11 +131,6 @@ int main(int argc, char *argv[], char *envp[]) if (opts.work_dir == NULL) SET_CHAR_OPTS(work_dir, opts.imgs_dir); - if (optind >= argc) { - pr_err("command is required\n"); - goto usage; - } - has_sub_command = (argc - optind) > 1; if (has_exec_cmd) { From 6ee4b72382f72362ab746876cf32a70712eb89f9 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 May 2020 11:19:23 +0300 Subject: [PATCH 0264/1854] arch/x86: Fix calculation of xstate_size The layout of xsave frame in a standard format is predefined by the hardware. Let's make sure we're increasing in frame offsets and use latest offset where appropriate. 
https://github.com/checkpoint-restore/criu/issues/1042 Reported-by: Ashutosh Mehra Signed-off-by: Cyrill Gorcunov --- criu/arch/x86/crtools.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/criu/arch/x86/crtools.c b/criu/arch/x86/crtools.c index 9c8beeedd..bc8022535 100644 --- a/criu/arch/x86/crtools.c +++ b/criu/arch/x86/crtools.c @@ -437,6 +437,7 @@ int restore_fpu(struct rt_sigframe *sigframe, CoreEntry *core) void *from = xsave->member; \ size_t size = pb_repeated_size(xsave, member); \ size_t xsize = (size_t)compel_fpu_feature_size(feature); \ + size_t xstate_size_next = off + xsize; \ if (xsize != size) { \ if (size) { \ pr_err("%s reported %zu bytes (expecting %zu)\n",\ @@ -448,7 +449,8 @@ int restore_fpu(struct rt_sigframe *sigframe, CoreEntry *core) } \ } \ xstate_bv |= (1UL << feature); \ - xstate_size += xsize; \ + BUG_ON(xstate_size > xstate_size_next); \ + xstate_size = xstate_size_next; \ memcpy(to, from, size); \ } \ } while (0) @@ -485,6 +487,11 @@ int restore_fpu(struct rt_sigframe *sigframe, CoreEntry *core) UserX86XsaveEntry *xsave = core->thread_info->fpregs->xsave; uint8_t *extended_state_area = (void *)x; + /* + * Note the order does matter here and bound + * to the increasing offsets of XFEATURE_x + * inside memory layout (xstate_size calculation). + */ assign_xsave(XFEATURE_YMM, xsave, ymmh_space, extended_state_area); assign_xsave(XFEATURE_BNDREGS, xsave, bndreg_state, extended_state_area); assign_xsave(XFEATURE_BNDCSR, xsave, bndcsr_state, extended_state_area); From 808684c99eb4c0cf12a5725cd6bbe3fea191273c Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Tue, 9 Jun 2020 16:52:10 +0300 Subject: [PATCH 0265/1854] Add CONTRIBUTING.md Move the existing contribution guidelines to a dedicated file for future extensions. 
Signed-off-by: Mike Rapoport --- CONTRIBUTING.md | 19 +++++++++++++++++++ README.md | 20 +++++--------------- 2 files changed, 24 insertions(+), 15 deletions(-) create mode 100644 CONTRIBUTING.md diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 000000000..342619e88 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,19 @@ +[![master](https://travis-ci.org/checkpoint-restore/criu.svg?branch=master)](https://travis-ci.org/checkpoint-restore/criu) +[![development](https://travis-ci.org/checkpoint-restore/criu.svg?branch=criu-dev)](https://travis-ci.org/checkpoint-restore/criu) +[![Codacy Badge](https://api.codacy.com/project/badge/Grade/55251ec7db28421da4481fc7c1cb0cee)](https://www.codacy.com/app/xemul/criu?utm_source=github.com&utm_medium=referral&utm_content=xemul/criu&utm_campaign=Badge_Grade) +

+ +## How to contribute to CRIU + +CRIU project is (almost) the never-ending story, because we have to always keep up with the +Linux kernel supporting checkpoint and restore for all the features it provides. Thus we're +looking for contributors of all kinds -- feedback, bug reports, testing, coding, writing, etc. +Here are some useful hints to get involved. + +* We have both -- [very simple](https://github.com/checkpoint-restore/criu/issues?q=is%3Aissue+is%3Aopen+label%3Aenhancement) and [more sophisticated](https://github.com/checkpoint-restore/criu/issues?q=is%3Aissue+is%3Aopen+label%3A%22new+feature%22) coding tasks; +* CRIU does need [extensive testing](https://github.com/checkpoint-restore/criu/issues?q=is%3Aissue+is%3Aopen+label%3Atesting); +* Documentation is always hard, we have [some information](https://criu.org/Category:Empty_articles) that is to be extracted from people's heads into wiki pages as well as [some texts](https://criu.org/Category:Editor_help_needed) that all need to be converted into useful articles; +* Feedback is expected on the github issues page and on the [mailing list](https://lists.openvz.org/mailman/listinfo/criu); +* We accept github pull requests and this is the preferred way to contribute to CRIU. If you prefer to send patches by email, you are welcome to send them to [the devel list](http://criu.org/How_to_submit_patches); +* Spread the word about CRIU in [social networks](http://criu.org/Contacts); +* If you're giving a talk about CRIU -- let us know, we'll mention it on the [wiki main page](https://criu.org/News/events); diff --git a/README.md b/README.md index 6a578b953..d703638ec 100644 --- a/README.md +++ b/README.md @@ -14,6 +14,11 @@ project is that it is mainly implemented in user space. There are some more proj doing C/R for Linux, and so far CRIU [appears to be](https://criu.org/Comparison_to_other_CR_projects) the most feature-rich and up-to-date with the kernel. 
+CRIU project is (almost) the never-ending story, because we have to always keep up with the +Linux kernel supporting checkpoint and restore for all the features it provides. Thus we're +looking for contributors of all kinds -- feedback, bug reports, testing, coding, writing, etc. +Please refer to [CONTRIBUTING.md](CONTRIBUTING.md) if you would like to get involved. + The project [started](https://criu.org/History) as the way to do live migration for OpenVZ Linux containers, but later grew to more sophisticated and flexible tool. It is currently used by (integrated into) OpenVZ, LXC/LXD, Docker, and other software, project gets tremendous @@ -56,21 +61,6 @@ One of the CRIU features is the ability to save and restore state of a TCP socke without breaking the connection. This functionality is considered to be useful by itself, and we have it available as the [libsoccr library](https://criu.org/Libsoccr). -## How to contribute - -CRIU project is (almost) the never-ending story, because we have to always keep up with the -Linux kernel supporting checkpoint and restore for all the features it provides. Thus we're -looking for contributors of all kinds -- feedback, bug reports, testing, coding, writing, etc. -Here are some useful hints to get involved. 
- -* We have both -- [very simple](https://github.com/checkpoint-restore/criu/issues?q=is%3Aissue+is%3Aopen+label%3Aenhancement) and [more sophisticated](https://github.com/checkpoint-restore/criu/issues?q=is%3Aissue+is%3Aopen+label%3A%22new+feature%22) coding tasks; -* CRIU does need [extensive testing](https://github.com/checkpoint-restore/criu/issues?q=is%3Aissue+is%3Aopen+label%3Atesting); -* Documentation is always hard, we have [some information](https://criu.org/Category:Empty_articles) that is to be extracted from people's heads into wiki pages as well as [some texts](https://criu.org/Category:Editor_help_needed) that all need to be converted into useful articles; -* Feedback is expected on the github issues page and on the [mailing list](https://lists.openvz.org/mailman/listinfo/criu); -* We accept github pull requests and this is the preferred way to contribute to CRIU. If you prefer to send patches by email, you are welcome to send them to [the devel list](http://criu.org/How_to_submit_patches); -* Spread the word about CRIU in [social networks](http://criu.org/Contacts); -* If you're giving a talk about CRIU -- let us know, we'll mention it on the [wiki main page](https://criu.org/News/events); - ## Licence The project is licensed under GPLv2 (though files sitting in the lib/ directory are LGPLv2.1). From d0fcb01d47de8d5c659cc09d0ab5d994d10b5ffa Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Tue, 9 Jun 2020 16:52:11 +0300 Subject: [PATCH 0266/1854] CONTRIBUTING.md: import "How to submit patches" from criu.org Import "How to submit patches" article from CRIU wiki and update its format to match GitHub markdown. Signed-off-by: Mike Rapoport --- CONTRIBUTING.md | 200 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 199 insertions(+), 1 deletion(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 342619e88..edb7ecb48 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -14,6 +14,204 @@ Here are some useful hints to get involved. 
* CRIU does need [extensive testing](https://github.com/checkpoint-restore/criu/issues?q=is%3Aissue+is%3Aopen+label%3Atesting); * Documentation is always hard, we have [some information](https://criu.org/Category:Empty_articles) that is to be extracted from people's heads into wiki pages as well as [some texts](https://criu.org/Category:Editor_help_needed) that all need to be converted into useful articles; * Feedback is expected on the github issues page and on the [mailing list](https://lists.openvz.org/mailman/listinfo/criu); -* We accept github pull requests and this is the preferred way to contribute to CRIU. If you prefer to send patches by email, you are welcome to send them to [the devel list](http://criu.org/How_to_submit_patches); +* We accept github pull requests and this is the preferred way to contribute to CRIU. If you prefer to send patches by email, you are welcome to send them to [CRIU development mailing list](https://lists.openvz.org/mailman/listinfo/criu). +Below we describe in more detail recommend practices for CRIU developemnt. * Spread the word about CRIU in [social networks](http://criu.org/Contacts); * If you're giving a talk about CRIU -- let us know, we'll mention it on the [wiki main page](https://criu.org/News/events); + +### Seting up the developemnt environment + +Although criu could be run as non-root (see [Security](https://criu.org/Security), development is better to be done as root. For example, some tests require root. So, it would be a good idea to set up some recent Linux distro on a virtual machine. + +### Get the source code + +The CRIU sources are tracked by Git. Official CRIU repo is at https://github.com/checkpoint-restore/criu. + +The repository may contain multiple branches. Development happens in the **criu-dev** branch. 
+ +To clone CRIU repo and switch to the proper branch, run: + +``` + git clone https://github.com/checkpoint-restore/criu criu + cd criu + git checkout criu-dev +``` + +### Compile + +First, you need to install compile-time dependencies. Check [Installation dependencies](https://criu.org/Installation#Dependencies) for more info. + +To compile CRIU, run: + +``` + make +``` + +This should create the `./criu/criu` executable. + +## Edit the source code + +If you use ctags, you can generate the ctags file by running + +``` + make tags +``` + +When you change the source code, please keep in mind the following code conventions: + +* we prefer tabs and indentations to be 8 characters width +* CRIU mostly follows [Linux kernel coding style](https://www.kernel.org/doc/Documentation/process/coding-style.rst), but we are less strict than the kernel community. + +Other conventions can be learned from the source code itself. In short, make sure your new code +looks similar to what is already there. + +## Test your changes + +CRIU comes with an extensive test suite. To check whether your changes introduce any regressions, run + +``` + make test +``` + +The command runs [ZDTM Test Suite](https://criu.org/ZDTM_Test_Suite). Check for any error messages produced by it. + +In case you'd rather have someone else run the tests, you can use travis-ci for your +own github fork of CRIU. It will check the compilation for various supported platforms, +as well as run most of the tests from the suite. See https://travis-ci.org/checkpoint-restore/criu +for more details. + +## Sign your work + +To improve tracking of who did what, we ask you to sign off the patches +that are to be emailed. + +The sign-off is a simple line at the end of the explanation for the +patch, which certifies that you wrote it or otherwise have the right to +pass it on as an open-source patch. 
The rules are pretty simple: if you +can certify the below: + +### Developer's Certificate of Origin 1.1 + By making a contribution to this project, I certify that: + + (a) The contribution was created in whole or in part by me and I + have the right to submit it under the open source license + indicated in the file; or + + (b) The contribution is based upon previous work that, to the best + of my knowledge, is covered under an appropriate open source + license and I have the right under that license to submit that + work with modifications, whether created in whole or in part + by me, under the same open source license (unless I am + permitted to submit under a different license), as indicated + in the file; or + + (c) The contribution was provided directly to me by some other + person who certified (a), (b) or (c) and I have not modified + it. + + (d) I understand and agree that this project and the contribution + are public and that a record of the contribution (including all + personal information I submit with it, including my sign-off) is + maintained indefinitely and may be redistributed consistent with + this project or the open source license(s) involved. + +then you just add a line saying + +``` + Signed-off-by: Random J Developer +``` + +using your real name (please, no pseudonyms or anonymous contributions if +it is possible). + +Hint: you can use `git commit -s` to add Signed-off-by line to your +commit message. To append such line to a commit you already made, use +`git commit --amend -s`. + +``` + From: Random J Developer + Subject: [PATCH] Short patch description + + Long patch description (could be skipped if patch + is trivial enough) + + Signed-off-by: Random J Developer + --- + Patch body here +``` + +## Submit your work upstream + +We accept github pull requests and this is the preferred way to contribute to CRIU. 
+For that you should push your work to your fork of CRIU at [GitHub](https://github.com) and create a [pull request](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/about-pull-requests) + +Historically, CRIU worked with mailing lists and patches so if you still prefer this way continue reading till the end of this section. + +### Make a patch + +To create a patch, run + +``` + git format-patch --signoff origin/criu-dev +``` + +You might need to read GIT documentation on how to prepare patches +for mail submission. Take a look at http://book.git-scm.com/ and/or +http://git-scm.com/documentation for details. It should not be hard +at all. + +We recommend to post patches using `git send-email` + +``` + git send-email --cover-letter --no-chain-reply-to --annotate \ + --confirm=always --to=criu@openvz.org criu-dev +``` + +Note that the `git send-email` subcommand may not be in +the main git package and using it may require installation of a +separate package, for example the "git-email" package in Fedora and +Debian. + +If this is your first time using git send-email, you might need to +configure it to point it to your SMTP server with something like: + +``` + git config --global sendemail.smtpServer smtp.example.net +``` + +If you get tired of typing `--to=criu@openvz.org` all the time, +you can configure that to be automatically handled as well: + +``` + git config sendemail.to criu@openvz.org +``` + +If a developer is sending another version of the patch (e.g. to address +review comments), they are advised to note differences to previous versions +after the `---` line in the patch so that it helps reviewers but +doesn't become part of git history. Moreover, such patch needs to be prefixed +correctly with `--subject-prefix=PATCHv2` appended to +`git send-email` (substitute `v2` with the correct +version if needed though). + +### Mail patches + +The patches should be sent to CRIU development mailing list, `criu AT openvz.org`. 
Note that you need to be subscribed first in order to post. The list web interface is available at https://openvz.org/mailman/listinfo/criu; you can also use standard mailman aliases to work with it. + +Please make sure the email client you're using doesn't screw your patch (line wrapping and so on). + +{{Note| When sending a patch set that consists of more than one patch, please, push your changes in your local repo and provide the URL of the branch in the cover-letter}} + +### Wait for response + +Be patient. Most CRIU developers are pretty busy people so if +there is no immediate response on your patch — don't be surprised, +sometimes a patch may fly around a week before it gets reviewed. + +## Continuous integration + +Wiki article: [Continuous integration](https://criu.org/Continuous_integration) + +CRIU tests are run for each series sent to the mailing list. If you get a message from our patchwork that patches failed to pass the tests, you have to investigate what is wrong. + +We also recommend you to [enable Travis CI for your repo](https://criu.org/Continuous_integration#Enable_Travis_CI_for_your_repo) to check patches in your git branch, before sending them to the mailing list. From 2e5805878bbf1f80b2297b4b60a6859d15055142 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Sun, 14 Jun 2020 09:31:15 +0300 Subject: [PATCH 0267/1854] CONTRIBUTING.md: minor formatting fixes * Mark lowercase criu as code in the environment section * Add missing brace around the reference to https://criu.org/Security * Fixup an admonition block that GitHub cannot render * Spelling fixups * s/github/GitHub/g Signed-off-by: Mike Rapoport --- CONTRIBUTING.md | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index edb7ecb48..de4f3e1ea 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -13,15 +13,15 @@ Here are some useful hints to get involved. 
* We have both -- [very simple](https://github.com/checkpoint-restore/criu/issues?q=is%3Aissue+is%3Aopen+label%3Aenhancement) and [more sophisticated](https://github.com/checkpoint-restore/criu/issues?q=is%3Aissue+is%3Aopen+label%3A%22new+feature%22) coding tasks; * CRIU does need [extensive testing](https://github.com/checkpoint-restore/criu/issues?q=is%3Aissue+is%3Aopen+label%3Atesting); * Documentation is always hard, we have [some information](https://criu.org/Category:Empty_articles) that is to be extracted from people's heads into wiki pages as well as [some texts](https://criu.org/Category:Editor_help_needed) that all need to be converted into useful articles; -* Feedback is expected on the github issues page and on the [mailing list](https://lists.openvz.org/mailman/listinfo/criu); -* We accept github pull requests and this is the preferred way to contribute to CRIU. If you prefer to send patches by email, you are welcome to send them to [CRIU development mailing list](https://lists.openvz.org/mailman/listinfo/criu). -Below we describe in more detail recommend practices for CRIU developemnt. +* Feedback is expected on the GitHub issues page and on the [mailing list](https://lists.openvz.org/mailman/listinfo/criu); +* We accept GitHub pull requests and this is the preferred way to contribute to CRIU. If you prefer to send patches by email, you are welcome to send them to [CRIU development mailing list](https://lists.openvz.org/mailman/listinfo/criu). +Below we describe in more detail recommend practices for CRIU development. * Spread the word about CRIU in [social networks](http://criu.org/Contacts); * If you're giving a talk about CRIU -- let us know, we'll mention it on the [wiki main page](https://criu.org/News/events); -### Seting up the developemnt environment +### Setting up the development environment -Although criu could be run as non-root (see [Security](https://criu.org/Security), development is better to be done as root. 
For example, some tests require root. So, it would be a good idea to set up some recent Linux distro on a virtual machine. +Although `criu` could be run as non-root (see [Security](https://criu.org/Security)), development is better to be done as root. For example, some tests require root. So, it would be a good idea to set up some recent Linux distro on a virtual machine. ### Get the source code @@ -76,14 +76,14 @@ CRIU comes with an extensive test suite. To check whether your changes introduce The command runs [ZDTM Test Suite](https://criu.org/ZDTM_Test_Suite). Check for any error messages produced by it. In case you'd rather have someone else run the tests, you can use travis-ci for your -own github fork of CRIU. It will check the compilation for various supported platforms, +own GitHub fork of CRIU. It will check the compilation for various supported platforms, as well as run most of the tests from the suite. See https://travis-ci.org/checkpoint-restore/criu for more details. ## Sign your work -To improve tracking of who did what, we ask you to sign off the patches -that are to be emailed. +To improve tracking of who did what, we ask you to sign off the commits in +your fork of CRIU or the patches that are to be emailed. The sign-off is a simple line at the end of the explanation for the patch, which certifies that you wrote it or otherwise have the right to @@ -142,7 +142,7 @@ commit message. To append such line to a commit you already made, use ## Submit your work upstream -We accept github pull requests and this is the preferred way to contribute to CRIU. +We accept GitHub pull requests and this is the preferred way to contribute to CRIU. 
For that you should push your work to your fork of CRIU at [GitHub](https://github.com) and create a [pull request](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/about-pull-requests) Historically, CRIU worked with mailing lists and patches so if you still prefer this way continue reading till the end of this section. @@ -200,7 +200,7 @@ The patches should be sent to CRIU development mailing list, `criu AT openvz.org Please make sure the email client you're using doesn't screw your patch (line wrapping and so on). -{{Note| When sending a patch set that consists of more than one patch, please, push your changes in your local repo and provide the URL of the branch in the cover-letter}} +> **Note:** When sending a patch set that consists of more than one patch, please, push your changes in your local repo and provide the URL of the branch in the cover-letter ### Wait for response From 35f8c056ac49ab62b8575a2d1f04cbbd94ccff5b Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Sun, 14 Jun 2020 11:22:19 +0300 Subject: [PATCH 0268/1854] CONTRIBUTING.md: add sections about patch description and splitting Shamelessly stolen from the Linux kernel [1], shortened a bit and relaxed to match CRIU. [1] https://www.kernel.org/doc/html/latest/process/submitting-patches.html Signed-off-by: Mike Rapoport --- CONTRIBUTING.md | 71 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index de4f3e1ea..1c731b7f8 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -80,6 +80,77 @@ own GitHub fork of CRIU. It will check the compilation for various supported pla as well as run most of the tests from the suite. See https://travis-ci.org/checkpoint-restore/criu for more details. +## Describe your changes + +Describe your problem. Whether your change is a one-line bug fix or +5000 lines of a new feature, there must be an underlying problem that +motivated you to do this work. 
Convince the reviewer that there is a +problem worth fixing and that it makes sense for them to read past the +first paragraph. + +Once the problem is established, describe what you are actually doing +about it in technical detail. It's important to describe the change +in plain English for the reviewer to verify that the code is behaving +as you intend it to. + +Solve only one problem per commit. If your description starts to get +long, that's a sign that you probably need to split up your commit. +See [Separate your changes](#separate-your-changes). + +Describe your changes in imperative mood, e.g. "make xyzzy do frotz" +instead of "[This commit] makes xyzzy do frotz" or "[I] changed xyzzy +to do frotz", as if you are giving orders to the codebase to change +its behaviour. + +If your change fixes a bug in a specific commit, e.g. you found an issue using +`git bisect`, please use the `Fixes:` tag with the abbreviation of +the SHA-1 ID, and the one line summary. For example: + +``` + Fixes: 9433b7b9db3e ("make: use cflags/ldflags for config.h detection mechanism") +``` + +The following `git config` settings can be used to add a pretty format for +outputting the above style in the `git log` or `git show` commands: + +``` + [pretty] + fixes = Fixes: %h (\"%s\") +``` + +If your change addresses an issue listed in GitHub, please use the `Fixes:` tag with the number of the issue. For instance: + +``` + Fixes: #339 +``` + +You may refer to the [How to Write a Git Commit +Message](https://chris.beams.io/posts/git-commit/) article for +recommendations on writing a good commit message. + +## Separate your changes + +Separate each **logical change** into a separate commit. + +For example, if your changes include both bug fixes and performance +enhancements for a single driver, separate those changes into two +or more commits. If your changes include an API update, and a new +driver which uses that new API, separate those into two commits. 
+ +On the other hand, if you make a single change to numerous files, +group those changes into a single commit. Thus a single logical change +is contained within a single commit. + +The point to remember is that each commit should make an easily understood +change that can be verified by reviewers. Each commit should be justifiable +on its own merits. + +When dividing your change into a series of commits, take special care to +ensure that CRIU builds and runs properly after each commit in the +series. Developers using `git bisect` to track down a problem can end up +splitting your patch series at any point; they will not thank you if you +introduce bugs in the middle. + ## Sign your work To improve tracking of who did what, we ask you to sign off the commits in From 6815aa958d1ac01f0dd0c81d55475d44aabfff88 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Sun, 14 Jun 2020 11:26:14 +0300 Subject: [PATCH 0269/1854] CONTRIBUTING.md: add pull request guidelines Following the discussion at [1] describe best practices for pull request creation. [1] https://github.com/checkpoint-restore/criu/pull/1096 Signed-off-by: Mike Rapoport --- CONTRIBUTING.md | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 1c731b7f8..d40f0014c 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -216,6 +216,44 @@ commit message. To append such line to a commit you already made, use We accept GitHub pull requests and this is the preferred way to contribute to CRIU. For that you should push your work to your fork of CRIU at [GitHub](https://github.com) and create a [pull request](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/about-pull-requests) +### Pull request guidelines + +Pull request comment should contain description of the problem your changes +solve and a brief outline of the changes included in the pull request. 
+ +Please avoid pushing fixup commits to an existing pull request. Each commit +should be self-contained and there should not be fixup commits in a patch +series. Pull requests that contain one commit which breaks something +and another commit which fixes it, will be rejected. + +Please merge the fixup commits into the commits that have introduced the +problem before creating a pull request. + +It may happen that the reviewers were not completely happy with your +changes and requested changes to your patches. After you updated your +changes please close the old pull request and create a new one that +contains the following: + +* Description of the problem your changes solve and a brief outline of the + changes +* Link to the previous version of the pull request +* Brief description of the changes between old and new versions of the pull + request. If there was more than one previous pull request, all the + revisions should be listed. For example: + +``` + v3: rebase on the current criu-dev + v2: add commit to foo() and update bar() coding style +``` + +If there are only minor updates to the commits in a pull request, it is +possible to force-push them into an existing pull request. This only applies +to small changes and should be used with care. If you update an existing +pull request, remember to add the description of the changes from the +previous version. + +### Mailing list submission + Historically, CRIU worked with mailing lists and patches so if you still prefer this way continue reading till the end of this section. ### Make a patch From ce22e0f37dfce44bcdd6c9b8a94713441f139158 Mon Sep 17 00:00:00 2001 From: Angie Ni Date: Thu, 4 Jun 2020 11:24:23 -0600 Subject: [PATCH 0270/1854] uffd: uffd_open prints info, caller prints error When uffd_open is called from kerndat_uffd, userfaultfd failure is not considered an error, so the goal is to suppress the error message -- instead, we print this message as info. 
If the function fails, it is the responsibility of the caller to print the error message. Signed-off-by: Angie Ni --- criu/uffd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/criu/uffd.c b/criu/uffd.c index 33b34ba25..5f4c15a60 100644 --- a/criu/uffd.c +++ b/criu/uffd.c @@ -269,7 +269,7 @@ int uffd_open(int flags, unsigned long *features) uffd = syscall(SYS_userfaultfd, flags); if (uffd == -1) { - pr_perror("Lazy pages are not available"); + pr_info("Lazy pages are not available: %s\n", strerror(errno)); return -errno; } From 41b535d312828eac6fd79481d04abe20163b0cc8 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Tue, 16 Jun 2020 14:26:06 +0000 Subject: [PATCH 0271/1854] test: skip vdso test on non-vdso systems Signed-off-by: Adrian Reber --- scripts/travis/vagrant.sh | 3 +-- test/zdtm/static/vdso01.checkskip | 3 +++ 2 files changed, 4 insertions(+), 2 deletions(-) create mode 100755 test/zdtm/static/vdso01.checkskip diff --git a/scripts/travis/vagrant.sh b/scripts/travis/vagrant.sh index 943a8b9a3..46740efc3 100755 --- a/scripts/travis/vagrant.sh +++ b/scripts/travis/vagrant.sh @@ -45,9 +45,8 @@ fedora-no-vdso() { vagrant reload ssh default cat /proc/cmdline ssh default 'cd /vagrant; tar xf criu.tar; cd criu; make -j 4' - # Excluding the VDSO test as we are running without VDSO # Excluding two cgroup tests which seem to fail because of cgroup2 - ssh default 'cd /vagrant/criu/test; sudo ./zdtm.py run -a -x zdtm/static/cgroup04 -x zdtm/static/cgroup_ifpriomap -x zdtm/static/vdso01 --keep-going' + ssh default 'cd /vagrant/criu/test; sudo ./zdtm.py run -a -x zdtm/static/cgroup04 -x zdtm/static/cgroup_ifpriomap --keep-going' } $1 diff --git a/test/zdtm/static/vdso01.checkskip b/test/zdtm/static/vdso01.checkskip new file mode 100755 index 000000000..a00df6231 --- /dev/null +++ b/test/zdtm/static/vdso01.checkskip @@ -0,0 +1,3 @@ +#!/bin/bash + +grep -q "\[vdso\]" /proc/self/maps From d38046b0035d977f44f99d68bb5e3c3d4fb4fd9a Mon Sep 17 00:00:00 2001 
From: Cyrill Gorcunov Date: Sat, 22 Jul 2017 11:33:12 +0300 Subject: [PATCH 0272/1854] mount: restore_task_mnt_ns - Lookup for mount namespace conditionally In case if our parent is a dead task (zombie) we should lookup for parent ids which will be inherited on restore. Otherwise parent->ids may be nil and SIGSEGV produced. Signed-off-by: Cyrill Gorcunov Rework and port from vzcriu: 87b320964 ("vz7: mount: restore_task_mnt_ns - Lookup for mount namespace conditionally") Fixes: #1066 Signed-off-by: Pavel Tikhomirov --- criu/mount.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/criu/mount.c b/criu/mount.c index 89b8cff59..a0b8b2e06 100644 --- a/criu/mount.c +++ b/criu/mount.c @@ -3094,19 +3094,19 @@ int restore_task_mnt_ns(struct pstree_item *current) return 0; if (current->ids && current->ids->has_mnt_ns_id) { + struct pstree_item *parent = current->parent; unsigned int id = current->ids->mnt_ns_id; struct ns_id *nsid; - /* - * Regardless of the namespace a task wants to - * live in, by that point they all will live in - * root's one (see prepare_pstree_kobj_ids() + - * get_clone_mask()). So if the current task's - * target namespace is the root's one -- it's - * already there, otherwise it will have to do - * setns(). + /* Zombies and helpers can have ids == 0 so we skip them */ + while (parent && !parent->ids) + parent = parent->parent; + + /** + * Our parent had restored the mount namespace before forking + * us and if we have the same mntns we just stay there. 
*/ - if (current->parent && id == current->parent->ids->mnt_ns_id) + if (parent && id == parent->ids->mnt_ns_id) return 0; nsid = lookup_ns_by_id(id, &mnt_ns_desc); From f0438f47f28d73a75e0fff4c74ee50f80b0c70d1 Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Mon, 22 Jun 2020 18:50:55 +0300 Subject: [PATCH 0273/1854] cgroup: make prepare_task_cgroup lookup current cgset in ancestors In case if our parent is a dead task (zombie) or a helper which in it's turn has zombie parent, and parent thus has zero cg_set we should look for current cgset deeper. Fixes: #1066 Signed-off-by: Pavel Tikhomirov --- criu/cgroup.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/criu/cgroup.c b/criu/cgroup.c index d4c712167..3737772df 100644 --- a/criu/cgroup.c +++ b/criu/cgroup.c @@ -1210,14 +1210,19 @@ static int move_in_cgroup(CgSetEntry *se, bool setup_cgns) int prepare_task_cgroup(struct pstree_item *me) { + struct pstree_item *parent = me->parent; CgSetEntry *se; u32 current_cgset; if (!rsti(me)->cg_set) return 0; - if (me->parent) - current_cgset = rsti(me->parent)->cg_set; + /* Zombies and helpers can have cg_set == 0 so we skip them */ + while (parent && !rsti(parent)->cg_set) + parent = parent->parent; + + if (parent) + current_cgset = rsti(parent)->cg_set; else current_cgset = root_cg_set; From 99c84878374e90e52ed1723f12564a5d16b1281d Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Mon, 22 Jun 2020 14:20:29 +0300 Subject: [PATCH 0274/1854] zdtm: add zombie_leader test Create a session leader and it's child - session member, make leader zombie. To restore this criu will need to create a helper task a child of our zombie so that member can inherit session. 
Before fixes in this patchset we segfault on empty ids and fail to restore cgroups because of empty cg_set Signed-off-by: Pavel Tikhomirov --- test/zdtm/static/Makefile | 1 + test/zdtm/static/zombie_leader.c | 83 +++++++++++++++++++++++++++++ test/zdtm/static/zombie_leader.desc | 1 + 3 files changed, 85 insertions(+) create mode 100644 test/zdtm/static/zombie_leader.c create mode 100644 test/zdtm/static/zombie_leader.desc diff --git a/test/zdtm/static/Makefile b/test/zdtm/static/Makefile index 7d72673c3..45a0df784 100644 --- a/test/zdtm/static/Makefile +++ b/test/zdtm/static/Makefile @@ -229,6 +229,7 @@ TST_NOFILE := \ time \ timens_nested \ timens_for_kids \ + zombie_leader \ # jobctl00 \ ifneq ($(ARCH),arm) diff --git a/test/zdtm/static/zombie_leader.c b/test/zdtm/static/zombie_leader.c new file mode 100644 index 000000000..d94b2af04 --- /dev/null +++ b/test/zdtm/static/zombie_leader.c @@ -0,0 +1,83 @@ +#include +#include +#include +#include +#include +#include + +#include "zdtmtst.h" + +const char *test_doc = "Check non-empty session with zombie leader"; +const char *test_author = "Pavel Tikhomirov "; + +int child(void) +{ + while (1) + sleep(1); + + return 0; +} + +int zombie_leader(int *cpid) +{ + int pid; + + setsid(); + + pid = fork(); + if (pid < 0) { + pr_perror("Failed to fork child"); + return 1; + } else if (pid == 0) { + exit(child()); + } + + *cpid = pid; + return 0; +} + +int main(int argc, char **argv) +{ + int ret = -1, status; + int pid, *cpid; + siginfo_t infop; + + test_init(argc, argv); + + cpid = (int *)mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_SHARED, -1, 0); + *cpid = 0; + + pid = fork(); + if (pid < 0) { + pr_perror("Failed to fork zombie"); + return 1; + } else if (pid == 0) { + exit(zombie_leader(cpid)); + } + + if (waitid(P_PID, pid, &infop, WNOWAIT | WEXITED) < 0) { + pr_perror("Failed to waitid zombie"); + goto err; + } + + if (!*cpid) { + pr_err("Don't know grand child's pid"); + goto err; + } + + 
test_daemon(); + test_waitsig(); + + ret = 0; +err: + waitpid(pid, &status, 0); + + if (*cpid) + kill(*cpid, SIGKILL); + + if (!ret) + pass(); + + return 0; +} diff --git a/test/zdtm/static/zombie_leader.desc b/test/zdtm/static/zombie_leader.desc new file mode 100644 index 000000000..6c4afe5f0 --- /dev/null +++ b/test/zdtm/static/zombie_leader.desc @@ -0,0 +1 @@ +{'flavor': 'ns uns'} From 4e7ec3c88b518e7bbf986228bb822bed6869ad9c Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Tue, 5 May 2020 14:53:08 +0000 Subject: [PATCH 0275/1854] pidns: add pidns image file definition TODO: create correct magic Signed-off-by: Adrian Reber --- criu/image-desc.c | 1 + criu/include/image-desc.h | 1 + criu/include/magic.h | 1 + criu/include/protobuf-desc.h | 1 + criu/protobuf-desc.c | 1 + images/Makefile | 1 + images/pidns.proto | 5 +++++ lib/py/images/images.py | 1 + 8 files changed, 12 insertions(+) create mode 100644 images/pidns.proto diff --git a/criu/image-desc.c b/criu/image-desc.c index 617b95355..c9581f5b8 100644 --- a/criu/image-desc.c +++ b/criu/image-desc.c @@ -103,6 +103,7 @@ struct cr_fd_desc_tmpl imgset_template[CR_FD_MAX] = { FD_ENTRY(NETNF_EXP, "netns-exp-%u"), FD_ENTRY(FILES, "files"), FD_ENTRY(TIMENS, "timens-%u"), + FD_ENTRY(PIDNS, "pidns-%u"), [CR_FD_STATS] = { .fmt = "stats-%s", diff --git a/criu/include/image-desc.h b/criu/include/image-desc.h index 6283a576d..e37d535c2 100644 --- a/criu/include/image-desc.h +++ b/criu/include/image-desc.h @@ -27,6 +27,7 @@ enum { CR_FD_MNTS, CR_FD_USERNS, CR_FD_TIMENS, + CR_FD_PIDNS, _CR_FD_IPCNS_FROM, CR_FD_IPC_VAR, diff --git a/criu/include/magic.h b/criu/include/magic.h index d078ec422..ac5cd8033 100644 --- a/criu/include/magic.h +++ b/criu/include/magic.h @@ -96,6 +96,7 @@ #define FILES_MAGIC 0x56303138 /* Toropets */ #define MEMFD_INODE_MAGIC 0x48453499 /* Dnipro */ #define TIMENS_MAGIC 0x43114433 /* Beslan */ +#define PIDNS_MAGIC 0x12345678 #define IFADDR_MAGIC RAW_IMAGE_MAGIC #define ROUTE_MAGIC RAW_IMAGE_MAGIC diff 
--git a/criu/include/protobuf-desc.h b/criu/include/protobuf-desc.h index 43d961731..46f7f8cea 100644 --- a/criu/include/protobuf-desc.h +++ b/criu/include/protobuf-desc.h @@ -66,6 +66,7 @@ enum { PB_TIMENS, PB_IMG_STREAMER_REQUEST, PB_IMG_STREAMER_REPLY, + PB_PIDNS, /* PB_AUTOGEN_STOP */ diff --git a/criu/protobuf-desc.c b/criu/protobuf-desc.c index 13655264a..223a6149f 100644 --- a/criu/protobuf-desc.c +++ b/criu/protobuf-desc.c @@ -38,6 +38,7 @@ #include "images/timer.pb-c.h" #include "images/utsns.pb-c.h" #include "images/timens.pb-c.h" +#include "images/pidns.pb-c.h" #include "images/ipc-var.pb-c.h" #include "images/ipc-shm.pb-c.h" #include "images/ipc-msg.pb-c.h" diff --git a/images/Makefile b/images/Makefile index 5458e4679..9ce7198c0 100644 --- a/images/Makefile +++ b/images/Makefile @@ -55,6 +55,7 @@ proto-obj-y += rpc.o proto-obj-y += ext-file.o proto-obj-y += cgroup.o proto-obj-y += userns.o +proto-obj-y += pidns.o proto-obj-y += google/protobuf/descriptor.o # To make protoc-c happy and compile opts.proto proto-obj-y += opts.o proto-obj-y += seccomp.o diff --git a/images/pidns.proto b/images/pidns.proto new file mode 100644 index 000000000..7ff049749 --- /dev/null +++ b/images/pidns.proto @@ -0,0 +1,5 @@ +syntax = "proto2"; + +message pidns_entry { + optional string ext_key = 1; +} diff --git a/lib/py/images/images.py b/lib/py/images/images.py index ca6f207bb..7faefbb96 100644 --- a/lib/py/images/images.py +++ b/lib/py/images/images.py @@ -467,6 +467,7 @@ handlers = { 'CREDS': entry_handler(pb.creds_entry), 'UTSNS': entry_handler(pb.utsns_entry), 'TIMENS': entry_handler(pb.timens_entry), + 'PIDNS': entry_handler(pb.pidns_entry), 'IPC_VAR': entry_handler(pb.ipc_var_entry), 'FS': entry_handler(pb.fs_entry), 'GHOST_FILE': ghost_file_handler(), From f1e6b103692e20a031fde1257193aab3d1f45ef4 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Tue, 5 May 2020 15:16:19 +0000 Subject: [PATCH 0276/1854] pidns: write and read pidns information This loads and stores 
the key for an external PID namespace if specified by the user using: --external pid[]: