From ae2ab5ddadc24c06c12c545f1160086713f78f59 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Thu, 2 May 2019 13:41:46 +0000 Subject: [PATCH 0001/2030] lsm: also dump and restore sockcreate The file /proc/PID/attr/sockcreate is used by SELinux to label newly created sockets with the label available at sockcreate. If it is NULL, the default label of the process will be used. This reads out that file during checkpoint and restores the value during restore. This value is irrelevant for existing sockets as they might have been created with another context. This is only to make sure that newly created sockets have the correct context. Signed-off-by: Adrian Reber --- criu/cr-restore.c | 36 ++++++++++++++++++++++++++++++++++++ criu/include/restorer.h | 2 ++ criu/lsm.c | 32 ++++++++++++++++++++++++++++++++ criu/pie/restorer.c | 15 ++++++++++----- images/creds.proto | 1 + 5 files changed, 81 insertions(+), 5 deletions(-) diff --git a/criu/cr-restore.c b/criu/cr-restore.c index a7b121b8c..7933fe37b 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -2994,6 +2994,8 @@ static void rst_reloc_creds(struct thread_restore_args *thread_args, if (args->lsm_profile) args->lsm_profile = rst_mem_remap_ptr(args->mem_lsm_profile_pos, RM_PRIVATE); + if (args->lsm_sockcreate) + args->lsm_sockcreate = rst_mem_remap_ptr(args->mem_lsm_sockcreate_pos, RM_PRIVATE); if (args->groups) args->groups = rst_mem_remap_ptr(args->mem_groups_pos, RM_PRIVATE); @@ -3059,6 +3061,40 @@ rst_prep_creds_args(CredsEntry *ce, unsigned long *prev_pos) args->mem_lsm_profile_pos = 0; } + if (ce->lsm_sockcreate) { + char *rendered = NULL; + char *profile; + + profile = ce->lsm_sockcreate; + + if (validate_lsm(profile) < 0) + return ERR_PTR(-EINVAL); + + if (profile && render_lsm_profile(profile, &rendered)) { + return ERR_PTR(-EINVAL); + } + if (rendered) { + size_t lsm_sockcreate_len; + char *lsm_sockcreate; + + args->mem_lsm_sockcreate_pos = rst_mem_align_cpos(RM_PRIVATE); + lsm_sockcreate_len = 
strlen(rendered); + lsm_sockcreate = rst_mem_alloc(lsm_sockcreate_len + 1, RM_PRIVATE); + if (!lsm_sockcreate) { + xfree(rendered); + return ERR_PTR(-ENOMEM); + } + + args = rst_mem_remap_ptr(this_pos, RM_PRIVATE); + args->lsm_sockcreate = lsm_sockcreate; + strncpy(args->lsm_sockcreate, rendered, lsm_sockcreate_len); + xfree(rendered); + } + } else { + args->lsm_sockcreate = NULL; + args->mem_lsm_sockcreate_pos = 0; + } + /* * Zap fields which we can't use. */ diff --git a/criu/include/restorer.h b/criu/include/restorer.h index 2884ce9e6..b83e9130c 100644 --- a/criu/include/restorer.h +++ b/criu/include/restorer.h @@ -69,8 +69,10 @@ struct thread_creds_args { unsigned int secbits; char *lsm_profile; unsigned int *groups; + char *lsm_sockcreate; unsigned long mem_lsm_profile_pos; + unsigned long mem_lsm_sockcreate_pos; unsigned long mem_groups_pos; unsigned long mem_pos_next; diff --git a/criu/lsm.c b/criu/lsm.c index 849ec37cd..b0ef0c396 100644 --- a/criu/lsm.c +++ b/criu/lsm.c @@ -98,6 +98,32 @@ err: freecon(ctx); return ret; } + +/* + * selinux_get_sockcreate_label reads /proc/PID/attr/sockcreate + * to see if the PID has a special label specified for sockets. + * Most of the time this will be empty and the process will use + * the process context also for sockets. + */ +static int selinux_get_sockcreate_label(pid_t pid, char **output) +{ + FILE *f; + + f = fopen_proc(pid, "attr/sockcreate"); + if (!f) + return -1; + + fscanf(f, "%ms", output); + /* + * No need to check the result of fscanf(). If there is something + * in /proc/PID/attr/sockcreate it will be copied to *output. If + * there is nothing it will stay NULL. So whatever fscanf() does + * it should be correct. 
+ */ + + fclose(f); + return 0; +} #endif void kerndat_lsm(void) @@ -132,6 +158,7 @@ int collect_lsm_profile(pid_t pid, CredsEntry *ce) int ret; ce->lsm_profile = NULL; + ce->lsm_sockcreate = NULL; switch (kdat.lsm) { case LSMTYPE__NO_LSM: @@ -143,6 +170,9 @@ int collect_lsm_profile(pid_t pid, CredsEntry *ce) #ifdef CONFIG_HAS_SELINUX case LSMTYPE__SELINUX: ret = selinux_get_label(pid, &ce->lsm_profile); + if (ret) + break; + ret = selinux_get_sockcreate_label(pid, &ce->lsm_sockcreate); break; #endif default: @@ -153,6 +183,8 @@ int collect_lsm_profile(pid_t pid, CredsEntry *ce) if (ce->lsm_profile) pr_info("%d has lsm profile %s\n", pid, ce->lsm_profile); + if (ce->lsm_sockcreate) + pr_info("%d has lsm sockcreate label %s\n", pid, ce->lsm_sockcreate); return ret; } diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c index 6e18cc260..4f42605a0 100644 --- a/criu/pie/restorer.c +++ b/criu/pie/restorer.c @@ -149,7 +149,7 @@ static void sigchld_handler(int signal, siginfo_t *siginfo, void *data) sys_exit_group(1); } -static int lsm_set_label(char *label, int procfd) +static int lsm_set_label(char *label, char *type, int procfd) { int ret = -1, len, lsmfd; char path[STD_LOG_SIMPLE_CHUNK]; @@ -157,9 +157,9 @@ static int lsm_set_label(char *label, int procfd) if (!label) return 0; - pr_info("restoring lsm profile %s\n", label); + pr_info("restoring lsm profile (%s) %s\n", type, label); - std_sprintf(path, "self/task/%ld/attr/current", sys_gettid()); + std_sprintf(path, "self/task/%ld/attr/%s", sys_gettid(), type); lsmfd = sys_openat(procfd, path, O_WRONLY, 0); if (lsmfd < 0) { @@ -305,9 +305,14 @@ static int restore_creds(struct thread_creds_args *args, int procfd, * SELinux and instead the process context is set before the * threads are created. 
*/ - if (lsm_set_label(args->lsm_profile, procfd) < 0) + if (lsm_set_label(args->lsm_profile, "current", procfd) < 0) return -1; } + + /* Also set the sockcreate label for all threads */ + if (lsm_set_label(args->lsm_sockcreate, "sockcreate", procfd) < 0) + return -1; + return 0; } @@ -1571,7 +1576,7 @@ long __export_restore_task(struct task_restore_args *args) if (args->lsm_type == LSMTYPE__SELINUX) { /* Only for SELinux */ if (lsm_set_label(args->t->creds_args->lsm_profile, - args->proc_fd) < 0) + "current", args->proc_fd) < 0) goto core_restore_end; } diff --git a/images/creds.proto b/images/creds.proto index 29fb8652e..23b84c7e5 100644 --- a/images/creds.proto +++ b/images/creds.proto @@ -20,4 +20,5 @@ message creds_entry { repeated uint32 groups = 14; optional string lsm_profile = 15; + optional string lsm_sockcreate = 16; } From 149fed6480de9d4fb44632a9be69314fcbeeee58 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Thu, 2 May 2019 13:47:29 +0000 Subject: [PATCH 0002/2030] test: Verify that sockcreate does not change during restore This makes sure that sockcreate stays empty for selinux00 before and after checkpoint/restore. Signed-off-by: Adrian Reber --- test/zdtm/static/selinux00.c | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/test/zdtm/static/selinux00.c b/test/zdtm/static/selinux00.c index dd9096a6f..db8420eac 100644 --- a/test/zdtm/static/selinux00.c +++ b/test/zdtm/static/selinux00.c @@ -83,6 +83,31 @@ int checkprofile() return 0; } +int check_sockcreate() +{ + char *output = NULL; + FILE *f = fopen("/proc/self/attr/sockcreate", "r"); + int ret = fscanf(f, "%ms", &output); + fclose(f); + + if (ret >= 1) { + free(output); + /* sockcreate should be empty, if fscanf found something + * it is wrong.*/ + fail("sockcreate should be empty\n"); + return -1; + } + + if (output) { + free(output); + /* Same here, output should still be NULL. 
*/ + fail("sockcreate should be empty\n"); + return -1; + } + + return 0; +} + int main(int argc, char **argv) { test_init(argc, argv); @@ -95,12 +120,21 @@ int main(int argc, char **argv) return 0; } + if (check_sockcreate()) + return -1; + if (setprofile()) return -1; + if (check_sockcreate()) + return -1; + test_daemon(); test_waitsig(); + if (check_sockcreate()) + return -1; + if (checkprofile() == 0) pass(); From 8f704c3eb66fba5ed44d956b3f569e1461a3d3b3 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Mon, 29 Apr 2019 15:21:59 +0200 Subject: [PATCH 0003/2030] sockets: dump and restore xattr security labels Restoring a SELinux process also requires to correctly label sockets. During checkpointing fgetxattr() is used to retrieve the "security.selinux" xattr and during restore setsockcreatecon() is used before a socket is created. Previous commits are already restoring the sockcreate SELinux setting if set by the process. Signed-off-by: Adrian Reber --- criu/include/lsm.h | 18 +++++++++++++++ criu/lsm.c | 56 +++++++++++++++++++++++++++++++++++++++++++++ criu/sk-inet.c | 12 ++++++++++ criu/sockets.c | 4 ++++ images/fdinfo.proto | 1 + 5 files changed, 91 insertions(+) diff --git a/criu/include/lsm.h b/criu/include/lsm.h index b4fce1303..3b8271282 100644 --- a/criu/include/lsm.h +++ b/criu/include/lsm.h @@ -3,6 +3,7 @@ #include "images/inventory.pb-c.h" #include "images/creds.pb-c.h" +#include "images/fdinfo.pb-c.h" #define AA_SECURITYFS_PATH "/sys/kernel/security/apparmor" @@ -34,4 +35,21 @@ int validate_lsm(char *profile); int render_lsm_profile(char *profile, char **val); extern int lsm_check_opts(void); + +#ifdef CONFIG_HAS_SELINUX +int dump_xattr_security_selinux(int fd, FdinfoEntry *e); +int run_setsockcreatecon(FdinfoEntry *e); +int reset_setsockcreatecon(); +#else +static inline int dump_xattr_security_selinux(int fd, FdinfoEntry *e) { + return 0; +} +static inline int run_setsockcreatecon(FdinfoEntry *e) { + return 0; +} +static inline int 
reset_setsockcreatecon() { + return 0; +} +#endif + #endif /* __CR_LSM_H__ */ diff --git a/criu/lsm.c b/criu/lsm.c index b0ef0c396..ef6ba112b 100644 --- a/criu/lsm.c +++ b/criu/lsm.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include "common/config.h" @@ -11,10 +12,12 @@ #include "util.h" #include "cr_options.h" #include "lsm.h" +#include "fdstore.h" #include "protobuf.h" #include "images/inventory.pb-c.h" #include "images/creds.pb-c.h" +#include "images/fdinfo.pb-c.h" #ifdef CONFIG_HAS_SELINUX #include @@ -124,6 +127,59 @@ static int selinux_get_sockcreate_label(pid_t pid, char **output) fclose(f); return 0; } + +int reset_setsockcreatecon() +{ + return setsockcreatecon_raw(NULL); +} + +int run_setsockcreatecon(FdinfoEntry *e) +{ + char *ctx = NULL; + + /* Currently this only works for SELinux. */ + if (kdat.lsm != LSMTYPE__SELINUX) + return 0; + + ctx = e->xattr_security_selinux; + /* Writing to the FD using fsetxattr() did not work for some reason. */ + return setsockcreatecon_raw(ctx); +} + +int dump_xattr_security_selinux(int fd, FdinfoEntry *e) +{ + char *ctx = NULL; + int len; + int ret; + + /* Currently this only works for SELinux. */ + if (kdat.lsm != LSMTYPE__SELINUX) + return 0; + + /* Get the size of the xattr. 
*/ + len = fgetxattr(fd, "security.selinux", ctx, 0); + if (len == -1) { + pr_err("Reading xattr %s to FD %d failed\n", ctx, fd); + return -1; + } + + ctx = xmalloc(len); + if (!ctx) { + pr_err("xmalloc to read xattr for FD %d failed\n", fd); + return -1; + } + + ret = fgetxattr(fd, "security.selinux", ctx, len); + if (len != ret) { + pr_err("Reading xattr %s to FD %d failed\n", ctx, fd); + return -1; + } + + e->xattr_security_selinux = ctx; + + return 0; +} + #endif void kerndat_lsm(void) diff --git a/criu/sk-inet.c b/criu/sk-inet.c index cc7e1cc28..0b7f0d7ff 100644 --- a/criu/sk-inet.c +++ b/criu/sk-inet.c @@ -23,6 +23,9 @@ #include "files.h" #include "image.h" #include "log.h" +#include "lsm.h" +#include "kerndat.h" +#include "pstree.h" #include "rst-malloc.h" #include "sockets.h" #include "sk-inet.h" @@ -30,6 +33,8 @@ #include "util.h" #include "namespaces.h" +#include "images/inventory.pb-c.h" + #undef LOG_PREFIX #define LOG_PREFIX "inet: " @@ -804,12 +809,18 @@ static int open_inet_sk(struct file_desc *d, int *new_fd) if (set_netns(ie->ns_id)) return -1; + if (run_setsockcreatecon(fle->fe)) + return -1; + sk = socket(ie->family, ie->type, ie->proto); if (sk < 0) { pr_perror("Can't create inet socket"); return -1; } + if (reset_setsockcreatecon()) + return -1; + if (ie->v6only) { if (restore_opt(sk, SOL_IPV6, IPV6_V6ONLY, &yes) == -1) goto err; @@ -890,6 +901,7 @@ done: } *new_fd = sk; + return 1; err: close(sk); diff --git a/criu/sockets.c b/criu/sockets.c index 30072ac73..7f7453ca1 100644 --- a/criu/sockets.c +++ b/criu/sockets.c @@ -22,6 +22,7 @@ #include "util-pie.h" #include "sk-packet.h" #include "namespaces.h" +#include "lsm.h" #include "net.h" #include "xmalloc.h" #include "fs-magic.h" @@ -663,6 +664,9 @@ int dump_socket(struct fd_parms *p, int lfd, FdinfoEntry *e) int family; const struct fdtype_ops *ops; + if (dump_xattr_security_selinux(lfd, e)) + return -1; + if (dump_opt(lfd, SOL_SOCKET, SO_DOMAIN, &family)) return -1; diff --git 
a/images/fdinfo.proto b/images/fdinfo.proto index ed82ceffe..77e375aa9 100644 --- a/images/fdinfo.proto +++ b/images/fdinfo.proto @@ -47,6 +47,7 @@ message fdinfo_entry { required uint32 flags = 2; required fd_types type = 3; required uint32 fd = 4; + optional string xattr_security_selinux = 5; } message file_entry { From a1cf51841a42a71a0867919f744560e21c654193 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Tue, 30 Apr 2019 09:47:32 +0000 Subject: [PATCH 0004/2030] selinux: add socket label test This adds two more SELinux tests to verify that checkpointing and restoring SELinux socket labels works correctly, if the process uses setsockcreatecon() or if the process leaves the default context for newly created sockets. Signed-off-by: Adrian Reber --- test/zdtm/static/Makefile | 3 + test/zdtm/static/selinux01.c | 200 +++++++++++++++++++++++++++ test/zdtm/static/selinux01.checkskip | 1 + test/zdtm/static/selinux01.desc | 1 + test/zdtm/static/selinux01.hook | 1 + test/zdtm/static/selinux02.c | 1 + test/zdtm/static/selinux02.checkskip | 1 + test/zdtm/static/selinux02.desc | 1 + test/zdtm/static/selinux02.hook | 1 + 9 files changed, 210 insertions(+) create mode 100644 test/zdtm/static/selinux01.c create mode 120000 test/zdtm/static/selinux01.checkskip create mode 120000 test/zdtm/static/selinux01.desc create mode 120000 test/zdtm/static/selinux01.hook create mode 120000 test/zdtm/static/selinux02.c create mode 120000 test/zdtm/static/selinux02.checkskip create mode 120000 test/zdtm/static/selinux02.desc create mode 120000 test/zdtm/static/selinux02.hook diff --git a/test/zdtm/static/Makefile b/test/zdtm/static/Makefile index 8e3f39276..1ffaa9039 100644 --- a/test/zdtm/static/Makefile +++ b/test/zdtm/static/Makefile @@ -211,6 +211,8 @@ TST_NOFILE := \ thp_disable \ pid_file \ selinux00 \ + selinux01 \ + selinux02 \ # jobctl00 \ ifneq ($(SRCARCH),arm) @@ -513,6 +515,7 @@ unlink_fstat041: CFLAGS += -DUNLINK_FSTAT041 -DUNLINK_FSTAT04 ghost_holes01: CFLAGS += -DTAIL_HOLE 
ghost_holes02: CFLAGS += -DHEAD_HOLE sk-freebind-false: CFLAGS += -DZDTM_FREEBIND_FALSE +selinux02: CFLAGS += -DUSING_SOCKCREATE stopped01: CFLAGS += -DZDTM_STOPPED_KILL stopped02: CFLAGS += -DZDTM_STOPPED_TKILL stopped12: CFLAGS += -DZDTM_STOPPED_KILL -DZDTM_STOPPED_TKILL diff --git a/test/zdtm/static/selinux01.c b/test/zdtm/static/selinux01.c new file mode 100644 index 000000000..9966455c4 --- /dev/null +++ b/test/zdtm/static/selinux01.c @@ -0,0 +1,200 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "zdtmtst.h" + +/* Enabling the right policy happens in selinux00.hook and selinux00.checkskip */ + +const char *test_doc = "Check that a SELinux socket context is restored"; +const char *test_author = "Adrian Reber "; + +/* This is all based on Tycho's apparmor code */ + +#define CONTEXT "unconfined_u:unconfined_r:unconfined_dbusd_t:s0" + +/* + * This is used to store the state of SELinux. For this test + * SELinux is switched to permissive mode and later the previous + * SELinux state is restored. 
+ */ +char state; + +int check_for_selinux() +{ + if (access("/sys/fs/selinux", F_OK) == 0) + return 0; + return 1; +} + +int setprofile() +{ + int fd, len; + + fd = open("/proc/self/attr/current", O_WRONLY); + if (fd < 0) { + fail("Could not open /proc/self/attr/current\n"); + return -1; + } + + len = write(fd, CONTEXT, strlen(CONTEXT)); + close(fd); + + if (len < 0) { + fail("Could not write context\n"); + return -1; + } + + return 0; +} + +int set_sockcreate() +{ + int fd, len; + + fd = open("/proc/self/attr/sockcreate", O_WRONLY); + if (fd < 0) { + fail("Could not open /proc/self/attr/sockcreate\n"); + return -1; + } + + len = write(fd, CONTEXT, strlen(CONTEXT)); + close(fd); + + if (len < 0) { + fail("Could not write context\n"); + return -1; + } + + return 0; +} + +int check_sockcreate() +{ + int fd; + char context[1024]; + int len; + + + fd = open("/proc/self/attr/sockcreate", O_RDONLY); + if (fd < 0) { + fail("Could not open /proc/self/attr/sockcreate\n"); + return -1; + } + + len = read(fd, context, strlen(CONTEXT)); + close(fd); + if (len != strlen(CONTEXT)) { + fail("SELinux context has unexpected length %d, expected %zd\n", + len, strlen(CONTEXT)); + return -1; + } + + if (strncmp(context, CONTEXT, strlen(CONTEXT)) != 0) { + fail("Wrong SELinux context %s expected %s\n", context, CONTEXT); + return -1; + } + + return 0; +} + +int check_sockcreate_empty() +{ + char *output = NULL; + FILE *f = fopen("/proc/self/attr/sockcreate", "r"); + int ret = fscanf(f, "%ms", &output); + fclose(f); + + if (ret >= 1) { + free(output); + /* sockcreate should be empty, if fscanf found something + * it is wrong.*/ + fail("sockcreate should be empty\n"); + return -1; + } + + if (output) { + free(output); + /* Same here, output should still be NULL. 
*/ + fail("sockcreate should be empty\n"); + return -1; + } + + return 0; +} + +int main(int argc, char **argv) +{ + char ctx[1024]; + test_init(argc, argv); + + if (check_for_selinux()) { + skip("SELinux not found on this system."); + test_daemon(); + test_waitsig(); + pass(); + return 0; + } + +#ifdef USING_SOCKCREATE + if (set_sockcreate()) + return -1; +#else + if (check_sockcreate_empty()) + return -1; + + if (setprofile()) + return -1; + + if (check_sockcreate_empty()) + return -1; +#endif + + /* Open our test socket */ + int sk = socket(AF_INET, SOCK_STREAM, 0); + memset(ctx, 0, 1024); + /* Read out the socket label */ + if (fgetxattr(sk, "security.selinux", ctx, 1024) == -1) { + fail("Reading xattr 'security.selinux' failed.\n"); + return -1; + } + if (strncmp(ctx, CONTEXT, strlen(CONTEXT)) != 0) { + fail("Wrong SELinux context %s expected %s\n", ctx, CONTEXT); + return -1; + } + memset(ctx, 0, 1024); + + test_daemon(); + test_waitsig(); + + /* Read out the socket label again */ + + if (fgetxattr(sk, "security.selinux", ctx, 1024) == -1) { + fail("Reading xattr 'security.selinux' failed.\n"); + return -1; + } + if (strncmp(ctx, CONTEXT, strlen(CONTEXT)) != 0) { + fail("Wrong SELinux context %s expected %s\n", ctx, CONTEXT); + return -1; + } + +#ifdef USING_SOCKCREATE + if (check_sockcreate()) + return -1; +#else + if (check_sockcreate_empty()) + return -1; +#endif + + pass(); + + return 0; +} diff --git a/test/zdtm/static/selinux01.checkskip b/test/zdtm/static/selinux01.checkskip new file mode 120000 index 000000000..e8a172479 --- /dev/null +++ b/test/zdtm/static/selinux01.checkskip @@ -0,0 +1 @@ +selinux00.checkskip \ No newline at end of file diff --git a/test/zdtm/static/selinux01.desc b/test/zdtm/static/selinux01.desc new file mode 120000 index 000000000..2d2961a76 --- /dev/null +++ b/test/zdtm/static/selinux01.desc @@ -0,0 +1 @@ +selinux00.desc \ No newline at end of file diff --git a/test/zdtm/static/selinux01.hook b/test/zdtm/static/selinux01.hook 
new file mode 120000 index 000000000..dd7ed6bb3 --- /dev/null +++ b/test/zdtm/static/selinux01.hook @@ -0,0 +1 @@ +selinux00.hook \ No newline at end of file diff --git a/test/zdtm/static/selinux02.c b/test/zdtm/static/selinux02.c new file mode 120000 index 000000000..570267785 --- /dev/null +++ b/test/zdtm/static/selinux02.c @@ -0,0 +1 @@ +selinux01.c \ No newline at end of file diff --git a/test/zdtm/static/selinux02.checkskip b/test/zdtm/static/selinux02.checkskip new file mode 120000 index 000000000..2696e6e3d --- /dev/null +++ b/test/zdtm/static/selinux02.checkskip @@ -0,0 +1 @@ +selinux01.checkskip \ No newline at end of file diff --git a/test/zdtm/static/selinux02.desc b/test/zdtm/static/selinux02.desc new file mode 120000 index 000000000..9c6802c4d --- /dev/null +++ b/test/zdtm/static/selinux02.desc @@ -0,0 +1 @@ +selinux01.desc \ No newline at end of file diff --git a/test/zdtm/static/selinux02.hook b/test/zdtm/static/selinux02.hook new file mode 120000 index 000000000..e3ea0a6c8 --- /dev/null +++ b/test/zdtm/static/selinux02.hook @@ -0,0 +1 @@ +selinux01.hook \ No newline at end of file From 7caaed20d88da3a64a6501c010d9556dd898ed70 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Fri, 3 May 2019 06:27:51 +0000 Subject: [PATCH 0005/2030] lsm: fix compiler error 'unused-result' Reading out the xattr 'security.selinux' of checkpointed sockets with fscanf() works (at least in theory) without checking the result of fscanf(). There are, however, multiple CI failures when ignoring the return value of fscanf(). This adds ferror() to check if the stream has an actual error or if '-1' just mean EOF. 
Handle all errors of fscanf() // Andrei Signed-off-by: Adrian Reber Signed-off-by: Andrei Vagin --- criu/lsm.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/criu/lsm.c b/criu/lsm.c index ef6ba112b..9c9ac7f80 100644 --- a/criu/lsm.c +++ b/criu/lsm.c @@ -33,8 +33,8 @@ static int apparmor_get_label(pid_t pid, char **profile_name) return -1; if (fscanf(f, "%ms", profile_name) != 1) { - fclose(f); pr_perror("err scanfing"); + fclose(f); return -1; } @@ -111,19 +111,23 @@ err: static int selinux_get_sockcreate_label(pid_t pid, char **output) { FILE *f; + int ret; f = fopen_proc(pid, "attr/sockcreate"); if (!f) return -1; - fscanf(f, "%ms", output); - /* - * No need to check the result of fscanf(). If there is something - * in /proc/PID/attr/sockcreate it will be copied to *output. If - * there is nothing it will stay NULL. So whatever fscanf() does - * it should be correct. - */ - + ret = fscanf(f, "%ms", output); + if (ret == -1 && errno != 0) { + pr_perror("Unable to parse /proc/%d/attr/sockcreate", pid); + /* + * Only if the error indicator is set it is a real error. + * -1 could also be EOF, which would mean that sockcreate + * was just empty, which is the most common case. + */ + fclose(f); + return -1; + } fclose(f); return 0; } From 6e36fb26b24c296cb27814816492c5722ae6e218 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Sat, 4 May 2019 20:01:52 -0700 Subject: [PATCH 0006/2030] lsm: don't reset socket contex if SELinux is disabled Fixes #693 --- criu/lsm.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/criu/lsm.c b/criu/lsm.c index 9c9ac7f80..592113839 100644 --- a/criu/lsm.c +++ b/criu/lsm.c @@ -134,7 +134,15 @@ static int selinux_get_sockcreate_label(pid_t pid, char **output) int reset_setsockcreatecon() { - return setsockcreatecon_raw(NULL); + /* Currently this only works for SELinux. 
*/ + if (kdat.lsm != LSMTYPE__SELINUX) + return 0; + + if (setsockcreatecon_raw(NULL)) { + pr_perror("Unable to reset socket SELinux context"); + return -1; + } + return 0; } int run_setsockcreatecon(FdinfoEntry *e) @@ -147,7 +155,11 @@ int run_setsockcreatecon(FdinfoEntry *e) ctx = e->xattr_security_selinux; /* Writing to the FD using fsetxattr() did not work for some reason. */ - return setsockcreatecon_raw(ctx); + if (setsockcreatecon_raw(ctx)) { + pr_perror("Unable to set the %s socket SELinux context", ctx); + return -1; + } + return 0; } int dump_xattr_security_selinux(int fd, FdinfoEntry *e) From 05b98e74e47e7d4fb52c6ca17ee28088d2b3e4cb Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Sat, 4 May 2019 15:27:32 +0200 Subject: [PATCH 0007/2030] lsm: fix compiler error on Fedora 30 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes following compiler error: criu/lsm.c: In function ‘dump_xattr_security_selinux’: criu/include/log.h:51:2: error: ‘%s’ directive argument is null [-Werror=format-overflow=] 51 | print_on_level(LOG_ERROR, \ | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 52 | "Error (%s:%d): " LOG_PREFIX fmt, \ | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 53 | __FILE__, __LINE__, ##__VA_ARGS__) | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ criu/lsm.c:166:3: note: in expansion of macro ‘pr_err’ 166 | pr_err("Reading xattr %s to FD %d failed\n", ctx, fd); | ^~~~~~ Signed-off-by: Adrian Reber --- criu/lsm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/criu/lsm.c b/criu/lsm.c index 592113839..420585ba4 100644 --- a/criu/lsm.c +++ b/criu/lsm.c @@ -175,7 +175,7 @@ int dump_xattr_security_selinux(int fd, FdinfoEntry *e) /* Get the size of the xattr. 
*/ + len = fgetxattr(fd, "security.selinux", ctx, 0); if (len == -1) { - pr_err("Reading xattr %s to FD %d failed\n", ctx, fd); + pr_err("Reading xattr security.selinux from FD %d failed\n", fd); return -1; } From f4de24c38ba2a5c3e323d4faaa4079c9aaac1060 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Wed, 15 May 2019 08:40:31 +0300 Subject: [PATCH 0008/2030] criu: Version 3.12.1 Changelog: * Restore SELinux socket labels Signed-off-by: Andrei Vagin --- Makefile.versions | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.versions b/Makefile.versions index 6d4e15e3d..a7820b3b3 100644 --- a/Makefile.versions +++ b/Makefile.versions @@ -2,7 +2,7 @@ # CRIU version. CRIU_VERSION_MAJOR := 3 CRIU_VERSION_MINOR := 12 -CRIU_VERSION_SUBLEVEL := +CRIU_VERSION_SUBLEVEL := 1 CRIU_VERSION_EXTRA := CRIU_VERSION_NAME := Ice Penguin CRIU_VERSION := $(CRIU_VERSION_MAJOR)$(if $(CRIU_VERSION_MINOR),.$(CRIU_VERSION_MINOR))$(if $(CRIU_VERSION_SUBLEVEL),.$(CRIU_VERSION_SUBLEVEL))$(if $(CRIU_VERSION_EXTRA),.$(CRIU_VERSION_EXTRA)) From 2a3e34155c734995edd6806f134809b67429e6a6 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Thu, 27 Jun 2019 15:51:50 +0100 Subject: [PATCH 0009/2030] zdtm: Refactor seccomp_filter_{threads,tsync} As discussed on the musl mailing list [1], when the libc API is used to create a POSIX thread, and this thread is killed by seccomp, this breaks a fundamental assumption the C runtime relies on, causing any libc call (e.g. pthread_join) after the kill to have undefined behaviour. In order to work around the issue we could use SECCOMP_RET_ERRNO instead of SECCOMP_RET_KILL. This filter will set a magic value to user space as errno without executing the system call. 
[1] https://www.openwall.com/lists/musl/2019/06/26/7 Resolves #725 Signed-off-by: Radostin Stoyanov --- test/zdtm/static/seccomp_filter_threads.c | 26 +++++++++++++++-------- test/zdtm/static/seccomp_filter_tsync.c | 23 ++++++++++---------- 2 files changed, 29 insertions(+), 20 deletions(-) diff --git a/test/zdtm/static/seccomp_filter_threads.c b/test/zdtm/static/seccomp_filter_threads.c index b3fa6089d..63ea4b5bd 100644 --- a/test/zdtm/static/seccomp_filter_threads.c +++ b/test/zdtm/static/seccomp_filter_threads.c @@ -39,6 +39,8 @@ static long sys_gettid(void) { return syscall(__NR_gettid); } static futex_t *wait_rdy; static futex_t *wait_run; +static int magic = 1234; + int get_seccomp_mode(pid_t pid) { FILE *f; @@ -70,7 +72,7 @@ int filter_syscall(int syscall_nr, unsigned int flags) struct sock_filter filter[] = { BPF_STMT(BPF_LD+BPF_W+BPF_ABS, offsetof(struct seccomp_data, nr)), BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, syscall_nr, 0, 1), - BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL), + BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ERRNO | (SECCOMP_RET_DATA & magic)), BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW), }; @@ -87,9 +89,9 @@ int filter_syscall(int syscall_nr, unsigned int flags) return 0; } -void tigger_ptrace(void) { ptrace(PTRACE_TRACEME); } -void trigger_prctl(void) { prctl(PR_SET_PDEATHSIG, 9, 0, 0, 0); } -void trigger_mincore(void) { mincore(NULL, 0, NULL); } +int tigger_ptrace(void) { return ptrace(PTRACE_TRACEME); } +int trigger_prctl(void) { return prctl(PR_SET_PDEATHSIG, 9, 0, 0, 0); } +int trigger_mincore(void) { return mincore(NULL, 0, NULL); } #define gen_param(__syscall_nr, __trigger) \ { \ @@ -101,7 +103,7 @@ void trigger_mincore(void) { mincore(NULL, 0, NULL); } struct { char *syscall_name; unsigned int syscall_nr; - void (*trigger)(void); + int (*trigger)(void); } pthread_seccomp_params[] = { gen_param(__NR_ptrace, tigger_ptrace), gen_param(__NR_prctl, trigger_prctl), @@ -112,6 +114,7 @@ struct { void *thread_main(void *arg) { + int ret; size_t nr = (long) 
arg; if (filter_syscall(pthread_seccomp_params[nr].syscall_nr, 0) < 0) @@ -128,10 +131,12 @@ void *thread_main(void *arg) nr, pthread_seccomp_params[nr].syscall_name, sys_gettid()); - pthread_seccomp_params[nr].trigger(); + ret = pthread_seccomp_params[nr].trigger(); + if (ret == -1 && errno == magic) + return (void *)0; test_msg("Abnormal exit %zu thread %lu\n", nr, sys_gettid()); - pthread_exit((void *)1); + return (void *)1; } int main(int argc, char ** argv) @@ -167,7 +172,7 @@ int main(int argc, char ** argv) if (pid == 0) { pthread_t thread[ARRAY_SIZE(pthread_seccomp_params)]; - void *p = NULL; + void *ret; zdtm_seccomp = 1; @@ -180,10 +185,13 @@ int main(int argc, char ** argv) for (i = 0; i < ARRAY_SIZE(pthread_seccomp_params); i++) { test_msg("Waiting thread %zu\n", i); - if (pthread_join(thread[i], &p) != 0) { + if (pthread_join(thread[i], &ret) != 0) { pr_perror("pthread_join"); exit(1); } + + if (ret != 0) + syscall(__NR_exit, 1); } syscall(__NR_exit, 0); diff --git a/test/zdtm/static/seccomp_filter_tsync.c b/test/zdtm/static/seccomp_filter_tsync.c index 9b4742ba1..e374f0aff 100644 --- a/test/zdtm/static/seccomp_filter_tsync.c +++ b/test/zdtm/static/seccomp_filter_tsync.c @@ -34,6 +34,8 @@ const char *test_author = "Tycho Andersen "; pthread_mutex_t getpid_wait; +static int magic = 1234; + int get_seccomp_mode(pid_t pid) { FILE *f; @@ -65,7 +67,7 @@ int filter_syscall(int syscall_nr, unsigned int flags) struct sock_filter filter[] = { BPF_STMT(BPF_LD+BPF_W+BPF_ABS, offsetof(struct seccomp_data, nr)), BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, syscall_nr, 0, 1), - BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL), + BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ERRNO | (SECCOMP_RET_DATA & magic)), BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW), }; @@ -84,14 +86,19 @@ int filter_syscall(int syscall_nr, unsigned int flags) void *wait_and_getpid(void *arg) { + int ret; + pthread_mutex_lock(&getpid_wait); pthread_mutex_unlock(&getpid_wait); pthread_mutex_destroy(&getpid_wait); - /* we 
expect the tg to get killed by the seccomp filter that was - * installed via TSYNC */ - ptrace(PTRACE_TRACEME); - pthread_exit((void *)1); + /* we expect seccomp to exit with + * an error and set errno = magic */ + ret = ptrace(PTRACE_TRACEME); + if (ret == -1 && errno == magic) + return (void *)0; + + return ((void *)1); } int main(int argc, char ** argv) @@ -159,12 +166,6 @@ int main(int argc, char ** argv) exit(1); } - /* Here we're abusing pthread exit slightly: if the thread gets - * to call pthread_exit, the value of p is one, but if it gets - * killed pthread_join doesn't set a value since the thread - * didn't, so the value is null; we exit 0 to indicate success - * as usual. - */ syscall(__NR_exit, p); } From 6d66dd5d89f48026fd8a7149bfbb83669e7ff508 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 2 May 2019 02:34:41 +0100 Subject: [PATCH 0010/2030] zdtm/ia32: fcntl() wrapper for old glibc(s) A bit nasty, but does the job to run ofd tests on glibc < v2.28. Other way would be to update glibc on Travis-CI ia32 tests, but I thought someone might want to run the tests outside Travis-CI. 
Fixes: #745 Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- test/zdtm/static/file_locks06.c | 2 +- test/zdtm/static/file_locks07.c | 2 +- test/zdtm/static/file_locks08.c | 2 +- test/zdtm/static/ofd_file_locks.c | 60 +++++++++++++++++++++++++++++-- test/zdtm/static/ofd_file_locks.h | 1 + 5 files changed, 62 insertions(+), 5 deletions(-) diff --git a/test/zdtm/static/file_locks06.c b/test/zdtm/static/file_locks06.c index 9bc70c47d..780fb07ea 100644 --- a/test/zdtm/static/file_locks06.c +++ b/test/zdtm/static/file_locks06.c @@ -26,7 +26,7 @@ int init_lock(int *fd, struct flock *lck) lck->l_len = 0; lck->l_pid = 0; - if (fcntl(*fd, F_OFD_SETLK, lck) < 0) { + if (zdtm_fcntl(*fd, F_OFD_SETLK, lck) < 0) { pr_perror("Can't set ofd lock"); return -1; } diff --git a/test/zdtm/static/file_locks07.c b/test/zdtm/static/file_locks07.c index b36f23011..2fe169fcf 100644 --- a/test/zdtm/static/file_locks07.c +++ b/test/zdtm/static/file_locks07.c @@ -45,7 +45,7 @@ int init_file_locks(void) } for (i = 0; i < FILE_NUM; ++i) - if (fcntl(fds[i], F_OFD_SETLKW, &lcks[i]) < 0) { + if (zdtm_fcntl(fds[i], F_OFD_SETLKW, &lcks[i]) < 0) { pr_perror("Can't set ofd lock"); return -1; } diff --git a/test/zdtm/static/file_locks08.c b/test/zdtm/static/file_locks08.c index 2d25b4b09..fea8d9e7e 100644 --- a/test/zdtm/static/file_locks08.c +++ b/test/zdtm/static/file_locks08.c @@ -28,7 +28,7 @@ int init_file_lock(int *fd, struct flock *lck) lck->l_len = 0; /* lock whole file */ lck->l_pid = 0; /* should be 0 for ofd lock */ - if (fcntl(*fd, F_OFD_SETLKW, lck) < 0) { + if (zdtm_fcntl(*fd, F_OFD_SETLKW, lck) < 0) { pr_perror("Can't set ofd lock"); return -1; } diff --git a/test/zdtm/static/ofd_file_locks.c b/test/zdtm/static/ofd_file_locks.c index c4a633625..5b19532f8 100644 --- a/test/zdtm/static/ofd_file_locks.c +++ b/test/zdtm/static/ofd_file_locks.c @@ -86,7 +86,7 @@ int check_lock_exists(const char *filename, struct flock *lck) if (lck->l_type == F_RDLCK) { /* check, that there is no 
write lock */ - ret = fcntl(fd, F_OFD_GETLK, lck); + ret = zdtm_fcntl(fd, F_OFD_GETLK, lck); if (ret) { pr_err("fcntl failed (%i)\n", ret); goto out; @@ -99,7 +99,7 @@ int check_lock_exists(const char *filename, struct flock *lck) /* check, that lock is set */ lck->l_type = F_WRLCK; - ret = fcntl(fd, F_OFD_GETLK, lck); + ret = zdtm_fcntl(fd, F_OFD_GETLK, lck); if (ret) { pr_err("fcntl failed (%i)\n", ret); goto out; @@ -136,3 +136,59 @@ int check_file_lock_restored(int pid, int fd, struct flock *lck) } return 0; } + +/* + * fcntl() wrapper for ofd locks. + * + * Kernel requires ia32 processes to use fcntl64() syscall for ofd: + * COMPAT_SYSCALL_DEFINE3(fcntl, [..]) + * { + * switch (cmd) { + * case F_GETLK64: + * case F_SETLK64: + * case F_SETLKW64: + * case F_OFD_GETLK: + * case F_OFD_SETLK: + * case F_OFD_SETLKW: + * return -EINVAL; + * } + * + * Glibc does all the needed wraps for fcntl(), but only from v2.28. + * To make ofd tests run on the older glibc's - provide zdtm wrap. + * + * Note: we don't need the wraps in CRIU itself as parasite/restorer + * run in 64-bit mode as long as possible, including the time to play + * with ofd (and they are dumped from CRIU). 
+ */ +int zdtm_fcntl(int fd, int cmd, struct flock *f) +{ +#if defined(__i386__) +#ifndef __NR_fcntl64 +# define __NR_fcntl64 221 +#endif + struct flock64 f64 = {}; + int ret; + + switch (cmd) { + case F_OFD_SETLK: + case F_OFD_SETLKW: + f64.l_type = f->l_type; + f64.l_whence = f->l_whence; + f64.l_start = f->l_start; + f64.l_len = f->l_len; + f64.l_pid = f->l_pid; + return syscall(__NR_fcntl64, fd, cmd, &f64); + case F_OFD_GETLK: + ret = syscall(__NR_fcntl64, fd, cmd, &f64); + f->l_type = f64.l_type; + f->l_whence = f64.l_whence; + f->l_start = f64.l_start; + f->l_len = f64.l_len; + f->l_pid = f64.l_pid; + return ret; + default: + break; + } +#endif + return fcntl(fd, cmd, f); +} diff --git a/test/zdtm/static/ofd_file_locks.h b/test/zdtm/static/ofd_file_locks.h index 6978446df..1b206a238 100644 --- a/test/zdtm/static/ofd_file_locks.h +++ b/test/zdtm/static/ofd_file_locks.h @@ -16,5 +16,6 @@ extern int check_lock_exists(const char *filename, struct flock *lck); extern int check_file_lock_restored(int pid, int fd, struct flock *lck); +extern int zdtm_fcntl(int fd, int cmd, struct flock *f); #endif /* ZDTM_OFD_FILE_LOCKS_H_ */ From 4662315fc4fd74228b551c3d5ddb42c2975f5749 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Fri, 12 Apr 2019 21:01:36 +0100 Subject: [PATCH 0011/2030] Convert spaces to tabs There are a few places where spaces have been used instead of tabs for indentation. This patch converts the spaces to tabs for consistency with the rest of the code base. 
Signed-off-by: Radostin Stoyanov --- .../arm/plugins/std/syscalls/syscall-aux.h | 12 ++-- .../lib/include/uapi/asm/processor-flags.h | 56 ++++++++-------- .../src/lib/include/uapi/asm/breakpoints.h | 4 +- .../arch/x86/src/lib/include/uapi/asm/fpu.h | 2 +- compel/arch/x86/src/lib/infect.c | 16 ++--- compel/include/uapi/sigframe-common.h | 4 +- criu/arch/x86/include/asm/parasite.h | 4 +- criu/arch/x86/include/asm/restorer.h | 12 ++-- criu/cr-check.c | 2 +- criu/cr-restore.c | 8 +-- criu/filesystems.c | 2 +- criu/include/aio.h | 18 +++--- criu/include/asm-generic/vdso.h | 4 +- criu/include/autofs.h | 62 +++++++++--------- criu/include/packet_diag.h | 60 ++++++++--------- criu/include/page-pipe.h | 10 +-- criu/include/ptrace-compat.h | 6 +- criu/include/sockets.h | 2 +- criu/include/util-pie.h | 2 +- criu/libnetlink.c | 2 +- criu/mount.c | 4 +- criu/net.c | 24 +++---- criu/pagemap-cache.c | 2 +- criu/sk-queue.c | 2 +- include/common/arch/arm/asm/atomic.h | 18 +++--- include/common/arch/ppc64/asm/atomic.h | 2 +- include/common/arch/ppc64/asm/linkage.h | 64 +++++++++---------- include/common/arch/x86/asm/cmpxchg.h | 2 +- lib/c/criu.c | 4 +- soccr/soccr.c | 14 ++-- 30 files changed, 212 insertions(+), 212 deletions(-) diff --git a/compel/arch/arm/plugins/std/syscalls/syscall-aux.h b/compel/arch/arm/plugins/std/syscalls/syscall-aux.h index 0b029301f..3d2056b5a 100644 --- a/compel/arch/arm/plugins/std/syscalls/syscall-aux.h +++ b/compel/arch/arm/plugins/std/syscalls/syscall-aux.h @@ -3,25 +3,25 @@ #endif #ifndef __ARM_NR_BASE -# define __ARM_NR_BASE 0x0f0000 +# define __ARM_NR_BASE 0x0f0000 #endif #ifndef __ARM_NR_breakpoint -# define __ARM_NR_breakpoint (__ARM_NR_BASE+1) +# define __ARM_NR_breakpoint (__ARM_NR_BASE+1) #endif #ifndef __ARM_NR_cacheflush -# define __ARM_NR_cacheflush (__ARM_NR_BASE+2) +# define __ARM_NR_cacheflush (__ARM_NR_BASE+2) #endif #ifndef __ARM_NR_usr26 -# define __ARM_NR_usr26 (__ARM_NR_BASE+3) +# define __ARM_NR_usr26 (__ARM_NR_BASE+3) #endif 
#ifndef __ARM_NR_usr32 -# define __ARM_NR_usr32 (__ARM_NR_BASE+4) +# define __ARM_NR_usr32 (__ARM_NR_BASE+4) #endif #ifndef __ARM_NR_set_tls -# define __ARM_NR_set_tls (__ARM_NR_BASE+5) +# define __ARM_NR_set_tls (__ARM_NR_BASE+5) #endif diff --git a/compel/arch/arm/src/lib/include/uapi/asm/processor-flags.h b/compel/arch/arm/src/lib/include/uapi/asm/processor-flags.h index fc00a9e64..8745f4459 100644 --- a/compel/arch/arm/src/lib/include/uapi/asm/processor-flags.h +++ b/compel/arch/arm/src/lib/include/uapi/asm/processor-flags.h @@ -6,37 +6,37 @@ /* * PSR bits */ -#define USR26_MODE 0x00000000 -#define FIQ26_MODE 0x00000001 -#define IRQ26_MODE 0x00000002 -#define SVC26_MODE 0x00000003 -#define USR_MODE 0x00000010 -#define FIQ_MODE 0x00000011 -#define IRQ_MODE 0x00000012 -#define SVC_MODE 0x00000013 -#define ABT_MODE 0x00000017 -#define UND_MODE 0x0000001b -#define SYSTEM_MODE 0x0000001f -#define MODE32_BIT 0x00000010 -#define MODE_MASK 0x0000001f -#define PSR_T_BIT 0x00000020 -#define PSR_F_BIT 0x00000040 -#define PSR_I_BIT 0x00000080 -#define PSR_A_BIT 0x00000100 -#define PSR_E_BIT 0x00000200 -#define PSR_J_BIT 0x01000000 -#define PSR_Q_BIT 0x08000000 -#define PSR_V_BIT 0x10000000 -#define PSR_C_BIT 0x20000000 -#define PSR_Z_BIT 0x40000000 -#define PSR_N_BIT 0x80000000 +#define USR26_MODE 0x00000000 +#define FIQ26_MODE 0x00000001 +#define IRQ26_MODE 0x00000002 +#define SVC26_MODE 0x00000003 +#define USR_MODE 0x00000010 +#define FIQ_MODE 0x00000011 +#define IRQ_MODE 0x00000012 +#define SVC_MODE 0x00000013 +#define ABT_MODE 0x00000017 +#define UND_MODE 0x0000001b +#define SYSTEM_MODE 0x0000001f +#define MODE32_BIT 0x00000010 +#define MODE_MASK 0x0000001f +#define PSR_T_BIT 0x00000020 +#define PSR_F_BIT 0x00000040 +#define PSR_I_BIT 0x00000080 +#define PSR_A_BIT 0x00000100 +#define PSR_E_BIT 0x00000200 +#define PSR_J_BIT 0x01000000 +#define PSR_Q_BIT 0x08000000 +#define PSR_V_BIT 0x10000000 +#define PSR_C_BIT 0x20000000 +#define PSR_Z_BIT 0x40000000 +#define 
PSR_N_BIT 0x80000000 /* * Groups of PSR bits */ -#define PSR_f 0xff000000 /* Flags */ -#define PSR_s 0x00ff0000 /* Status */ -#define PSR_x 0x0000ff00 /* Extension */ -#define PSR_c 0x000000ff /* Control */ +#define PSR_f 0xff000000 /* Flags */ +#define PSR_s 0x00ff0000 /* Status */ +#define PSR_x 0x0000ff00 /* Extension */ +#define PSR_c 0x000000ff /* Control */ #endif diff --git a/compel/arch/ppc64/src/lib/include/uapi/asm/breakpoints.h b/compel/arch/ppc64/src/lib/include/uapi/asm/breakpoints.h index 1ab89af76..5f090490d 100644 --- a/compel/arch/ppc64/src/lib/include/uapi/asm/breakpoints.h +++ b/compel/arch/ppc64/src/lib/include/uapi/asm/breakpoints.h @@ -4,12 +4,12 @@ static inline int ptrace_set_breakpoint(pid_t pid, void *addr) { - return 0; + return 0; } static inline int ptrace_flush_breakpoints(pid_t pid) { - return 0; + return 0; } #endif diff --git a/compel/arch/x86/src/lib/include/uapi/asm/fpu.h b/compel/arch/x86/src/lib/include/uapi/asm/fpu.h index 896c3f253..509f4488b 100644 --- a/compel/arch/x86/src/lib/include/uapi/asm/fpu.h +++ b/compel/arch/x86/src/lib/include/uapi/asm/fpu.h @@ -149,7 +149,7 @@ struct xsave_hdr_struct { * The high 128 bits are stored here. */ struct ymmh_struct { - uint32_t ymmh_space[64]; + uint32_t ymmh_space[64]; } __packed; /* Intel MPX support: */ diff --git a/compel/arch/x86/src/lib/infect.c b/compel/arch/x86/src/lib/infect.c index e76f7787d..0737e07a3 100644 --- a/compel/arch/x86/src/lib/infect.c +++ b/compel/arch/x86/src/lib/infect.c @@ -481,15 +481,15 @@ int arch_fetch_sas(struct parasite_ctl *ctl, struct rt_sigframe *s) /* Copied from the gdb header gdb/nat/x86-dregs.h */ /* Debug registers' indices. */ -#define DR_FIRSTADDR 0 -#define DR_LASTADDR 3 -#define DR_NADDR 4 /* The number of debug address registers. */ -#define DR_STATUS 6 /* Index of debug status register (DR6). */ -#define DR_CONTROL 7 /* Index of debug control register (DR7). 
*/ +#define DR_FIRSTADDR 0 +#define DR_LASTADDR 3 +#define DR_NADDR 4 /* The number of debug address registers. */ +#define DR_STATUS 6 /* Index of debug status register (DR6). */ +#define DR_CONTROL 7 /* Index of debug control register (DR7). */ -#define DR_LOCAL_ENABLE_SHIFT 0 /* Extra shift to the local enable bit. */ -#define DR_GLOBAL_ENABLE_SHIFT 1 /* Extra shift to the global enable bit. */ -#define DR_ENABLE_SIZE 2 /* Two enable bits per debug register. */ +#define DR_LOCAL_ENABLE_SHIFT 0 /* Extra shift to the local enable bit. */ +#define DR_GLOBAL_ENABLE_SHIFT 1 /* Extra shift to the global enable bit. */ +#define DR_ENABLE_SIZE 2 /* Two enable bits per debug register. */ /* Locally enable the break/watchpoint in the I'th debug register. */ #define X86_DR_LOCAL_ENABLE(i) (1 << (DR_LOCAL_ENABLE_SHIFT + DR_ENABLE_SIZE * (i))) diff --git a/compel/include/uapi/sigframe-common.h b/compel/include/uapi/sigframe-common.h index e35c8655e..fc93c5480 100644 --- a/compel/include/uapi/sigframe-common.h +++ b/compel/include/uapi/sigframe-common.h @@ -52,8 +52,8 @@ struct rt_ucontext { rt_stack_t uc_stack; struct rt_sigcontext uc_mcontext; k_rtsigset_t uc_sigmask; /* mask last for extensibility */ - int _unused[32 - (sizeof (k_rtsigset_t) / sizeof (int))]; - unsigned long uc_regspace[128] __attribute__((aligned(8))); + int _unused[32 - (sizeof (k_rtsigset_t) / sizeof (int))]; + unsigned long uc_regspace[128] __attribute__((aligned(8))); }; extern int sigreturn_prep_fpu_frame(struct rt_sigframe *frame, diff --git a/criu/arch/x86/include/asm/parasite.h b/criu/arch/x86/include/asm/parasite.h index 0ef1d9a86..6b4d4ac59 100644 --- a/criu/arch/x86/include/asm/parasite.h +++ b/criu/arch/x86/include/asm/parasite.h @@ -28,8 +28,8 @@ static int arch_get_user_desc(user_desc_t *desc) * }; */ asm volatile ( - " mov %0,%%eax \n" - " mov %1,%%rbx \n" + " mov %0,%%eax \n" + " mov %1,%%rbx \n" " int $0x80 \n" " mov %%eax,%0 \n" : "+m"(ret) diff --git 
a/criu/arch/x86/include/asm/restorer.h b/criu/arch/x86/include/asm/restorer.h index 15867aa12..3c43ce688 100644 --- a/criu/arch/x86/include/asm/restorer.h +++ b/criu/arch/x86/include/asm/restorer.h @@ -25,12 +25,12 @@ static inline int set_compat_robust_list(uint32_t head_ptr, uint32_t len) } #endif /* !CONFIG_COMPAT */ -#define RUN_CLONE_RESTORE_FN(ret, clone_flags, new_sp, parent_tid, \ - thread_args, clone_restore_fn) \ +#define RUN_CLONE_RESTORE_FN(ret, clone_flags, new_sp, parent_tid, \ + thread_args, clone_restore_fn) \ asm volatile( \ "clone_emul: \n" \ "movq %2, %%rsi \n" \ - "subq $16, %%rsi \n" \ + "subq $16, %%rsi \n" \ "movq %6, %%rdi \n" \ "movq %%rdi, 8(%%rsi) \n" \ "movq %5, %%rdi \n" \ @@ -39,16 +39,16 @@ static inline int set_compat_robust_list(uint32_t head_ptr, uint32_t len) "movq %3, %%rdx \n" \ "movq %4, %%r10 \n" \ "movl $"__stringify(__NR_clone)", %%eax \n" \ - "syscall \n" \ + "syscall \n" \ \ - "testq %%rax,%%rax \n" \ + "testq %%rax,%%rax \n" \ "jz thread_run \n" \ \ "movq %%rax, %0 \n" \ "jmp clone_end \n" \ \ "thread_run: \n" \ - "xorq %%rbp, %%rbp \n" \ + "xorq %%rbp, %%rbp \n" \ "popq %%rax \n" \ "popq %%rdi \n" \ "callq *%%rax \n" \ diff --git a/criu/cr-check.c b/criu/cr-check.c index 7addb9fb0..e24668305 100644 --- a/criu/cr-check.c +++ b/criu/cr-check.c @@ -468,7 +468,7 @@ err: } #ifndef SO_GET_FILTER -#define SO_GET_FILTER SO_ATTACH_FILTER +#define SO_GET_FILTER SO_ATTACH_FILTER #endif static int check_so_gets(void) diff --git a/criu/cr-restore.c b/criu/cr-restore.c index 7933fe37b..f25efb823 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -390,10 +390,10 @@ static int populate_root_fd_off(void) struct ns_id *mntns = NULL; int ret; - if (root_ns_mask & CLONE_NEWNS) { - mntns = lookup_ns_by_id(root_item->ids->mnt_ns_id, &mnt_ns_desc); - BUG_ON(!mntns); - } + if (root_ns_mask & CLONE_NEWNS) { + mntns = lookup_ns_by_id(root_item->ids->mnt_ns_id, &mnt_ns_desc); + BUG_ON(!mntns); + } ret = mntns_get_root_fd(mntns); if (ret < 
0) diff --git a/criu/filesystems.c b/criu/filesystems.c index 8a2c41853..1e4550b37 100644 --- a/criu/filesystems.c +++ b/criu/filesystems.c @@ -58,7 +58,7 @@ static int parse_binfmt_misc_entry(struct bfd *f, BinfmtMiscEntry *bme) char *str; str = breadline(f); - if (IS_ERR(str)) + if (IS_ERR(str)) return -1; if (!str) break; diff --git a/criu/include/aio.h b/criu/include/aio.h index 9a58089b6..858ccd3cf 100644 --- a/criu/include/aio.h +++ b/criu/include/aio.h @@ -13,18 +13,18 @@ struct task_restore_args; int prepare_aios(struct pstree_item *t, struct task_restore_args *ta); struct aio_ring { - unsigned id; /* kernel internal index number */ - unsigned nr; /* number of io_events */ - unsigned head; /* Written to by userland or under ring_lock + unsigned id; /* kernel internal index number */ + unsigned nr; /* number of io_events */ + unsigned head; /* Written to by userland or under ring_lock * mutex by aio_read_events_ring(). */ - unsigned tail; + unsigned tail; - unsigned magic; - unsigned compat_features; - unsigned incompat_features; - unsigned header_length; /* size of aio_ring */ + unsigned magic; + unsigned compat_features; + unsigned incompat_features; + unsigned header_length; /* size of aio_ring */ - struct io_event io_events[0]; + struct io_event io_events[0]; }; struct rst_aio_ring { diff --git a/criu/include/asm-generic/vdso.h b/criu/include/asm-generic/vdso.h index 81e54d264..6c3e3d137 100644 --- a/criu/include/asm-generic/vdso.h +++ b/criu/include/asm-generic/vdso.h @@ -1,8 +1,8 @@ #ifndef __CR_ASM_GENERIC_VDSO_H__ #define __CR_ASM_GENERIC_VDSO_H__ -#define VDSO_PROT (PROT_READ | PROT_EXEC) -#define VVAR_PROT (PROT_READ) +#define VDSO_PROT (PROT_READ | PROT_EXEC) +#define VVAR_PROT (PROT_READ) /* Just in case of LPAE system PFN is u64. 
*/ #define VDSO_BAD_PFN (-1ull) diff --git a/criu/include/autofs.h b/criu/include/autofs.h index d294277f6..c4618859b 100644 --- a/criu/include/autofs.h +++ b/criu/include/autofs.h @@ -20,70 +20,70 @@ int autofs_mount(struct mount_info *mi, const char *source, const #include -#define AUTOFS_DEVICE_NAME "autofs" +#define AUTOFS_DEVICE_NAME "autofs" #define AUTOFS_DEV_IOCTL_VERSION_MAJOR 1 #define AUTOFS_DEV_IOCTL_VERSION_MINOR 0 -#define AUTOFS_DEVID_LEN 16 +#define AUTOFS_DEVID_LEN 16 -#define AUTOFS_DEV_IOCTL_SIZE sizeof(struct autofs_dev_ioctl) +#define AUTOFS_DEV_IOCTL_SIZE sizeof(struct autofs_dev_ioctl) /* * An ioctl interface for autofs mount point control. */ struct args_protover { - __u32 version; + __u32 version; }; struct args_protosubver { - __u32 sub_version; + __u32 sub_version; }; struct args_openmount { - __u32 devid; + __u32 devid; }; struct args_ready { - __u32 token; + __u32 token; }; struct args_fail { - __u32 token; - __s32 status; + __u32 token; + __s32 status; }; struct args_setpipefd { - __s32 pipefd; + __s32 pipefd; }; struct args_timeout { - __u64 timeout; + __u64 timeout; }; struct args_requester { - __u32 uid; - __u32 gid; + __u32 uid; + __u32 gid; }; struct args_expire { - __u32 how; + __u32 how; }; struct args_askumount { - __u32 may_umount; + __u32 may_umount; }; struct args_ismountpoint { union { struct args_in { - __u32 type; + __u32 type; } in; struct args_out { - __u32 devid; - __u32 magic; + __u32 devid; + __u32 magic; } out; }; }; @@ -98,24 +98,24 @@ struct args_ismountpoint { struct autofs_dev_ioctl { __u32 ver_major; __u32 ver_minor; - __u32 size; /* total size of data passed in + __u32 size; /* total size of data passed in * including this struct */ - __s32 ioctlfd; /* automount command fd */ + __s32 ioctlfd; /* automount command fd */ /* Command parameters */ union { - struct args_protover protover; - struct args_protosubver protosubver; - struct args_openmount openmount; - struct args_ready ready; - struct args_fail fail; - 
struct args_setpipefd setpipefd; - struct args_timeout timeout; - struct args_requester requester; - struct args_expire expire; - struct args_askumount askumount; - struct args_ismountpoint ismountpoint; + struct args_protover protover; + struct args_protosubver protosubver; + struct args_openmount openmount; + struct args_ready ready; + struct args_fail fail; + struct args_setpipefd setpipefd; + struct args_timeout timeout; + struct args_requester requester; + struct args_expire expire; + struct args_askumount askumount; + struct args_ismountpoint ismountpoint; }; char path[0]; diff --git a/criu/include/packet_diag.h b/criu/include/packet_diag.h index e5d9193a8..287de84ec 100644 --- a/criu/include/packet_diag.h +++ b/criu/include/packet_diag.h @@ -12,18 +12,18 @@ struct packet_diag_req { __u32 pdiag_cookie[2]; }; -#define PACKET_SHOW_INFO 0x00000001 /* Basic packet_sk information */ -#define PACKET_SHOW_MCLIST 0x00000002 /* A set of packet_diag_mclist-s */ +#define PACKET_SHOW_INFO 0x00000001 /* Basic packet_sk information */ +#define PACKET_SHOW_MCLIST 0x00000002 /* A set of packet_diag_mclist-s */ #define PACKET_SHOW_RING_CFG 0x00000004 /* Rings configuration parameters */ #define PACKET_SHOW_FANOUT 0x00000008 struct packet_diag_msg { - __u8 pdiag_family; - __u8 pdiag_type; - __u16 pdiag_num; + __u8 pdiag_family; + __u8 pdiag_type; + __u16 pdiag_num; - __u32 pdiag_ino; - __u32 pdiag_cookie[2]; + __u32 pdiag_ino; + __u32 pdiag_cookie[2]; }; enum { @@ -37,18 +37,18 @@ enum { }; struct packet_diag_info { - __u32 pdi_index; - __u32 pdi_version; - __u32 pdi_reserve; - __u32 pdi_copy_thresh; - __u32 pdi_tstamp; - __u32 pdi_flags; + __u32 pdi_index; + __u32 pdi_version; + __u32 pdi_reserve; + __u32 pdi_copy_thresh; + __u32 pdi_tstamp; + __u32 pdi_flags; -#define PDI_RUNNING 0x1 -#define PDI_AUXDATA 0x2 -#define PDI_ORIGDEV 0x4 -#define PDI_VNETHDR 0x8 -#define PDI_LOSS 0x10 +#define PDI_RUNNING 0x1 +#define PDI_AUXDATA 0x2 +#define PDI_ORIGDEV 0x4 +#define PDI_VNETHDR 
0x8 +#define PDI_LOSS 0x10 }; #ifndef MAX_ADDR_LEN @@ -56,21 +56,21 @@ struct packet_diag_info { #endif struct packet_diag_mclist { - __u32 pdmc_index; - __u32 pdmc_count; - __u16 pdmc_type; - __u16 pdmc_alen; - __u8 pdmc_addr[MAX_ADDR_LEN]; + __u32 pdmc_index; + __u32 pdmc_count; + __u16 pdmc_type; + __u16 pdmc_alen; + __u8 pdmc_addr[MAX_ADDR_LEN]; }; struct packet_diag_ring { - __u32 pdr_block_size; - __u32 pdr_block_nr; - __u32 pdr_frame_size; - __u32 pdr_frame_nr; - __u32 pdr_retire_tmo; - __u32 pdr_sizeof_priv; - __u32 pdr_features; + __u32 pdr_block_size; + __u32 pdr_block_nr; + __u32 pdr_frame_size; + __u32 pdr_frame_nr; + __u32 pdr_retire_tmo; + __u32 pdr_sizeof_priv; + __u32 pdr_features; }; #endif /* __CR_PACKET_DIAG_H__ */ diff --git a/criu/include/page-pipe.h b/criu/include/page-pipe.h index 8fa1bfa5e..80e595871 100644 --- a/criu/include/page-pipe.h +++ b/criu/include/page-pipe.h @@ -6,11 +6,11 @@ #define PAGE_ALLOC_COSTLY_ORDER 3 /* from the kernel source code */ struct kernel_pipe_buffer { - struct page *page; - unsigned int offset, len; - const struct pipe_buf_operations *ops; - unsigned int flags; - unsigned long private; + struct page *page; + unsigned int offset, len; + const struct pipe_buf_operations *ops; + unsigned int flags; + unsigned long private; }; /* diff --git a/criu/include/ptrace-compat.h b/criu/include/ptrace-compat.h index 476da3536..e16fef036 100644 --- a/criu/include/ptrace-compat.h +++ b/criu/include/ptrace-compat.h @@ -7,9 +7,9 @@ #ifndef CONFIG_HAS_PTRACE_PEEKSIGINFO struct ptrace_peeksiginfo_args { - __u64 off; /* from which siginfo to start */ - __u32 flags; - __u32 nr; /* how may siginfos to take */ + __u64 off; /* from which siginfo to start */ + __u32 flags; + __u32 nr; /* how may siginfos to take */ }; #endif diff --git a/criu/include/sockets.h b/criu/include/sockets.h index 65b230131..cd98d18e0 100644 --- a/criu/include/sockets.h +++ b/criu/include/sockets.h @@ -92,7 +92,7 @@ static inline int sk_decode_shutdown(int val) 
extern int set_netns(uint32_t ns_id); #ifndef SIOCGSKNS -#define SIOCGSKNS 0x894C /* get socket network namespace */ +#define SIOCGSKNS 0x894C /* get socket network namespace */ #endif extern int kerndat_socket_netns(void); diff --git a/criu/include/util-pie.h b/criu/include/util-pie.h index ce78b0d19..a8137f441 100644 --- a/criu/include/util-pie.h +++ b/criu/include/util-pie.h @@ -10,7 +10,7 @@ #endif #ifndef SO_PEEK_OFF -#define SO_PEEK_OFF 42 +#define SO_PEEK_OFF 42 #endif #include "common/scm.h" diff --git a/criu/libnetlink.c b/criu/libnetlink.c index ca9968309..18a323b8d 100644 --- a/criu/libnetlink.c +++ b/criu/libnetlink.c @@ -222,5 +222,5 @@ int __wrap_nlmsg_parse(struct nlmsghdr *nlh, int hdrlen, struct nlattr *tb[], int32_t nla_get_s32(const struct nlattr *nla) { - return *(const int32_t *) nla_data(nla); + return *(const int32_t *) nla_data(nla); } diff --git a/criu/mount.c b/criu/mount.c index 118ba623e..c03a435c5 100644 --- a/criu/mount.c +++ b/criu/mount.c @@ -2325,8 +2325,8 @@ out: * mi->shared_id && !shared - create a new shared group */ if (restore_shared_options(mi, private, - mi->shared_id && !shared, - mi->master_id && !master)) + mi->shared_id && !shared, + mi->master_id && !master)) return -1; mi->mounted = true; diff --git a/criu/net.c b/criu/net.c index a5f632dd6..44b0ce224 100644 --- a/criu/net.c +++ b/criu/net.c @@ -344,7 +344,7 @@ static int ipv6_conf_op(char *tgt, SysctlEntry **conf, int n, int op, SysctlEntr * the kernel, simply write DEVCONFS_UNUSED * into the image so we would skip it. 
*/ -#define DEVCONFS_UNUSED (-1u) +#define DEVCONFS_UNUSED (-1u) static int ipv4_conf_op_old(char *tgt, int *conf, int n, int op, int *def_conf) { @@ -2765,7 +2765,7 @@ static int prep_ns_sockets(struct ns_id *ns, bool for_dump) freecon(ctx); if (ret < 0) { pr_perror("Setting SELinux socket context for PID %d failed", - root_item->pid->real); + root_item->pid->real); goto err_sq; } } @@ -3019,22 +3019,22 @@ int move_veth_to_bridge(void) #ifndef NETNSA_MAX /* Attributes of RTM_NEWNSID/RTM_GETNSID messages */ enum { - NETNSA_NONE, + NETNSA_NONE, #define NETNSA_NSID_NOT_ASSIGNED -1 - NETNSA_NSID, - NETNSA_PID, - NETNSA_FD, - __NETNSA_MAX, + NETNSA_NSID, + NETNSA_PID, + NETNSA_FD, + __NETNSA_MAX, }; -#define NETNSA_MAX (__NETNSA_MAX - 1) +#define NETNSA_MAX (__NETNSA_MAX - 1) #endif static struct nla_policy rtnl_net_policy[NETNSA_MAX + 1] = { - [NETNSA_NONE] = { .type = NLA_UNSPEC }, - [NETNSA_NSID] = { .type = NLA_S32 }, - [NETNSA_PID] = { .type = NLA_U32 }, - [NETNSA_FD] = { .type = NLA_U32 }, + [NETNSA_NONE] = { .type = NLA_UNSPEC }, + [NETNSA_NSID] = { .type = NLA_S32 }, + [NETNSA_PID] = { .type = NLA_U32 }, + [NETNSA_FD] = { .type = NLA_U32 }, }; static int nsid_cb(struct nlmsghdr *msg, struct ns_id *ns, void *arg) diff --git a/criu/pagemap-cache.c b/criu/pagemap-cache.c index aa39dacaa..a1c2d42f4 100644 --- a/criu/pagemap-cache.c +++ b/criu/pagemap-cache.c @@ -119,7 +119,7 @@ static int pmc_fill_cache(pmc_t *pmc, const struct vma_area *vma) * is to walk page tables less. 
*/ if (!pagemap_cache_disabled && - len < PMC_SIZE && (vma->e->start - low) < PMC_SIZE_GAP) { + len < PMC_SIZE && (vma->e->start - low) < PMC_SIZE_GAP) { size_t size_cov = len; size_t nr_vmas = 1; diff --git a/criu/sk-queue.c b/criu/sk-queue.c index 613e38461..fdf610170 100644 --- a/criu/sk-queue.c +++ b/criu/sk-queue.c @@ -29,7 +29,7 @@ struct sk_packet { struct list_head list; SkPacketEntry *entry; - char *data; + char *data; unsigned scm_len; int *scm; }; diff --git a/include/common/arch/arm/asm/atomic.h b/include/common/arch/arm/asm/atomic.h index 94e65cf3e..7998a20f2 100644 --- a/include/common/arch/arm/asm/atomic.h +++ b/include/common/arch/arm/asm/atomic.h @@ -29,9 +29,9 @@ static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new) "teq %1, %4\n" "it eq\n" "strexeq %0, %5, [%3]\n" - : "=&r" (res), "=&r" (oldval), "+Qo" (ptr->counter) - : "r" (&ptr->counter), "Ir" (old), "r" (new) - : "cc"); + : "=&r" (res), "=&r" (oldval), "+Qo" (ptr->counter) + : "r" (&ptr->counter), "Ir" (old), "r" (new) + : "cc"); } while (res); smp_mb(); @@ -47,13 +47,13 @@ static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new) static inline int atomic_cmpxchg(atomic_t *v, int old, int new) { - int ret; + int ret; - ret = v->counter; - if (ret == old) - v->counter = new; + ret = v->counter; + if (ret == old) + v->counter = new; - return ret; + return ret; } #else @@ -88,7 +88,7 @@ static inline int atomic_add_return(int i, atomic_t *v) " teq %1, #0\n" " bne 1b\n" : "=&r" (result), "=&r" (tmp), "+Qo" (v->counter) - : "r" (&v->counter), "Ir" (i) + : "r" (&v->counter), "Ir" (i) : "cc"); smp_mb(); diff --git a/include/common/arch/ppc64/asm/atomic.h b/include/common/arch/ppc64/asm/atomic.h index 461875c6e..4c6477412 100644 --- a/include/common/arch/ppc64/asm/atomic.h +++ b/include/common/arch/ppc64/asm/atomic.h @@ -8,7 +8,7 @@ */ typedef struct { - int counter; + int counter; } atomic_t; #include "common/arch/ppc64/asm/cmpxchg.h" diff --git 
a/include/common/arch/ppc64/asm/linkage.h b/include/common/arch/ppc64/asm/linkage.h index 506edc711..01a47ab1a 100644 --- a/include/common/arch/ppc64/asm/linkage.h +++ b/include/common/arch/ppc64/asm/linkage.h @@ -261,38 +261,38 @@ #define N_SLINE 68 #define N_SO 100 -#define __REG_R0 0 -#define __REG_R1 1 -#define __REG_R2 2 -#define __REG_R3 3 -#define __REG_R4 4 -#define __REG_R5 5 -#define __REG_R6 6 -#define __REG_R7 7 -#define __REG_R8 8 -#define __REG_R9 9 -#define __REG_R10 10 -#define __REG_R11 11 -#define __REG_R12 12 -#define __REG_R13 13 -#define __REG_R14 14 -#define __REG_R15 15 -#define __REG_R16 16 -#define __REG_R17 17 -#define __REG_R18 18 -#define __REG_R19 19 -#define __REG_R20 20 -#define __REG_R21 21 -#define __REG_R22 22 -#define __REG_R23 23 -#define __REG_R24 24 -#define __REG_R25 25 -#define __REG_R26 26 -#define __REG_R27 27 -#define __REG_R28 28 -#define __REG_R29 29 -#define __REG_R30 30 -#define __REG_R31 31 +#define __REG_R0 0 +#define __REG_R1 1 +#define __REG_R2 2 +#define __REG_R3 3 +#define __REG_R4 4 +#define __REG_R5 5 +#define __REG_R6 6 +#define __REG_R7 7 +#define __REG_R8 8 +#define __REG_R9 9 +#define __REG_R10 10 +#define __REG_R11 11 +#define __REG_R12 12 +#define __REG_R13 13 +#define __REG_R14 14 +#define __REG_R15 15 +#define __REG_R16 16 +#define __REG_R17 17 +#define __REG_R18 18 +#define __REG_R19 19 +#define __REG_R20 20 +#define __REG_R21 21 +#define __REG_R22 22 +#define __REG_R23 23 +#define __REG_R24 24 +#define __REG_R25 25 +#define __REG_R26 26 +#define __REG_R27 27 +#define __REG_R28 28 +#define __REG_R29 29 +#define __REG_R30 30 +#define __REG_R31 31 diff --git a/include/common/arch/x86/asm/cmpxchg.h b/include/common/arch/x86/asm/cmpxchg.h index 4b6951933..fa5eccf09 100644 --- a/include/common/arch/x86/asm/cmpxchg.h +++ b/include/common/arch/x86/asm/cmpxchg.h @@ -17,7 +17,7 @@ */ #define __xchg_op(ptr, arg, op, lock) \ ({ \ - __typeof__ (*(ptr)) __ret = (arg); \ + __typeof__ (*(ptr)) __ret = (arg); \ switch 
(sizeof(*(ptr))) { \ case __X86_CASE_B: \ asm volatile (lock #op "b %b0, %1\n" \ diff --git a/lib/c/criu.c b/lib/c/criu.c index 6ac510a87..9e36a9795 100644 --- a/lib/c/criu.c +++ b/lib/c/criu.c @@ -41,10 +41,10 @@ void criu_free_service(criu_opts *opts) case CRIU_COMM_SK: free((void*)(opts->service_address)); break; - case CRIU_COMM_BIN: + case CRIU_COMM_BIN: free((void*)(opts->service_binary)); break; - default: + default: break; } } diff --git a/soccr/soccr.c b/soccr/soccr.c index 1e4827e48..20eabfbd4 100644 --- a/soccr/soccr.c +++ b/soccr/soccr.c @@ -158,13 +158,13 @@ void libsoccr_release(struct libsoccr_sk *sk) } struct soccr_tcp_info { - __u8 tcpi_state; - __u8 tcpi_ca_state; - __u8 tcpi_retransmits; - __u8 tcpi_probes; - __u8 tcpi_backoff; - __u8 tcpi_options; - __u8 tcpi_snd_wscale : 4, tcpi_rcv_wscale : 4; + __u8 tcpi_state; + __u8 tcpi_ca_state; + __u8 tcpi_retransmits; + __u8 tcpi_probes; + __u8 tcpi_backoff; + __u8 tcpi_options; + __u8 tcpi_snd_wscale : 4, tcpi_rcv_wscale : 4; }; static int refresh_sk(struct libsoccr_sk *sk, From 94541a81c007475a139f3a1990cc36dda3d01773 Mon Sep 17 00:00:00 2001 From: hygonsoc Date: Mon, 15 Apr 2019 01:35:04 +0800 Subject: [PATCH 0012/2030] arch: add Hygon CPU Vendor ID("HygonGenuine") checking in compel_cpuid() to enable Hygon Dhyana, which can reuse most AMD CPU support codes. 
Signed-off-by: hygonsoc Signed-off-by: Andrei Vagin --- compel/arch/x86/src/lib/cpu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/compel/arch/x86/src/lib/cpu.c b/compel/arch/x86/src/lib/cpu.c index 9152765bf..617512167 100644 --- a/compel/arch/x86/src/lib/cpu.c +++ b/compel/arch/x86/src/lib/cpu.c @@ -269,7 +269,8 @@ int compel_cpuid(compel_cpuinfo_t *c) if (!strcmp(c->x86_vendor_id, "GenuineIntel")) { c->x86_vendor = X86_VENDOR_INTEL; - } else if (!strcmp(c->x86_vendor_id, "AuthenticAMD")) { + } else if (!strcmp(c->x86_vendor_id, "AuthenticAMD") || + !strcmp(c->x86_vendor_id, "HygonGenuine")) { c->x86_vendor = X86_VENDOR_AMD; } else { pr_err("Unsupported CPU vendor %s\n", From b61b260412ce2722924589f63a6cccf281ceb0df Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Thu, 18 Apr 2019 12:57:15 +0300 Subject: [PATCH 0013/2030] sk-inet: udp -- Don't fail on calling shutdown on disconnected socket If a socket has been connected and shut down, it may get disconnected then, leaving shutdown mode set inside (which we pull into image). On restore we should not fail when calling shutdown over -- the kernel has a hack to inform listeners even on closed sockets. From userspace perspective, to reuse such a socket one has to connect it back, so it should be safe.
Signed-off-by: Cyrill Gorcunov Signed-off-by: Andrei Vagin --- criu/sk-inet.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/criu/sk-inet.c b/criu/sk-inet.c index 0b7f0d7ff..ca5c9bf2c 100644 --- a/criu/sk-inet.c +++ b/criu/sk-inet.c @@ -894,9 +894,14 @@ done: (ie->proto == IPPROTO_UDP || ie->proto == IPPROTO_UDPLITE)) { if (shutdown(sk, sk_decode_shutdown(ie->shutdown))) { - pr_perror("Can't shutdown socket into %d", - sk_decode_shutdown(ie->shutdown)); - goto err; + if (ie->state != TCP_CLOSE && errno != ENOTCONN) { + pr_perror("Can't shutdown socket into %d", + sk_decode_shutdown(ie->shutdown)); + goto err; + } else { + pr_debug("Called shutdown on closed socket, " + "proto %d ino %x", ie->proto, ie->ino); + } } } From bd4a52e82e4fd9c60857970a23f038fa16b94fbc Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Thu, 18 Apr 2019 12:57:16 +0300 Subject: [PATCH 0014/2030] test: socket_udplite -- Test shut down sockets Signed-off-by: Cyrill Gorcunov Signed-off-by: Andrei Vagin --- test/zdtm/static/socket_udplite.c | 60 +++++++++++++++++++++++++++++-- 1 file changed, 58 insertions(+), 2 deletions(-) diff --git a/test/zdtm/static/socket_udplite.c b/test/zdtm/static/socket_udplite.c index d2510ef2f..229005a10 100644 --- a/test/zdtm/static/socket_udplite.c +++ b/test/zdtm/static/socket_udplite.c @@ -28,9 +28,9 @@ static char buf[8]; int main(int argc, char **argv) { - int ret, sk1, sk2; + int ret, sk1, sk2, sk3, sk4; socklen_t len = sizeof(struct sockaddr_in); - struct sockaddr_in addr1, addr2, addr; + struct sockaddr_in addr1, addr2, addr3, addr4, addr; test_init(argc, argv); @@ -74,6 +74,62 @@ int main(int argc, char **argv) return 1; } + sk3 = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDPLITE); + if (sk3 < 0) { + pr_perror("Can't create socket"); + return 1; + } + + memset(&addr3, 0, sizeof(addr3)); + addr3.sin_family = AF_INET; + addr3.sin_addr.s_addr = inet_addr("127.0.0.1"); + addr3.sin_port = htons(port + 2); + + ret = bind(sk3, (struct
sockaddr *)&addr3, len); + if (ret < 0) { + pr_perror("Can't bind socket"); + return 1; + } + + sk4 = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDPLITE); + if (sk4 < 0) { + pr_perror("Can't create socket"); + return 1; + } + + memset(&addr4, 0, sizeof(addr4)); + addr4.sin_family = AF_INET; + addr4.sin_addr.s_addr = inet_addr("0.0.0.0"); + addr4.sin_port = htons(0); + + ret = bind(sk4, (struct sockaddr *)&addr4, len); + if (ret < 0) { + pr_perror("Can't bind socket"); + return 1; + } + + ret = connect(sk4, (struct sockaddr *)&addr3, len); + if (ret < 0) { + pr_perror("Can't connect"); + return 1; + } + + ret = connect(sk3, (struct sockaddr *)&addr4, len); + if (ret < 0) { + pr_perror("Can't connect"); + return 1; + } + + if (shutdown(sk4, SHUT_RDWR)) { + pr_perror("Can't shutdown socket"); + return 1; + } + + if (shutdown(sk3, SHUT_RDWR)) { + pr_perror("Can't shutdown socket"); + return 1; + } + test_daemon(); test_waitsig(); From 4a090153c7f61d705c9251cfe1fe24f24454e069 Mon Sep 17 00:00:00 2001 From: Zhang Ning Date: Thu, 18 Apr 2019 10:13:58 +0800 Subject: [PATCH 0015/2030] criu/clone: stack size is too small for Android stack for clone is too small, child process will get wild pointer, and segfault. Error (criu/cr-restore.c:1418): 6082 killed by signal 11: Segmentation fault Error (criu/cr-restore.c:2303): Restoring FAILED. enlarge stack size to 1024, then no segfault. 
Cc: Chen Hu Signed-off-by: Zhang Ning Signed-off-by: Andrei Vagin --- criu/clone-noasan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/criu/clone-noasan.c b/criu/clone-noasan.c index c44e71969..5ca280eb8 100644 --- a/criu/clone-noasan.c +++ b/criu/clone-noasan.c @@ -21,7 +21,7 @@ */ int clone_noasan(int (*fn)(void *), int flags, void *arg) { - void *stack_ptr = (void *)round_down((unsigned long)&stack_ptr - 256, 16); + void *stack_ptr = (void *)round_down((unsigned long)&stack_ptr - 1024, 16); BUG_ON((flags & CLONE_VM) && !(flags & CLONE_VFORK)); /* * Reserve some bytes for clone() internal needs From 1904a98550427e5c8d545e50ce3cd3f1d3369835 Mon Sep 17 00:00:00 2001 From: Rikard Falkeborn Date: Sun, 28 Apr 2019 20:22:49 +0200 Subject: [PATCH 0016/2030] compel/s390: Fix memset sizeof sizeof(sizeof(x)) is the size of size_t. Instead use the size of the array to ensure the entire array is zeroed. Signed-off-by: Rikard Falkeborn Reviewed-by: Dmitry Safonov <0x7f454c46@gmail.com> Signed-off-by: Andrei Vagin --- compel/arch/s390/src/lib/infect.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/compel/arch/s390/src/lib/infect.c b/compel/arch/s390/src/lib/infect.c index b690b8122..fcb463fa8 100644 --- a/compel/arch/s390/src/lib/infect.c +++ b/compel/arch/s390/src/lib/infect.c @@ -148,10 +148,8 @@ int sigreturn_prep_regs_plain(struct rt_sigframe *sigframe, memcpy(&dst_ext->vxrs_high, &fpregs->vxrs_high, sizeof(fpregs->vxrs_high)); } else { - memset(&dst_ext->vxrs_low, 0, - sizeof(sizeof(fpregs->vxrs_low))); - memset(&dst_ext->vxrs_high, 0, - sizeof(sizeof(fpregs->vxrs_high))); + memset(&dst_ext->vxrs_low, 0, sizeof(dst_ext->vxrs_low)); + memset(&dst_ext->vxrs_high, 0, sizeof(dst_ext->vxrs_high)); } return 0; } From 468f818f89b00c557cfca2fcc8e93b2d0128c339 Mon Sep 17 00:00:00 2001 From: Rikard Falkeborn Date: Sun, 28 Apr 2019 20:22:50 +0200 Subject: [PATCH 0017/2030] compel/s390: Fix return value in error path In a function with 
return type bool, returning a non-zero value is interpreted as returning true. In the error paths we want to return false to indicate failure. Change -1 to false to fix this. Signed-off-by: Rikard Falkeborn Reviewed-by: Dmitry Safonov <0x7f454c46@gmail.com> Signed-off-by: Andrei Vagin --- compel/arch/s390/src/lib/infect.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compel/arch/s390/src/lib/infect.c b/compel/arch/s390/src/lib/infect.c index fcb463fa8..00e9c36d2 100644 --- a/compel/arch/s390/src/lib/infect.c +++ b/compel/arch/s390/src/lib/infect.c @@ -493,7 +493,7 @@ bool arch_can_dump_task(struct parasite_ctl *ctl) if (psw->mask & PSW_MASK_RI) { if (get_ri_cb(pid, &fpregs) < 0) { pr_perror("Can't dump process with RI bit active"); - return -1; + return false; } } /* We don't support 24 and 31 bit mode - only 64 bit */ From 66846b26a070928a9ebd1a7b6816ccf1912b1605 Mon Sep 17 00:00:00 2001 From: Rikard Falkeborn Date: Sun, 28 Apr 2019 20:22:51 +0200 Subject: [PATCH 0018/2030] test/zdtm: Move assignment after return value check If read() fails we can not use the return value as index. Move the use of it to after the error check to avoid this. 
Signed-off-by: Rikard Falkeborn Reviewed-by: Dmitry Safonov <0x7f454c46@gmail.com> Signed-off-by: Andrei Vagin --- test/zdtm/lib/ns.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/zdtm/lib/ns.c b/test/zdtm/lib/ns.c index 6b4a75399..3099f7495 100644 --- a/test/zdtm/lib/ns.c +++ b/test/zdtm/lib/ns.c @@ -325,11 +325,11 @@ int ns_init(int argc, char **argv) exit(1); } ret = read(fd, buf, sizeof(buf) - 1); - buf[ret] = '\0'; if (ret == -1) { fprintf(stderr, "read() failed: %m\n"); exit(1); } + buf[ret] = '\0'; pid = atoi(buf); fprintf(stderr, "kill(%d, SIGTERM)\n", pid); From bdf8972b6151ff31740f3986a0c92b4abd674ce0 Mon Sep 17 00:00:00 2001 From: Rikard Falkeborn Date: Sun, 28 Apr 2019 20:22:52 +0200 Subject: [PATCH 0019/2030] test: remove unused variables Signed-off-by: Rikard Falkeborn Reviewed-by: Dmitry Safonov <0x7f454c46@gmail.com> Signed-off-by: Andrei Vagin --- test/others/bers/bers.c | 4 +--- test/others/libcriu/test_errno.c | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/test/others/bers/bers.c b/test/others/bers/bers.c index 208325c67..0954868ff 100644 --- a/test/others/bers/bers.c +++ b/test/others/bers/bers.c @@ -93,7 +93,6 @@ static int sys_gettid(void) static void dirtify_memory(unsigned long *chunks, size_t nr_chunks, size_t chunk_size, int mode, const size_t nr_pages) { - void *page; size_t i; pr_trace("filling memory\n"); @@ -115,7 +114,7 @@ static void dirtify_memory(unsigned long *chunks, size_t nr_chunks, static void dirtify_files(int *fd, size_t nr_files, size_t size) { size_t buf[8192]; - size_t i, j, c; + size_t i; /* * Note we don't write any _sane_ data here, the only @@ -265,7 +264,6 @@ int main(int argc, char *argv[]) char workdir[PATH_MAX]; int opt, idx, pidfd; char pidbuf[32]; - int status; pid_t pid; size_t i; diff --git a/test/others/libcriu/test_errno.c b/test/others/libcriu/test_errno.c index e09144304..8bd19fe2f 100644 --- a/test/others/libcriu/test_errno.c +++ 
b/test/others/libcriu/test_errno.c @@ -56,7 +56,7 @@ static int no_process(void) size_t len; ssize_t count; char *buf = NULL; - int pid, fd, ret; + int pid, ret; printf("--- Try to dump unexisting process\n"); From 8bc0ad91213d43dfcf6698af23ad13faf63e5f40 Mon Sep 17 00:00:00 2001 From: Rikard Falkeborn Date: Sun, 28 Apr 2019 20:22:53 +0200 Subject: [PATCH 0020/2030] files-reg: Remove redundant inner if Remove a redundant if-statement, since the same condition is already checked in the outer if-statement. Signed-off-by: Rikard Falkeborn Reviewed-by: Dmitry Safonov <0x7f454c46@gmail.com> Signed-off-by: Andrei Vagin --- criu/files-reg.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/criu/files-reg.c b/criu/files-reg.c index 1a35166a5..3072289ef 100644 --- a/criu/files-reg.c +++ b/criu/files-reg.c @@ -1704,12 +1704,10 @@ ext: } if (rfi->rfe->has_mode && (st.st_mode != rfi->rfe->mode)) { - if (st.st_mode != rfi->rfe->mode) { - pr_err("File %s has bad mode 0%o (expect 0%o)\n", - rfi->path, (int)st.st_mode, - rfi->rfe->mode); - return -1; - } + pr_err("File %s has bad mode 0%o (expect 0%o)\n", + rfi->path, (int)st.st_mode, + rfi->rfe->mode); + return -1; } /* From e95b5c67ae74759c9d08dc5465b032a7e391c3b1 Mon Sep 17 00:00:00 2001 From: Rikard Falkeborn Date: Sun, 28 Apr 2019 20:22:54 +0200 Subject: [PATCH 0021/2030] test: add missing va_end Signed-off-by: Rikard Falkeborn Reviewed-by: Dmitry Safonov <0x7f454c46@gmail.com> Signed-off-by: Andrei Vagin --- test/zdtm/static/autofs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/test/zdtm/static/autofs.c b/test/zdtm/static/autofs.c index c2e047714..f74bc35ac 100644 --- a/test/zdtm/static/autofs.c +++ b/test/zdtm/static/autofs.c @@ -49,6 +49,7 @@ static char *xvstrcat(char *str, const char *fmt, va_list args) if (new) { va_copy(tmp, args); ret = vsnprintf(new + offset, delta, fmt, tmp); + va_end(tmp); if (ret >= delta) { /* NOTE: vsnprintf returns the amount of bytes * * to allocate. 
*/ From d04eba411f0740d93649371605ab0ab9e9abdfb3 Mon Sep 17 00:00:00 2001 From: Rikard Falkeborn Date: Sun, 28 Apr 2019 20:22:55 +0200 Subject: [PATCH 0022/2030] test/bers: Fix sizeof to memset sizeof(fd) is the size of the pointer. Make sure the entire array is set by using the number of elements times the size of the elements. Signed-off-by: Rikard Falkeborn Reviewed-by: Dmitry Safonov <0x7f454c46@gmail.com> Signed-off-by: Andrei Vagin --- test/others/bers/bers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/others/bers/bers.c b/test/others/bers/bers.c index 0954868ff..90b70c349 100644 --- a/test/others/bers/bers.c +++ b/test/others/bers/bers.c @@ -138,7 +138,7 @@ static int create_files(shared_data_t *shared, int *fd, size_t nr_files) char path[PATH_MAX]; size_t i; - memset(fd, 0xff, sizeof(fd)); + memset(fd, 0xff, sizeof(*fd) * MAX_CHUNK); pr_info("\tCreating %lu files\n", shared->opt_files); From 3b579373c1463d4cddd458cb6171917846815528 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 2 May 2019 02:34:42 +0100 Subject: [PATCH 0023/2030] zdtm/vdso/ia32: Use uint64_t for /proc/self/maps Add some comments to state things those might be not obvious. 
Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- test/zdtm/static/vdso-proxy.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/test/zdtm/static/vdso-proxy.c b/test/zdtm/static/vdso-proxy.c index 2381127b7..66d6741f4 100644 --- a/test/zdtm/static/vdso-proxy.c +++ b/test/zdtm/static/vdso-proxy.c @@ -1,3 +1,4 @@ +#include #include #include @@ -8,6 +9,10 @@ const char *test_author = "Dmitry Safonov "; #define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) #define VDSO_BAD_ADDR (-1ul) +/* + * Use constant MAX_VMAS - to minimize the risk of allocating a new + * mapping or changing the size of existent VMA with realloc() + */ #define MAX_VMAS 80 #define BUF_SIZE 1024 @@ -18,8 +23,13 @@ const char *test_author = "Dmitry Safonov "; * Also previous vdso/vvar vma should still be present after C/R. */ struct vm_area { - unsigned long start; - unsigned long end; + /* + * Intentionally use 64bit integer to make sure that it's possible + * to parse mappings >4Gb - those might appear on ia32 + * that's restored by x86_64 CRIU ¯\(°_o)/¯ + */ + uint64_t start; + uint64_t end; bool is_vvar_or_vdso; }; @@ -43,11 +53,12 @@ static int parse_maps(struct vm_area *vmas) if (fgets(buf, BUF_SIZE, maps) == NULL) break; - v->start = strtoul(buf, &end, 16); - v->end = strtoul(end + 1, NULL, 16); + v->start = strtoull(buf, &end, 16); + v->end = strtoull(end + 1, NULL, 16); v->is_vvar_or_vdso |= strstr(buf, "[vdso]") != NULL; v->is_vvar_or_vdso |= strstr(buf, "[vvar]") != NULL; - test_msg("[NOTE]\tVMA: [%#lx, %#lx]\n", v->start, v->end); + test_msg("[NOTE]\tVMA: [%#" PRIx64 ", %#" PRIx64 "]\n", + v->start, v->end); } if (i == MAX_VMAS) { @@ -88,7 +99,7 @@ static int check_vvar_vdso(struct vm_area *before, struct vm_area *after) continue; if (cmp < 0) {/* Lost mapping */ - test_msg("[NOTE]\tLost mapping: %#lx-%#lx\n", + test_msg("[NOTE]\tLost mapping: %#" PRIx64 "-%#" PRIx64 "\n", before[i].start, before[i].end); j--; if 
(before[i].is_vvar_or_vdso) { @@ -98,7 +109,7 @@ static int check_vvar_vdso(struct vm_area *before, struct vm_area *after) continue; } - test_msg("[NOTE]\tNew mapping appeared: %#lx-%#lx\n", + test_msg("[NOTE]\tNew mapping appeared: %#" PRIx64 "-%#" PRIx64 "\n", after[j].start, after[j].end); i--; } From a0662df6f9e74da75e6f8f5c60160d37d02db117 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 2 May 2019 02:34:43 +0100 Subject: [PATCH 0024/2030] zdtm/vdso/ia32: Ignore vsyscall page appear Not a major bummer. On the other side, it's also becomes less important as it seems that distribution switches from LEGACY_VSYSCALL_EMULATE to LEGACY_VSYSCALL_NONE (by security reasons). Might be not worth fixing at all in the end. Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- test/zdtm/static/vdso-proxy.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/test/zdtm/static/vdso-proxy.c b/test/zdtm/static/vdso-proxy.c index 66d6741f4..ecb71e892 100644 --- a/test/zdtm/static/vdso-proxy.c +++ b/test/zdtm/static/vdso-proxy.c @@ -8,7 +8,7 @@ const char *test_doc = "Compare mappings before/after C/R for vdso/vvar presence const char *test_author = "Dmitry Safonov "; #define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) -#define VDSO_BAD_ADDR (-1ul) +#define VSYSCALL_START 0xffffffffff600000ULL /* * Use constant MAX_VMAS - to minimize the risk of allocating a new * mapping or changing the size of existent VMA with realloc() @@ -55,6 +55,18 @@ static int parse_maps(struct vm_area *vmas) v->start = strtoull(buf, &end, 16); v->end = strtoull(end + 1, NULL, 16); + +#if defined(__i386__) + /* + * XXX: ia32 is being restored from x86_64 and leaves + * emulated vsyscall "mapping". Hopefully, will be done + * per-process, ignore for now. 
+ */ + if (v->start == VSYSCALL_START) { + i--; + continue; + } +#endif v->is_vvar_or_vdso |= strstr(buf, "[vdso]") != NULL; v->is_vvar_or_vdso |= strstr(buf, "[vvar]") != NULL; test_msg("[NOTE]\tVMA: [%#" PRIx64 ", %#" PRIx64 "]\n", From 0e22e245e5429a906efa72e0555600b6488c68dc Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 2 May 2019 02:34:44 +0100 Subject: [PATCH 0025/2030] x86/compel/infect: Be verbose on remote mmap failure Error-case print missing. Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- compel/arch/x86/src/lib/infect.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/compel/arch/x86/src/lib/infect.c b/compel/arch/x86/src/lib/infect.c index 0737e07a3..11e7f4c91 100644 --- a/compel/arch/x86/src/lib/infect.c +++ b/compel/arch/x86/src/lib/infect.c @@ -375,10 +375,13 @@ void *remote_mmap(struct parasite_ctl *ctl, if (err < 0) return NULL; + if (map == -EACCES && (prot & PROT_WRITE) && (prot & PROT_EXEC)) { + pr_warn("mmap(PROT_WRITE | PROT_EXEC) failed for %d, " + "check selinux execmem policy\n", ctl->rpid); + return NULL; + } if (IS_ERR_VALUE(map)) { - if (map == -EACCES && (prot & PROT_WRITE) && (prot & PROT_EXEC)) - pr_warn("mmap(PROT_WRITE | PROT_EXEC) failed for %d, " - "check selinux execmem policy\n", ctl->rpid); + pr_err("remote mmap() failed: %s\n", strerror(-map)); return NULL; } From 8c5b25cbf5216d1253f8d19e7279b75b94e3ec2e Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 2 May 2019 02:34:45 +0100 Subject: [PATCH 0026/2030] zdtm/thread-bomb: Limit stack size in thread-bomb ia32 thread-bomb test failed when compel refused to seize the test, trying to mmap() in remote process and getting ENOMEM. It turns to be true - remote process thread-bomb was filled with 8Mb mappings created by pthread_create() (the default stack size). So, that 1024 * 8Mb is a bit too much to place in 4Gb. Fix the test on 32-bit platforms by using much smaller stack. Also check the return value of pthread_create(). 
Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- test/zdtm/transition/thread-bomb.c | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/test/zdtm/transition/thread-bomb.c b/test/zdtm/transition/thread-bomb.c index 0b794ef2e..6621b18ed 100644 --- a/test/zdtm/transition/thread-bomb.c +++ b/test/zdtm/transition/thread-bomb.c @@ -11,6 +11,10 @@ #define exit_group(code) \ syscall(__NR_exit_group, code) +static pthread_attr_t attr; +/* Having in mind setup with 64 Kb large pages */ +static const size_t stack_size = 64 * 1024; + static void *thread_fn(void *arg) { pthread_t t, p, *self; @@ -24,14 +28,27 @@ static void *thread_fn(void *arg) self = malloc(sizeof(*self)); *self = pthread_self(); - pthread_create(&t, NULL, thread_fn, self); + pthread_create(&t, &attr, thread_fn, self); return NULL; } int main(int argc, char **argv) { - char *val; int max_nr = 1024, i; + char *val; + int err; + + err = pthread_attr_init(&attr); + if (err) { + pr_err("pthread_attr_init(): %d\n", err); + exit(1); + } + + err = pthread_attr_setstacksize(&attr, stack_size); + if (err) { + pr_err("pthread_attr_setstacksize(): %d\n", err); + exit(1); + } val = getenv("ZDTM_THREAD_BOMB"); if (val) @@ -43,7 +60,11 @@ int main(int argc, char **argv) for (i = 0; i < max_nr; i++) { pthread_t p; - pthread_create(&p, NULL, thread_fn, NULL); + err = pthread_create(&p, &attr, thread_fn, NULL); + if (err) { + pr_err("pthread_create(): %d\n", err); + exit(1); + } } test_daemon(); From 4eb2df5ae6f525e55506a92b679e293e36d7f993 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Wed, 1 May 2019 14:59:44 +0100 Subject: [PATCH 0027/2030] cgroup: Add "ignore" mode for --manage-cgroups Since commit 6c572bee8f10 ("cgroup: Set "soft" mode by default") it became impossible to set ignore mode at all. Provide a user option to do that.
Cc: Cyrill Gorcunov Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- Documentation/criu.txt | 2 ++ criu/config.c | 2 ++ criu/crtools.c | 3 ++- 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/Documentation/criu.txt b/Documentation/criu.txt index 414c9bb2d..6111c3baf 100644 --- a/Documentation/criu.txt +++ b/Documentation/criu.txt @@ -440,6 +440,8 @@ The 'mode' may be one of the following: *strict*::: Restore all cgroups and their properties from the scratch, requiring them to not present in the system. + *ignore*::: Don't deal with cgroups and pretend that they don't exist. + *--cgroup-root* ['controller'*:*]/'newroot':: Change the root cgroup the controller will be installed into. No controller means that root is the default for all controllers not specified. diff --git a/criu/config.c b/criu/config.c index d5354ae9c..11c49e73b 100644 --- a/criu/config.c +++ b/criu/config.c @@ -368,6 +368,8 @@ static int parse_manage_cgroups(struct cr_options *opts, const char *optarg) opts->manage_cgroups = CG_MODE_FULL; } else if (!strcmp(optarg, "strict")) { opts->manage_cgroups = CG_MODE_STRICT; + } else if (!strcmp(optarg, "ignore")) { + opts->manage_cgroups = CG_MODE_IGNORE; } else goto Esyntax; diff --git a/criu/crtools.c b/criu/crtools.c index 9c8064462..55562a63e 100644 --- a/criu/crtools.c +++ b/criu/crtools.c @@ -345,7 +345,8 @@ usage: " --irmap-scan-path FILE\n" " add a path the irmap hints to scan\n" " --manage-cgroups [m] dump/restore process' cgroups; argument can be one of\n" -" 'none', 'props', 'soft' (default), 'full' or 'strict'\n" +" 'none', 'props', 'soft' (default), 'full', 'strict'\n" +" or 'ignore'\n" " --cgroup-root [controller:]/newroot\n" " on dump: change the root for the controller that will\n" " be dumped. 
By default, only the paths with tasks in\n" From 9c9d1516932a4905d6aa9979ca3233b45c7e54b4 Mon Sep 17 00:00:00 2001 From: Harshavardhan Unnibhavi Date: Sun, 7 Apr 2019 14:29:53 +0530 Subject: [PATCH 0028/2030] test/exhaustive: Replace map by list comprehension Fixes #331. https://github.com/checkpoint-restore/criu/issues/331 Signed-off-by: Harshavardhan Unnibhavi Signed-off-by: Andrei Vagin --- test/exhaustive/unix.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/exhaustive/unix.py b/test/exhaustive/unix.py index 6b1ed85d8..41053bd0d 100755 --- a/test/exhaustive/unix.py +++ b/test/exhaustive/unix.py @@ -415,7 +415,7 @@ class state: # one in which. At the same time really different states # shouldn't map to the same string. def describe(self): - sks = map(lambda x: x.describe(self), self.sockets) + sks = [x.describe(self) for x in self.sockets] sks = sorted(sks) return '_'.join(sks) From 66428d2845250a15d633bc6d63966b8001bb4dcf Mon Sep 17 00:00:00 2001 From: guoqd Date: Mon, 22 Apr 2019 14:40:11 +0800 Subject: [PATCH 0029/2030] [coredump]: correct the parsing of reg_files from files.img Fixes #679 --- coredump/criu_coredump/coredump.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/coredump/criu_coredump/coredump.py b/coredump/criu_coredump/coredump.py index 963e8c61b..2b0c37f1a 100644 --- a/coredump/criu_coredump/coredump.py +++ b/coredump/criu_coredump/coredump.py @@ -163,7 +163,8 @@ class coredump_generator: self.mms[pid] = self._img_open_and_strip("mm", True, pid) self.pagemaps[pid] = self._img_open_and_strip("pagemap", False, pid) - self.reg_files = self._img_open_and_strip("reg-files", False) + files = self._img_open_and_strip("files", False) + self.reg_files = [ x["reg"] for x in files if x["type"]=="REG" ] for pid in self.pstree: self.coredumps[pid] = self._gen_coredump(pid) From 8e59ed48bd31e52831b77a339a900ad77cce09e0 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Sat, 4 May 2019 19:53:55 +0100 Subject: 
[PATCH 0030/2030] zdtm: Simplify string to boolean conversion The built-in bool() function returns a boolean value by converting the input using standard truth testing procedure. https://docs.python.org/3/library/functions.html#bool Signed-off-by: Radostin Stoyanov --- test/zdtm.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/test/zdtm.py b/test/zdtm.py index 3344c0022..fb859d1c7 100755 --- a/test/zdtm.py +++ b/test/zdtm.py @@ -940,23 +940,23 @@ class criu: self.__dump_path = None self.__iter = 0 self.__prev_dump_iter = None - self.__page_server = (opts['page_server'] and True or False) - self.__remote_lazy_pages = (opts['remote_lazy_pages'] and True or False) + self.__page_server = bool(opts['page_server']) + self.__remote_lazy_pages = bool(opts['remote_lazy_pages']) self.__lazy_pages = (self.__remote_lazy_pages or - opts['lazy_pages'] and True or False) - self.__lazy_migrate = (opts['lazy_migrate'] and True or False) - self.__restore_sibling = (opts['sibling'] and True or False) - self.__join_ns = (opts['join_ns'] and True or False) - self.__empty_ns = (opts['empty_ns'] and True or False) - self.__fault = (opts['fault']) + bool(opts['lazy_pages'])) + self.__lazy_migrate = bool(opts['lazy_migrate']) + self.__restore_sibling = bool(opts['sibling']) + self.__join_ns = bool(opts['join_ns']) + self.__empty_ns = bool(opts['empty_ns']) + self.__fault = opts['fault'] self.__script = opts['script'] - self.__sat = (opts['sat'] and True or False) - self.__dedup = (opts['dedup'] and True or False) - self.__mdedup = (opts['noauto_dedup'] and True or False) - self.__user = (opts['user'] and True or False) - self.__leave_stopped = (opts['stop'] and True or False) + self.__sat = bool(opts['sat']) + self.__dedup = bool(opts['dedup']) + self.__mdedup = bool(opts['noauto_dedup']) + self.__user = bool(opts['user']) + self.__leave_stopped = bool(opts['stop']) self.__criu = (opts['rpc'] and criu_rpc or criu_cli) - self.__show_stats = 
(opts['show_stats'] and True or False) + self.__show_stats = bool(opts['show_stats']) self.__lazy_pages_p = None self.__page_server_p = None self.__dump_process = None From 9483b12935dfe3160cc30cf0d1b402c6ceb70711 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Tue, 16 Apr 2019 17:16:46 +0100 Subject: [PATCH 0031/2030] sk-inet: restore SO_BROADCAST option Inet sockets may have broadcasting capability enabled. The SO_BROADCAST option is used to enable this feature. It is a Boolean flag option, which is defined, fetched, and set with the int data type. During checkpoint, CRIU should detect the state of this flag, and during restore, it should be set appropriately. Fixes #673 Reported-by: @dubukuangye Signed-off-by: Radostin Stoyanov --- criu/sk-inet.c | 4 ++++ criu/sockets.c | 9 +++++++++ images/sk-opts.proto | 1 + 3 files changed, 14 insertions(+) diff --git a/criu/sk-inet.c b/criu/sk-inet.c index ca5c9bf2c..ebae53113 100644 --- a/criu/sk-inet.c +++ b/criu/sk-inet.c @@ -742,6 +742,10 @@ static int post_open_inet_sk(struct file_desc *d, int sk) if (!val && restore_opt(sk, SOL_SOCKET, SO_REUSEPORT, &val)) return -1; + val = ii->ie->opts->so_broadcast; + if (!val && restore_opt(sk, SOL_SOCKET, SO_BROADCAST, &val)) + return -1; + return 0; } diff --git a/criu/sockets.c b/criu/sockets.c index 7f7453ca1..312b55c6d 100644 --- a/criu/sockets.c +++ b/criu/sockets.c @@ -566,6 +566,11 @@ int restore_socket_opts(int sk, SkOptsEntry *soe) pr_debug("\tset no_check for socket\n"); ret |= restore_opt(sk, SOL_SOCKET, SO_NO_CHECK, &val); } + if (soe->has_so_broadcast && soe->so_broadcast) { + val = 1; + pr_debug("\tset broadcast for socket\n"); + ret |= restore_opt(sk, SOL_SOCKET, SO_BROADCAST, &val); + } tv.tv_sec = soe->so_snd_tmo_sec; tv.tv_usec = soe->so_snd_tmo_usec; @@ -647,6 +652,10 @@ int dump_socket_opts(int sk, SkOptsEntry *soe) soe->has_so_no_check = true; soe->so_no_check = val ? 
true : false; + ret |= dump_opt(sk, SOL_SOCKET, SO_BROADCAST, &val); + soe->has_so_broadcast = true; + soe->so_broadcast = val ? true : false; + ret |= dump_bound_dev(sk, soe); ret |= dump_socket_filter(sk, soe); diff --git a/images/sk-opts.proto b/images/sk-opts.proto index af61975e9..c93ec5fd5 100644 --- a/images/sk-opts.proto +++ b/images/sk-opts.proto @@ -22,6 +22,7 @@ message sk_opts_entry { repeated fixed64 so_filter = 16; optional bool so_reuseport = 17; + optional bool so_broadcast = 18; } enum sk_shutdown { From 5af2bd905fad41f0a18c4b6e675429c5e0b741c7 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Thu, 25 Apr 2019 12:13:19 +0100 Subject: [PATCH 0032/2030] zdtm: Add UDP broadcast test Signed-off-by: Radostin Stoyanov --- test/zdtm/static/Makefile | 1 + test/zdtm/static/socket_udp-broadcast.c | 47 +++++++++++++++++++++++++ 2 files changed, 48 insertions(+) create mode 100644 test/zdtm/static/socket_udp-broadcast.c diff --git a/test/zdtm/static/Makefile b/test/zdtm/static/Makefile index 1ffaa9039..7799c0b0a 100644 --- a/test/zdtm/static/Makefile +++ b/test/zdtm/static/Makefile @@ -30,6 +30,7 @@ TST_NOFILE := \ socket_listen6 \ socket_listen4v6 \ socket_udp \ + socket_udp-broadcast \ socket_udp-corked \ socket6_udp \ socket_udp_shutdown \ diff --git a/test/zdtm/static/socket_udp-broadcast.c b/test/zdtm/static/socket_udp-broadcast.c new file mode 100644 index 000000000..a5fb55444 --- /dev/null +++ b/test/zdtm/static/socket_udp-broadcast.c @@ -0,0 +1,47 @@ +#include +#include + +#include "zdtmtst.h" + +const char *test_doc = "test checkpoint/restore of SO_BROADCAST\n"; +const char *test_author = "Radostin Stoyanov \n"; + +/* Description: + * Create UDP socket, set SO_BROADCAST and verify its value after restore. 
+ */ + +int main(int argc, char **argv) +{ + int sockfd; + int val; + socklen_t len = sizeof(val); + + test_init(argc, argv); + + sockfd = socket(AF_INET, SOCK_DGRAM, 0); + if (sockfd < 0) { + pr_perror("Can't create socket"); + return 1; + } + + if (setsockopt(sockfd, SOL_SOCKET, SO_BROADCAST, &(int){ 1 }, len)) { + pr_perror("setsockopt"); + return 1; + } + + test_daemon(); + test_waitsig(); + + if (getsockopt(sockfd, SOL_SOCKET, SO_BROADCAST, &val, &len)) { + pr_perror("getsockopt"); + return 1; + } + + if (len != sizeof(val) || val != 1) { + fail("SO_BROADCAST not set"); + return 1; + } + + pass(); + return 0; +} From fad89f892dc7d0cf9c2989587627be5d462d446f Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Sun, 21 Apr 2019 07:48:41 +0100 Subject: [PATCH 0033/2030] util: cr_daemon: Drop keep_fd argument When running lazy-pages in daemon mode, file descriptor 3 is reused after fork to 'protect' the opened UNIX socket. However, fd 3 happens to correspond to the opened image directory. Thus, when this file descriptor is overwritten CRIU fails with the following error: $ criu lazy-pages -D --page-server \ --address --port -d ... (06.835596) Error (criu/image.c:470): Unable to open pagemap-1.img: Not a directory (06.835855) Error (criu/uffd.c:773): uffd: 1-7: Failed to open pagemap The need for keep_fd is really only necessary if the file descriptor we would like to 'protect' is 0, 1 or 2. Assuming that the standard file descriptors STDIN, STDOUT and STDERR are open this hack is unnecessary. 
Signed-off-by: Radostin Stoyanov --- criu/include/util.h | 2 +- criu/uffd.c | 2 +- criu/util.c | 14 ++------------ 3 files changed, 4 insertions(+), 14 deletions(-) diff --git a/criu/include/util.h b/criu/include/util.h index 621abb4a0..313aacd8c 100644 --- a/criu/include/util.h +++ b/criu/include/util.h @@ -176,7 +176,7 @@ extern int is_anon_link_type(char *link, char *type); extern int cr_system(int in, int out, int err, char *cmd, char *const argv[], unsigned flags); extern int cr_system_userns(int in, int out, int err, char *cmd, char *const argv[], unsigned flags, int userns_pid); -extern int cr_daemon(int nochdir, int noclose, int *keep_fd, int close_fd); +extern int cr_daemon(int nochdir, int noclose, int close_fd); extern int close_status_fd(void); extern int is_root_user(void); diff --git a/criu/uffd.c b/criu/uffd.c index e437f1f63..6699cb14a 100644 --- a/criu/uffd.c +++ b/criu/uffd.c @@ -1427,7 +1427,7 @@ int cr_lazy_pages(bool daemon) return -1; if (daemon) { - ret = cr_daemon(1, 0, &lazy_sk, -1); + ret = cr_daemon(1, 0, -1); if (ret == -1) { pr_err("Can't run in the background\n"); return -1; diff --git a/criu/util.c b/criu/util.c index 9dd5010a1..97084939e 100644 --- a/criu/util.c +++ b/criu/util.c @@ -643,7 +643,7 @@ int close_status_fd(void) return close_safe(&opts.status_fd); } -int cr_daemon(int nochdir, int noclose, int *keep_fd, int close_fd) +int cr_daemon(int nochdir, int noclose, int close_fd) { int pid; @@ -666,16 +666,6 @@ int cr_daemon(int nochdir, int noclose, int *keep_fd, int close_fd) if (close_fd != -1) close(close_fd); - if ((*keep_fd != -1) && (*keep_fd != 3)) { - fd = dup2(*keep_fd, 3); - if (fd < 0) { - pr_perror("Dup2 failed"); - return -1; - } - close(*keep_fd); - *keep_fd = fd; - } - fd = open("/dev/null", O_RDWR); if (fd < 0) { pr_perror("Can't open /dev/null"); @@ -1144,7 +1134,7 @@ int run_tcp_server(bool daemon_mode, int *ask, int cfd, int sk) socklen_t clen = sizeof(caddr); if (daemon_mode) { - ret = cr_daemon(1, 0, ask, 
cfd); + ret = cr_daemon(1, 0, cfd); if (ret == -1) { pr_err("Can't run in the background\n"); goto out; From 635a66d8ae347e16b0f52baa7ea852e9648f6452 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Wed, 1 May 2019 19:41:34 +0100 Subject: [PATCH 0034/2030] config: Exit with error if ps-socket is std fd In daemon mode the standard file descriptors 0, 1 and 2 will be closed and ps-socket should not be one of them. Signed-off-by: Radostin Stoyanov --- criu/config.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/criu/config.c b/criu/config.c index 11c49e73b..bcf5176d9 100644 --- a/criu/config.c +++ b/criu/config.c @@ -846,9 +846,16 @@ int check_options() return 1; } - if (opts.ps_socket != -1 && (opts.addr || opts.port)) - pr_warn("Using --address or --port in " - "combination with --ps-socket is obsolete\n"); + if (opts.ps_socket != -1) { + if (opts.addr || opts.port) + pr_warn("Using --address or --port in " + "combination with --ps-socket is obsolete\n"); + if (opts.ps_socket <= STDERR_FILENO && opts.daemon_mode) { + pr_err("Standard file descriptors will be closed" + " in daemon mode\n"); + return 1; + } + } if (check_namespace_opts()) { pr_err("Error: namespace flags conflict\n"); From 530c03a202763e7a22f8c821ed5608731c2b28da Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Fri, 5 Apr 2019 14:06:44 +0100 Subject: [PATCH 0035/2030] crtools: Print err messages from check_options() When check_options() exits with an error (return value != 0) the logging is not yet initialised, and therefore the error messages are not printed out. Since this affects only command-line usage, and only when check_options() reports an error, flush the early log messages to STDERR. 
Signed-off-by: Radostin Stoyanov --- criu/crtools.c | 4 +++- criu/include/log.h | 2 ++ criu/log.c | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/criu/crtools.c b/criu/crtools.c index 55562a63e..6c83b27da 100644 --- a/criu/crtools.c +++ b/criu/crtools.c @@ -95,8 +95,10 @@ int main(int argc, char *argv[], char *envp[]) return cr_service_work(atoi(argv[2])); } - if (check_options()) + if (check_options()) { + flush_early_log_buffer(STDERR_FILENO); return 1; + } if (opts.imgs_dir == NULL) SET_CHAR_OPTS(imgs_dir, "."); diff --git a/criu/include/log.h b/criu/include/log.h index 797be1bb2..15787b09f 100644 --- a/criu/include/log.h +++ b/criu/include/log.h @@ -30,6 +30,8 @@ extern void print_on_level(unsigned int loglevel, const char *format, ...) # define LOG_PREFIX #endif +void flush_early_log_buffer(int fd); + #define print_once(loglevel, fmt, ...) \ do { \ static bool __printed; \ diff --git a/criu/log.c b/criu/log.c index edd2511ce..1e43f663d 100644 --- a/criu/log.c +++ b/criu/log.c @@ -170,7 +170,7 @@ struct early_log_hdr { uint16_t len; }; -static void flush_early_log_buffer(int fd) +void flush_early_log_buffer(int fd) { unsigned int pos = 0; int ret; From 1bc68dd873cbc0bbeba0e365a3ababe120d2ef2e Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Tue, 14 May 2019 11:34:22 +0300 Subject: [PATCH 0036/2030] lazy-pages: fix stack detection The commit 5432a964dcc7 ("lazy-pages: don't mark current stack page as lazy") tried to make the pages surrounding the stack pointers non-lazy. Unfortunately, it used a wrong mask for the detection. Fix it. 
Signed-off-by: Mike Rapoport Signed-off-by: Andrei Vagin --- criu/mem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/criu/mem.c b/criu/mem.c index 8015a7e4e..df87ed5b0 100644 --- a/criu/mem.c +++ b/criu/mem.c @@ -149,7 +149,7 @@ static bool is_stack(struct pstree_item *item, unsigned long vaddr) for (i = 0; i < item->nr_threads; i++) { uint64_t sp = dmpi(item)->thread_sp[i]; - if (!((sp ^ vaddr) & PAGE_MASK)) + if (!((sp ^ vaddr) & ~PAGE_MASK)) return true; } From 866bed0ed2ecfa1dd87367c3b4e0f6bb45a587f3 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Fri, 17 May 2019 23:53:00 +0100 Subject: [PATCH 0037/2030] build/pie: Add comments to build files And drop a stale comment that doesn't clarify anything. Reviewed-by: Cyrill Gorcunov Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- criu/pie/Makefile | 4 ++++ criu/pie/Makefile.library | 12 ++++++------ 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/criu/pie/Makefile b/criu/pie/Makefile index 24f97ea0d..739191308 100644 --- a/criu/pie/Makefile +++ b/criu/pie/Makefile @@ -1,3 +1,7 @@ +# Recipes to compile PIEs: parastie and restorer +# Compel will deal with converting the result binaries +# to a C array to be used in CRIU. + target := parasite restorer CFLAGS := $(filter-out -pg $(CFLAGS-GCOV) $(CFLAGS-ASAN),$(CFLAGS)) diff --git a/criu/pie/Makefile.library b/criu/pie/Makefile.library index f268b5ded..a48a0ea4c 100644 --- a/criu/pie/Makefile.library +++ b/criu/pie/Makefile.library @@ -1,3 +1,9 @@ +# PIE library is a static library that's going to be linked into +# *both* CRIU binary and PIEs (parasite/restorer). +# Please, make sure that you're including here only objects +# those will be used in CRIU too.
For objects files only for PIEs +# edit their separate recipes criu/pie/Makefile + lib-name := pie.lib.a CFLAGS += -fno-stack-protector -DCR_NOGLIBC -fpie @@ -27,12 +33,6 @@ ifeq ($(SRCARCH),x86) CFLAGS_util-vdso-elf32.o += -DCONFIG_VDSO_32 endif -# -# We can't provide proper mount implementation -# in parasite code -- it requires run-time rellocation -# applications, which is not the target of the -# project. -# CFLAGS := $(filter-out -pg $(CFLAGS-GCOV) $(CFLAGS-ASAN),$(CFLAGS)) asflags-y := -D__ASSEMBLY__ From 1b66b66b56517a1e522530efe159c7322bafd921 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Fri, 17 May 2019 23:53:01 +0100 Subject: [PATCH 0038/2030] arm/build: Move -marm cflag to CFLAGS_PIE I don't want to see CFLAGS redefined per-architecture in PIE makefiles in couple of places. Clean it up. The only expected per-arch ifdeffery should be object files. Also add a big comment about -marm vs -mthumb[2] Reviewed-by: Cyrill Gorcunov Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- Makefile | 7 +++++++ criu/pie/Makefile | 4 ---- criu/pie/Makefile.library | 5 ----- 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/Makefile b/Makefile index 38887da99..50948787a 100644 --- a/Makefile +++ b/Makefile @@ -46,6 +46,13 @@ ifeq ($(ARCH),arm) endif PROTOUFIX := y + # For simplicity - compile code in Arm mode without interwork. + # We could choose Thumb mode as default instead - but a dirty + # experiment shows that with 90Kb PIEs Thumb code doesn't save + # even one page. So, let's stick so far to Arm mode as it's more + # universal around all different Arm variations, until someone + # will find any use for Thumb mode. 
-dima + CFLAGS_PIE := -marm endif ifeq ($(ARCH),aarch64) diff --git a/criu/pie/Makefile b/criu/pie/Makefile index 739191308..c9e8a3d82 100644 --- a/criu/pie/Makefile +++ b/criu/pie/Makefile @@ -17,10 +17,6 @@ ifneq ($(filter-out clean mrproper,$(MAKECMDGOALS)),) compel_plugins := $(shell $(COMPEL_BIN) plugins) endif -ifeq ($(SRCARCH),arm) - ccflags-y += -marm -endif - asflags-y += -D__ASSEMBLY__ LDS := compel/arch/$(SRCARCH)/scripts/compel-pack.lds.S diff --git a/criu/pie/Makefile.library b/criu/pie/Makefile.library index a48a0ea4c..b7918438b 100644 --- a/criu/pie/Makefile.library +++ b/criu/pie/Makefile.library @@ -38,8 +38,3 @@ CFLAGS := $(filter-out -pg $(CFLAGS-GCOV) $(CFLAGS-ASAN),$(CFLAGS)) asflags-y := -D__ASSEMBLY__ ccflags-y += $(COMPEL_UAPI_INCLUDES) ccflags-y += $(CFLAGS_PIE) - -ifeq ($(SRCARCH),arm) - ccflags-y += -marm -endif - From 9df47bb26f95214383554640a1f7ec65e7841cbc Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Fri, 17 May 2019 23:53:02 +0100 Subject: [PATCH 0039/2030] build: Move __ASSEMBLY__ define to the top Makefile __ASSEMBLY__ is used to guard C-related code in headers from asm-compatible defines. We actually want every .S file to be assembled with -D__ASSEMBLY__ not to burst with C in asm file. Move __ASSEMBLY__ from all local asflags to top Makefile's AFLAGS. 
Reviewed-by: Cyrill Gorcunov Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- Makefile | 3 ++- compel/plugins/Makefile | 2 +- criu/arch/aarch64/Makefile | 1 - criu/arch/arm/Makefile | 1 - criu/arch/x86/Makefile | 2 +- criu/pie/Makefile | 2 -- criu/pie/Makefile.library | 1 - 7 files changed, 4 insertions(+), 8 deletions(-) diff --git a/Makefile b/Makefile index 50948787a..84fc0841f 100644 --- a/Makefile +++ b/Makefile @@ -129,9 +129,10 @@ ifeq ($(GMON),1) export GMON GMONLDOPT endif +AFLAGS += -D__ASSEMBLY__ CFLAGS += $(USERCFLAGS) $(WARNINGS) $(DEFINES) -iquote include/ HOSTCFLAGS += $(WARNINGS) $(DEFINES) -iquote include/ -export CFLAGS USERCLFAGS HOSTCFLAGS +export AFLAGS CFLAGS USERCLFAGS HOSTCFLAGS # Default target all: criu lib crit diff --git a/compel/plugins/Makefile b/compel/plugins/Makefile index 60b78473c..8f44ba86d 100644 --- a/compel/plugins/Makefile +++ b/compel/plugins/Makefile @@ -29,7 +29,7 @@ asflags-y += -iquote $(PLUGIN_ARCH_DIR) # General flags for assembly asflags-y += -fpie -Wstrict-prototypes -asflags-y += -D__ASSEMBLY__ -nostdlib -fomit-frame-pointer +asflags-y += -nostdlib -fomit-frame-pointer asflags-y += -fno-stack-protector ldflags-y += -z noexecstack diff --git a/criu/arch/aarch64/Makefile b/criu/arch/aarch64/Makefile index 49ef6a480..fd721d12f 100644 --- a/criu/arch/aarch64/Makefile +++ b/criu/arch/aarch64/Makefile @@ -3,7 +3,6 @@ builtin-name := crtools.built-in.o ccflags-y += -iquote $(obj)/include -iquote criu/include ccflags-y += -iquote include ccflags-y += $(COMPEL_UAPI_INCLUDES) -asflags-y += -D__ASSEMBLY__ ldflags-y += -r obj-y += cpu.o diff --git a/criu/arch/arm/Makefile b/criu/arch/arm/Makefile index d01c69a16..5142fbe12 100644 --- a/criu/arch/arm/Makefile +++ b/criu/arch/arm/Makefile @@ -4,7 +4,6 @@ ccflags-y += -iquote $(obj)/include ccflags-y += -iquote criu/include -iquote include ccflags-y += $(COMPEL_UAPI_INCLUDES) -asflags-y += -D__ASSEMBLY__ ldflags-y += -r -z noexecstack obj-y += cpu.o diff --git 
a/criu/arch/x86/Makefile b/criu/arch/x86/Makefile index 20a40e4ae..ca92a241c 100644 --- a/criu/arch/x86/Makefile +++ b/criu/arch/x86/Makefile @@ -5,7 +5,7 @@ ccflags-y += -iquote criu/include -iquote include ccflags-y += $(COMPEL_UAPI_INCLUDES) asflags-y += -Wstrict-prototypes -asflags-y += -D__ASSEMBLY__ -nostdlib -fomit-frame-pointer +asflags-y += -nostdlib -fomit-frame-pointer asflags-y += -iquote $(obj)/include ldflags-y += -r -z noexecstack diff --git a/criu/pie/Makefile b/criu/pie/Makefile index c9e8a3d82..5c0606786 100644 --- a/criu/pie/Makefile +++ b/criu/pie/Makefile @@ -17,8 +17,6 @@ ifneq ($(filter-out clean mrproper,$(MAKECMDGOALS)),) compel_plugins := $(shell $(COMPEL_BIN) plugins) endif -asflags-y += -D__ASSEMBLY__ - LDS := compel/arch/$(SRCARCH)/scripts/compel-pack.lds.S restorer-obj-y += ./$(ARCH_DIR)/restorer.o diff --git a/criu/pie/Makefile.library b/criu/pie/Makefile.library index b7918438b..577497f5a 100644 --- a/criu/pie/Makefile.library +++ b/criu/pie/Makefile.library @@ -35,6 +35,5 @@ endif CFLAGS := $(filter-out -pg $(CFLAGS-GCOV) $(CFLAGS-ASAN),$(CFLAGS)) -asflags-y := -D__ASSEMBLY__ ccflags-y += $(COMPEL_UAPI_INCLUDES) ccflags-y += $(CFLAGS_PIE) From 9f081e576b965ee593a4ea0c568364937b502d96 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Fri, 17 May 2019 23:53:03 +0100 Subject: [PATCH 0040/2030] make: Don't export ccflags-y As far as I know, difference between CFLAGS and ccflags-y in kernel is that CFLAGS are global and exported and ccflags-y are per-Makefile. So, exporting ccflags-y should be omitted. While at it, remove COMPEL_UAPI_INCLUDES - they're added to CFLAGS straight away and exported to sub-makes, so no-one need to include them twice. 
Also, remove from sub-Makefiles -iquote(s) for includes those are already added in criu/Makefile Reviewed-by: Cyrill Gorcunov Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- criu/Makefile | 24 ++++++++++-------------- criu/Makefile.crtools | 1 - criu/arch/aarch64/Makefile | 3 --- criu/arch/arm/Makefile | 4 ---- criu/arch/ppc64/Makefile | 3 --- criu/arch/s390/Makefile | 3 --- criu/arch/x86/Makefile | 4 ---- criu/pie/Makefile | 1 - criu/pie/Makefile.library | 2 -- 9 files changed, 10 insertions(+), 35 deletions(-) diff --git a/criu/Makefile b/criu/Makefile index 797878176..1e9a16789 100644 --- a/criu/Makefile +++ b/criu/Makefile @@ -7,9 +7,8 @@ PIE_DIR := criu/pie export ARCH_DIR PIE_DIR ifeq ($(filter clean mrproper,$(MAKECMDGOALS)),) - COMPEL_UAPI_INCLUDES := $(shell $(COMPEL_BIN) includes) - export COMPEL_UAPI_INCLUDES - COMPEL_LIBS := $(shell $(COMPEL_BIN) --static libs) + CFLAGS += $(shell $(COMPEL_BIN) includes) + COMPEL_LIBS := $(shell $(COMPEL_BIN) --static libs) endif # @@ -20,17 +19,14 @@ CONFIG-DEFINES += -DUSER_CONFIG_DIR='".criu/"' # # General flags. -ccflags-y += -fno-strict-aliasing -ccflags-y += -iquote criu/include -ccflags-y += -iquote include -ccflags-y += -iquote images -ccflags-y += -iquote $(ARCH_DIR)/include -ccflags-y += -iquote . -ccflags-y += $(shell pkg-config --cflags libnl-3.0) -ccflags-y += $(COMPEL_UAPI_INCLUDES) -ccflags-y += $(CONFIG-DEFINES) - -export ccflags-y +CFLAGS += -fno-strict-aliasing +CFLAGS += -iquote criu/include +CFLAGS += -iquote include +CFLAGS += -iquote images +CFLAGS += -iquote $(ARCH_DIR)/include +CFLAGS += -iquote . 
+CFLAGS += $(shell pkg-config --cflags libnl-3.0) +CFLAGS += $(CONFIG-DEFINES) ifeq ($(GMON),1) CFLAGS += -pg diff --git a/criu/Makefile.crtools b/criu/Makefile.crtools index 3717467c2..05d587d44 100644 --- a/criu/Makefile.crtools +++ b/criu/Makefile.crtools @@ -1,5 +1,4 @@ ccflags-y += -iquote criu/$(ARCH) -ccflags-y += $(COMPEL_UAPI_INCLUDES) CFLAGS_REMOVE_clone-noasan.o += $(CFLAGS-ASAN) CFLAGS_kerndat.o += -DKDAT_MAGIC_2=${shell echo $${SOURCE_DATE_EPOCH:-$$(date +%s)}} -DKDAT_RUNDIR=\"$(RUNDIR)\" ldflags-y += -r diff --git a/criu/arch/aarch64/Makefile b/criu/arch/aarch64/Makefile index fd721d12f..b26487367 100644 --- a/criu/arch/aarch64/Makefile +++ b/criu/arch/aarch64/Makefile @@ -1,8 +1,5 @@ builtin-name := crtools.built-in.o -ccflags-y += -iquote $(obj)/include -iquote criu/include -ccflags-y += -iquote include -ccflags-y += $(COMPEL_UAPI_INCLUDES) ldflags-y += -r obj-y += cpu.o diff --git a/criu/arch/arm/Makefile b/criu/arch/arm/Makefile index 5142fbe12..b111e5959 100644 --- a/criu/arch/arm/Makefile +++ b/criu/arch/arm/Makefile @@ -1,9 +1,5 @@ builtin-name := crtools.built-in.o -ccflags-y += -iquote $(obj)/include -ccflags-y += -iquote criu/include -iquote include -ccflags-y += $(COMPEL_UAPI_INCLUDES) - ldflags-y += -r -z noexecstack obj-y += cpu.o diff --git a/criu/arch/ppc64/Makefile b/criu/arch/ppc64/Makefile index ff0a71207..f37337f74 100644 --- a/criu/arch/ppc64/Makefile +++ b/criu/arch/ppc64/Makefile @@ -1,8 +1,5 @@ builtin-name := crtools.built-in.o -ccflags-y += -iquote $(obj)/include -ccflags-y += -iquote criu/include -iquote include -ccflags-y += $(COMPEL_UAPI_INCLUDES) ldflags-y += -r obj-y += cpu.o diff --git a/criu/arch/s390/Makefile b/criu/arch/s390/Makefile index ff0a71207..f37337f74 100644 --- a/criu/arch/s390/Makefile +++ b/criu/arch/s390/Makefile @@ -1,8 +1,5 @@ builtin-name := crtools.built-in.o -ccflags-y += -iquote $(obj)/include -ccflags-y += -iquote criu/include -iquote include -ccflags-y += $(COMPEL_UAPI_INCLUDES) ldflags-y += -r 
obj-y += cpu.o diff --git a/criu/arch/x86/Makefile b/criu/arch/x86/Makefile index ca92a241c..618e85bb3 100644 --- a/criu/arch/x86/Makefile +++ b/criu/arch/x86/Makefile @@ -1,9 +1,5 @@ builtin-name := crtools.built-in.o -ccflags-y += -iquote $(obj)/include -ccflags-y += -iquote criu/include -iquote include -ccflags-y += $(COMPEL_UAPI_INCLUDES) - asflags-y += -Wstrict-prototypes asflags-y += -nostdlib -fomit-frame-pointer asflags-y += -iquote $(obj)/include diff --git a/criu/pie/Makefile b/criu/pie/Makefile index 5c0606786..526e4e1ad 100644 --- a/criu/pie/Makefile +++ b/criu/pie/Makefile @@ -5,7 +5,6 @@ target := parasite restorer CFLAGS := $(filter-out -pg $(CFLAGS-GCOV) $(CFLAGS-ASAN),$(CFLAGS)) -ccflags-y += $(COMPEL_UAPI_INCLUDES) ccflags-y += $(CFLAGS_PIE) ccflags-y += -DCR_NOGLIBC ccflags-y += -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0 diff --git a/criu/pie/Makefile.library b/criu/pie/Makefile.library index 577497f5a..467dfd6b6 100644 --- a/criu/pie/Makefile.library +++ b/criu/pie/Makefile.library @@ -34,6 +34,4 @@ ifeq ($(SRCARCH),x86) endif CFLAGS := $(filter-out -pg $(CFLAGS-GCOV) $(CFLAGS-ASAN),$(CFLAGS)) - -ccflags-y += $(COMPEL_UAPI_INCLUDES) ccflags-y += $(CFLAGS_PIE) From e689144cb0b2ba39030ac2a880eac5a05a7c1b03 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Fri, 17 May 2019 23:53:04 +0100 Subject: [PATCH 0041/2030] Makefile.crtools: Remove bogus ccflags-y There ain't even such path in sources. 
Reviewed-by: Cyrill Gorcunov Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- criu/Makefile.crtools | 1 - 1 file changed, 1 deletion(-) diff --git a/criu/Makefile.crtools b/criu/Makefile.crtools index 05d587d44..13ea22775 100644 --- a/criu/Makefile.crtools +++ b/criu/Makefile.crtools @@ -1,4 +1,3 @@ -ccflags-y += -iquote criu/$(ARCH) CFLAGS_REMOVE_clone-noasan.o += $(CFLAGS-ASAN) CFLAGS_kerndat.o += -DKDAT_MAGIC_2=${shell echo $${SOURCE_DATE_EPOCH:-$$(date +%s)}} -DKDAT_RUNDIR=\"$(RUNDIR)\" ldflags-y += -r From a989c6add06120285cb9edc5020ea5ed27f1a0ac Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Fri, 17 May 2019 23:53:05 +0100 Subject: [PATCH 0042/2030] build: Use cflags from compel for pie.lib.a As pie.lib.a linked also to PIEs - we need to use missing flags as -nostdlib and -fomit-frame-pointer. Reviewed-by: Cyrill Gorcunov Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- criu/Makefile | 1 + criu/pie/Makefile | 1 - criu/pie/Makefile.library | 2 +- 3 files changed, 2 insertions(+), 2 deletions(-) diff --git a/criu/Makefile b/criu/Makefile index 1e9a16789..3de6eb217 100644 --- a/criu/Makefile +++ b/criu/Makefile @@ -9,6 +9,7 @@ export ARCH_DIR PIE_DIR ifeq ($(filter clean mrproper,$(MAKECMDGOALS)),) CFLAGS += $(shell $(COMPEL_BIN) includes) COMPEL_LIBS := $(shell $(COMPEL_BIN) --static libs) + CFLAGS_PIE += $(shell $(COMPEL_BIN) cflags) endif # diff --git a/criu/pie/Makefile b/criu/pie/Makefile index 526e4e1ad..35aa78bd3 100644 --- a/criu/pie/Makefile +++ b/criu/pie/Makefile @@ -11,7 +11,6 @@ ccflags-y += -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0 ccflags-y += -Wp,-U_FORTIFY_SOURCE -Wp,-D_FORTIFY_SOURCE=0 ifneq ($(filter-out clean mrproper,$(MAKECMDGOALS)),) - CFLAGS += $(shell $(COMPEL_BIN) cflags) LDFLAGS += $(shell $(COMPEL_BIN) ldflags) compel_plugins := $(shell $(COMPEL_BIN) plugins) endif diff --git a/criu/pie/Makefile.library b/criu/pie/Makefile.library index 467dfd6b6..2d11ad923 100644 --- a/criu/pie/Makefile.library +++ 
b/criu/pie/Makefile.library @@ -6,7 +6,7 @@ lib-name := pie.lib.a -CFLAGS += -fno-stack-protector -DCR_NOGLIBC -fpie +CFLAGS += -DCR_NOGLIBC lib-y += util.o From 3dc4034d00b52a0cdab9d730f1190d81c9fc1130 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Fri, 17 May 2019 23:53:06 +0100 Subject: [PATCH 0043/2030] compel: Don't use CFLAGS_PIE for libcompel.so It's needed for PIEs, but not for the library. It comes earlier than commit 61e6c01d0964, but I don't see the point. Regardless, I'm a bit afraid to break s390, hopefully testing covers the platform. This and the next one "make: Move CR_NOGLIBC into CFLAGS_PIE" should be reverted/dropped from criu-dev if they turn out to be breaking something. Cc: Michael Holzheu Reviewed-by: Cyrill Gorcunov Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- compel/Makefile | 1 - 1 file changed, 1 deletion(-) diff --git a/compel/Makefile b/compel/Makefile index 45736f29e..de9318c42 100644 --- a/compel/Makefile +++ b/compel/Makefile @@ -11,7 +11,6 @@ ccflags-y += -iquote compel/arch/$(ARCH)/src/lib/include ccflags-y += -iquote compel/include ccflags-y += -fno-strict-aliasing ccflags-y += -fPIC -ccflags-y += $(CFLAGS_PIE) ldflags-y += -r # From 60d79020428269fec714fcfb97dafcc9ec77600b Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Fri, 17 May 2019 23:53:07 +0100 Subject: [PATCH 0044/2030] make: Move CR_NOGLIBC into CFLAGS_PIE Less duplication, cleaner Makefiles.
Reviewed-by: Cyrill Gorcunov Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- Makefile | 2 ++ compel/plugins/Makefile | 2 +- criu/pie/Makefile | 1 - criu/pie/Makefile.library | 2 -- 4 files changed, 3 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 84fc0841f..cee8a42c9 100644 --- a/Makefile +++ b/Makefile @@ -85,6 +85,8 @@ ifeq ($(ARCH),s390) DEFINES := -DCONFIG_S390 CFLAGS_PIE := -fno-optimize-sibling-calls endif + +CFLAGS_PIE += -DCR_NOGLIBC export CFLAGS_PIE LDARCH ?= $(SRCARCH) diff --git a/compel/plugins/Makefile b/compel/plugins/Makefile index 8f44ba86d..a326e2a66 100644 --- a/compel/plugins/Makefile +++ b/compel/plugins/Makefile @@ -1,5 +1,5 @@ CFLAGS := $(filter-out -pg $(CFLAGS-GCOV) $(CFLAGS-ASAN),$(CFLAGS)) -CFLAGS += -DCR_NOGLIBC -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0 +CFLAGS += -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0 CFLAGS += -Wp,-U_FORTIFY_SOURCE -Wp,-D_FORTIFY_SOURCE=0 PLUGIN_ARCH_DIR := compel/arch/$(ARCH)/plugins diff --git a/criu/pie/Makefile b/criu/pie/Makefile index 35aa78bd3..bb65f8908 100644 --- a/criu/pie/Makefile +++ b/criu/pie/Makefile @@ -6,7 +6,6 @@ target := parasite restorer CFLAGS := $(filter-out -pg $(CFLAGS-GCOV) $(CFLAGS-ASAN),$(CFLAGS)) ccflags-y += $(CFLAGS_PIE) -ccflags-y += -DCR_NOGLIBC ccflags-y += -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0 ccflags-y += -Wp,-U_FORTIFY_SOURCE -Wp,-D_FORTIFY_SOURCE=0 diff --git a/criu/pie/Makefile.library b/criu/pie/Makefile.library index 2d11ad923..423c782aa 100644 --- a/criu/pie/Makefile.library +++ b/criu/pie/Makefile.library @@ -6,8 +6,6 @@ lib-name := pie.lib.a -CFLAGS += -DCR_NOGLIBC - lib-y += util.o ifeq ($(VDSO),y) From 97f8c0f359a5f9a7a9e3f2165e887531e2d7b7b9 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Fri, 17 May 2019 23:53:08 +0100 Subject: [PATCH 0045/2030] criu/ia32: Consolidate compat vdso and move to arch/x86 Do the cleanup that was long pending by XXX :) Reviewed-by: Cyrill Gorcunov Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- 
criu/arch/x86/include/asm/restorer.h | 8 -------- criu/arch/x86/include/asm/vdso.h | 24 ++++++++++++++++++++++++ criu/include/util-vdso.h | 13 ++----------- criu/pie/parasite-vdso.c | 14 ++------------ 4 files changed, 28 insertions(+), 31 deletions(-) diff --git a/criu/arch/x86/include/asm/restorer.h b/criu/arch/x86/include/asm/restorer.h index 3c43ce688..25559b57c 100644 --- a/criu/arch/x86/include/asm/restorer.h +++ b/criu/arch/x86/include/asm/restorer.h @@ -72,14 +72,6 @@ static inline int set_compat_robust_list(uint32_t head_ptr, uint32_t len) : "r"(ret) \ : "memory") -#ifndef ARCH_MAP_VDSO_32 -# define ARCH_MAP_VDSO_32 0x2002 -#endif - -#ifndef ARCH_MAP_VDSO_64 -# define ARCH_MAP_VDSO_64 0x2003 -#endif - static inline void __setup_sas_compat(struct ucontext_ia32* uc, ThreadSasEntry *sas) { diff --git a/criu/arch/x86/include/asm/vdso.h b/criu/arch/x86/include/asm/vdso.h index d6c2f1b8c..ae893b8d7 100644 --- a/criu/arch/x86/include/asm/vdso.h +++ b/criu/arch/x86/include/asm/vdso.h @@ -23,5 +23,29 @@ "__kernel_sigreturn", \ "__kernel_rt_sigreturn" +#ifndef ARCH_MAP_VDSO_32 +# define ARCH_MAP_VDSO_32 0x2002 +#endif + +#ifndef ARCH_MAP_VDSO_64 +# define ARCH_MAP_VDSO_64 0x2003 +#endif + +#if defined(CONFIG_COMPAT) && !defined(__ASSEMBLY__) +struct vdso_symtable; +extern int vdso_fill_symtable(uintptr_t mem, size_t size, + struct vdso_symtable *t); +extern int vdso_fill_symtable_compat(uintptr_t mem, size_t size, + struct vdso_symtable *t); + +static inline int __vdso_fill_symtable(uintptr_t mem, size_t size, + struct vdso_symtable *t, bool compat_vdso) +{ + if (compat_vdso) + return vdso_fill_symtable_compat(mem, size, t); + else + return vdso_fill_symtable(mem, size, t); +} +#endif #endif /* __CR_ASM_VDSO_H__ */ diff --git a/criu/include/util-vdso.h b/criu/include/util-vdso.h index 05b8326f5..c74360c87 100644 --- a/criu/include/util-vdso.h +++ b/criu/include/util-vdso.h @@ -75,6 +75,8 @@ struct vdso_maps { #define ELF_ST_BIND ELF32_ST_BIND #endif +# define 
vdso_fill_symtable vdso_fill_symtable_compat + #else /* CONFIG_VDSO_32 */ #define Ehdr_t Elf64_Ehdr @@ -92,17 +94,6 @@ struct vdso_maps { #endif /* CONFIG_VDSO_32 */ -#if defined(CONFIG_VDSO_32) -# define vdso_fill_symtable vdso_fill_symtable_compat -#endif - extern int vdso_fill_symtable(uintptr_t mem, size_t size, struct vdso_symtable *t); -#if defined(CONFIG_X86_64) && defined(CONFIG_COMPAT) -#ifndef ARCH_MAP_VDSO_32 -# define ARCH_MAP_VDSO_32 0x2002 -#endif -extern int vdso_fill_symtable_compat(uintptr_t mem, size_t size, - struct vdso_symtable *t); -#endif #endif /* __CR_UTIL_VDSO_H__ */ diff --git a/criu/pie/parasite-vdso.c b/criu/pie/parasite-vdso.c index 8072c11f7..dc73fb53e 100644 --- a/criu/pie/parasite-vdso.c +++ b/criu/pie/parasite-vdso.c @@ -74,18 +74,8 @@ int vdso_do_park(struct vdso_maps *rt, unsigned long park_at, return ret; } -/* XXX: move in arch/ */ -#if defined(CONFIG_X86_64) && defined(CONFIG_COMPAT) -int __vdso_fill_symtable(uintptr_t mem, size_t size, - struct vdso_symtable *t, bool compat_vdso) -{ - if (compat_vdso) - return vdso_fill_symtable_compat(mem, size, t); - else - return vdso_fill_symtable(mem, size, t); -} -#else -int __vdso_fill_symtable(uintptr_t mem, size_t size, +#ifndef CONFIG_COMPAT +static int __vdso_fill_symtable(uintptr_t mem, size_t size, struct vdso_symtable *t, bool __always_unused compat_vdso) { return vdso_fill_symtable(mem, size, t); From 13c275ad26393d9d823a4654368928b1fc417a0b Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Fri, 17 May 2019 23:53:09 +0100 Subject: [PATCH 0046/2030] build/criu/pie: Move trampolines to restorer-obj-y We don't need patching vdso neither in parasite nor in criu. Move it to restorer-only objects. Note that we need filling symtables everywhere (kdat/parasite/restorer), this change doesn't move util-vdso.o which has vdso_fill_symtable(). 
[those files ask for a proper rename, but it's not directly related to the change, so yet TODO] Reviewed-by: Cyrill Gorcunov Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- criu/pie/Makefile | 12 ++++++++++++ criu/pie/Makefile.library | 10 +--------- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/criu/pie/Makefile b/criu/pie/Makefile index bb65f8908..bdff44816 100644 --- a/criu/pie/Makefile +++ b/criu/pie/Makefile @@ -25,6 +25,18 @@ ifeq ($(ARCH),x86) endif endif +ifeq ($(VDSO),y) + restorer-obj-y += parasite-vdso.o ./$(ARCH_DIR)/vdso-pie.o + + ifeq ($(SRCARCH),aarch64) + restorer-obj-y += ./$(ARCH_DIR)/intraprocedure.o + endif + + ifeq ($(SRCARCH),ppc64) + restorer-obj-y += ./$(ARCH_DIR)/vdso-trampoline.o + endif +endif + define gen-pie-rules $(1)-obj-y += $(1).o $(1)-obj-e += pie.lib.a diff --git a/criu/pie/Makefile.library b/criu/pie/Makefile.library index 423c782aa..0a33a8861 100644 --- a/criu/pie/Makefile.library +++ b/criu/pie/Makefile.library @@ -9,15 +9,7 @@ lib-name := pie.lib.a lib-y += util.o ifeq ($(VDSO),y) - lib-y += util-vdso.o parasite-vdso.o ./$(ARCH_DIR)/vdso-pie.o - - ifeq ($(SRCARCH),aarch64) - lib-y += ./$(ARCH_DIR)/intraprocedure.o - endif - - ifeq ($(SRCARCH),ppc64) - lib-y += ./$(ARCH_DIR)/vdso-trampoline.o - endif + lib-y += util-vdso.o endif ifeq ($(SRCARCH),ppc64) From 11b3825ca3d9bf5bd2593bb2ce9ae282d72734f9 Mon Sep 17 00:00:00 2001 From: Zhang Ning Date: Tue, 16 Apr 2019 15:45:05 +0800 Subject: [PATCH 0047/2030] x86/crtools: do not error when YMM is missing for Intel Apollo Lake SOC, its cpuinfo and fpu features: cpu: x86_family 6 x86_vendor_id GenuineIntel x86_model_id Intel(R) Celeron(R) CPU J3455 @ 1.50GHz cpu: fpu: xfeatures_mask 0x11 xsave_size 1088 xsave_size_max 1088 xsaves_size 704 cpu: fpu: x87 floating point registers xstate_offsets 0 / 0 xstate_sizes 160 / 160 this CPU doesn't have AVX registers, YMM feature. 
When CRIU runs on this CPU, it will report a dump error: Dumping GP/FPU registers for 4888 Error (criu/arch/x86/crtools.c:362): x86: Corruption in XFEATURE_YMM area (expected 64 but 0 obtained) Error (criu/cr-dump.c:1278): Can't infect (pid: 4888) with parasite That's because x86/crtools.c will still validate the YMM xsave frame, and thus fails to dump. Bypass unsupported features, to make CRIU run on these kinds of CPUs. Cc: Chen Hu Signed-off-by: Zhang Ning Acked-by: Cyrill Gorcunov Signed-off-by: Andrei Vagin --- criu/arch/x86/crtools.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/criu/arch/x86/crtools.c b/criu/arch/x86/crtools.c index ee016da00..efc23e5fe 100644 --- a/criu/arch/x86/crtools.c +++ b/criu/arch/x86/crtools.c @@ -354,7 +354,7 @@ static bool valid_xsave_frame(CoreEntry *core) }; for (i = 0; i < ARRAY_SIZE(features); i++) { - if (!features[i].ptr && i > 0) + if (!features[i].ptr) continue; if (features[i].expected > features[i].obtained) { From ac495fbd3f761daa65abbe1c4a49baae0acb6c2c Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Wed, 15 May 2019 06:11:23 +0000 Subject: [PATCH 0048/2030] autofs: fix coverity RESOURCE_LEAK criu-3.12/criu/autofs.c:114: leaked_storage: Variable "path" going out of scope leaks the storage it points to. criu-3.12/criu/autofs.c:254: leaked_storage: Variable "opts" going out of scope leaks the storage it points to. criu-3.12/criu/autofs.c:719: leaked_storage: Variable "path" going out of scope leaks the storage it points to. criu-3.12/criu/autofs.c:980: leaked_storage: Variable "img" going out of scope leaks the storage it points to.
Signed-off-by: Adrian Reber --- criu/autofs.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/criu/autofs.c b/criu/autofs.c index 576edef68..a2dc60ffc 100644 --- a/criu/autofs.c +++ b/criu/autofs.c @@ -110,8 +110,10 @@ static int autofs_kernel_pipe_alive(int pgrp, int fd, int ino) return -1; if (stat(path, &buf) < 0) { - if (errno == ENOENT) + if (errno == ENOENT) { + xfree(path); return 0; + } pr_perror("Failed to stat %s", path); return -1; } @@ -208,6 +210,7 @@ static int parse_options(char *options, AutofsEntry *entry, long *pipe_ino) { char **opts; int nr_opts, i; + int parse_error = 0; entry->fd = AUTOFS_OPT_UNKNOWN; entry->timeout = AUTOFS_OPT_UNKNOWN; @@ -250,14 +253,19 @@ static int parse_options(char *options, AutofsEntry *entry, long *pipe_ino) else if (!strncmp(opt, "gid=", strlen("gid="))) err = xatoi(opt + strlen("gid="), &entry->gid); - if (err) - return -1; + if (err) { + parse_error = 1; + break; + } } for (i = 0; i < nr_opts; i++) xfree(opts[i]); xfree(opts); + if (parse_error) + return -1; + if (entry->fd == AUTOFS_OPT_UNKNOWN) { pr_err("Failed to find fd option\n"); return -1; @@ -716,6 +724,7 @@ static int autofs_create_dentries(const struct mount_info *mi, char *mnt_path) return -1; if (mkdir(path, 0555) < 0) { pr_perror("Failed to create autofs dentry %s", path); + free(path); return -1; } free(path); @@ -967,6 +976,7 @@ static int autofs_add_mount_info(struct pprep_head *ph) static int autofs_restore_entry(struct mount_info *mi, AutofsEntry **entry) { struct cr_img *img; + int ret; img = open_image(CR_FD_AUTOFS, O_RSTR, mi->s_dev); if (!img) @@ -976,10 +986,11 @@ static int autofs_restore_entry(struct mount_info *mi, AutofsEntry **entry) return -1; } - if (pb_read_one_eof(img, entry, PB_AUTOFS) < 0) - return -1; + ret = pb_read_one_eof(img, entry, PB_AUTOFS); close_image(img); + if (ret < 0) + return -1; return 0; } From 8502bc2010637c74310bec135605f53a24972ab4 Mon Sep 17 00:00:00 2001 From: Adrian 
Reber Date: Wed, 15 May 2019 06:55:26 +0000 Subject: [PATCH 0049/2030] cgroup: fix clang 'free released memory' criu-3.12/criu/cgroup.c:927:2: warning: Attempt to free released memory Signed-off-by: Adrian Reber --- criu/cgroup.c | 1 + 1 file changed, 1 insertion(+) diff --git a/criu/cgroup.c b/criu/cgroup.c index 22e722acf..332c79fb9 100644 --- a/criu/cgroup.c +++ b/criu/cgroup.c @@ -818,6 +818,7 @@ static int dump_controllers(CgroupEntry *cg) if (ce->n_dirs > 0) if (dump_cg_dirs(&cur->heads, cur->n_heads, &ce->dirs, 0) < 0) { xfree(cg->controllers); + cg->controllers = NULL; return -1; } cg->controllers[i++] = ce++; From c189a9bbf2e8b7a7c05cab747c728cf6e286997e Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Wed, 15 May 2019 06:09:31 +0000 Subject: [PATCH 0050/2030] compel: fix clang 'value stored is never read' criu-3.12/compel/src/lib/infect.c:276:3: warning: Value stored to 'ret' is never read Signed-off-by: Adrian Reber --- compel/src/lib/infect.c | 1 - 1 file changed, 1 deletion(-) diff --git a/compel/src/lib/infect.c b/compel/src/lib/infect.c index 09c2c53f9..f0bcaf334 100644 --- a/compel/src/lib/infect.c +++ b/compel/src/lib/infect.c @@ -273,7 +273,6 @@ try_again: goto err; } - ret = 0; if (free_status) free_status(pid, ss, data); goto try_again; From defbd43351bf291c72dee48e62b43e3d7eed15a9 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Tue, 14 May 2019 18:11:26 +0000 Subject: [PATCH 0051/2030] cr-service: fix coverity STRING_OVERFLOW criu-3.12/criu/cr-service.c:1305: fixed_size_dest: You might overrun the 108-character fixed-size string "server_addr.sun_path" by copying "opts.addr" without checking the length. 
Signed-off-by: Adrian Reber --- criu/cr-service.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/criu/cr-service.c b/criu/cr-service.c index 1f1e5fa86..52b86bb05 100644 --- a/criu/cr-service.c +++ b/criu/cr-service.c @@ -1302,7 +1302,8 @@ int cr_service(bool daemon_mode) SET_CHAR_OPTS(addr, CR_DEFAULT_SERVICE_ADDRESS); } - strcpy(server_addr.sun_path, opts.addr); + strncpy(server_addr.sun_path, opts.addr, + sizeof(server_addr.sun_path) - 1); server_addr_len = strlen(server_addr.sun_path) + sizeof(server_addr.sun_family); From 7d152adf65881d9d6d273fe738be773d05163c2f Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Wed, 15 May 2019 07:23:27 +0000 Subject: [PATCH 0052/2030] cr-service: fix clang 'dereference of a null pointer' criu-3.12/criu/cr-service.c:933:7: warning: Access to field 'keep_open' results in a dereference of a null pointer (loaded from variable 'msg') Signed-off-by: Adrian Reber --- criu/cr-service.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/criu/cr-service.c b/criu/cr-service.c index 52b86bb05..cb76de4f4 100644 --- a/criu/cr-service.c +++ b/criu/cr-service.c @@ -1156,7 +1156,7 @@ int cr_service_work(int sk) CriuReq *msg = 0; more: - if (recv_criu_msg(sk, &msg) == -1) { + if (recv_criu_msg(sk, &msg) != 0) { pr_perror("Can't recv request"); goto err; } From 57d4a78aa392dc12b1569f7c85541f41515a094c Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Wed, 15 May 2019 07:42:35 +0000 Subject: [PATCH 0053/2030] files: fix coverity RESOURCE_LEAK criu-3.12/criu/files.c:1250: leaked_storage: Variable "dir" going out of scope leaks the storage it points to. 
Signed-off-by: Adrian Reber --- criu/files.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/criu/files.c b/criu/files.c index 38b12ee4f..ffdaa459f 100644 --- a/criu/files.c +++ b/criu/files.c @@ -1247,6 +1247,8 @@ int close_old_fds(void) ret = sscanf(de->d_name, "%d", &fd); if (ret != 1) { pr_err("Can't parse %s\n", de->d_name); + closedir(dir); + close_pid_proc(); return -1; } From 920c4a6afcee6d660b002ba8370da3400f325eb9 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Tue, 14 May 2019 18:01:31 +0000 Subject: [PATCH 0054/2030] files-reg: fix coverity NULL_RETURNS criu-3.12/criu/files-reg.c:1574: dereference: Dereferencing "rmi", which is known to be "NULL". criu-3.12/criu/files-reg.c:1582: dereference: Dereferencing "tmi", which is known to be "NULL". Signed-off-by: Adrian Reber --- criu/files-reg.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/criu/files-reg.c b/criu/files-reg.c index 3072289ef..ff0ae7d69 100644 --- a/criu/files-reg.c +++ b/criu/files-reg.c @@ -1553,6 +1553,9 @@ static int rfi_remap(struct reg_file_info *rfi, int *level) } mi = lookup_mnt_id(rfi->rfe->mnt_id); + if (mi == NULL) + return -1; + if (rfi->rfe->mnt_id == rfi->remap->rmnt_id) { /* Both links on the same mount point */ tmi = mi; @@ -1562,6 +1565,8 @@ static int rfi_remap(struct reg_file_info *rfi, int *level) } rmi = lookup_mnt_id(rfi->remap->rmnt_id); + if (rmi == NULL) + return -1; /* * Find the common bind-mount. 
We know that one mount point was From 288a4a953a53492586e86880ee1347d647411a92 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Wed, 15 May 2019 08:06:55 +0000 Subject: [PATCH 0055/2030] image: fix clang 'dereference of a null pointer' criu-3.12/criu/include/image.h:129:9: warning: Dereference of null pointer Signed-off-by: Adrian Reber --- criu/include/image.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/criu/include/image.h b/criu/include/image.h index 48ba3ec00..2baa39496 100644 --- a/criu/include/image.h +++ b/criu/include/image.h @@ -133,6 +133,8 @@ extern int open_image_lazy(struct cr_img *img); static inline int img_raw_fd(struct cr_img *img) { + if (!img) + return -1; if (lazy_image(img) && open_image_lazy(img)) return -1; From 688f02a1390b62d4b41f651b5d3ee26cecf0e721 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Tue, 14 May 2019 16:51:50 +0000 Subject: [PATCH 0056/2030] log: fix coverity OVERRUN This fixes a coverity buffer overflow warning: criu-3.12/criu/log.c:344: overrun-local: Overrunning array of 1024 bytes at byte offset 1031 by dereferencing pointer "early_log_buffer + early_log_buf_off + log_size". [Note: The source code implementation of the function has been overridden by a builtin model.] 
Signed-off-by: Adrian Reber --- criu/log.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/criu/log.c b/criu/log.c index 1e43f663d..8bdf83534 100644 --- a/criu/log.c +++ b/criu/log.c @@ -320,7 +320,7 @@ static void early_vprint(const char *format, unsigned int loglevel, va_list para unsigned int log_size = 0; struct early_log_hdr *hdr; - if (early_log_buf_off >= EARLY_LOG_BUF_LEN) + if ((early_log_buf_off + sizeof(hdr)) >= EARLY_LOG_BUF_LEN) return; /* Save loglevel */ From 27dd87e99a3324ff281d182ec69435603253a9bb Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Tue, 14 May 2019 16:55:20 +0000 Subject: [PATCH 0057/2030] libcriu: fix coverity RESOURCE_LEAK criu-3.12/lib/c/criu.c:255: leaked_storage: Variable "rpc" going out of scope leaks the storage it points to. Signed-off-by: Adrian Reber --- lib/c/criu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/c/criu.c b/lib/c/criu.c index 9e36a9795..c7a96b82e 100644 --- a/lib/c/criu.c +++ b/lib/c/criu.c @@ -252,6 +252,7 @@ int criu_local_init_opts(criu_opts **o) if (opts == NULL) { perror("Can't allocate memory for criu opts"); criu_local_free_opts(opts); + free(rpc); return -1; } From fe1c72a098ca7e44b9e3720609c3415d78f9faac Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Wed, 15 May 2019 14:24:37 +0000 Subject: [PATCH 0058/2030] lib/c: fix coverity DEADCODE criu-3.12/lib/c/criu.c:869: dead_error_line: Execution cannot reach this statement: "free(ptr);". criu-3.12/lib/c/criu.c:906: dead_error_line: Execution cannot reach this statement: "free(ptr);". 
Signed-off-by: Adrian Reber --- lib/c/criu.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/lib/c/criu.c b/lib/c/criu.c index c7a96b82e..17d5c3983 100644 --- a/lib/c/criu.c +++ b/lib/c/criu.c @@ -866,8 +866,6 @@ int criu_local_add_enable_fs(criu_opts *opts, const char *fs) err: if (str) free(str); - if (ptr) - free(ptr); return -ENOMEM; } @@ -903,8 +901,6 @@ int criu_local_add_skip_mnt(criu_opts *opts, const char *mnt) err: if (str) free(str); - if (ptr) - free(ptr); return -ENOMEM; } From beecbf096842fd0ce8c4d927ee71089ab2a8a1db Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Wed, 15 May 2019 05:49:30 +0000 Subject: [PATCH 0059/2030] lsm: fix clang 'Use of memory after it is freed' criu-3.12/criu/lsm.c:257:3: warning: Use of memory after it is freed Signed-off-by: Adrian Reber --- criu/lsm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/criu/lsm.c b/criu/lsm.c index 420585ba4..9d7e55c11 100644 --- a/criu/lsm.c +++ b/criu/lsm.c @@ -89,6 +89,7 @@ static int selinux_get_label(pid_t pid, char **output) if (!pos) { pr_err("Invalid selinux context %s\n", (char *)ctx); xfree(*output); + *output = NULL; goto err; } From 75edc02f66d5822584a0fffac2be0b3c038cf46f Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Wed, 15 May 2019 09:39:55 +0000 Subject: [PATCH 0060/2030] mem: fix coverity RESOURCE_LEAK criu-3.12/criu/mem.c:597:3: warning: Value stored to 'ret' is never read criu-3.12/criu/mem.c:632: leaked_storage: Variable "img" going out of scope leaks the storage it points to. 
Signed-off-by: Adrian Reber --- criu/mem.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/criu/mem.c b/criu/mem.c index df87ed5b0..f79e04cc4 100644 --- a/criu/mem.c +++ b/criu/mem.c @@ -594,7 +594,6 @@ int prepare_mm_pid(struct pstree_item *i) if (!vma) break; - ret = 0; ri->vmas.nr++; if (!img) vma->e = ri->mm->vmas[vn++]; @@ -603,6 +602,7 @@ int prepare_mm_pid(struct pstree_item *i) if (ret <= 0) { xfree(vma); close_image(img); + img = NULL; break; } } @@ -629,6 +629,8 @@ int prepare_mm_pid(struct pstree_item *i) break; } + if (img) + close_image(img); return ret; } From 08a283d29ea711230fcc83183538335bee9f6c64 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Wed, 15 May 2019 11:57:05 +0000 Subject: [PATCH 0061/2030] pagemap: fix clang 'free released memory' criu-3.12/criu/pagemap.c:460:2: warning: Attempt to free released memory Signed-off-by: Adrian Reber --- criu/pagemap.c | 1 + 1 file changed, 1 insertion(+) diff --git a/criu/pagemap.c b/criu/pagemap.c index 4c4e88685..ee4ae1b19 100644 --- a/criu/pagemap.c +++ b/criu/pagemap.c @@ -458,6 +458,7 @@ static void free_pagemaps(struct page_read *pr) pagemap_entry__free_unpacked(pr->pmes[i], NULL); xfree(pr->pmes); + pr->pmes = NULL; } static void advance_piov(struct page_read_iov *piov, ssize_t len) From 82d6ef6d8ad1c3fdf4a8e09e6412dda4c95f1b68 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Wed, 15 May 2019 11:59:05 +0000 Subject: [PATCH 0062/2030] pagemap: fix coverity FORWARD_NULL criu-3.12/criu/pagemap.c:694: var_deref_model: Passing "pr" to "free_pagemaps", which dereferences null "pr->pmes" Signed-off-by: Adrian Reber --- criu/pagemap.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/criu/pagemap.c b/criu/pagemap.c index ee4ae1b19..6bcd0d70e 100644 --- a/criu/pagemap.c +++ b/criu/pagemap.c @@ -678,11 +678,13 @@ static int init_pagemaps(struct page_read *pr) pr->nr_pmes++; if (pr->nr_pmes >= nr_pmes) { + PagemapEntry **new; nr_pmes += nr_realloc; - pr->pmes = 
xrealloc(pr->pmes, + new = xrealloc(pr->pmes, nr_pmes * sizeof(*pr->pmes)); - if (!pr->pmes) + if (!new) goto free_pagemaps; + pr->pmes = new; } } From bf2d03b7856afc003cf1b269bf84dcbe4349e387 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Wed, 15 May 2019 11:48:14 +0000 Subject: [PATCH 0063/2030] page-xfer: fix clang 'value is never read' criu-3.12/criu/page-xfer.c:988:3: warning: Value stored to 'ret' is never read Signed-off-by: Adrian Reber --- criu/page-xfer.c | 1 - 1 file changed, 1 deletion(-) diff --git a/criu/page-xfer.c b/criu/page-xfer.c index e3c9c7b25..8868ed226 100644 --- a/criu/page-xfer.c +++ b/criu/page-xfer.c @@ -985,7 +985,6 @@ int cr_page_server(bool daemon_mode, bool lazy_dump, int cfd) return -1; if (opts.ps_socket != -1) { - ret = 0; ask = opts.ps_socket; pr_info("Re-using ps socket %d\n", ask); goto no_server; From 5b358ebdc3b792c2d6f5c44b94d86369e38fc16c Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Wed, 15 May 2019 12:02:40 +0000 Subject: [PATCH 0064/2030] pie/restorer: fix clang 'value is never read' criu-3.12/criu/pie/restorer.c:1514:2: warning: Value stored to 'ret' is never read Signed-off-by: Adrian Reber --- criu/pie/restorer.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c index 4f42605a0..f2db115ff 100644 --- a/criu/pie/restorer.c +++ b/criu/pie/restorer.c @@ -1511,8 +1511,6 @@ long __export_restore_task(struct task_restore_args *args) } } - ret = 0; - /* * Tune up the task fields. */ From 0df6ddba6974af926a4db171bf8876e762a0d355 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Wed, 15 May 2019 12:17:35 +0000 Subject: [PATCH 0065/2030] proc_parse: fix coverity RESOURCE_LEAK criu-3.12/criu/proc_parse.c:2280: leaked_storage: Variable "dir" going out of scope leaks the storage it points to. 
Signed-off-by: Adrian Reber Signed-off-by: Andrei Vagin --- criu/proc_parse.c | 1 + 1 file changed, 1 insertion(+) diff --git a/criu/proc_parse.c b/criu/proc_parse.c index 1a5722eaf..3d852d755 100644 --- a/criu/proc_parse.c +++ b/criu/proc_parse.c @@ -2277,6 +2277,7 @@ int parse_threads(int pid, struct pid **_t, int *_n) tmp = xrealloc(t, nr * sizeof(struct pid)); if (!tmp) { xfree(t); + closedir(dir); return -1; } t = tmp; From b87b02729accb25f0f9d14c0f1413358c268866d Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Tue, 14 May 2019 16:56:39 +0000 Subject: [PATCH 0066/2030] sk-inet: fix coverity RESOURCE_LEAK criu-3.12/criu/sk-inet.c:822: leaked_handle: Handle variable "sk" going out of scope leaks the handle. Signed-off-by: Adrian Reber --- criu/sk-inet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/criu/sk-inet.c b/criu/sk-inet.c index ebae53113..fed3181f0 100644 --- a/criu/sk-inet.c +++ b/criu/sk-inet.c @@ -823,7 +823,7 @@ static int open_inet_sk(struct file_desc *d, int *new_fd) } if (reset_setsockcreatecon()) - return -1; + goto err; if (ie->v6only) { if (restore_opt(sk, SOL_IPV6, IPV6_V6ONLY, &yes) == -1) From 6f44f78310b7dc070301be4c5871f98dd25f17b2 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Wed, 15 May 2019 12:28:45 +0000 Subject: [PATCH 0067/2030] sk-inet: fix clang 'potential memory leak' criu-3.12/criu/sk-inet.c:581:2: warning: Potential leak of memory pointed to by 'ie.ifname' Signed-off-by: Adrian Reber --- criu/sk-inet.c | 1 + 1 file changed, 1 insertion(+) diff --git a/criu/sk-inet.c b/criu/sk-inet.c index fed3181f0..90ab492ed 100644 --- a/criu/sk-inet.c +++ b/criu/sk-inet.c @@ -579,6 +579,7 @@ err: release_skopts(&skopts); xfree(ie.src_addr); xfree(ie.dst_addr); + xfree(ie.ifname); return err; } From 9406f1be4e97e3da3b3d1b87d738cd7490a3b7d1 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Wed, 15 May 2019 12:33:40 +0000 Subject: [PATCH 0068/2030] sk-queue: fix clang 'potential memory leak' 
criu-3.12/criu/sk-queue.c:272:6: warning: Potential leak of memory pointed to by 'pe.scm' Signed-off-by: Adrian Reber --- criu/sk-queue.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/criu/sk-queue.c b/criu/sk-queue.c index fdf610170..776eb5aaf 100644 --- a/criu/sk-queue.c +++ b/criu/sk-queue.c @@ -273,6 +273,8 @@ err_set_sock: pr_perror("setsockopt failed on restore"); ret = -1; } + if (pe.scm) + release_cmsg(&pe); err_brk: xfree(data); return ret; From 2f74e55a18164d9ab309dd4979853effb4776a72 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Wed, 15 May 2019 12:38:32 +0000 Subject: [PATCH 0069/2030] sk-unix: fix clang 'value is never read' criu-3.12/criu/sk-unix.c:1545:5: warning: Value stored to 'ret' is never read Signed-off-by: Adrian Reber --- criu/sk-unix.c | 1 - 1 file changed, 1 deletion(-) diff --git a/criu/sk-unix.c b/criu/sk-unix.c index 26123515c..35a6befa7 100644 --- a/criu/sk-unix.c +++ b/criu/sk-unix.c @@ -1542,7 +1542,6 @@ static int bind_on_deleted(int sk, struct unix_sk_info *ui) pos = strrchr(path, '/')) { *pos = '\0'; if (rmdir(path)) { - ret = - errno; pr_perror("ghost: Can't remove directory %s on id %#x ino %d", path, ui->ue->id, ui->ue->ino); return -1; From 88d8979b5dd7e8dd85b3b1f8cab4bf6e8960efac Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Wed, 15 May 2019 12:40:50 +0000 Subject: [PATCH 0070/2030] sk-unix: fix coverity RESOURCE_LEAK criu-3.12/criu/sk-unix.c:1893: leaked_handle: Handle variable "sk" going out of scope leaks the handle. 
Signed-off-by: Adrian Reber --- criu/sk-unix.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/criu/sk-unix.c b/criu/sk-unix.c index 35a6befa7..c339ccf56 100644 --- a/criu/sk-unix.c +++ b/criu/sk-unix.c @@ -1888,13 +1888,16 @@ static int open_unixsk_standalone(struct unix_sk_info *ui, int *new_fd) } } - if (bind_unix_sk(sk, ui)) + if (bind_unix_sk(sk, ui)) { + close(sk); return -1; + } if (ui->ue->state == TCP_LISTEN) { pr_info("\tPutting %d into listen state\n", ui->ue->ino); if (listen(sk, ui->ue->backlog) < 0) { pr_perror("Can't make usk listen"); + close(sk); return -1; } ui->listen = 1; From 5a29e253f6c6eba8a45f3b31a2a6f57ff0f4fc71 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Tue, 14 May 2019 18:22:29 +0000 Subject: [PATCH 0071/2030] util: fix coverity FORWARD_NULL criu-3.12/criu/util.c:505: var_deref_model: Passing null pointer "dir" to "dirfd", which dereferences it. (The dereference is assumed on the basis of the 'nonnull' parameter attribute.) Signed-off-by: Adrian Reber --- criu/util.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/criu/util.c b/criu/util.c index 97084939e..04b5c3e71 100644 --- a/criu/util.c +++ b/criu/util.c @@ -500,8 +500,10 @@ static int close_fds(int minfd) int fd, ret, dfd; dir = opendir("/proc/self/fd"); - if (dir == NULL) + if (dir == NULL) { pr_perror("Can't open /proc/self/fd"); + return -1; + } dfd = dirfd(dir); while ((de = readdir(dir))) { From 7765a6c3fcfb5f707c4518ad9fdc59eeb62a99aa Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Wed, 15 May 2019 07:59:23 +0000 Subject: [PATCH 0072/2030] image: fix coverity RESOURCE_LEAK criu-3.12/criu/image.c:103: leaked_storage: Variable "img" going out of scope leaks the storage it points to. 
Signed-off-by: Adrian Reber --- criu/image.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/criu/image.c b/criu/image.c index e1740ce94..5239ab474 100644 --- a/criu/image.c +++ b/criu/image.c @@ -92,6 +92,7 @@ out_close: int write_img_inventory(InventoryEntry *he) { struct cr_img *img; + int ret; pr_info("Writing image inventory (version %u)\n", CRTOOLS_IMAGES_V1); @@ -99,11 +100,12 @@ int write_img_inventory(InventoryEntry *he) if (!img) return -1; - if (pb_write_one(img, he, PB_INVENTORY) < 0) - return -1; + ret = pb_write_one(img, he, PB_INVENTORY); xfree(he->root_ids); close_image(img); + if (ret < 0) + return -1; return 0; } From da652c8b375309f3f609b0225e818443296861e7 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Wed, 15 May 2019 12:36:49 +0000 Subject: [PATCH 0073/2030] sk-unix: fix coverity NULL_RETURNS criu-3.12/criu/sk-unix.c:1225: dereference: Dereferencing "ns", which is known to be "NULL". Signed-off-by: Adrian Reber --- criu/sk-unix.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/criu/sk-unix.c b/criu/sk-unix.c index c339ccf56..f0620e676 100644 --- a/criu/sk-unix.c +++ b/criu/sk-unix.c @@ -1208,14 +1208,14 @@ static int prep_unix_sk_cwd(struct unix_sk_info *ui, int *prev_cwd_fd, if (prev_root_fd && (root_ns_mask & CLONE_NEWNS)) { if (ui->ue->mnt_id >= 0) { ns = lookup_nsid_by_mnt_id(ui->ue->mnt_id); - if (ns == NULL) - goto err; } else { if (root == NULL) root = lookup_ns_by_id(root_item->ids->mnt_ns_id, &mnt_ns_desc); ns = root; } + if (ns == NULL) + goto err; *prev_root_fd = open("/", O_RDONLY); if (*prev_root_fd < 0) { pr_perror("Can't open current root"); From 8accfe6cc9d65cdf2203fc58ac3075c16b729e23 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Sat, 18 May 2019 16:26:53 +0100 Subject: [PATCH 0074/2030] python: Drop six dependency From the python-six module is used only six.string_types in the is_string() function. 
An alternative solution is to use basestring with an additional if statement for Python 3 compatibility. This change avoids the dependency on the six module. However, this module is required by junit_xml and it is not listed as a dependency in the CentOS 7 package python2-junit_xml. Signed-off-by: Radostin Stoyanov --- lib/py/images/pb2dict.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/lib/py/images/pb2dict.py b/lib/py/images/pb2dict.py index 18d4c68eb..4e2c171d5 100644 --- a/lib/py/images/pb2dict.py +++ b/lib/py/images/pb2dict.py @@ -4,7 +4,7 @@ from ipaddress import IPv4Address, ip_address from ipaddress import IPv6Address import socket import collections -import os, six +import os # pb2dict and dict2pb are methods to convert pb to/from dict. # Inspired by: @@ -216,7 +216,10 @@ def get_bytes_dec(field): return decode_base64 def is_string(value): - return isinstance(value, six.string_types) + # Python 3 compatibility + if not hasattr(__builtins__, "basestring"): + basestring = (str, bytes) + return isinstance(value, basestring) def _pb2dict_cast(field, value, pretty = False, is_hex = False): if not is_hex: From bf62dbbe085efadfa08b0562603a7a8d0fce3a87 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Wed, 22 May 2019 15:50:30 +0100 Subject: [PATCH 0075/2030] make: Use asciidoctor by default The final release of asciidoc was on Sep 29, 2017 and the development is continued under asciidoctor. Commit 0493724 (Documentation: Allow to use asciidoctor for formatting man pages) added support for this tool by introducing USE_ASCIIDOCTOR. However, using asciidoctor by default might be a better option. With this change CRIU will use asciidoctor if installed. Otherwise, it will fall back to asciidoc.
Signed-off-by: Radostin Stoyanov --- scripts/build/Dockerfile.alpine | 2 +- scripts/build/Dockerfile.centos | 2 +- scripts/build/Dockerfile.fedora.tmpl | 2 +- scripts/nmk/scripts/tools.mk | 2 ++ scripts/travis/travis-tests | 2 +- 5 files changed, 6 insertions(+), 4 deletions(-) diff --git a/scripts/build/Dockerfile.alpine b/scripts/build/Dockerfile.alpine index aab6184d7..a91e01637 100644 --- a/scripts/build/Dockerfile.alpine +++ b/scripts/build/Dockerfile.alpine @@ -36,7 +36,7 @@ RUN apk add \ bash \ go \ e2fsprogs \ - asciidoc xmlto + asciidoctor # The rpc test cases are running as user #1000, let's add the user RUN adduser -u 1000 -D test diff --git a/scripts/build/Dockerfile.centos b/scripts/build/Dockerfile.centos index d8e70ac47..2ed3a2db9 100644 --- a/scripts/build/Dockerfile.centos +++ b/scripts/build/Dockerfile.centos @@ -32,7 +32,7 @@ RUN yum install -y \ which \ e2fsprogs \ python2-pip \ - asciidoc xmlto + rubygem-asciidoctor COPY . /criu WORKDIR /criu diff --git a/scripts/build/Dockerfile.fedora.tmpl b/scripts/build/Dockerfile.fedora.tmpl index 616b9ec42..22ebaed9c 100644 --- a/scripts/build/Dockerfile.fedora.tmpl +++ b/scripts/build/Dockerfile.fedora.tmpl @@ -33,7 +33,7 @@ RUN dnf install -y \ tar \ which \ e2fsprogs \ - asciidoc xmlto \ + rubygem-asciidoctor \ kmod # Replace coreutils-single with "traditional" coreutils diff --git a/scripts/nmk/scripts/tools.mk b/scripts/nmk/scripts/tools.mk index 8620ded7c..ce3d85dea 100644 --- a/scripts/nmk/scripts/tools.mk +++ b/scripts/nmk/scripts/tools.mk @@ -35,6 +35,8 @@ CTAGS := ctags export RM HOSTLD LD HOSTCC CC CPP AS AR STRIP OBJCOPY OBJDUMP export NM SH MAKE MKDIR AWK PERL PYTHON SH CSCOPE +export USE_ASCIIDOCTOR ?= $(shell which asciidoctor 2>/dev/null) + # # Footer. 
____nmk_defined__tools = y diff --git a/scripts/travis/travis-tests b/scripts/travis/travis-tests index 01a2659f6..47ff199cf 100755 --- a/scripts/travis/travis-tests +++ b/scripts/travis/travis-tests @@ -4,7 +4,7 @@ set -x -e TRAVIS_PKGS="protobuf-c-compiler libprotobuf-c0-dev libaio-dev libprotobuf-dev protobuf-compiler libcap-dev libnl-3-dev gcc-multilib gdb bash python-protobuf - libnet-dev util-linux asciidoc xmlto libnl-route-3-dev" + libnet-dev util-linux asciidoctor libnl-route-3-dev" travis_prep () { [ -n "$SKIP_TRAVIS_PREP" ] && return From 5156f2dc0d126904748bd86fdf8f342a24959aaa Mon Sep 17 00:00:00 2001 From: Pavel Emelianov Date: Thu, 23 May 2019 08:58:04 +0000 Subject: [PATCH 0076/2030] mem: Update stats for overflow page pipes Since commit b5dff62e we skipped updating dump stats for pages that overflowed the page-pipe and thus got flushed in "chunk" mode. Signed-off-by: Pavel Emelyanov Acked-by: Cyrill Gorcunov Signed-off-by: Andrei Vagin --- criu/mem.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/criu/mem.c b/criu/mem.c index f79e04cc4..b1d13188b 100644 --- a/criu/mem.c +++ b/criu/mem.c @@ -170,13 +170,14 @@ static int generate_iovs(struct pstree_item *item, struct vma_area *vma, struct u64 *at = &map[PAGE_PFN(*off)]; unsigned long pfn, nr_to_scan; unsigned long pages[3] = {}; + int ret = 0; nr_to_scan = (vma_area_len(vma) - *off) / PAGE_SIZE; for (pfn = 0; pfn < nr_to_scan; pfn++) { unsigned long vaddr; unsigned int ppb_flags = 0; - int ret; + int st; if (!should_dump_page(vma->e, at[pfn])) continue; @@ -195,19 +196,22 @@ static int generate_iovs(struct pstree_item *item, struct vma_area *vma, struct if (has_parent && page_in_parent(at[pfn] & PME_SOFT_DIRTY)) { ret = page_pipe_add_hole(pp, vaddr, PP_HOLE_PARENT); - pages[0]++; + st = 0; } else { ret = page_pipe_add_page(pp, vaddr, ppb_flags); if (ppb_flags & PPB_LAZY && opts.lazy_pages) - pages[1]++; + st = 1; else - pages[2]++; + st = 2; } if (ret) { - *off 
+= pfn * PAGE_SIZE; - return ret; + /* Do not do pfn++, just bail out */ + pr_debug("Pagemap full\n"); + break; } + + pages[st]++; } *off += pfn * PAGE_SIZE; @@ -219,7 +223,7 @@ static int generate_iovs(struct pstree_item *item, struct vma_area *vma, struct pr_info("Pagemap generated: %lu pages (%lu lazy) %lu holes\n", pages[2] + pages[1], pages[1], pages[0]); - return 0; + return ret; } static struct parasite_dump_pages_args *prep_dump_pages_args(struct parasite_ctl *ctl, From 72ce634f43f6d241faf064eaccae2be0cb51e7de Mon Sep 17 00:00:00 2001 From: Pavel Emelianov Date: Thu, 23 May 2019 08:58:35 +0000 Subject: [PATCH 0077/2030] shmem: Save pages stats too Shmem pages are written in the same set of images as regular pages are, but stats for those are not collected. Fix this, but keep the counts separate to have more info. Signed-off-by: Pavel Emelyanov Acked-by: Cyrill Gorcunov Signed-off-by: Andrei Vagin --- criu/include/stats.h | 4 ++++ criu/shmem.c | 17 +++++++++++++++-- criu/stats.c | 7 +++++++ images/stats.proto | 4 ++++ 4 files changed, 30 insertions(+), 2 deletions(-) diff --git a/criu/include/stats.h b/criu/include/stats.h index 07690b8ea..bab9a0507 100644 --- a/criu/include/stats.h +++ b/criu/include/stats.h @@ -29,6 +29,10 @@ enum { CNT_PAGE_PIPES, CNT_PAGE_PIPE_BUFS, + CNT_SHPAGES_SCANNED, + CNT_SHPAGES_SKIPPED_PARENT, + CNT_SHPAGES_WRITTEN, + DUMP_CNT_NR_STATS, }; diff --git a/criu/shmem.c b/criu/shmem.c index 358d848db..bc9f23bd7 100644 --- a/criu/shmem.c +++ b/criu/shmem.c @@ -11,6 +11,7 @@ #include "image.h" #include "cr_options.h" #include "kerndat.h" +#include "stats.h" #include "page-pipe.h" #include "page-xfer.h" #include "rst-malloc.h" @@ -676,6 +677,7 @@ static int do_dump_one_shmem(int fd, void *addr, struct shmem_info *si) struct page_xfer xfer; int err, ret = -1; unsigned long pfn, nrpages, next_data_pnf = 0, next_hole_pfn = 0; + unsigned long pages[2] = {}; nrpages = (si->size + PAGE_SIZE - 1) / PAGE_SIZE; @@ -693,6 +695,7 @@ static int 
do_dump_one_shmem(int fd, void *addr, struct shmem_info *si) unsigned int pgstate = PST_DIRTY; bool use_mc = true; unsigned long pgaddr; + int st = -1; if (pfn >= next_hole_pfn && next_data_segment(fd, pfn, &next_data_pnf, &next_hole_pfn)) @@ -714,10 +717,13 @@ static int do_dump_one_shmem(int fd, void *addr, struct shmem_info *si) again: if (pgstate == PST_ZERO) ret = 0; - else if (xfer.parent && page_in_parent(pgstate == PST_DIRTY)) + else if (xfer.parent && page_in_parent(pgstate == PST_DIRTY)) { ret = page_pipe_add_hole(pp, pgaddr, PP_HOLE_PARENT); - else + st = 0; + } else { ret = page_pipe_add_page(pp, pgaddr, 0); + st = 1; + } if (ret == -EAGAIN) { ret = dump_pages(pp, &xfer); @@ -727,8 +733,15 @@ again: goto again; } else if (ret) goto err_xfer; + + if (st >= 0) + pages[st]++; } + cnt_add(CNT_SHPAGES_SCANNED, nrpages); + cnt_add(CNT_SHPAGES_SKIPPED_PARENT, pages[0]); + cnt_add(CNT_SHPAGES_WRITTEN, pages[1]); + ret = dump_pages(pp, &xfer); err_xfer: diff --git a/criu/stats.c b/criu/stats.c index 64679b134..a64383542 100644 --- a/criu/stats.c +++ b/criu/stats.c @@ -165,6 +165,13 @@ void write_stats(int what) ds_entry.page_pipe_bufs = dstats->counts[CNT_PAGE_PIPE_BUFS]; ds_entry.has_page_pipe_bufs = true; + ds_entry.shpages_scanned = dstats->counts[CNT_SHPAGES_SCANNED]; + ds_entry.has_shpages_scanned = true; + ds_entry.shpages_skipped_parent = dstats->counts[CNT_SHPAGES_SKIPPED_PARENT]; + ds_entry.has_shpages_skipped_parent = true; + ds_entry.shpages_written = dstats->counts[CNT_SHPAGES_WRITTEN]; + ds_entry.has_shpages_written = true; + name = "dump"; } else if (what == RESTORE_STATS) { stats.restore = &rs_entry; diff --git a/images/stats.proto b/images/stats.proto index d76503441..68d2f1bbb 100644 --- a/images/stats.proto +++ b/images/stats.proto @@ -16,6 +16,10 @@ message dump_stats_entry { required uint64 pages_lazy = 9; optional uint64 page_pipes = 10; optional uint64 page_pipe_bufs = 11; + + optional uint64 shpages_scanned = 12; + optional uint64 
shpages_skipped_parent = 13; + optional uint64 shpages_written = 14; } message restore_stats_entry { From b336fa2e3237f9f93a09aea546fb1164b7c151a1 Mon Sep 17 00:00:00 2001 From: Pavel Emelianov Date: Thu, 23 May 2019 08:59:05 +0000 Subject: [PATCH 0078/2030] zdtm: Check pages stats after dump After dump command -- verify that the amount of bytes counted in stats-dump matches the real sizes of pages-*.img files. Signed-off-by: Pavel Emelyanov Acked-by: Cyrill Gorcunov Signed-off-by: Andrei Vagin --- test/zdtm.py | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/test/zdtm.py b/test/zdtm.py index fb859d1c7..abe92f9d0 100755 --- a/test/zdtm.py +++ b/test/zdtm.py @@ -1098,13 +1098,32 @@ class criu: else: raise test_fail_exc("CRIU %s" % action) + def __stats_file(self, action): + return os.path.join(self.__ddir(), "stats-%s" % action) + def show_stats(self, action): if not self.__show_stats: return - subprocess.Popen([self.__crit_bin, "show", - os.path.join(self.__dump_path, - str(self.__iter), "stats-%s" % action)]).wait() + subprocess.Popen([self.__crit_bin, "show", self.__stats_file(action)]).wait() + + def check_pages_counts(self): + stats_written = -1 + with open(self.__stats_file("dump"), 'rb') as stfile: + stats = crpc.images.load(stfile) + stent = stats['entries'][0]['dump'] + stats_written = int(stent['shpages_written']) + int(stent['pages_written']) + + real_written = 0 + for f in os.listdir(self.__ddir()): + if f.startswith('pages-'): + real_written += os.path.getsize(os.path.join(self.__ddir(), f)) + + r_pages = real_written / 4096 + r_off = real_written % 4096 + if (stats_written != r_pages) or (r_off != 0): + print("ERROR: bad page counts, stats = %d real = %d(%d)" % (stats_written, r_pages, r_off)) + raise test_fail_exc("page counts mismatch") def dump(self, action, opts = []): self.__iter += 1 @@ -1152,6 +1171,7 @@ class criu: self.__criu_act("dedup", opts = []) self.show_stats("dump") + 
self.check_pages_counts() if self.__leave_stopped: pstree_check_stopped(self.__test.getpid()) From 77d88920fb8f59cd688bd659507ea2199f69d3c1 Mon Sep 17 00:00:00 2001 From: Pavel Emelianov Date: Thu, 23 May 2019 08:59:26 +0000 Subject: [PATCH 0079/2030] stats: Make dstats shmem Dumping shmem segments causing stats "pages written" counter to mismatch the real pages* sizes. This is due to ipcns' dumping happens in another process and the relevant shmem dumping counters remain in its address space. Signed-off-by: Pavel Emelyanov Signed-off-by: Andrei Vagin --- criu/stats.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/criu/stats.c b/criu/stats.c index a64383542..7410b5ced 100644 --- a/criu/stats.c +++ b/criu/stats.c @@ -201,7 +201,15 @@ void write_stats(int what) int init_stats(int what) { if (what == DUMP_STATS) { - dstats = xzalloc(sizeof(*dstats)); + /* + * Dumping happens via one process most of the time, + * so we are typically OK with the plain malloc, but + * when dumping namespaces we fork() a separate process + * for it and when it goes and dumps shmem segments + * it will alter the CNT_SHPAGES_ counters, so we need + * to have them in shmem. + */ + dstats = shmalloc(sizeof(*dstats)); return dstats ? 0 : -1; } From 7338a0e74ceca1fb111a27fd327b3e835a2e726f Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Tue, 21 May 2019 00:13:27 -0700 Subject: [PATCH 0080/2030] util: use F_DUPFD when we don't want to overwrite an existing descriptor Right now we use fcntl(F_GETFD) to check whether a target descriptor is used and then we call dup2(). Actually, we can do this for one system call. 
Cc: Cyrill Gorcunov Signed-off-by: Andrei Vagin Reviewed-by: Cyrill Gorcunov Signed-off-by: Andrei Vagin --- criu/include/servicefd.h | 11 ----------- criu/servicefd.c | 37 +++++++++++++++++++++++-------------- criu/util.c | 18 +++++++++--------- 3 files changed, 32 insertions(+), 34 deletions(-) diff --git a/criu/include/servicefd.h b/criu/include/servicefd.h index 7be472cf4..986c46af5 100644 --- a/criu/include/servicefd.h +++ b/criu/include/servicefd.h @@ -35,17 +35,6 @@ struct pstree_item; extern bool sfds_protected; -#define sfd_verify_target(_type, _old_fd, _new_fd) \ - ({ \ - int __ret = 0; \ - if (fcntl(_new_fd, F_GETFD) != -1 && errno != EBADF) { \ - pr_err("%s busy target %d -> %d\n", \ - sfd_type_name(_type), _old_fd, _new_fd); \ - __ret = -1; \ - } \ - __ret; \ - }) - extern const char *sfd_type_name(enum sfd_type type); extern int init_service_fd(void); extern int get_service_fd(enum sfd_type type); diff --git a/criu/servicefd.c b/criu/servicefd.c index 82147921c..dc423895b 100644 --- a/criu/servicefd.c +++ b/criu/servicefd.c @@ -153,6 +153,7 @@ static void sfds_protection_bug(enum sfd_type type) int install_service_fd(enum sfd_type type, int fd) { int sfd = __get_service_fd(type, service_fd_id); + int tmp; BUG_ON((int)type <= SERVICE_FD_MIN || (int)type >= SERVICE_FD_MAX); if (sfds_protected && !test_bit(type, sfd_map)) @@ -166,16 +167,19 @@ int install_service_fd(enum sfd_type type, int fd) return fd; } - if (!test_bit(type, sfd_map)) { - if (sfd_verify_target(type, fd, sfd)) - return -1; - } - - if (dup3(fd, sfd, O_CLOEXEC) != sfd) { + if (!test_bit(type, sfd_map)) + tmp = fcntl(fd, F_DUPFD, sfd); + else + tmp = dup3(fd, sfd, O_CLOEXEC); + if (tmp < 0) { pr_perror("%s dup %d -> %d failed", sfd_type_name(type), fd, sfd); close(fd); return -1; + } else if (tmp != sfd) { + pr_err("%s busy target %d -> %d\n", sfd_type_name(type), fd, sfd); + close(fd); + return -1; } set_bit(type, sfd_map); @@ -201,25 +205,30 @@ int close_service_fd(enum sfd_type type) 
return 0; } -static void move_service_fd(struct pstree_item *me, int type, int new_id, int new_base) +static int move_service_fd(struct pstree_item *me, int type, int new_id, int new_base) { int old = get_service_fd(type); int new = new_base - type - SERVICE_FD_MAX * new_id; int ret; if (old < 0) - return; + return 0; if (!test_bit(type, sfd_map)) - sfd_verify_target(type, old, new); - - ret = dup2(old, new); + ret = fcntl(old, F_DUPFD, new); + else + ret = dup2(old, new); if (ret == -1) { - if (errno != EBADF) - pr_perror("%s unable to clone %d->%d", - sfd_type_name(type), old, new); + pr_perror("%s unable to clone %d->%d", + sfd_type_name(type), old, new); + return -1; + } else if (ret != new) { + pr_err("%s busy target %d -> %d\n", sfd_type_name(type), old, new); + return -1; } else if (!(rsti(me)->clone_flags & CLONE_FILES)) close(old); + + return 0; } static int choose_service_fd_base(struct pstree_item *me) diff --git a/criu/util.c b/criu/util.c index 04b5c3e71..0617c97b3 100644 --- a/criu/util.c +++ b/criu/util.c @@ -230,19 +230,19 @@ int reopen_fd_as_safe(char *file, int line, int new_fd, int old_fd, bool allow_r int tmp; if (old_fd != new_fd) { - if (!allow_reuse_fd) { - if (fcntl(new_fd, F_GETFD) != -1 || errno != EBADF) { - pr_err("fd %d already in use (called at %s:%d)\n", - new_fd, file, line); - return -1; - } - } - - tmp = dup2(old_fd, new_fd); + if (!allow_reuse_fd) + tmp = fcntl(old_fd, F_DUPFD, new_fd); + else + tmp = dup2(old_fd, new_fd); if (tmp < 0) { pr_perror("Dup %d -> %d failed (called at %s:%d)", old_fd, new_fd, file, line); return tmp; + } else if (tmp != new_fd) { + close(tmp); + pr_err("fd %d already in use (called at %s:%d)\n", + new_fd, file, line); + return -1; } /* Just to have error message if failed */ From 31c3f3bc1b40386e626b9d43f2bf7e584a8edf5e Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Sun, 26 May 2019 20:53:57 -0700 Subject: [PATCH 0081/2030] test/s390: add a new path to xtables libraries Signed-off-by: Andrei Vagin
--- test/zdtm/static/socket-tcp-reseted.desc | 6 +++--- test/zdtm/static/socket-tcp-syn-sent.desc | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/test/zdtm/static/socket-tcp-reseted.desc b/test/zdtm/static/socket-tcp-reseted.desc index c0e83aad7..94425b44e 100644 --- a/test/zdtm/static/socket-tcp-reseted.desc +++ b/test/zdtm/static/socket-tcp-reseted.desc @@ -1,8 +1,8 @@ { 'deps': [ '/bin/sh', '/sbin/iptables', - '/usr/lib64/xtables/libxt_tcp.so|/lib/xtables/libxt_tcp.so|/usr/lib/powerpc64le-linux-gnu/xtables/libxt_tcp.so|/usr/lib/x86_64-linux-gnu/xtables/libxt_tcp.so|/usr/lib/xtables/libxt_tcp.so', - '/usr/lib64/xtables/libxt_standard.so|/lib/xtables/libxt_standard.so|/usr/lib/powerpc64le-linux-gnu/xtables/libxt_standard.so|/usr/lib/x86_64-linux-gnu/xtables/libxt_standard.so|/usr/lib/xtables/libxt_standard.so', - '/usr/lib64/xtables/libipt_REJECT.so|/lib/xtables/libipt_REJECT.so|/usr/lib/powerpc64le-linux-gnu/xtables/libipt_REJECT.so|/usr/lib/x86_64-linux-gnu/xtables/libipt_REJECT.so|/usr/lib/xtables/libipt_REJECT.so', + '/usr/lib64/xtables/libxt_tcp.so|/lib/xtables/libxt_tcp.so|/usr/lib/powerpc64le-linux-gnu/xtables/libxt_tcp.so|/usr/lib/x86_64-linux-gnu/xtables/libxt_tcp.so|/usr/lib/xtables/libxt_tcp.so|/usr/lib/s390x-linux-gnu/xtables/libxt_tcp.so', + '/usr/lib64/xtables/libxt_standard.so|/lib/xtables/libxt_standard.so|/usr/lib/powerpc64le-linux-gnu/xtables/libxt_standard.so|/usr/lib/x86_64-linux-gnu/xtables/libxt_standard.so|/usr/lib/xtables/libxt_standard.so|/usr/lib/s390x-linux-gnu/xtables/libxt_standard.so', + '/usr/lib64/xtables/libipt_REJECT.so|/lib/xtables/libipt_REJECT.so|/usr/lib/powerpc64le-linux-gnu/xtables/libipt_REJECT.so|/usr/lib/x86_64-linux-gnu/xtables/libipt_REJECT.so|/usr/lib/xtables/libipt_REJECT.so|/usr/lib/s390x-linux-gnu/xtables/libipt_REJECT.so', ], 'opts': '--tcp-established', 'flags': 'suid nouser samens', diff --git a/test/zdtm/static/socket-tcp-syn-sent.desc b/test/zdtm/static/socket-tcp-syn-sent.desc index 
c5d1faa0e..b9f3d5e6d 100644 --- a/test/zdtm/static/socket-tcp-syn-sent.desc +++ b/test/zdtm/static/socket-tcp-syn-sent.desc @@ -1,7 +1,7 @@ { 'deps': [ '/bin/sh', '/sbin/iptables', - '/usr/lib64/xtables/libxt_tcp.so|/lib/xtables/libxt_tcp.so|/usr/lib/powerpc64le-linux-gnu/xtables/libxt_tcp.so|/usr/lib/x86_64-linux-gnu/xtables/libxt_tcp.so|/usr/lib/xtables/libxt_tcp.so', - '/usr/lib64/xtables/libxt_standard.so|/lib/xtables/libxt_standard.so|/usr/lib/powerpc64le-linux-gnu/xtables/libxt_standard.so|/usr/lib/x86_64-linux-gnu/xtables/libxt_standard.so|/usr/lib/xtables/libxt_standard.so', + '/usr/lib64/xtables/libxt_tcp.so|/lib/xtables/libxt_tcp.so|/usr/lib/powerpc64le-linux-gnu/xtables/libxt_tcp.so|/usr/lib/x86_64-linux-gnu/xtables/libxt_tcp.so|/usr/lib/xtables/libxt_tcp.so|/usr/lib/s390x-linux-gnu/xtables/libxt_tcp.so', + '/usr/lib64/xtables/libxt_standard.so|/lib/xtables/libxt_standard.so|/usr/lib/powerpc64le-linux-gnu/xtables/libxt_standard.so|/usr/lib/x86_64-linux-gnu/xtables/libxt_standard.so|/usr/lib/xtables/libxt_standard.so|/usr/lib/s390x-linux-gnu/xtables/libxt_standard.so', ], 'opts': '--tcp-established', 'flags': 'suid nouser samens', From c399235244cb2a2b7b2c11cf44f785a3be7ed822 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Sun, 26 May 2019 19:34:15 +0100 Subject: [PATCH 0082/2030] aarch64: Remove stack pointer from clobber list Since gcc version 9.1 there is a restriction that the clobber list of an inline assembly should not contain the stack pointer register. https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=9d1cdb749a1 Commit 901f5d4 fixed most of the build failures related to this gcc restriction. This patch resolves a build error that occurs only on aarch64.
Signed-off-by: Radostin Stoyanov --- criu/arch/aarch64/include/asm/restore.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/criu/arch/aarch64/include/asm/restore.h b/criu/arch/aarch64/include/asm/restore.h index 2345a579a..3d794ffb5 100644 --- a/criu/arch/aarch64/include/asm/restore.h +++ b/criu/arch/aarch64/include/asm/restore.h @@ -15,7 +15,7 @@ : "r"(new_sp), \ "r"(restore_task_exec_start), \ "r"(task_args) \ - : "sp", "x0", "memory") + : "x0", "memory") static inline void core_get_tls(CoreEntry *pcore, tls_t *ptls) { From 33bc00a1589c35ce03a749811ab2fe6f9cf86766 Mon Sep 17 00:00:00 2001 From: Pavel Emelianov Date: Tue, 28 May 2019 08:47:51 +0000 Subject: [PATCH 0083/2030] zdtm: Check stats file presence before reading In some cases the stats-dump file can be missing, so do not crash the whole zdtm.py in this case. https://ci.openvz.org/job/CRIU/job/criu-live-migration/job/criu-dev/2362/console Signed-off-by: Pavel Emelyanov --- test/zdtm.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/zdtm.py b/test/zdtm.py index abe92f9d0..a01947557 100755 --- a/test/zdtm.py +++ b/test/zdtm.py @@ -1108,6 +1108,9 @@ class criu: subprocess.Popen([self.__crit_bin, "show", self.__stats_file(action)]).wait() def check_pages_counts(self): + if not os.access(self.__stats_file("dump"), os.R_OK): + return + stats_written = -1 with open(self.__stats_file("dump"), 'rb') as stfile: stats = crpc.images.load(stfile) From 831ae18dacd249bd683785ab6c94fb955c3e16ad Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Tue, 28 May 2019 08:24:58 +0100 Subject: [PATCH 0084/2030] pb2dict: Fix is_string() check for Python 2 In the __main__ module, __builtins__ is the built-in module builtins. In any other module, __builtins__ is an alias for the dictionary of the builtins module itself. [1] Thus, hasattr(__builtins__, "basestring") would only work in __main__ module. 
Since pb2dict is part of pycriu and is intended to be called by modules other than __main__, we can assume that __builtins__ would always be a dictionary (not a module). In Python 2, basestring is a superclass for str and unicode. [2] However, the assignment statement creates a variable basestring in the local scope of the function is_string() which, in Python 2, causes a failure with UnboundLocalError. In order to mitigate this issue the local variable name has been changed to string_types. Fixes #708 [1] https://docs.python.org/2/reference/executionmodel.html#builtins-and-restricted-execution [2] https://docs.python.org/2/library/functions.html#basestring Signed-off-by: Radostin Stoyanov --- lib/py/images/pb2dict.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/lib/py/images/pb2dict.py b/lib/py/images/pb2dict.py index 4e2c171d5..af14db24d 100644 --- a/lib/py/images/pb2dict.py +++ b/lib/py/images/pb2dict.py @@ -217,9 +217,11 @@ def get_bytes_dec(field): def is_string(value): # Python 3 compatibility - if not hasattr(__builtins__, "basestring"): - basestring = (str, bytes) - return isinstance(value, basestring) + if "basestring" in __builtins__: + string_types = basestring + else: + string_types = (str, bytes) + return isinstance(value, string_types) def _pb2dict_cast(field, value, pretty = False, is_hex = False): if not is_hex: From cd58de7a2b8abec9efee21bf30f6e0ab53131a52 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Wed, 29 May 2019 11:58:10 +0100 Subject: [PATCH 0085/2030] pb2dict: Resolve Python 2/3 compatibility issues In Python 3, bytes has only a decode() method, and string has only an encode() method. [1] The modules quopri and base64 from the Python Standard Library perform quoted-printable transport encoding and decoding with both Python 2 [2] and Python 3 [3]. 
[1] https://docs.python.org/3/howto/pyporting.html#text-versus-binary-data [2] https://docs.python.org/2/library/quopri.html [3] https://docs.python.org/3/library/quopri.html Signed-off-by: Radostin Stoyanov --- lib/py/images/pb2dict.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/lib/py/images/pb2dict.py b/lib/py/images/pb2dict.py index af14db24d..c4ce736e8 100644 --- a/lib/py/images/pb2dict.py +++ b/lib/py/images/pb2dict.py @@ -5,6 +5,12 @@ from ipaddress import IPv6Address import socket import collections import os +import base64 +import quopri + +if "encodebytes" not in dir(base64): + base64.encodebytes = base64.encodestring + base64.decodebytes = base64.decodestring # pb2dict and dict2pb are methods to convert pb to/from dict. # Inspired by: @@ -189,14 +195,14 @@ def encode_dev(field, value): return dev[0] << kern_minorbits | dev[1] def encode_base64(value): - return value.encode('base64') + return base64.encodebytes(value) def decode_base64(value): - return value.decode('base64') + return base64.decodebytes(value) def encode_unix(value): - return value.encode('quopri') + return quopri.encodestring(value) def decode_unix(value): - return value.decode('quopri') + return quopri.decodestring(value) encode = { 'unix_name': encode_unix } decode = { 'unix_name': decode_unix } From 37d3781576dce63fc8ede7de7d72d2f1c12d654a Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Wed, 29 May 2019 17:15:04 +0100 Subject: [PATCH 0086/2030] cr-check: Add check for mremap() of special mappings During restore any VMA that's a subject to ASLR should be moved at the same address as was on a checkpoint. Previously, ports to non-x86 architectures had problems with VDSO mremap(). On those platforms kernel needs "landing" for return to userspace in some cases. Usually, vdso provides this landing and finishes restoring of registers. That's `int80_landing_pad` on ia32. On arm64/arm32 it's sigtrap for SA_RESTORER - to proceed after signal processing. 
That's why kernel needs to track the position of landing. On modern kernels for platform we support it's already done - however, for older kernels some patches needs to be backported for C/R. Provide the checks for mremap() of special VMAs: that CRIU has suitable kernel to work on and if we'll have some new platforms - that kernel tracks the position of landing. Signed-off-by: Dmitry Safonov --- criu/cr-check.c | 174 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 173 insertions(+), 1 deletion(-) diff --git a/criu/cr-check.c b/criu/cr-check.c index e24668305..75a665cfb 100644 --- a/criu/cr-check.c +++ b/criu/cr-check.c @@ -582,7 +582,7 @@ static pid_t fork_and_ptrace_attach(int (*child_setup)(void)) return pid; } -static int check_ptrace_peeksiginfo() +static int check_ptrace_peeksiginfo(void) { struct ptrace_peeksiginfo_args arg; siginfo_t siginfo; @@ -611,6 +611,177 @@ static int check_ptrace_peeksiginfo() return ret; } +struct special_mapping { + const char *name; + void *addr; + size_t size; +}; + +static int parse_special_maps(struct special_mapping *vmas, size_t nr) +{ + FILE *maps; + char buf[256]; + int ret = 0; + + maps = fopen_proc(PROC_SELF, "maps"); + if (!maps) + return -1; + + while (fgets(buf, sizeof(buf), maps)) { + unsigned long start, end; + int r, tail; + size_t i; + + r = sscanf(buf, "%lx-%lx %*s %*s %*s %*s %n\n", + &start, &end, &tail); + if (r != 2) { + fclose(maps); + pr_err("Bad maps format %d.%d (%s)\n", r, tail, buf + tail); + return -1; + } + + for (i = 0; i < nr; i++) { + if (strcmp(buf + tail, vmas[i].name) != 0) + continue; + if (vmas[i].addr != MAP_FAILED) { + pr_err("Special mapping meet twice: %s\n", vmas[i].name); + ret = -1; + goto out; + } + vmas[i].addr = (void *)start; + vmas[i].size = end - start; + } + } + +out: + fclose(maps); + return ret; +} + +static void dummy_sighandler(int sig) +{ +} + +/* + * The idea of test is checking if the kernel correctly tracks positions + * of special_mappings: 
vdso/vvar/sigpage/... + * Per-architecture commits added handling for mremap() somewhere between + * v4.8...v4.14. If the kernel doesn't have one of those patches, + * a process will crash after receiving a signal (we use SIGUSR1 for + * the test here). That's because after processing a signal the kernel + * needs a "landing" to return to userspace, which is based on vdso/sigpage. + * If the kernel doesn't track the position of the mapping - we land in the void. + * And we definitely need mremap() support, because those special_mappings + * are subject to ASLR. (See #288 as a reference) + */ +static void check_special_mapping_mremap_child(struct special_mapping *vmas, + size_t nr) +{ + size_t i, parking_size = 0; + void *parking_lot; + pid_t self = getpid(); + + for (i = 0; i < nr; i++) { + if (vmas[i].addr != MAP_FAILED) + parking_size += vmas[i].size; + } + + if (signal(SIGUSR1, dummy_sighandler) == SIG_ERR) { + pr_perror("signal() failed"); + exit(1); + } + + parking_lot = mmap(NULL, parking_size, PROT_NONE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (parking_lot == MAP_FAILED) { + pr_perror("mmap(%zu) failed", parking_size); + exit(1); + } + + for (i = 0; i < nr; i++) { + unsigned long ret; + + if (vmas[i].addr == MAP_FAILED) + continue; + + ret = syscall(__NR_mremap, (unsigned long)vmas[i].addr, + vmas[i].size, vmas[i].size, + MREMAP_FIXED | MREMAP_MAYMOVE, + (unsigned long)parking_lot); + if (ret != (unsigned long)parking_lot) + syscall(__NR_exit, 1); + parking_lot += vmas[i].size; + } + + syscall(__NR_kill, self, SIGUSR1); + syscall(__NR_exit, 0); +} + +static int check_special_mapping_mremap(void) +{ + struct special_mapping special_vmas[] = { + { + .name = "[vvar]\n", + .addr = MAP_FAILED, + }, + { + .name = "[vdso]\n", + .addr = MAP_FAILED, + }, + { + .name = "[sigpage]\n", + .addr = MAP_FAILED, + }, + /* XXX: { .name = "[uprobes]\n" }, */ + /* + * Not subjects for ASLR, skipping: + * { .name = "[vectors]\n", }, + * { .name = "[vsyscall]\n" }, + */ + };
+ size_t vmas_nr = ARRAY_SIZE(special_vmas); + pid_t child; + int stat; + + if (parse_special_maps(special_vmas, vmas_nr)) + return -1; + + child = fork(); + if (child < 0) { + pr_perror("%s(): failed to fork()", __func__); + return -1; + } + + if (child == 0) + check_special_mapping_mremap_child(special_vmas, vmas_nr); + + if (waitpid(child, &stat, 0) != child) { + if (errno == ECHILD) { + pr_err("BUG: Someone waited for the child already\n"); + return -1; + } + /* Probably, we're interrupted with a signal - cleanup */ + pr_err("Failed to wait for a child %d\n", errno); + kill(child, SIGKILL); + return -1; + } + + if (WIFSIGNALED(stat)) { + pr_err("Child killed by signal %d\n", WTERMSIG(stat)); + pr_err("Your kernel probably lacks the support for mremapping special mappings\n"); + return -1; + } else if (WIFEXITED(stat)) { + if (WEXITSTATUS(stat) == 0) + return 0; + pr_err("Child exited with %d\n", WEXITSTATUS(stat)); + return -1; + } + + pr_err("BUG: waitpid() returned stat=%d\n", stat); + /* We're not killing the child here - it's predestined to die anyway. */ + return -1; +} + static int check_ptrace_suspend_seccomp(void) { pid_t pid; @@ -1172,6 +1343,7 @@ int cr_check(void) CHECK_CAT1(check_ipc()); CHECK_CAT1(check_sigqueuinfo()); CHECK_CAT1(check_ptrace_peeksiginfo()); + CHECK_CAT1(check_special_mapping_mremap()); /* * Category 2 - required for specific cases. 
From 8824579c8ae31aae0dfa1a02619566c3acedade8 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Wed, 29 May 2019 17:15:05 +0100 Subject: [PATCH 0087/2030] vdso/arm: Add vdso symbols from kernel Signed-off-by: Dmitry Safonov --- criu/arch/arm/include/asm/vdso.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 criu/arch/arm/include/asm/vdso.h diff --git a/criu/arch/arm/include/asm/vdso.h b/criu/arch/arm/include/asm/vdso.h new file mode 100644 index 000000000..cf9d500be --- /dev/null +++ b/criu/arch/arm/include/asm/vdso.h @@ -0,0 +1,17 @@ +#ifndef __CR_ASM_VDSO_H__ +#define __CR_ASM_VDSO_H__ + +#include "asm/int.h" +#include "asm-generic/vdso.h" + +/* This definition is used in pie/util-vdso.c to initialize the vdso symbol + * name string table 'vdso_symbols' + * + * Poke from kernel file arch/arm/vdso/vdso.lds.S + */ +#define VDSO_SYMBOL_MAX 2 +#define ARCH_VDSO_SYMBOLS \ + "__vdso_clock_gettime", \ + "__vdso_gettimeofday" + +#endif /* __CR_ASM_VDSO_H__ */ From d2b4eddf52e3675524b9dbb93d5573dfb724c27a Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Wed, 29 May 2019 17:15:06 +0100 Subject: [PATCH 0088/2030] parasite-vdso: Add ugly casts for arm32 criu/pie/parasite-vdso.c: In function 'remap_rt_vdso': criu/pie/parasite-vdso.c:144:17: error: cast to pointer from integer of different size [-Werror=int-to-pointer-cast] if (sys_munmap((void *)vma_vdso->start, vma_entry_len(vma_vdso))) { ^ criu/pie/parasite-vdso.c:154:17: error: cast to pointer from integer of different size [-Werror=int-to-pointer-cast] if (sys_munmap((void *)vma_vvar->start, vma_entry_len(vma_vvar))) { ^ cc1: all warnings being treated as errors Signed-off-by: Dmitry Safonov --- criu/pie/parasite-vdso.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/criu/pie/parasite-vdso.c b/criu/pie/parasite-vdso.c index dc73fb53e..90e20f767 100644 --- a/criu/pie/parasite-vdso.c +++ b/criu/pie/parasite-vdso.c @@ -127,11 +127,17 @@ static int 
remap_rt_vdso(VmaEntry *vma_vdso, VmaEntry *vma_vvar, { unsigned long rt_vvar_addr = vdso_rt_parked_at; unsigned long rt_vdso_addr = vdso_rt_parked_at; + void *remap_addr; int ret; pr_info("Runtime vdso/vvar matches dumpee, remap inplace\n"); - if (sys_munmap((void *)vma_vdso->start, vma_entry_len(vma_vdso))) { + /* + * Ugly casts for 32bit platforms, which don't like uint64_t + * cast to (void *) + */ + remap_addr = (void *)(uintptr_t)vma_vdso->start; + if (sys_munmap(remap_addr, vma_entry_len(vma_vdso))) { pr_err("Failed to unmap dumpee vdso\n"); return -1; } @@ -141,7 +147,8 @@ static int remap_rt_vdso(VmaEntry *vma_vdso, VmaEntry *vma_vvar, vma_vdso->start, sym_rt->vdso_size); } - if (sys_munmap((void *)vma_vvar->start, vma_entry_len(vma_vvar))) { + remap_addr = (void *)(uintptr_t)vma_vvar->start; + if (sys_munmap(remap_addr, vma_entry_len(vma_vvar))) { pr_err("Failed to unmap dumpee vvar\n"); return -1; } From d6ce398eb42319ee546a9064e9ce346c59cfa294 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Wed, 29 May 2019 17:15:07 +0100 Subject: [PATCH 0089/2030] vdso/arm32: Add vdso trampoline support Signed-off-by: Dmitry Safonov --- criu/arch/arm/vdso-pie.c | 58 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 criu/arch/arm/vdso-pie.c diff --git a/criu/arch/arm/vdso-pie.c b/criu/arch/arm/vdso-pie.c new file mode 100644 index 000000000..0ec8bd9a8 --- /dev/null +++ b/criu/arch/arm/vdso-pie.c @@ -0,0 +1,58 @@ +#include + +#include "asm/types.h" + +#include +#include +#include "parasite-vdso.h" +#include "log.h" +#include "common/bug.h" + +#ifdef LOG_PREFIX +# undef LOG_PREFIX +#endif +#define LOG_PREFIX "vdso: " + +static void insert_trampoline(uintptr_t from, uintptr_t to) +{ + struct { + uint32_t ldr_pc; + uint32_t imm32; + uint32_t guards; + } __packed jmp = { + .ldr_pc = 0xe51ff004, /* ldr pc, [pc, #-4] */ + .imm32 = to, + .guards = 0xe1200070, /* bkpt 0x0000 */ + }; + void *iflush_start = (void *)from; + void 
*iflush_end = iflush_start + sizeof(jmp); + + memcpy((void *)from, &jmp, sizeof(jmp)); + + __builtin___clear_cache(iflush_start, iflush_end); +} + +int vdso_redirect_calls(unsigned long base_to, unsigned long base_from, + struct vdso_symtable *sto, struct vdso_symtable *sfrom, + bool compat_vdso) +{ + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(sto->symbols); i++) { + uintptr_t from, to; + + if (vdso_symbol_empty(&sfrom->symbols[i])) + continue; + + pr_debug("jmp: %lx/%lx -> %lx/%lx (index %d)\n", + base_from, sfrom->symbols[i].offset, + base_to, sto->symbols[i].offset, i); + + from = base_from + sfrom->symbols[i].offset; + to = base_to + sto->symbols[i].offset; + + insert_trampoline(from, to); + } + + return 0; +} From a856c48e7756ba4f6f2cf5a2e14efadc413e3368 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov <0x7f454c46@gmail.com> Date: Wed, 29 May 2019 17:15:08 +0100 Subject: [PATCH 0090/2030] arm: Build {pie-, }util-vdso with CONFIG_VDSO_32 Signed-off-by: Dmitry Safonov <0x7f454c46@gmail.com> --- Makefile | 2 +- criu/Makefile.crtools | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index cee8a42c9..475d4abaf 100644 --- a/Makefile +++ b/Makefile @@ -35,7 +35,7 @@ endif # Architecture specific options. 
ifeq ($(ARCH),arm) ARMV := $(shell echo $(UNAME-M) | sed -nr 's/armv([[:digit:]]).*/\1/p; t; i7') - DEFINES := -DCONFIG_ARMV$(ARMV) + DEFINES := -DCONFIG_ARMV$(ARMV) -DCONFIG_VDSO_32 ifeq ($(ARMV),6) USERCFLAGS += -march=armv6 diff --git a/criu/Makefile.crtools b/criu/Makefile.crtools index 13ea22775..dd4dc3783 100644 --- a/criu/Makefile.crtools +++ b/criu/Makefile.crtools @@ -87,7 +87,7 @@ obj-y += servicefd.o ifeq ($(VDSO),y) obj-y += pie-util-vdso.o obj-y += vdso.o -obj-y += pie-util-vdso-elf32.o +obj-$(CONFIG_COMPAT) += pie-util-vdso-elf32.o CFLAGS_pie-util-vdso-elf32.o += -DCONFIG_VDSO_32 obj-$(CONFIG_COMPAT) += vdso-compat.o CFLAGS_REMOVE_vdso-compat.o += $(CFLAGS-ASAN) $(CFLAGS-GCOV) From 934a050788bd9b508ae22441f434a93d23e8ac10 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov <0x7f454c46@gmail.com> Date: Wed, 29 May 2019 17:15:09 +0100 Subject: [PATCH 0091/2030] arm: Provide aeabi helpers in ARM format We're building PIEs in arm format rather than in thumb. Copy helpers from libgcc, provide a proper define and link them into blobs. Also substitute tabs by spaces, how it should have been in pie/Makefile - tabs are for recipes. 
Fixes: LINK criu/pie/parasite.built-in.o criu/pie/pie.lib.a(util-vdso.o): In function `elf_hash': /criu/criu/pie/util-vdso.c:61: undefined reference to `__aeabi_uidivmod' /criu/scripts/nmk/scripts/build.mk:209: recipe for target 'criu/pie/parasite.built-in.o' failed Signed-off-by: Dmitry Safonov <0x7f454c46@gmail.com> --- criu/arch/arm/aeabi-helpers.S | 96 +++++++++++++ criu/arch/arm/uidiv.S | 186 -------------------------- criu/pie/Makefile | 4 +- criu/pie/Makefile.library | 4 + include/common/arch/arm/asm/linkage.h | 4 + 5 files changed, 106 insertions(+), 188 deletions(-) create mode 100644 criu/arch/arm/aeabi-helpers.S delete mode 100644 criu/arch/arm/uidiv.S diff --git a/criu/arch/arm/aeabi-helpers.S b/criu/arch/arm/aeabi-helpers.S new file mode 100644 index 000000000..ea8561d48 --- /dev/null +++ b/criu/arch/arm/aeabi-helpers.S @@ -0,0 +1,96 @@ +/* + * Code borrowed from gcc, arm/lib1funcs.S + * and adapted to CRIU macros. + */ + +#if defined(__thumb__) +/* + * We don't support compiling PIEs in Thumb mode, + * see top Makefile for details (ARM CFLAGS_PIE section). +*/ +#error Unsupported Thumb mode +#endif + +#include "common/asm/linkage.h" + +#define RET bx lr +#define RETc(x) bx##x lr +#define LSYM(x) .x + +.macro do_it cond, suffix="" +.endm + +.macro ARM_DIV2_ORDER divisor, order + clz \order, \divisor + rsb \order, \order, #31 +.endm + +.macro ARM_DIV_BODY dividend, divisor, result, curbit + clz \curbit, \dividend + clz \result, \divisor + sub \curbit, \result, \curbit + rsbs \curbit, \curbit, #31 + addne \curbit, \curbit, \curbit, lsl #1 + mov \result, #0 + addne pc, pc, \curbit, lsl #2 + nop + .set shift, 32 + .rept 32 + .set shift, shift - 1 + cmp \dividend, \divisor, lsl #shift + adc \result, \result, \result + subcs \dividend, \dividend, \divisor, lsl #shift + .endr +.endm + +/* + * XXX: as an optimization add udiv instruction based version. 
+ * It's possible to check if CPU supports the instruction by + * reading Instruction Set Attribute Register (ID_ISAR0) + * and checking fields "Divide_instrs". + */ +ENTRY(__aeabi_uidiv) + /* Note: if called via udivsi3_skip_div0_test, this will unnecessarily + check for division-by-zero a second time. */ +LSYM(udivsi3_skip_div0_test): + subs r2, r1, #1 + do_it eq + RETc(eq) + bcc LSYM(Ldiv0) + cmp r0, r1 + bls 11f + tst r1, r2 + beq 12f + + ARM_DIV_BODY r0, r1, r2, r3 + + mov r0, r2 + RET + +11: do_it eq, e + moveq r0, #1 + movne r0, #0 + RET + +12: ARM_DIV2_ORDER r1, r2 + + mov r0, r0, lsr r2 + RET + +LSYM(Ldiv0): + .byte 0xf0, 0x01, 0xf0, 0xe7 @ the instruction UDF #32 generates the signal SIGTRAP in Linux + +END(__aeabi_uidiv) +ALIAS(__udivsi3, __aeabi_uidiv) + +ENTRY(__aeabi_uidivmod) + cmp r1, #0 + beq LSYM(Ldiv0) + stmfd sp!, { r0, r1, lr } + bl LSYM(udivsi3_skip_div0_test) + ldmfd sp!, { r1, r2, lr } + mul r3, r2, r0 + sub r1, r1, r3 + RET +END(__aeabi_uidivmod) +ALIAS(__umodsi3, __aeabi_uidiv) diff --git a/criu/arch/arm/uidiv.S b/criu/arch/arm/uidiv.S deleted file mode 100644 index e77f6100c..000000000 --- a/criu/arch/arm/uidiv.S +++ /dev/null @@ -1,186 +0,0 @@ -.globl __aeabi_uidiv - -work .req r4 @ XXXX is this safe ? -dividend .req r0 -divisor .req r1 -overdone .req r2 -result .req r2 -curbit .req r3 - -#define LSYM(x) x - -.macro THUMB_DIV_MOD_BODY modulo - @ Load the constant 0x10000000 into our work register. - mov work, #1 - lsl work, #28 -LSYM(Loop1): - @ Unless the divisor is very big, shift it up in multiples of - @ four bits, since this is the amount of unwinding in the main - @ division loop. Continue shifting until the divisor is - @ larger than the dividend. 
- cmp divisor, work - bhs LSYM(Lbignum) - cmp divisor, dividend - bhs LSYM(Lbignum) - lsl divisor, #4 - lsl curbit, #4 - b LSYM(Loop1) -LSYM(Lbignum): - @ Set work to 0x80000000 - lsl work, #3 -LSYM(Loop2): - @ For very big divisors, we must shift it a bit at a time, or - @ we will be in danger of overflowing. - cmp divisor, work - bhs LSYM(Loop3) - cmp divisor, dividend - bhs LSYM(Loop3) - lsl divisor, #1 - lsl curbit, #1 - b LSYM(Loop2) -LSYM(Loop3): - @ Test for possible subtractions ... - .if \modulo - @ ... On the final pass, this may subtract too much from the dividend, - @ so keep track of which subtractions are done, we can fix them up - @ afterwards. - mov overdone, #0 - cmp dividend, divisor - blo LSYM(Lover1) - sub dividend, dividend, divisor -LSYM(Lover1): - lsr work, divisor, #1 - cmp dividend, work - blo LSYM(Lover2) - sub dividend, dividend, work - mov ip, curbit - mov work, #1 - ror curbit, work - orr overdone, curbit - mov curbit, ip -LSYM(Lover2): - lsr work, divisor, #2 - cmp dividend, work - blo LSYM(Lover3) - sub dividend, dividend, work - mov ip, curbit - mov work, #2 - ror curbit, work - orr overdone, curbit - mov curbit, ip -LSYM(Lover3): - lsr work, divisor, #3 - cmp dividend, work - blo LSYM(Lover4) - sub dividend, dividend, work - mov ip, curbit - mov work, #3 - ror curbit, work - orr overdone, curbit - mov curbit, ip -LSYM(Lover4): - mov ip, curbit - .else - @ ... and note which bits are done in the result. On the final pass, - @ this may subtract too much from the dividend, but the result will be ok, - @ since the "bit" will have been shifted out at the bottom. 
- cmp dividend, divisor - blo LSYM(Lover1) - sub dividend, dividend, divisor - orr result, result, curbit -LSYM(Lover1): - lsr work, divisor, #1 - cmp dividend, work - blo LSYM(Lover2) - sub dividend, dividend, work - lsr work, curbit, #1 - orr result, work -LSYM(Lover2): - lsr work, divisor, #2 - cmp dividend, work - blo LSYM(Lover3) - sub dividend, dividend, work - lsr work, curbit, #2 - orr result, work -LSYM(Lover3): - lsr work, divisor, #3 - cmp dividend, work - blo LSYM(Lover4) - sub dividend, dividend, work - lsr work, curbit, #3 - orr result, work -LSYM(Lover4): - .endif - - cmp dividend, #0 @ Early termination? - beq LSYM(Lover5) - lsr curbit, #4 @ No, any more bits to do? - beq LSYM(Lover5) - lsr divisor, #4 - b LSYM(Loop3) -LSYM(Lover5): - .if \modulo - @ Any subtractions that we should not have done will be recorded in - @ the top three bits of "overdone". Exactly which were not needed - @ are governed by the position of the bit, stored in ip. - mov work, #0xe - lsl work, #28 - and overdone, work - beq LSYM(Lgot_result) - - @ If we terminated early, because dividend became zero, then the - @ bit in ip will not be in the bottom nibble, and we should not - @ perform the additions below. We must test for this though - @ (rather relying upon the TSTs to prevent the additions) since - @ the bit in ip could be in the top two bits which might then match - @ with one of the smaller RORs. 
- mov curbit, ip - mov work, #0x7 - tst curbit, work - beq LSYM(Lgot_result) - - mov curbit, ip - mov work, #3 - ror curbit, work - tst overdone, curbit - beq LSYM(Lover6) - lsr work, divisor, #3 - add dividend, work -LSYM(Lover6): - mov curbit, ip - mov work, #2 - ror curbit, work - tst overdone, curbit - beq LSYM(Lover7) - lsr work, divisor, #2 - add dividend, work -LSYM(Lover7): - mov curbit, ip - mov work, #1 - ror curbit, work - tst overdone, curbit - beq LSYM(Lgot_result) - lsr work, divisor, #1 - add dividend, work - .endif -LSYM(Lgot_result): -.endm - - - .thumb - .text - -__aeabi_uidiv: - mov curbit, #1 - mov result, #0 - - push { work } - cmp dividend, divisor - blo LSYM(Lgot_result) - - THUMB_DIV_MOD_BODY 0 - - mov r0, result - pop { work } - - bx lr diff --git a/criu/pie/Makefile b/criu/pie/Makefile index bdff44816..ade186346 100644 --- a/criu/pie/Makefile +++ b/criu/pie/Makefile @@ -10,8 +10,8 @@ ccflags-y += -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0 ccflags-y += -Wp,-U_FORTIFY_SOURCE -Wp,-D_FORTIFY_SOURCE=0 ifneq ($(filter-out clean mrproper,$(MAKECMDGOALS)),) - LDFLAGS += $(shell $(COMPEL_BIN) ldflags) - compel_plugins := $(shell $(COMPEL_BIN) plugins) + LDFLAGS += $(shell $(COMPEL_BIN) ldflags) + compel_plugins := $(shell $(COMPEL_BIN) plugins) endif LDS := compel/arch/$(SRCARCH)/scripts/compel-pack.lds.S diff --git a/criu/pie/Makefile.library b/criu/pie/Makefile.library index 0a33a8861..2d2d1faf1 100644 --- a/criu/pie/Makefile.library +++ b/criu/pie/Makefile.library @@ -23,5 +23,9 @@ ifeq ($(SRCARCH),x86) CFLAGS_util-vdso-elf32.o += -DCONFIG_VDSO_32 endif +ifeq ($(SRCARCH),arm) + lib-y += ./$(ARCH_DIR)/aeabi-helpers.o +endif + CFLAGS := $(filter-out -pg $(CFLAGS-GCOV) $(CFLAGS-ASAN),$(CFLAGS)) ccflags-y += $(CFLAGS_PIE) diff --git a/include/common/arch/arm/asm/linkage.h b/include/common/arch/arm/asm/linkage.h index 738064233..a93898be5 100644 --- a/include/common/arch/arm/asm/linkage.h +++ b/include/common/arch/arm/asm/linkage.h @@ -19,6 +19,10 @@ 
#define END(sym) \ .size sym, . - sym +#define ALIAS(sym_new, sym_old) \ + .globl sym_new; \ + .set sym_new, sym_old + #endif /* __ASSEMBLY__ */ #endif /* __CR_LINKAGE_H__ */ From 670648c931d6851e2483576fa25158001b4b4ba0 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Wed, 29 May 2019 17:15:10 +0100 Subject: [PATCH 0092/2030] arm/pie: Provide __clear_cache() After patching code - we need to flush CPU cache, it's done with __builtin___clear_cache(). As we don't link to libgcc, provide a helper that wraps ARM-specific syscall. Fixes: LINK criu/pie/restorer.built-in.o ld: ./criu/arch/arm/vdso-pie.o: in function `insert_trampoline': /root/criu/criu/arch/arm/vdso-pie.c:32: undefined reference to `__clear_cache' Signed-off-by: Dmitry Safonov --- compel/arch/arm/plugins/std/syscalls/syscall.def | 1 + criu/arch/arm/pie-cacheflush.c | 7 +++++++ criu/pie/Makefile.library | 1 + 3 files changed, 9 insertions(+) create mode 100644 criu/arch/arm/pie-cacheflush.c diff --git a/compel/arch/arm/plugins/std/syscalls/syscall.def b/compel/arch/arm/plugins/std/syscalls/syscall.def index bcd61d4a1..653a7539b 100644 --- a/compel/arch/arm/plugins/std/syscalls/syscall.def +++ b/compel/arch/arm/plugins/std/syscalls/syscall.def @@ -110,3 +110,4 @@ gettimeofday 169 78 (struct timeval *tv, struct timezone *tz) preadv_raw 69 361 (int fd, struct iovec *iov, unsigned long nr, unsigned long pos_l, unsigned long pos_h) userfaultfd 282 388 (int flags) fallocate 47 352 (int fd, int mode, loff_t offset, loff_t len) +cacheflush ! 
983042 (void *start, void *end, int flags) diff --git a/criu/arch/arm/pie-cacheflush.c b/criu/arch/arm/pie-cacheflush.c new file mode 100644 index 000000000..e6fd71f1e --- /dev/null +++ b/criu/arch/arm/pie-cacheflush.c @@ -0,0 +1,7 @@ +#include + +/* That's __builtin___clear_cache() to flush CPU cache */ +void __clear_cache(void *start, void *end) +{ + sys_cacheflush(start, end, 0); +} diff --git a/criu/pie/Makefile.library b/criu/pie/Makefile.library index 2d2d1faf1..b1ac600c6 100644 --- a/criu/pie/Makefile.library +++ b/criu/pie/Makefile.library @@ -25,6 +25,7 @@ endif ifeq ($(SRCARCH),arm) lib-y += ./$(ARCH_DIR)/aeabi-helpers.o + lib-y += ./$(ARCH_DIR)/pie-cacheflush.o endif CFLAGS := $(filter-out -pg $(CFLAGS-GCOV) $(CFLAGS-ASAN),$(CFLAGS)) From b2efa720ce91c7f032869c6fcfc6aebb8ea95447 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Wed, 29 May 2019 17:15:11 +0100 Subject: [PATCH 0093/2030] rt-vdso: Be verbose if !blobs_matches() (00.251007) pie: 4: vdso: Runtime vdso mismatches dumpee, generate proxy And I want to know why :) Signed-off-by: Dmitry Safonov --- criu/pie/parasite-vdso.c | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/criu/pie/parasite-vdso.c b/criu/pie/parasite-vdso.c index 90e20f767..00bc2bffa 100644 --- a/criu/pie/parasite-vdso.c +++ b/criu/pie/parasite-vdso.c @@ -95,23 +95,44 @@ static int __vdso_fill_symtable(uintptr_t mem, size_t size, static bool blobs_matches(VmaEntry *vdso_img, VmaEntry *vvar_img, struct vdso_symtable *sym_img, struct vdso_symtable *sym_rt) { + unsigned long vdso_size = vma_entry_len(vdso_img); + unsigned long rt_vdso_size = sym_rt->vdso_size; size_t i; - if (vma_entry_len(vdso_img) != sym_rt->vdso_size) + if (vdso_size != rt_vdso_size) { + pr_info("size differs: %lx != %lx (rt)\n", + vdso_size, rt_vdso_size); return false; + } for (i = 0; i < ARRAY_SIZE(sym_img->symbols); i++) { - if (sym_img->symbols[i].offset != sym_rt->symbols[i].offset) + unsigned long sym_offset = 
sym_img->symbols[i].offset; + unsigned long rt_sym_offset = sym_rt->symbols[i].offset; + char *sym_name = sym_img->symbols[i].name; + + if (sym_offset != rt_sym_offset) { + pr_info("[%zu]`%s` offset differs: %lx != %lx (rt)\n", + i, sym_name, sym_offset, rt_sym_offset); return false; + } } if (vvar_img && sym_rt->vvar_size != VVAR_BAD_SIZE) { bool vdso_firstly = (vvar_img->start > vdso_img->start); + unsigned long vvar_size = vma_entry_len(vvar_img); + unsigned long rt_vvar_size = sym_rt->vvar_size; - if (sym_rt->vvar_size != vma_entry_len(vvar_img)) + if (vvar_size != rt_vvar_size) { + pr_info("vvar size differs: %lx != %lx (rt)\n", + vdso_size, rt_vdso_size); return false; + } - return (vdso_firstly == sym_rt->vdso_before_vvar); + if (vdso_firstly != sym_rt->vdso_before_vvar) { + pr_info("[%s] pair has different order\n", + vdso_firstly ? "vdso/vvar" : "vvar/vdso"); + return false; + } } return true; From ee6412fa20e683868cbc33ba4ff6526bf5523e82 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Wed, 29 May 2019 17:15:12 +0100 Subject: [PATCH 0094/2030] x86/vdso: Don't insert trampolines in vsyscall The patch "util-vdso: Check chain for STN_UNDEF" fixed an issue about not discovering present symbols on vdso. While it's a good and a proper fix, as the result __kernel_vsyscall started being patched. Which in result broke zdtm trampoline test on ia32. So, let's omit patching vsyscall while #512 issue is not fixed. We might actually refrain patching it for long time as it doesn't access vvar, so there is little sense in doing patching. Signed-off-by: Dmitry Safonov --- criu/arch/x86/include/asm/vdso.h | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/criu/arch/x86/include/asm/vdso.h b/criu/arch/x86/include/asm/vdso.h index ae893b8d7..046db2336 100644 --- a/criu/arch/x86/include/asm/vdso.h +++ b/criu/arch/x86/include/asm/vdso.h @@ -12,17 +12,38 @@ * This is a minimal amount of symbols * we should support at the moment. 
*/ -#define VDSO_SYMBOL_MAX 7 +#define VDSO_SYMBOL_MAX 6 +/* + * XXX: we don't patch __kernel_vsyscall as it's too small: + * + * byte *before* *after* + * 0x0 push %ecx mov $[rt-vdso],%eax + * 0x1 push %edx ^ + * 0x2 push %ebp ^ + * 0x3 mov %esp,%ebp ^ + * 0x5 sysenter jmp *%eax + * 0x7 int $0x80 int3 + * 0x9 pop %ebp int3 + * 0xa pop %edx int3 + * 0xb pop %ecx pop %ecx + * 0xc ret ret + * + * As restarting a syscall is quite likely after restore, + * the patched version quitly crashes. + * vsyscall will be patched again when addressing: + * https://github.com/checkpoint-restore/criu/issues/512 + */ #define ARCH_VDSO_SYMBOLS \ "__vdso_clock_gettime", \ "__vdso_getcpu", \ "__vdso_gettimeofday", \ "__vdso_time", \ - "__kernel_vsyscall", \ "__kernel_sigreturn", \ "__kernel_rt_sigreturn" +/* "__kernel_vsyscall", */ + #ifndef ARCH_MAP_VDSO_32 # define ARCH_MAP_VDSO_32 0x2002 #endif From a96f0f1877a7b0c2fbb975043c96c32584d8f501 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Wed, 29 May 2019 17:15:13 +0100 Subject: [PATCH 0095/2030] util-vdso: Check chain for STN_UNDEF Rather than chain[chain] != STN_UNDEF. Seems like, on !ARM32 vdso there are more symbols and less chance to hit this "feature". Fixes parsing of __vdso_clock_gettime symbol on v5.1 arm kernel. 
Signed-off-by: Dmitry Safonov --- criu/pie/util-vdso.c | 2 +- test/zdtm/static/vdso01.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/criu/pie/util-vdso.c b/criu/pie/util-vdso.c index 6213df9a4..104da0633 100644 --- a/criu/pie/util-vdso.c +++ b/criu/pie/util-vdso.c @@ -242,7 +242,7 @@ static void parse_elf_symbols(uintptr_t mem, size_t size, Phdr_t *load, const char * symbol = vdso_symbols[i]; k = elf_hash((const unsigned char *)symbol); - for (j = bucket[k % nbucket]; j < nchain && chain[j] != STN_UNDEF; j = chain[j]) { + for (j = bucket[k % nbucket]; j < nchain && j != STN_UNDEF; j = chain[j]) { addr = mem + dyn_symtab->d_un.d_ptr - load->p_vaddr; Sym_t *sym; char *name; diff --git a/test/zdtm/static/vdso01.c b/test/zdtm/static/vdso01.c index be733663c..269688c5e 100644 --- a/test/zdtm/static/vdso01.c +++ b/test/zdtm/static/vdso01.c @@ -246,7 +246,7 @@ static int vdso_fill_symtable(char *mem, size_t size, struct vdso_symtable *t) for (i = 0; i < ARRAY_SIZE(vdso_symbols); i++) { k = elf_hash((const unsigned char *)vdso_symbols[i]); - for (j = bucket[k % nbucket]; j < nchain && chain[j] != STN_UNDEF; j = chain[j]) { + for (j = bucket[k % nbucket]; j < nchain && j != STN_UNDEF; j = chain[j]) { Sym_t *sym = (void *)&mem[dyn_symtab->d_un.d_ptr - load->p_vaddr]; char *name; From e73df268af04e84ecf65c830b49bb687c1a54c7e Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Wed, 29 May 2019 17:15:14 +0100 Subject: [PATCH 0096/2030] pie/build: Add CFLAGS_PIE to CFLAGS There is a little difference between ccflags-y and CFLAGS, except the local/global visibility: nmk adds $(CFLAGS) to nmk-asflags and assembles using them, but without ccflags-y. The other possible way would be adding asflags-y with CFLAGS_PIE, but I'm not convinced - let's update CFLAGS for the time being. 
Signed-off-by: Dmitry Safonov --- criu/pie/Makefile | 2 +- criu/pie/Makefile.library | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/criu/pie/Makefile b/criu/pie/Makefile index ade186346..47443c26b 100644 --- a/criu/pie/Makefile +++ b/criu/pie/Makefile @@ -5,7 +5,7 @@ target := parasite restorer CFLAGS := $(filter-out -pg $(CFLAGS-GCOV) $(CFLAGS-ASAN),$(CFLAGS)) -ccflags-y += $(CFLAGS_PIE) +CFLAGS += $(CFLAGS_PIE) ccflags-y += -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0 ccflags-y += -Wp,-U_FORTIFY_SOURCE -Wp,-D_FORTIFY_SOURCE=0 diff --git a/criu/pie/Makefile.library b/criu/pie/Makefile.library index b1ac600c6..d802417de 100644 --- a/criu/pie/Makefile.library +++ b/criu/pie/Makefile.library @@ -29,4 +29,4 @@ ifeq ($(SRCARCH),arm) endif CFLAGS := $(filter-out -pg $(CFLAGS-GCOV) $(CFLAGS-ASAN),$(CFLAGS)) -ccflags-y += $(CFLAGS_PIE) +CFLAGS += $(CFLAGS_PIE) From de53191179ad3413e4ce3d18ed915e4ecdc39ed7 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Wed, 29 May 2019 17:15:15 +0100 Subject: [PATCH 0097/2030] criu/vdso: Purge CONFIG_VDSO Vigorously remove the config ifdef. The config option *never* had any excuse to exist: - for x86 we were grand - for ppc64/arm64 patches to support mremap() on vdso were long ago accepted, but regardless - it's not possible to disable CONFIG_VDSO for those platforms in kernel - for s390 - patches were mainstreamed not that long ago, but it's not possible to disable the kernel config - for arm32 it's possible to disable the kernel config, but kernel returns to userspace historically through sigpage, not vdso. That's the only platform that criu disallows to have CONFIG_VDSO=y in kernel, but that's just meaningless. A kernel patch for sigpage mremap() has gone into v4.13: commit 280e87e98c09 ("ARM: 8683/1: ARM32: Support mremap() for sigpage/vDSO"). So, removing the config was a long-lived item on my TODO list that blighted arm32 users and made changes to vdso more complex by all "needed" ifdeffery. Get rid of it with fire.
Fixes: #446 Signed-off-by: Dmitry Safonov --- Makefile | 6 +----- Makefile.config | 4 ---- criu/Makefile.crtools | 3 --- criu/cr-restore.c | 6 ------ criu/include/kerndat.h | 4 ---- criu/include/parasite-vdso.h | 9 --------- criu/include/restorer.h | 2 -- criu/include/vdso.h | 13 ------------- criu/mem.c | 3 +-- criu/pie/Makefile | 15 ++++++--------- criu/pie/Makefile.library | 9 +-------- criu/pie/parasite.c | 8 -------- criu/pie/restorer.c | 8 -------- criu/proc_parse.c | 14 -------------- 14 files changed, 9 insertions(+), 95 deletions(-) diff --git a/Makefile b/Makefile index 475d4abaf..09cf2406a 100644 --- a/Makefile +++ b/Makefile @@ -56,19 +56,16 @@ ifeq ($(ARCH),arm) endif ifeq ($(ARCH),aarch64) - VDSO := y DEFINES := -DCONFIG_AARCH64 endif ifeq ($(ARCH),ppc64) LDARCH := powerpc:common64 - VDSO := y DEFINES := -DCONFIG_PPC64 -D__SANE_USERSPACE_TYPES__ endif ifeq ($(ARCH),x86) LDARCH := i386:x86-64 - VDSO := y DEFINES := -DCONFIG_X86_64 endif @@ -81,7 +78,6 @@ endif ifeq ($(ARCH),s390) ARCH := s390 SRCARCH := s390 - VDSO := y DEFINES := -DCONFIG_S390 CFLAGS_PIE := -fno-optimize-sibling-calls endif @@ -90,7 +86,7 @@ CFLAGS_PIE += -DCR_NOGLIBC export CFLAGS_PIE LDARCH ?= $(SRCARCH) -export LDARCH VDSO +export LDARCH export PROTOUFIX DEFINES # diff --git a/Makefile.config b/Makefile.config index a853705b3..008a82289 100644 --- a/Makefile.config +++ b/Makefile.config @@ -57,10 +57,6 @@ $(CONFIG_HEADER): scripts/feature-tests.mak $(CONFIG_FILE) $(Q) echo '' >> $$@ $(call map,gen-feature-test,$(FEATURES_LIST)) $(Q) cat $(CONFIG_FILE) | sed -n -e '/^[^#]/s/^/#define CONFIG_/p' >> $$@ -ifeq ($$(VDSO),y) - $(Q) echo '#define CONFIG_VDSO' >> $$@ - $(Q) echo '' >> $$@ -endif $(Q) echo '#endif /* __CR_CONFIG_H__ */' >> $$@ endef diff --git a/criu/Makefile.crtools b/criu/Makefile.crtools index dd4dc3783..383ed1940 100644 --- a/criu/Makefile.crtools +++ b/criu/Makefile.crtools @@ -83,15 +83,12 @@ obj-y += fdstore.o obj-y += uffd.o obj-y += config.o obj-y += servicefd.o - 
-ifeq ($(VDSO),y) obj-y += pie-util-vdso.o obj-y += vdso.o obj-$(CONFIG_COMPAT) += pie-util-vdso-elf32.o CFLAGS_pie-util-vdso-elf32.o += -DCONFIG_VDSO_32 obj-$(CONFIG_COMPAT) += vdso-compat.o CFLAGS_REMOVE_vdso-compat.o += $(CFLAGS-ASAN) $(CFLAGS-GCOV) -endif PROTOBUF_GEN := scripts/protobuf-gen.sh diff --git a/criu/cr-restore.c b/criu/cr-restore.c index f25efb823..23be81140 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -3225,10 +3225,8 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns struct thread_restore_args *thread_args; struct restore_mem_zone *mz; -#ifdef CONFIG_VDSO struct vdso_maps vdso_maps_rt; unsigned long vdso_rt_size = 0; -#endif struct vm_area_list self_vmas; struct vm_area_list *vmas = &rsti(current)->vmas; @@ -3279,7 +3277,6 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns pr_info("%d threads require %ldK of memory\n", current->nr_threads, KBYTES(task_args->bootstrap_len)); -#ifdef CONFIG_VDSO if (core_is_compat(core)) vdso_maps_rt = vdso_maps_compat; else @@ -3291,7 +3288,6 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns if (vdso_rt_size && vdso_maps_rt.sym.vvar_size) vdso_rt_size += ALIGN(vdso_maps_rt.sym.vvar_size, PAGE_SIZE); task_args->bootstrap_len += vdso_rt_size; -#endif /* * Restorer is a blob (code + args) that will get mapped in some @@ -3506,7 +3502,6 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns } -#ifdef CONFIG_VDSO /* * Restorer needs own copy of vdso parameters. 
Runtime * vdso must be kept non intersecting with anything else, @@ -3518,7 +3513,6 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns task_args->vdso_maps_rt = vdso_maps_rt; task_args->vdso_rt_size = vdso_rt_size; task_args->can_map_vdso = kdat.can_map_vdso; -#endif new_sp = restorer_stack(task_args->t->mz); diff --git a/criu/include/kerndat.h b/criu/include/kerndat.h index 2740dd3b1..75e2130b2 100644 --- a/criu/include/kerndat.h +++ b/criu/include/kerndat.h @@ -5,9 +5,7 @@ #include "int.h" #include "common/config.h" #include "asm/kerndat.h" -#ifdef CONFIG_VDSO #include "util-vdso.h" -#endif struct stat; @@ -61,11 +59,9 @@ struct kerndat_s { bool has_thp_disable; bool can_map_vdso; bool vdso_hint_reliable; -#ifdef CONFIG_VDSO struct vdso_symtable vdso_sym; #ifdef CONFIG_COMPAT struct vdso_symtable vdso_sym_compat; -#endif #endif bool has_nsid; bool has_link_nsid; diff --git a/criu/include/parasite-vdso.h b/criu/include/parasite-vdso.h index 6667fe5c4..3cf67bbb3 100644 --- a/criu/include/parasite-vdso.h +++ b/criu/include/parasite-vdso.h @@ -2,9 +2,6 @@ #define __CR_PARASITE_VDSO_H__ #include "common/config.h" - -#ifdef CONFIG_VDSO - #include "util-vdso.h" #include "images/vma.pb-c.h" @@ -95,10 +92,4 @@ extern int vdso_redirect_calls(unsigned long base_to, unsigned long base_from, struct vdso_symtable *to, struct vdso_symtable *from, bool compat_vdso); -#else /* CONFIG_VDSO */ -#define vdso_do_park(sym_rt, park_at, park_size) (0) -#define vdso_map_compat(map_at) (0) - -#endif /* CONFIG_VDSO */ - #endif /* __CR_PARASITE_VDSO_H__ */ diff --git a/criu/include/restorer.h b/criu/include/restorer.h index b83e9130c..effbc3655 100644 --- a/criu/include/restorer.h +++ b/criu/include/restorer.h @@ -207,11 +207,9 @@ struct task_restore_args { bool can_map_vdso; bool auto_dedup; -#ifdef CONFIG_VDSO unsigned long vdso_rt_size; struct vdso_maps vdso_maps_rt; /* runtime vdso symbols */ unsigned long vdso_rt_parked_at; /* safe place to keep vdso */ 
-#endif void **breakpoint; enum faults fault_strategy; diff --git a/criu/include/vdso.h b/criu/include/vdso.h index 1719f3fb7..fd30772b4 100644 --- a/criu/include/vdso.h +++ b/criu/include/vdso.h @@ -5,9 +5,6 @@ #include #include "common/config.h" - -#ifdef CONFIG_VDSO - #include "util-vdso.h" extern struct vdso_maps vdso_maps; @@ -26,14 +23,4 @@ extern void compat_vdso_helper(struct vdso_maps *native, int pipe_fd, int err_fd, void *vdso_buf, size_t buf_size); #endif -#else /* CONFIG_VDSO */ - -#define vdso_init_dump() (0) -#define vdso_init_restore() (0) -#define kerndat_vdso_fill_symtable() (0) -#define kerndat_vdso_preserves_hint() (0) -#define parasite_fixup_vdso(ctl, pid, vma_area_list) (0) - -#endif /* CONFIG_VDSO */ - #endif /* __CR_VDSO_H__ */ diff --git a/criu/mem.c b/criu/mem.c index b1d13188b..6a1a87a1e 100644 --- a/criu/mem.c +++ b/criu/mem.c @@ -101,7 +101,6 @@ static inline bool __page_in_parent(bool dirty) bool should_dump_page(VmaEntry *vmae, u64 pme) { -#ifdef CONFIG_VDSO /* * vDSO area must be always dumped because on restore * we might need to generate a proxy. 
@@ -117,7 +116,7 @@ bool should_dump_page(VmaEntry *vmae, u64 pme) */ if (vma_entry_is(vmae, VMA_AREA_VVAR)) return false; -#endif + /* * Optimisation for private mapping pages, that haven't * yet being COW-ed diff --git a/criu/pie/Makefile b/criu/pie/Makefile index 47443c26b..1ad456f43 100644 --- a/criu/pie/Makefile +++ b/criu/pie/Makefile @@ -16,6 +16,7 @@ endif LDS := compel/arch/$(SRCARCH)/scripts/compel-pack.lds.S +restorer-obj-y += parasite-vdso.o ./$(ARCH_DIR)/vdso-pie.o restorer-obj-y += ./$(ARCH_DIR)/restorer.o ifeq ($(ARCH),x86) @@ -25,16 +26,12 @@ ifeq ($(ARCH),x86) endif endif -ifeq ($(VDSO),y) - restorer-obj-y += parasite-vdso.o ./$(ARCH_DIR)/vdso-pie.o +ifeq ($(SRCARCH),aarch64) + restorer-obj-y += ./$(ARCH_DIR)/intraprocedure.o +endif - ifeq ($(SRCARCH),aarch64) - restorer-obj-y += ./$(ARCH_DIR)/intraprocedure.o - endif - - ifeq ($(SRCARCH),ppc64) - restorer-obj-y += ./$(ARCH_DIR)/vdso-trampoline.o - endif +ifeq ($(SRCARCH),ppc64) + restorer-obj-y += ./$(ARCH_DIR)/vdso-trampoline.o endif define gen-pie-rules diff --git a/criu/pie/Makefile.library b/criu/pie/Makefile.library index d802417de..658c8a4eb 100644 --- a/criu/pie/Makefile.library +++ b/criu/pie/Makefile.library @@ -7,14 +7,7 @@ lib-name := pie.lib.a lib-y += util.o - -ifeq ($(VDSO),y) - lib-y += util-vdso.o -endif - -ifeq ($(SRCARCH),ppc64) - lib-y += ./$(ARCH_DIR)/misc.o -endif +lib-y += util-vdso.o ifeq ($(SRCARCH),x86) ifeq ($(CONFIG_COMPAT),y) diff --git a/criu/pie/parasite.c b/criu/pie/parasite.c index c32e31384..01bacd311 100644 --- a/criu/pie/parasite.c +++ b/criu/pie/parasite.c @@ -573,7 +573,6 @@ err_io: #undef __tty_ioctl } -#ifdef CONFIG_VDSO static int parasite_check_vdso_mark(struct parasite_vdso_vma_entry *args) { struct vdso_mark *m = (void *)args->start; @@ -609,13 +608,6 @@ static int parasite_check_vdso_mark(struct parasite_vdso_vma_entry *args) return 0; } -#else -static inline int parasite_check_vdso_mark(struct parasite_vdso_vma_entry *args) -{ - pr_err("Unexpected VDSO 
check command\n"); - return -1; -} -#endif static int parasite_dump_cgroup(struct parasite_dump_cgroup_args *args) { diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c index f2db115ff..513be74e0 100644 --- a/criu/pie/restorer.c +++ b/criu/pie/restorer.c @@ -1081,11 +1081,7 @@ static void restore_posix_timers(struct task_restore_args *args) * sys_munmap must not return here. The control process must * trap us on the exit from sys_munmap. */ -#ifdef CONFIG_VDSO unsigned long vdso_rt_size = 0; -#else -#define vdso_rt_size (0) -#endif void *bootstrap_start = NULL; unsigned int bootstrap_len = 0; @@ -1259,9 +1255,7 @@ long __export_restore_task(struct task_restore_args *args) bootstrap_start = args->bootstrap_start; bootstrap_len = args->bootstrap_len; -#ifdef CONFIG_VDSO vdso_rt_size = args->vdso_rt_size; -#endif fi_strategy = args->fault_strategy; @@ -1446,7 +1440,6 @@ long __export_restore_task(struct task_restore_args *args) sys_close(args->vma_ios_fd); -#ifdef CONFIG_VDSO /* * Proxify vDSO. 
*/ @@ -1454,7 +1447,6 @@ long __export_restore_task(struct task_restore_args *args) args->vmas, args->vmas_n, args->compatible_mode, fault_injected(FI_VDSO_TRAMPOLINES))) goto core_restore_end; -#endif /* * Walk though all VMAs again to drop PROT_WRITE diff --git a/criu/proc_parse.c b/criu/proc_parse.c index 3d852d755..f6ebb1fd6 100644 --- a/criu/proc_parse.c +++ b/criu/proc_parse.c @@ -502,7 +502,6 @@ err: return -1; } -#ifdef CONFIG_VDSO static inline int handle_vdso_vma(struct vma_area *vma) { vma->e->status |= VMA_AREA_REGULAR; @@ -518,19 +517,6 @@ static inline int handle_vvar_vma(struct vma_area *vma) vma->e->status |= VMA_AREA_VVAR; return 0; } -#else -static inline int handle_vdso_vma(struct vma_area *vma) -{ - pr_warn_once("Found vDSO area without support\n"); - return -1; -} - -static inline int handle_vvar_vma(struct vma_area *vma) -{ - pr_warn_once("Found VVAR area without support\n"); - return -1; -} -#endif static int handle_vma(pid_t pid, struct vma_area *vma_area, const char *file_path, DIR *map_files_dir, From bd7920717fed499761b5c7ddec08f326c613e0d4 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Sat, 18 May 2019 09:28:36 +0100 Subject: [PATCH 0098/2030] zdtm: Fix memory and resource leaks These errors were found by Cppcheck 1.84 Signed-off-by: Radostin Stoyanov --- test/zdtm/static/autofs.c | 11 ++++++----- test/zdtm/static/caps00.c | 1 + test/zdtm/static/packet_sock_mmap.c | 1 + test/zdtm/static/s390x_runtime_instr.c | 2 ++ test/zdtm/static/sock_peercred.c | 18 +++++++++++------- test/zdtm/static/unlink_multiple_largefiles.c | 10 +++++++--- test/zdtm/static/vdso-proxy.c | 8 ++++---- test/zdtm/transition/lazy-thp.c | 2 ++ 8 files changed, 34 insertions(+), 19 deletions(-) diff --git a/test/zdtm/static/autofs.c b/test/zdtm/static/autofs.c index f74bc35ac..4360f90f0 100644 --- a/test/zdtm/static/autofs.c +++ b/test/zdtm/static/autofs.c @@ -312,7 +312,7 @@ static int autofs_open_mount(int devid, const char *mountpoint) { struct 
autofs_dev_ioctl *param; size_t size; - int fd; + int ret; size = sizeof(struct autofs_dev_ioctl) + strlen(mountpoint) + 1; param = malloc(size); @@ -325,13 +325,14 @@ static int autofs_open_mount(int devid, const char *mountpoint) if (ioctl(autofs_dev, AUTOFS_DEV_IOCTL_OPENMOUNT, param) < 0) { pr_perror("failed to open autofs mount %s", mountpoint); - return -errno; + ret = -errno; + goto out; } - fd = param->ioctlfd; + ret = param->ioctlfd; +out: free(param); - - return fd; + return ret; } static int autofs_report_result(int token, int devid, const char *mountpoint, diff --git a/test/zdtm/static/caps00.c b/test/zdtm/static/caps00.c index 62484c4f4..7a256c08a 100644 --- a/test/zdtm/static/caps00.c +++ b/test/zdtm/static/caps00.c @@ -47,6 +47,7 @@ int main(int argc, char **argv) if (f) { if (fscanf(f, "%d", &cap_last_cap) != 1) { pr_perror("Unable to read cal_last_cap"); + fclose(f); return 1; } fclose(f); diff --git a/test/zdtm/static/packet_sock_mmap.c b/test/zdtm/static/packet_sock_mmap.c index edf96c66e..2a82950bc 100644 --- a/test/zdtm/static/packet_sock_mmap.c +++ b/test/zdtm/static/packet_sock_mmap.c @@ -47,6 +47,7 @@ static void check_map_is_there(unsigned long addr, int sk) sscanf(line, "%lx-%*x %*s %*s %x:%x %d %*s", &start, &maj, &min, &ino); if ((start == addr) && ss.st_dev == makedev(maj, min) && ss.st_ino == ino) { pass(); + fclose(f); return; } } diff --git a/test/zdtm/static/s390x_runtime_instr.c b/test/zdtm/static/s390x_runtime_instr.c index 6be32c3c1..e0a5742d9 100644 --- a/test/zdtm/static/s390x_runtime_instr.c +++ b/test/zdtm/static/s390x_runtime_instr.c @@ -147,9 +147,11 @@ int main(int argc, char **argv) test_waitsig(); skip("RI not supported"); pass(); + free(buf); return 0; } fail("Fail with error %d", errno); + free(buf); return -1; } /* Set buffer for RI */ diff --git a/test/zdtm/static/sock_peercred.c b/test/zdtm/static/sock_peercred.c index e681ecec9..069cc52f7 100644 --- a/test/zdtm/static/sock_peercred.c +++ 
b/test/zdtm/static/sock_peercred.c @@ -67,6 +67,7 @@ int main(int argc, char **argv) socklen_t len; char *stack; pid_t pid; + int exit_code = 1; test_init(argc, argv); @@ -78,7 +79,7 @@ int main(int argc, char **argv) stack = malloc(2 * STACK_SIZE); if (!stack) { pr_err("malloc\n"); - return 1; + goto out; } /* Find unused fd */ @@ -89,18 +90,18 @@ int main(int argc, char **argv) if (fd == INT_MAX) { pr_err("INT_MAX happens...\n"); - return 1; + goto out; } pid = clone(child_func, stack + STACK_SIZE, CLONE_FILES|SIGCHLD, (void *)(unsigned long)fd); if (pid == -1) { pr_perror("clone"); - return 1; + goto out; } if (wait(&status) == -1 || status) { pr_perror("wait error: status=%d\n", status); - return 1; + goto out; } test_daemon(); @@ -109,15 +110,18 @@ int main(int argc, char **argv) len = sizeof(ucred); if (getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &ucred, &len) < 0) { fail("Can't getsockopt()"); - return 1; + goto out; } if (ucred.pid != pid || ucred.gid != getuid() + UID_INC || ucred.gid != getgid() + GID_INC) { fail("Wrong pid, uid or gid\n"); - return 1; + goto out; } pass(); - return 0; + exit_code = 0; + out: + free(stack); + return exit_code; } diff --git a/test/zdtm/static/unlink_multiple_largefiles.c b/test/zdtm/static/unlink_multiple_largefiles.c index a87093439..7cf628606 100644 --- a/test/zdtm/static/unlink_multiple_largefiles.c +++ b/test/zdtm/static/unlink_multiple_largefiles.c @@ -31,7 +31,7 @@ void create_check_pattern(char *buf, size_t count, unsigned char seed) struct fiemap *read_fiemap(int fd) { test_msg("Obtaining fiemap for fd %d\n", fd); - struct fiemap *fiemap; + struct fiemap *fiemap, *tmp; int extents_size; fiemap = malloc(sizeof(struct fiemap)); @@ -48,16 +48,19 @@ struct fiemap *read_fiemap(int fd) if (ioctl(fd, FS_IOC_FIEMAP, fiemap) < 0) { pr_perror("FIEMAP ioctl failed"); + free(fiemap); return NULL; } extents_size = sizeof(struct fiemap_extent) * fiemap->fm_mapped_extents; - fiemap = realloc(fiemap,sizeof(struct fiemap) + 
extents_size); - if (fiemap == NULL) { + tmp = realloc(fiemap, sizeof(struct fiemap) + extents_size); + if (tmp == NULL) { + free(fiemap); pr_perror("Cannot resize fiemap"); return NULL; } + fiemap = tmp; memset(fiemap->fm_extents, 0, extents_size); fiemap->fm_extent_count = fiemap->fm_mapped_extents; @@ -65,6 +68,7 @@ struct fiemap *read_fiemap(int fd) if (ioctl(fd, FS_IOC_FIEMAP, fiemap) < 0) { pr_perror("fiemap ioctl() failed"); + free(fiemap); return NULL; } test_msg("Debugkillme: %x\n", fiemap->fm_mapped_extents); diff --git a/test/zdtm/static/vdso-proxy.c b/test/zdtm/static/vdso-proxy.c index ecb71e892..2946eb790 100644 --- a/test/zdtm/static/vdso-proxy.c +++ b/test/zdtm/static/vdso-proxy.c @@ -73,13 +73,13 @@ static int parse_maps(struct vm_area *vmas) v->start, v->end); } - if (i == MAX_VMAS) { - pr_err("Number of VMAs is bigger than reserved array's size\n"); + if (fclose(maps)) { + pr_err("Failed to close maps file: %m\n"); return -1; } - if (fclose(maps)) { - pr_err("Failed to close maps file: %m\n"); + if (i == MAX_VMAS) { + pr_err("Number of VMAs is bigger than reserved array's size\n"); return -1; } diff --git a/test/zdtm/transition/lazy-thp.c b/test/zdtm/transition/lazy-thp.c index f7af41446..a0cf33041 100644 --- a/test/zdtm/transition/lazy-thp.c +++ b/test/zdtm/transition/lazy-thp.c @@ -57,5 +57,7 @@ int main(int argc, char ** argv) } pass(); + free(org); + free(mem); return 0; } From 75adf0e6087b8cbba850013db11ecf296a90f3cf Mon Sep 17 00:00:00 2001 From: Uchio Kondo Date: Fri, 1 Mar 2019 18:48:06 +0900 Subject: [PATCH 0099/2030] c-lib: Support to build a static archive Signed-off-by: Uchio Kondo --- lib/Makefile | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/lib/Makefile b/lib/Makefile index 94632848e..0c9841071 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -1,4 +1,5 @@ CRIU_SO := libcriu.so +CRIU_A := libcriu.a UAPI_HEADERS := lib/c/criu.h images/rpc.proto # @@ -19,8 +20,12 @@ ldflags-so += -lprotobuf-c lib/c/$(CRIU_SO): 
lib/c/built-in.o $(call msg-link, $@) $(Q) $(CC) -shared $(cflags-so) -o $@ $^ $(ldflags-so) $(LDFLAGS) +lib/c/$(CRIU_A): lib/c/built-in.o + $(call msg-link, $@) + $(Q) $(AR) rcs $@ $^ lib-c: lib/c/$(CRIU_SO) -.PHONY: lib-c +lib-a: lib/c/$(CRIU_A) +.PHONY: lib-c lib-a # # Python bindings. From 6adf006b87fbc7f1f714af11452242f915b39890 Mon Sep 17 00:00:00 2001 From: Uchio Kondo Date: Fri, 8 Mar 2019 15:08:32 +0900 Subject: [PATCH 0100/2030] c-lib: Add lib-a into all-y targets Signed-off-by: Uchio Kondo --- lib/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Makefile b/lib/Makefile index 0c9841071..7b95956ab 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -6,7 +6,7 @@ UAPI_HEADERS := lib/c/criu.h images/rpc.proto # File to keep track of files installed by setup.py CRIT_SETUP_FILES := lib/.crit-setup.files -all-y += lib-c lib-py +all-y += lib-c lib-a lib-py # # C language bindings. From a29b348bae98d1f4154782c790e875d789cb3982 Mon Sep 17 00:00:00 2001 From: Uchio Kondo Date: Wed, 29 May 2019 13:48:17 +0900 Subject: [PATCH 0101/2030] Add CRIU_A to cleanup target - This patch is from the comment by Radostin Stoyanov @rst0git Signed-off-by: Uchio Kondo --- lib/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Makefile b/lib/Makefile index 7b95956ab..4a131e88e 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -42,7 +42,7 @@ clean-lib: $(Q) $(MAKE) $(build)=lib/py clean .PHONY: clean-lib clean: clean-lib -cleanup-y += lib/c/$(CRIU_SO) lib/c/criu.pc +cleanup-y += lib/c/$(CRIU_SO) lib/c/$(CRIU_A) lib/c/criu.pc mrproper: clean install: lib-c lib-py crit/crit lib/c/criu.pc.in From 6a2cd621cb4195dbeea7a694ffc4beb785892942 Mon Sep 17 00:00:00 2001 From: Uchio Kondo Date: Fri, 7 Jun 2019 17:11:02 +0900 Subject: [PATCH 0102/2030] c-lib: Install and uninstall libcriu.a Signed-off-by: Uchio Kondo --- lib/Makefile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/Makefile b/lib/Makefile index 
4a131e88e..67c50b95a 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -45,12 +45,13 @@ clean: clean-lib cleanup-y += lib/c/$(CRIU_SO) lib/c/$(CRIU_A) lib/c/criu.pc mrproper: clean -install: lib-c lib-py crit/crit lib/c/criu.pc.in +install: lib-c lib-a lib-py crit/crit lib/c/criu.pc.in $(E) " INSTALL " lib $(Q) mkdir -p $(DESTDIR)$(LIBDIR) $(Q) install -m 755 lib/c/$(CRIU_SO) $(DESTDIR)$(LIBDIR)/$(CRIU_SO).$(CRIU_SO_VERSION_MAJOR).$(CRIU_SO_VERSION_MINOR) $(Q) ln -fns $(CRIU_SO).$(CRIU_SO_VERSION_MAJOR).$(CRIU_SO_VERSION_MINOR) $(DESTDIR)$(LIBDIR)/$(CRIU_SO).$(CRIU_SO_VERSION_MAJOR) $(Q) ln -fns $(CRIU_SO).$(CRIU_SO_VERSION_MAJOR).$(CRIU_SO_VERSION_MINOR) $(DESTDIR)$(LIBDIR)/$(CRIU_SO) + $(Q) install -m 755 lib/c/$(CRIU_A) $(DESTDIR)$(LIBDIR)/$(CRIU_A) $(Q) mkdir -p $(DESTDIR)$(INCLUDEDIR)/criu/ $(Q) install -m 644 $(UAPI_HEADERS) $(DESTDIR)$(INCLUDEDIR)/criu/ $(E) " INSTALL " pkgconfig/criu.pc @@ -65,6 +66,7 @@ uninstall: $(E) " UNINSTALL" $(CRIU_SO) $(Q) $(RM) $(addprefix $(DESTDIR)$(LIBDIR)/,$(CRIU_SO).$(CRIU_SO_VERSION_MAJOR)) $(Q) $(RM) $(addprefix $(DESTDIR)$(LIBDIR)/,$(CRIU_SO)) + $(Q) $(RM) $(addprefix $(DESTDIR)$(LIBDIR)/,$(CRIU_A)) $(Q) $(RM) $(addprefix $(DESTDIR)$(LIBDIR)/,$(CRIU_SO).$(CRIU_SO_VERSION_MAJOR).$(CRIU_SO_VERSION_MINOR)) $(Q) $(RM) $(addprefix $(DESTDIR)$(INCLUDEDIR)/criu/,$(notdir $(UAPI_HEADERS))) $(E) " UNINSTALL" pkgconfig/criu.pc From ffec56803412afbb3f13d16d57101742e50984c6 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Tue, 4 Jun 2019 10:11:28 +0300 Subject: [PATCH 0103/2030] fsnotify: More precious error handling - make sure the alloc_openable is not failed with memory error, so that we should not lookup via irmap - irmap lookup should provide us a copy of the path instead of reference to irmap entry https://github.com/checkpoint-restore/criu/issues/698 Signed-off-by: Cyrill Gorcunov Reviewed-by: Dmitry Safonov <0x7f454c46@gmail.com> Signed-off-by: Andrei Vagin --- criu/fsnotify.c | 13 +++++++++---- 1 file changed, 9 
insertions(+), 4 deletions(-) diff --git a/criu/fsnotify.c b/criu/fsnotify.c index ed8a67a21..09093c0be 100644 --- a/criu/fsnotify.c +++ b/criu/fsnotify.c @@ -175,7 +175,7 @@ static char *alloc_openable(unsigned int s_dev, unsigned long i_ino, FhEntry *f_ if (st.st_ino == i_ino) { path = xstrdup(buf); if (path == NULL) - goto err; + return ERR_PTR(-ENOMEM); if (root_ns_mask & CLONE_NEWNS) { f_handle->has_mnt_id = true; f_handle->mnt_id = m->mnt_id; @@ -227,8 +227,8 @@ out: int check_open_handle(unsigned int s_dev, unsigned long i_ino, FhEntry *f_handle) { + char *path, *irmap_path; int fd = -1; - char *path; if (fault_injected(FI_CHECK_OPEN_HANDLE)) { fd = -1; @@ -262,6 +262,8 @@ fault: path = alloc_openable(s_dev, i_ino, f_handle); if (!IS_ERR_OR_NULL(path)) goto out; + else if (IS_ERR(path) && PTR_ERR(path) == -ENOMEM) + goto err; if ((mi->fstype->code == FSTYPE__TMPFS) || (mi->fstype->code == FSTYPE__DEVTMPFS)) { @@ -284,11 +286,14 @@ fault: } pr_warn("\tHandle 0x%x:0x%lx cannot be opened\n", s_dev, i_ino); - path = irmap_lookup(s_dev, i_ino); - if (!path) { + irmap_path = irmap_lookup(s_dev, i_ino); + if (!irmap_path) { pr_err("\tCan't dump that handle\n"); return -1; } + path = xstrdup(irmap_path); + if (!path) + goto err; out: pr_debug("\tDumping %s as path for handle\n", path); f_handle->path = path; From b7230b6132b8eefb46da61ac158a99612d15dc10 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Sun, 31 Mar 2019 11:43:16 +0100 Subject: [PATCH 0104/2030] make: config -- Link with GnuTLS There are two notable open-source libraries that provide TLS implementation - OpenSSL and GnuTLS. The license of OpenSSL is incompatible with CRIU's license, and therefore GnuTLS is the recommended choice. GnuTLS offers an API to access secure communication protocols. These protocols provide privacy over insecure lines, and are designed to prevent eavesdropping, tampering or message forgery.
Signed-off-by: Radostin Stoyanov --- Makefile.config | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Makefile.config b/Makefile.config index 008a82289..6f7324069 100644 --- a/Makefile.config +++ b/Makefile.config @@ -15,6 +15,14 @@ ifeq ($(call pkg-config-check,libselinux),y) FEATURE_DEFINES += -DCONFIG_HAS_SELINUX endif +ifeq ($(NO_GNUTLS)x$(call pkg-config-check,gnutls),xy) + LIBS_FEATURES += -lgnutls + export CONFIG_GNUTLS := y + FEATURE_DEFINES += -DCONFIG_GNUTLS +else + $(info Note: Building without GnuTLS support) +endif + export LIBS += $(LIBS_FEATURES) CONFIG_FILE = .config From 76a41209b0942fcc76508f1bdee7e7119c79f625 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Sun, 31 Mar 2019 12:05:22 +0100 Subject: [PATCH 0105/2030] page-xfer: Add TLS support with X509 certificates This commit adds Transport Layer Security (TLS) support for remote page-server connections. The following command-line options are introduced with this commit: --tls-cacert FILE Trust certificates signed only by this CA --tls-cacrl FILE CA certificate revocation list --tls-cert FILE TLS certificate --tls-key FILE TLS private key --tls Use TLS to secure remote connections The default PKI locations are: CA certificate /etc/pki/CA/cacert.pem CA revocation list /etc/pki/CA/cacrl.pem Client/server certificate /etc/pki/criu/cert.pem Client/server private key /etc/pki/criu/private/key.pem The files cacert.pem and cacrl.pem are optional. If they are not present, and not explicitly specified with a command-line option, CRIU will use only the system's trusted CAs to verify the remote peer's identity. This implies that if a CA certificate is specified using "--tls-cacert" only this CA will be used for verification. If CA certificate (cacert.pem) is not present, certificate revocation list (cacrl.pem) will be ignored. Both (client and server) sides require a private key and certificate. 
When the "--tls" option is specified, a TLS handshake (key exchange) will be performed immediately after the remote TCP connection has been accepted. X.509 certificates can be generated as follows: -------------------------%<------------------------- # Generate CA key and certificate echo -ne "ca\ncert_signing_key" > temp certtool --generate-privkey > cakey.pem certtool --generate-self-signed \ --template temp \ --load-privkey cakey.pem \ --outfile cacert.pem # Generate server key and certificate echo -ne "cn=$HOSTNAME\nencryption_key\nsigning_key" > temp certtool --generate-privkey > key.pem certtool --generate-certificate \ --template temp \ --load-privkey key.pem \ --load-ca-certificate cacert.pem \ --load-ca-privkey cakey.pem \ --outfile cert.pem rm temp mkdir -p /etc/pki/CA mkdir -p /etc/pki/criu/private mv cacert.pem /etc/pki/CA/ mv cert.pem /etc/pki/criu/ mv key.pem /etc/pki/criu/private -------------------------%<------------------------- Usage Example: Page-server: [src]# criu page-server -D --port --tls [dst]# criu dump --page-server --address --port \ -t -D --tls Lazy migration: [src]# criu dump --lazy-pages --port -t -D --tls [dst]# criu lazy-pages --page-server --address --port \ -D --tls [dst]# criu restore -D --lazy-pages Signed-off-by: Radostin Stoyanov --- Documentation/criu.txt | 27 +++ Makefile | 1 + criu/Makefile | 3 +- criu/Makefile.crtools | 1 + criu/config.c | 24 +++ criu/crtools.c | 5 + criu/include/cr_options.h | 5 + criu/include/tls.h | 26 +++ criu/page-xfer.c | 119 +++++++++---- criu/tls.c | 366 ++++++++++++++++++++++++++++++++++++++ criu/uffd.c | 3 + 11 files changed, 546 insertions(+), 34 deletions(-) create mode 100644 criu/include/tls.h create mode 100644 criu/tls.c diff --git a/Documentation/criu.txt b/Documentation/criu.txt index 6111c3baf..94fc5428a 100644 --- a/Documentation/criu.txt +++ b/Documentation/criu.txt @@ -594,6 +594,33 @@ Launches *criu* in page server mode. 
remote *lazy-pages* daemon to request memory pages in random order. +*--tls-cacert* 'file':: + Specifies the path to a trusted Certificate Authority (CA) certificate + file to be used for verification of a client or server certificate. + The 'file' must be in PEM format. When this option is used only the + specified CA is used for verification. Otherwise, the system's trusted CAs + and, if present, '/etc/pki/CA/cacert.pem' will be used. + +*--tls-cacrl* 'file':: + Specifies a path to a Certificate Revocation List (CRL) 'file' which + contains a list of revoked certificates that should no longer be trusted. + The 'file' must be in PEM format. When this option is not specified, the + file, if present, '/etc/pki/CA/cacrl.pem' will be used. + +*--tls-cert* 'file':: + Specifies a path to a file that contains a X.509 certificate to present + to the remote entity. The 'file' must be in PEM format. When this option + is not specified, the default location ('/etc/pki/criu/cert.pem') will be + used. + +*--tls-key* 'file':: + Specifies a path to a file that contains TLS private key. The 'file' must + be in PEM format. When this option is not the default location + ('/etc/pki/criu/private/key.pem') will be used. + +*--tls*:: + Use TLS to secure remote connections. + *lazy-pages* ~~~~~~~~~~~~ Launches *criu* in lazy-pages daemon mode. 
diff --git a/Makefile b/Makefile index 09cf2406a..9d83862d1 100644 --- a/Makefile +++ b/Makefile @@ -193,6 +193,7 @@ include Makefile.config else # To clean all files, enable make/build options here export CONFIG_COMPAT := y +export CONFIG_GNUTLS := y endif # diff --git a/criu/Makefile b/criu/Makefile index 3de6eb217..4134e5052 100644 --- a/criu/Makefile +++ b/criu/Makefile @@ -1,5 +1,5 @@ # here is a workaround for a bug in libnl-3: -# 6a8d90f5fec4 "attr: Allow attribute type 0" +# 6a8d90f5fec4 "attr: Allow attribute type 0" WRAPFLAGS += -Wl,--wrap=nla_parse,--wrap=nlmsg_parse ARCH_DIR := criu/arch/$(SRCARCH) @@ -14,6 +14,7 @@ endif # # Configuration file paths +CONFIG-DEFINES += -DSYSCONFDIR='"/etc"' CONFIG-DEFINES += -DGLOBAL_CONFIG_DIR='"/etc/criu/"' CONFIG-DEFINES += -DDEFAULT_CONFIG_FILENAME='"default.conf"' CONFIG-DEFINES += -DUSER_CONFIG_DIR='".criu/"' diff --git a/criu/Makefile.crtools b/criu/Makefile.crtools index 383ed1940..4588ea5b8 100644 --- a/criu/Makefile.crtools +++ b/criu/Makefile.crtools @@ -73,6 +73,7 @@ obj-y += string.o obj-y += sysctl.o obj-y += sysfs_parse.o obj-y += timerfd.o +obj-$(CONFIG_GNUTLS) += tls.o obj-y += tty.o obj-y += tun.o obj-y += util.o diff --git a/criu/config.c b/criu/config.c index bcf5176d9..bfdd6c658 100644 --- a/criu/config.c +++ b/criu/config.c @@ -510,6 +510,11 @@ int parse_options(int argc, char **argv, bool *usage_error, { "ps-socket", required_argument, 0, 1091}, { "config", required_argument, 0, 1089}, { "no-default-config", no_argument, 0, 1090}, + { "tls-cacert", required_argument, 0, 1092}, + { "tls-cacrl", required_argument, 0, 1093}, + { "tls-cert", required_argument, 0, 1094}, + { "tls-key", required_argument, 0, 1095}, + BOOL_OPT("tls", &opts.tls), { }, }; @@ -796,6 +801,18 @@ int parse_options(int argc, char **argv, bool *usage_error, case 1091: opts.ps_socket = atoi(optarg); break; + case 1092: + SET_CHAR_OPTS(tls_cacert, optarg); + break; + case 1093: + SET_CHAR_OPTS(tls_cacrl, optarg); + break; + case 
1094: + SET_CHAR_OPTS(tls_cert, optarg); + break; + case 1095: + SET_CHAR_OPTS(tls_key, optarg); + break; case 'V': pr_msg("Version: %s\n", CRIU_VERSION); if (strcmp(CRIU_GITID, "0")) @@ -857,6 +874,13 @@ int check_options() } } +#ifndef CONFIG_GNUTLS + if (opts.tls) { + pr_err("CRIU was built without TLS support\n"); + return 1; + } +#endif + if (check_namespace_opts()) { pr_err("Error: namespace flags conflict\n"); return 1; diff --git a/criu/crtools.c b/criu/crtools.c index 6c83b27da..a07df064c 100644 --- a/criu/crtools.c +++ b/criu/crtools.c @@ -427,6 +427,11 @@ usage: " -d|--daemon run in the background after creating socket\n" " --status-fd FD write \\0 to the FD and close it once process is ready\n" " to handle requests\n" +" --tls-cacert FILE trust certificates signed only by this CA\n" +" --tls-cacrl FILE path to CA certificate revocation list file\n" +" --tls-cert FILE path to TLS certificate file\n" +" --tls-key FILE path to TLS private key file\n" +" --tls use TLS to secure remote connection\n" "\n" "Configuration file options:\n" " --config FILEPATH pass a specific configuration file\n" diff --git a/criu/include/cr_options.h b/criu/include/cr_options.h index 9f152fac0..fcba278e0 100644 --- a/criu/include/cr_options.h +++ b/criu/include/cr_options.h @@ -138,6 +138,11 @@ struct cr_options { pid_t tree_id; int log_level; char *imgs_dir; + char *tls_cacert; + char *tls_cacrl; + char *tls_cert; + char *tls_key; + int tls; }; extern struct cr_options opts; diff --git a/criu/include/tls.h b/criu/include/tls.h new file mode 100644 index 000000000..aa2517887 --- /dev/null +++ b/criu/include/tls.h @@ -0,0 +1,26 @@ +#ifndef __CR_TLS_H__ +#define __CR_TLS_H__ + +# ifdef CONFIG_GNUTLS + +int tls_x509_init(int sockfd, bool is_server); +void tls_terminate_session(); + +ssize_t tls_send(const void *buf, size_t len, int flags); +ssize_t tls_recv(void *buf, size_t len, int flags); + +int tls_send_data_from_fd(int fd, unsigned long len); +int tls_recv_data_to_fd(int fd, 
unsigned long len); + +# else /* CONFIG_GNUTLS */ + +#define tls_x509_init(sockfd, is_server) (0) +#define tls_send(buf, len, flags) (-1) +#define tls_recv(buf, len, flags) (-1) +#define tls_send_data_from_fd(fd, len) (-1) +#define tls_recv_data_to_fd(fd, len) (-1) +#define tls_terminate_session() + +#endif /* CONFIG_HAS_GNUTLS */ + +#endif /* __CR_TLS_H__ */ diff --git a/criu/page-xfer.c b/criu/page-xfer.c index 8868ed226..f74716100 100644 --- a/criu/page-xfer.c +++ b/criu/page-xfer.c @@ -21,6 +21,7 @@ #include "parasite-syscall.h" #include "rst_info.h" #include "stats.h" +#include "tls.h" static int page_server_sk = -1; @@ -128,13 +129,22 @@ static inline u32 decode_ps_flags(u32 cmd) return cmd >> PS_CMD_BITS; } +static inline int __send(int sk, const void *buf, size_t sz, int fl) +{ + return opts.tls ? tls_send(buf, sz, fl) : send(sk, buf, sz, fl); +} + +static inline int __recv(int sk, void *buf, size_t sz, int fl) +{ + return opts.tls ? tls_recv(buf, sz, fl) : recv(sk, buf, sz, fl); +} + static inline int send_psi_flags(int sk, struct page_server_iov *pi, int flags) { - if (send(sk, pi, sizeof(*pi), flags) != sizeof(*pi)) { + if (__send(sk, pi, sizeof(*pi), flags) != sizeof(*pi)) { pr_perror("Can't send PSI %d to server", pi->cmd); return -1; } - return 0; } @@ -149,17 +159,28 @@ static int write_pages_to_server(struct page_xfer *xfer, { ssize_t ret, left = len; - pr_debug("Splicing %lu bytes / %lu pages into socket\n", len, len / PAGE_SIZE); + if (opts.tls) { + pr_debug("Sending %lu bytes / %lu pages\n", + len, len / PAGE_SIZE); - while (left > 0) { - ret = splice(p, NULL, xfer->sk, NULL, left, SPLICE_F_MOVE); - if (ret < 0) { - pr_perror("Can't write pages to socket"); + if (tls_send_data_from_fd(p, len)) return -1; - } + } else { + pr_debug("Splicing %lu bytes / %lu pages into socket\n", + len, len / PAGE_SIZE); - pr_debug("\tSpliced: %lu bytes sent\n", (unsigned long)ret); - left -= ret; + while (left > 0) { + ret = splice(p, NULL, xfer->sk, NULL, left, + 
SPLICE_F_MOVE); + if (ret < 0) { + pr_perror("Can't write pages to socket"); + return -1; + } + + pr_debug("\tSpliced: %lu bytes sent\n", + (unsigned long)ret); + left -= ret; + } } return 0; @@ -205,7 +226,7 @@ static int open_page_server_xfer(struct page_xfer *xfer, int fd_type, unsigned l /* Push the command NOW */ tcp_nodelay(xfer->sk, true); - if (read(xfer->sk, &has_parent, 1) != 1) { + if (__recv(xfer->sk, &has_parent, 1, 0) != 1) { pr_perror("The page server doesn't answer"); return -1; } @@ -539,7 +560,7 @@ static int page_server_check_parent(int sk, struct page_server_iov *pi) if (ret < 0) return -1; - if (write(sk, &ret, sizeof(ret)) != sizeof(ret)) { + if (__send(sk, &ret, sizeof(ret), 0) != sizeof(ret)) { pr_perror("Unable to send response"); return -1; } @@ -560,7 +581,7 @@ static int check_parent_server_xfer(int fd_type, unsigned long img_id) tcp_nodelay(page_server_sk, true); - if (read(page_server_sk, &has_parent, sizeof(int)) != sizeof(int)) { + if (__recv(page_server_sk, &has_parent, sizeof(int), 0) != sizeof(int)) { pr_perror("The page server doesn't answer"); return -1; } @@ -624,8 +645,7 @@ static int page_server_open(int sk, struct page_server_iov *pi) if (sk >= 0) { char has_parent = !!cxfer.loc_xfer.parent; - - if (write(sk, &has_parent, 1) != 1) { + if (__send(sk, &has_parent, 1, 0) != 1) { pr_perror("Unable to send response"); close_page_xfer(&cxfer.loc_xfer); return -1; @@ -684,14 +704,23 @@ static int page_server_add(int sk, struct page_server_iov *pi, u32 flags) return -1; } - chunk = splice(sk, NULL, cxfer.p[1], NULL, chunk, SPLICE_F_MOVE | SPLICE_F_NONBLOCK); - if (chunk < 0) { - pr_perror("Can't read from socket"); - return -1; - } - if (chunk == 0) { - pr_err("A socket was closed unexpectedly\n"); - return -1; + if (opts.tls) { + if(tls_recv_data_to_fd(cxfer.p[1], chunk)) { + pr_err("Can't read from socket\n"); + return -1; + } + } else { + chunk = splice(sk, NULL, cxfer.p[1], NULL, chunk, + SPLICE_F_MOVE | SPLICE_F_NONBLOCK); + + 
if (chunk < 0) { + pr_perror("Can't read from socket"); + return -1; + } + if (chunk == 0) { + pr_err("A socket was closed unexpectedly\n"); + return -1; + } } if (lxfer->write_pages(lxfer, cxfer.p[0], chunk)) @@ -733,9 +762,16 @@ static int page_server_get_pages(int sk, struct page_server_iov *pi) return -1; len = pi->nr_pages * PAGE_SIZE; - ret = splice(pipe_read_dest.p[0], NULL, sk, NULL, len, SPLICE_F_MOVE); - if (ret != len) - return -1; + + if (opts.tls) { + if (tls_send_data_from_fd(pipe_read_dest.p[0], len)) + return -1; + } else { + ret = splice(pipe_read_dest.p[0], NULL, sk, NULL, len, + SPLICE_F_MOVE); + if (ret != len) + return -1; + } tcp_nodelay(sk, true); @@ -773,7 +809,7 @@ static int page_server_serve(int sk) struct page_server_iov pi; u32 cmd; - ret = recv(sk, &pi, sizeof(pi), MSG_WAITALL); + ret = __recv(sk, &pi, sizeof(pi), MSG_WAITALL); if (!ret) break; @@ -823,7 +859,7 @@ static int page_server_serve(int sk) * An answer must be sent back to inform another side, * that all data were received */ - if (write(sk, &status, sizeof(status)) != sizeof(status)) { + if (__send(sk, &status, sizeof(status), 0) != sizeof(status)) { pr_perror("Can't send the final package"); ret = -1; } @@ -856,14 +892,15 @@ static int page_server_serve(int sk) * Wait when a remote side closes the connection * to avoid TIME_WAIT bucket */ - if (read(sk, &c, sizeof(c)) != 0) { pr_perror("Unexpected data"); ret = -1; } } + tls_terminate_session(); page_server_close(); + pr_info("Session over\n"); close(sk); @@ -1011,6 +1048,11 @@ no_server: if (ret != 0) return ret > 0 ? 0 : -1; + if (tls_x509_init(ask, true)) { + close(sk); + return -1; + } + if (ask >= 0) ret = page_server_serve(ask); @@ -1034,6 +1076,11 @@ static int connect_to_page_server(void) page_server_sk = setup_tcp_client(opts.addr); if (page_server_sk == -1) return -1; + + if (tls_x509_init(page_server_sk, false)) { + close(page_server_sk); + return -1; + } out: /* * CORK the socket at the very beginning. 
As per ANK @@ -1076,14 +1123,16 @@ int disconnect_from_page_server(void) if (send_psi(page_server_sk, &pi)) goto out; - if (read(page_server_sk, &status, sizeof(status)) != sizeof(status)) { + if (__recv(page_server_sk, &status, sizeof(status), 0) != sizeof(status)) { pr_perror("The page server doesn't answer"); goto out; } ret = 0; out: + tls_terminate_session(); close_safe(&page_server_sk); + return ret ? : status; } @@ -1160,10 +1209,14 @@ static int page_server_read(struct ps_async_read *ar, int flags) need = ar->goal - ar->rb; } - ret = recv(page_server_sk, buf, need, flags); + ret = __recv(page_server_sk, buf, need, flags); if (ret < 0) { - pr_perror("Error reading async data from page server"); - return -1; + if (flags == MSG_DONTWAIT && (errno == EAGAIN || errno == EINTR)) { + ret = 0; + } else { + pr_perror("Error reading data from page server"); + return -1; + } } ar->rb += ret; diff --git a/criu/tls.c b/criu/tls.c new file mode 100644 index 000000000..a6bf25db4 --- /dev/null +++ b/criu/tls.c @@ -0,0 +1,366 @@ +#include +#include +#include +#include + +#include + +#include "cr_options.h" +#include "xmalloc.h" + +/* Compatability with GnuTLS verson <3.5 */ +#ifndef GNUTLS_E_CERTIFICATE_VERIFICATION_ERROR +# define GNUTLS_E_CERTIFICATE_VERIFICATION_ERROR GNUTLS_E_CERTIFICATE_ERROR +#endif + +#undef LOG_PREFIX +#define LOG_PREFIX "tls: " + +#define CRIU_PKI_DIR SYSCONFDIR "/pki" +#define CRIU_CACERT CRIU_PKI_DIR "/CA/cacert.pem" +#define CRIU_CACRL CRIU_PKI_DIR "/CA/cacrl.pem" +#define CRIU_CERT CRIU_PKI_DIR "/criu/cert.pem" +#define CRIU_KEY CRIU_PKI_DIR "/criu/private/key.pem" + +#define SPLICE_BUF_SZ_MAX (PIPE_BUF * 100) + +#define tls_perror(msg, ret) pr_err("%s: %s\n", msg, gnutls_strerror(ret)) + +static gnutls_session_t session; +static gnutls_certificate_credentials_t x509_cred; +static int tls_sk = -1; +static int tls_sk_flags = 0; + +void tls_terminate_session() +{ + int ret; + + if (!opts.tls) + return; + + if (session) { + do { + /* don't wait 
for peer to close connection */ + ret = gnutls_bye(session, GNUTLS_SHUT_WR); + } while(ret == GNUTLS_E_AGAIN || ret == GNUTLS_E_INTERRUPTED); + gnutls_deinit(session); + } + + tls_sk = -1; + if (x509_cred) + gnutls_certificate_free_credentials(x509_cred); +} + +ssize_t tls_send(const void *buf, size_t len, int flags) +{ + int ret; + + tls_sk_flags = flags; + ret = gnutls_record_send(session, buf, len); + tls_sk_flags = 0; + + if (ret < 0) { + switch(ret) { + case GNUTLS_E_AGAIN: + errno = EAGAIN; + break; + case GNUTLS_E_INTERRUPTED: + errno = EINTR; + break; + case GNUTLS_E_UNEXPECTED_PACKET_LENGTH: + errno = ENOMSG; + break; + default: + tls_perror("Failed to send data", ret); + errno = EIO; + break; + } + } + + return ret; +} + +/* + * Read data from a file descriptor, then encrypt and send it with GnuTLS. + * This function is used for cases when we would otherwise use splice() + * to transfer data from PIPE to TCP socket. + */ +int tls_send_data_from_fd(int fd, unsigned long len) +{ + ssize_t copied; + unsigned long buf_size = min(len, (unsigned long)SPLICE_BUF_SZ_MAX); + void *buf = xmalloc(buf_size); + + if (!buf) + return -1; + + while (len > 0) { + int ret, sent; + + copied = read(fd, buf, min(len, buf_size)); + if (copied <= 0) { + pr_perror("Can't read from pipe"); + goto err; + } + + for(sent = 0; sent < copied; sent += ret) { + ret = tls_send((buf + sent), (copied - sent), 0); + if (ret < 0) { + tls_perror("Failed sending data", ret); + goto err; + } + } + len -= copied; + } +err: + xfree(buf); + return (len > 0); +} + +ssize_t tls_recv(void *buf, size_t len, int flags) +{ + int ret; + + tls_sk_flags = flags; + ret = gnutls_record_recv(session, buf, len); + tls_sk_flags = 0; + + /* Check if there are any data to receive in the gnutls buffers. 
*/ + if (flags == MSG_DONTWAIT + && (ret == GNUTLS_E_AGAIN || ret == GNUTLS_E_INTERRUPTED)) { + size_t pending = gnutls_record_check_pending(session); + if (pending > 0) { + pr_debug("Receiving pending data (%zu bytes)\n", pending); + ret = gnutls_record_recv(session, buf, len); + } + } + + if (ret < 0) { + switch (ret) { + case GNUTLS_E_AGAIN: + errno = EAGAIN; + break; + case GNUTLS_E_INTERRUPTED: + errno = EINTR; + break; + default: + tls_perror("Failed receiving data", ret); + errno = EIO; + break; + } + ret = -1; + } + + return ret; +} + +/* + * Read and decrypt data with GnuTLS, then write it to a file descriptor. + * This function is used for cases when we would otherwise use splice() + * to transfer data from a TCP socket to a PIPE. + */ +int tls_recv_data_to_fd(int fd, unsigned long len) +{ + gnutls_packet_t packet; + + while (len > 0) { + int ret, w; + gnutls_datum_t pdata; + + ret = gnutls_record_recv_packet(session, &packet); + if (ret == 0) { + pr_info("Connection closed by peer\n"); + break; + } else if (ret < 0) { + tls_perror("Received corrupted data", ret); + break; + } + + gnutls_packet_get(packet, &pdata, NULL); + for(w = 0; w < pdata.size; w += ret) { + ret = write(fd, (pdata.data + w), (pdata.size - w)); + if (ret < 0) { + pr_perror("Failed writing to fd"); + goto err; + } + } + len -= pdata.size; + } +err: + gnutls_packet_deinit(packet); + return (len > 0); +} + +static inline void tls_handshake_verification_status_print(int ret, unsigned status) +{ + gnutls_datum_t out; + int type = gnutls_certificate_type_get(session); + + if (!gnutls_certificate_verification_status_print(status, type, &out, 0)) + pr_err("%s\n", out.data); + + gnutls_free(out.data); +} + +static int tls_x509_verify_peer_cert(void) +{ + int ret; + unsigned status; + + ret = gnutls_certificate_verify_peers3(session, opts.addr, &status); + if (ret != GNUTLS_E_SUCCESS) { + tls_perror("Unable to verify TLS peer", ret); + return -1; + } + + if (status != 0) { + pr_err("Invalid 
certificate\n"); + tls_handshake_verification_status_print( + GNUTLS_E_CERTIFICATE_VERIFICATION_ERROR, status); + return -1; + } + + return 0; +} + +static int tls_handshake() +{ + int ret = -1; + while (ret != GNUTLS_E_SUCCESS) { + ret = gnutls_handshake(session); + if (gnutls_error_is_fatal(ret)) { + tls_perror("TLS handshake failed", ret); + return -1; + } + } + pr_info("TLS handshake completed\n"); + return 0; +} + +static int tls_x509_setup_creds() +{ + int ret; + char *cacert = CRIU_CACERT; + char *cacrl = CRIU_CACRL; + char *cert = CRIU_CERT; + char *key = CRIU_KEY; + gnutls_x509_crt_fmt_t pem = GNUTLS_X509_FMT_PEM; + + if (opts.tls_cacert) + cacert = opts.tls_cacert; + if (opts.tls_cacrl) + cacrl = opts.tls_cacrl; + if (opts.tls_cert) + cert = opts.tls_cert; + if (opts.tls_key) + key = opts.tls_key; + + ret = gnutls_certificate_allocate_credentials(&x509_cred); + if (ret != GNUTLS_E_SUCCESS) { + tls_perror("Failed to allocate x509 credentials", ret); + return -1; + } + + if (!opts.tls_cacert) { + ret = gnutls_certificate_set_x509_system_trust(x509_cred); + if (ret < 0) { + tls_perror("Failed to load default trusted CAs", ret); + return -1; + } + } + + ret = gnutls_certificate_set_x509_trust_file(x509_cred, cacert, pem); + if (ret == 0) { + pr_info("No trusted CA certificates added (%s)\n", cacert); + if (opts.tls_cacert) + return -1; + } + + if (!access(cacrl, R_OK)) { + ret = gnutls_certificate_set_x509_crl_file(x509_cred, cacrl, pem); + if (ret < 0) { + tls_perror("Can't set certificate revocation list", ret); + return -1; + } + } else if (opts.tls_cacrl) { + pr_perror("Can't read certificate revocation list %s", cacrl); + return -1; + } + + ret = gnutls_certificate_set_x509_key_file(x509_cred, cert, key, pem); + if (ret != GNUTLS_E_SUCCESS) { + tls_perror("Failed to set certificate/private key pair", ret); + return -1; + } + + return 0; +} + +static ssize_t _tls_push_cb(void *p, const void* data, size_t sz) +{ + int fd = *(int *)(p); + return send(fd, 
data, sz, tls_sk_flags); +} + +static ssize_t _tls_pull_cb(void *p, void* data, size_t sz) +{ + int fd = *(int *)(p); + return recv(fd, data, sz, tls_sk_flags); +} + +static int tls_x509_setup_session(unsigned int flags) +{ + int ret; + + ret = gnutls_init(&session, flags); + if (ret != GNUTLS_E_SUCCESS) { + tls_perror("Failed to initialize session", ret); + return -1; + } + + ret = gnutls_credentials_set(session, GNUTLS_CRD_CERTIFICATE, x509_cred); + if (ret != GNUTLS_E_SUCCESS) { + tls_perror("Failed to set session credentials", ret); + return -1; + } + + ret = gnutls_set_default_priority(session); + if (ret != GNUTLS_E_SUCCESS) { + tls_perror("Failed to set priority", ret); + return -1; + } + + gnutls_transport_set_ptr(session, &tls_sk); + gnutls_transport_set_push_function(session, _tls_push_cb); + gnutls_transport_set_pull_function(session, _tls_pull_cb); + + if (flags == GNUTLS_SERVER) { + /* Require client certificate */ + gnutls_certificate_server_set_request(session, GNUTLS_CERT_REQUIRE); + /* Do not advertise trusted CAs to the client */ + gnutls_certificate_send_x509_rdn_sequence(session, 1); + } + + return 0; +} + +int tls_x509_init(int sockfd, bool is_server) +{ + if (!opts.tls) + return 0; + + tls_sk = sockfd; + if (tls_x509_setup_creds()) + goto err; + if (tls_x509_setup_session(is_server ? 
GNUTLS_SERVER : GNUTLS_CLIENT)) + goto err; + if (tls_handshake()) + goto err; + if (tls_x509_verify_peer_cert()) + goto err; + + return 0; +err: + tls_terminate_session(); + return -1; +} diff --git a/criu/uffd.c b/criu/uffd.c index 6699cb14a..5c1e32184 100644 --- a/criu/uffd.c +++ b/criu/uffd.c @@ -37,6 +37,7 @@ #include "page-xfer.h" #include "common/lock.h" #include "rst-malloc.h" +#include "tls.h" #include "fdstore.h" #include "util.h" @@ -1469,5 +1470,7 @@ int cr_lazy_pages(bool daemon) ret = handle_requests(epollfd, events, nr_fds); + tls_terminate_session(); + return ret; } From d4b4a6e6c3eb507b600dabcc9910103f2ab11d29 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Mon, 6 May 2019 14:12:36 +0100 Subject: [PATCH 0106/2030] tls: Add --tls-no-cn-verify option By default, CRIU will verify the certificate of a server (with gnutls_certificate_verify_peers3()) by providing the value specified with "--address" as a hostname. As part of the verification process, this value will be compared against the common name (CN) included in the TLS certificate of the server. If the CN doesn't match the TLS handshake will be terminated and CRIU will exit with an error. Although, this is an important feature that is used to mitigate MITM attacks, a user might need to disable such hostname verification for a particular use case or testing purposes. For instance, this option is needed when the common name included in the certificate corresponds to the server's domain name and an IP address is being used to establish connection. 
Signed-off-by: Radostin Stoyanov --- criu/config.c | 1 + criu/crtools.c | 1 + criu/include/cr_options.h | 1 + criu/tls.c | 6 +++++- 4 files changed, 8 insertions(+), 1 deletion(-) diff --git a/criu/config.c b/criu/config.c index bfdd6c658..39aa071c9 100644 --- a/criu/config.c +++ b/criu/config.c @@ -515,6 +515,7 @@ int parse_options(int argc, char **argv, bool *usage_error, { "tls-cert", required_argument, 0, 1094}, { "tls-key", required_argument, 0, 1095}, BOOL_OPT("tls", &opts.tls), + {"tls-no-cn-verify", no_argument, &opts.tls_no_cn_verify, true}, { }, }; diff --git a/criu/crtools.c b/criu/crtools.c index a07df064c..a94875684 100644 --- a/criu/crtools.c +++ b/criu/crtools.c @@ -432,6 +432,7 @@ usage: " --tls-cert FILE path to TLS certificate file\n" " --tls-key FILE path to TLS private key file\n" " --tls use TLS to secure remote connection\n" +" --tls-no-cn-verify do not verify common name in server certificate\n" "\n" "Configuration file options:\n" " --config FILEPATH pass a specific configuration file\n" diff --git a/criu/include/cr_options.h b/criu/include/cr_options.h index fcba278e0..82f76ad94 100644 --- a/criu/include/cr_options.h +++ b/criu/include/cr_options.h @@ -143,6 +143,7 @@ struct cr_options { char *tls_cert; char *tls_key; int tls; + int tls_no_cn_verify; }; extern struct cr_options opts; diff --git a/criu/tls.c b/criu/tls.c index a6bf25db4..db9cc4f5a 100644 --- a/criu/tls.c +++ b/criu/tls.c @@ -206,8 +206,12 @@ static int tls_x509_verify_peer_cert(void) { int ret; unsigned status; + const char *hostname = NULL; - ret = gnutls_certificate_verify_peers3(session, opts.addr, &status); + if (!opts.tls_no_cn_verify) + hostname = opts.addr; + + ret = gnutls_certificate_verify_peers3(session, hostname, &status); if (ret != GNUTLS_E_SUCCESS) { tls_perror("Unable to verify TLS peer", ret); return -1; From 43842046984701db42286ebcd96f0270a063044f Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Thu, 4 Apr 2019 00:44:22 +0100 Subject: [PATCH 
0107/2030] rpc: Add support for TLS options Signed-off-by: Radostin Stoyanov --- criu/cr-service.c | 13 +++++++++++++ images/rpc.proto | 6 ++++++ 2 files changed, 19 insertions(+) diff --git a/criu/cr-service.c b/criu/cr-service.c index cb76de4f4..0938db02b 100644 --- a/criu/cr-service.c +++ b/criu/cr-service.c @@ -608,6 +608,19 @@ static int setup_opts_from_req(int sk, CriuOpts *req) goto err; } + if (req->tls_cacert) + SET_CHAR_OPTS(tls_cacert, req->tls_cacert); + if (req->tls_cacrl) + SET_CHAR_OPTS(tls_cacrl, req->tls_cacrl); + if (req->tls_cert) + SET_CHAR_OPTS(tls_cert, req->tls_cert); + if (req->tls_key) + SET_CHAR_OPTS(tls_key, req->tls_key); + if (req->tls) + opts.tls = req->tls; + if (req->tls_no_cn_verify) + opts.tls_no_cn_verify = req->tls_no_cn_verify; + if (req->has_auto_ext_mnt) opts.autodetect_ext_mounts = req->auto_ext_mnt; diff --git a/images/rpc.proto b/images/rpc.proto index 16c5d5028..15e677a77 100644 --- a/images/rpc.proto +++ b/images/rpc.proto @@ -114,6 +114,12 @@ message criu_opts { optional string config_file = 51; optional bool tcp_close = 52; optional string lsm_profile = 53; + optional string tls_cacert = 54; + optional string tls_cacrl = 55; + optional string tls_cert = 56; + optional string tls_key = 57; + optional bool tls = 58; + optional bool tls_no_cn_verify = 59; /* optional bool check_mounts = 128; */ } From b12d4f275850f4f9d65a961028cb50e355985fcf Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Wed, 8 May 2019 21:09:33 +0100 Subject: [PATCH 0108/2030] zdtm: Add --tls option Signed-off-by: Radostin Stoyanov --- test/pki/cacert.pem | 23 ++++++ test/pki/cert.pem | 24 ++++++ test/pki/key.pem | 182 ++++++++++++++++++++++++++++++++++++++++++++ test/zdtm.py | 23 ++++-- 4 files changed, 246 insertions(+), 6 deletions(-) create mode 100644 test/pki/cacert.pem create mode 100644 test/pki/cert.pem create mode 100644 test/pki/key.pem diff --git a/test/pki/cacert.pem b/test/pki/cacert.pem new file mode 100644 index 
000000000..2f8706616 --- /dev/null +++ b/test/pki/cacert.pem @@ -0,0 +1,23 @@ +-----BEGIN CERTIFICATE----- +MIID0TCCAjmgAwIBAgIUWzgmx9p7y7mkrNptGX9+0acjpa4wDQYJKoZIhvcNAQEL +BQAwADAeFw0xOTA1MDYxMjAzMDJaFw0yMDA1MDUxMjAzMDJaMAAwggGiMA0GCSqG +SIb3DQEBAQUAA4IBjwAwggGKAoIBgQD0p0lJUlq917GmJuCBeP2eLNd1/MUg1ojy +s7rrpinPYtLZqqquUhp32lfQtt3uJLjkhTrseZd86zWi3SMZlGs8zGGmKfqg0vaG +BXIgpEIr5C0wU9995kL9A6LS+eFZR6vJQETO5T22tjponoqEPOXeU8VaiC9jNipC +uFJT0wyC0bKIo+TUn573kxsGMt8jMOv0tc/okUlH16UAsYrmN7kWzgkWTJPddB7S +v5a9ibpPkbh+wrIGK5A6V5hTZ8U1wz2bE6/Xp+qjsD2R3jeU6f1tDvc8FZilabQy +Rmbxggucl1G3Ulo6Nvor1lhog72eZlHZujzf/5e/aMiZ7Br6plZ1/WTwtNgoCw6A +rgpLDraasQohiK6opYs2rr7uuiQxPLLVWE/RryXwUEoPXzxaf782XtXxkB0UhGvz +y2JBxCVPn7uUGuyEYywjTjI2UFvsMcXnMiQ4WaAfMbAmrBWM7EQ4b7VpD2c+OZkQ +J/AJeg85/ovTAtHPjhPP+0a9hnirktkCAwEAAaNDMEEwDwYDVR0TAQH/BAUwAwEB +/zAPBgNVHQ8BAf8EBQMDBwQAMB0GA1UdDgQWBBQOg6AA8Qu/m/O/II5spzYsTnsn +pjANBgkqhkiG9w0BAQsFAAOCAYEA1KKtw+ILKOg1AhGwgPsJXAWZoSIt7hdLaJ3P +WGyTWHLKKoJiGlLj3bSsJcMmMO+UwHBH9lmXrOWe/xcOvob2H+7dXbddQ0pX1wzK +KJKzSG35FZ2BfuSn5oEDtRsHnN2Ikc5MYz1a+F4w2tVL/Hcxld+oSAodDlCbGoe+ +0MkI5f1KhdAw00l/5IO7xPOcThjHw+nB5/cZTQ+l4zLWCWaXkor4IAEq/plPcdX1 +uoLSj3JruLz7/ts/EgG+ARAzXQrJ+LM2hdPB1NiaVxFq7MSWM6FybUdmMYgbP5s4 +RMNqI/M+bU9K5LRySDaiPhDXUoVULuqG1a23GQwXLOjF0JbrUQewfAaTO7TaPFh1 +lr25j9Fc9/gcXZjvLl+CEIv6P/haGOwySCTCks0F5bDehbLjZStPmugcnJflXdBn +lzoejlw2rePojQMlffQsaRGmmhj0beU4WQBfGACcZQB8GFNxQB8aynf0CK7Dvvb0 +9c9y4k0gHL7RxeLoQfq+smzKm+Eo +-----END CERTIFICATE----- diff --git a/test/pki/cert.pem b/test/pki/cert.pem new file mode 100644 index 000000000..a0946ee41 --- /dev/null +++ b/test/pki/cert.pem @@ -0,0 +1,24 @@ +-----BEGIN CERTIFICATE----- +MIIEAzCCAmugAwIBAgIUKV6zLC//OJDnmOYBuIG1Gvmv+V4wDQYJKoZIhvcNAQEL +BQAwADAeFw0xOTA1MDYxMjAzMDJaFw0yMDA1MDUxMjAzMDJaMBQxEjAQBgNVBAMT +CWxvY2FsaG9zdDCCAaIwDQYJKoZIhvcNAQEBBQADggGPADCCAYoCggGBANX1nv4J +U8+TEb2bWej5O2nOowpw2zSYTDAQ1oyAvV3P99Y6GZCuVZ1uT/7DWat0uRpcdmNi +HvownkO4VmDZdVqgiK1eHzY5YBJ7hBVDs3tpWNuN7eJPjnskNmJqKQ6l9rxYl/au 
+781T+tdtHp1ATtToMgVJxWaUx5lrpEJdmYc8Y6GpAA42D+rI3o4Sll3mI5rPCk16 +QY5dT2lnL2HuCKzM2bjWat6b3lMpfNz3A/blU9E/462Zxr/yKK/0yy3SBZhYzrrQ +1/erjIpm4I0sakHIOexM1AQliFiowFzVvr/paiXApWGOcuBJVIbmPI/bEGuTh0nr +3pmiF0YrkDCRhargElYcz64KQ9IxPFCKcKjkMnFPjTStZ7rcMyqKvGczqFaM5a6c +9gIn2ieUrVZ38yvtI5Lo/uxZ5IjXqB1Fdg4xi2tyf9WMHKy2tydBr9bTjfQRXfNT +/Zm3woDXOYsHzj+Sf6ntLVCkO1fnczw03fPRV03/uVRa5mPGyyj9xdPBqwIDAQAB +o2EwXzAMBgNVHRMBAf8EAjAAMA8GA1UdDwEB/wQFAwMHoAAwHQYDVR0OBBYEFEtF +ELehnIjLzoh/W51TGm2B00QAMB8GA1UdIwQYMBaAFA6DoADxC7+b878gjmynNixO +eyemMA0GCSqGSIb3DQEBCwUAA4IBgQA17NZCaiCgD9P4GPWgdVNkWUrmc8itJuIB +z3c9RdJGduxb9W/D7IW//D3hIblOPorNi3+08kO/IRMGah874MDCprMNppk94WGj +Kgqi/rsxq+rT6bcZXxMrcOIg0j2EvTPIgPh7trd8nHVWxNT/hvFClDtBJ2ssL2Tz +76EA7smDCUsfdzFJ2Xvk95fSTL49nfT2j9N/YoLaBQtCIxWAVZHKiCF2K+yXufHz +B/9UlXwsPJfqxM75dYWXFEqvhNf08YRHT1e1GRrybNGrNKF864KbLsnASdK4N5wu +sK9vZJ7VkLDQz+YpZkbm+UgOYK/BY3M8IX+F+WngV+43fr6Wh89TSgD7acEBvQTm +q1y9FipRvz0my7fwBh6UlYDja6/3yw6/YfN7uMFGsOOSgpNDCrMLqesf8l1HdQUF +VaVJyDjgFswV9KykAeJK2KU8QI7TGHv9soW60sr97DgUtCh4a6OPXLt79Ji3RSNw +MbU54JnpnfmMAj/0suDymdrJWv8EJKc= +-----END CERTIFICATE----- diff --git a/test/pki/key.pem b/test/pki/key.pem new file mode 100644 index 000000000..eda1aa761 --- /dev/null +++ b/test/pki/key.pem @@ -0,0 +1,182 @@ +Public Key Info: + Public Key Algorithm: RSA + Key Security Level: High (3072 bits) + +modulus: + 00:d5:f5:9e:fe:09:53:cf:93:11:bd:9b:59:e8:f9:3b + 69:ce:a3:0a:70:db:34:98:4c:30:10:d6:8c:80:bd:5d + cf:f7:d6:3a:19:90:ae:55:9d:6e:4f:fe:c3:59:ab:74 + b9:1a:5c:76:63:62:1e:fa:30:9e:43:b8:56:60:d9:75 + 5a:a0:88:ad:5e:1f:36:39:60:12:7b:84:15:43:b3:7b + 69:58:db:8d:ed:e2:4f:8e:7b:24:36:62:6a:29:0e:a5 + f6:bc:58:97:f6:ae:ef:cd:53:fa:d7:6d:1e:9d:40:4e + d4:e8:32:05:49:c5:66:94:c7:99:6b:a4:42:5d:99:87 + 3c:63:a1:a9:00:0e:36:0f:ea:c8:de:8e:12:96:5d:e6 + 23:9a:cf:0a:4d:7a:41:8e:5d:4f:69:67:2f:61:ee:08 + ac:cc:d9:b8:d6:6a:de:9b:de:53:29:7c:dc:f7:03:f6 + e5:53:d1:3f:e3:ad:99:c6:bf:f2:28:af:f4:cb:2d:d2 + 
05:98:58:ce:ba:d0:d7:f7:ab:8c:8a:66:e0:8d:2c:6a + 41:c8:39:ec:4c:d4:04:25:88:58:a8:c0:5c:d5:be:bf + e9:6a:25:c0:a5:61:8e:72:e0:49:54:86:e6:3c:8f:db + 10:6b:93:87:49:eb:de:99:a2:17:46:2b:90:30:91:85 + aa:e0:12:56:1c:cf:ae:0a:43:d2:31:3c:50:8a:70:a8 + e4:32:71:4f:8d:34:ad:67:ba:dc:33:2a:8a:bc:67:33 + a8:56:8c:e5:ae:9c:f6:02:27:da:27:94:ad:56:77:f3 + 2b:ed:23:92:e8:fe:ec:59:e4:88:d7:a8:1d:45:76:0e + 31:8b:6b:72:7f:d5:8c:1c:ac:b6:b7:27:41:af:d6:d3 + 8d:f4:11:5d:f3:53:fd:99:b7:c2:80:d7:39:8b:07:ce + 3f:92:7f:a9:ed:2d:50:a4:3b:57:e7:73:3c:34:dd:f3 + d1:57:4d:ff:b9:54:5a:e6:63:c6:cb:28:fd:c5:d3:c1 + ab: + +public exponent: + 01:00:01: + +private exponent: + 1e:38:b0:79:7f:85:c8:17:24:f5:5c:41:29:e8:32:5d + 32:a3:d2:f0:b7:f5:c8:e1:52:14:be:c9:5f:d1:df:b3 + 65:75:6c:05:7a:6b:35:8a:a4:2f:46:73:ff:71:79:6e + 3f:eb:f9:88:f6:2e:1b:f6:cc:14:12:b0:98:c3:7e:91 + 0b:85:e2:bf:1d:b7:82:09:30:f3:23:68:01:85:13:94 + 80:c9:9a:55:94:96:da:30:48:a0:29:ec:86:da:1b:d5 + 2b:2b:74:63:92:b8:2a:8f:87:29:f0:ae:d7:55:63:0d + 2d:b3:0b:0e:2d:84:dc:d5:08:b5:ac:a0:f7:29:9d:71 + 89:3d:27:6a:eb:96:f5:4e:9b:8a:dc:14:82:0a:c7:5c + 16:1c:d2:7e:b9:1b:13:69:d8:b2:b1:b1:7e:aa:a9:ad + 06:ce:66:0e:5b:50:10:42:2a:0a:fd:29:14:f7:09:63 + c1:20:18:5f:27:81:46:12:8c:b8:f4:89:a6:3d:55:a1 + d4:64:fc:f2:db:d7:9c:f5:be:f7:9d:88:5c:6d:36:a4 + 4b:ea:c5:e3:ea:32:81:6b:f3:47:b5:35:d5:c4:1a:b2 + ae:12:9d:19:a3:ec:a4:af:41:7e:5e:34:9d:f5:bc:b9 + 1f:a3:c2:32:b4:fc:95:a7:7a:54:04:e2:d6:4e:10:2f + 66:68:8b:3b:20:ea:05:db:2e:72:01:11:e7:7c:f8:72 + 0f:60:be:f1:27:19:ad:3a:6f:e9:70:56:3a:86:6e:46 + 0d:e3:55:31:66:77:09:84:48:b9:25:4b:c3:26:70:12 + ca:a4:5f:c6:3d:6a:e5:db:4d:63:04:b8:09:07:c9:30 + 85:08:9d:77:40:26:60:da:10:c2:53:d2:00:0d:9e:d9 + d5:71:06:30:eb:fb:f7:3f:82:1f:b3:9a:f3:4d:24:86 + 2e:94:fd:06:9e:dc:26:68:fa:64:c3:f9:fa:08:c4:b2 + ec:7a:f5:55:c5:10:b5:e2:2d:de:ba:04:30:10:5b:99 + + +prime1: + 00:fb:d1:47:9d:9e:73:f8:1e:09:21:fd:89:16:05:56 + af:a5:cf:52:d5:cd:f7:26:18:d1:84:3a:36:65:0b:a2 + 
cd:f9:b8:99:c0:c7:ef:00:c9:2f:c9:92:1a:1d:3d:86 + 58:3b:b1:be:d4:8c:c6:1b:df:ba:ee:87:aa:d1:22:47 + 18:bd:de:01:0f:0d:cb:ac:d0:48:a4:f4:93:e2:a6:cb + b5:b7:f5:f5:72:dd:ec:ac:13:e8:3d:62:23:54:ac:52 + ff:ee:9a:e1:7f:b0:ae:3b:41:38:d8:39:2b:40:ef:25 + 81:50:b0:98:db:f8:40:16:6e:1a:41:79:22:90:58:99 + 80:c2:0d:ba:b5:d3:54:ec:28:33:e4:b0:58:ea:de:61 + a1:b7:30:0b:9d:dc:73:62:c2:07:d3:75:91:48:49:dd + be:cf:b2:90:95:8f:29:6c:6f:f6:68:cb:cf:d5:24:a3 + d7:37:81:1b:34:3b:af:9a:48:52:af:53:7c:f7:32:a2 + 3f: + +prime2: + 00:d9:83:5e:be:0a:ea:0b:d9:66:63:56:3b:9e:44:aa + 46:6d:8d:6c:10:81:4b:de:19:5d:2c:16:7e:30:7c:ad + 23:9a:89:53:cc:18:e8:e8:51:2b:79:35:d0:67:7d:9e + 8f:be:ea:63:5e:14:c0:6b:ba:02:6c:4a:da:07:70:9d + 14:fa:be:1e:40:47:50:6f:f2:5a:87:9e:b6:b1:b8:55 + 2c:b6:a2:e3:b0:24:ba:ea:9b:55:87:8b:4b:cf:40:4a + 25:b4:89:cf:9e:76:ca:79:4a:f4:74:b7:ee:cf:6c:8f + cb:e3:3d:9e:86:3b:44:b7:70:ec:05:0c:68:ce:d6:c3 + a2:ec:e6:11:d6:2f:f7:80:26:a9:5c:aa:b9:a6:33:84 + a9:00:43:cf:72:07:8a:91:59:a2:b1:de:79:07:6b:81 + 67:a5:c2:4b:fd:29:8a:1a:96:66:57:66:d4:37:9a:98 + 69:d1:19:24:53:b1:a4:54:68:1e:8c:2b:b4:93:19:ed + 95: + +coefficient: + 00:90:9a:7f:6f:14:a8:bc:79:3f:25:e5:62:f9:5d:29 + 78:a4:78:8e:7a:e4:8a:62:8a:7f:9c:ae:75:95:fe:ee + 1a:99:53:40:01:76:29:7d:48:85:28:a2:2a:9f:0f:10 + 8c:19:6a:36:6b:e1:ac:a2:07:b9:72:5c:b9:a6:20:bb + 8f:cb:f5:ea:dd:3f:0e:ab:9d:c1:57:7e:7b:96:f9:da + b0:52:3c:3f:62:94:e7:5c:04:9e:ac:60:cd:4d:ec:7e + 68:d3:fb:2a:b4:02:f0:0e:be:37:bc:2a:f8:6e:8d:31 + b5:38:67:00:9e:67:9f:71:d0:88:36:32:69:4b:20:73 + eb:a1:d9:bc:72:c2:7e:39:1a:36:cc:c1:45:a2:14:37 + e6:ca:db:4d:0b:5b:68:a4:ff:b7:7b:b1:db:2f:70:27 + a1:6c:31:3f:c0:c3:23:04:b0:7a:e2:0d:21:ba:5a:80 + 52:c1:a1:2b:57:72:20:b6:ed:b1:e8:3b:95:88:81:90 + 5d: + +exp1: + 00:ef:ce:66:20:01:44:b9:35:89:46:f8:56:33:45:54 + 3f:23:6d:23:9a:7e:71:6d:b3:56:db:50:40:7a:cb:b0 + f7:ec:67:52:ec:96:b9:d1:8a:c6:5a:74:2b:30:4b:66 + 03:e2:9d:2b:78:e8:b2:c4:da:b3:fe:f1:ed:c7:09:98 + 
a1:44:37:05:d5:1b:33:2a:58:93:c5:9b:30:b6:38:57 + 68:af:4e:a8:b7:02:06:9f:fc:b9:3e:b3:95:a7:ce:0f + a0:b0:ce:88:0e:7c:e7:ff:7f:e6:2d:6b:8b:f8:63:85 + d8:f7:49:a5:d8:5d:3a:52:e1:f9:58:fe:8d:de:de:b1 + 18:40:34:a8:e8:fc:df:33:a2:39:81:00:3b:3d:38:17 + cb:d4:53:09:cd:04:a2:51:9b:2b:ae:c1:98:60:3a:0f + d4:e5:a0:4c:36:51:46:86:80:bd:2d:21:62:c3:bd:07 + d6:2d:82:62:b0:c4:62:3f:4f:be:86:3e:c0:93:fc:81 + 2b: + +exp2: + 11:e4:73:93:b0:74:26:3b:60:e7:c4:fd:2c:7c:bb:81 + 05:9b:ff:8a:b0:08:1c:a1:fb:7f:17:ee:93:70:7e:11 + 92:b1:bf:39:e7:c6:a8:ed:9c:64:e1:1f:5e:93:ff:ca + 15:4b:54:97:35:9f:ca:7c:c7:9c:3e:e0:06:82:a5:f9 + 46:d3:02:cc:08:d1:be:13:b2:8c:bb:6a:8d:dd:fa:eb + ad:ae:62:8a:67:cb:14:67:68:b6:b8:a7:a8:c9:c2:0f + ad:f5:34:25:f5:e1:9b:ee:a5:83:40:6a:1d:97:f1:90 + 35:06:29:97:23:22:f8:f0:0a:0a:34:46:1e:d5:9d:cc + 36:2e:8a:c3:12:b9:0a:4a:a3:dd:e2:91:58:f1:9d:f5 + 04:f7:8f:05:f3:46:db:c4:02:d5:1c:d6:d9:dc:67:0d + ae:9d:f8:00:40:3d:83:08:62:2c:c8:61:a6:9d:49:f2 + 52:67:fe:0c:00:6d:e3:1f:99:7b:b0:50:af:55:0f:ad + + + +Public Key PIN: + pin-sha256:EiqPFBPoLKkCzVlK8KoKYGQT/LSo7/0iLg/I7nKt1/0= +Public Key ID: + sha256:122a8f1413e82ca902cd594af0aa0a606413fcb4a8effd222e0fc8ee72add7fd + sha1:4b4510b7a19c88cbce887f5b9d531a6d81d34400 + +-----BEGIN RSA PRIVATE KEY----- +MIIG5AIBAAKCAYEA1fWe/glTz5MRvZtZ6Pk7ac6jCnDbNJhMMBDWjIC9Xc/31joZ +kK5VnW5P/sNZq3S5Glx2Y2Ie+jCeQ7hWYNl1WqCIrV4fNjlgEnuEFUOze2lY243t +4k+OeyQ2YmopDqX2vFiX9q7vzVP6120enUBO1OgyBUnFZpTHmWukQl2ZhzxjoakA +DjYP6sjejhKWXeYjms8KTXpBjl1PaWcvYe4IrMzZuNZq3pveUyl83PcD9uVT0T/j +rZnGv/Ior/TLLdIFmFjOutDX96uMimbgjSxqQcg57EzUBCWIWKjAXNW+v+lqJcCl +YY5y4ElUhuY8j9sQa5OHSevemaIXRiuQMJGFquASVhzPrgpD0jE8UIpwqOQycU+N +NK1nutwzKoq8ZzOoVozlrpz2AifaJ5StVnfzK+0jkuj+7FnkiNeoHUV2DjGLa3J/ +1YwcrLa3J0Gv1tON9BFd81P9mbfCgNc5iwfOP5J/qe0tUKQ7V+dzPDTd89FXTf+5 +VFrmY8bLKP3F08GrAgMBAAECggGAHjiweX+FyBck9VxBKegyXTKj0vC39cjhUhS+ +yV/R37NldWwFems1iqQvRnP/cXluP+v5iPYuG/bMFBKwmMN+kQuF4r8dt4IJMPMj +aAGFE5SAyZpVlJbaMEigKeyG2hvVKyt0Y5K4Ko+HKfCu11VjDS2zCw4thNzVCLWs 
+oPcpnXGJPSdq65b1TpuK3BSCCsdcFhzSfrkbE2nYsrGxfqqprQbOZg5bUBBCKgr9 +KRT3CWPBIBhfJ4FGEoy49ImmPVWh1GT88tvXnPW+952IXG02pEvqxePqMoFr80e1 +NdXEGrKuEp0Zo+ykr0F+XjSd9by5H6PCMrT8lad6VATi1k4QL2Zoizsg6gXbLnIB +Eed8+HIPYL7xJxmtOm/pcFY6hm5GDeNVMWZ3CYRIuSVLwyZwEsqkX8Y9auXbTWME +uAkHyTCFCJ13QCZg2hDCU9IADZ7Z1XEGMOv79z+CH7Oa800khi6U/Qae3CZo+mTD ++foIxLLsevVVxRC14i3eugQwEFuZAoHBAPvRR52ec/geCSH9iRYFVq+lz1LVzfcm +GNGEOjZlC6LN+biZwMfvAMkvyZIaHT2GWDuxvtSMxhvfuu6HqtEiRxi93gEPDcus +0Eik9JPipsu1t/X1ct3srBPoPWIjVKxS/+6a4X+wrjtBONg5K0DvJYFQsJjb+EAW +bhpBeSKQWJmAwg26tdNU7Cgz5LBY6t5hobcwC53cc2LCB9N1kUhJ3b7PspCVjyls +b/Zoy8/VJKPXN4EbNDuvmkhSr1N89zKiPwKBwQDZg16+CuoL2WZjVjueRKpGbY1s +EIFL3hldLBZ+MHytI5qJU8wY6OhRK3k10Gd9no++6mNeFMBrugJsStoHcJ0U+r4e +QEdQb/Jah562sbhVLLai47AkuuqbVYeLS89ASiW0ic+edsp5SvR0t+7PbI/L4z2e +hjtEt3DsBQxoztbDouzmEdYv94AmqVyquaYzhKkAQ89yB4qRWaKx3nkHa4FnpcJL +/SmKGpZmV2bUN5qYadEZJFOxpFRoHowrtJMZ7ZUCgcEA785mIAFEuTWJRvhWM0VU +PyNtI5p+cW2zVttQQHrLsPfsZ1LslrnRisZadCswS2YD4p0reOiyxNqz/vHtxwmY +oUQ3BdUbMypYk8WbMLY4V2ivTqi3Agaf/Lk+s5Wnzg+gsM6IDnzn/3/mLWuL+GOF +2PdJpdhdOlLh+Vj+jd7esRhANKjo/N8zojmBADs9OBfL1FMJzQSiUZsrrsGYYDoP +1OWgTDZRRoaAvS0hYsO9B9YtgmKwxGI/T76GPsCT/IErAoHAEeRzk7B0Jjtg58T9 +LHy7gQWb/4qwCByh+38X7pNwfhGSsb8558ao7Zxk4R9ek//KFUtUlzWfynzHnD7g +BoKl+UbTAswI0b4Tsoy7ao3d+uutrmKKZ8sUZ2i2uKeoycIPrfU0JfXhm+6lg0Bq +HZfxkDUGKZcjIvjwCgo0Rh7Vncw2LorDErkKSqPd4pFY8Z31BPePBfNG28QC1RzW +2dxnDa6d+ABAPYMIYizIYaadSfJSZ/4MAG3jH5l7sFCvVQ+tAoHBAJCaf28UqLx5 +PyXlYvldKXikeI565Ipiin+crnWV/u4amVNAAXYpfUiFKKIqnw8QjBlqNmvhrKIH +uXJcuaYgu4/L9erdPw6rncFXfnuW+dqwUjw/YpTnXASerGDNTex+aNP7KrQC8A6+ +N7wq+G6NMbU4ZwCeZ59x0Ig2MmlLIHProdm8csJ+ORo2zMFFohQ35srbTQtbaKT/ +t3ux2y9wJ6FsMT/AwyMEsHriDSG6WoBSwaErV3Igtu2x6DuViIGQXQ== +-----END RSA PRIVATE KEY----- diff --git a/test/zdtm.py b/test/zdtm.py index a01947557..000f590b0 100755 --- a/test/zdtm.py +++ b/test/zdtm.py @@ -960,6 +960,7 @@ class criu: self.__lazy_pages_p = None self.__page_server_p = None self.__dump_process = None + self.__tls = 
self.__tls_options() if opts['tls'] else [] self.__criu_bin = opts['criu_bin'] self.__crit_bin = opts['crit_bin'] @@ -1008,6 +1009,13 @@ class criu: print("Removing %s" % self.__dump_path) shutil.rmtree(self.__dump_path) + def __tls_options(self): + pki_dir = os.path.dirname(os.path.abspath(__file__)) + "/pki" + return ["--tls", "--tls-no-cn-verify", + "--tls-key", pki_dir + "/key.pem", + "--tls-cert", pki_dir + "/cert.pem", + "--tls-cacert", pki_dir + "/cacert.pem"] + def __ddir(self): return os.path.join(self.__dump_path, "%d" % self.__iter) @@ -1141,12 +1149,12 @@ class criu: if self.__page_server: print("Adding page server") - ps_opts = ["--port", "12345"] + ps_opts = ["--port", "12345"] + self.__tls if self.__dedup: ps_opts += ["--auto-dedup"] self.__page_server_p = self.__criu_act("page-server", opts = ps_opts, nowait = True) - a_opts += ["--page-server", "--address", "127.0.0.1", "--port", "12345"] + a_opts += ["--page-server", "--address", "127.0.0.1", "--port", "12345"] + self.__tls a_opts += self.__test.getdopts() @@ -1167,7 +1175,7 @@ class criu: nowait = False if self.__lazy_migrate and action == "dump": - a_opts += ["--lazy-pages", "--port", "12345"] + a_opts += ["--lazy-pages", "--port", "12345"] + self.__tls nowait = True self.__dump_process = self.__criu_act(action, opts = a_opts + opts, nowait = nowait) if self.__mdedup and self.__iter > 1: @@ -1215,10 +1223,12 @@ class criu: if self.__lazy_pages or self.__lazy_migrate: lp_opts = [] if self.__remote_lazy_pages or self.__lazy_migrate: - lp_opts += ['--page-server', "--port", "12345", "--address", "127.0.0.1"] + lp_opts += ["--page-server", "--port", "12345", + "--address", "127.0.0.1"] + self.__tls + if self.__remote_lazy_pages: ps_opts = ["--pidfile", "ps.pid", - "--port", "12345", "--lazy-pages"] + "--port", "12345", "--lazy-pages"] + self.__tls self.__page_server_p = self.__criu_act("page-server", opts = ps_opts, nowait = True) self.__lazy_pages_p = self.__criu_act("lazy-pages", opts = lp_opts, 
nowait = True) r_opts += ["--lazy-pages"] @@ -1719,7 +1729,7 @@ class Launcher: nd = ('nocr', 'norst', 'pre', 'iters', 'page_server', 'sibling', 'stop', 'empty_ns', 'fault', 'keep_img', 'report', 'snaps', 'sat', 'script', 'rpc', 'lazy_pages', 'join_ns', 'dedup', 'sbs', 'freezecg', 'user', 'dry_run', 'noauto_dedup', - 'remote_lazy_pages', 'show_stats', 'lazy_migrate', + 'remote_lazy_pages', 'show_stats', 'lazy_migrate', 'tls', 'criu_bin', 'crit_bin') arg = repr((name, desc, flavor, {d: self.__opts[d] for d in nd})) @@ -2283,6 +2293,7 @@ rp.add_argument("--ignore-taint", help = "Don't care about a non-zero kernel tai rp.add_argument("--lazy-pages", help = "restore pages on demand", action = 'store_true') rp.add_argument("--lazy-migrate", help = "restore pages on demand", action = 'store_true') rp.add_argument("--remote-lazy-pages", help = "simulate lazy migration", action = 'store_true') +rp.add_argument("--tls", help = "use TLS for migration", action = 'store_true') rp.add_argument("--title", help = "A test suite title", default = "criu") rp.add_argument("--show-stats", help = "Show criu statistics", action = 'store_true') rp.add_argument("--criu-bin", help = "Path to criu binary", default = '../criu/criu') From 73d3ddef3400336e8b36b24ad91c005acddfde87 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Wed, 8 May 2019 21:45:39 +0100 Subject: [PATCH 0109/2030] travis: Enable TLS testing Signed-off-by: Radostin Stoyanov --- scripts/build/Dockerfile.alpine | 1 + scripts/build/Dockerfile.centos | 1 + scripts/build/Dockerfile.fedora.tmpl | 1 + scripts/build/Dockerfile.tmpl | 2 ++ scripts/travis/travis-tests | 5 +++-- 5 files changed, 8 insertions(+), 2 deletions(-) diff --git a/scripts/build/Dockerfile.alpine b/scripts/build/Dockerfile.alpine index a91e01637..c71a3901f 100644 --- a/scripts/build/Dockerfile.alpine +++ b/scripts/build/Dockerfile.alpine @@ -9,6 +9,7 @@ RUN apk update && apk add \ ccache \ coreutils \ git \ + gnutls-dev \ libaio-dev \ libcap-dev \ 
libnet-dev \ diff --git a/scripts/build/Dockerfile.centos b/scripts/build/Dockerfile.centos index 2ed3a2db9..2ce40b179 100644 --- a/scripts/build/Dockerfile.centos +++ b/scripts/build/Dockerfile.centos @@ -9,6 +9,7 @@ RUN yum install -y \ findutils \ gcc \ git \ + gnutls-devel \ iproute \ iptables \ libaio-devel \ diff --git a/scripts/build/Dockerfile.fedora.tmpl b/scripts/build/Dockerfile.fedora.tmpl index 22ebaed9c..965309623 100644 --- a/scripts/build/Dockerfile.fedora.tmpl +++ b/scripts/build/Dockerfile.fedora.tmpl @@ -6,6 +6,7 @@ RUN dnf install -y \ findutils \ gcc \ git \ + gnutls-devel \ iproute \ iptables \ libaio-devel \ diff --git a/scripts/build/Dockerfile.tmpl b/scripts/build/Dockerfile.tmpl index bdfdf713a..4378ba149 100644 --- a/scripts/build/Dockerfile.tmpl +++ b/scripts/build/Dockerfile.tmpl @@ -12,6 +12,8 @@ RUN apt-get update && apt-get install -y \ iptables \ libaio-dev \ libcap-dev \ + libgnutls28-dev \ + libgnutls30 \ libnl-3-dev \ libprotobuf-c0-dev \ libprotobuf-dev \ diff --git a/scripts/travis/travis-tests b/scripts/travis/travis-tests index 47ff199cf..664f723e9 100755 --- a/scripts/travis/travis-tests +++ b/scripts/travis/travis-tests @@ -2,8 +2,8 @@ set -x -e TRAVIS_PKGS="protobuf-c-compiler libprotobuf-c0-dev libaio-dev - libprotobuf-dev protobuf-compiler libcap-dev - libnl-3-dev gcc-multilib gdb bash python-protobuf + libgnutls28-dev libgnutls30 libprotobuf-dev protobuf-compiler + libcap-dev libnl-3-dev gcc-multilib gdb bash python-protobuf libnet-dev util-linux asciidoctor libnl-route-3-dev" travis_prep () { @@ -125,6 +125,7 @@ LAZY_TESTS=.*\(maps0\|uffd-events\|lazy-thp\|futex\|fork\).* ./test/zdtm.py run -p 2 -T $LAZY_TESTS --lazy-pages $LAZY_EXCLUDE $ZDTM_OPTS ./test/zdtm.py run -p 2 -T $LAZY_TESTS --remote-lazy-pages $LAZY_EXCLUDE $ZDTM_OPTS +./test/zdtm.py run -p 2 -T $LAZY_TESTS --remote-lazy-pages --tls $LAZY_EXCLUDE $ZDTM_OPTS bash ./test/jenkins/criu-fault.sh bash ./test/jenkins/criu-fcg.sh From 
38d86fa0a2c5197bdaac5ff8c178cb5535d1e297 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 27 Jun 2019 14:18:38 +0300 Subject: [PATCH 0110/2030] lint: Print flake8 version before checking Signed-off-by: Pavel Emelyanov --- Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile b/Makefile index 9d83862d1..0b49364fb 100644 --- a/Makefile +++ b/Makefile @@ -380,6 +380,7 @@ help: .PHONY: help lint: + flake8 --version flake8 --config=scripts/flake8.cfg test/zdtm.py flake8 --config=scripts/flake8.cfg test/inhfd/*.py flake8 --config=scripts/flake8.cfg test/others/rpc/config_file.py From 9bd4aee1b464524fb7924f49aeeeab8e993cba1e Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Fri, 28 Jun 2019 20:16:38 +0300 Subject: [PATCH 0111/2030] flake.cfg: Update to yapf formatting Signed-off-by: Pavel Emelyanov --- scripts/flake8.cfg | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/scripts/flake8.cfg b/scripts/flake8.cfg index 4231e843d..b6a587729 100644 --- a/scripts/flake8.cfg +++ b/scripts/flake8.cfg @@ -1,10 +1,4 @@ [flake8] -# W191 indentation contains tabs -# E128 continuation line under-indented for visual indent # E501 line too long -# E251 unexpected spaces around keyword / parameter equals -# E101 indentation contains mixed spaces and tabs -# E126 continuation line over-indented for hanging indent # W504 line break after binary operator -# E117 over-indented -ignore = W191,E128,E501,E251,E101,E126,W504,E117 +ignore = E501,W504 From c797dae453c74852f16fe9e1f9024f4ed0c08235 Mon Sep 17 00:00:00 2001 From: Harshavardhan Unnibhavi Date: Fri, 8 Mar 2019 15:43:46 +0530 Subject: [PATCH 0112/2030] Documentation: Create man page for libcompel Resolves #349 Signed-off-by: Harshavardhan Unnibhavi --- Documentation/Makefile | 1 + Documentation/compel.txt | 119 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 120 insertions(+) create mode 100644 Documentation/compel.txt diff --git a/Documentation/Makefile b/Documentation/Makefile 
index aa5d3ebbf..cbc7ff2c8 100644 --- a/Documentation/Makefile +++ b/Documentation/Makefile @@ -12,6 +12,7 @@ endif FOOTER := footer.txt SRC1 += crit.txt +SRC1 += compel.txt SRC8 += criu.txt SRC := $(SRC1) $(SRC8) XMLS := $(patsubst %.txt,%.xml,$(SRC)) diff --git a/Documentation/compel.txt b/Documentation/compel.txt new file mode 100644 index 000000000..744a3b35d --- /dev/null +++ b/Documentation/compel.txt @@ -0,0 +1,119 @@ +COMPEL(1) +========== +include::footer.txt[] + +NAME +---- +compel - Execute parasitic code within another process. + +SYNOPSIS +-------- +*compel* 'hgen' ['option' ...] + +*compel* 'plugins' ['PLUGIN_NAME' ...] + +*compel* ['--compat'] 'includes' | 'cflags' | 'ldflags' + +*compel* ['--compat'] ['--static'] 'libs' + +DESCRIPTION +------------ +*compel* is a utility to execute arbitrary code, also called parasite code, +in the context of a foreign process. The parasitic code, once compiled with +compel flags and packed, can be executed in the context of other tasks. Currently +there is only one way to load the parasitic blob into victim task using libcompel.a, +called c-header. + +ARGUMENTS +---------- + +Positional Arguments +~~~~~~~~~~~~~~~~~~~~ + +*hgen*:: + create a header from the .po file, which is the parasite binary. + +*plugins*:: + prints the plugins available. + +*ldflags*:: + prints the ldflags available to compel during linking of parasite code. + +*cflags*:: + prints the compel cflags to be used during compilation of parasitic code. + +*includes*:: + prints list of standard include directories. + +*libs*:: + prints list of static or dynamic libraries that compel can link with. + +OPTIONS +-------- +*-f*, *--file* 'FILE':: + Path to the binary file, 'FILE', which *compel* must turn into a header + +*-o*, *--output* 'FILE':: + Path to the header file, 'FILE', where compel must write the resulting header. + +*-p*, *--prefix* 'NAME':: + Specify prefix for var names + +*-l*, *--log-level* 'NUM':: + Default log level of compel. 
+ +*-h*, *--help*:: + Prints usage and exits. + +*-V*, *--version*:: + Prints version number of compel. + +SOURCE EXAMPLES +---------------- + +Parasitic Code +~~~~~~~~~~~~~~ + +*#include * + +*int parasite_trap_cmd(int cmd, void *args);* //gets called by compel_run_in_thread() + +*int parasite_daemon_cmd(int cmd, void *arg);* // gets called by compel_rpc_call() and compel_rpc_call_sync() + +*void parasite_cleanup(void);* //gets called on parasite unload by compel_cure() + +Infecting code +~~~~~~~~~~~~~~ +The parasitic code is compiled and converted to a header using *compel*, and included here. + +*#include * + +*#include "parasite.h"* + +The following steps are performed to infect the victim process: + + - stop the task: *int compel_stop_task(int pid);* + - prepare infection handler: *struct parasite_ctl *compel_prepare(int pid);* + - execute system call: *int compel_syscall(ctl, int syscall_nr, long *ret, int arg ...);* + - infect victim: *int compel_infect(ctl, nr_thread, size_of_args_area);* + - cure the victim: *int compel_cure(ctl);* //ctl pointer is freed by this call + - Resume victim: *int compel_resume_task(pid, orig_state, state);* + +*ctl* must be configured with blob information by calling *PREFIX_setup_c_header()*, with ctl as its argument. +*PREFIX* is the argument given to *-p* when calling hgen, else it is deduced from file name. + + +EXAMPLES +--------- +To generate a header file(.h) from a parasite binary file(.po) use: + +---------- + compel hgen -f parasite.po -o parasite.h +---------- + +'parasite.po' file is obtained by compiling the parasite source with compel flags and +linking it with the compel plugins. + +AUTHOR +------ +The CRIU team. From b30b0dcb14be6b2fd38c1146b734e876a6e9a8e4 Mon Sep 17 00:00:00 2001 From: Dengguangxing Date: Wed, 19 Jun 2019 09:13:39 +0000 Subject: [PATCH 0113/2030] fix segmentation fault caused by uninitialized mutex Segmentation fault was raised while trying to restore a process with tty.
Coredump file says this is caused by uninitialized tty_mutex: (gdb) where #0 0x00000000004d7270 in atomic_add_return (i=1, v=0x0) at include/common/asm/atomic.h:34 #1 0x00000000004d7398 in mutex_lock (m=0x0) at include/common/lock.h:151 #2 0x00000000004d840c in __pty_open_ptmx_index (index=3, flags=2, cb=0x4dce50 , arg=0x11, path=0x5562e0 "ptmx") at criu/tty.c:603 #3 0x00000000004dced8 in pty_create_ptmx_index (dfd=17, index=3, flags=2) at criu/tty.c:2384 since init_tty_mutex() is reentrant, just call it before mutex_lock() Signed-off-by: Deng Guangxing Reviewed-by: Cyrill Gorcunov Signed-off-by: Andrei Vagin --- criu/tty.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/criu/tty.c b/criu/tty.c index 6fe11530a..e9a28897c 100644 --- a/criu/tty.c +++ b/criu/tty.c @@ -600,6 +600,9 @@ static int __pty_open_ptmx_index(int index, int flags, memset(fds, 0xff, sizeof(fds)); + if (init_tty_mutex()) + return -1; + mutex_lock(tty_mutex); for (i = 0; i < ARRAY_SIZE(fds); i++) { From 13b29f8e16d117461f42a4d06aa20973b03e82cb Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Tue, 25 Jun 2019 15:16:26 +0300 Subject: [PATCH 0114/2030] tty: Move tty layer shared init into tty_init_restore Instead of using tty_mutex value in atomic context (which is wrong, since it is not atomic) better move tty_mutex allocation into cr_restore_tasks where all our initializers live. Otherwise a weird race effect might be observed.
Reported-by: Deng Guangxing Signed-off-by: Cyrill Gorcunov --- criu/cr-restore.c | 3 +++ criu/include/tty.h | 1 + criu/tty.c | 15 +-------------- 3 files changed, 5 insertions(+), 14 deletions(-) diff --git a/criu/cr-restore.c b/criu/cr-restore.c index 23be81140..bf85ab04b 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -2353,6 +2353,9 @@ int cr_restore_tasks(void) if (vdso_init_restore()) goto err; + if (tty_init_restore()) + goto err; + if (opts.cpu_cap & CPU_CAP_IMAGE) { if (cpu_validate_cpuinfo()) goto err; diff --git a/criu/include/tty.h b/criu/include/tty.h index 95ced8396..8419593e5 100644 --- a/criu/include/tty.h +++ b/criu/include/tty.h @@ -32,6 +32,7 @@ struct mount_info; extern int devpts_restore(struct mount_info *pm); extern int tty_prep_fds(void); +extern int tty_init_restore(void); extern int devpts_check_bindmount(struct mount_info *m); diff --git a/criu/tty.c b/criu/tty.c index e9a28897c..dee8d46bf 100644 --- a/criu/tty.c +++ b/criu/tty.c @@ -349,11 +349,8 @@ static mutex_t *tty_mutex; static bool tty_is_master(struct tty_info *info); -static int init_tty_mutex(void) +int tty_init_restore(void) { - if (tty_mutex) - return 0; - tty_mutex = shmalloc(sizeof(*tty_mutex)); if (!tty_mutex) { pr_err("Can't create ptmx index mutex\n"); @@ -600,9 +597,6 @@ static int __pty_open_ptmx_index(int index, int flags, memset(fds, 0xff, sizeof(fds)); - if (init_tty_mutex()) - return -1; - mutex_lock(tty_mutex); for (i = 0; i < ARRAY_SIZE(fds); i++) { @@ -1792,13 +1786,6 @@ static int tty_info_setup(struct tty_info *info) add_post_prepare_cb_once(&prep_tty_restore); - /* - * Call it explicitly. Post-callbacks will be called after - * namespaces preparation, while the latter needs this mutex. 
- */ - if (init_tty_mutex()) - return -1; - info->fdstore_id = -1; return file_desc_add(&info->d, info->tfe->id, &tty_desc_ops); } From 60bd698d7ccd37c794d33e874b8cbfb730fb020e Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Tue, 14 May 2019 18:12:53 +0000 Subject: [PATCH 0115/2030] sk-inet: fix coverity IDENTICAL_BRANCHES criu-3.12/criu/sk-inet.c:575: identical_branches: The same code is executed when the condition "pb_write_one(img_from_set(glob_imgset, CR_FD_FILES), &fe, PB_FILE)" is true or false, because the code in the if-then branch and after the if statement is identical. Should the if statement be removed? Signed-off-by: Adrian Reber --- criu/sk-inet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/criu/sk-inet.c b/criu/sk-inet.c index 90ab492ed..f9c64c7af 100644 --- a/criu/sk-inet.c +++ b/criu/sk-inet.c @@ -573,7 +573,7 @@ static int do_dump_one_inet_fd(int lfd, u32 id, const struct fd_parms *p, int fa ie.ip_opts->raw = NULL; if (pb_write_one(img_from_set(glob_imgset, CR_FD_FILES), &fe, PB_FILE)) - goto err; + err = -1; err: ip_raw_opts_free(&ipopts_raw); release_skopts(&skopts); From afdde285a15eecdee909749509c6ac0a95a66ea6 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Wed, 15 May 2019 12:23:56 +0000 Subject: [PATCH 0116/2030] seize: fix coverity RESOURCE_LEAK criu-3.12/criu/seize.c:648: leaked_storage: Variable "threads" going out of scope leaks the storage it points to. 
Signed-off-by: Adrian Reber --- criu/seize.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/criu/seize.c b/criu/seize.c index b958d4bf9..cce8911b9 100644 --- a/criu/seize.c +++ b/criu/seize.c @@ -627,6 +627,7 @@ static int collect_threads(struct pstree_item *item) { struct seccomp_entry *task_seccomp_entry; struct pid *threads = NULL; + struct pid *tmp = NULL; int nr_threads = 0, i = 0, ret, nr_inprogress, nr_stopped = 0; task_seccomp_entry = seccomp_find_entry(item->pid->real); @@ -643,9 +644,11 @@ static int collect_threads(struct pstree_item *item) } /* The number of threads can't be less than already frozen */ - item->threads = xrealloc(item->threads, nr_threads * sizeof(struct pid)); - if (item->threads == NULL) - return -1; + tmp = xrealloc(item->threads, nr_threads * sizeof(struct pid)); + if (tmp == NULL) + goto err; + + item->threads = tmp; if (item->nr_threads == 0) { item->threads[0].real = item->pid->real; From 927382d6fcbbc9fd16876eab9bdf626f28151fc1 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Wed, 15 May 2019 14:03:19 +0000 Subject: [PATCH 0117/2030] util: fix clang 'null pointer passed' criu-3.12/criu/util.c:879:9: warning: Null pointer passed as an argument to a 'nonnull' parameter criu-3.12/criu/util.c:1171:3: warning: Value stored to 'ret' is never read Signed-off-by: Adrian Reber --- criu/util.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/criu/util.c b/criu/util.c index 0617c97b3..028f604bb 100644 --- a/criu/util.c +++ b/criu/util.c @@ -856,6 +856,12 @@ void split(char *str, char token, char ***out, int *n) cur++; } + if (*n == 0) { + /* This can only happen if str == NULL */ + *out = NULL; + *n = -1; + return; + } *out = xmalloc((*n) * sizeof(char *)); if (!*out) { @@ -1139,7 +1145,7 @@ int run_tcp_server(bool daemon_mode, int *ask, int cfd, int sk) ret = cr_daemon(1, 0, cfd); if (ret == -1) { pr_err("Can't run in the background\n"); - goto out; + goto err; } if (ret > 0) 
{ /* parent task, daemon started */ close_safe(&sk); @@ -1160,10 +1166,11 @@ int run_tcp_server(bool daemon_mode, int *ask, int cfd, int sk) return -1; if (sk >= 0) { - ret = *ask = accept(sk, (struct sockaddr *)&caddr, &clen); - if (*ask < 0) + *ask = accept(sk, (struct sockaddr *)&caddr, &clen); + if (*ask < 0) { pr_perror("Can't accept connection to server"); - else + goto err; + } else pr_info("Accepted connection from %s:%u\n", inet_ntoa(caddr.sin_addr), (int)ntohs(caddr.sin_port)); @@ -1171,7 +1178,7 @@ int run_tcp_server(bool daemon_mode, int *ask, int cfd, int sk) } return 0; -out: +err: close(sk); return -1; } From 04f8fac210da821ed837a7ffb539fe2882726c3d Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Wed, 15 May 2019 07:37:58 +0000 Subject: [PATCH 0118/2030] files-reg: fix coverity RESOURCE_LEAK criu-3.12/criu/files-reg.c:774: leaked_storage: Variable "img" going out of scope leaks the storage it points to. criu-3.12/criu/files-reg.c:788: leaked_storage: Variable "img" going out of scope leaks the storage it points to. criu-3.12/criu/files-reg.c:797: leaked_storage: Variable "img" going out of scope leaks the storage it points to. 
Signed-off-by: Adrian Reber --- criu/files-reg.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/criu/files-reg.c b/criu/files-reg.c index ff0ae7d69..2f68bc03f 100644 --- a/criu/files-reg.c +++ b/criu/files-reg.c @@ -735,6 +735,7 @@ static struct collect_image_info remap_cinfo = { static int dump_ghost_file(int _fd, u32 id, const struct stat *st, dev_t phys_dev) { struct cr_img *img; + int exit_code = -1; GhostFileEntry gfe = GHOST_FILE_ENTRY__INIT; Timeval atim = TIMEVAL__INIT, mtim = TIMEVAL__INIT; @@ -771,7 +772,7 @@ static int dump_ghost_file(int _fd, u32 id, const struct stat *st, dev_t phys_de } if (pb_write_one(img, &gfe, PB_GHOST_FILE)) - return -1; + goto err_out; if (S_ISREG(st->st_mode)) { int fd, ret; @@ -785,7 +786,7 @@ static int dump_ghost_file(int _fd, u32 id, const struct stat *st, dev_t phys_de fd = open(lpath, O_RDONLY); if (fd < 0) { pr_perror("Can't open ghost original file"); - return -1; + goto err_out; } if (gfe.chunks) @@ -794,11 +795,13 @@ static int dump_ghost_file(int _fd, u32 id, const struct stat *st, dev_t phys_de ret = copy_file(fd, img_raw_fd(img), st->st_size); close(fd); if (ret) - return -1; + goto err_out; } + exit_code = 0; +err_out: close_image(img); - return 0; + return exit_code; } struct file_remap *lookup_ghost_remap(u32 dev, u32 ino) From 72da499a27b3211dbedc1e313d41b5edaf000f6e Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Tue, 2 Jul 2019 12:51:50 +0300 Subject: [PATCH 0119/2030] dedup: convert noisy warning to debug and improve messages We want to grep warnings from zdtm tests to travis final logs. And I see a lot of these: (00.250989) Warn (criu/pagemap.c:90): Missing 7f84103e3000 in parent pagemap (00.250999) p 0x7f84103f5000 [1] We do a lookup of an intersecting pagemap entry with a memory region we want to dedup, it is expected that sometimes we don't have some subrange in pagemap entries. So these should not be a warning, make it debug message. 
While on it change the message to save us from being confused with other "Missing..." error messages, and change abstract "parent image" message to the IDs of pages image in all messages in dedup_one_iovec(). v2: print image ids --- criu/pagemap.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/criu/pagemap.c b/criu/pagemap.c index 6bcd0d70e..05f6b82b8 100644 --- a/criu/pagemap.c +++ b/criu/pagemap.c @@ -86,11 +86,15 @@ int dedup_one_iovec(struct page_read *pr, unsigned long off, unsigned long len) ret = pr->seek_pagemap(pr, off); if (ret == 0) { - pr_warn("Missing %lx in parent pagemap\n", off); - if (off < pr->cvaddr && pr->cvaddr < iov_end) + if (off < pr->cvaddr && pr->cvaddr < iov_end) { + pr_debug("pr%lu-%u:No range %lx-%lx in pagemap\n", + pr->img_id, pr->id, off, pr->cvaddr); off = pr->cvaddr; - else + } else { + pr_debug("pr%lu-%u:No range %lx-%lx in pagemap\n", + pr->img_id, pr->id, off, iov_end); return 0; + } } if (!pr->pe) @@ -105,7 +109,8 @@ int dedup_one_iovec(struct page_read *pr, unsigned long off, unsigned long len) prp = pr->parent; if (prp) { /* recursively */ - pr_debug("Go to next parent level\n"); + pr_debug("pr%lu-%u:Go to next parent level\n", + pr->img_id, pr->id); len = min(piov_end, iov_end) - off; ret = dedup_one_iovec(prp, off, len); if (ret != 0) From facfebee22b4223f406d6cfe09fda862924a7b12 Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Tue, 2 Jul 2019 13:33:30 +0300 Subject: [PATCH 0120/2030] inventory: skip warning in case of no parent directory We want to grep warnings from zdtm tests to travis final logs. And I see a lot of these: Warn (criu/image.c:137): Failed to open parent directory If there is no parent images directory then there is no previous dump and no pid-reuse problem with pagemaps possible, so it is fine to have no parent inventory image at the same time which is used here to fix the problem. This always happens on the first iteration of an iterative dump. So don't warn here.
While on it also fix error message in detect_pid_reuse. v2: add detect_pid_reuse part v3: improve comments --- criu/image.c | 22 +++++++++++++++++++++- criu/mem.c | 2 +- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/criu/image.c b/criu/image.c index 5239ab474..2eb926929 100644 --- a/criu/image.c +++ b/criu/image.c @@ -125,6 +125,18 @@ int inventory_save_uptime(InventoryEntry *he) return 0; } +/* + * This function is intended to get an inventory image from previous (parent) + * dump iteration. We use dump_uptime from the image in detect_pid_reuse(). + * + * You see that these function never fails by itself, it only prints warnings + * to better understand reasons why we don't found a proper image, failing here + * is too early. We get to detect_pid_reuse() only if we have a parent pagemap + * and that's the proper place to fail: we know that there is a parent pagemap + * but we don't have (can't access, etc) parent inventory => can't detect + * pid-reuse => fail. + */ + InventoryEntry *get_parent_inventory(void) { struct cr_img *img; @@ -133,7 +145,15 @@ InventoryEntry *get_parent_inventory(void) dir = openat(get_service_fd(IMG_FD_OFF), CR_PARENT_LINK, O_RDONLY); if (dir == -1) { - pr_warn("Failed to open parent directory\n"); + /* + * We print the warning below to be notified that we had some + * unexpected problem on open. For instance we have a parent + * directory but have no access. Having no parent inventory + * when also having no parent directory is an expected case of + * first dump iteration. 
+ */ + if (errno != ENOENT) + pr_warn("Failed to open parent directory\n"); return NULL; } diff --git a/criu/mem.c b/criu/mem.c index 6a1a87a1e..d2a39a9db 100644 --- a/criu/mem.c +++ b/criu/mem.c @@ -318,7 +318,7 @@ static int detect_pid_reuse(struct pstree_item *item, if (!parent_ie) { pr_err("Pid-reuse detection failed: no parent inventory, " \ - "check warnings in get_parent_stats\n"); + "check warnings in get_parent_inventory\n"); return -1; } From 3ca4c73e478ddb9b525a24164e523c8487658151 Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Mon, 1 Jul 2019 11:28:43 +0300 Subject: [PATCH 0121/2030] zdtm: make grep_errors also grep warnings It is inspired by the discussion about inotify fix: https://github.com/checkpoint-restore/criu/pull/728#issuecomment-506929427 From one point of view, warnings might be important to understand why we detect some visible change in the environment after c/r-ing the process, and if this change is expected or not. So we should add "Warn" messages to the output. From the other point of view, these warnings if they are expected, can spoil our final logs with a lot of unnecessary details, so add changes in previous patches to silence the most noisy of these warnings.
Signed-off-by: Pavel Tikhomirov --- test/zdtm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/zdtm.py b/test/zdtm.py index 000f590b0..c64fb2e49 100755 --- a/test/zdtm.py +++ b/test/zdtm.py @@ -1915,7 +1915,7 @@ def grep_errors(fname): before.append(l) if len(before) > 5: before.pop(0) - if "Error" in l: + if "Error" in l or "Warn" in l: if first: print_fname(fname, 'log') print_sep("grep Error", "-", 60) From cb6768b62c5347fedef6a88363667280fbce2cff Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Mon, 15 Jul 2019 01:04:42 -0700 Subject: [PATCH 0122/2030] test/packet_sock_mmap: parse inode as unsigned long long 7f95a16df000-7f95a16e1000 rw-p 00000000 00:09 2183152397 socket:[2183152397] Reported-by: Mr Jenkins Signed-off-by: Andrei Vagin --- test/zdtm/static/packet_sock_mmap.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/test/zdtm/static/packet_sock_mmap.c b/test/zdtm/static/packet_sock_mmap.c index 2a82950bc..93d6ebbf2 100644 --- a/test/zdtm/static/packet_sock_mmap.c +++ b/test/zdtm/static/packet_sock_mmap.c @@ -35,16 +35,17 @@ struct tpacket_req3 { static void check_map_is_there(unsigned long addr, int sk) { FILE *f; - char line[64]; + char line[4096]; struct stat ss; fstat(sk, &ss); f = fopen("/proc/self/maps", "r"); while (fgets(line, sizeof(line), f) != NULL) { + unsigned long long ino; unsigned long start; - int maj, min, ino; + int maj, min; - sscanf(line, "%lx-%*x %*s %*s %x:%x %d %*s", &start, &maj, &min, &ino); + sscanf(line, "%lx-%*x %*s %*s %x:%x %llu %*s", &start, &maj, &min, &ino); if ((start == addr) && ss.st_dev == makedev(maj, min) && ss.st_ino == ino) { pass(); fclose(f); From a82275f3d4407f673b9a3d91032f2dea0629574b Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Wed, 17 Jul 2019 00:42:24 -0700 Subject: [PATCH 0123/2030] zdtm: use a proper page size for the host In zdtm.py, the page size is hardcoded as 4096, but on ppc64le, it is equal to 64K and all tests fail with errors like this: ERROR:
bad page counts, stats = 13 real = 208(0) Signed-off-by: Andrei Vagin --- test/zdtm.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/test/zdtm.py b/test/zdtm.py index c64fb2e49..c52964528 100755 --- a/test/zdtm.py +++ b/test/zdtm.py @@ -23,6 +23,7 @@ import errno import datetime import yaml import struct +import mmap import pycriu as crpc os.chdir(os.path.dirname(os.path.abspath(__file__))) @@ -1130,8 +1131,8 @@ class criu: if f.startswith('pages-'): real_written += os.path.getsize(os.path.join(self.__ddir(), f)) - r_pages = real_written / 4096 - r_off = real_written % 4096 + r_pages = real_written / mmap.PAGESIZE + r_off = real_written % mmap.PAGESIZE if (stats_written != r_pages) or (r_off != 0): print("ERROR: bad page counts, stats = %d real = %d(%d)" % (stats_written, r_pages, r_off)) raise test_fail_exc("page counts mismatch") From b758e4b4762255236048d85ca90dcaf06f75c117 Mon Sep 17 00:00:00 2001 From: Sebastiaan van Stijn Date: Wed, 17 Jul 2019 14:30:01 +0200 Subject: [PATCH 0124/2030] Replace libprotobuf-c0-dev with libprotobuf-c-dev The `libprotobuf-c0-dev` virtual package is no longer available in Debian Buster, but is provided by `libprotobuf-c-dev`, which is available. 
Signed-off-by: Sebastiaan van Stijn --- contrib/debian/dev-packages.lst | 2 +- criu/Makefile.packages | 2 +- scripts/build/Dockerfile.tmpl | 2 +- scripts/travis/travis-tests | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/contrib/debian/dev-packages.lst b/contrib/debian/dev-packages.lst index b0b664f72..049bbd82d 100644 --- a/contrib/debian/dev-packages.lst +++ b/contrib/debian/dev-packages.lst @@ -1,7 +1,7 @@ # Required packages for development in Debian build-essential libprotobuf-dev -libprotobuf-c0-dev +libprotobuf-c-dev protobuf-c-compiler protobuf-compiler python-protobuf diff --git a/criu/Makefile.packages b/criu/Makefile.packages index b01b4b044..f380fa2f0 100644 --- a/criu/Makefile.packages +++ b/criu/Makefile.packages @@ -11,7 +11,7 @@ REQ-RPM-PKG-NAMES += $(PYTHON)-future REQ-RPM-PKG-TEST-NAMES += libaio-devel REQ-DEB-PKG-NAMES += libprotobuf-dev -REQ-DEB-PKG-NAMES += libprotobuf-c0-dev +REQ-DEB-PKG-NAMES += libprotobuf-c-dev REQ-DEB-PKG-NAMES += protobuf-c-compiler REQ-DEB-PKG-NAMES += protobuf-compiler REQ-DEB-PKG-NAMES += python-protobuf diff --git a/scripts/build/Dockerfile.tmpl b/scripts/build/Dockerfile.tmpl index 4378ba149..d90a1d229 100644 --- a/scripts/build/Dockerfile.tmpl +++ b/scripts/build/Dockerfile.tmpl @@ -15,7 +15,7 @@ RUN apt-get update && apt-get install -y \ libgnutls28-dev \ libgnutls30 \ libnl-3-dev \ - libprotobuf-c0-dev \ + libprotobuf-c-dev \ libprotobuf-dev \ libselinux-dev \ pkg-config \ diff --git a/scripts/travis/travis-tests b/scripts/travis/travis-tests index 664f723e9..348daca1f 100755 --- a/scripts/travis/travis-tests +++ b/scripts/travis/travis-tests @@ -1,7 +1,7 @@ #!/bin/sh set -x -e -TRAVIS_PKGS="protobuf-c-compiler libprotobuf-c0-dev libaio-dev +TRAVIS_PKGS="protobuf-c-compiler libprotobuf-c-dev libaio-dev libgnutls28-dev libgnutls30 libprotobuf-dev protobuf-compiler libcap-dev libnl-3-dev gcc-multilib gdb bash python-protobuf libnet-dev util-linux asciidoctor libnl-route-3-dev" From 
db2777e73c3cf44ddb75aa4ab9f6e5bb88705571 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20C=C5=82api=C5=84ski?= Date: Fri, 12 Jul 2019 18:12:42 +0200 Subject: [PATCH 0125/2030] Add support for migrating CHILD_SUBREAPER prctl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. Checkpoint it via parasite. 2. Restore it after forking. Signed-off-by: Michał Cłapiński Reviewed-by: Pavel Tikhomirov --- criu/cr-dump.c | 8 ++++++-- criu/cr-restore.c | 7 +++++-- criu/include/parasite.h | 1 + criu/include/restorer.h | 1 + criu/pie/parasite.c | 12 +++++++++++- criu/pie/restorer.c | 13 +++++++++++++ images/core.proto | 2 ++ 7 files changed, 39 insertions(+), 5 deletions(-) diff --git a/criu/cr-dump.c b/criu/cr-dump.c index 91b8b383a..b12119fa6 100644 --- a/criu/cr-dump.c +++ b/criu/cr-dump.c @@ -726,7 +726,8 @@ int dump_thread_core(int pid, CoreEntry *core, const struct parasite_dump_thread static int dump_task_core_all(struct parasite_ctl *ctl, struct pstree_item *item, const struct proc_pid_stat *stat, - const struct cr_imgset *cr_imgset) + const struct cr_imgset *cr_imgset, + const struct parasite_dump_misc *misc) { struct cr_img *img; CoreEntry *core = item->core[0]; @@ -740,6 +741,9 @@ static int dump_task_core_all(struct parasite_ctl *ctl, pr_info("Dumping core (pid: %d)\n", pid); pr_info("----------------------------------------\n"); + core->tc->child_subreaper = misc->child_subreaper; + core->tc->has_child_subreaper = true; + ret = get_task_personality(pid, &core->tc->personality); if (ret < 0) goto err; @@ -1378,7 +1382,7 @@ static int dump_one_task(struct pstree_item *item, InventoryEntry *parent_ie) goto err_cure; } - ret = dump_task_core_all(parasite_ctl, item, &pps_buf, cr_imgset); + ret = dump_task_core_all(parasite_ctl, item, &pps_buf, cr_imgset, &misc); if (ret) { pr_err("Dump core (pid: %d) failed with %d\n", pid, ret); goto err_cure; diff --git a/criu/cr-restore.c b/criu/cr-restore.c index bf85ab04b..2ffd9a86c 
100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -806,10 +806,13 @@ static int prepare_oom_score_adj(int value) return ret; } -static int prepare_proc_misc(pid_t pid, TaskCoreEntry *tc) +static int prepare_proc_misc(pid_t pid, TaskCoreEntry *tc, struct task_restore_args *args) { int ret; + if (tc->has_child_subreaper) + args->child_subreaper = tc->child_subreaper; + /* loginuid value is critical to restore */ if (kdat.luid == LUID_FULL && tc->has_loginuid && tc->loginuid != INVALID_UID) { @@ -877,7 +880,7 @@ static int restore_one_alive_task(int pid, CoreEntry *core) if (collect_zombie_pids(ta) < 0) return -1; - if (prepare_proc_misc(pid, core->tc)) + if (prepare_proc_misc(pid, core->tc, ta)) return -1; /* diff --git a/criu/include/parasite.h b/criu/include/parasite.h index 0a62f2439..d9570948a 100644 --- a/criu/include/parasite.h +++ b/criu/include/parasite.h @@ -126,6 +126,7 @@ struct parasite_dump_misc { int dumpable; int thp_disabled; + int child_subreaper; }; /* diff --git a/criu/include/restorer.h b/criu/include/restorer.h index effbc3655..f980bfad3 100644 --- a/criu/include/restorer.h +++ b/criu/include/restorer.h @@ -217,6 +217,7 @@ struct task_restore_args { unsigned page_size; #endif int lsm_type; + int child_subreaper; } __aligned(64); /* diff --git a/criu/pie/parasite.c b/criu/pie/parasite.c index 01bacd311..9a179ef8b 100644 --- a/criu/pie/parasite.c +++ b/criu/pie/parasite.c @@ -39,6 +39,10 @@ static struct parasite_dump_pages_args *mprotect_args = NULL; #define PR_GET_PDEATHSIG 2 #endif +#ifndef PR_GET_CHILD_SUBREAPER +#define PR_GET_CHILD_SUBREAPER 37 +#endif + static int mprotect_vmas(struct parasite_dump_pages_args *args) { struct parasite_vma_entry *vmas, *vma; @@ -202,6 +206,8 @@ out: static int dump_misc(struct parasite_dump_misc *args) { + int ret; + args->brk = sys_brk(0); args->pid = sys_getpid(); @@ -212,7 +218,11 @@ static int dump_misc(struct parasite_dump_misc *args) args->dumpable = sys_prctl(PR_GET_DUMPABLE, 0, 0, 0, 0); 
args->thp_disabled = sys_prctl(PR_GET_THP_DISABLE, 0, 0, 0, 0); - return 0; + ret = sys_prctl(PR_GET_CHILD_SUBREAPER, (unsigned long)&args->child_subreaper, 0, 0, 0); + if (ret) + pr_err("PR_GET_CHILD_SUBREAPER failed (%d)\n", ret); + + return ret; } static int dump_creds(struct parasite_dump_creds *args) diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c index 513be74e0..324a11e0c 100644 --- a/criu/pie/restorer.c +++ b/criu/pie/restorer.c @@ -52,6 +52,10 @@ #define PR_SET_PDEATHSIG 1 #endif +#ifndef PR_SET_CHILD_SUBREAPER +#define PR_SET_CHILD_SUBREAPER 36 +#endif + #ifndef FALLOC_FL_KEEP_SIZE #define FALLOC_FL_KEEP_SIZE 0x01 #endif @@ -1231,6 +1235,14 @@ static bool vdso_needs_parking(struct task_restore_args *args) return !vdso_unmapped(args); } +static inline int restore_child_subreaper(int child_subreaper) +{ + if (child_subreaper) + return sys_prctl(PR_SET_CHILD_SUBREAPER, child_subreaper, 0, 0, 0); + else + return 0; +} + /* * The main routine to restore task via sigreturn. * This one is very special, we never return there @@ -1731,6 +1743,7 @@ long __export_restore_task(struct task_restore_args *args) args->lsm_type); ret = ret || restore_dumpable_flag(&args->mm); ret = ret || restore_pdeath_sig(args->t); + ret = ret || restore_child_subreaper(args->child_subreaper); futex_set_and_wake(&thread_inprogress, args->nr_threads); diff --git a/images/core.proto b/images/core.proto index 312a983f0..9f3f870c9 100644 --- a/images/core.proto +++ b/images/core.proto @@ -51,6 +51,8 @@ message task_core_entry { // Reserved for tty inheritance //optional int32 tty_nr = 16; //optional int32 tty_pgrp = 17; + + optional int32 child_subreaper = 18; } message task_kobj_ids_entry { From 6606f246c21a8e1ff40b56087a360c1ff6fbe6bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20C=C5=82api=C5=84ski?= Date: Fri, 12 Jul 2019 18:14:41 +0200 Subject: [PATCH 0126/2030] Add ZDTM tests for child subreaper property MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit 1. Basic check if property is migrated 2. Check that property is restored for existing children 3. Check that child subreaper does not affect reparenting Signed-off-by: Pavel Tikhomirov Signed-off-by: Michał Cłapiński Reviewed-by: Pavel Tikhomirov --- test/zdtm/static/Makefile | 3 + test/zdtm/static/child_subreaper.c | 36 +++++ .../static/child_subreaper_and_reparent.c | 142 ++++++++++++++++++ .../static/child_subreaper_and_reparent.desc | 1 + .../static/child_subreaper_existing_child.c | 138 +++++++++++++++++ 5 files changed, 320 insertions(+) create mode 100644 test/zdtm/static/child_subreaper.c create mode 100644 test/zdtm/static/child_subreaper_and_reparent.c create mode 100644 test/zdtm/static/child_subreaper_and_reparent.desc create mode 100644 test/zdtm/static/child_subreaper_existing_child.c diff --git a/test/zdtm/static/Makefile b/test/zdtm/static/Makefile index 7799c0b0a..52bd00602 100644 --- a/test/zdtm/static/Makefile +++ b/test/zdtm/static/Makefile @@ -214,6 +214,9 @@ TST_NOFILE := \ selinux00 \ selinux01 \ selinux02 \ + child_subreaper \ + child_subreaper_existing_child \ + child_subreaper_and_reparent \ # jobctl00 \ ifneq ($(SRCARCH),arm) diff --git a/test/zdtm/static/child_subreaper.c b/test/zdtm/static/child_subreaper.c new file mode 100644 index 000000000..267795249 --- /dev/null +++ b/test/zdtm/static/child_subreaper.c @@ -0,0 +1,36 @@ +#include +#include + +#include "zdtmtst.h" + +const char *test_doc = "Check that child subreaper attribute is restored"; +const char *test_author = "Michał Cłapiński "; + +int main(int argc, char **argv) +{ + test_init(argc, argv); + + int cs_before = 1; + int ret = prctl(PR_SET_CHILD_SUBREAPER, cs_before, 0, 0, 0); + if (ret) { + pr_perror("Can't set child subreaper attribute, err = %d", ret); + exit(1); + } + + test_daemon(); + test_waitsig(); + + int cs_after; + ret = prctl(PR_GET_CHILD_SUBREAPER, (unsigned long)&cs_after, 0, 0, 0); + if (ret) { + pr_perror("Can't get child 
subreaper attribute, err = %d", ret); + exit(1); + } + + if (cs_before != cs_after) + fail("%d != %d\n", cs_before, cs_after); + else + pass(); + + return 0; +} diff --git a/test/zdtm/static/child_subreaper_and_reparent.c b/test/zdtm/static/child_subreaper_and_reparent.c new file mode 100644 index 000000000..57943a67b --- /dev/null +++ b/test/zdtm/static/child_subreaper_and_reparent.c @@ -0,0 +1,142 @@ +#include +#include +#include +#include +#include +#include + +#include "zdtmtst.h" +#include "lock.h" + +const char *test_doc = "Check that child subreaper does not affect reparenting"; +const char *test_author = "Pavel Tikhomirov "; + +enum { + TEST_FORK, + TEST_SAVE, + TEST_CRIU, + TEST_CHECK, + TEST_EXIT, +}; + +struct shared { + futex_t fstate; + int parent_before_cr; + int parent_after_cr; +} *sh; + +int orphan() +{ + /* + * Wait until reparented to the pidns init. (By waiting + * for the SUBREAPER to reap our parent.) + */ + futex_wait_until(&sh->fstate, TEST_SAVE); + + sh->parent_before_cr = getppid(); + + /* Return the control back to MAIN worker to do C/R */ + futex_set_and_wake(&sh->fstate, TEST_CRIU); + futex_wait_until(&sh->fstate, TEST_CHECK); + + sh->parent_after_cr = getppid(); + + futex_set_and_wake(&sh->fstate, TEST_EXIT); + return 0; +} + +int helper() +{ + int pid; + + pid = fork(); + if (pid < 0) { + pr_perror("Failed to fork"); + return 1; + } else if (pid == 0) { + exit(orphan()); + } + return 0; +} + +int subreaper() +{ + int pid, ret, status; + + setsid(); + + pid = fork(); + if (pid < 0) { + pr_perror("Failed to fork"); + return 1; + } else if (pid == 0) { + exit(helper()); + } + + /* Reap the HELPER */ + waitpid(pid, &status, 0); + if (!WIFEXITED(status) || WEXITSTATUS(status)) { + pr_perror("Wrong exit status for helper: %d", status); + return 1; + } + + ret = prctl(PR_SET_CHILD_SUBREAPER, 1, 0, 0, 0); + if (ret) { + pr_perror("Can't set child subreaper attribute, err = %d", ret); + return 1; + } + + /* Give control to ORPHAN to save it's 
parent */ + futex_set_and_wake(&sh->fstate, TEST_SAVE); + futex_wait_until(&sh->fstate, TEST_EXIT); + return 0; +} + +int main(int argc, char **argv) +{ + int pid, status; + + sh = mmap(NULL, sizeof(struct shared), PROT_WRITE | PROT_READ, MAP_SHARED | MAP_ANONYMOUS, -1, 0); + if (sh == MAP_FAILED) { + pr_perror("Failed to alloc shared region"); + exit(1); + } + + futex_set(&sh->fstate, TEST_FORK); + + test_init(argc, argv); + + setsid(); + + pid = fork(); + if (pid < 0) { + pr_perror("Failed to fork"); + exit(1); + } else if (pid == 0) { + exit(subreaper()); + } + + /* Wait until ORPHAN is ready to C/R */ + futex_wait_until(&sh->fstate, TEST_CRIU); + + test_daemon(); + test_waitsig(); + + /* Give control to ORPHAN to check it's parent */ + futex_set_and_wake(&sh->fstate, TEST_CHECK); + futex_wait_until(&sh->fstate, TEST_EXIT); + + /* Cleanup */ + while (wait(&status) > 0) { + if (!WIFEXITED(status) || WEXITSTATUS(status)) { + fail("Wrong exit status: %d", status); + return 1; + } + } + + if (sh->parent_before_cr != sh->parent_after_cr) + fail("Parent mismatch before %d after %d", sh->parent_before_cr, sh->parent_after_cr); + else + pass(); + return 0; +} diff --git a/test/zdtm/static/child_subreaper_and_reparent.desc b/test/zdtm/static/child_subreaper_and_reparent.desc new file mode 100644 index 000000000..6c4afe5f0 --- /dev/null +++ b/test/zdtm/static/child_subreaper_and_reparent.desc @@ -0,0 +1 @@ +{'flavor': 'ns uns'} diff --git a/test/zdtm/static/child_subreaper_existing_child.c b/test/zdtm/static/child_subreaper_existing_child.c new file mode 100644 index 000000000..28e9dbb8a --- /dev/null +++ b/test/zdtm/static/child_subreaper_existing_child.c @@ -0,0 +1,138 @@ +#include +#include +#include +#include +#include + +#include "zdtmtst.h" +#include "lock.h" + +const char *test_doc = "Check that property is restored for existing children"; +const char *test_author = "Michał Cłapiński "; + +enum { + TEST_FORK, + TEST_CRIU, + TEST_DIE, + TEST_CHECK, + TEST_EXIT, +}; 
+ +struct shared { + futex_t fstate; + int ppid_after_reparent; +} *sh; + + +int orphan() +{ + /* Return the control back to MAIN worker to do C/R */ + futex_set_and_wake(&sh->fstate, TEST_CRIU); + futex_wait_until(&sh->fstate, TEST_CHECK); + + sh->ppid_after_reparent = getppid(); + + futex_set_and_wake(&sh->fstate, TEST_EXIT); + return 0; +} + +int helper() +{ + int pid; + + pid = fork(); + if (pid < 0) { + pr_perror("Failed to fork"); + return 1; + } else if (pid == 0) { + exit(orphan()); + } + + futex_wait_until(&sh->fstate, TEST_DIE); + return 0; +} + +int subreaper() +{ + int pid, ret, status; + + pid = fork(); + if (pid < 0) { + pr_perror("Failed to fork"); + return 1; + } else if (pid == 0) { + exit(helper()); + } + + ret = prctl(PR_SET_CHILD_SUBREAPER, 1, 0, 0, 0); + if (ret) { + pr_perror("Can't set child subreaper attribute, err = %d", ret); + return 1; + } + + /* Reap the HELPER */ + waitpid(pid, &status, 0); + if (!WIFEXITED(status) || WEXITSTATUS(status)) { + pr_perror("Wrong exit status for HELPER: %d", status); + return 1; + } + + /* Give control to ORPHAN so it can check its parent */ + futex_set_and_wake(&sh->fstate, TEST_CHECK); + futex_wait_until(&sh->fstate, TEST_EXIT); + + /* Cleanup: reap the ORPHAN */ + wait(&status); + if (!WIFEXITED(status) || WEXITSTATUS(status)) { + pr_perror("Wrong exit status for ORPHAN: %d", status); + return 1; + } + + return 0; +} + +int main(int argc, char **argv) +{ + int pid, status; + + sh = mmap(NULL, sizeof(struct shared), PROT_WRITE | PROT_READ, MAP_SHARED | MAP_ANONYMOUS, -1, 0); + if (sh == MAP_FAILED) { + pr_perror("Failed to alloc shared region"); + exit(1); + } + + futex_set(&sh->fstate, TEST_FORK); + + test_init(argc, argv); + + pid = fork(); + if (pid < 0) { + pr_perror("Failed to fork"); + exit(1); + } else if (pid == 0) { + exit(subreaper()); + } + + /* Wait until ORPHAN is ready to C/R */ + futex_wait_until(&sh->fstate, TEST_CRIU); + + test_daemon(); + test_waitsig(); + + /* Give control to HELPER so 
+ it can die */ + futex_set_and_wake(&sh->fstate, TEST_DIE); + futex_wait_until(&sh->fstate, TEST_EXIT); + + /* Cleanup: reap the SUBREAPER */ + waitpid(pid, &status, 0); + if (!WIFEXITED(status) || WEXITSTATUS(status)) { + fail("Wrong exit status: %d", status); + return 1; + } + + if (sh->ppid_after_reparent != pid) + fail("Orphan was reparented to %d instead of %d", sh->ppid_after_reparent, pid); + else + pass(); + return 0; +} From cb946164367d8ea6cfd9490ee3f8f66ed38b1a4d Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 5 Jul 2019 18:37:56 +0300 Subject: [PATCH 0127/2030] mem/page-pipe: Eliminate redundant pipe_off setup In case if we may use previous pipe the pipe_off gets set directly so no need for redundant unconditional assignment. Signed-off-by: Cyrill Gorcunov Acked-by: Mike Rapoport Signed-off-by: Andrei Vagin --- criu/page-pipe.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/criu/page-pipe.c b/criu/page-pipe.c index c32b89332..33741db42 100644 --- a/criu/page-pipe.c +++ b/criu/page-pipe.c @@ -104,8 +104,6 @@ static struct page_pipe_buf *ppb_alloc(struct page_pipe *pp, return NULL; cnt_add(CNT_PAGE_PIPE_BUFS, 1); - ppb->pipe_off = 0; - if (prev && ppb_resize_pipe(prev) == 0) { /* The previous pipe isn't full and we can continue to use it. */ ppb->p[0] = prev->p[0]; @@ -120,6 +118,7 @@ static struct page_pipe_buf *ppb_alloc(struct page_pipe *pp, } cnt_add(CNT_PAGE_PIPES, 1); + ppb->pipe_off = 0; ppb->pipe_size = fcntl(ppb->p[0], F_GETPIPE_SZ, 0) / PAGE_SIZE; pp->nr_pipes++; } From 088b086bfe82621d64b3c52414909f791d6033dd Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 5 Jul 2019 18:37:57 +0300 Subject: [PATCH 0128/2030] mem/page-pipe: create_page_pipe -- Drop redundant zero assignment We allocate with xzalloc, no need for additional zero assignments.
Signed-off-by: Cyrill Gorcunov Signed-off-by: Andrei Vagin --- criu/page-pipe.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/criu/page-pipe.c b/criu/page-pipe.c index 33741db42..32be2f981 100644 --- a/criu/page-pipe.c +++ b/criu/page-pipe.c @@ -187,26 +187,18 @@ struct page_pipe *create_page_pipe(unsigned int nr_segs, struct iovec *iovs, uns if (!pp) return NULL; + INIT_LIST_HEAD(&pp->free_bufs); + INIT_LIST_HEAD(&pp->bufs); + pp->nr_iovs = nr_segs; pp->flags = flags; if (!iovs) { iovs = xmalloc(sizeof(*iovs) * nr_segs); if (!iovs) goto err_free_pp; - pp->flags |= PP_OWN_IOVS; } - - pp->nr_pipes = 0; - INIT_LIST_HEAD(&pp->bufs); - INIT_LIST_HEAD(&pp->free_bufs); - pp->nr_iovs = nr_segs; pp->iovs = iovs; - pp->free_iov = 0; - - pp->nr_holes = 0; - pp->free_hole = 0; - pp->holes = NULL; if (page_pipe_grow(pp, 0)) goto err_free_iovs; From 692fdada56456406b72ba62b53f1e6922f640341 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 5 Jul 2019 18:37:58 +0300 Subject: [PATCH 0129/2030] mem/page-pipe: Align members for readability sake Signed-off-by: Cyrill Gorcunov Reviewed-by: Mike Rapoport Signed-off-by: Andrei Vagin --- criu/include/page-pipe.h | 44 ++++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/criu/include/page-pipe.h b/criu/include/page-pipe.h index 80e595871..decd14321 100644 --- a/criu/include/page-pipe.h +++ b/criu/include/page-pipe.h @@ -91,15 +91,15 @@ struct kernel_pipe_buffer { */ struct page_pipe_buf { - int p[2]; /* pipe with pages */ - unsigned int pipe_size; /* how many pages can be fit into pipe */ - unsigned int pipe_off; /* where this buf is started in a pipe */ - unsigned int pages_in; /* how many pages are there */ - unsigned int nr_segs; /* how many iov-s are busy */ + int p[2]; /* pipe with pages */ + unsigned int pipe_size; /* how many pages can be fit into pipe */ + unsigned int pipe_off; /* where this buf is started in a pipe */ + unsigned int 
pages_in; /* how many pages are there */ + unsigned int nr_segs; /* how many iov-s are busy */ #define PPB_LAZY (1 << 0) - unsigned int flags; - struct iovec *iov; /* vaddr:len map */ - struct list_head l; /* links into page_pipe->bufs */ + unsigned int flags; + struct iovec *iov; /* vaddr:len map */ + struct list_head l; /* links into page_pipe->bufs */ }; /* @@ -114,21 +114,21 @@ struct page_pipe_buf { #define PP_HOLE_PARENT (1 << 0) struct page_pipe { - unsigned int nr_pipes; /* how many page_pipe_bufs in there */ - struct list_head bufs; /* list of bufs */ - struct list_head free_bufs; /* list of bufs */ - struct page_pipe_buf *prev[PP_PIPE_TYPES]; /* last ppb of each type - for pipe sharing */ - unsigned int nr_iovs; /* number of iovs */ - unsigned int free_iov; /* first free iov */ - struct iovec *iovs; /* iovs. They are provided into create_page_pipe - and all bufs have their iov-s in there */ + unsigned int nr_pipes; /* how many page_pipe_bufs in there */ + struct list_head bufs; /* list of bufs */ + struct list_head free_bufs; /* list of bufs */ + struct page_pipe_buf *prev[PP_PIPE_TYPES]; /* last ppb of each type for pipe sharing */ + unsigned int nr_iovs; /* number of iovs */ + unsigned int free_iov; /* first free iov */ - unsigned int nr_holes; /* number of holes allocated */ - unsigned int free_hole; /* number of holes in use */ - struct iovec *holes; /* holes */ - unsigned int *hole_flags; - unsigned flags; /* PP_FOO flags below */ + struct iovec *iovs; /* iovs. 
They are provided into create_page_pipe + and all bufs have their iov-s in there */ + + unsigned int nr_holes; /* number of holes allocated */ + unsigned int free_hole; /* number of holes in use */ + struct iovec *holes; /* holes */ + unsigned int *hole_flags; + unsigned int flags; /* PP_FOO flags below */ }; #define PP_CHUNK_MODE 0x1 /* Restrict the maximum buffer size of pipes From f3b8371c3095b3eb73476d9b8d8f98c9ad7bd078 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 5 Jul 2019 18:37:59 +0300 Subject: [PATCH 0130/2030] mem/page-pipe: Use ssize_t for splice/tee results Integer value is too short. Signed-off-by: Cyrill Gorcunov Reviewed-by: Mike Rapoport Signed-off-by: Andrei Vagin --- criu/page-pipe.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/criu/page-pipe.c b/criu/page-pipe.c index 32be2f981..534380b0f 100644 --- a/criu/page-pipe.c +++ b/criu/page-pipe.c @@ -389,7 +389,7 @@ int page_pipe_read(struct page_pipe *pp, struct pipe_read_dest *prd, struct page_pipe_buf *ppb; struct iovec *iov = NULL; unsigned long skip = 0, len; - int ret; + ssize_t ret; /* * Get ppb that contains addr and count length of data between @@ -418,13 +418,13 @@ int page_pipe_read(struct page_pipe *pp, struct pipe_read_dest *prd, ret = tee(ppb->p[0], prd->p[1], len, 0); if (ret != len) { - pr_perror("tee: %d", ret); + pr_perror("tee: %zd", ret); return -1; } ret = splice(prd->p[0], NULL, prd->sink_fd, NULL, skip, 0); if (ret != skip) { - pr_perror("splice: %d", ret); + pr_perror("splice: %zd", ret); return -1; } From 6e86b9eb01f6e7f0da04ff255d49bdefe19b7c04 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 5 Jul 2019 18:38:00 +0300 Subject: [PATCH 0131/2030] mem/vma: Use memset for vm_area_list_init To eliminate side effects, in particular setting nr_aios is already missing here. 
Signed-off-by: Cyrill Gorcunov Reviewed-by: Mike Rapoport Signed-off-by: Andrei Vagin --- criu/include/vma.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/criu/include/vma.h b/criu/include/vma.h index c297c0d14..6f5ee19d3 100644 --- a/criu/include/vma.h +++ b/criu/include/vma.h @@ -7,6 +7,7 @@ #include "images/vma.pb-c.h" #include +#include struct vm_area_list { struct list_head h; @@ -21,11 +22,8 @@ struct vm_area_list { static inline void vm_area_list_init(struct vm_area_list *vml) { + memset(vml, 0, sizeof(*vml)); INIT_LIST_HEAD(&vml->h); - vml->nr = 0; - vml->priv_size = 0; - vml->priv_longest = 0; - vml->shared_longest = 0; } struct file_desc; From 32e9f155254e820bed4f2e1582015e7a1a98b0b6 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 5 Jul 2019 18:38:01 +0300 Subject: [PATCH 0132/2030] mem/vma: Use vm_area_list_init where appropriate Signed-off-by: Cyrill Gorcunov Reviewed-by: Mike Rapoport Signed-off-by: Andrei Vagin --- criu/cr-dump.c | 9 +++------ criu/proc_parse.c | 7 +------ 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/criu/cr-dump.c b/criu/cr-dump.c index b12119fa6..dc151b9a8 100644 --- a/criu/cr-dump.c +++ b/criu/cr-dump.c @@ -108,8 +108,7 @@ void free_mappings(struct vm_area_list *vma_area_list) free(vma_area); } - INIT_LIST_HEAD(&vma_area_list->h); - vma_area_list->nr = 0; + vm_area_list_init(vma_area_list); } int collect_mappings(pid_t pid, struct vm_area_list *vma_area_list, @@ -1142,8 +1141,7 @@ static int pre_dump_one_task(struct pstree_item *item, InventoryEntry *parent_ie struct parasite_dump_misc misc; struct mem_dump_ctl mdc; - INIT_LIST_HEAD(&vmas.h); - vmas.nr = 0; + vm_area_list_init(&vmas); pr_info("========================================\n"); pr_info("Pre-dumping task (pid: %d)\n", pid); @@ -1224,8 +1222,7 @@ static int dump_one_task(struct pstree_item *item, InventoryEntry *parent_ie) struct proc_posix_timers_stat proc_args; struct mem_dump_ctl mdc; - INIT_LIST_HEAD(&vmas.h); - 
vmas.nr = 0; + vm_area_list_init(&vmas); pr_info("========================================\n"); pr_info("Dumping task (pid: %d)\n", pid); diff --git a/criu/proc_parse.c b/criu/proc_parse.c index f6ebb1fd6..4c127f264 100644 --- a/criu/proc_parse.c +++ b/criu/proc_parse.c @@ -705,12 +705,7 @@ int parse_smaps(pid_t pid, struct vm_area_list *vma_area_list, DIR *map_files_dir = NULL; struct bfd f; - vma_area_list->nr = 0; - vma_area_list->nr_aios = 0; - vma_area_list->priv_longest = 0; - vma_area_list->priv_size = 0; - vma_area_list->shared_longest = 0; - INIT_LIST_HEAD(&vma_area_list->h); + vm_area_list_init(vma_area_list); f.fd = open_proc(pid, "smaps"); if (f.fd < 0) From 0ee3d0764ddd5d1ad744586f770601e394a8cd14 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 5 Jul 2019 18:38:02 +0300 Subject: [PATCH 0133/2030] mem/vma: Drop never used VM_AREA_LIST macro Signed-off-by: Cyrill Gorcunov Reviewed-by: Mike Rapoport Signed-off-by: Andrei Vagin --- criu/include/vma.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/criu/include/vma.h b/criu/include/vma.h index 6f5ee19d3..3cdd1b319 100644 --- a/criu/include/vma.h +++ b/criu/include/vma.h @@ -18,8 +18,6 @@ struct vm_area_list { unsigned long shared_longest; /* nr of pages in longest shared VMA */ }; -#define VM_AREA_LIST(name) struct vm_area_list name = { .h = LIST_HEAD_INIT(name.h), .nr = 0, } - static inline void vm_area_list_init(struct vm_area_list *vml) { memset(vml, 0, sizeof(*vml)); From 8f9ae895d04b032c227f6adf0ac2d244b6b43e31 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 5 Jul 2019 18:38:03 +0300 Subject: [PATCH 0134/2030] mem/vma: Sanitize struct vm_area_list - make names more descriptive - add comments - use union for nr_priv_pages and rst_priv_size since former priv_size has been used with different meaning: number of pages during checkpoint time and size in bytes on restore moment Signed-off-by: Cyrill Gorcunov Reviewed-by: Mike Rapoport Signed-off-by: Andrei Vagin --- criu/cr-dump.c | 2 
+- criu/include/vma.h | 15 +++++++++------ criu/mem.c | 20 ++++++++++---------- criu/proc_parse.c | 9 +++++---- 4 files changed, 25 insertions(+), 21 deletions(-) diff --git a/criu/cr-dump.c b/criu/cr-dump.c index dc151b9a8..9273fc0a5 100644 --- a/criu/cr-dump.c +++ b/criu/cr-dump.c @@ -125,7 +125,7 @@ int collect_mappings(pid_t pid, struct vm_area_list *vma_area_list, goto err; pr_info("Collected, longest area occupies %lu pages\n", - vma_area_list->priv_longest); + vma_area_list->nr_priv_pages_longest); pr_info_vma_list(&vma_area_list->h); pr_info("----------------------------------------\n"); diff --git a/criu/include/vma.h b/criu/include/vma.h index 3cdd1b319..5e3f3527b 100644 --- a/criu/include/vma.h +++ b/criu/include/vma.h @@ -10,12 +10,15 @@ #include struct vm_area_list { - struct list_head h; - unsigned nr; - unsigned int nr_aios; - unsigned long priv_size; /* nr of pages in private VMAs */ - unsigned long priv_longest; /* nr of pages in longest private VMA */ - unsigned long shared_longest; /* nr of pages in longest shared VMA */ + struct list_head h; /* list of VMAs */ + unsigned nr; /* nr of all VMAs in the list */ + unsigned int nr_aios; /* nr of AIOs VMAs in the list */ + union { + unsigned long nr_priv_pages; /* dmp: nr of pages in private VMAs */ + unsigned long rst_priv_size; /* rst: size of private VMAs */ + }; + unsigned long nr_priv_pages_longest; /* nr of pages in longest private VMA */ + unsigned long nr_shared_pages_longest;/* nr of pages in longest shared VMA */ }; static inline void vm_area_list_init(struct vm_area_list *vml) diff --git a/criu/mem.c b/criu/mem.c index d2a39a9db..de66a6210 100644 --- a/criu/mem.c +++ b/criu/mem.c @@ -81,7 +81,7 @@ unsigned long dump_pages_args_size(struct vm_area_list *vmas) /* In the worst case I need one iovec for each page */ return sizeof(struct parasite_dump_pages_args) + vmas->nr * sizeof(struct parasite_vma_entry) + - (vmas->priv_size + 1) * sizeof(struct iovec); + (vmas->nr_priv_pages + 1) * 
sizeof(struct iovec); } static inline bool __page_is_zero(u64 pme) @@ -414,14 +414,14 @@ static int __parasite_dump_pages_seized(struct pstree_item *item, timing_start(TIME_MEMDUMP); pr_debug(" Private vmas %lu/%lu pages\n", - vma_area_list->priv_longest, vma_area_list->priv_size); + vma_area_list->nr_priv_pages_longest, vma_area_list->nr_priv_pages); /* * Step 0 -- prepare */ - pmc_size = max(vma_area_list->priv_longest, - vma_area_list->shared_longest); + pmc_size = max(vma_area_list->nr_priv_pages_longest, + vma_area_list->nr_shared_pages_longest); if (pmc_init(&pmc, item->pid->real, &vma_area_list->h, pmc_size * PAGE_SIZE)) return -1; @@ -433,7 +433,7 @@ static int __parasite_dump_pages_seized(struct pstree_item *item, * use, i.e. on non-lazy non-predump. */ cpp_flags |= PP_CHUNK_MODE; - pp = create_page_pipe(vma_area_list->priv_size, + pp = create_page_pipe(vma_area_list->nr_priv_pages, mdc->lazy ? NULL : pargs_iovs(args), cpp_flags); if (!pp) @@ -612,9 +612,9 @@ int prepare_mm_pid(struct pstree_item *i) list_add_tail(&vma->list, &ri->vmas.h); if (vma_area_is_private(vma, kdat.task_size)) { - ri->vmas.priv_size += vma_area_len(vma); + ri->vmas.rst_priv_size += vma_area_len(vma); if (vma_has_guard_gap_hidden(vma)) - ri->vmas.priv_size += PAGE_SIZE; + ri->vmas.rst_priv_size += PAGE_SIZE; } pr_info("vma 0x%"PRIx64" 0x%"PRIx64"\n", vma->e->start, vma->e->end); @@ -1171,17 +1171,17 @@ int prepare_mappings(struct pstree_item *t) goto out; /* Reserve a place for mapping private vma-s one by one */ - addr = mmap(NULL, vmas->priv_size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0); + addr = mmap(NULL, vmas->rst_priv_size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0); if (addr == MAP_FAILED) { ret = -1; - pr_perror("Unable to reserve memory (%lu bytes)", vmas->priv_size); + pr_perror("Unable to reserve memory (%lu bytes)", vmas->rst_priv_size); goto out; } old_premmapped_addr = rsti(t)->premmapped_addr; old_premmapped_len = rsti(t)->premmapped_len; 
rsti(t)->premmapped_addr = addr; - rsti(t)->premmapped_len = vmas->priv_size; + rsti(t)->premmapped_len = vmas->rst_priv_size; ret = open_page_read(vpid(t), &pr, PR_TASK); if (ret <= 0) diff --git a/criu/proc_parse.c b/criu/proc_parse.c index 4c127f264..0e8b6f209 100644 --- a/criu/proc_parse.c +++ b/criu/proc_parse.c @@ -660,14 +660,15 @@ static int vma_list_add(struct vma_area *vma_area, unsigned long pages; pages = vma_area_len(vma_area) / PAGE_SIZE; - vma_area_list->priv_size += pages; - vma_area_list->priv_longest = max(vma_area_list->priv_longest, pages); + vma_area_list->nr_priv_pages += pages; + vma_area_list->nr_priv_pages_longest = + max(vma_area_list->nr_priv_pages_longest, pages); } else if (vma_area_is(vma_area, VMA_ANON_SHARED)) { unsigned long pages; pages = vma_area_len(vma_area) / PAGE_SIZE; - vma_area_list->shared_longest = - max(vma_area_list->shared_longest, pages); + vma_area_list->nr_shared_pages_longest = + max(vma_area_list->nr_shared_pages_longest, pages); } *prev_vfi = *vfi; From 43ac528cb946ce8d9c07368c4f2de94684583227 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 5 Jul 2019 18:38:04 +0300 Subject: [PATCH 0135/2030] mem/page-xfer: Add log prefix Signed-off-by: Cyrill Gorcunov Reviewed-by: Mike Rapoport Signed-off-by: Andrei Vagin --- criu/page-xfer.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/criu/page-xfer.c b/criu/page-xfer.c index f74716100..75e135c66 100644 --- a/criu/page-xfer.c +++ b/criu/page-xfer.c @@ -7,6 +7,9 @@ #include #include +#undef LOG_PREFIX +#define LOG_PREFIX "page-xfer: " + #include "types.h" #include "cr_options.h" #include "servicefd.h" From 7eec03bd958d04e96a81069676205fc4c20096d2 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 5 Jul 2019 18:38:05 +0300 Subject: [PATCH 0136/2030] mem/pmc: Use pr_warn_once if cache is disabled No need to spam on every pmc_init call. 
Signed-off-by: Cyrill Gorcunov Reviewed-by: Mike Rapoport Signed-off-by: Andrei Vagin --- criu/pagemap-cache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/criu/pagemap-cache.c b/criu/pagemap-cache.c index a1c2d42f4..c80776123 100644 --- a/criu/pagemap-cache.c +++ b/criu/pagemap-cache.c @@ -56,7 +56,7 @@ int pmc_init(pmc_t *pmc, pid_t pid, const struct list_head *vma_head, size_t siz goto err; if (pagemap_cache_disabled) - pr_debug("The pagemap cache is disabled\n"); + pr_warn_once("The pagemap cache is disabled\n"); if (kdat.pmap == PM_DISABLED) { /* From cb7b013dd01b79fb31e6e855ee61b718906dd573 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 5 Jul 2019 18:38:06 +0300 Subject: [PATCH 0137/2030] mem/pmc: Print pid for debug sake When logs are massive it is convenient for grepping. Signed-off-by: Cyrill Gorcunov Reviewed-by: Mike Rapoport Signed-off-by: Andrei Vagin --- criu/pagemap-cache.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/criu/pagemap-cache.c b/criu/pagemap-cache.c index c80776123..61ab09387 100644 --- a/criu/pagemap-cache.c +++ b/criu/pagemap-cache.c @@ -105,8 +105,8 @@ static int pmc_fill_cache(pmc_t *pmc, const struct vma_area *vma) pmc->start = vma->e->start; pmc->end = vma->e->end; - pr_debug("filling VMA %lx-%lx (%zuK) [l:%lx h:%lx]\n", - (long)vma->e->start, (long)vma->e->end, len >> 10, low, high); + pr_debug("%d: filling VMA %lx-%lx (%zuK) [l:%lx h:%lx]\n", + pmc->pid, (long)vma->e->start, (long)vma->e->end, len >> 10, low, high); /* * If we meet a small VMA, lets try to fit 2M cache @@ -123,8 +123,8 @@ static int pmc_fill_cache(pmc_t *pmc, const struct vma_area *vma) size_t size_cov = len; size_t nr_vmas = 1; - pr_debug("\t%16lx-%-16lx nr:%-5zu cov:%zu\n", - (long)vma->e->start, (long)vma->e->end, nr_vmas, size_cov); + pr_debug("\t%d: %16lx-%-16lx nr:%-5zu cov:%zu\n", + pmc->pid, (long)vma->e->start, (long)vma->e->end, nr_vmas, size_cov); list_for_each_entry_continue(vma, 
pmc->vma_head, list) { if (vma->e->start > high || vma->e->end > high) @@ -134,8 +134,8 @@ static int pmc_fill_cache(pmc_t *pmc, const struct vma_area *vma) size_cov += vma_area_len(vma); nr_vmas++; - pr_debug("\t%16lx-%-16lx nr:%-5zu cov:%zu\n", - (long)vma->e->start, (long)vma->e->end, nr_vmas, size_cov); + pr_debug("\t%d: %16lx-%-16lx nr:%-5zu cov:%zu\n", + pmc->pid, (long)vma->e->start, (long)vma->e->end, nr_vmas, size_cov); } if (nr_vmas > 1) { @@ -145,9 +145,9 @@ static int pmc_fill_cache(pmc_t *pmc, const struct vma_area *vma) * allows us to save a couple of code bytes. */ pmc->end = high; - pr_debug("\tcache mode [l:%lx h:%lx]\n", pmc->start, pmc->end); + pr_debug("\t%d: cache mode [l:%lx h:%lx]\n", pmc->pid, pmc->start, pmc->end); } else - pr_debug("\tsimple mode [l:%lx h:%lx]\n", pmc->start, pmc->end); + pr_debug("\t%d: simple mode [l:%lx h:%lx]\n", pmc->pid, pmc->start, pmc->end); } size_map = PAGEMAP_LEN(pmc->end - pmc->start); From 0319d1bf2e273295dea3fa8699519e7ca8579823 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 5 Jul 2019 18:38:07 +0300 Subject: [PATCH 0138/2030] mem/page-pipe: Use xrealloc_safe in page_pipe_add_hole To shrink code a bit. 
Signed-off-by: Cyrill Gorcunov Reviewed-by: Mike Rapoport Signed-off-by: Andrei Vagin --- criu/page-pipe.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/criu/page-pipe.c b/criu/page-pipe.c index 534380b0f..a8216962d 100644 --- a/criu/page-pipe.c +++ b/criu/page-pipe.c @@ -298,14 +298,12 @@ int page_pipe_add_hole(struct page_pipe *pp, unsigned long addr, unsigned int flags) { if (pp->free_hole >= pp->nr_holes) { - pp->holes = xrealloc(pp->holes, - (pp->nr_holes + PP_HOLES_BATCH) * sizeof(struct iovec)); - if (!pp->holes) + size_t new_size = (pp->nr_holes + PP_HOLES_BATCH) * sizeof(struct iovec); + if (xrealloc_safe(&pp->holes, new_size)) return -1; - pp->hole_flags = xrealloc(pp->hole_flags, - (pp->nr_holes + PP_HOLES_BATCH) * sizeof(unsigned int)); - if(!pp->hole_flags) + new_size = (pp->nr_holes + PP_HOLES_BATCH) * sizeof(unsigned int); + if (xrealloc_safe(&pp->hole_flags, new_size)) return -1; pp->nr_holes += PP_HOLES_BATCH; From d999a9e07951ceda18b43dd8658924d29179b886 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 5 Jul 2019 18:38:08 +0300 Subject: [PATCH 0139/2030] mem/shmem: Use xrealloc_safe in expand_shmem Signed-off-by: Cyrill Gorcunov Reviewed-by: Mike Rapoport Signed-off-by: Andrei Vagin --- criu/shmem.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/criu/shmem.c b/criu/shmem.c index bc9f23bd7..3c2811398 100644 --- a/criu/shmem.c +++ b/criu/shmem.c @@ -197,8 +197,7 @@ static int expand_shmem(struct shmem_info *si, unsigned long new_size) BUG_ON(new_map_size < map_size); - si->pstate_map = xrealloc(si->pstate_map, new_map_size); - if (!si->pstate_map) + if (xrealloc_safe(&si->pstate_map, new_map_size)) return -1; memzero(si->pstate_map + nr_map_items, new_map_size - map_size); return 0; From 67d5dbb995926e4d28b6b3b76e43c342953ed21e Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 5 Jul 2019 20:47:10 +0300 Subject: [PATCH 0140/2030] mem/shmem: More elegant entries declaration 
Signed-off-by: Cyrill Gorcunov Signed-off-by: Andrei Vagin --- criu/shmem.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/criu/shmem.c b/criu/shmem.c index 3c2811398..a183460a7 100644 --- a/criu/shmem.c +++ b/criu/shmem.c @@ -179,11 +179,12 @@ static void set_pstate(unsigned long *pstate_map, unsigned long pfn, static int expand_shmem(struct shmem_info *si, unsigned long new_size) { - unsigned long nr_pages, nr_map_items, map_size, - nr_new_map_items, new_map_size, old_size; + unsigned long nr_pages, nr_map_items, map_size; + unsigned long nr_new_map_items, new_map_size, old_size; old_size = si->size; si->size = new_size; + if (!is_shmem_tracking_en()) return 0; From 92717977cd507e21c0aa6f0948cde456da91a6bd Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 5 Jul 2019 18:38:10 +0300 Subject: [PATCH 0141/2030] mem/shmem: Use xmalloc in collect_sysv_shmem To get error message in log if no memory available. Signed-off-by: Cyrill Gorcunov Reviewed-by: Mike Rapoport Signed-off-by: Andrei Vagin --- criu/shmem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/criu/shmem.c b/criu/shmem.c index a183460a7..cf35873a7 100644 --- a/criu/shmem.c +++ b/criu/shmem.c @@ -234,7 +234,7 @@ int collect_sysv_shmem(unsigned long shmid, unsigned long size) * Tasks will not modify this object, so don't * shmalloc() as we do it for anon shared mem */ - si = malloc(sizeof(*si)); + si = xmalloc(sizeof(*si)); if (!si) return -1; From c01da212da56144b4aa07fdd63ae1e47ca51e3ba Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 5 Jul 2019 18:38:11 +0300 Subject: [PATCH 0142/2030] mem/shmem: Fix typos for_each_shmem macro Since we use _i as a counter in macro declaration we should use it as a reference. This macro simply happens to work now because of being called with variable i declared in the caller code.
Signed-off-by: Cyrill Gorcunov Reviewed-by: Mike Rapoport Signed-off-by: Andrei Vagin --- criu/shmem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/criu/shmem.c b/criu/shmem.c index cf35873a7..cee47dba7 100644 --- a/criu/shmem.c +++ b/criu/shmem.c @@ -43,8 +43,8 @@ #define SHMEM_HASH_SIZE 32 static struct hlist_head shmems_hash[SHMEM_HASH_SIZE]; -#define for_each_shmem(_i, _si) \ - for (i = 0; i < SHMEM_HASH_SIZE; i++) \ +#define for_each_shmem(_i, _si) \ + for (_i = 0; _i < SHMEM_HASH_SIZE; _i++) \ hlist_for_each_entry(_si, &shmems_hash[_i], h) struct shmem_info { From ba454407bf8217d28be97af8888be9e7cac22932 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Wed, 10 Jul 2019 18:47:56 +0100 Subject: [PATCH 0143/2030] make: Insert version macros in criu.h Including the version information of CRIU in criu.h is required by projects that use libcriu to preserve backward compatibility. Closes #738 Signed-off-by: Radostin Stoyanov --- lib/Makefile | 2 +- lib/c/criu.h | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/Makefile b/lib/Makefile index 67c50b95a..f9b66701e 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -1,6 +1,6 @@ CRIU_SO := libcriu.so CRIU_A := libcriu.a -UAPI_HEADERS := lib/c/criu.h images/rpc.proto +UAPI_HEADERS := lib/c/criu.h images/rpc.proto criu/include/version.h # # File to keep track of files installed by setup.py diff --git a/lib/c/criu.h b/lib/c/criu.h index 4462ce082..76f3547fc 100644 --- a/lib/c/criu.h +++ b/lib/c/criu.h @@ -21,8 +21,10 @@ #include +#include "version.h" + #ifdef __GNUG__ - extern "C" { + extern "C" { #endif enum criu_service_comm { From 104aaf383e884f87174b8cdc068ab1b29c9c1b18 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Wed, 17 Jul 2019 13:07:34 -0700 Subject: [PATCH 0144/2030] restorer: print errors if prctl syscalls failed Signed-off-by: Andrei Vagin --- criu/pie/restorer.c | 34 ++++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) diff 
--git a/criu/pie/restorer.c b/criu/pie/restorer.c index 324a11e0c..2a7180d6a 100644 --- a/criu/pie/restorer.c +++ b/criu/pie/restorer.c @@ -328,10 +328,18 @@ static int restore_creds(struct thread_creds_args *args, int procfd, static inline int restore_pdeath_sig(struct thread_restore_args *ta) { - if (ta->pdeath_sig) - return sys_prctl(PR_SET_PDEATHSIG, ta->pdeath_sig, 0, 0, 0); - else + int ret; + + if (!ta->pdeath_sig) return 0; + + ret = sys_prctl(PR_SET_PDEATHSIG, ta->pdeath_sig, 0, 0, 0); + if (ret) { + pr_err("Unable to set PR_SET_PDEATHSIG(%d): %d\n", ta->pdeath_sig, ret); + return -1; + } + + return 0; } static int restore_dumpable_flag(MmEntry *mme) @@ -1237,10 +1245,18 @@ static bool vdso_needs_parking(struct task_restore_args *args) static inline int restore_child_subreaper(int child_subreaper) { - if (child_subreaper) - return sys_prctl(PR_SET_CHILD_SUBREAPER, child_subreaper, 0, 0, 0); - else + int ret; + + if (!child_subreaper) return 0; + + ret = sys_prctl(PR_SET_CHILD_SUBREAPER, child_subreaper, 0, 0, 0); + if (ret) { + pr_err("Unable to set PR_SET_CHILD_SUBREAPER(%d): %d\n", child_subreaper, ret); + return -1; + } + + return 0; } /* @@ -1364,8 +1380,10 @@ long __export_restore_task(struct task_restore_args *args) if (args->uffd > -1) { /* re-enable THP if we disabled it previously */ if (args->has_thp_enabled) { - if (sys_prctl(PR_SET_THP_DISABLE, 0, 0, 0, 0)) { - pr_err("Cannot re-enable THP\n"); + int ret; + ret = sys_prctl(PR_SET_THP_DISABLE, 0, 0, 0, 0); + if (ret) { + pr_err("Cannot re-enable THP: %d\n", ret); goto core_restore_end; } } From 1e2647f12393547f1d1b9c1e403f8ab75eb46ef5 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Wed, 17 Jul 2019 13:08:56 -0700 Subject: [PATCH 0145/2030] images: convert type of child_subreaper from int32 to bool Signed-off-by: Andrei Vagin --- images/core.proto | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/images/core.proto b/images/core.proto index 9f3f870c9..c3dba6f6d 100644 --- 
a/images/core.proto +++ b/images/core.proto @@ -52,7 +52,7 @@ message task_core_entry { //optional int32 tty_nr = 16; //optional int32 tty_pgrp = 17; - optional int32 child_subreaper = 18; + optional bool child_subreaper = 18; } message task_kobj_ids_entry { From 9fe8960d89f009c8e9a3c6245aeac3b46b0120ca Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Mon, 29 Jul 2019 13:25:20 +0000 Subject: [PATCH 0146/2030] scripts: add possibility to override docker with podman To be able to run the test containers in scripts/build with podman this puts the name of the container runtime into $CONTAINER_RUNTIME. Now it can be overridden with make fedora-rawhide CONTAINER_RUNTIME=podman Signed-off-by: Adrian Reber --- scripts/build/Makefile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/scripts/build/Makefile b/scripts/build/Makefile index f333b214a..bb2e9ca9d 100644 --- a/scripts/build/Makefile +++ b/scripts/build/Makefile @@ -2,6 +2,7 @@ QEMU_ARCHES := armv7hf aarch64 ppc64le s390x fedora-rawhide-aarch64 # require qe ARCHES := $(QEMU_ARCHES) x86_64 fedora-asan fedora-rawhide centos TARGETS := $(ARCHES) alpine TARGETS_CLANG := $(addsuffix $(TARGETS),-clang) +CONTAINER_RUNTIME := docker all: $(TARGETS) $(TARGETS_CLANG) .PHONY: all @@ -27,8 +28,8 @@ $(QEMU_ARCHES): qemu-user-static binfmt_misc $(TARGETS): mkdir -p $(HOME)/.ccache mv $(HOME)/.ccache ../../ - docker build -t criu-$@ -f Dockerfile.$@ $(DB_CC) $(DB_ENV) ../.. - docker run criu-$@ tar c -C /tmp .ccache | tar x -C $(HOME) + $(CONTAINER_RUNTIME) build -t criu-$@ -f Dockerfile.$@ $(DB_CC) $(DB_ENV) ../.. 
+ $(CONTAINER_RUNTIME) run criu-$@ tar c -C /tmp .ccache | tar x -C $(HOME) .PHONY: $(TARGETS) # Clang builds add some Docker build env From 229a8ab06b23e651b17b14a04939e8c2c2024ca5 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Mon, 29 Jul 2019 13:51:21 +0000 Subject: [PATCH 0147/2030] scripts: remove python2 from Fedora Dockerfiles More and more python2 packages are being removed from future Fedora releases. This removes python2 packages explicitly listed in CRIU's Dockerfiles, which all are not required for the current level of testing. Signed-off-by: Adrian Reber --- scripts/build/Dockerfile.fedora.tmpl | 5 ----- 1 file changed, 5 deletions(-) diff --git a/scripts/build/Dockerfile.fedora.tmpl b/scripts/build/Dockerfile.fedora.tmpl index 965309623..94f112671 100644 --- a/scripts/build/Dockerfile.fedora.tmpl +++ b/scripts/build/Dockerfile.fedora.tmpl @@ -18,14 +18,9 @@ RUN dnf install -y \ procps-ng \ protobuf-c-devel \ protobuf-devel \ - python2-protobuf \ - python2 \ - # Starting with Fedora 28 this is python2-ipaddress python-ipaddress \ - # Starting with Fedora 28 this is python2-pyyaml python-yaml \ python3-pip \ - python2-future \ python3-PyYAML \ python3-future \ python3-protobuf \ From 77efcde96dd17dff942bf7677f159c9a9369d622 Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Mon, 29 Jul 2019 15:28:08 +0300 Subject: [PATCH 0148/2030] mount: fix inconsistent return and goto err alternation Signed-off-by: Pavel Tikhomirov --- criu/mount.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/criu/mount.c b/criu/mount.c index c03a435c5..486d01719 100644 --- a/criu/mount.c +++ b/criu/mount.c @@ -2327,7 +2327,7 @@ out: if (restore_shared_options(mi, private, mi->shared_id && !shared, mi->master_id && !master)) - return -1; + goto err; mi->mounted = true; exit_code = 0; From cd87a628e14c68c3bb30988df56d04e373079435 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Mon, 29 Jul 2019 21:34:51 +0100 Subject: [PATCH 0149/2030] scripts: Remove 
yaml/ipaddress Py2 fedora modules Signed-off-by: Radostin Stoyanov --- scripts/build/Dockerfile.fedora.tmpl | 2 -- 1 file changed, 2 deletions(-) diff --git a/scripts/build/Dockerfile.fedora.tmpl b/scripts/build/Dockerfile.fedora.tmpl index 94f112671..15460665a 100644 --- a/scripts/build/Dockerfile.fedora.tmpl +++ b/scripts/build/Dockerfile.fedora.tmpl @@ -18,8 +18,6 @@ RUN dnf install -y \ procps-ng \ protobuf-c-devel \ protobuf-devel \ - python-ipaddress \ - python-yaml \ python3-pip \ python3-PyYAML \ python3-future \ From 2a683849b9b3619f7f5a619192e309c0993193c3 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Mon, 29 Jul 2019 21:42:19 +0100 Subject: [PATCH 0150/2030] scripts: Set PYTHON=python3 in Fedora Dockerfiles Signed-off-by: Radostin Stoyanov --- scripts/build/Dockerfile.fedora.tmpl | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/build/Dockerfile.fedora.tmpl b/scripts/build/Dockerfile.fedora.tmpl index 15460665a..74348f3e6 100644 --- a/scripts/build/Dockerfile.fedora.tmpl +++ b/scripts/build/Dockerfile.fedora.tmpl @@ -37,6 +37,7 @@ RUN dnf install -y \ RUN dnf install -y --allowerasing coreutils RUN ln -sf python3 /usr/bin/python +ENV PYTHON=python3 COPY . /criu WORKDIR /criu From 5721e6100037b4e51a67548f7eecd7f67432b7af Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Wed, 31 Jul 2019 07:02:10 +0100 Subject: [PATCH 0151/2030] scripts: Install flake8 with dnf in Fedora In the Fedora tests we install python3-pip only to install flake8. This is not necessary as there is a Fedora package for flake8. 
Signed-off-by: Radostin Stoyanov --- scripts/build/Dockerfile.fedora.tmpl | 2 +- scripts/travis/travis-tests | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/scripts/build/Dockerfile.fedora.tmpl b/scripts/build/Dockerfile.fedora.tmpl index 74348f3e6..280ce1cdd 100644 --- a/scripts/build/Dockerfile.fedora.tmpl +++ b/scripts/build/Dockerfile.fedora.tmpl @@ -18,7 +18,7 @@ RUN dnf install -y \ procps-ng \ protobuf-c-devel \ protobuf-devel \ - python3-pip \ + python3-flake8 \ python3-PyYAML \ python3-future \ python3-protobuf \ diff --git a/scripts/travis/travis-tests b/scripts/travis/travis-tests index 348daca1f..c055860fd 100755 --- a/scripts/travis/travis-tests +++ b/scripts/travis/travis-tests @@ -163,7 +163,9 @@ ip net add test make -C test/others/shell-job -pip install flake8 +if ! [ -x "$(command -v flake8)" ]; then + pip install flake8 +fi make lint # Check that help output fits into 80 columns From b25d1facaedae7a1bd7154fe28f909c7e961a93c Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Wed, 31 Jul 2019 07:08:20 +0100 Subject: [PATCH 0152/2030] pb2dict: Disable undefined name 'basestring' The following error is falsely reported by flake8: lib/py/images/pb2dict.py:266:24: F821 undefined name 'basestring' This error occurs because `basestring` is not available in Python 3, however the if condition on the line above ensures that this error will not occur at run time. 
Signed-off-by: Radostin Stoyanov --- lib/py/images/pb2dict.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/py/images/pb2dict.py b/lib/py/images/pb2dict.py index c4ce736e8..6b4a772c7 100644 --- a/lib/py/images/pb2dict.py +++ b/lib/py/images/pb2dict.py @@ -224,7 +224,7 @@ def get_bytes_dec(field): def is_string(value): # Python 3 compatibility if "basestring" in __builtins__: - string_types = basestring + string_types = basestring # noqa: F821 else: string_types = (str, bytes) return isinstance(value, string_types) From d2d6e3f5379ce9a8cfa320bb07113995a8191bfb Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 25 Jul 2019 23:01:07 +0100 Subject: [PATCH 0153/2030] compel/log: Use enum as parameter for std_log_set_loglevel() Doesn't change uapi, but makes it a bit more friendly and documented which loglevel means what for foreign user. Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- compel/plugins/include/uapi/std/log.h | 4 +++- compel/plugins/std/log.c | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/compel/plugins/include/uapi/std/log.h b/compel/plugins/include/uapi/std/log.h index fbd1803bb..7b27b1250 100644 --- a/compel/plugins/include/uapi/std/log.h +++ b/compel/plugins/include/uapi/std/log.h @@ -1,10 +1,12 @@ #ifndef COMPEL_PLUGIN_STD_LOG_H__ #define COMPEL_PLUGIN_STD_LOG_H__ +#include "compel/loglevels.h" + #define STD_LOG_SIMPLE_CHUNK 256 extern void std_log_set_fd(int fd); -extern void std_log_set_loglevel(unsigned int level); +extern void std_log_set_loglevel(enum __compel_log_levels level); extern void std_log_set_start(struct timeval *tv); extern int std_vprint_num(char *buf, int blen, int num, char **ps); extern void std_sprintf(char output[STD_LOG_SIMPLE_CHUNK], const char *format, ...) 
diff --git a/compel/plugins/std/log.c b/compel/plugins/std/log.c index 403ea46f7..06b9894ae 100644 --- a/compel/plugins/std/log.c +++ b/compel/plugins/std/log.c @@ -120,7 +120,7 @@ void std_log_set_fd(int fd) logfd = fd; } -void std_log_set_loglevel(unsigned int level) +void std_log_set_loglevel(enum __compel_log_levels level) { cur_loglevel = level; } From 28949d5fb80d4cf8998fdda69b02800dadf7e544 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 25 Jul 2019 23:01:08 +0100 Subject: [PATCH 0154/2030] compel/std/uapi: Provide setter for gettimeofday() Provide a way to set gettimeofday() function for an infected task. CRIU's parasite & restorer are very voluble as more logs are better than lesser in terms of bug investigations. In all modern kernels there is a way to get time without entering kernel: vdso. So, add a way to reduce the cost of logging without making it less valuable. [I'm not particularly fond of std_log_set_gettimeofday() name, so if someone can come with a better naming - I'm up for a change] Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- compel/plugins/include/uapi/std/log.h | 12 ++++++++++++ compel/plugins/std/log.c | 16 +++++++++++++++- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/compel/plugins/include/uapi/std/log.h b/compel/plugins/include/uapi/std/log.h index 7b27b1250..f21b6df0d 100644 --- a/compel/plugins/include/uapi/std/log.h +++ b/compel/plugins/include/uapi/std/log.h @@ -8,6 +8,18 @@ extern void std_log_set_fd(int fd); extern void std_log_set_loglevel(enum __compel_log_levels level); extern void std_log_set_start(struct timeval *tv); + +/* + * Provides a function to get time *in the infected task* for log timings. + * Expected use-case: address on the vdso page to get time. + * If not set or called with NULL - compel will use raw syscall, + * which requires enter in the kernel and as a result affects performance. 
+ */ +typedef int (*gettimeofday_t)(struct timeval *tv, struct timezone *tz); +extern void std_log_set_gettimeofday(gettimeofday_t gtod); +/* std plugin helper to get time (hopefully, efficiently) */ +extern int std_gettimeofday(struct timeval *tv, struct timezone *tz); + extern int std_vprint_num(char *buf, int blen, int num, char **ps); extern void std_sprintf(char output[STD_LOG_SIMPLE_CHUNK], const char *format, ...) __attribute__ ((__format__ (__printf__, 2, 3))); diff --git a/compel/plugins/std/log.c b/compel/plugins/std/log.c index 06b9894ae..f9be432ea 100644 --- a/compel/plugins/std/log.c +++ b/compel/plugins/std/log.c @@ -16,6 +16,7 @@ struct simple_buf { static int logfd = -1; static int cur_loglevel = COMPEL_DEFAULT_LOGLEVEL; static struct timeval start; +static gettimeofday_t __std_gettimeofday; static void sbuf_log_flush(struct simple_buf *b); @@ -54,7 +55,7 @@ static void sbuf_log_init(struct simple_buf *b) if (start.tv_sec != 0) { struct timeval now; - sys_gettimeofday(&now, NULL); + std_gettimeofday(&now, NULL); timediff(&start, &now); /* Seconds */ @@ -130,6 +131,19 @@ void std_log_set_start(struct timeval *s) start = *s; } +void std_log_set_gettimeofday(gettimeofday_t gtod) +{ + __std_gettimeofday = gtod; +} + +int std_gettimeofday(struct timeval *tv, struct timezone *tz) +{ + if (__std_gettimeofday != NULL) + return __std_gettimeofday(tv, tz); + + return sys_gettimeofday(tv, tz); +} + static void print_string(const char *msg, struct simple_buf *b) { while (*msg) { From 2d521f3c9354719cb2bf488ad0a4c308b9f4c376 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 25 Jul 2019 23:01:09 +0100 Subject: [PATCH 0155/2030] vdso/restorer: Try best to preserve vdso during restore vdso will be used in restorer for timings in logs - try to keep it during restore process. 
Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- criu/include/parasite-vdso.h | 2 +- criu/pie/parasite-vdso.c | 5 ++- criu/pie/restorer.c | 72 +++++++++++++++++++++++++----------- 3 files changed, 55 insertions(+), 24 deletions(-) diff --git a/criu/include/parasite-vdso.h b/criu/include/parasite-vdso.h index 3cf67bbb3..cf15d135f 100644 --- a/criu/include/parasite-vdso.h +++ b/criu/include/parasite-vdso.h @@ -84,7 +84,7 @@ static inline bool is_vdso_mark(void *addr) extern int vdso_do_park(struct vdso_maps *rt, unsigned long park_at, unsigned long park_size); extern int vdso_map_compat(unsigned long map_at); -extern int vdso_proxify(struct vdso_symtable *sym_rt, +extern int vdso_proxify(struct vdso_symtable *sym_rt, bool *added_proxy, unsigned long vdso_rt_parked_at, VmaEntry *vmas, size_t nr_vmas, bool compat_vdso, bool force_trampolines); diff --git a/criu/pie/parasite-vdso.c b/criu/pie/parasite-vdso.c index 00bc2bffa..c4197d0cf 100644 --- a/criu/pie/parasite-vdso.c +++ b/criu/pie/parasite-vdso.c @@ -242,7 +242,8 @@ static int add_vdso_proxy(VmaEntry *vma_vdso, VmaEntry *vma_vvar, return 0; } -int vdso_proxify(struct vdso_symtable *sym_rt, unsigned long vdso_rt_parked_at, +int vdso_proxify(struct vdso_symtable *sym_rt, bool *added_proxy, + unsigned long vdso_rt_parked_at, VmaEntry *vmas, size_t nr_vmas, bool compat_vdso, bool force_trampolines) { @@ -289,11 +290,13 @@ int vdso_proxify(struct vdso_symtable *sym_rt, unsigned long vdso_rt_parked_at, vma_vvar ? (unsigned long)vma_vvar->start : VVAR_BAD_ADDR, vma_vvar ? 
(unsigned long)vma_vvar->end : VVAR_BAD_ADDR); + *added_proxy = false; if (blobs_matches(vma_vdso, vma_vvar, &s, sym_rt) && !force_trampolines) { return remap_rt_vdso(vma_vdso, vma_vvar, sym_rt, vdso_rt_parked_at); } + *added_proxy = true; return add_vdso_proxy(vma_vdso, vma_vvar, &s, sym_rt, vdso_rt_parked_at, compat_vdso); } diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c index 2a7180d6a..565ea0167 100644 --- a/criu/pie/restorer.c +++ b/criu/pie/restorer.c @@ -1215,34 +1215,39 @@ static int wait_zombies(struct task_restore_args *task_args) return 0; } -static bool vdso_unmapped(struct task_restore_args *args) +static bool can_restore_vdso(struct task_restore_args *args) { + struct vdso_maps *rt = &args->vdso_maps_rt; + bool had_vdso = false, had_vvar = false; unsigned int i; - /* Don't park rt-vdso or rt-vvar if dumpee doesn't have them */ for (i = 0; i < args->vmas_n; i++) { VmaEntry *vma = &args->vmas[i]; - if (vma_entry_is(vma, VMA_AREA_VDSO) || - vma_entry_is(vma, VMA_AREA_VVAR)) - return false; + if (vma_entry_is(vma, VMA_AREA_VDSO)) + had_vdso = true; + if (vma_entry_is(vma, VMA_AREA_VVAR)) + had_vvar = true; } + if (had_vdso && (rt->vdso_start == VDSO_BAD_ADDR)) { + pr_err("Task had vdso, restorer doesn't\n"); + return false; + } + + /* + * There is a use-case for restoring vvar alone: valgrind (see #488). + * On the other side, we expect that vvar is touched by application + * only from vdso. So, we can put a stale page and proceed restore + * if kernel doesn't provide vvar [but provides vdso, if needed]. + * Just warn aloud that we don't like it.
+ */ + if (had_vvar && (rt->vvar_start == VVAR_BAD_ADDR)) + pr_warn("Can't restore vvar - continuing regardless\n"); + return true; } -static bool vdso_needs_parking(struct task_restore_args *args) -{ - /* Compatible vDSO will be mapped, not moved */ - if (args->compatible_mode) - return false; - - if (args->can_map_vdso) - return false; - - return !vdso_unmapped(args); -} - static inline int restore_child_subreaper(int child_subreaper) { int ret; @@ -1279,6 +1284,7 @@ long __export_restore_task(struct task_restore_args *args) k_rtsigset_t to_block; pid_t my_pid = sys_getpid(); rt_sigaction_t act; + bool has_vdso_proxy; bootstrap_start = args->bootstrap_start; bootstrap_len = args->bootstrap_len; @@ -1325,7 +1331,21 @@ long __export_restore_task(struct task_restore_args *args) pr_debug("lazy-pages: uffd %d\n", args->uffd); } - if (vdso_needs_parking(args)) { + /* + * Park vdso/vvar in a safe place if architecture doesn't support + * mapping them with arch_prctl(). + * Always preserve/map rt-vdso pair if it's possible, regardless + * it's presence in original task: vdso will be used for fast + * getttimeofday() in restorer's log timings. 
+ */ + if (!args->can_map_vdso) { + /* It's already checked in kdat, but let's check again */ + if (args->compatible_mode) { + pr_err("Compatible mode without vdso map support\n"); + goto core_restore_end; + } + if (!can_restore_vdso(args)) + goto core_restore_end; if (vdso_do_park(&args->vdso_maps_rt, args->vdso_rt_parked_at, vdso_rt_size)) goto core_restore_end; @@ -1336,9 +1356,12 @@ long __export_restore_task(struct task_restore_args *args) goto core_restore_end; /* Map vdso that wasn't parked */ - if (!vdso_unmapped(args) && args->can_map_vdso) { - if (arch_map_vdso(args->vdso_rt_parked_at, - args->compatible_mode) < 0) { + if (args->can_map_vdso) { + int err = arch_map_vdso(args->vdso_rt_parked_at, + args->compatible_mode); + + if (err < 0) { + pr_err("Failed to map vdso %d\n", err); goto core_restore_end; } } @@ -1473,11 +1496,16 @@ long __export_restore_task(struct task_restore_args *args) /* * Proxify vDSO. */ - if (vdso_proxify(&args->vdso_maps_rt.sym, args->vdso_rt_parked_at, + if (vdso_proxify(&args->vdso_maps_rt.sym, &has_vdso_proxy, + args->vdso_rt_parked_at, args->vmas, args->vmas_n, args->compatible_mode, fault_injected(FI_VDSO_TRAMPOLINES))) goto core_restore_end; + /* unmap rt-vdso with restorer blob after restore's finished */ + if (!has_vdso_proxy) + vdso_rt_size = 0; + /* * Walk though all VMAs again to drop PROT_WRITE * if it was not there. From 53c2fdc955d8966100c2abd9580d5a065710c31a Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 25 Jul 2019 23:01:10 +0100 Subject: [PATCH 0156/2030] vdso/restorer: Always track vdso/vvar positions in vdso_maps_rt For simplicity, make them always valid in restorer. rt->vdso_start will be used to calculate gettimeofday() address. 
Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- criu/include/parasite-vdso.h | 3 +- criu/pie/parasite-vdso.c | 81 +++++++++++++----------------------- criu/pie/restorer.c | 44 ++++++++++++++------ 3 files changed, 60 insertions(+), 68 deletions(-) diff --git a/criu/include/parasite-vdso.h b/criu/include/parasite-vdso.h index cf15d135f..872105133 100644 --- a/criu/include/parasite-vdso.h +++ b/criu/include/parasite-vdso.h @@ -84,8 +84,7 @@ static inline bool is_vdso_mark(void *addr) extern int vdso_do_park(struct vdso_maps *rt, unsigned long park_at, unsigned long park_size); extern int vdso_map_compat(unsigned long map_at); -extern int vdso_proxify(struct vdso_symtable *sym_rt, bool *added_proxy, - unsigned long vdso_rt_parked_at, +extern int vdso_proxify(struct vdso_maps *rt, bool *added_proxy, VmaEntry *vmas, size_t nr_vmas, bool compat_vdso, bool force_trampolines); extern int vdso_redirect_calls(unsigned long base_to, unsigned long base_from, diff --git a/criu/pie/parasite-vdso.c b/criu/pie/parasite-vdso.c index c4197d0cf..848883b42 100644 --- a/criu/pie/parasite-vdso.c +++ b/criu/pie/parasite-vdso.c @@ -24,19 +24,19 @@ #endif #define LOG_PREFIX "vdso: " - -static int vdso_remap(char *who, unsigned long from, unsigned long to, size_t size) +/* Updates @from on success */ +static int vdso_remap(char *who, unsigned long *from, unsigned long to, size_t size) { unsigned long addr; - pr_debug("Remap %s %lx -> %lx\n", who, from, to); + pr_debug("Remap %s %lx -> %lx\n", who, *from, to); - addr = sys_mremap(from, size, size, MREMAP_MAYMOVE | MREMAP_FIXED, to); + addr = sys_mremap(*from, size, size, MREMAP_MAYMOVE | MREMAP_FIXED, to); if (addr != to) { - pr_err("Unable to remap %lx -> %lx %lx\n", - from, to, addr); + pr_err("Unable to remap %lx -> %lx %lx\n", *from, to, addr); return -1; } + *from = addr; return 0; } @@ -57,7 +57,7 @@ int vdso_do_park(struct vdso_maps *rt, unsigned long park_at, if (rt->vvar_start == VVAR_BAD_ADDR) { BUG_ON(vdso_size < 
park_size); - return vdso_remap("rt-vdso", rt->vdso_start, + return vdso_remap("rt-vdso", &rt->vdso_start, rt_vdso_park, vdso_size); } @@ -68,8 +68,8 @@ int vdso_do_park(struct vdso_maps *rt, unsigned long park_at, else rt_vdso_park = park_at + vvar_size; - ret = vdso_remap("rt-vdso", rt->vdso_start, rt_vdso_park, vdso_size); - ret |= vdso_remap("rt-vvar", rt->vvar_start, rt_vvar_park, vvar_size); + ret = vdso_remap("rt-vdso", &rt->vdso_start, rt_vdso_park, vdso_size); + ret |= vdso_remap("rt-vvar", &rt->vvar_start, rt_vvar_park, vvar_size); return ret; } @@ -144,10 +144,8 @@ static bool blobs_matches(VmaEntry *vdso_img, VmaEntry *vvar_img, * to dumpee position without generating any proxy. */ static int remap_rt_vdso(VmaEntry *vma_vdso, VmaEntry *vma_vvar, - struct vdso_symtable *sym_rt, unsigned long vdso_rt_parked_at) + struct vdso_maps *rt) { - unsigned long rt_vvar_addr = vdso_rt_parked_at; - unsigned long rt_vdso_addr = vdso_rt_parked_at; void *remap_addr; int ret; @@ -164,8 +162,8 @@ static int remap_rt_vdso(VmaEntry *vma_vdso, VmaEntry *vma_vvar, } if (!vma_vvar) { - return vdso_remap("rt-vdso", rt_vdso_addr, - vma_vdso->start, sym_rt->vdso_size); + return vdso_remap("rt-vdso", &rt->vdso_start, + vma_vdso->start, rt->sym.vdso_size); } remap_addr = (void *)(uintptr_t)vma_vvar->start; @@ -174,15 +172,10 @@ static int remap_rt_vdso(VmaEntry *vma_vdso, VmaEntry *vma_vvar, return -1; } - if (vma_vdso->start < vma_vvar->start) - rt_vvar_addr = vdso_rt_parked_at + sym_rt->vdso_size; - else - rt_vdso_addr = vdso_rt_parked_at + sym_rt->vvar_size; - - ret = vdso_remap("rt-vdso", rt_vdso_addr, - vma_vdso->start, sym_rt->vdso_size); - ret |= vdso_remap("rt-vvar", rt_vvar_addr, - vma_vvar->start, sym_rt->vvar_size); + ret = vdso_remap("rt-vdso", &rt->vdso_start, + vma_vdso->start, rt->sym.vdso_size); + ret |= vdso_remap("rt-vvar", &rt->vvar_start, + vma_vvar->start, rt->sym.vvar_size); return ret; } @@ -193,28 +186,14 @@ static int remap_rt_vdso(VmaEntry *vma_vdso, 
VmaEntry *vma_vvar, * to operate as proxy vdso. */ static int add_vdso_proxy(VmaEntry *vma_vdso, VmaEntry *vma_vvar, - struct vdso_symtable *sym_img, struct vdso_symtable *sym_rt, - unsigned long vdso_rt_parked_at, bool compat_vdso) + struct vdso_symtable *sym_img, struct vdso_maps *rt, + bool compat_vdso) { - unsigned long rt_vvar_addr = vdso_rt_parked_at; - unsigned long rt_vdso_addr = vdso_rt_parked_at; unsigned long orig_vvar_addr = vma_vvar ? vma_vvar->start : VVAR_BAD_ADDR; pr_info("Runtime vdso mismatches dumpee, generate proxy\n"); - /* - * Don't forget to shift if vvar is before vdso. - */ - if (sym_rt->vvar_size == VVAR_BAD_SIZE) { - rt_vvar_addr = VVAR_BAD_ADDR; - } else { - if (sym_rt->vdso_before_vvar) - rt_vvar_addr += sym_rt->vdso_size; - else - rt_vdso_addr += sym_rt->vvar_size; - } - /* * Note: we assume that after first migration with inserted * rt-vdso and trampoilines on the following migrations @@ -223,8 +202,8 @@ static int add_vdso_proxy(VmaEntry *vma_vdso, VmaEntry *vma_vvar, * jumps, so we can't remove them if on the following migration * found that number of symbols in vdso has decreased. */ - if (vdso_redirect_calls(rt_vdso_addr, vma_vdso->start, - sym_rt, sym_img, compat_vdso)) { + if (vdso_redirect_calls(rt->vdso_start, vma_vdso->start, + &rt->sym, sym_img, compat_vdso)) { pr_err("Failed to proxify dumpee contents\n"); return -1; } @@ -234,16 +213,15 @@ static int add_vdso_proxy(VmaEntry *vma_vdso, VmaEntry *vma_vvar, * routine we could detect this vdso and do not dump it, since * it's auto-generated every new session if proxy required. 
*/ - sys_mprotect((void *)rt_vdso_addr, sym_rt->vdso_size, PROT_WRITE); - vdso_put_mark((void *)rt_vdso_addr, rt_vvar_addr, - vma_vdso->start, orig_vvar_addr); - sys_mprotect((void *)rt_vdso_addr, sym_rt->vdso_size, VDSO_PROT); + sys_mprotect((void *)rt->vdso_start, rt->sym.vdso_size, PROT_WRITE); + vdso_put_mark((void *)rt->vdso_start, rt->vvar_start, + vma_vdso->start, orig_vvar_addr); + sys_mprotect((void *)rt->vdso_start, rt->sym.vdso_size, VDSO_PROT); return 0; } -int vdso_proxify(struct vdso_symtable *sym_rt, bool *added_proxy, - unsigned long vdso_rt_parked_at, +int vdso_proxify(struct vdso_maps *rt, bool *added_proxy, VmaEntry *vmas, size_t nr_vmas, bool compat_vdso, bool force_trampolines) { @@ -291,12 +269,9 @@ int vdso_proxify(struct vdso_symtable *sym_rt, bool *added_proxy, vma_vvar ? (unsigned long)vma_vvar->end : VVAR_BAD_ADDR); *added_proxy = false; - if (blobs_matches(vma_vdso, vma_vvar, &s, sym_rt) && !force_trampolines) { - return remap_rt_vdso(vma_vdso, vma_vvar, - sym_rt, vdso_rt_parked_at); - } + if (blobs_matches(vma_vdso, vma_vvar, &s, &rt->sym) && !force_trampolines) + return remap_rt_vdso(vma_vdso, vma_vvar, rt); *added_proxy = true; - return add_vdso_proxy(vma_vdso, vma_vvar, &s, sym_rt, - vdso_rt_parked_at, compat_vdso); + return add_vdso_proxy(vma_vdso, vma_vvar, &s, rt, compat_vdso); } diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c index 565ea0167..d60fdbebf 100644 --- a/criu/pie/restorer.c +++ b/criu/pie/restorer.c @@ -1264,6 +1264,32 @@ static inline int restore_child_subreaper(int child_subreaper) return 0; } +static int map_vdso(struct task_restore_args *args, bool compatible) +{ + struct vdso_maps *rt = &args->vdso_maps_rt; + int err; + + err = arch_map_vdso(args->vdso_rt_parked_at, compatible); + if (err < 0) { + pr_err("Failed to map vdso %d\n", err); + return err; + } + + if (rt->sym.vdso_before_vvar) { + rt->vdso_start = args->vdso_rt_parked_at; + /* kernel may provide only vdso */ + if (rt->sym.vvar_size != 
VVAR_BAD_SIZE) + rt->vvar_start = rt->vdso_start + rt->sym.vdso_size; + else + rt->vvar_start = VVAR_BAD_ADDR; + } else { + rt->vvar_start = args->vdso_rt_parked_at; + rt->vdso_start = rt->vvar_start + rt->sym.vvar_size; + } + + return 0; +} + /* * The main routine to restore task via sigreturn. * This one is very special, we never return there @@ -1356,15 +1382,8 @@ long __export_restore_task(struct task_restore_args *args) goto core_restore_end; /* Map vdso that wasn't parked */ - if (args->can_map_vdso) { - int err = arch_map_vdso(args->vdso_rt_parked_at, - args->compatible_mode); - - if (err < 0) { - pr_err("Failed to map vdso %d\n", err); - goto core_restore_end; - } - } + if (args->can_map_vdso && (map_vdso(args, args->compatible_mode) < 0)) + goto core_restore_end; /* Shift private vma-s to the left */ for (i = 0; i < args->vmas_n; i++) { @@ -1496,10 +1515,9 @@ long __export_restore_task(struct task_restore_args *args) /* * Proxify vDSO. */ - if (vdso_proxify(&args->vdso_maps_rt.sym, &has_vdso_proxy, - args->vdso_rt_parked_at, - args->vmas, args->vmas_n, args->compatible_mode, - fault_injected(FI_VDSO_TRAMPOLINES))) + if (vdso_proxify(&args->vdso_maps_rt, &has_vdso_proxy, + args->vmas, args->vmas_n, args->compatible_mode, + fault_injected(FI_VDSO_TRAMPOLINES))) goto core_restore_end; /* unmap rt-vdso with restorer blob after restore's finished */ From 90ecb82202f6e426644b4d8ae88757649abacf45 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 25 Jul 2019 23:01:11 +0100 Subject: [PATCH 0157/2030] restorer/parasite-vdso: Don't move vvar if failed to move vdso Also slight refactor. 
Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- criu/pie/parasite-vdso.c | 51 +++++++++++++++++----------------------- 1 file changed, 22 insertions(+), 29 deletions(-) diff --git a/criu/pie/parasite-vdso.c b/criu/pie/parasite-vdso.c index 848883b42..be90090de 100644 --- a/criu/pie/parasite-vdso.c +++ b/criu/pie/parasite-vdso.c @@ -25,7 +25,7 @@ #define LOG_PREFIX "vdso: " /* Updates @from on success */ -static int vdso_remap(char *who, unsigned long *from, unsigned long to, size_t size) +static int remap_one(char *who, unsigned long *from, unsigned long to, size_t size) { unsigned long addr; @@ -41,37 +41,38 @@ static int vdso_remap(char *who, unsigned long *from, unsigned long to, size_t s return 0; } +static int park_at(struct vdso_maps *rt, unsigned long vdso, unsigned long vvar) +{ + int ret; + + ret = remap_one("rt-vdso", &rt->vdso_start, vdso, rt->sym.vdso_size); + + if (ret || !vvar) + return ret; + + return remap_one("rt-vvar", &rt->vvar_start, vvar, rt->sym.vvar_size); +} + /* * Park runtime vDSO in some safe place where it can be accessible * from the restorer */ -int vdso_do_park(struct vdso_maps *rt, unsigned long park_at, - unsigned long park_size) +int vdso_do_park(struct vdso_maps *rt, unsigned long addr, unsigned long space) { unsigned long vvar_size = rt->sym.vvar_size; unsigned long vdso_size = rt->sym.vdso_size; - unsigned long rt_vvar_park = park_at; - unsigned long rt_vdso_park = park_at; - int ret; - if (rt->vvar_start == VVAR_BAD_ADDR) { - BUG_ON(vdso_size < park_size); - return vdso_remap("rt-vdso", &rt->vdso_start, - rt_vdso_park, vdso_size); + BUG_ON(vdso_size < space); + return park_at(rt, addr, 0); } - BUG_ON((vdso_size + vvar_size) < park_size); + BUG_ON((vdso_size + vvar_size) < space); if (rt->sym.vdso_before_vvar) - rt_vvar_park = park_at + vdso_size; + return park_at(rt, addr, addr + vvar_size); else - rt_vdso_park = park_at + vvar_size; - - ret = vdso_remap("rt-vdso", &rt->vdso_start, rt_vdso_park, vdso_size); - ret 
|= vdso_remap("rt-vvar", &rt->vvar_start, rt_vvar_park, vvar_size); - - return ret; + return park_at(rt, addr + vdso_size, addr); } #ifndef CONFIG_COMPAT @@ -147,7 +148,6 @@ static int remap_rt_vdso(VmaEntry *vma_vdso, VmaEntry *vma_vvar, struct vdso_maps *rt) { void *remap_addr; - int ret; pr_info("Runtime vdso/vvar matches dumpee, remap inplace\n"); @@ -161,10 +161,8 @@ static int remap_rt_vdso(VmaEntry *vma_vdso, VmaEntry *vma_vvar, return -1; } - if (!vma_vvar) { - return vdso_remap("rt-vdso", &rt->vdso_start, - vma_vdso->start, rt->sym.vdso_size); - } + if (!vma_vvar) + return park_at(rt, vma_vdso->start, 0); remap_addr = (void *)(uintptr_t)vma_vvar->start; if (sys_munmap(remap_addr, vma_entry_len(vma_vvar))) { @@ -172,12 +170,7 @@ static int remap_rt_vdso(VmaEntry *vma_vdso, VmaEntry *vma_vvar, return -1; } - ret = vdso_remap("rt-vdso", &rt->vdso_start, - vma_vdso->start, rt->sym.vdso_size); - ret |= vdso_remap("rt-vvar", &rt->vvar_start, - vma_vvar->start, rt->sym.vvar_size); - - return ret; + return park_at(rt, vma_vdso->start, vma_vvar->start); } /* From 23960fe60e448ad71b509b49ed03c8a20bae0228 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 25 Jul 2019 23:01:12 +0100 Subject: [PATCH 0158/2030] seccomp/restorer: Disable gtod from vdso in strict mode Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- criu/pie/restorer.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c index d60fdbebf..9d49a8313 100644 --- a/criu/pie/restorer.c +++ b/criu/pie/restorer.c @@ -476,6 +476,23 @@ static int restore_seccomp(struct thread_restore_args *args) return 0; break; case SECCOMP_MODE_STRICT: + /* + * Disable gettimeofday() from vdso: it may use TSC + * which is restricted by kernel: + * + * static long seccomp_set_mode_strict(void) + * { + * [..] + * #ifdef TIF_NOTSC + * disable_TSC(); + * #endif + * [..] 
+ * + * XXX: It may need to be fixed in kernel under + * PTRACE_O_SUSPEND_SECCOMP, but for now just get timings + * with a raw syscall instead of vdso. + */ + std_log_set_gettimeofday(NULL); ret = sys_prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, 0, 0, 0); if (ret < 0) { pr_err("seccomp: SECCOMP_MODE_STRICT returned %d on tid %d\n", From 9e5c0634ff2f3333c8be8561b03a66afe77b2e37 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 25 Jul 2019 23:01:13 +0100 Subject: [PATCH 0159/2030] vdso: Add compatible property to vdso_maps We need to distinguish compatible (ia32) vdso maps from x86_64 ones, because that dictates the ABI of the vdso code. Based on that, the decision is made whether to use gettimeofday() from vdso in the 64-bit restorer. Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- criu/include/util-vdso.h | 1 + criu/vdso.c | 6 ++++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/criu/include/util-vdso.h b/criu/include/util-vdso.h index c74360c87..33b7411de 100644 --- a/criu/include/util-vdso.h +++ b/criu/include/util-vdso.h @@ -38,6 +38,7 @@ struct vdso_maps { unsigned long vdso_start; unsigned long vvar_start; struct vdso_symtable sym; + bool compatible; }; #define VDSO_SYMBOL_INIT { .offset = VDSO_BAD_ADDR, } diff --git a/criu/vdso.c b/criu/vdso.c index 257cbcd92..50b8b8dba 100644 --- a/criu/vdso.c +++ b/criu/vdso.c @@ -597,7 +597,8 @@ int vdso_init_restore(void) vdso_maps.sym = kdat.vdso_sym; #ifdef CONFIG_COMPAT - vdso_maps_compat.sym = kdat.vdso_sym_compat; + vdso_maps_compat.sym = kdat.vdso_sym_compat; + vdso_maps_compat.compatible = true; #endif return 0; @@ -621,7 +622,8 @@ int kerndat_vdso_fill_symtable(void) pr_err("Failed to fill compat vdso symtable\n"); return -1; } - kdat.vdso_sym_compat = vdso_maps_compat.sym; + vdso_maps_compat.compatible = true; + kdat.vdso_sym_compat = vdso_maps_compat.sym; #endif return 0; From 10a831689e160464b628b1d4e3e58da2439cd983 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 25 Jul 2019 23:01:14 +0100 Subject:
[PATCH 0160/2030] restorer: Use gettimeofday() from rt-vdso for log timings Omit calling raw syscalls and use vdso for the purpose of logging. That will eliminate as much as one-syscall-per-PIE-message. Getting time without switching to kernel will speed up C/R, keeping logs as informative as they were. Fixes: #346 I haven't enabled vdso timings for ia32 applications as it needs more changes and complexity.. Maybe later. Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- criu/arch/aarch64/include/asm/vdso.h | 1 + criu/arch/arm/include/asm/vdso.h | 1 + criu/arch/ppc64/include/asm/vdso.h | 1 + criu/arch/s390/include/asm/vdso.h | 3 +- criu/arch/x86/include/asm/vdso.h | 3 +- criu/include/parasite-vdso.h | 1 + criu/pie/parasite-vdso.c | 59 +++++++++++++++++++++++++--- criu/pie/restorer.c | 2 + 8 files changed, 64 insertions(+), 7 deletions(-) diff --git a/criu/arch/aarch64/include/asm/vdso.h b/criu/arch/aarch64/include/asm/vdso.h index a7802a279..8a65e0947 100644 --- a/criu/arch/aarch64/include/asm/vdso.h +++ b/criu/arch/aarch64/include/asm/vdso.h @@ -10,6 +10,7 @@ * we should support at the moment. */ #define VDSO_SYMBOL_MAX 4 +#define VDSO_SYMBOL_GTOD 2 /* * Workaround for VDSO array symbol table's relocation. diff --git a/criu/arch/arm/include/asm/vdso.h b/criu/arch/arm/include/asm/vdso.h index cf9d500be..f57790ac2 100644 --- a/criu/arch/arm/include/asm/vdso.h +++ b/criu/arch/arm/include/asm/vdso.h @@ -10,6 +10,7 @@ * Poke from kernel file arch/arm/vdso/vdso.lds.S */ #define VDSO_SYMBOL_MAX 2 +#define VDSO_SYMBOL_GTOD 1 #define ARCH_VDSO_SYMBOLS \ "__vdso_clock_gettime", \ "__vdso_gettimeofday" diff --git a/criu/arch/ppc64/include/asm/vdso.h b/criu/arch/ppc64/include/asm/vdso.h index 9546e2460..6c92348d6 100644 --- a/criu/arch/ppc64/include/asm/vdso.h +++ b/criu/arch/ppc64/include/asm/vdso.h @@ -13,6 +13,7 @@ * inside the text page which should not be used as is from user space. 
*/ #define VDSO_SYMBOL_MAX 10 +#define VDSO_SYMBOL_GTOD 5 #define ARCH_VDSO_SYMBOLS \ "__kernel_clock_getres", \ "__kernel_clock_gettime", \ diff --git a/criu/arch/s390/include/asm/vdso.h b/criu/arch/s390/include/asm/vdso.h index 63e7e0464..c54d848ad 100644 --- a/criu/arch/s390/include/asm/vdso.h +++ b/criu/arch/s390/include/asm/vdso.h @@ -8,7 +8,8 @@ * This is a minimal amount of symbols * we should support at the moment. */ -#define VDSO_SYMBOL_MAX 4 +#define VDSO_SYMBOL_MAX 4 +#define VDSO_SYMBOL_GTOD 0 /* * This definition is used in pie/util-vdso.c to initialize the vdso symbol diff --git a/criu/arch/x86/include/asm/vdso.h b/criu/arch/x86/include/asm/vdso.h index 046db2336..28ae2d15a 100644 --- a/criu/arch/x86/include/asm/vdso.h +++ b/criu/arch/x86/include/asm/vdso.h @@ -12,7 +12,8 @@ * This is a minimal amount of symbols * we should support at the moment. */ -#define VDSO_SYMBOL_MAX 6 +#define VDSO_SYMBOL_MAX 6 +#define VDSO_SYMBOL_GTOD 2 /* * XXX: we don't patch __kernel_vsyscall as it's too small: diff --git a/criu/include/parasite-vdso.h b/criu/include/parasite-vdso.h index 872105133..9ee32f2a7 100644 --- a/criu/include/parasite-vdso.h +++ b/criu/include/parasite-vdso.h @@ -81,6 +81,7 @@ static inline bool is_vdso_mark(void *addr) return false; } +extern void vdso_update_gtod_addr(struct vdso_maps *rt); extern int vdso_do_park(struct vdso_maps *rt, unsigned long park_at, unsigned long park_size); extern int vdso_map_compat(unsigned long map_at); diff --git a/criu/pie/parasite-vdso.c b/criu/pie/parasite-vdso.c index be90090de..38da76680 100644 --- a/criu/pie/parasite-vdso.c +++ b/criu/pie/parasite-vdso.c @@ -12,7 +12,8 @@ #include "int.h" #include "types.h" #include "page.h" -#include +#include "compel/plugins/std/syscall.h" +#include "compel/plugins/std/log.h" #include "image.h" #include "parasite-vdso.h" #include "vma.h" @@ -43,14 +44,62 @@ static int remap_one(char *who, unsigned long *from, unsigned long to, size_t si static int park_at(struct vdso_maps 
*rt, unsigned long vdso, unsigned long vvar) { + unsigned long vvar_size = rt->sym.vvar_size; + unsigned long vdso_size = rt->sym.vdso_size; int ret; - ret = remap_one("rt-vdso", &rt->vdso_start, vdso, rt->sym.vdso_size); - - if (ret || !vvar) + ret = remap_one("rt-vdso", &rt->vdso_start, vdso, vdso_size); + if (ret) return ret; - return remap_one("rt-vvar", &rt->vvar_start, vvar, rt->sym.vvar_size); + std_log_set_gettimeofday(NULL); /* stop using vdso for timings */ + + if (vvar) + ret = remap_one("rt-vvar", &rt->vvar_start, vvar, vvar_size); + + if (!ret) + vdso_update_gtod_addr(rt); + + return ret; +} + +void vdso_update_gtod_addr(struct vdso_maps *rt) +{ + struct vdso_symbol *gtod_sym; + void *gtod; + + if (rt->vdso_start == VDSO_BAD_ADDR) { + pr_debug("No rt-vdso - no fast gettimeofday()\n"); + return; + } + + if (VDSO_SYMBOL_GTOD < 0) { + pr_debug("Arch doesn't support gettimeofday() from vdso\n"); + return; + } + + /* + * XXX: Don't enable vdso timings for compatible applications. + * We would need to temporary map 64-bit vdso for timings in restorer + * and remap it with compatible at the end of restore. + * And vdso proxification should be done much later. + * Also, restorer should have two sets of vdso_maps in arguments. 
+ */ + if (rt->compatible) { + pr_debug("compat mode: using syscall for gettimeofday()\n"); + return; + } + + gtod_sym = &rt->sym.symbols[VDSO_SYMBOL_GTOD]; + if (gtod_sym->offset == VDSO_BAD_ADDR) { + pr_debug("No gettimeofday() on rt-vdso\n"); + return; + } + + gtod = (void*)(rt->vdso_start + gtod_sym->offset); + pr_info("Using gettimeofday() on vdso at %p\n", gtod); + + std_log_set_gettimeofday(gtod); } /* diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c index 9d49a8313..4fff2c85d 100644 --- a/criu/pie/restorer.c +++ b/criu/pie/restorer.c @@ -1402,6 +1402,8 @@ long __export_restore_task(struct task_restore_args *args) if (args->can_map_vdso && (map_vdso(args, args->compatible_mode) < 0)) goto core_restore_end; + vdso_update_gtod_addr(&args->vdso_maps_rt); + /* Shift private vma-s to the left */ for (i = 0; i < args->vmas_n; i++) { vma_entry = args->vmas + i; From 96992883ca7550387b347fbbcfde1266f078c55a Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Wed, 26 Jun 2019 11:55:19 +0300 Subject: [PATCH 0161/2030] inotify: cleanup auxiliary events from queue I've mentioned the problem that after c/r each inotify receives one or more unexpected events. This happens because our algorithm mixes setting up an inotify watch on the file with opening and closing it. We mix inotify creation and watched file open/close because we need to create the inotify watch on the file from another mntns (generally). And we do a trick opening the file so that it can be referenced in current mntns by /proc/<pid>/fd/<fd> path. Moreover, if we have several inotifies on the same file, then the queue gets even more events than just the one which happens in a simple case. note: For now we don't have a way to c/r events in queue but we need to at least leave the queue clean from events we generated ourselves. Still, rewriting wd creation without this proc-fd trick looks harder than removing the unexpected events from the queues.
So just clean up these events for each fdt-restorer process, for each of its inotify fds _after_ restore stage (at CR_STATE_RESTORE_SIGCHLD). This is the closest place where for an _alive_ process we know that all prepare_fds() are done by all processes. This means we need to do the cleanup in PIE code, so we need to add sys_ppoll definitions for PIE and divide the process into two phases: first collect and transfer fds, second do the real cleanup. note: We still do prepare_fds() for zombies. But zombies have no fds in /proc/pid/fd so we will collect none in collect_fds() and therefore have none in prepare_fds(), thus there is no need to clean up inotifies for zombies. v2: adapt to multiple unexpected events v3: do not cleanup from fdt-receivers, done from fdt-restorer v4: do without additional fds restore stage v5: replace sys_poll with sys_ppoll and fix minor nits Signed-off-by: Pavel Tikhomirov use ppoll always and remove poll --- .../arch/arm/plugins/std/syscalls/syscall.def | 1 + .../plugins/std/syscalls/syscall-ppc64.tbl | 1 + .../plugins/std/syscalls/syscall-s390.tbl | 1 + .../x86/plugins/std/syscalls/syscall_32.tbl | 1 + .../x86/plugins/std/syscalls/syscall_64.tbl | 1 + .../plugins/include/uapi/std/syscall-types.h | 1 + criu/cr-restore.c | 38 ++++++++++ criu/include/restorer.h | 3 + criu/pie/restorer.c | 70 +++++++++++++++++++ 9 files changed, 117 insertions(+) diff --git a/compel/arch/arm/plugins/std/syscalls/syscall.def b/compel/arch/arm/plugins/std/syscalls/syscall.def index 653a7539b..721ff16dc 100644 --- a/compel/arch/arm/plugins/std/syscalls/syscall.def +++ b/compel/arch/arm/plugins/std/syscalls/syscall.def @@ -111,3 +111,4 @@ preadv_raw 69 361 (int fd, struct iovec *iov, unsigned long nr, unsigned long userfaultfd 282 388 (int flags) fallocate 47 352 (int fd, int mode, loff_t offset, loff_t len) cacheflush !
983042 (void *start, void *end, int flags) +ppoll 73 336 (struct pollfd *fds, unsigned int nfds, const struct timespec *tmo, const sigset_t *sigmask, size_t sigsetsize) diff --git a/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl b/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl index 62e0bc1a0..3b3079040 100644 --- a/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl +++ b/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl @@ -107,3 +107,4 @@ __NR_ipc 117 sys_ipc (unsigned int call, int first, unsigned long second, un __NR_gettimeofday 78 sys_gettimeofday (struct timeval *tv, struct timezone *tz) __NR_preadv 320 sys_preadv_raw (int fd, struct iovec *iov, unsigned long nr, unsigned long pos_l, unsigned long pos_h) __NR_userfaultfd 364 sys_userfaultfd (int flags) +__NR_ppoll 281 sys_ppoll (struct pollfd *fds, unsigned int nfds, const struct timespec *tmo, const sigset_t *sigmask, size_t sigsetsize) diff --git a/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl b/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl index 3521e9150..cc13a63dd 100644 --- a/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl +++ b/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl @@ -107,3 +107,4 @@ __NR_ipc 117 sys_ipc (unsigned int call, int first, unsigned long second, un __NR_userfaultfd 355 sys_userfaultfd (int flags) __NR_preadv 328 sys_preadv_raw (int fd, struct iovec *iov, unsigned long nr, unsigned long pos_l, unsigned long pos_h) __NR_gettimeofday 78 sys_gettimeofday (struct timeval *tv, struct timezone *tz) +__NR_ppoll 302 sys_ppoll (struct pollfd *fds, unsigned int nfds, const struct timespec *tmo, const sigset_t *sigmask, size_t sigsetsize) diff --git a/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl b/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl index a6c55b83c..7903ab150 100644 --- a/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl +++ b/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl @@ -95,3 +95,4 @@ 
__NR_kcmp 349 sys_kcmp (pid_t pid1, pid_t pid2, int type, unsigned long idx1, __NR_seccomp 354 sys_seccomp (unsigned int op, unsigned int flags, const char *uargs) __NR_memfd_create 356 sys_memfd_create (const char *name, unsigned int flags) __NR_userfaultfd 374 sys_userfaultfd (int flags) +__NR_ppoll 309 sys_ppoll (struct pollfd *fds, unsigned int nfds, const struct timespec *tmo, const sigset_t *sigmask, size_t sigsetsize) diff --git a/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl b/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl index 642715147..4ac9164ea 100644 --- a/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl +++ b/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl @@ -106,3 +106,4 @@ __NR_setns 308 sys_setns (int fd, int nstype) __NR_kcmp 312 sys_kcmp (pid_t pid1, pid_t pid2, int type, unsigned long idx1, unsigned long idx2) __NR_memfd_create 319 sys_memfd_create (const char *name, unsigned int flags) __NR_userfaultfd 323 sys_userfaultfd (int flags) +__NR_ppoll 271 sys_ppoll (struct pollfd *fds, unsigned int nfds, const struct timespec *tmo, const sigset_t *sigmask, size_t sigsetsize) diff --git a/compel/plugins/include/uapi/std/syscall-types.h b/compel/plugins/include/uapi/std/syscall-types.h index ddb740c82..57865e741 100644 --- a/compel/plugins/include/uapi/std/syscall-types.h +++ b/compel/plugins/include/uapi/std/syscall-types.h @@ -38,6 +38,7 @@ struct siginfo; struct msghdr; struct rusage; struct iocb; +struct pollfd; typedef unsigned long aio_context_t; diff --git a/criu/cr-restore.c b/criu/cr-restore.c index 2ffd9a86c..b4530f8e5 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -726,6 +726,40 @@ static int collect_zombie_pids(struct task_restore_args *ta) return collect_child_pids(TASK_DEAD, &ta->zombies_n); } +static int collect_inotify_fds(struct task_restore_args *ta) +{ + struct list_head *list = &rsti(current)->fds; + struct fdt *fdt = rsti(current)->fdt; + struct fdinfo_list_entry *fle; + + /* Check we are an 
fdt-restorer */ + if (fdt && fdt->pid != vpid(current)) + return 0; + + ta->inotify_fds = (int *)rst_mem_align_cpos(RM_PRIVATE); + + list_for_each_entry(fle, list, ps_list) { + struct file_desc *d = fle->desc; + int *inotify_fd; + + if (d->ops->type != FD_TYPES__INOTIFY) + continue; + + if (fle != file_master(d)) + continue; + + inotify_fd = rst_mem_alloc(sizeof(*inotify_fd), RM_PRIVATE); + if (!inotify_fd) + return -1; + + ta->inotify_fds_n++; + *inotify_fd = fle->fe->fd; + + pr_debug("Collect inotify fd %d to cleanup later\n", *inotify_fd); + } + return 0; +} + static int open_core(int pid, CoreEntry **pcore) { int ret; @@ -880,6 +914,9 @@ static int restore_one_alive_task(int pid, CoreEntry *core) if (collect_zombie_pids(ta) < 0) return -1; + if (collect_inotify_fds(ta) < 0) + return -1; + if (prepare_proc_misc(pid, core->tc, ta)) return -1; @@ -3411,6 +3448,7 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns RST_MEM_FIXUP_PPTR(task_args->helpers); RST_MEM_FIXUP_PPTR(task_args->zombies); RST_MEM_FIXUP_PPTR(task_args->vma_ios); + RST_MEM_FIXUP_PPTR(task_args->inotify_fds); task_args->compatible_mode = core_is_compat(core); /* diff --git a/criu/include/restorer.h b/criu/include/restorer.h index f980bfad3..b93807f5f 100644 --- a/criu/include/restorer.h +++ b/criu/include/restorer.h @@ -177,6 +177,9 @@ struct task_restore_args { pid_t *zombies; unsigned int zombies_n; + int *inotify_fds; /* fds to cleanup inotify events at CR_STATE_RESTORE_SIGCHLD stage */ + unsigned int inotify_fds_n; + /* * * * * * * * * * * * * * * * * * * * */ unsigned long task_size; diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c index 4fff2c85d..6f8f1ae54 100644 --- a/criu/pie/restorer.c +++ b/criu/pie/restorer.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "linux/userfaultfd.h" @@ -1307,6 +1308,72 @@ static int map_vdso(struct task_restore_args *args, bool compatible) return 0; } +static int fd_poll(int inotify_fd) +{ + struct 
pollfd pfd = {inotify_fd, POLLIN, 0}; + struct timespec tmo = {0, 0}; + + return sys_ppoll(&pfd, 1, &tmo, NULL, sizeof(sigset_t)); +} + +/* + * note: Actually kernel may want even more space for one event (see + * round_event_name_len), so using buffer of EVENT_BUFF_SIZE size may fail. + * To be on the safe side - take a bigger buffer, and this also allows us to + * read more events in one syscall. + */ +#define EVENT_BUFF_SIZE ((sizeof(struct inotify_event) + PATH_MAX)) + +/* + * Read all available events from inotify queue + */ +static int cleanup_inotify_events(int inotify_fd) +{ + char buf[EVENT_BUFF_SIZE * 8]; + int ret; + + while (1) { + ret = fd_poll(inotify_fd); + if (ret < 0) { + pr_err("Failed to poll from inotify fd: %d\n", ret); + return -1; + } else if (ret == 0) { + break; + } + + ret = sys_read(inotify_fd, buf, sizeof(buf)); + if (ret < 0) { + pr_err("Failed to read inotify events\n"); + return -1; + } + } + + return 0; +} + +/* + * When we restore inotifies we can open and close files we create a watch + * for. So we need to clean up these auxiliary events which we've generated. + * + * note: For now we don't have a way to c/r events in queue but we need to + * at least leave the queue clean from events generated by ourselves. + */ +int cleanup_current_inotify_events(struct task_restore_args *task_args) +{ + int i; + + for (i = 0; i < task_args->inotify_fds_n; i++) { + int inotify_fd = task_args->inotify_fds[i]; + + pr_debug("Cleaning inotify events from %d\n", inotify_fd); + + if (cleanup_inotify_events(inotify_fd)) + return -1; + } + + return 0; +} + /* * The main routine to restore task via sigreturn.
* This one is very special, we never return there @@ -1767,6 +1834,9 @@ long __export_restore_task(struct task_restore_args *args) restore_finish_stage(task_entries_local, CR_STATE_RESTORE); + if (cleanup_current_inotify_events(args)) + goto core_restore_end; + if (wait_helpers(args) < 0) goto core_restore_end; if (wait_zombies(args) < 0) From e5bdcbbd1d975f088df3e5766f1ddda4a29c3cb1 Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Wed, 26 Jun 2019 15:47:39 +0300 Subject: [PATCH 0162/2030] zdtm/inotify: add a test that no unexpected events appear after c/r Just create two inotify watches on a testfile, and do nothing except c/r, it is expected that there is no events in queue after these. before "inotify: cleanup auxiliary events from queue": [root@snorch criu]# ./test/zdtm.py run -t zdtm/static/inotify04 === Run 1/1 ================ zdtm/static/inotify04 ======================== Run zdtm/static/inotify04 in h ======================== DEP inotify04.d CC inotify04.o LINK inotify04 Start test ./inotify04 --pidfile=inotify04.pid --outfile=inotify04.out --dirname=inotify04.test Run criu dump Run criu restore Send the 15 signal to 60 Wait for zdtm/static/inotify04(60) to die for 0.100000 =============== Test zdtm/static/inotify04 FAIL at result check ================ Test output: ================================ 18:37:14.279: 60: Event 0x10 18:37:14.280: 60: Event 0x20 18:37:14.280: 60: Event 0x10 18:37:14.280: 60: Read 3 events 18:37:14.280: 60: FAIL: inotify04.c:105: Found 3 unexpected inotify events (errno = 11 (Resource temporarily unavailable)) <<< ================================ v2: make two inotifies on the same file Signed-off-by: Pavel Tikhomirov zdtm: inotify04 add another inotify on the same file --- test/zdtm/static/Makefile | 1 + test/zdtm/static/inotify04.c | 124 +++++++++++++++++++++++++++++++++++ 2 files changed, 125 insertions(+) create mode 100644 test/zdtm/static/inotify04.c diff --git a/test/zdtm/static/Makefile b/test/zdtm/static/Makefile 
index 52bd00602..d8279d6f8 100644 --- a/test/zdtm/static/Makefile +++ b/test/zdtm/static/Makefile @@ -311,6 +311,7 @@ TST_DIR = \ inotify00 \ inotify01 \ inotify02 \ + inotify04 \ cgroup00 \ rmdir_open \ cgroup01 \ diff --git a/test/zdtm/static/inotify04.c b/test/zdtm/static/inotify04.c new file mode 100644 index 000000000..fb9293024 --- /dev/null +++ b/test/zdtm/static/inotify04.c @@ -0,0 +1,124 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "zdtmtst.h" + +const char *test_doc = "Check inotify does not have trash in queue after c/r"; +const char *test_author = "Pavel Tikhomirov "; + +char *dirname; +TEST_OPTION(dirname, string, "directory name", 1); + +#define TEST_FILE "inotify-testfile" + +#define BUFF_SIZE ((sizeof(struct inotify_event) + PATH_MAX)) + +static int inotify_read_events(int inotify_fd, unsigned int *n) +{ + struct inotify_event *event; + char buf[BUFF_SIZE * 8]; + int ret, off; + + *n = 0; + + while (1) { + ret = read(inotify_fd, buf, sizeof(buf)); + if (ret < 0) { + if (errno != EAGAIN) { + pr_perror("Can't read inotify queue"); + return -1; + } else { + ret = 0; + break; + } + } else if (ret == 0) + break; + + for (off = 0; off < ret; (*n)++, off += sizeof(*event) + event->len) { + event = (void *)(buf + off); + test_msg("Event %#10x\n", event->mask); + } + } + + test_msg("Read %u events\n", *n); + return ret; +} + +int main (int argc, char *argv[]) +{ + unsigned int mask = IN_ALL_EVENTS; + char test_file_path[PATH_MAX]; + int fd, ifd, ifd2, ret; + unsigned int n; + + test_init(argc, argv); + + if (mkdir(dirname, S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH)) { + pr_perror("Can't create directory %s", dirname); + return 1; + } + + snprintf(test_file_path, sizeof(test_file_path), "%s/%s", dirname, TEST_FILE); + + fd = open(test_file_path, O_CREAT, 0644); + if (fd < 0) { + pr_perror("Failed to create %s", test_file_path); + return 1; + } + close(fd); + + ifd = inotify_init1(IN_NONBLOCK); + if (ifd < 0) { + 
pr_perror("Failed inotify_init"); + return 1; + } + + ifd2 = inotify_init1(IN_NONBLOCK); + if (ifd2 < 0) { + pr_perror("Failed inotify_init"); + return 1; + } + + if (inotify_add_watch(ifd, test_file_path, mask) < 0) { + pr_perror("Failed inotify_add_watch"); + return 1; + } + + if (inotify_add_watch(ifd2, test_file_path, mask) < 0) { + pr_perror("Failed inotify_add_watch"); + return 1; + } + + test_daemon(); + test_waitsig(); + + ret = inotify_read_events(ifd, &n); + if (ret < 0) { + fail("Failed to read inotify events"); + return 1; + } else if (n != 0) { + fail("Found %d unexpected inotify events", n); + return 1; + } + + ret = inotify_read_events(ifd, &n); + if (ret < 0) { + fail("Failed to read inotify events"); + return 1; + } else if (n != 0) { + fail("Found %d unexpected inotify events", n); + return 1; + } + + close(ifd); + close(ifd2); + unlink(test_file_path); + pass(); + + return 0; +} From 5f91f920a8756e565d4a93a2542ba13865cc5733 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Thu, 8 Aug 2019 11:09:05 -0700 Subject: [PATCH 0163/2030] test: bring the lo interface up in each network namespace This is needed to workaround the problem with "ip route save": (00.113153) Running ip route save Error: ipv4: FIB table does not exist. 
Signed-off-by: Andrei Vagin --- test/zdtm/static/netns_sub.c | 4 ++++ test/zdtm/static/netns_sub.desc | 6 +++++- test/zdtm/static/netns_sub_veth.c | 2 ++ test/zdtm/static/netns_sub_veth.desc | 2 +- 4 files changed, 12 insertions(+), 2 deletions(-) diff --git a/test/zdtm/static/netns_sub.c b/test/zdtm/static/netns_sub.c index 6515057d8..9f77bf96b 100644 --- a/test/zdtm/static/netns_sub.c +++ b/test/zdtm/static/netns_sub.c @@ -104,6 +104,8 @@ int main(int argc, char **argv) pr_perror("unshare"); return 1; } + if (system("ip link set up dev lo")) + return 1; sk = create_socket(1); if (sk < 0) return 1; @@ -166,6 +168,8 @@ int main(int argc, char **argv) pr_perror("unshare"); return 1; } + if (system("ip link set up dev lo")) + return 1; sk = create_socket(2); if (sk < 0) return 1; diff --git a/test/zdtm/static/netns_sub.desc b/test/zdtm/static/netns_sub.desc index 7657ba45c..1f12518d2 100644 --- a/test/zdtm/static/netns_sub.desc +++ b/test/zdtm/static/netns_sub.desc @@ -1 +1,5 @@ -{'flavor': 'ns uns', 'flags': 'suid'} +{ + 'deps': ['/bin/sh', '/sbin/ip|/bin/ip'], + 'flavor': 'ns uns', + 'flags': 'suid' +} diff --git a/test/zdtm/static/netns_sub_veth.c b/test/zdtm/static/netns_sub_veth.c index 927827199..eedbb15c3 100644 --- a/test/zdtm/static/netns_sub_veth.c +++ b/test/zdtm/static/netns_sub_veth.c @@ -50,6 +50,8 @@ int main(int argc, char **argv) if (unshare(CLONE_NEWNET)) return 1; + if (system("ip link set up dev lo")) + return 1; task_waiter_complete(&lock, i); test_waitsig(); diff --git a/test/zdtm/static/netns_sub_veth.desc b/test/zdtm/static/netns_sub_veth.desc index ea9e15c8a..18f81a880 100644 --- a/test/zdtm/static/netns_sub_veth.desc +++ b/test/zdtm/static/netns_sub_veth.desc @@ -1,5 +1,5 @@ { - 'deps': ['/sbin/ip', '/bin/sh'], + 'deps': ['/sbin/ip|/bin/ip', '/bin/sh'], 'flags': 'suid', 'flavor': 'ns uns', 'feature': 'link_nsid', From f6ab4620748588d5d8991c858ae6c2a0b52a2b44 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Sat, 10 Aug 2019 02:33:29 +0100 
Subject: [PATCH 0164/2030] vdso: Correctly track vdso position without vvar If vvar is absent vdso_before_vvar is initialized by "false". Which means that the check that is supposed to track the vdso/vvar pair went into the wrong brackets. As a result it broke CRIU on kernels that don't have vvar mapping. Simplify the code by moving the check for VVAR_BAD_SIZE outside of the conditional for vdso_before_vvar. Reported-by: Cyrill Gorcunov Fixes: 0918c7667647 ("vdso/restorer: Always track vdso/vvar positions in vdso_maps_rt") Signed-off-by: Dmitry Safonov Acked-by: Cyrill Gorcunov Tested-by: Cyrill Gorcunov Signed-off-by: Andrei Vagin --- criu/pie/restorer.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c index 6f8f1ae54..390c0e1a9 100644 --- a/criu/pie/restorer.c +++ b/criu/pie/restorer.c @@ -1293,13 +1293,16 @@ static int map_vdso(struct task_restore_args *args, bool compatible) return err; } + /* kernel may provide only vdso */ + if (rt->sym.vvar_size == VVAR_BAD_SIZE) { + rt->vdso_start = args->vdso_rt_parked_at; + rt->vvar_start = VVAR_BAD_ADDR; + return 0; + } + if (rt->sym.vdso_before_vvar) { rt->vdso_start = args->vdso_rt_parked_at; - /* kernel may provide only vdso */ - if (rt->sym.vvar_size != VVAR_BAD_SIZE) - rt->vvar_start = rt->vdso_start + rt->sym.vdso_size; - else - rt->vvar_start = VVAR_BAD_ADDR; + rt->vvar_start = rt->vdso_start + rt->sym.vdso_size; } else { rt->vvar_start = args->vdso_rt_parked_at; rt->vdso_start = rt->vvar_start + rt->sym.vvar_size; From 25460af822a629ed6e3ce1360ca035016127cd6c Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Sat, 10 Aug 2019 23:46:19 +0300 Subject: [PATCH 0165/2030] kerndat: mark functions as static which are used in kerndat.c only Signed-off-by: Andrei Vagin --- criu/include/kerndat.h | 7 ------- criu/kerndat.c | 14 +++++++------- criu/uffd.c | 2 +- 3 files changed, 8 insertions(+), 15 deletions(-) diff --git a/criu/include/kerndat.h
b/criu/include/kerndat.h index 75e2130b2..c2164fa0a 100644 --- a/criu/include/kerndat.h +++ b/criu/include/kerndat.h @@ -15,10 +15,6 @@ struct stat; */ extern int kerndat_init(void); -extern int kerndat_get_dirty_track(void); -extern int kerndat_fdinfo_has_lock(void); -extern int kerndat_loginuid(void); -extern int kerndat_files_stat(bool early); enum pagemap_func { PM_UNKNOWN, @@ -90,7 +86,4 @@ enum { */ extern int kerndat_fs_virtualized(unsigned int which, u32 kdev); -extern int kerndat_tcp_repair(); -extern int kerndat_uffd(void); - #endif /* __CR_KERNDAT_H__ */ diff --git a/criu/kerndat.c b/criu/kerndat.c index b884f6d15..c158d6760 100644 --- a/criu/kerndat.c +++ b/criu/kerndat.c @@ -157,7 +157,7 @@ static void kerndat_mmap_min_addr(void) (unsigned long)kdat.mmap_min_addr); } -int kerndat_files_stat(bool early) +static int kerndat_files_stat(bool early) { static const uint32_t NR_OPEN_DEFAULT = 1024 * 1024; static const uint64_t MAX_FILES_DEFAULT = 8192; @@ -348,7 +348,7 @@ int kerndat_fs_virtualized(unsigned int which, u32 kdev) * this functionality under CONFIG_MEM_SOFT_DIRTY option. 
*/ -int kerndat_get_dirty_track(void) +static int kerndat_get_dirty_track(void) { char *map; int pm2; @@ -471,7 +471,7 @@ static int get_task_size(void) return 0; } -int kerndat_fdinfo_has_lock() +static int kerndat_fdinfo_has_lock() { int fd, pfd = -1, exit_code = -1, len; char buf[PAGE_SIZE]; @@ -521,7 +521,7 @@ static int get_ipv6() return 0; } -int kerndat_loginuid(void) +static int kerndat_loginuid(void) { unsigned int saved_loginuid; int ret; @@ -742,7 +742,7 @@ err: return ret; } -int kerndat_has_inotify_setnextwd(void) +static int kerndat_has_inotify_setnextwd(void) { int ret = 0; int fd; @@ -765,7 +765,7 @@ int kerndat_has_inotify_setnextwd(void) return ret; } -int has_kcmp_epoll_tfd(void) +static int has_kcmp_epoll_tfd(void) { kcmp_epoll_slot_t slot = { }; int ret = -1, efd, tfd; @@ -907,7 +907,7 @@ unl: } } -int kerndat_uffd(void) +static int kerndat_uffd(void) { int uffd; diff --git a/criu/uffd.c b/criu/uffd.c index 5c1e32184..c47b35b1f 100644 --- a/criu/uffd.c +++ b/criu/uffd.c @@ -1417,7 +1417,7 @@ int cr_lazy_pages(bool daemon) int lazy_sk; int ret; - if (kerndat_uffd() || !kdat.has_uffd) + if (!kdat.has_uffd) return -1; if (prepare_dummy_pstree()) From 4e84d11c1f30e042dcd1f8d98872bc4abb8fa889 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Sat, 10 Aug 2019 23:53:40 +0300 Subject: [PATCH 0166/2030] kerndat: remove unused code Signed-off-by: Andrei Vagin --- criu/include/kerndat.h | 1 - criu/kerndat.c | 54 +++++------------------------------------- 2 files changed, 6 insertions(+), 49 deletions(-) diff --git a/criu/include/kerndat.h b/criu/include/kerndat.h index c2164fa0a..d93e07813 100644 --- a/criu/include/kerndat.h +++ b/criu/include/kerndat.h @@ -62,7 +62,6 @@ struct kerndat_s { bool has_nsid; bool has_link_nsid; unsigned int sysctl_nr_open; - unsigned long files_stat_max_files; bool x86_has_ptrace_fpu_xsave_bug; bool has_inotify_setnextwd; bool has_kcmp_epoll_tfd; diff --git a/criu/kerndat.c b/criu/kerndat.c index c158d6760..39cacb8fe 100644 
--- a/criu/kerndat.c +++ b/criu/kerndat.c @@ -157,19 +157,12 @@ static void kerndat_mmap_min_addr(void) (unsigned long)kdat.mmap_min_addr); } -static int kerndat_files_stat(bool early) +static int kerndat_files_stat(void) { static const uint32_t NR_OPEN_DEFAULT = 1024 * 1024; - static const uint64_t MAX_FILES_DEFAULT = 8192; - uint64_t max_files; uint32_t nr_open; struct sysctl_req req[] = { - { - .name = "fs/file-max", - .arg = &max_files, - .type = CTL_U64, - }, { .name = "fs/nr_open", .arg = &nr_open, @@ -177,50 +170,15 @@ static int kerndat_files_stat(bool early) }, }; - if (!early) { - if (sysctl_op(req, ARRAY_SIZE(req), CTL_READ, 0)) { - pr_warn("Can't fetch file_stat, using kernel defaults\n"); - nr_open = NR_OPEN_DEFAULT; - max_files = MAX_FILES_DEFAULT; - } - } else { - char buf[64]; - int fd1, fd2; - ssize_t ret; - - fd1 = open("/proc/sys/fs/file-max", O_RDONLY); - fd2 = open("/proc/sys/fs/nr_open", O_RDONLY); - + if (sysctl_op(req, ARRAY_SIZE(req), CTL_READ, 0)) { + pr_warn("Can't fetch file_stat, using kernel defaults\n"); nr_open = NR_OPEN_DEFAULT; - max_files = MAX_FILES_DEFAULT; - - if (fd1 < 0 || fd2 < 0) { - pr_warn("Can't fetch file_stat, using kernel defaults\n"); - } else { - ret = read(fd1, buf, sizeof(buf) - 1); - if (ret > 0) { - buf[ret] = '\0'; - max_files = atol(buf); - } - ret = read(fd2, buf, sizeof(buf) - 1); - if (ret > 0) { - buf[ret] = '\0'; - nr_open = atol(buf); - } - } - - if (fd1 >= 0) - close(fd1); - if (fd2 >= 0) - close(fd2); } kdat.sysctl_nr_open = nr_open; - kdat.files_stat_max_files = max_files; - pr_debug("files stat: %s %lu, %s %u\n", - req[0].name, kdat.files_stat_max_files, - req[1].name, kdat.sysctl_nr_open); + pr_debug("files stat: %s %u\n", + req[0].name, kdat.sysctl_nr_open); return 0; } @@ -1088,7 +1046,7 @@ int kerndat_init(void) kerndat_lsm(); kerndat_mmap_min_addr(); - kerndat_files_stat(false); + kerndat_files_stat(); if (!ret) kerndat_save_cache(); From 1356a1def3fe2ced121ecaa6ed082154cc1060a4 Mon Sep 17 
00:00:00 2001 From: Sebastiaan van Stijn Date: Tue, 13 Aug 2019 12:29:08 +0200 Subject: [PATCH 0167/2030] Replace references to github.com/xemul/criu Signed-off-by: Sebastiaan van Stijn --- Makefile.config | 2 +- criu/pagemap-cache.c | 2 +- criu/pie/parasite.c | 2 +- scripts/crit-setup.py | 2 +- scripts/travis/travis-tests | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Makefile.config b/Makefile.config index 6f7324069..1e4352b9d 100644 --- a/Makefile.config +++ b/Makefile.config @@ -39,7 +39,7 @@ else $(info Note: Building without ia32 C/R, missed ia32 support in gcc) $(info $(info) That may be related to missing gcc-multilib in your) $(info $(info) distribution or you may have Debian with buggy toolchain) - $(info $(info) (issue https://github.com/xemul/criu/issues/315)) + $(info $(info) (issue https://github.com/checkpoint-restore/criu/issues/315)) endif endif diff --git a/criu/pagemap-cache.c b/criu/pagemap-cache.c index 61ab09387..d7915f99c 100644 --- a/criu/pagemap-cache.c +++ b/criu/pagemap-cache.c @@ -25,7 +25,7 @@ /* * It's a workaround for a kernel bug. In the 3.19 kernel when pagemap are read * for a few vma-s for one read call, it returns incorrect data. - * https://github.com/xemul/criu/issues/207 + * https://github.com/checkpoint-restore/criu/issues/207 */ static bool pagemap_cache_disabled; diff --git a/criu/pie/parasite.c b/criu/pie/parasite.c index 9a179ef8b..387a976da 100644 --- a/criu/pie/parasite.c +++ b/criu/pie/parasite.c @@ -293,7 +293,7 @@ static int dump_creds(struct parasite_dump_creds *args) args->uids[3] = sys_setfsuid(-1L); /* - * FIXME In https://github.com/xemul/criu/issues/95 it is + * FIXME In https://github.com/checkpoint-restore/criu/issues/95 it is * been reported that only low 16 bits are set upon syscall * on ARMv7. 
* diff --git a/scripts/crit-setup.py b/scripts/crit-setup.py index 60fef6a07..f40588142 100644 --- a/scripts/crit-setup.py +++ b/scripts/crit-setup.py @@ -5,7 +5,7 @@ setup(name = "crit", description = "CRiu Image Tool", author = "CRIU team", author_email = "criu@openvz.org", - url = "https://github.com/xemul/criu", + url = "https://github.com/checkpoint-restore/criu", package_dir = {'pycriu': 'lib/py'}, packages = ["pycriu", "pycriu.images"], scripts = ["crit/crit"] diff --git a/scripts/travis/travis-tests b/scripts/travis/travis-tests index c055860fd..980d74734 100755 --- a/scripts/travis/travis-tests +++ b/scripts/travis/travis-tests @@ -100,7 +100,7 @@ umask 0000 export SKIP_PREP=1 # The 3.19 kernel (from Ubuntu 14.04) has a bug. When /proc/PID/pagemap # is read for a few VMAs in one read call, incorrect data is returned. -# See https://github.com/xemul/criu/issues/207 +# See https://github.com/checkpoint-restore/criu/issues/207 # Kernel 4.4 (from Ubuntu 14.04.5 update) fixes this. uname -r | grep -q ^3\.19 && export CRIU_PMC_OFF=1 From 2a76ecc9fd3f01e40f8a28ca5e005c8be7d9e116 Mon Sep 17 00:00:00 2001 From: Sebastiaan van Stijn Date: Tue, 13 Aug 2019 12:29:53 +0200 Subject: [PATCH 0168/2030] README: fix broken links to github.com/xemul/criu Signed-off-by: Sebastiaan van Stijn --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 95608a9ea..16e8452b5 100644 --- a/README.md +++ b/README.md @@ -63,8 +63,8 @@ Linux kernel supporting checkpoint and restore for all the features it provides. looking for contributors of all kinds -- feedback, bug reports, testing, coding, writing, etc. Here are some useful hints to get involved. 
-* We have both -- [very simple](https://github.com/xemul/criu/issues?q=is%3Aissue+is%3Aopen+label%3Aenhancement) and [more sophisticated](https://github.com/xemul/criu/issues?q=is%3Aissue+is%3Aopen+label%3A%22new+feature%22) coding tasks; -* CRIU does need [extensive testing](https://github.com/xemul/criu/issues?q=is%3Aissue+is%3Aopen+label%3Atesting); +* We have both -- [very simple](https://github.com/checkpoint-restore/criu/issues?q=is%3Aissue+is%3Aopen+label%3Aenhancement) and [more sophisticated](https://github.com/checkpoint-restore/criu/issues?q=is%3Aissue+is%3Aopen+label%3A%22new+feature%22) coding tasks; +* CRIU does need [extensive testing](https://github.com/checkpoint-restore/criu/issues?q=is%3Aissue+is%3Aopen+label%3Atesting); * Documentation is always hard, we have [some information](https://criu.org/Category:Empty_articles) that is to be extracted from people's heads into wiki pages as well as [some texts](https://criu.org/Category:Editor_help_needed) that all need to be converted into useful articles; * Feedback is expected on the github issues page and on the [mailing list](https://lists.openvz.org/mailman/listinfo/criu); * For historical reasons we do not accept PRs, instead [patches are welcome](http://criu.org/How_to_submit_patches); From bbd922ed32ece54830380abe7706c111203f1774 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Mon, 29 Jul 2019 14:21:45 +0000 Subject: [PATCH 0169/2030] travis: add podman test case This adds the same tests currently running for docker also for podman. In addition this also tests podman --export/--import (migration) support.
Signed-off-by: Adrian Reber --- .travis.yml | 1 + scripts/travis/Makefile | 3 ++ scripts/travis/podman-test.sh | 69 +++++++++++++++++++++++++++++++++++ 3 files changed, 73 insertions(+) create mode 100755 scripts/travis/podman-test.sh diff --git a/.travis.yml b/.travis.yml index 37db39412..82ba9fbc8 100644 --- a/.travis.yml +++ b/.travis.yml @@ -25,6 +25,7 @@ env: - TR_ARCH=fedora-rawhide - TR_ARCH=fedora-rawhide-aarch64 - TR_ARCH=centos + - TR_ARCH=podman-test matrix: allow_failures: - env: TR_ARCH=docker-test diff --git a/scripts/travis/Makefile b/scripts/travis/Makefile index 77c937432..baddd6eb1 100644 --- a/scripts/travis/Makefile +++ b/scripts/travis/Makefile @@ -38,5 +38,8 @@ fedora-asan: docker-test: ./docker-test.sh +podman-test: + ./podman-test.sh + %: $(MAKE) -C ../build $@$(target-suffix) diff --git a/scripts/travis/podman-test.sh b/scripts/travis/podman-test.sh new file mode 100755 index 000000000..9bd1f3d8b --- /dev/null +++ b/scripts/travis/podman-test.sh @@ -0,0 +1,69 @@ +#!/bin/bash +set -x -e -o pipefail + +add-apt-repository -y ppa:projectatomic/ppa + +apt-get install -qq \ + apt-transport-https \ + ca-certificates \ + curl \ + software-properties-common + +apt-get update -qq + +apt-get install -qqy podman + +export SKIP_TRAVIS_TEST=1 + +./travis-tests + +cd ../../ + +make install + +podman info + +criu --version + +podman run --name cr -d docker.io/library/alpine /bin/sh -c 'i=0; while true; do echo $i; i=$(expr $i + 1); sleep 1; done' + +sleep 1 +for i in `seq 50`; do + echo "Test $i for podman container checkpoint" + podman exec cr ps axf + podman logs cr + [ `podman ps -f name=cr -q | wc -l` -eq "1" ] + podman container checkpoint cr + [ `podman ps -f name=cr -q | wc -l` -eq "0" ] + podman ps -a + podman container restore cr + [ `podman ps -f name=cr -q | wc -l` -eq "1" ] + podman logs cr +done + +for i in `seq 50`; do + echo "Test $i for podman container checkpoint --export" + podman ps -a + podman exec cr ps axf + podman logs cr + [ `podman 
ps -f name=cr -q | wc -l` -eq "1" ] + podman container checkpoint -l --export /tmp/chkpt.tar.gz + [ `podman ps -f name=cr -q | wc -l` -eq "0" ] + podman ps -a + podman rm -fa + podman ps -a + podman container restore --import /tmp/chkpt.tar.gz + [ `podman ps -f name=cr -q | wc -l` -eq "1" ] + podman container restore --name cr2 --import /tmp/chkpt.tar.gz + [ `podman ps -f name=cr2 -q | wc -l` -eq "1" ] + podman ps -a + podman logs cr + podman logs cr2 + podman ps -a + podman rm -fa + podman ps -a + podman container restore --import /tmp/chkpt.tar.gz + [ `podman ps -f name=cr -q | wc -l` -eq "1" ] + podman ps -a + rm -f /tmp/chkpt.tar.gz +done From 5ff4fcb753c566f1c40a1973e8c68a270c3b515b Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Mon, 26 Aug 2019 18:19:23 +0300 Subject: [PATCH 0170/2030] zdtm: make inotify04 require restore After adding the test for fake inotify events cleanup on restore, we've detected that we also have the same problem on dump/predump, criu touches files that are watched and generates fake events: [root@snorch criu]# test/zdtm.py run -t zdtm/static/inotify04 --norst -k always === Run 1/1 ================ zdtm/static/inotify04 ======================== Run zdtm/static/inotify04 in h ======================== Start test ./inotify04 --pidfile=inotify04.pid --outfile=inotify04.out --dirname=inotify04.test Run criu dump =[log]=> dump/zdtm/static/inotify04/36/1/dump.log ------------------------ grep Error ------------------------ (00.004050) fsnotify: openable (inode match) as home/snorch/devel/criu/test/zdtm/static/inotify04.test/inotify-testfile (00.004052) fsnotify: Dumping /home/snorch/devel/criu/test/zdtm/static/inotify04.test/inotify-testfile as path for handle (00.004055) fsnotify: id 0x000007 flags 0x000800 (00.004071) 36 fdinfo 5: pos: 0 flags: 4000/0 (00.004080) Warn (criu/fsnotify.c:336): fsnotify: The 0x000008 inotify events will be dropped ------------------------ ERROR OVER ------------------------ Send the 15 signal to 36 Wait 
for zdtm/static/inotify04(36) to die for 0.100000 ############### Test zdtm/static/inotify04 FAIL at result check ################ Test output: ================================ 18:20:10.558: 36: Event 0x20 18:20:10.558: 36: Event 0x10 18:20:10.558: 36: Event 0x20 18:20:10.558: 36: Event 0x10 18:20:10.558: 36: Event 0x20 18:20:10.558: 36: Event 0x10 18:20:10.558: 36: Event 0x20 18:20:10.558: 36: Event 0x10 18:20:10.558: 36: Read 8 events 18:20:10.558: 36: FAIL: inotify04.c:105: Found 8 unexpected inotify events (errno = 11 (Resource temporarily unavailable)) <<< ================================ ##################################### FAIL ##################################### To suppress fails in jenkins make the inotify04 test 'reqrst'. Still need to cleanup (or do not create) these events on dump/predump. --- test/zdtm/static/inotify04.desc | 1 + 1 file changed, 1 insertion(+) create mode 100644 test/zdtm/static/inotify04.desc diff --git a/test/zdtm/static/inotify04.desc b/test/zdtm/static/inotify04.desc new file mode 100644 index 000000000..f19b3d438 --- /dev/null +++ b/test/zdtm/static/inotify04.desc @@ -0,0 +1 @@ +{'flags': 'reqrst'} From 5aa72e723707e2bd7e8ed9841c2ad392781d066d Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Sat, 7 Sep 2019 15:46:22 +0300 Subject: [PATCH 0171/2030] py: Reformat everything into pep8 style As discussed on the mailing list, current .py files formatting does not conform to the world standard, so we should better reformat it. For this the yapf tool is used. 
The command I used was yapf -i $(find -name *.py) Signed-off-by: Pavel Emelyanov --- coredump/criu_coredump/coredump.py | 1191 ++++----- coredump/criu_coredump/elf.py | 1015 ++++---- lib/py/cli.py | 552 +++-- lib/py/criu.py | 409 ++-- lib/py/images/images.py | 813 +++--- lib/py/images/pb2dict.py | 576 +++-- scripts/crit-setup.py | 19 +- scripts/magic-gen.py | 88 +- soccr/test/run.py | 20 +- test/check_actions.py | 41 +- test/crit-recode.py | 100 +- test/exhaustive/pipe.py | 384 +-- test/exhaustive/unix.py | 1167 ++++----- test/inhfd/fifo.py | 46 +- test/inhfd/pipe.py | 14 +- test/inhfd/socket.py | 14 +- test/inhfd/tty.py | 35 +- test/others/ext-tty/run.py | 27 +- test/others/mounts/mounts.py | 47 +- test/others/rpc/config_file.py | 247 +- test/others/rpc/errno.py | 182 +- test/others/rpc/ps_test.py | 76 +- test/others/rpc/read.py | 2 +- test/others/rpc/restore-loop.py | 37 +- test/others/rpc/test.py | 75 +- test/others/rpc/version.py | 34 +- test/others/shell-job/run.py | 13 +- test/zdtm.py | 3681 +++++++++++++++------------- 28 files changed, 5738 insertions(+), 5167 deletions(-) diff --git a/coredump/criu_coredump/coredump.py b/coredump/criu_coredump/coredump.py index 2b0c37f1a..9b2c6c60c 100644 --- a/coredump/criu_coredump/coredump.py +++ b/coredump/criu_coredump/coredump.py @@ -36,795 +36,802 @@ from pycriu import images # Some memory-related constants PAGESIZE = 4096 status = { - "VMA_AREA_NONE" : 0 << 0, - "VMA_AREA_REGULAR" : 1 << 0, - "VMA_AREA_STACK" : 1 << 1, - "VMA_AREA_VSYSCALL" : 1 << 2, - "VMA_AREA_VDSO" : 1 << 3, - "VMA_FORCE_READ" : 1 << 4, - "VMA_AREA_HEAP" : 1 << 5, - "VMA_FILE_PRIVATE" : 1 << 6, - "VMA_FILE_SHARED" : 1 << 7, - "VMA_ANON_SHARED" : 1 << 8, - "VMA_ANON_PRIVATE" : 1 << 9, - "VMA_AREA_SYSVIPC" : 1 << 10, - "VMA_AREA_SOCKET" : 1 << 11, - "VMA_AREA_VVAR" : 1 << 12, - "VMA_AREA_AIORING" : 1 << 13, - "VMA_AREA_UNSUPP" : 1 << 31 + "VMA_AREA_NONE": 0 << 0, + "VMA_AREA_REGULAR": 1 << 0, + "VMA_AREA_STACK": 1 << 1, + "VMA_AREA_VSYSCALL": 1 << 
2, + "VMA_AREA_VDSO": 1 << 3, + "VMA_FORCE_READ": 1 << 4, + "VMA_AREA_HEAP": 1 << 5, + "VMA_FILE_PRIVATE": 1 << 6, + "VMA_FILE_SHARED": 1 << 7, + "VMA_ANON_SHARED": 1 << 8, + "VMA_ANON_PRIVATE": 1 << 9, + "VMA_AREA_SYSVIPC": 1 << 10, + "VMA_AREA_SOCKET": 1 << 11, + "VMA_AREA_VVAR": 1 << 12, + "VMA_AREA_AIORING": 1 << 13, + "VMA_AREA_UNSUPP": 1 << 31 } -prot = { - "PROT_READ" : 0x1, - "PROT_WRITE" : 0x2, - "PROT_EXEC" : 0x4 -} +prot = {"PROT_READ": 0x1, "PROT_WRITE": 0x2, "PROT_EXEC": 0x4} + class elf_note: - nhdr = None # Elf_Nhdr; - owner = None # i.e. CORE or LINUX; - data = None # Ctypes structure with note data; + nhdr = None # Elf_Nhdr; + owner = None # i.e. CORE or LINUX; + data = None # Ctypes structure with note data; class coredump: - """ + """ A class to keep elf core dump components inside and functions to properly write them to file. """ - ehdr = None # Elf ehdr; - phdrs = [] # Array of Phdrs; - notes = [] # Array of elf_notes; - vmas = [] # Array of BytesIO with memory content; - # FIXME keeping all vmas in memory is a bad idea; + ehdr = None # Elf ehdr; + phdrs = [] # Array of Phdrs; + notes = [] # Array of elf_notes; + vmas = [] # Array of BytesIO with memory content; - def write(self, f): - """ + # FIXME keeping all vmas in memory is a bad idea; + + def write(self, f): + """ Write core dump to file f. 
""" - buf = io.BytesIO() - buf.write(self.ehdr) + buf = io.BytesIO() + buf.write(self.ehdr) - for phdr in self.phdrs: - buf.write(phdr) + for phdr in self.phdrs: + buf.write(phdr) - for note in self.notes: - buf.write(note.nhdr) - buf.write(note.owner) - buf.write("\0"*(8-len(note.owner))) - buf.write(note.data) + for note in self.notes: + buf.write(note.nhdr) + buf.write(note.owner) + buf.write("\0" * (8 - len(note.owner))) + buf.write(note.data) - offset = ctypes.sizeof(elf.Elf64_Ehdr()) - offset += (len(self.vmas) + 1)*ctypes.sizeof(elf.Elf64_Phdr()) + offset = ctypes.sizeof(elf.Elf64_Ehdr()) + offset += (len(self.vmas) + 1) * ctypes.sizeof(elf.Elf64_Phdr()) - filesz = 0 - for note in self.notes: - filesz += ctypes.sizeof(note.nhdr) + ctypes.sizeof(note.data) + 8 + filesz = 0 + for note in self.notes: + filesz += ctypes.sizeof(note.nhdr) + ctypes.sizeof(note.data) + 8 - note_align = PAGESIZE - ((offset + filesz) % PAGESIZE) + note_align = PAGESIZE - ((offset + filesz) % PAGESIZE) - if note_align == PAGESIZE: - note_align = 0 + if note_align == PAGESIZE: + note_align = 0 - if note_align != 0: - scratch = (ctypes.c_char * note_align)() - ctypes.memset(ctypes.addressof(scratch), 0, ctypes.sizeof(scratch)) - buf.write(scratch) + if note_align != 0: + scratch = (ctypes.c_char * note_align)() + ctypes.memset(ctypes.addressof(scratch), 0, ctypes.sizeof(scratch)) + buf.write(scratch) - for vma in self.vmas: - buf.write(vma.data) + for vma in self.vmas: + buf.write(vma.data) - buf.seek(0) - f.write(buf.read()) + buf.seek(0) + f.write(buf.read()) class coredump_generator: - """ + """ Generate core dump from criu images. 
""" - coredumps = {} # coredumps by pid; + coredumps = {} # coredumps by pid; - pstree = {} # process info by pid; - cores = {} # cores by pid; - mms = {} # mm by pid; - reg_files = None # reg-files; - pagemaps = {} # pagemap by pid; + pstree = {} # process info by pid; + cores = {} # cores by pid; + mms = {} # mm by pid; + reg_files = None # reg-files; + pagemaps = {} # pagemap by pid; - def _img_open_and_strip(self, name, single = False, pid = None): - """ + def _img_open_and_strip(self, name, single=False, pid=None): + """ Load criu image and strip it from magic and redundant list. """ - path = self._imgs_dir + "/" + name - if pid: - path += "-"+str(pid) - path += ".img" + path = self._imgs_dir + "/" + name + if pid: + path += "-" + str(pid) + path += ".img" - with open(path) as f: - img = images.load(f) + with open(path) as f: + img = images.load(f) - if single: - return img["entries"][0] - else: - return img["entries"] + if single: + return img["entries"][0] + else: + return img["entries"] - - def __call__(self, imgs_dir): - """ + def __call__(self, imgs_dir): + """ Parse criu images stored in directory imgs_dir to fill core dumps. 
""" - self._imgs_dir = imgs_dir - pstree = self._img_open_and_strip("pstree") + self._imgs_dir = imgs_dir + pstree = self._img_open_and_strip("pstree") - for p in pstree: - pid = p['pid'] + for p in pstree: + pid = p['pid'] - self.pstree[pid] = p - for tid in p['threads']: - self.cores[tid] = self._img_open_and_strip("core", True, tid) - self.mms[pid] = self._img_open_and_strip("mm", True, pid) - self.pagemaps[pid] = self._img_open_and_strip("pagemap", False, pid) + self.pstree[pid] = p + for tid in p['threads']: + self.cores[tid] = self._img_open_and_strip("core", True, tid) + self.mms[pid] = self._img_open_and_strip("mm", True, pid) + self.pagemaps[pid] = self._img_open_and_strip( + "pagemap", False, pid) - files = self._img_open_and_strip("files", False) - self.reg_files = [ x["reg"] for x in files if x["type"]=="REG" ] + files = self._img_open_and_strip("files", False) + self.reg_files = [x["reg"] for x in files if x["type"] == "REG"] - for pid in self.pstree: - self.coredumps[pid] = self._gen_coredump(pid) + for pid in self.pstree: + self.coredumps[pid] = self._gen_coredump(pid) - return self.coredumps + return self.coredumps - - def write(self, coredumps_dir, pid = None): - """ + def write(self, coredumps_dir, pid=None): + """ Write core dumpt to cores_dir directory. Specify pid to choose core dump of only one process. """ - for p in self.coredumps: - if pid and p != pid: - continue - with open(coredumps_dir+"/"+"core."+str(p), 'w+') as f: - self.coredumps[p].write(f) + for p in self.coredumps: + if pid and p != pid: + continue + with open(coredumps_dir + "/" + "core." + str(p), 'w+') as f: + self.coredumps[p].write(f) - def _gen_coredump(self, pid): - """ + def _gen_coredump(self, pid): + """ Generate core dump for pid. """ - cd = coredump() + cd = coredump() - # Generate everything backwards so it is easier to calculate offset. 
- cd.vmas = self._gen_vmas(pid) - cd.notes = self._gen_notes(pid) - cd.phdrs = self._gen_phdrs(pid, cd.notes, cd.vmas) - cd.ehdr = self._gen_ehdr(pid, cd.phdrs) + # Generate everything backwards so it is easier to calculate offset. + cd.vmas = self._gen_vmas(pid) + cd.notes = self._gen_notes(pid) + cd.phdrs = self._gen_phdrs(pid, cd.notes, cd.vmas) + cd.ehdr = self._gen_ehdr(pid, cd.phdrs) - return cd + return cd - def _gen_ehdr(self, pid, phdrs): - """ + def _gen_ehdr(self, pid, phdrs): + """ Generate elf header for process pid with program headers phdrs. """ - ehdr = elf.Elf64_Ehdr() + ehdr = elf.Elf64_Ehdr() - ctypes.memset(ctypes.addressof(ehdr), 0, ctypes.sizeof(ehdr)) - ehdr.e_ident[elf.EI_MAG0] = elf.ELFMAG0 - ehdr.e_ident[elf.EI_MAG1] = elf.ELFMAG1 - ehdr.e_ident[elf.EI_MAG2] = elf.ELFMAG2 - ehdr.e_ident[elf.EI_MAG3] = elf.ELFMAG3 - ehdr.e_ident[elf.EI_CLASS] = elf.ELFCLASS64 - ehdr.e_ident[elf.EI_DATA] = elf.ELFDATA2LSB - ehdr.e_ident[elf.EI_VERSION] = elf.EV_CURRENT + ctypes.memset(ctypes.addressof(ehdr), 0, ctypes.sizeof(ehdr)) + ehdr.e_ident[elf.EI_MAG0] = elf.ELFMAG0 + ehdr.e_ident[elf.EI_MAG1] = elf.ELFMAG1 + ehdr.e_ident[elf.EI_MAG2] = elf.ELFMAG2 + ehdr.e_ident[elf.EI_MAG3] = elf.ELFMAG3 + ehdr.e_ident[elf.EI_CLASS] = elf.ELFCLASS64 + ehdr.e_ident[elf.EI_DATA] = elf.ELFDATA2LSB + ehdr.e_ident[elf.EI_VERSION] = elf.EV_CURRENT - ehdr.e_type = elf.ET_CORE - ehdr.e_machine = elf.EM_X86_64 - ehdr.e_version = elf.EV_CURRENT - ehdr.e_phoff = ctypes.sizeof(elf.Elf64_Ehdr()) - ehdr.e_ehsize = ctypes.sizeof(elf.Elf64_Ehdr()) - ehdr.e_phentsize = ctypes.sizeof(elf.Elf64_Phdr()) - #FIXME Case len(phdrs) > PN_XNUM should be handled properly. - # See fs/binfmt_elf.c from linux kernel. 
- ehdr.e_phnum = len(phdrs) + ehdr.e_type = elf.ET_CORE + ehdr.e_machine = elf.EM_X86_64 + ehdr.e_version = elf.EV_CURRENT + ehdr.e_phoff = ctypes.sizeof(elf.Elf64_Ehdr()) + ehdr.e_ehsize = ctypes.sizeof(elf.Elf64_Ehdr()) + ehdr.e_phentsize = ctypes.sizeof(elf.Elf64_Phdr()) + #FIXME Case len(phdrs) > PN_XNUM should be handled properly. + # See fs/binfmt_elf.c from linux kernel. + ehdr.e_phnum = len(phdrs) - return ehdr + return ehdr - def _gen_phdrs(self, pid, notes, vmas): - """ + def _gen_phdrs(self, pid, notes, vmas): + """ Generate program headers for process pid. """ - phdrs = [] + phdrs = [] - offset = ctypes.sizeof(elf.Elf64_Ehdr()) - offset += (len(vmas) + 1)*ctypes.sizeof(elf.Elf64_Phdr()) + offset = ctypes.sizeof(elf.Elf64_Ehdr()) + offset += (len(vmas) + 1) * ctypes.sizeof(elf.Elf64_Phdr()) - filesz = 0 - for note in notes: - filesz += ctypes.sizeof(note.nhdr) + ctypes.sizeof(note.data) + 8 + filesz = 0 + for note in notes: + filesz += ctypes.sizeof(note.nhdr) + ctypes.sizeof(note.data) + 8 - # PT_NOTE - phdr = elf.Elf64_Phdr() - ctypes.memset(ctypes.addressof(phdr), 0, ctypes.sizeof(phdr)) - phdr.p_type = elf.PT_NOTE - phdr.p_offset = offset - phdr.p_filesz = filesz + # PT_NOTE + phdr = elf.Elf64_Phdr() + ctypes.memset(ctypes.addressof(phdr), 0, ctypes.sizeof(phdr)) + phdr.p_type = elf.PT_NOTE + phdr.p_offset = offset + phdr.p_filesz = filesz - phdrs.append(phdr) + phdrs.append(phdr) - note_align = PAGESIZE - ((offset + filesz) % PAGESIZE) + note_align = PAGESIZE - ((offset + filesz) % PAGESIZE) - if note_align == PAGESIZE: - note_align = 0 + if note_align == PAGESIZE: + note_align = 0 - offset += note_align + offset += note_align - # VMA phdrs + # VMA phdrs - for vma in vmas: - offset += filesz - filesz = vma.filesz - phdr = elf.Elf64_Phdr() - ctypes.memset(ctypes.addressof(phdr), 0, ctypes.sizeof(phdr)) - phdr.p_type = elf.PT_LOAD - phdr.p_align = PAGESIZE - phdr.p_paddr = 0 - phdr.p_offset = offset - phdr.p_vaddr = vma.start - phdr.p_memsz = 
vma.memsz - phdr.p_filesz = vma.filesz - phdr.p_flags = vma.flags + for vma in vmas: + offset += filesz + filesz = vma.filesz + phdr = elf.Elf64_Phdr() + ctypes.memset(ctypes.addressof(phdr), 0, ctypes.sizeof(phdr)) + phdr.p_type = elf.PT_LOAD + phdr.p_align = PAGESIZE + phdr.p_paddr = 0 + phdr.p_offset = offset + phdr.p_vaddr = vma.start + phdr.p_memsz = vma.memsz + phdr.p_filesz = vma.filesz + phdr.p_flags = vma.flags - phdrs.append(phdr) + phdrs.append(phdr) - return phdrs + return phdrs - def _gen_prpsinfo(self, pid): - """ + def _gen_prpsinfo(self, pid): + """ Generate NT_PRPSINFO note for process pid. """ - pstree = self.pstree[pid] - core = self.cores[pid] + pstree = self.pstree[pid] + core = self.cores[pid] - prpsinfo = elf.elf_prpsinfo() - ctypes.memset(ctypes.addressof(prpsinfo), 0, ctypes.sizeof(prpsinfo)) + prpsinfo = elf.elf_prpsinfo() + ctypes.memset(ctypes.addressof(prpsinfo), 0, ctypes.sizeof(prpsinfo)) - # FIXME TASK_ALIVE means that it is either running or sleeping, need to - # teach criu to distinguish them. - TASK_ALIVE = 0x1 - # XXX A bit of confusion here, as in ps "dead" and "zombie" - # state are two separate states, and we use TASK_DEAD for zombies. - TASK_DEAD = 0x2 - TASK_STOPPED = 0x3 - if core["tc"]["task_state"] == TASK_ALIVE: - prpsinfo.pr_state = 0 - if core["tc"]["task_state"] == TASK_DEAD: - prpsinfo.pr_state = 4 - if core["tc"]["task_state"] == TASK_STOPPED: - prpsinfo.pr_state = 3 - # Don't even ask me why it is so, just borrowed from linux - # source and made pr_state match. - prpsinfo.pr_sname = '.' 
if prpsinfo.pr_state > 5 else "RSDTZW"[prpsinfo.pr_state] - prpsinfo.pr_zomb = 1 if prpsinfo.pr_state == 4 else 0 - prpsinfo.pr_nice = core["thread_core"]["sched_prio"] if "sched_prio" in core["thread_core"] else 0 - prpsinfo.pr_flag = core["tc"]["flags"] - prpsinfo.pr_uid = core["thread_core"]["creds"]["uid"] - prpsinfo.pr_gid = core["thread_core"]["creds"]["gid"] - prpsinfo.pr_pid = pid - prpsinfo.pr_ppid = pstree["ppid"] - prpsinfo.pr_pgrp = pstree["pgid"] - prpsinfo.pr_sid = pstree["sid"] - prpsinfo.pr_fname = core["tc"]["comm"] - prpsinfo.pr_psargs = self._gen_cmdline(pid) + # FIXME TASK_ALIVE means that it is either running or sleeping, need to + # teach criu to distinguish them. + TASK_ALIVE = 0x1 + # XXX A bit of confusion here, as in ps "dead" and "zombie" + # state are two separate states, and we use TASK_DEAD for zombies. + TASK_DEAD = 0x2 + TASK_STOPPED = 0x3 + if core["tc"]["task_state"] == TASK_ALIVE: + prpsinfo.pr_state = 0 + if core["tc"]["task_state"] == TASK_DEAD: + prpsinfo.pr_state = 4 + if core["tc"]["task_state"] == TASK_STOPPED: + prpsinfo.pr_state = 3 + # Don't even ask me why it is so, just borrowed from linux + # source and made pr_state match. + prpsinfo.pr_sname = '.' 
if prpsinfo.pr_state > 5 else "RSDTZW" [ + prpsinfo.pr_state] + prpsinfo.pr_zomb = 1 if prpsinfo.pr_state == 4 else 0 + prpsinfo.pr_nice = core["thread_core"][ + "sched_prio"] if "sched_prio" in core["thread_core"] else 0 + prpsinfo.pr_flag = core["tc"]["flags"] + prpsinfo.pr_uid = core["thread_core"]["creds"]["uid"] + prpsinfo.pr_gid = core["thread_core"]["creds"]["gid"] + prpsinfo.pr_pid = pid + prpsinfo.pr_ppid = pstree["ppid"] + prpsinfo.pr_pgrp = pstree["pgid"] + prpsinfo.pr_sid = pstree["sid"] + prpsinfo.pr_fname = core["tc"]["comm"] + prpsinfo.pr_psargs = self._gen_cmdline(pid) - nhdr = elf.Elf64_Nhdr() - nhdr.n_namesz = 5 - nhdr.n_descsz = ctypes.sizeof(elf.elf_prpsinfo()) - nhdr.n_type = elf.NT_PRPSINFO + nhdr = elf.Elf64_Nhdr() + nhdr.n_namesz = 5 + nhdr.n_descsz = ctypes.sizeof(elf.elf_prpsinfo()) + nhdr.n_type = elf.NT_PRPSINFO - note = elf_note() - note.data = prpsinfo - note.owner = "CORE" - note.nhdr = nhdr + note = elf_note() + note.data = prpsinfo + note.owner = "CORE" + note.nhdr = nhdr - return note + return note - def _gen_prstatus(self, pid, tid): - """ + def _gen_prstatus(self, pid, tid): + """ Generate NT_PRSTATUS note for thread tid of process pid. """ - core = self.cores[tid] - regs = core["thread_info"]["gpregs"] - pstree = self.pstree[pid] + core = self.cores[tid] + regs = core["thread_info"]["gpregs"] + pstree = self.pstree[pid] - prstatus = elf.elf_prstatus() + prstatus = elf.elf_prstatus() - ctypes.memset(ctypes.addressof(prstatus), 0, ctypes.sizeof(prstatus)) + ctypes.memset(ctypes.addressof(prstatus), 0, ctypes.sizeof(prstatus)) - #FIXME setting only some of the fields for now. Revisit later. - prstatus.pr_pid = tid - prstatus.pr_ppid = pstree["ppid"] - prstatus.pr_pgrp = pstree["pgid"] - prstatus.pr_sid = pstree["sid"] + #FIXME setting only some of the fields for now. Revisit later. 
+ prstatus.pr_pid = tid + prstatus.pr_ppid = pstree["ppid"] + prstatus.pr_pgrp = pstree["pgid"] + prstatus.pr_sid = pstree["sid"] - prstatus.pr_reg.r15 = regs["r15"] - prstatus.pr_reg.r14 = regs["r14"] - prstatus.pr_reg.r13 = regs["r13"] - prstatus.pr_reg.r12 = regs["r12"] - prstatus.pr_reg.rbp = regs["bp"] - prstatus.pr_reg.rbx = regs["bx"] - prstatus.pr_reg.r11 = regs["r11"] - prstatus.pr_reg.r10 = regs["r10"] - prstatus.pr_reg.r9 = regs["r9"] - prstatus.pr_reg.r8 = regs["r8"] - prstatus.pr_reg.rax = regs["ax"] - prstatus.pr_reg.rcx = regs["cx"] - prstatus.pr_reg.rdx = regs["dx"] - prstatus.pr_reg.rsi = regs["si"] - prstatus.pr_reg.rdi = regs["di"] - prstatus.pr_reg.orig_rax = regs["orig_ax"] - prstatus.pr_reg.rip = regs["ip"] - prstatus.pr_reg.cs = regs["cs"] - prstatus.pr_reg.eflags = regs["flags"] - prstatus.pr_reg.rsp = regs["sp"] - prstatus.pr_reg.ss = regs["ss"] - prstatus.pr_reg.fs_base = regs["fs_base"] - prstatus.pr_reg.gs_base = regs["gs_base"] - prstatus.pr_reg.ds = regs["ds"] - prstatus.pr_reg.es = regs["es"] - prstatus.pr_reg.fs = regs["fs"] - prstatus.pr_reg.gs = regs["gs"] + prstatus.pr_reg.r15 = regs["r15"] + prstatus.pr_reg.r14 = regs["r14"] + prstatus.pr_reg.r13 = regs["r13"] + prstatus.pr_reg.r12 = regs["r12"] + prstatus.pr_reg.rbp = regs["bp"] + prstatus.pr_reg.rbx = regs["bx"] + prstatus.pr_reg.r11 = regs["r11"] + prstatus.pr_reg.r10 = regs["r10"] + prstatus.pr_reg.r9 = regs["r9"] + prstatus.pr_reg.r8 = regs["r8"] + prstatus.pr_reg.rax = regs["ax"] + prstatus.pr_reg.rcx = regs["cx"] + prstatus.pr_reg.rdx = regs["dx"] + prstatus.pr_reg.rsi = regs["si"] + prstatus.pr_reg.rdi = regs["di"] + prstatus.pr_reg.orig_rax = regs["orig_ax"] + prstatus.pr_reg.rip = regs["ip"] + prstatus.pr_reg.cs = regs["cs"] + prstatus.pr_reg.eflags = regs["flags"] + prstatus.pr_reg.rsp = regs["sp"] + prstatus.pr_reg.ss = regs["ss"] + prstatus.pr_reg.fs_base = regs["fs_base"] + prstatus.pr_reg.gs_base = regs["gs_base"] + prstatus.pr_reg.ds = regs["ds"] + 
prstatus.pr_reg.es = regs["es"] + prstatus.pr_reg.fs = regs["fs"] + prstatus.pr_reg.gs = regs["gs"] - nhdr = elf.Elf64_Nhdr() - nhdr.n_namesz = 5 - nhdr.n_descsz = ctypes.sizeof(elf.elf_prstatus()) - nhdr.n_type = elf.NT_PRSTATUS + nhdr = elf.Elf64_Nhdr() + nhdr.n_namesz = 5 + nhdr.n_descsz = ctypes.sizeof(elf.elf_prstatus()) + nhdr.n_type = elf.NT_PRSTATUS - note = elf_note() - note.data = prstatus - note.owner = "CORE" - note.nhdr = nhdr + note = elf_note() + note.data = prstatus + note.owner = "CORE" + note.nhdr = nhdr - return note + return note - def _gen_fpregset(self, pid, tid): - """ + def _gen_fpregset(self, pid, tid): + """ Generate NT_FPREGSET note for thread tid of process pid. """ - core = self.cores[tid] - regs = core["thread_info"]["fpregs"] + core = self.cores[tid] + regs = core["thread_info"]["fpregs"] - fpregset = elf.elf_fpregset_t() - ctypes.memset(ctypes.addressof(fpregset), 0, ctypes.sizeof(fpregset)) + fpregset = elf.elf_fpregset_t() + ctypes.memset(ctypes.addressof(fpregset), 0, ctypes.sizeof(fpregset)) - fpregset.cwd = regs["cwd"] - fpregset.swd = regs["swd"] - fpregset.ftw = regs["twd"] - fpregset.fop = regs["fop"] - fpregset.rip = regs["rip"] - fpregset.rdp = regs["rdp"] - fpregset.mxcsr = regs["mxcsr"] - fpregset.mxcr_mask = regs["mxcsr_mask"] - fpregset.st_space = (ctypes.c_uint * len(regs["st_space"]))(*regs["st_space"]) - fpregset.xmm_space = (ctypes.c_uint * len(regs["xmm_space"]))(*regs["xmm_space"]) - #fpregset.padding = regs["padding"] unused + fpregset.cwd = regs["cwd"] + fpregset.swd = regs["swd"] + fpregset.ftw = regs["twd"] + fpregset.fop = regs["fop"] + fpregset.rip = regs["rip"] + fpregset.rdp = regs["rdp"] + fpregset.mxcsr = regs["mxcsr"] + fpregset.mxcr_mask = regs["mxcsr_mask"] + fpregset.st_space = (ctypes.c_uint * len(regs["st_space"]))( + *regs["st_space"]) + fpregset.xmm_space = (ctypes.c_uint * len(regs["xmm_space"]))( + *regs["xmm_space"]) + #fpregset.padding = regs["padding"] unused - nhdr = elf.Elf64_Nhdr() - 
nhdr.n_namesz = 5 - nhdr.n_descsz = ctypes.sizeof(elf.elf_fpregset_t()) - nhdr.n_type = elf.NT_FPREGSET + nhdr = elf.Elf64_Nhdr() + nhdr.n_namesz = 5 + nhdr.n_descsz = ctypes.sizeof(elf.elf_fpregset_t()) + nhdr.n_type = elf.NT_FPREGSET - note = elf_note() - note.data = fpregset - note.owner = "CORE" - note.nhdr = nhdr + note = elf_note() + note.data = fpregset + note.owner = "CORE" + note.nhdr = nhdr - return note + return note - def _gen_x86_xstate(self, pid, tid): - """ + def _gen_x86_xstate(self, pid, tid): + """ Generate NT_X86_XSTATE note for thread tid of process pid. """ - core = self.cores[tid] - fpregs = core["thread_info"]["fpregs"] + core = self.cores[tid] + fpregs = core["thread_info"]["fpregs"] - data = elf.elf_xsave_struct() - ctypes.memset(ctypes.addressof(data), 0, ctypes.sizeof(data)) + data = elf.elf_xsave_struct() + ctypes.memset(ctypes.addressof(data), 0, ctypes.sizeof(data)) - data.i387.cwd = fpregs["cwd"] - data.i387.swd = fpregs["swd"] - data.i387.twd = fpregs["twd"] - data.i387.fop = fpregs["fop"] - data.i387.rip = fpregs["rip"] - data.i387.rdp = fpregs["rdp"] - data.i387.mxcsr = fpregs["mxcsr"] - data.i387.mxcsr_mask = fpregs["mxcsr_mask"] - data.i387.st_space = (ctypes.c_uint * len(fpregs["st_space"]))(*fpregs["st_space"]) - data.i387.xmm_space = (ctypes.c_uint * len(fpregs["xmm_space"]))(*fpregs["xmm_space"]) + data.i387.cwd = fpregs["cwd"] + data.i387.swd = fpregs["swd"] + data.i387.twd = fpregs["twd"] + data.i387.fop = fpregs["fop"] + data.i387.rip = fpregs["rip"] + data.i387.rdp = fpregs["rdp"] + data.i387.mxcsr = fpregs["mxcsr"] + data.i387.mxcsr_mask = fpregs["mxcsr_mask"] + data.i387.st_space = (ctypes.c_uint * len(fpregs["st_space"]))( + *fpregs["st_space"]) + data.i387.xmm_space = (ctypes.c_uint * len(fpregs["xmm_space"]))( + *fpregs["xmm_space"]) - if "xsave" in fpregs: - data.xsave_hdr.xstate_bv = fpregs["xsave"]["xstate_bv"] - data.ymmh.ymmh_space = (ctypes.c_uint * 
len(fpregs["xsave"]["ymmh_space"]))(*fpregs["xsave"]["ymmh_space"]) + if "xsave" in fpregs: + data.xsave_hdr.xstate_bv = fpregs["xsave"]["xstate_bv"] + data.ymmh.ymmh_space = (ctypes.c_uint * + len(fpregs["xsave"]["ymmh_space"]))( + *fpregs["xsave"]["ymmh_space"]) - nhdr = elf.Elf64_Nhdr() - nhdr.n_namesz = 6 - nhdr.n_descsz = ctypes.sizeof(data) - nhdr.n_type = elf.NT_X86_XSTATE + nhdr = elf.Elf64_Nhdr() + nhdr.n_namesz = 6 + nhdr.n_descsz = ctypes.sizeof(data) + nhdr.n_type = elf.NT_X86_XSTATE - note = elf_note() - note.data = data - note.owner = "LINUX" - note.nhdr = nhdr + note = elf_note() + note.data = data + note.owner = "LINUX" + note.nhdr = nhdr - return note + return note - def _gen_siginfo(self, pid, tid): - """ + def _gen_siginfo(self, pid, tid): + """ Generate NT_SIGINFO note for thread tid of process pid. """ - siginfo = elf.siginfo_t() - # FIXME zeroify everything for now - ctypes.memset(ctypes.addressof(siginfo), 0, ctypes.sizeof(siginfo)) + siginfo = elf.siginfo_t() + # FIXME zeroify everything for now + ctypes.memset(ctypes.addressof(siginfo), 0, ctypes.sizeof(siginfo)) - nhdr = elf.Elf64_Nhdr() - nhdr.n_namesz = 5 - nhdr.n_descsz = ctypes.sizeof(elf.siginfo_t()) - nhdr.n_type = elf.NT_SIGINFO + nhdr = elf.Elf64_Nhdr() + nhdr.n_namesz = 5 + nhdr.n_descsz = ctypes.sizeof(elf.siginfo_t()) + nhdr.n_type = elf.NT_SIGINFO - note = elf_note() - note.data = siginfo - note.owner = "CORE" - note.nhdr = nhdr + note = elf_note() + note.data = siginfo + note.owner = "CORE" + note.nhdr = nhdr - return note + return note - def _gen_auxv(self, pid): - """ + def _gen_auxv(self, pid): + """ Generate NT_AUXV note for thread tid of process pid. 
""" - mm = self.mms[pid] - num_auxv = len(mm["mm_saved_auxv"])/2 + mm = self.mms[pid] + num_auxv = len(mm["mm_saved_auxv"]) / 2 - class elf_auxv(ctypes.Structure): - _fields_ = [("auxv", elf.Elf64_auxv_t*num_auxv)] + class elf_auxv(ctypes.Structure): + _fields_ = [("auxv", elf.Elf64_auxv_t * num_auxv)] - auxv = elf_auxv() - for i in range(num_auxv): - auxv.auxv[i].a_type = mm["mm_saved_auxv"][i] - auxv.auxv[i].a_val = mm["mm_saved_auxv"][i+1] + auxv = elf_auxv() + for i in range(num_auxv): + auxv.auxv[i].a_type = mm["mm_saved_auxv"][i] + auxv.auxv[i].a_val = mm["mm_saved_auxv"][i + 1] - nhdr = elf.Elf64_Nhdr() - nhdr.n_namesz = 5 - nhdr.n_descsz = ctypes.sizeof(elf_auxv()) - nhdr.n_type = elf.NT_AUXV + nhdr = elf.Elf64_Nhdr() + nhdr.n_namesz = 5 + nhdr.n_descsz = ctypes.sizeof(elf_auxv()) + nhdr.n_type = elf.NT_AUXV - note = elf_note() - note.data = auxv - note.owner = "CORE" - note.nhdr = nhdr + note = elf_note() + note.data = auxv + note.owner = "CORE" + note.nhdr = nhdr - return note + return note - def _gen_files(self, pid): - """ + def _gen_files(self, pid): + """ Generate NT_FILE note for process pid. 
""" - mm = self.mms[pid] + mm = self.mms[pid] - class mmaped_file_info: - start = None - end = None - file_ofs = None - name = None + class mmaped_file_info: + start = None + end = None + file_ofs = None + name = None - infos = [] - for vma in mm["vmas"]: - if vma["shmid"] == 0: - # shmid == 0 means that it is not a file - continue + infos = [] + for vma in mm["vmas"]: + if vma["shmid"] == 0: + # shmid == 0 means that it is not a file + continue - shmid = vma["shmid"] - size = vma["end"] - vma["start"] - off = vma["pgoff"]/PAGESIZE + shmid = vma["shmid"] + size = vma["end"] - vma["start"] + off = vma["pgoff"] / PAGESIZE - files = self.reg_files - fname = filter(lambda x: x["id"] == shmid, files)[0]["name"] + files = self.reg_files + fname = filter(lambda x: x["id"] == shmid, files)[0]["name"] - info = mmaped_file_info() - info.start = vma["start"] - info.end = vma["end"] - info.file_ofs = off - info.name = fname + info = mmaped_file_info() + info.start = vma["start"] + info.end = vma["end"] + info.file_ofs = off + info.name = fname - infos.append(info) + infos.append(info) - # /* - # * Format of NT_FILE note: - # * - # * long count -- how many files are mapped - # * long page_size -- units for file_ofs - # * array of [COUNT] elements of - # * long start - # * long end - # * long file_ofs - # * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL... 
- # */ - fields = [] - fields.append(("count", ctypes.c_long)) - fields.append(("page_size", ctypes.c_long)) - for i in range(len(infos)): - fields.append(("start"+str(i), ctypes.c_long)) - fields.append(("end"+str(i), ctypes.c_long)) - fields.append(("file_ofs"+str(i), ctypes.c_long)) - for i in range(len(infos)): - fields.append(("name"+str(i), ctypes.c_char*(len(infos[i].name)+1))) + # /* + # * Format of NT_FILE note: + # * + # * long count -- how many files are mapped + # * long page_size -- units for file_ofs + # * array of [COUNT] elements of + # * long start + # * long end + # * long file_ofs + # * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL... + # */ + fields = [] + fields.append(("count", ctypes.c_long)) + fields.append(("page_size", ctypes.c_long)) + for i in range(len(infos)): + fields.append(("start" + str(i), ctypes.c_long)) + fields.append(("end" + str(i), ctypes.c_long)) + fields.append(("file_ofs" + str(i), ctypes.c_long)) + for i in range(len(infos)): + fields.append( + ("name" + str(i), ctypes.c_char * (len(infos[i].name) + 1))) - class elf_files(ctypes.Structure): - _fields_ = fields + class elf_files(ctypes.Structure): + _fields_ = fields - data = elf_files() - data.count = len(infos) - data.page_size = PAGESIZE - for i in range(len(infos)): - info = infos[i] - setattr(data, "start"+str(i), info.start) - setattr(data, "end"+str(i), info.end) - setattr(data, "file_ofs"+str(i), info.file_ofs) - setattr(data, "name"+str(i), info.name) + data = elf_files() + data.count = len(infos) + data.page_size = PAGESIZE + for i in range(len(infos)): + info = infos[i] + setattr(data, "start" + str(i), info.start) + setattr(data, "end" + str(i), info.end) + setattr(data, "file_ofs" + str(i), info.file_ofs) + setattr(data, "name" + str(i), info.name) - nhdr = elf.Elf64_Nhdr() + nhdr = elf.Elf64_Nhdr() - nhdr.n_namesz = 5#XXX strlen + 1 - nhdr.n_descsz = ctypes.sizeof(elf_files()) - nhdr.n_type = elf.NT_FILE + nhdr.n_namesz = 5 #XXX strlen + 1 + 
nhdr.n_descsz = ctypes.sizeof(elf_files()) + nhdr.n_type = elf.NT_FILE - note = elf_note() - note.nhdr = nhdr - note.owner = "CORE" - note.data = data + note = elf_note() + note.nhdr = nhdr + note.owner = "CORE" + note.data = data - return note + return note - def _gen_thread_notes(self, pid, tid): - notes = [] + def _gen_thread_notes(self, pid, tid): + notes = [] - notes.append(self._gen_prstatus(pid, tid)) - notes.append(self._gen_fpregset(pid, tid)) - notes.append(self._gen_x86_xstate(pid, tid)) - notes.append(self._gen_siginfo(pid, tid)) + notes.append(self._gen_prstatus(pid, tid)) + notes.append(self._gen_fpregset(pid, tid)) + notes.append(self._gen_x86_xstate(pid, tid)) + notes.append(self._gen_siginfo(pid, tid)) - return notes + return notes - def _gen_notes(self, pid): - """ + def _gen_notes(self, pid): + """ Generate notes for core dump of process pid. """ - notes = [] + notes = [] - notes.append(self._gen_prpsinfo(pid)) + notes.append(self._gen_prpsinfo(pid)) - threads = self.pstree[pid]["threads"] + threads = self.pstree[pid]["threads"] - # Main thread first - notes += self._gen_thread_notes(pid, pid) + # Main thread first + notes += self._gen_thread_notes(pid, pid) - # Then other threads - for tid in threads: - if tid == pid: - continue + # Then other threads + for tid in threads: + if tid == pid: + continue - notes += self._gen_thread_notes(pid, tid) + notes += self._gen_thread_notes(pid, tid) - notes.append(self._gen_auxv(pid)) - notes.append(self._gen_files(pid)) + notes.append(self._gen_auxv(pid)) + notes.append(self._gen_files(pid)) - return notes + return notes - def _get_page(self, pid, page_no): - """ + def _get_page(self, pid, page_no): + """ Try to find memory page page_no in pages.img image for process pid. """ - pagemap = self.pagemaps[pid] + pagemap = self.pagemaps[pid] - # First entry is pagemap_head, we will need it later to open - # proper pages.img. 
- pages_id = pagemap[0]["pages_id"] - off = 0# in pages - for m in pagemap[1:]: - found = False - for i in range(m["nr_pages"]): - if m["vaddr"] + i*PAGESIZE == page_no*PAGESIZE: - found = True - break - off += 1 + # First entry is pagemap_head, we will need it later to open + # proper pages.img. + pages_id = pagemap[0]["pages_id"] + off = 0 # in pages + for m in pagemap[1:]: + found = False + for i in range(m["nr_pages"]): + if m["vaddr"] + i * PAGESIZE == page_no * PAGESIZE: + found = True + break + off += 1 - if not found: - continue + if not found: + continue - if "in_parent" in m and m["in_parent"] == True: - ppid = self.pstree[pid]["ppid"] - return self._get_page(ppid, page_no) - else: - with open(self._imgs_dir+"/"+"pages-"+str(pages_id)+".img") as f: - f.seek(off*PAGESIZE) - return f.read(PAGESIZE) + if "in_parent" in m and m["in_parent"] == True: + ppid = self.pstree[pid]["ppid"] + return self._get_page(ppid, page_no) + else: + with open(self._imgs_dir + "/" + "pages-" + str(pages_id) + + ".img") as f: + f.seek(off * PAGESIZE) + return f.read(PAGESIZE) - return None + return None - def _gen_mem_chunk(self, pid, vma, size): - """ + def _gen_mem_chunk(self, pid, vma, size): + """ Obtain vma contents for process pid. """ - f = None + f = None - if size == 0: - return "" + if size == 0: + return "" - if vma["status"] & status["VMA_AREA_VVAR"]: - #FIXME this is what gdb does, as vvar vma - # is not readable from userspace? - return "\0"*size - elif vma["status"] & status["VMA_AREA_VSYSCALL"]: - #FIXME need to dump it with criu or read from - # current process. - return "\0"*size + if vma["status"] & status["VMA_AREA_VVAR"]: + #FIXME this is what gdb does, as vvar vma + # is not readable from userspace? + return "\0" * size + elif vma["status"] & status["VMA_AREA_VSYSCALL"]: + #FIXME need to dump it with criu or read from + # current process. 
+ return "\0" * size - if vma["status"] & status["VMA_FILE_SHARED"] or \ - vma["status"] & status["VMA_FILE_PRIVATE"]: - # Open file before iterating vma pages - shmid = vma["shmid"] - off = vma["pgoff"] + if vma["status"] & status["VMA_FILE_SHARED"] or \ + vma["status"] & status["VMA_FILE_PRIVATE"]: + # Open file before iterating vma pages + shmid = vma["shmid"] + off = vma["pgoff"] - files = self.reg_files - fname = filter(lambda x: x["id"] == shmid, files)[0]["name"] + files = self.reg_files + fname = filter(lambda x: x["id"] == shmid, files)[0]["name"] - f = open(fname) - f.seek(off) + f = open(fname) + f.seek(off) - start = vma["start"] - end = vma["start"] + size + start = vma["start"] + end = vma["start"] + size - # Split requested memory chunk into pages, so it could be - # pictured as: - # - # "----" -- part of page with memory outside of our vma; - # "XXXX" -- memory from our vma; - # - # Start page Pages in the middle End page - # [-----XXXXX]...[XXXXXXXXXX][XXXXXXXXXX]...[XXX-------] - # - # Each page could be found in pages.img or in a standalone - # file described by shmid field in vma entry and - # corresponding entry in reg-files.img. - # For VMA_FILE_PRIVATE vma, unchanged pages are taken from - # a file, and changed ones -- from pages.img. - # Finally, if no page is found neither in pages.img nor - # in file, hole in inserted -- a page filled with zeroes. - start_page = start/PAGESIZE - end_page = end/PAGESIZE + # Split requested memory chunk into pages, so it could be + # pictured as: + # + # "----" -- part of page with memory outside of our vma; + # "XXXX" -- memory from our vma; + # + # Start page Pages in the middle End page + # [-----XXXXX]...[XXXXXXXXXX][XXXXXXXXXX]...[XXX-------] + # + # Each page could be found in pages.img or in a standalone + # file described by shmid field in vma entry and + # corresponding entry in reg-files.img. 
+ # For VMA_FILE_PRIVATE vma, unchanged pages are taken from + # a file, and changed ones -- from pages.img. + # Finally, if no page is found neither in pages.img nor + # in file, hole in inserted -- a page filled with zeroes. + start_page = start / PAGESIZE + end_page = end / PAGESIZE - buf = "" - for page_no in range(start_page, end_page+1): - page = None + buf = "" + for page_no in range(start_page, end_page + 1): + page = None - # Search for needed page in pages.img and reg-files.img - # and choose appropriate. - page_mem = self._get_page(pid, page_no) + # Search for needed page in pages.img and reg-files.img + # and choose appropriate. + page_mem = self._get_page(pid, page_no) - if f != None: - page = f.read(PAGESIZE) + if f != None: + page = f.read(PAGESIZE) - if page_mem != None: - # Page from pages.img has higher priority - # than one from maped file on disk. - page = page_mem + if page_mem != None: + # Page from pages.img has higher priority + # than one from maped file on disk. + page = page_mem - if page == None: - # Hole - page = PAGESIZE*"\0" + if page == None: + # Hole + page = PAGESIZE * "\0" - # If it is a start or end page, we need to read - # only part of it. - if page_no == start_page: - n_skip = start - page_no*PAGESIZE - if start_page == end_page: - n_read = size - else: - n_read = PAGESIZE - n_skip - elif page_no == end_page: - n_skip = 0 - n_read = end - page_no*PAGESIZE - else: - n_skip = 0 - n_read = PAGESIZE + # If it is a start or end page, we need to read + # only part of it. + if page_no == start_page: + n_skip = start - page_no * PAGESIZE + if start_page == end_page: + n_read = size + else: + n_read = PAGESIZE - n_skip + elif page_no == end_page: + n_skip = 0 + n_read = end - page_no * PAGESIZE + else: + n_skip = 0 + n_read = PAGESIZE - buf += page[n_skip : n_skip + n_read] + buf += page[n_skip:n_skip + n_read] - # Don't forget to close file. - if f != None: - f.close() + # Don't forget to close file. 
+ if f != None: + f.close() - return buf + return buf - def _gen_cmdline(self, pid): - """ + def _gen_cmdline(self, pid): + """ Generate full command with arguments. """ - mm = self.mms[pid] + mm = self.mms[pid] - vma = {} - vma["start"] = mm["mm_arg_start"] - vma["end"] = mm["mm_arg_end"] - # Dummy flags and status. - vma["flags"] = 0 - vma["status"] = 0 - size = vma["end"] - vma["start"] + vma = {} + vma["start"] = mm["mm_arg_start"] + vma["end"] = mm["mm_arg_end"] + # Dummy flags and status. + vma["flags"] = 0 + vma["status"] = 0 + size = vma["end"] - vma["start"] - chunk = self._gen_mem_chunk(pid, vma, size) + chunk = self._gen_mem_chunk(pid, vma, size) - # Replace all '\0's with spaces. - return chunk.replace('\0', ' ') + # Replace all '\0's with spaces. + return chunk.replace('\0', ' ') - def _get_vma_dump_size(self, vma): - """ + def _get_vma_dump_size(self, vma): + """ Calculate amount of vma to put into core dump. """ - if vma["status"] & status["VMA_AREA_VVAR"] or \ - vma["status"] & status["VMA_AREA_VSYSCALL"] or \ - vma["status"] & status["VMA_AREA_VDSO"]: - size = vma["end"] - vma["start"] - elif vma["prot"] == 0: - size = 0 - elif vma["prot"] & prot["PROT_READ"] and \ - vma["prot"] & prot["PROT_EXEC"]: - size = PAGESIZE - elif vma["status"] & status["VMA_ANON_SHARED"] or \ - vma["status"] & status["VMA_FILE_SHARED"] or \ - vma["status"] & status["VMA_ANON_PRIVATE"] or \ - vma["status"] & status["VMA_FILE_PRIVATE"]: - size = vma["end"] - vma["start"] - else: - size = 0 + if vma["status"] & status["VMA_AREA_VVAR"] or \ + vma["status"] & status["VMA_AREA_VSYSCALL"] or \ + vma["status"] & status["VMA_AREA_VDSO"]: + size = vma["end"] - vma["start"] + elif vma["prot"] == 0: + size = 0 + elif vma["prot"] & prot["PROT_READ"] and \ + vma["prot"] & prot["PROT_EXEC"]: + size = PAGESIZE + elif vma["status"] & status["VMA_ANON_SHARED"] or \ + vma["status"] & status["VMA_FILE_SHARED"] or \ + vma["status"] & status["VMA_ANON_PRIVATE"] or \ + vma["status"] & 
status["VMA_FILE_PRIVATE"]: + size = vma["end"] - vma["start"] + else: + size = 0 - return size + return size - def _get_vma_flags(self, vma): - """ + def _get_vma_flags(self, vma): + """ Convert vma flags int elf flags. """ - flags = 0 + flags = 0 - if vma['prot'] & prot["PROT_READ"]: - flags = flags | elf.PF_R + if vma['prot'] & prot["PROT_READ"]: + flags = flags | elf.PF_R - if vma['prot'] & prot["PROT_WRITE"]: - flags = flags | elf.PF_W + if vma['prot'] & prot["PROT_WRITE"]: + flags = flags | elf.PF_W - if vma['prot'] & prot["PROT_EXEC"]: - flags = flags | elf.PF_X + if vma['prot'] & prot["PROT_EXEC"]: + flags = flags | elf.PF_X - return flags + return flags - def _gen_vmas(self, pid): - """ + def _gen_vmas(self, pid): + """ Generate vma contents for core dump for process pid. """ - mm = self.mms[pid] + mm = self.mms[pid] - class vma_class: - data = None - filesz = None - memsz = None - flags = None - start = None + class vma_class: + data = None + filesz = None + memsz = None + flags = None + start = None - vmas = [] - for vma in mm["vmas"]: - size = self._get_vma_dump_size(vma) + vmas = [] + for vma in mm["vmas"]: + size = self._get_vma_dump_size(vma) - chunk = self._gen_mem_chunk(pid, vma, size) + chunk = self._gen_mem_chunk(pid, vma, size) - v = vma_class() - v.filesz = self._get_vma_dump_size(vma) - v.data = self._gen_mem_chunk(pid, vma, v.filesz) - v.memsz = vma["end"] - vma["start"] - v.start = vma["start"] - v.flags = self._get_vma_flags(vma) + v = vma_class() + v.filesz = self._get_vma_dump_size(vma) + v.data = self._gen_mem_chunk(pid, vma, v.filesz) + v.memsz = vma["end"] - vma["start"] + v.start = vma["start"] + v.flags = self._get_vma_flags(vma) - vmas.append(v) + vmas.append(v) - return vmas + return vmas diff --git a/coredump/criu_coredump/elf.py b/coredump/criu_coredump/elf.py index 1da06a6fd..65da583c3 100644 --- a/coredump/criu_coredump/elf.py +++ b/coredump/criu_coredump/elf.py @@ -1,526 +1,685 @@ # Define structures and constants for 
generating elf file. import ctypes -Elf64_Half = ctypes.c_uint16 # typedef uint16_t Elf64_Half; -Elf64_Word = ctypes.c_uint32 # typedef uint32_t Elf64_Word; -Elf64_Addr = ctypes.c_uint64 # typedef uint64_t Elf64_Addr; -Elf64_Off = ctypes.c_uint64 # typedef uint64_t Elf64_Off; -Elf64_Xword = ctypes.c_uint64 # typedef uint64_t Elf64_Xword; +Elf64_Half = ctypes.c_uint16 # typedef uint16_t Elf64_Half; +Elf64_Word = ctypes.c_uint32 # typedef uint32_t Elf64_Word; +Elf64_Addr = ctypes.c_uint64 # typedef uint64_t Elf64_Addr; +Elf64_Off = ctypes.c_uint64 # typedef uint64_t Elf64_Off; +Elf64_Xword = ctypes.c_uint64 # typedef uint64_t Elf64_Xword; # Elf64_Ehdr related constants. # e_ident size. -EI_NIDENT = 16 # #define EI_NIDENT (16) +EI_NIDENT = 16 # #define EI_NIDENT (16) -EI_MAG0 = 0 # #define EI_MAG0 0 /* File identification byte 0 index */ -ELFMAG0 = 0x7f # #define ELFMAG0 0x7f /* Magic number byte 0 */ +EI_MAG0 = 0 # #define EI_MAG0 0 /* File identification byte 0 index */ +ELFMAG0 = 0x7f # #define ELFMAG0 0x7f /* Magic number byte 0 */ -EI_MAG1 = 1 # #define EI_MAG1 1 /* File identification byte 1 index */ -ELFMAG1 = ord('E') # #define ELFMAG1 'E' /* Magic number byte 1 */ +EI_MAG1 = 1 # #define EI_MAG1 1 /* File identification byte 1 index */ +ELFMAG1 = ord( + 'E') # #define ELFMAG1 'E' /* Magic number byte 1 */ -EI_MAG2 = 2 # #define EI_MAG2 2 /* File identification byte 2 index */ -ELFMAG2 = ord('L') # #define ELFMAG2 'L' /* Magic number byte 2 */ +EI_MAG2 = 2 # #define EI_MAG2 2 /* File identification byte 2 index */ +ELFMAG2 = ord( + 'L') # #define ELFMAG2 'L' /* Magic number byte 2 */ -EI_MAG3 = 3 # #define EI_MAG3 3 /* File identification byte 3 index */ -ELFMAG3 = ord('F') # #define ELFMAG3 'F' /* Magic number byte 3 */ +EI_MAG3 = 3 # #define EI_MAG3 3 /* File identification byte 3 index */ +ELFMAG3 = ord( + 'F') # #define ELFMAG3 'F' /* Magic number byte 3 */ -EI_CLASS = 4 # #define EI_CLASS 4 /* File class byte index */ +EI_CLASS = 4 # #define EI_CLASS 4 /* 
File class byte index */ -EI_DATA = 5 # #define EI_DATA 5 /* Data encoding byte index */ +EI_DATA = 5 # #define EI_DATA 5 /* Data encoding byte index */ -EI_VERSION = 6 # #define EI_VERSION 6 /* File version byte index */ +EI_VERSION = 6 # #define EI_VERSION 6 /* File version byte index */ -ELFDATA2LSB = 1 # #define ELFDATA2LSB 1 /* 2's complement, little endian */ +ELFDATA2LSB = 1 # #define ELFDATA2LSB 1 /* 2's complement, little endian */ -ELFCLASS64 = 2 # #define ELFCLASS64 2 /* 64-bit objects */ +ELFCLASS64 = 2 # #define ELFCLASS64 2 /* 64-bit objects */ # Legal values for e_type (object file type). -ET_CORE = 4 # #define ET_CORE 4 /* Core file */ +ET_CORE = 4 # #define ET_CORE 4 /* Core file */ # Legal values for e_machine (architecture). -EM_X86_64 = 62 # #define EM_X86_64 62 /* AMD x86-64 architecture */ +EM_X86_64 = 62 # #define EM_X86_64 62 /* AMD x86-64 architecture */ # Legal values for e_version (version). -EV_CURRENT = 1 # #define EV_CURRENT 1 /* Current version */ +EV_CURRENT = 1 # #define EV_CURRENT 1 /* Current version */ -class Elf64_Ehdr(ctypes.Structure): # typedef struct - _fields_ = [ # { - ("e_ident", ctypes.c_ubyte*EI_NIDENT), # unsigned char e_ident[EI_NIDENT]; - ("e_type", Elf64_Half), # Elf64_Half e_type; - ("e_machine", Elf64_Half), # Elf64_Half e_machine; - ("e_version", Elf64_Word), # Elf64_Word e_version; - ("e_entry", Elf64_Addr), # Elf64_Addr e_entry; - ("e_phoff", Elf64_Off), # Elf64_Off e_phoff; - ("e_shoff", Elf64_Off), # Elf64_Off e_shoff; - ("e_flags", Elf64_Word), # Elf64_Word e_flags; - ("e_ehsize", Elf64_Half), # Elf64_Half e_ehsize; - ("e_phentsize", Elf64_Half), # Elf64_Half e_phentsize; - ("e_phnum", Elf64_Half), # Elf64_Half e_phnum; - ("e_shentsize", Elf64_Half), # Elf64_Half e_shentsize; - ("e_shnum", Elf64_Half), # Elf64_Half e_shnum; - ("e_shstrndx", Elf64_Half) # Elf64_Half e_shstrndx; - ] # } Elf64_Ehdr; + +class Elf64_Ehdr(ctypes.Structure): # typedef struct + _fields_ = [ # { + ("e_ident", + ctypes.c_ubyte * 
EI_NIDENT), # unsigned char e_ident[EI_NIDENT]; + ("e_type", Elf64_Half), # Elf64_Half e_type; + ("e_machine", Elf64_Half), # Elf64_Half e_machine; + ("e_version", Elf64_Word), # Elf64_Word e_version; + ("e_entry", Elf64_Addr), # Elf64_Addr e_entry; + ("e_phoff", Elf64_Off), # Elf64_Off e_phoff; + ("e_shoff", Elf64_Off), # Elf64_Off e_shoff; + ("e_flags", Elf64_Word), # Elf64_Word e_flags; + ("e_ehsize", Elf64_Half), # Elf64_Half e_ehsize; + ("e_phentsize", Elf64_Half), # Elf64_Half e_phentsize; + ("e_phnum", Elf64_Half), # Elf64_Half e_phnum; + ("e_shentsize", Elf64_Half), # Elf64_Half e_shentsize; + ("e_shnum", Elf64_Half), # Elf64_Half e_shnum; + ("e_shstrndx", Elf64_Half) # Elf64_Half e_shstrndx; + ] # } Elf64_Ehdr; # Elf64_Phdr related constants. # Legal values for p_type (segment type). -PT_LOAD = 1 # #define PT_LOAD 1 /* Loadable program segment */ -PT_NOTE = 4 # #define PT_NOTE 4 /* Auxiliary information */ +PT_LOAD = 1 # #define PT_LOAD 1 /* Loadable program segment */ +PT_NOTE = 4 # #define PT_NOTE 4 /* Auxiliary information */ # Legal values for p_flags (segment flags). 
-PF_X = 1 # #define PF_X (1 << 0) /* Segment is executable */ -PF_W = 1 << 1 # #define PF_W (1 << 1) /* Segment is writable */ -PF_R = 1 << 2 # #define PF_R (1 << 2) /* Segment is readable */ +PF_X = 1 # #define PF_X (1 << 0) /* Segment is executable */ +PF_W = 1 << 1 # #define PF_W (1 << 1) /* Segment is writable */ +PF_R = 1 << 2 # #define PF_R (1 << 2) /* Segment is readable */ -class Elf64_Phdr(ctypes.Structure): # typedef struct - _fields_ = [ # { - ("p_type", Elf64_Word), # Elf64_Word p_type; - ("p_flags", Elf64_Word), # Elf64_Word p_flags; - ("p_offset", Elf64_Off), # Elf64_Off p_offset; - ("p_vaddr", Elf64_Addr), # Elf64_Addr p_vaddr; - ("p_paddr", Elf64_Addr), # Elf64_Addr p_paddr; - ("p_filesz", Elf64_Xword), # Elf64_Xword p_filesz; - ("p_memsz", Elf64_Xword), # Elf64_Xword p_memsz; - ("p_align", Elf64_Xword), # Elf64_Xword p_align; - ] # } Elf64_Phdr; + +class Elf64_Phdr(ctypes.Structure): # typedef struct + _fields_ = [ # { + ("p_type", Elf64_Word), # Elf64_Word p_type; + ("p_flags", Elf64_Word), # Elf64_Word p_flags; + ("p_offset", Elf64_Off), # Elf64_Off p_offset; + ("p_vaddr", Elf64_Addr), # Elf64_Addr p_vaddr; + ("p_paddr", Elf64_Addr), # Elf64_Addr p_paddr; + ("p_filesz", Elf64_Xword), # Elf64_Xword p_filesz; + ("p_memsz", Elf64_Xword), # Elf64_Xword p_memsz; + ("p_align", Elf64_Xword), # Elf64_Xword p_align; + ] # } Elf64_Phdr; # Elf64_auxv_t related constants. -class _Elf64_auxv_t_U(ctypes.Union): - _fields_ = [ - ("a_val", ctypes.c_uint64) - ] -class Elf64_auxv_t(ctypes.Structure): # typedef struct - _fields_ = [ # { - ("a_type", ctypes.c_uint64), # uint64_t a_type; /* Entry type */ - ("a_un", _Elf64_auxv_t_U) # union - # { - # uint64_t a_val; /* Integer value */ - # /* We use to have pointer elements added here. We cannot do that, - # though, since it does not work when using 32-bit definitions - # on 64-bit platforms and vice versa. 
*/ - # } a_un; - ] # } Elf64_auxv_t; +class _Elf64_auxv_t_U(ctypes.Union): + _fields_ = [("a_val", ctypes.c_uint64)] + + +class Elf64_auxv_t(ctypes.Structure): # typedef struct + _fields_ = [ # { + ("a_type", + ctypes.c_uint64), # uint64_t a_type; /* Entry type */ + ("a_un", _Elf64_auxv_t_U) # union + # { + # uint64_t a_val; /* Integer value */ + # /* We use to have pointer elements added here. We cannot do that, + # though, since it does not work when using 32-bit definitions + # on 64-bit platforms and vice versa. */ + # } a_un; + ] # } Elf64_auxv_t; # Elf64_Nhdr related constants. -NT_PRSTATUS = 1 # #define NT_PRSTATUS 1 /* Contains copy of prstatus struct */ -NT_FPREGSET = 2 # #define NT_FPREGSET 2 /* Contains copy of fpregset struct */ -NT_PRPSINFO = 3 # #define NT_PRPSINFO 3 /* Contains copy of prpsinfo struct */ -NT_AUXV = 6 # #define NT_AUXV 6 /* Contains copy of auxv array */ -NT_SIGINFO = 0x53494749 # #define NT_SIGINFO 0x53494749 /* Contains copy of siginfo_t, +NT_PRSTATUS = 1 # #define NT_PRSTATUS 1 /* Contains copy of prstatus struct */ +NT_FPREGSET = 2 # #define NT_FPREGSET 2 /* Contains copy of fpregset struct */ +NT_PRPSINFO = 3 # #define NT_PRPSINFO 3 /* Contains copy of prpsinfo struct */ +NT_AUXV = 6 # #define NT_AUXV 6 /* Contains copy of auxv array */ +NT_SIGINFO = 0x53494749 # #define NT_SIGINFO 0x53494749 /* Contains copy of siginfo_t, # size might increase */ -NT_FILE = 0x46494c45 # #define NT_FILE 0x46494c45 /* Contains information about mapped +NT_FILE = 0x46494c45 # #define NT_FILE 0x46494c45 /* Contains information about mapped # files */ -NT_X86_XSTATE = 0x202 # #define NT_X86_XSTATE 0x202 /* x86 extended state using xsave */ +NT_X86_XSTATE = 0x202 # #define NT_X86_XSTATE 0x202 /* x86 extended state using xsave */ -class Elf64_Nhdr(ctypes.Structure): # typedef struct - _fields_ = [ # { - ("n_namesz", Elf64_Word), # Elf64_Word n_namesz; /* Length of the note's name. 
*/ - ("n_descsz", Elf64_Word), # Elf64_Word n_descsz; /* Length of the note's descriptor. */ - ("n_type", Elf64_Word), # Elf64_Word n_type; /* Type of the note. */ - ] # } Elf64_Nhdr; + +class Elf64_Nhdr(ctypes.Structure): # typedef struct + _fields_ = [ # { + ( + "n_namesz", Elf64_Word + ), # Elf64_Word n_namesz; /* Length of the note's name. */ + ( + "n_descsz", Elf64_Word + ), # Elf64_Word n_descsz; /* Length of the note's descriptor. */ + ("n_type", Elf64_Word + ), # Elf64_Word n_type; /* Type of the note. */ + ] # } Elf64_Nhdr; # Elf64_Shdr related constants. -class Elf64_Shdr(ctypes.Structure): # typedef struct - _fields_ = [ # { - ("sh_name", Elf64_Word), # Elf64_Word sh_name; /* Section name (string tbl index) */ - ("sh_type", Elf64_Word), # Elf64_Word sh_type; /* Section type */ - ("sh_flags", Elf64_Xword), # Elf64_Xword sh_flags; /* Section flags */ - ("sh_addr", Elf64_Addr), # Elf64_Addr sh_addr; /* Section virtual addr at execution */ - ("sh_offset", Elf64_Off), # Elf64_Off sh_offset; /* Section file offset */ - ("sh_size", Elf64_Xword), # Elf64_Xword sh_size; /* Section size in bytes */ - ("sh_link", Elf64_Word), # Elf64_Word sh_link; /* Link to another section */ - ("sh_info", Elf64_Word), # Elf64_Word sh_info; /* Additional section information */ - ("sh_addralign",Elf64_Xword), # Elf64_Xword sh_addralign; /* Section alignment */ - ("sh_entsize", Elf64_Xword) # Elf64_Xword sh_entsize; /* Entry size if section holds table */ - ] # } Elf64_Shdr; + +class Elf64_Shdr(ctypes.Structure): # typedef struct + _fields_ = [ # { + ( + "sh_name", Elf64_Word + ), # Elf64_Word sh_name; /* Section name (string tbl index) */ + ("sh_type", Elf64_Word + ), # Elf64_Word sh_type; /* Section type */ + ("sh_flags", Elf64_Xword + ), # Elf64_Xword sh_flags; /* Section flags */ + ( + "sh_addr", Elf64_Addr + ), # Elf64_Addr sh_addr; /* Section virtual addr at execution */ + ( + "sh_offset", Elf64_Off + ), # Elf64_Off sh_offset; /* Section file offset */ + ( + "sh_size", 
Elf64_Xword + ), # Elf64_Xword sh_size; /* Section size in bytes */ + ( + "sh_link", Elf64_Word + ), # Elf64_Word sh_link; /* Link to another section */ + ( + "sh_info", Elf64_Word + ), # Elf64_Word sh_info; /* Additional section information */ + ("sh_addralign", Elf64_Xword + ), # Elf64_Xword sh_addralign; /* Section alignment */ + ( + "sh_entsize", Elf64_Xword + ) # Elf64_Xword sh_entsize; /* Entry size if section holds table */ + ] # } Elf64_Shdr; # elf_prstatus related constants. + # Signal info. -class elf_siginfo(ctypes.Structure): # struct elf_siginfo - _fields_ = [ # { - ("si_signo", ctypes.c_int), # int si_signo; /* Signal number. */ - ("si_code", ctypes.c_int), # int si_code; /* Extra code. */ - ("si_errno", ctypes.c_int) # int si_errno; /* Errno. */ - ] # }; +class elf_siginfo(ctypes.Structure): # struct elf_siginfo + _fields_ = [ # { + ("si_signo", ctypes.c_int + ), # int si_signo; /* Signal number. */ + ("si_code", ctypes.c_int + ), # int si_code; /* Extra code. */ + ("si_errno", ctypes.c_int + ) # int si_errno; /* Errno. */ + ] # }; + # A time value that is accurate to the nearest # microsecond but also has a range of years. -class timeval(ctypes.Structure): # struct timeval - _fields_ = [ # { - ("tv_sec", ctypes.c_long), # __time_t tv_sec; /* Seconds. */ - ("tv_usec", ctypes.c_long) # __suseconds_t tv_usec; /* Microseconds. */ - ] # }; +class timeval(ctypes.Structure): # struct timeval + _fields_ = [ # { + ("tv_sec", + ctypes.c_long), # __time_t tv_sec; /* Seconds. */ + ("tv_usec", ctypes.c_long + ) # __suseconds_t tv_usec; /* Microseconds. 
*/ + ] # }; + + +class user_regs_struct(ctypes.Structure): # struct user_regs_struct + _fields_ = [ # { + ("r15", + ctypes.c_ulonglong), # __extension__ unsigned long long int r15; + ("r14", + ctypes.c_ulonglong), # __extension__ unsigned long long int r14; + ("r13", + ctypes.c_ulonglong), # __extension__ unsigned long long int r13; + ("r12", + ctypes.c_ulonglong), # __extension__ unsigned long long int r12; + ("rbp", + ctypes.c_ulonglong), # __extension__ unsigned long long int rbp; + ("rbx", + ctypes.c_ulonglong), # __extension__ unsigned long long int rbx; + ("r11", + ctypes.c_ulonglong), # __extension__ unsigned long long int r11; + ("r10", + ctypes.c_ulonglong), # __extension__ unsigned long long int r10; + ("r9", + ctypes.c_ulonglong), # __extension__ unsigned long long int r9; + ("r8", + ctypes.c_ulonglong), # __extension__ unsigned long long int r8; + ("rax", + ctypes.c_ulonglong), # __extension__ unsigned long long int rax; + ("rcx", + ctypes.c_ulonglong), # __extension__ unsigned long long int rcx; + ("rdx", + ctypes.c_ulonglong), # __extension__ unsigned long long int rdx; + ("rsi", + ctypes.c_ulonglong), # __extension__ unsigned long long int rsi; + ("rdi", + ctypes.c_ulonglong), # __extension__ unsigned long long int rdi; + ("orig_rax", ctypes.c_ulonglong + ), # __extension__ unsigned long long int orig_rax; + ("rip", + ctypes.c_ulonglong), # __extension__ unsigned long long int rip; + ("cs", + ctypes.c_ulonglong), # __extension__ unsigned long long int cs; + ("eflags", + ctypes.c_ulonglong), # __extension__ unsigned long long int eflags; + ("rsp", + ctypes.c_ulonglong), # __extension__ unsigned long long int rsp; + ("ss", + ctypes.c_ulonglong), # __extension__ unsigned long long int ss; + ("fs_base", ctypes.c_ulonglong + ), # __extension__ unsigned long long int fs_base; + ("gs_base", ctypes.c_ulonglong + ), # __extension__ unsigned long long int gs_base; + ("ds", + ctypes.c_ulonglong), # __extension__ unsigned long long int ds; + ("es", + 
ctypes.c_ulonglong), # __extension__ unsigned long long int es; + ("fs", + ctypes.c_ulonglong), # __extension__ unsigned long long int fs; + ("gs", ctypes.c_ulonglong + ) # __extension__ unsigned long long int gs; + ] # }; -class user_regs_struct(ctypes.Structure): # struct user_regs_struct - _fields_ = [ # { - ("r15", ctypes.c_ulonglong), # __extension__ unsigned long long int r15; - ("r14", ctypes.c_ulonglong), # __extension__ unsigned long long int r14; - ("r13", ctypes.c_ulonglong), # __extension__ unsigned long long int r13; - ("r12", ctypes.c_ulonglong), # __extension__ unsigned long long int r12; - ("rbp", ctypes.c_ulonglong), # __extension__ unsigned long long int rbp; - ("rbx", ctypes.c_ulonglong), # __extension__ unsigned long long int rbx; - ("r11", ctypes.c_ulonglong), # __extension__ unsigned long long int r11; - ("r10", ctypes.c_ulonglong), # __extension__ unsigned long long int r10; - ("r9", ctypes.c_ulonglong), # __extension__ unsigned long long int r9; - ("r8", ctypes.c_ulonglong), # __extension__ unsigned long long int r8; - ("rax", ctypes.c_ulonglong), # __extension__ unsigned long long int rax; - ("rcx", ctypes.c_ulonglong), # __extension__ unsigned long long int rcx; - ("rdx", ctypes.c_ulonglong), # __extension__ unsigned long long int rdx; - ("rsi", ctypes.c_ulonglong), # __extension__ unsigned long long int rsi; - ("rdi", ctypes.c_ulonglong), # __extension__ unsigned long long int rdi; - ("orig_rax", ctypes.c_ulonglong), # __extension__ unsigned long long int orig_rax; - ("rip", ctypes.c_ulonglong), # __extension__ unsigned long long int rip; - ("cs", ctypes.c_ulonglong), # __extension__ unsigned long long int cs; - ("eflags", ctypes.c_ulonglong), # __extension__ unsigned long long int eflags; - ("rsp", ctypes.c_ulonglong), # __extension__ unsigned long long int rsp; - ("ss", ctypes.c_ulonglong), # __extension__ unsigned long long int ss; - ("fs_base", ctypes.c_ulonglong), # __extension__ unsigned long long int fs_base; - ("gs_base", 
ctypes.c_ulonglong), # __extension__ unsigned long long int gs_base; - ("ds", ctypes.c_ulonglong), # __extension__ unsigned long long int ds; - ("es", ctypes.c_ulonglong), # __extension__ unsigned long long int es; - ("fs", ctypes.c_ulonglong), # __extension__ unsigned long long int fs; - ("gs", ctypes.c_ulonglong) # __extension__ unsigned long long int gs; - ] # }; #elf_greg_t = ctypes.c_ulonglong #ELF_NGREG = ctypes.sizeof(user_regs_struct)/ctypes.sizeof(elf_greg_t) #elf_gregset_t = elf_greg_t*ELF_NGREG elf_gregset_t = user_regs_struct -class elf_prstatus(ctypes.Structure): # struct elf_prstatus - _fields_ = [ # { - ("pr_info", elf_siginfo), # struct elf_siginfo pr_info; /* Info associated with signal. */ - ("pr_cursig", ctypes.c_short), # short int pr_cursig; /* Current signal. */ - ("pr_sigpend", ctypes.c_ulong), # unsigned long int pr_sigpend; /* Set of pending signals. */ - ("pr_sighold", ctypes.c_ulong), # unsigned long int pr_sighold; /* Set of held signals. */ - ("pr_pid", ctypes.c_int), # __pid_t pr_pid; - ("pr_ppid", ctypes.c_int), # __pid_t pr_ppid; - ("pr_pgrp", ctypes.c_int), # __pid_t pr_pgrp; - ("pr_sid", ctypes.c_int), # __pid_t pr_sid; - ("pr_utime", timeval), # struct timeval pr_utime; /* User time. */ - ("pr_stime", timeval), # struct timeval pr_stime; /* System time. */ - ("pr_cutime", timeval), # struct timeval pr_cutime; /* Cumulative user time. */ - ("pr_cstime", timeval), # struct timeval pr_cstime; /* Cumulative system time. */ - ("pr_reg", elf_gregset_t), # elf_gregset_t pr_reg; /* GP registers. */ - ("pr_fpvalid", ctypes.c_int) # int pr_fpvalid; /* True if math copro being used. */ - ] # }; + +class elf_prstatus(ctypes.Structure): # struct elf_prstatus + _fields_ = [ # { + ( + "pr_info", elf_siginfo + ), # struct elf_siginfo pr_info; /* Info associated with signal. */ + ("pr_cursig", ctypes.c_short + ), # short int pr_cursig; /* Current signal. 
*/ + ( + "pr_sigpend", ctypes.c_ulong + ), # unsigned long int pr_sigpend; /* Set of pending signals. */ + ( + "pr_sighold", ctypes.c_ulong + ), # unsigned long int pr_sighold; /* Set of held signals. */ + ("pr_pid", ctypes.c_int), # __pid_t pr_pid; + ("pr_ppid", ctypes.c_int), # __pid_t pr_ppid; + ("pr_pgrp", ctypes.c_int), # __pid_t pr_pgrp; + ("pr_sid", ctypes.c_int), # __pid_t pr_sid; + ("pr_utime", + timeval), # struct timeval pr_utime; /* User time. */ + ("pr_stime", timeval + ), # struct timeval pr_stime; /* System time. */ + ( + "pr_cutime", timeval + ), # struct timeval pr_cutime; /* Cumulative user time. */ + ( + "pr_cstime", timeval + ), # struct timeval pr_cstime; /* Cumulative system time. */ + ("pr_reg", elf_gregset_t + ), # elf_gregset_t pr_reg; /* GP registers. */ + ( + "pr_fpvalid", ctypes.c_int + ) # int pr_fpvalid; /* True if math copro being used. */ + ] # }; # elf_prpsinfo related constants. -ELF_PRARGSZ = 80 # #define ELF_PRARGSZ (80) /* Number of chars for args. */ - -class elf_prpsinfo(ctypes.Structure): # struct elf_prpsinfo - _fields_ = [ # { - ("pr_state", ctypes.c_byte), # char pr_state; /* Numeric process state. */ - ("pr_sname", ctypes.c_char), # char pr_sname; /* Char for pr_state. */ - ("pr_zomb", ctypes.c_byte), # char pr_zomb; /* Zombie. */ - ("pr_nice", ctypes.c_byte), # char pr_nice; /* Nice val. */ - ("pr_flag", ctypes.c_ulong), # unsigned long int pr_flag; /* Flags. */ - # #if __WORDSIZE == 32 - # unsigned short int pr_uid; - # unsigned short int pr_gid; - # #else - ("pr_uid", ctypes.c_uint), # unsigned int pr_uid; - ("pr_gid", ctypes.c_uint), # unsigned int pr_gid; - # #endif - ("pr_pid", ctypes.c_int), # int pr_pid, pr_ppid, pr_pgrp, pr_sid; - ("pr_ppid", ctypes.c_int), - ("pr_pgrp", ctypes.c_int), - ("pr_sid", ctypes.c_int), - # /* Lots missing */ - ("pr_fname", ctypes.c_char*16), # char pr_fname[16]; /* Filename of executable. 
*/ - ("pr_psargs", ctypes.c_char*ELF_PRARGSZ) # char pr_psargs[ELF_PRARGSZ]; /* Initial part of arg list. */ - ] # }; +ELF_PRARGSZ = 80 # #define ELF_PRARGSZ (80) /* Number of chars for args. */ -class user_fpregs_struct(ctypes.Structure): # struct user_fpregs_struct - _fields_ = [ # { - ("cwd", ctypes.c_ushort), # unsigned short int cwd; - ("swd", ctypes.c_ushort), # unsigned short int swd; - ("ftw", ctypes.c_ushort), # unsigned short int ftw; - ("fop", ctypes.c_ushort), # unsigned short int fop; - ("rip", ctypes.c_ulonglong), # __extension__ unsigned long long int rip; - ("rdp", ctypes.c_ulonglong), # __extension__ unsigned long long int rdp; - ("mxcsr", ctypes.c_uint), # unsigned int mxcsr; - ("mxcr_mask", ctypes.c_uint), # unsigned int mxcr_mask; - ("st_space", ctypes.c_uint*32), # unsigned int st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */ - ("xmm_space", ctypes.c_uint*64), # unsigned int xmm_space[64]; /* 16*16 bytes for each XMM-reg = 256 bytes */ - ("padding", ctypes.c_uint*24), # unsigned int padding[24]; - ] # }; +class elf_prpsinfo(ctypes.Structure): # struct elf_prpsinfo + _fields_ = [ # { + ( + "pr_state", ctypes.c_byte + ), # char pr_state; /* Numeric process state. */ + ( + "pr_sname", ctypes.c_char + ), # char pr_sname; /* Char for pr_state. */ + ("pr_zomb", ctypes.c_byte + ), # char pr_zomb; /* Zombie. */ + ("pr_nice", ctypes.c_byte + ), # char pr_nice; /* Nice val. */ + ("pr_flag", ctypes.c_ulong + ), # unsigned long int pr_flag; /* Flags. */ + # #if __WORDSIZE == 32 + # unsigned short int pr_uid; + # unsigned short int pr_gid; + # #else + ("pr_uid", ctypes.c_uint), # unsigned int pr_uid; + ("pr_gid", ctypes.c_uint), # unsigned int pr_gid; + # #endif + ("pr_pid", ctypes.c_int), # int pr_pid, pr_ppid, pr_pgrp, pr_sid; + ("pr_ppid", ctypes.c_int), + ("pr_pgrp", ctypes.c_int), + ("pr_sid", ctypes.c_int), + # /* Lots missing */ + ( + "pr_fname", ctypes.c_char * 16 + ), # char pr_fname[16]; /* Filename of executable. 
*/ + ( + "pr_psargs", ctypes.c_char * ELF_PRARGSZ + ) # char pr_psargs[ELF_PRARGSZ]; /* Initial part of arg list. */ + ] # }; + + +class user_fpregs_struct(ctypes.Structure): # struct user_fpregs_struct + _fields_ = [ # { + ("cwd", ctypes.c_ushort), # unsigned short int cwd; + ("swd", ctypes.c_ushort), # unsigned short int swd; + ("ftw", ctypes.c_ushort), # unsigned short int ftw; + ("fop", ctypes.c_ushort), # unsigned short int fop; + ("rip", + ctypes.c_ulonglong), # __extension__ unsigned long long int rip; + ("rdp", + ctypes.c_ulonglong), # __extension__ unsigned long long int rdp; + ("mxcsr", ctypes.c_uint), # unsigned int mxcsr; + ("mxcr_mask", ctypes.c_uint), # unsigned int mxcr_mask; + ( + "st_space", ctypes.c_uint * 32 + ), # unsigned int st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */ + ( + "xmm_space", ctypes.c_uint * 64 + ), # unsigned int xmm_space[64]; /* 16*16 bytes for each XMM-reg = 256 bytes */ + ("padding", + ctypes.c_uint * 24), # unsigned int padding[24]; + ] # }; elf_fpregset_t = user_fpregs_struct - # siginfo_t related constants. -_SI_MAX_SIZE = 128 -_SI_PAD_SIZE = (_SI_MAX_SIZE/ctypes.sizeof(ctypes.c_int)) - 4 +_SI_MAX_SIZE = 128 +_SI_PAD_SIZE = (_SI_MAX_SIZE / ctypes.sizeof(ctypes.c_int)) - 4 - # /* kill(). */ -class _siginfo_t_U_kill(ctypes.Structure): # struct - _fields_ = [ # { - ("si_pid", ctypes.c_int), # __pid_t si_pid; /* Sending process ID. */ - ("si_uid", ctypes.c_uint) # __uid_t si_uid; /* Real user ID of sending process. */ - ] # } _kill; +# /* kill(). */ +class _siginfo_t_U_kill(ctypes.Structure): # struct + _fields_ = [ # { + ("si_pid", ctypes.c_int + ), # __pid_t si_pid; /* Sending process ID. */ + ( + "si_uid", ctypes.c_uint + ) # __uid_t si_uid; /* Real user ID of sending process. */ + ] # } _kill; # Type for data associated with a signal. 
-class sigval_t(ctypes.Union): # typedef union sigval - _fields_ = [ # { - ("sival_int", ctypes.c_int), # int sival_int; - ("sical_ptr", ctypes.c_void_p), # void *sival_ptr; - ] # } sigval_t; - - # /* POSIX.1b timers. */ -class _siginfo_t_U_timer(ctypes.Structure): # struct - _fields_ = [ # { - ("si_tid", ctypes.c_int), # int si_tid; /* Timer ID. */ - ("si_overrun", ctypes.c_int), # int si_overrun; /* Overrun count. */ - ("si_sigval", sigval_t) # sigval_t si_sigval; /* Signal value. */ - ] # } _timer; +class sigval_t(ctypes.Union): # typedef union sigval + _fields_ = [ # { + ("sival_int", ctypes.c_int), # int sival_int; + ("sical_ptr", ctypes.c_void_p), # void *sival_ptr; + ] # } sigval_t; - # /* POSIX.1b signals. */ -class _siginfo_t_U_rt(ctypes.Structure): # struct - _fields_ = [ # { - ("si_pid", ctypes.c_int), # __pid_t si_pid; /* Sending process ID. */ - ("si_uid", ctypes.c_uint), # __uid_t si_uid; /* Real user ID of sending process. */ - ("si_sigval", sigval_t) # sigval_t si_sigval; /* Signal value. */ - ] # } _rt; + # /* POSIX.1b timers. */ +class _siginfo_t_U_timer(ctypes.Structure): # struct + _fields_ = [ # { + ("si_tid", + ctypes.c_int), # int si_tid; /* Timer ID. */ + ("si_overrun", ctypes.c_int + ), # int si_overrun; /* Overrun count. */ + ("si_sigval", sigval_t + ) # sigval_t si_sigval; /* Signal value. */ + ] # } _timer; - # /* SIGCHLD. */ -class _siginfo_t_U_sigchld(ctypes.Structure): # struct - _fields_ = [ # { - ("si_pid", ctypes.c_int), # __pid_t si_pid; /* Which child. */ - ("si_uid", ctypes.c_uint), # __uid_t si_uid; /* Real user ID of sending process. */ - ("si_status", ctypes.c_int), # int si_status; /* Exit value or signal. */ - ("si_utime", ctypes.c_long), # __sigchld_clock_t si_utime; - ("si_stime", ctypes.c_long) # __sigchld_clock_t si_stime; - ] # } _sigchld; - - # /* SIGILL, SIGFPE, SIGSEGV, SIGBUS. 
*/ -class _siginfo_t_U_sigfault(ctypes.Structure): # struct - _fields_ = [ # { - ("si_addr", ctypes.c_void_p), # void *si_addr; /* Faulting insn/memory ref. */ - ("si_addr_lsb", ctypes.c_short) # short int si_addr_lsb; /* Valid LSB of the reported address. */ - ] # } _sigfault; - - # /* SIGPOLL. */ -class _siginfo_t_U_sigpoll(ctypes.Structure): # struct - _fields_ = [ # { - ("si_band", ctypes.c_long), # long int si_band; /* Band event for SIGPOLL. */ - ("si_fd", ctypes.c_int) # int si_fd; - ] # } _sigpoll; + # /* POSIX.1b signals. */ +class _siginfo_t_U_rt(ctypes.Structure): # struct + _fields_ = [ # { + ("si_pid", ctypes.c_int + ), # __pid_t si_pid; /* Sending process ID. */ + ( + "si_uid", ctypes.c_uint + ), # __uid_t si_uid; /* Real user ID of sending process. */ + ("si_sigval", sigval_t + ) # sigval_t si_sigval; /* Signal value. */ + ] # } _rt; - # /* SIGSYS. */ -class _siginfo_t_U_sigsys(ctypes.Structure): # struct - _fields_ = [ # { - ("_call_addr", ctypes.c_void_p), # void *_call_addr; /* Calling user insn. */ - ("_syscall", ctypes.c_int), # int _syscall; /* Triggering system call number. */ - ("_arch", ctypes.c_uint) # unsigned int _arch; /* AUDIT_ARCH_* of syscall. */ - ] # } _sigsys; + # /* SIGCHLD. */ +class _siginfo_t_U_sigchld(ctypes.Structure): # struct + _fields_ = [ # { + ("si_pid", + ctypes.c_int), # __pid_t si_pid; /* Which child. */ + ( + "si_uid", ctypes.c_uint + ), # __uid_t si_uid; /* Real user ID of sending process. */ + ("si_status", ctypes.c_int + ), # int si_status; /* Exit value or signal. */ + ("si_utime", ctypes.c_long), # __sigchld_clock_t si_utime; + ("si_stime", ctypes.c_long) # __sigchld_clock_t si_stime; + ] # } _sigchld; -class _siginfo_t_U(ctypes.Union): # union - _fields_ = [ # { - ("_pad", ctypes.c_int*_SI_PAD_SIZE), # int _pad[__SI_PAD_SIZE]; - # - # /* kill(). */ - ("_kill", _siginfo_t_U_kill), # struct - # { - # __pid_t si_pid; /* Sending process ID. */ - # __uid_t si_uid; /* Real user ID of sending process. 
*/ - # } _kill; - # - # /* POSIX.1b timers. */ - ("_timer", _siginfo_t_U_timer), # struct - # { - # int si_tid; /* Timer ID. */ - # int si_overrun; /* Overrun count. */ - # sigval_t si_sigval; /* Signal value. */ - # } _timer; - # - # /* POSIX.1b signals. */ - ("_rt", _siginfo_t_U_rt), # struct - # { - # __pid_t si_pid; /* Sending process ID. */ - # __uid_t si_uid; /* Real user ID of sending process. */ - # sigval_t si_sigval; /* Signal value. */ - # } _rt; - # - # /* SIGCHLD. */ - ("_sigchld", _siginfo_t_U_sigchld), # struct - # { - # __pid_t si_pid; /* Which child. */ - # __uid_t si_uid; /* Real user ID of sending process. */ - # int si_status; /* Exit value or signal. */ - # __sigchld_clock_t si_utime; - # __sigchld_clock_t si_stime; - # } _sigchld; - # - # /* SIGILL, SIGFPE, SIGSEGV, SIGBUS. */ - ("_sigfault", _siginfo_t_U_sigfault), # struct - # { - # void *si_addr; /* Faulting insn/memory ref. */ - # short int si_addr_lsb; /* Valid LSB of the reported address. */ - # } _sigfault; - # - # /* SIGPOLL. */ - ("_sigpoll", _siginfo_t_U_sigpoll), # struct - # { - # long int si_band; /* Band event for SIGPOLL. */ - # int si_fd; - # } _sigpoll; - # - # /* SIGSYS. */ - ("_sigsys", _siginfo_t_U_sigpoll) # struct - # { - # void *_call_addr; /* Calling user insn. */ - # int _syscall; /* Triggering system call number. */ - # unsigned int _arch; /* AUDIT_ARCH_* of syscall. */ - # } _sigsys; - ] # } _sifields; + # /* SIGILL, SIGFPE, SIGSEGV, SIGBUS. */ +class _siginfo_t_U_sigfault(ctypes.Structure): # struct + _fields_ = [ # { + ("si_addr", ctypes.c_void_p + ), # void *si_addr; /* Faulting insn/memory ref. */ + ( + "si_addr_lsb", ctypes.c_short + ) # short int si_addr_lsb; /* Valid LSB of the reported address. */ + ] # } _sigfault; -class siginfo_t(ctypes.Structure): # typedef struct - _fields_ = [ # { - ("si_signo", ctypes.c_int), # int si_signo; /* Signal number. 
*/ - ("si_errno", ctypes.c_int), # int si_errno; /* If non-zero, an errno value associated with - # this signal, as defined in . */ - ("si_code", ctypes.c_int), # int si_code; /* Signal code. */ - # - ("_sifields", _siginfo_t_U) # union - # { - # int _pad[__SI_PAD_SIZE]; - # - # /* kill(). */ - # struct - # { - # __pid_t si_pid; /* Sending process ID. */ - # __uid_t si_uid; /* Real user ID of sending process. */ - # } _kill; - # - # /* POSIX.1b timers. */ - # struct - # { - # int si_tid; /* Timer ID. */ - # int si_overrun; /* Overrun count. */ - # sigval_t si_sigval; /* Signal value. */ - # } _timer; - # - # /* POSIX.1b signals. */ - # struct - # { - # __pid_t si_pid; /* Sending process ID. */ - # __uid_t si_uid; /* Real user ID of sending process. */ - # sigval_t si_sigval; /* Signal value. */ - # } _rt; - # - # /* SIGCHLD. */ - # struct - # { - # __pid_t si_pid; /* Which child. */ - # __uid_t si_uid; /* Real user ID of sending process. */ - # int si_status; /* Exit value or signal. */ - # __sigchld_clock_t si_utime; - # __sigchld_clock_t si_stime; - # } _sigchld; - # - # /* SIGILL, SIGFPE, SIGSEGV, SIGBUS. */ - # struct - # { - # void *si_addr; /* Faulting insn/memory ref. */ - # short int si_addr_lsb; /* Valid LSB of the reported address. */ - # } _sigfault; - # - # /* SIGPOLL. */ - # struct - # { - # long int si_band; /* Band event for SIGPOLL. */ - # int si_fd; - # } _sigpoll; - # - # /* SIGSYS. */ - # struct - # { - # void *_call_addr; /* Calling user insn. */ - # int _syscall; /* Triggering system call number. */ - # unsigned int _arch; /* AUDIT_ARCH_* of syscall. */ - # } _sigsys; - # } _sifields; - ] # } siginfo_t __SI_ALIGNMENT; + + # /* SIGPOLL. */ +class _siginfo_t_U_sigpoll(ctypes.Structure): # struct + _fields_ = [ # { + ("si_band", ctypes.c_long + ), # long int si_band; /* Band event for SIGPOLL. */ + ("si_fd", ctypes.c_int) # int si_fd; + ] # } _sigpoll; + + + # /* SIGSYS. 
*/ +class _siginfo_t_U_sigsys(ctypes.Structure): # struct + _fields_ = [ # { + ("_call_addr", ctypes.c_void_p + ), # void *_call_addr; /* Calling user insn. */ + ( + "_syscall", ctypes.c_int + ), # int _syscall; /* Triggering system call number. */ + ("_arch", ctypes.c_uint + ) # unsigned int _arch; /* AUDIT_ARCH_* of syscall. */ + ] # } _sigsys; + + +class _siginfo_t_U(ctypes.Union): # union + _fields_ = [ # { + ("_pad", + ctypes.c_int * _SI_PAD_SIZE), # int _pad[__SI_PAD_SIZE]; + # + # /* kill(). */ + ("_kill", _siginfo_t_U_kill), # struct + # { + # __pid_t si_pid; /* Sending process ID. */ + # __uid_t si_uid; /* Real user ID of sending process. */ + # } _kill; + # + # /* POSIX.1b timers. */ + ("_timer", _siginfo_t_U_timer), # struct + # { + # int si_tid; /* Timer ID. */ + # int si_overrun; /* Overrun count. */ + # sigval_t si_sigval; /* Signal value. */ + # } _timer; + # + # /* POSIX.1b signals. */ + ("_rt", _siginfo_t_U_rt), # struct + # { + # __pid_t si_pid; /* Sending process ID. */ + # __uid_t si_uid; /* Real user ID of sending process. */ + # sigval_t si_sigval; /* Signal value. */ + # } _rt; + # + # /* SIGCHLD. */ + ("_sigchld", _siginfo_t_U_sigchld), # struct + # { + # __pid_t si_pid; /* Which child. */ + # __uid_t si_uid; /* Real user ID of sending process. */ + # int si_status; /* Exit value or signal. */ + # __sigchld_clock_t si_utime; + # __sigchld_clock_t si_stime; + # } _sigchld; + # + # /* SIGILL, SIGFPE, SIGSEGV, SIGBUS. */ + ("_sigfault", _siginfo_t_U_sigfault), # struct + # { + # void *si_addr; /* Faulting insn/memory ref. */ + # short int si_addr_lsb; /* Valid LSB of the reported address. */ + # } _sigfault; + # + # /* SIGPOLL. */ + ("_sigpoll", _siginfo_t_U_sigpoll), # struct + # { + # long int si_band; /* Band event for SIGPOLL. */ + # int si_fd; + # } _sigpoll; + # + # /* SIGSYS. */ + ("_sigsys", _siginfo_t_U_sigpoll) # struct + # { + # void *_call_addr; /* Calling user insn. */ + # int _syscall; /* Triggering system call number. 
*/ + # unsigned int _arch; /* AUDIT_ARCH_* of syscall. */ + # } _sigsys; + ] # } _sifields; + + +class siginfo_t(ctypes.Structure): # typedef struct + _fields_ = [ # { + ("si_signo", ctypes.c_int + ), # int si_signo; /* Signal number. */ + ( + "si_errno", ctypes.c_int + ), # int si_errno; /* If non-zero, an errno value associated with + # this signal, as defined in . */ + ("si_code", ctypes.c_int + ), # int si_code; /* Signal code. */ + # + ("_sifields", _siginfo_t_U) # union + # { + # int _pad[__SI_PAD_SIZE]; + # + # /* kill(). */ + # struct + # { + # __pid_t si_pid; /* Sending process ID. */ + # __uid_t si_uid; /* Real user ID of sending process. */ + # } _kill; + # + # /* POSIX.1b timers. */ + # struct + # { + # int si_tid; /* Timer ID. */ + # int si_overrun; /* Overrun count. */ + # sigval_t si_sigval; /* Signal value. */ + # } _timer; + # + # /* POSIX.1b signals. */ + # struct + # { + # __pid_t si_pid; /* Sending process ID. */ + # __uid_t si_uid; /* Real user ID of sending process. */ + # sigval_t si_sigval; /* Signal value. */ + # } _rt; + # + # /* SIGCHLD. */ + # struct + # { + # __pid_t si_pid; /* Which child. */ + # __uid_t si_uid; /* Real user ID of sending process. */ + # int si_status; /* Exit value or signal. */ + # __sigchld_clock_t si_utime; + # __sigchld_clock_t si_stime; + # } _sigchld; + # + # /* SIGILL, SIGFPE, SIGSEGV, SIGBUS. */ + # struct + # { + # void *si_addr; /* Faulting insn/memory ref. */ + # short int si_addr_lsb; /* Valid LSB of the reported address. */ + # } _sigfault; + # + # /* SIGPOLL. */ + # struct + # { + # long int si_band; /* Band event for SIGPOLL. */ + # int si_fd; + # } _sigpoll; + # + # /* SIGSYS. */ + # struct + # { + # void *_call_addr; /* Calling user insn. */ + # int _syscall; /* Triggering system call number. */ + # unsigned int _arch; /* AUDIT_ARCH_* of syscall. */ + # } _sigsys; + # } _sifields; + ] # } siginfo_t __SI_ALIGNMENT; # xsave related. 
-class ymmh_struct(ctypes.Structure): # struct ymmh_struct { - _fields_ = [ - ("ymmh_space", 64*ctypes.c_uint) # u32 ymmh_space[64]; - ] # } __packed; + +class ymmh_struct(ctypes.Structure): # struct ymmh_struct { + _fields_ = [("ymmh_space", 64 * ctypes.c_uint + ) # u32 ymmh_space[64]; + ] # } __packed; -class xsave_hdr_struct(ctypes.Structure): # struct xsave_hdr_struct { - _fields_ = [ - ("xstate_bv", ctypes.c_ulonglong), # u64 xstate_bv; - ("reserved1", ctypes.c_ulonglong*2), # u64 reserved1[2]; - ("reserved2", ctypes.c_ulonglong*5) # u64 reserved2[5]; - ] # } __packed; +class xsave_hdr_struct(ctypes.Structure): # struct xsave_hdr_struct { + _fields_ = [ + ("xstate_bv", ctypes.c_ulonglong + ), # u64 xstate_bv; + ("reserved1", ctypes.c_ulonglong * + 2), # u64 reserved1[2]; + ("reserved2", ctypes.c_ulonglong * 5 + ) # u64 reserved2[5]; + ] # } __packed; -class i387_fxsave_struct(ctypes.Structure): # struct i387_fxsave_struct { - _fields_ = [ - ("cwd", ctypes.c_ushort), # u16 cwd; /* Control Word */ - ("swd", ctypes.c_ushort), # u16 swd; /* Status Word */ - ("twd", ctypes.c_ushort), # u16 twd; /* Tag Word */ - ("fop", ctypes.c_ushort), # u16 fop; /* Last Instruction Opcode */ - # union { - # struct { - ("rip", ctypes.c_ulonglong), # u64 rip; /* Instruction Pointer */ - ("rdp", ctypes.c_ulonglong), # u64 rdp; /* Data Pointer */ - # }; - # struct { - # u32 fip; /* FPU IP Offset */ - # u32 fcs; /* FPU IP Selector */ - # u32 foo; /* FPU Operand Offset */ - # u32 fos; /* FPU Operand Selector */ - # }; - # }; - ("mxcsr", ctypes.c_uint), # u32 mxcsr; /* MXCSR Register State */ - ("mxcsr_mask", ctypes.c_uint), # u32 mxcsr_mask; /* MXCSR Mask */ - # - # /* 8*16 bytes for each FP-reg = 128 bytes */ - ("st_space", ctypes.c_uint*32), # u32 st_space[32]; -# - # /* 16*16 bytes for each XMM-reg = 256 bytes */ - ("xmm_space", ctypes.c_uint*64), # u32 xmm_space[64]; - # - ("padding", ctypes.c_uint*12), # u32 padding[12]; - # - # union { - ("padding1", ctypes.c_uint*12) # u32 
padding1[12]; - # u32 sw_reserved[12]; - # }; - # - ] # } __aligned(16); +class i387_fxsave_struct(ctypes.Structure): # struct i387_fxsave_struct { + _fields_ = [ + ( + "cwd", ctypes.c_ushort + ), # u16 cwd; /* Control Word */ + ( + "swd", ctypes.c_ushort + ), # u16 swd; /* Status Word */ + ( + "twd", ctypes.c_ushort + ), # u16 twd; /* Tag Word */ + ( + "fop", ctypes.c_ushort + ), # u16 fop; /* Last Instruction Opcode */ + # union { + # struct { + ( + "rip", ctypes.c_ulonglong + ), # u64 rip; /* Instruction Pointer */ + ( + "rdp", ctypes.c_ulonglong + ), # u64 rdp; /* Data Pointer */ + # }; + # struct { + # u32 fip; /* FPU IP Offset */ + # u32 fcs; /* FPU IP Selector */ + # u32 foo; /* FPU Operand Offset */ + # u32 fos; /* FPU Operand Selector */ + # }; + # }; + ( + "mxcsr", ctypes.c_uint + ), # u32 mxcsr; /* MXCSR Register State */ + ( + "mxcsr_mask", ctypes.c_uint + ), # u32 mxcsr_mask; /* MXCSR Mask */ + # + # /* 8*16 bytes for each FP-reg = 128 bytes */ + ("st_space", ctypes.c_uint * 32 + ), # u32 st_space[32]; + # + # /* 16*16 bytes for each XMM-reg = 256 bytes */ + ("xmm_space", ctypes.c_uint * 64 + ), # u32 xmm_space[64]; + # + ("padding", ctypes.c_uint * 12 + ), # u32 padding[12]; + # + # union { + ("padding1", ctypes.c_uint * 12 + ) # u32 padding1[12]; + # u32 sw_reserved[12]; + # }; + # + ] # } __aligned(16); -class elf_xsave_struct(ctypes.Structure): # struct xsave_struct { - _fields_ = [ - ("i387", i387_fxsave_struct), # struct i387_fxsave_struct i387; - ("xsave_hdr", xsave_hdr_struct), # struct xsave_hdr_struct xsave_hdr; - ("ymmh", ymmh_struct) # struct ymmh_struct ymmh; - ] # } __aligned(FP_MIN_ALIGN_BYTES) __packed; +class elf_xsave_struct(ctypes.Structure): # struct xsave_struct { + _fields_ = [ + ("i387", + i387_fxsave_struct), # struct i387_fxsave_struct i387; + ("xsave_hdr", xsave_hdr_struct + ), # struct xsave_hdr_struct xsave_hdr; + ("ymmh", ymmh_struct) # struct ymmh_struct ymmh; + ] # } __aligned(FP_MIN_ALIGN_BYTES) __packed; diff --git 
a/lib/py/cli.py b/lib/py/cli.py index abaf0720c..da343022e 100755 --- a/lib/py/cli.py +++ b/lib/py/cli.py @@ -6,337 +6,409 @@ import os import pycriu + def inf(opts): - if opts['in']: - return open(opts['in'], 'rb') - else: - return sys.stdin + if opts['in']: + return open(opts['in'], 'rb') + else: + return sys.stdin + def outf(opts): - if opts['out']: - return open(opts['out'], 'w+') - else: - return sys.stdout + if opts['out']: + return open(opts['out'], 'w+') + else: + return sys.stdout + def dinf(opts, name): - return open(os.path.join(opts['dir'], name)) + return open(os.path.join(opts['dir'], name)) + def decode(opts): - indent = None + indent = None - try: - img = pycriu.images.load(inf(opts), opts['pretty'], opts['nopl']) - except pycriu.images.MagicException as exc: - print("Unknown magic %#x.\n"\ - "Maybe you are feeding me an image with "\ - "raw data(i.e. pages.img)?" % exc.magic, file=sys.stderr) - sys.exit(1) + try: + img = pycriu.images.load(inf(opts), opts['pretty'], opts['nopl']) + except pycriu.images.MagicException as exc: + print("Unknown magic %#x.\n"\ + "Maybe you are feeding me an image with "\ + "raw data(i.e. pages.img)?" 
% exc.magic, file=sys.stderr) + sys.exit(1) - if opts['pretty']: - indent = 4 + if opts['pretty']: + indent = 4 + + f = outf(opts) + json.dump(img, f, indent=indent) + if f == sys.stdout: + f.write("\n") - f = outf(opts) - json.dump(img, f, indent=indent) - if f == sys.stdout: - f.write("\n") def encode(opts): - img = json.load(inf(opts)) - pycriu.images.dump(img, outf(opts)) + img = json.load(inf(opts)) + pycriu.images.dump(img, outf(opts)) + def info(opts): - infs = pycriu.images.info(inf(opts)) - json.dump(infs, sys.stdout, indent = 4) - print() + infs = pycriu.images.info(inf(opts)) + json.dump(infs, sys.stdout, indent=4) + print() + def get_task_id(p, val): - return p[val] if val in p else p['ns_' + val][0] + return p[val] if val in p else p['ns_' + val][0] + + # # Explorers # -class ps_item: - def __init__(self, p, core): - self.pid = get_task_id(p, 'pid') - self.ppid = p['ppid'] - self.p = p - self.core = core - self.kids = [] -def show_ps(p, opts, depth = 0): - print("%7d%7d%7d %s%s" % (p.pid, get_task_id(p.p, 'pgid'), get_task_id(p.p, 'sid'), - ' ' * (4 * depth), p.core['tc']['comm'])) - for kid in p.kids: - show_ps(kid, opts, depth + 1) +class ps_item: + def __init__(self, p, core): + self.pid = get_task_id(p, 'pid') + self.ppid = p['ppid'] + self.p = p + self.core = core + self.kids = [] + + +def show_ps(p, opts, depth=0): + print("%7d%7d%7d %s%s" % + (p.pid, get_task_id(p.p, 'pgid'), get_task_id(p.p, 'sid'), ' ' * + (4 * depth), p.core['tc']['comm'])) + for kid in p.kids: + show_ps(kid, opts, depth + 1) + def explore_ps(opts): - pss = { } - ps_img = pycriu.images.load(dinf(opts, 'pstree.img')) - for p in ps_img['entries']: - core = pycriu.images.load(dinf(opts, 'core-%d.img' % get_task_id(p, 'pid'))) - ps = ps_item(p, core['entries'][0]) - pss[ps.pid] = ps + pss = {} + ps_img = pycriu.images.load(dinf(opts, 'pstree.img')) + for p in ps_img['entries']: + core = pycriu.images.load( + dinf(opts, 'core-%d.img' % get_task_id(p, 'pid'))) + ps = ps_item(p, 
core['entries'][0]) + pss[ps.pid] = ps - # Build tree - psr = None - for pid in pss: - p = pss[pid] - if p.ppid == 0: - psr = p - continue + # Build tree + psr = None + for pid in pss: + p = pss[pid] + if p.ppid == 0: + psr = p + continue - pp = pss[p.ppid] - pp.kids.append(p) + pp = pss[p.ppid] + pp.kids.append(p) + + print("%7s%7s%7s %s" % ('PID', 'PGID', 'SID', 'COMM')) + show_ps(psr, opts) - print("%7s%7s%7s %s" % ('PID', 'PGID', 'SID', 'COMM')) - show_ps(psr, opts) files_img = None + def ftype_find_in_files(opts, ft, fid): - global files_img + global files_img - if files_img is None: - try: - files_img = pycriu.images.load(dinf(opts, "files.img"))['entries'] - except: - files_img = [] + if files_img is None: + try: + files_img = pycriu.images.load(dinf(opts, "files.img"))['entries'] + except: + files_img = [] - if len(files_img) == 0: - return None + if len(files_img) == 0: + return None - for f in files_img: - if f['id'] == fid: - return f + for f in files_img: + if f['id'] == fid: + return f - return None + return None def ftype_find_in_image(opts, ft, fid, img): - f = ftype_find_in_files(opts, ft, fid) - if f: - return f[ft['field']] + f = ftype_find_in_files(opts, ft, fid) + if f: + return f[ft['field']] + + if ft['img'] == None: + ft['img'] = pycriu.images.load(dinf(opts, img))['entries'] + for f in ft['img']: + if f['id'] == fid: + return f + return None - if ft['img'] == None: - ft['img'] = pycriu.images.load(dinf(opts, img))['entries'] - for f in ft['img']: - if f['id'] == fid: - return f - return None def ftype_reg(opts, ft, fid): - rf = ftype_find_in_image(opts, ft, fid, 'reg-files.img') - return rf and rf['name'] or 'unknown path' + rf = ftype_find_in_image(opts, ft, fid, 'reg-files.img') + return rf and rf['name'] or 'unknown path' + def ftype_pipe(opts, ft, fid): - p = ftype_find_in_image(opts, ft, fid, 'pipes.img') - return p and 'pipe[%d]' % p['pipe_id'] or 'pipe[?]' + p = ftype_find_in_image(opts, ft, fid, 'pipes.img') + return p and 'pipe[%d]' 
% p['pipe_id'] or 'pipe[?]' + def ftype_unix(opts, ft, fid): - ux = ftype_find_in_image(opts, ft, fid, 'unixsk.img') - if not ux: - return 'unix[?]' + ux = ftype_find_in_image(opts, ft, fid, 'unixsk.img') + if not ux: + return 'unix[?]' + + n = ux['name'] and ' %s' % ux['name'] or '' + return 'unix[%d (%d)%s]' % (ux['ino'], ux['peer'], n) - n = ux['name'] and ' %s' % ux['name'] or '' - return 'unix[%d (%d)%s]' % (ux['ino'], ux['peer'], n) file_types = { - 'REG': {'get': ftype_reg, 'img': None, 'field': 'reg'}, - 'PIPE': {'get': ftype_pipe, 'img': None, 'field': 'pipe'}, - 'UNIXSK': {'get': ftype_unix, 'img': None, 'field': 'usk'}, + 'REG': { + 'get': ftype_reg, + 'img': None, + 'field': 'reg' + }, + 'PIPE': { + 'get': ftype_pipe, + 'img': None, + 'field': 'pipe' + }, + 'UNIXSK': { + 'get': ftype_unix, + 'img': None, + 'field': 'usk' + }, } -def ftype_gen(opts, ft, fid): - return '%s.%d' % (ft['typ'], fid) -files_cache = { } +def ftype_gen(opts, ft, fid): + return '%s.%d' % (ft['typ'], fid) + + +files_cache = {} + def get_file_str(opts, fd): - key = (fd['type'], fd['id']) - f = files_cache.get(key, None) - if not f: - ft = file_types.get(fd['type'], {'get': ftype_gen, 'typ': fd['type']}) - f = ft['get'](opts, ft, fd['id']) - files_cache[key] = f + key = (fd['type'], fd['id']) + f = files_cache.get(key, None) + if not f: + ft = file_types.get(fd['type'], {'get': ftype_gen, 'typ': fd['type']}) + f = ft['get'](opts, ft, fd['id']) + files_cache[key] = f + + return f - return f def explore_fds(opts): - ps_img = pycriu.images.load(dinf(opts, 'pstree.img')) - for p in ps_img['entries']: - pid = get_task_id(p, 'pid') - idi = pycriu.images.load(dinf(opts, 'ids-%s.img' % pid)) - fdt = idi['entries'][0]['files_id'] - fdi = pycriu.images.load(dinf(opts, 'fdinfo-%d.img' % fdt)) + ps_img = pycriu.images.load(dinf(opts, 'pstree.img')) + for p in ps_img['entries']: + pid = get_task_id(p, 'pid') + idi = pycriu.images.load(dinf(opts, 'ids-%s.img' % pid)) + fdt = 
idi['entries'][0]['files_id'] + fdi = pycriu.images.load(dinf(opts, 'fdinfo-%d.img' % fdt)) - print("%d" % pid) - for fd in fdi['entries']: - print("\t%7d: %s" % (fd['fd'], get_file_str(opts, fd))) + print("%d" % pid) + for fd in fdi['entries']: + print("\t%7d: %s" % (fd['fd'], get_file_str(opts, fd))) - fdi = pycriu.images.load(dinf(opts, 'fs-%d.img' % pid))['entries'][0] - print("\t%7s: %s" % ('cwd', get_file_str(opts, {'type': 'REG', 'id': fdi['cwd_id']}))) - print("\t%7s: %s" % ('root', get_file_str(opts, {'type': 'REG', 'id': fdi['root_id']}))) + fdi = pycriu.images.load(dinf(opts, 'fs-%d.img' % pid))['entries'][0] + print("\t%7s: %s" % + ('cwd', get_file_str(opts, { + 'type': 'REG', + 'id': fdi['cwd_id'] + }))) + print("\t%7s: %s" % + ('root', get_file_str(opts, { + 'type': 'REG', + 'id': fdi['root_id'] + }))) class vma_id: - def __init__(self): - self.__ids = {} - self.__last = 1 + def __init__(self): + self.__ids = {} + self.__last = 1 - def get(self, iid): - ret = self.__ids.get(iid, None) - if not ret: - ret = self.__last - self.__last += 1 - self.__ids[iid] = ret + def get(self, iid): + ret = self.__ids.get(iid, None) + if not ret: + ret = self.__last + self.__last += 1 + self.__ids[iid] = ret + + return ret - return ret def explore_mems(opts): - ps_img = pycriu.images.load(dinf(opts, 'pstree.img')) - vids = vma_id() - for p in ps_img['entries']: - pid = get_task_id(p, 'pid') - mmi = pycriu.images.load(dinf(opts, 'mm-%d.img' % pid))['entries'][0] + ps_img = pycriu.images.load(dinf(opts, 'pstree.img')) + vids = vma_id() + for p in ps_img['entries']: + pid = get_task_id(p, 'pid') + mmi = pycriu.images.load(dinf(opts, 'mm-%d.img' % pid))['entries'][0] - print("%d" % pid) - print("\t%-36s %s" % ('exe', get_file_str(opts, {'type': 'REG', 'id': mmi['exe_file_id']}))) + print("%d" % pid) + print("\t%-36s %s" % ('exe', + get_file_str(opts, { + 'type': 'REG', + 'id': mmi['exe_file_id'] + }))) - for vma in mmi['vmas']: - st = vma['status'] - if st & (1 << 10): - 
fn = ' ' + 'ips[%lx]' % vids.get(vma['shmid']) - elif st & (1 << 8): - fn = ' ' + 'shmem[%lx]' % vids.get(vma['shmid']) - elif st & (1 << 11): - fn = ' ' + 'packet[%lx]' % vids.get(vma['shmid']) - elif st & ((1 << 6) | (1 << 7)): - fn = ' ' + get_file_str(opts, {'type': 'REG', 'id': vma['shmid']}) - if vma['pgoff']: - fn += ' + %#lx' % vma['pgoff'] - if st & (1 << 7): - fn += ' (s)' - elif st & (1 << 1): - fn = ' [stack]' - elif st & (1 << 2): - fn = ' [vsyscall]' - elif st & (1 << 3): - fn = ' [vdso]' - elif vma['flags'] & 0x0100: # growsdown - fn = ' [stack?]' - else: - fn = '' + for vma in mmi['vmas']: + st = vma['status'] + if st & (1 << 10): + fn = ' ' + 'ips[%lx]' % vids.get(vma['shmid']) + elif st & (1 << 8): + fn = ' ' + 'shmem[%lx]' % vids.get(vma['shmid']) + elif st & (1 << 11): + fn = ' ' + 'packet[%lx]' % vids.get(vma['shmid']) + elif st & ((1 << 6) | (1 << 7)): + fn = ' ' + get_file_str(opts, { + 'type': 'REG', + 'id': vma['shmid'] + }) + if vma['pgoff']: + fn += ' + %#lx' % vma['pgoff'] + if st & (1 << 7): + fn += ' (s)' + elif st & (1 << 1): + fn = ' [stack]' + elif st & (1 << 2): + fn = ' [vsyscall]' + elif st & (1 << 3): + fn = ' [vdso]' + elif vma['flags'] & 0x0100: # growsdown + fn = ' [stack?]' + else: + fn = '' - if not st & (1 << 0): - fn += ' *' + if not st & (1 << 0): + fn += ' *' - prot = vma['prot'] & 0x1 and 'r' or '-' - prot += vma['prot'] & 0x2 and 'w' or '-' - prot += vma['prot'] & 0x4 and 'x' or '-' + prot = vma['prot'] & 0x1 and 'r' or '-' + prot += vma['prot'] & 0x2 and 'w' or '-' + prot += vma['prot'] & 0x4 and 'x' or '-' - astr = '%08lx-%08lx' % (vma['start'], vma['end']) - print("\t%-36s%s%s" % (astr, prot, fn)) + astr = '%08lx-%08lx' % (vma['start'], vma['end']) + print("\t%-36s%s%s" % (astr, prot, fn)) def explore_rss(opts): - ps_img = pycriu.images.load(dinf(opts, 'pstree.img')) - for p in ps_img['entries']: - pid = get_task_id(p, 'pid') - vmas = pycriu.images.load(dinf(opts, 'mm-%d.img' % pid))['entries'][0]['vmas'] - pms = 
pycriu.images.load(dinf(opts, 'pagemap-%d.img' % pid))['entries'] + ps_img = pycriu.images.load(dinf(opts, 'pstree.img')) + for p in ps_img['entries']: + pid = get_task_id(p, 'pid') + vmas = pycriu.images.load(dinf(opts, 'mm-%d.img' % + pid))['entries'][0]['vmas'] + pms = pycriu.images.load(dinf(opts, 'pagemap-%d.img' % pid))['entries'] - print("%d" % pid) - vmi = 0 - pvmi = -1 - for pm in pms[1:]: - pstr = '\t%lx / %-8d' % (pm['vaddr'], pm['nr_pages']) - while vmas[vmi]['end'] <= pm['vaddr']: - vmi += 1 + print("%d" % pid) + vmi = 0 + pvmi = -1 + for pm in pms[1:]: + pstr = '\t%lx / %-8d' % (pm['vaddr'], pm['nr_pages']) + while vmas[vmi]['end'] <= pm['vaddr']: + vmi += 1 - pme = pm['vaddr'] + (pm['nr_pages'] << 12) - vstr = '' - while vmas[vmi]['start'] < pme: - vma = vmas[vmi] - if vmi == pvmi: - vstr += ' ~' - else: - vstr += ' %08lx / %-8d' % (vma['start'], (vma['end'] - vma['start'])>>12) - if vma['status'] & ((1 << 6) | (1 << 7)): - vstr += ' ' + get_file_str(opts, {'type': 'REG', 'id': vma['shmid']}) - pvmi = vmi - vstr += '\n\t%23s' % '' - vmi += 1 + pme = pm['vaddr'] + (pm['nr_pages'] << 12) + vstr = '' + while vmas[vmi]['start'] < pme: + vma = vmas[vmi] + if vmi == pvmi: + vstr += ' ~' + else: + vstr += ' %08lx / %-8d' % ( + vma['start'], (vma['end'] - vma['start']) >> 12) + if vma['status'] & ((1 << 6) | (1 << 7)): + vstr += ' ' + get_file_str(opts, { + 'type': 'REG', + 'id': vma['shmid'] + }) + pvmi = vmi + vstr += '\n\t%23s' % '' + vmi += 1 - vmi -= 1 + vmi -= 1 - print('%-24s%s' % (pstr, vstr)) + print('%-24s%s' % (pstr, vstr)) +explorers = { + 'ps': explore_ps, + 'fds': explore_fds, + 'mems': explore_mems, + 'rss': explore_rss +} -explorers = { 'ps': explore_ps, 'fds': explore_fds, 'mems': explore_mems, 'rss': explore_rss } def explore(opts): - explorers[opts['what']](opts) + explorers[opts['what']](opts) + def main(): - desc = 'CRiu Image Tool' - parser = argparse.ArgumentParser(description=desc, - formatter_class=argparse.RawTextHelpFormatter) + 
desc = 'CRiu Image Tool' + parser = argparse.ArgumentParser( + description=desc, formatter_class=argparse.RawTextHelpFormatter) - subparsers = parser.add_subparsers(help='Use crit CMD --help for command-specific help') + subparsers = parser.add_subparsers( + help='Use crit CMD --help for command-specific help') - # Decode - decode_parser = subparsers.add_parser('decode', - help = 'convert criu image from binary type to json') - decode_parser.add_argument('--pretty', - help = 'Multiline with indents and some numerical fields in field-specific format', - action = 'store_true') - decode_parser.add_argument('-i', - '--in', - help = 'criu image in binary format to be decoded (stdin by default)') - decode_parser.add_argument('-o', - '--out', - help = 'where to put criu image in json format (stdout by default)') - decode_parser.set_defaults(func=decode, nopl=False) + # Decode + decode_parser = subparsers.add_parser( + 'decode', help='convert criu image from binary type to json') + decode_parser.add_argument( + '--pretty', + help= + 'Multiline with indents and some numerical fields in field-specific format', + action='store_true') + decode_parser.add_argument( + '-i', + '--in', + help='criu image in binary format to be decoded (stdin by default)') + decode_parser.add_argument( + '-o', + '--out', + help='where to put criu image in json format (stdout by default)') + decode_parser.set_defaults(func=decode, nopl=False) - # Encode - encode_parser = subparsers.add_parser('encode', - help = 'convert criu image from json type to binary') - encode_parser.add_argument('-i', - '--in', - help = 'criu image in json format to be encoded (stdin by default)') - encode_parser.add_argument('-o', - '--out', - help = 'where to put criu image in binary format (stdout by default)') - encode_parser.set_defaults(func=encode) + # Encode + encode_parser = subparsers.add_parser( + 'encode', help='convert criu image from json type to binary') + encode_parser.add_argument( + '-i', + '--in', + 
help='criu image in json format to be encoded (stdin by default)') + encode_parser.add_argument( + '-o', + '--out', + help='where to put criu image in binary format (stdout by default)') + encode_parser.set_defaults(func=encode) - # Info - info_parser = subparsers.add_parser('info', - help = 'show info about image') - info_parser.add_argument("in") - info_parser.set_defaults(func=info) + # Info + info_parser = subparsers.add_parser('info', help='show info about image') + info_parser.add_argument("in") + info_parser.set_defaults(func=info) - # Explore - x_parser = subparsers.add_parser('x', help = 'explore image dir') - x_parser.add_argument('dir') - x_parser.add_argument('what', choices = [ 'ps', 'fds', 'mems', 'rss']) - x_parser.set_defaults(func=explore) + # Explore + x_parser = subparsers.add_parser('x', help='explore image dir') + x_parser.add_argument('dir') + x_parser.add_argument('what', choices=['ps', 'fds', 'mems', 'rss']) + x_parser.set_defaults(func=explore) - # Show - show_parser = subparsers.add_parser('show', - help = "convert criu image from binary to human-readable json") - show_parser.add_argument("in") - show_parser.add_argument('--nopl', help = 'do not show entry payload (if exists)', action = 'store_true') - show_parser.set_defaults(func=decode, pretty=True, out=None) + # Show + show_parser = subparsers.add_parser( + 'show', help="convert criu image from binary to human-readable json") + show_parser.add_argument("in") + show_parser.add_argument('--nopl', + help='do not show entry payload (if exists)', + action='store_true') + show_parser.set_defaults(func=decode, pretty=True, out=None) - opts = vars(parser.parse_args()) + opts = vars(parser.parse_args()) - if not opts: - sys.stderr.write(parser.format_usage()) - sys.stderr.write("crit: error: too few arguments\n") - sys.exit(1) + if not opts: + sys.stderr.write(parser.format_usage()) + sys.stderr.write("crit: error: too few arguments\n") + sys.exit(1) + + opts["func"](opts) - opts["func"](opts) 
if __name__ == '__main__': - main() + main() diff --git a/lib/py/criu.py b/lib/py/criu.py index de1a214a3..d94fea9e1 100644 --- a/lib/py/criu.py +++ b/lib/py/criu.py @@ -8,325 +8,336 @@ import struct import pycriu.rpc_pb2 as rpc + class _criu_comm: - """ + """ Base class for communication classes. """ - COMM_SK = 0 - COMM_FD = 1 - COMM_BIN = 2 - comm_type = None - comm = None - sk = None + COMM_SK = 0 + COMM_FD = 1 + COMM_BIN = 2 + comm_type = None + comm = None + sk = None - def connect(self, daemon): - """ + def connect(self, daemon): + """ Connect to criu and return socket object. daemon -- is for whether or not criu should daemonize if executing criu from binary(comm_bin). """ - pass + pass - def disconnect(self): - """ + def disconnect(self): + """ Disconnect from criu. """ - pass + pass class _criu_comm_sk(_criu_comm): - """ + """ Communication class for unix socket. """ - def __init__(self, sk_path): - self.comm_type = self.COMM_SK - self.comm = sk_path - def connect(self, daemon): - self.sk = socket.socket(socket.AF_UNIX, socket.SOCK_SEQPACKET) - self.sk.connect(self.comm) + def __init__(self, sk_path): + self.comm_type = self.COMM_SK + self.comm = sk_path - return self.sk + def connect(self, daemon): + self.sk = socket.socket(socket.AF_UNIX, socket.SOCK_SEQPACKET) + self.sk.connect(self.comm) - def disconnect(self): - self.sk.close() + return self.sk + + def disconnect(self): + self.sk.close() class _criu_comm_fd(_criu_comm): - """ + """ Communication class for file descriptor. 
""" - def __init__(self, fd): - self.comm_type = self.COMM_FD - self.comm = fd - def connect(self, daemon): - self.sk = socket.fromfd(self.comm, socket.AF_UNIX, socket.SOCK_SEQPACKET) + def __init__(self, fd): + self.comm_type = self.COMM_FD + self.comm = fd - return self.sk + def connect(self, daemon): + self.sk = socket.fromfd(self.comm, socket.AF_UNIX, + socket.SOCK_SEQPACKET) + + return self.sk + + def disconnect(self): + self.sk.close() - def disconnect(self): - self.sk.close() class _criu_comm_bin(_criu_comm): - """ + """ Communication class for binary. """ - def __init__(self, bin_path): - self.comm_type = self.COMM_BIN - self.comm = bin_path - self.swrk = None - self.daemon = None - def connect(self, daemon): - # Kind of the same thing we do in libcriu - css = socket.socketpair(socket.AF_UNIX, socket.SOCK_SEQPACKET) - flags = fcntl.fcntl(css[1], fcntl.F_GETFD) - fcntl.fcntl(css[1], fcntl.F_SETFD, flags | fcntl.FD_CLOEXEC) - flags = fcntl.fcntl(css[0], fcntl.F_GETFD) - fcntl.fcntl(css[0], fcntl.F_SETFD, flags & ~fcntl.FD_CLOEXEC) + def __init__(self, bin_path): + self.comm_type = self.COMM_BIN + self.comm = bin_path + self.swrk = None + self.daemon = None - self.daemon = daemon + def connect(self, daemon): + # Kind of the same thing we do in libcriu + css = socket.socketpair(socket.AF_UNIX, socket.SOCK_SEQPACKET) + flags = fcntl.fcntl(css[1], fcntl.F_GETFD) + fcntl.fcntl(css[1], fcntl.F_SETFD, flags | fcntl.FD_CLOEXEC) + flags = fcntl.fcntl(css[0], fcntl.F_GETFD) + fcntl.fcntl(css[0], fcntl.F_SETFD, flags & ~fcntl.FD_CLOEXEC) - p = os.fork() + self.daemon = daemon - if p == 0: - def exec_criu(): - os.close(0) - os.close(1) - os.close(2) + p = os.fork() - css[0].send(struct.pack('i', os.getpid())) - os.execv(self.comm, [self.comm, 'swrk', "%d" % css[0].fileno()]) - os._exit(1) + if p == 0: - if daemon: - # Python has no daemon(3) alternative, - # so we need to mimic it ourself. 
- p = os.fork() + def exec_criu(): + os.close(0) + os.close(1) + os.close(2) - if p == 0: - os.setsid() + css[0].send(struct.pack('i', os.getpid())) + os.execv(self.comm, + [self.comm, 'swrk', + "%d" % css[0].fileno()]) + os._exit(1) - exec_criu() - else: - os._exit(0) - else: - exec_criu() - else: - if daemon: - os.waitpid(p, 0) + if daemon: + # Python has no daemon(3) alternative, + # so we need to mimic it ourself. + p = os.fork() - css[0].close() - self.swrk = struct.unpack('i', css[1].recv(4))[0] - self.sk = css[1] + if p == 0: + os.setsid() - return self.sk + exec_criu() + else: + os._exit(0) + else: + exec_criu() + else: + if daemon: + os.waitpid(p, 0) - def disconnect(self): - self.sk.close() - if not self.daemon: - os.waitpid(self.swrk, 0) + css[0].close() + self.swrk = struct.unpack('i', css[1].recv(4))[0] + self.sk = css[1] + + return self.sk + + def disconnect(self): + self.sk.close() + if not self.daemon: + os.waitpid(self.swrk, 0) class CRIUException(Exception): - """ + """ Exception class for handling and storing criu errors. """ - typ = None - _str = None + typ = None + _str = None - def __str__(self): - return self._str + def __str__(self): + return self._str class CRIUExceptionInternal(CRIUException): - """ + """ Exception class for handling and storing internal errors. """ - def __init__(self, typ, s): - self.typ = typ - self._str = "%s failed with internal error: %s" % (rpc.criu_req_type.Name(self.typ), s) + + def __init__(self, typ, s): + self.typ = typ + self._str = "%s failed with internal error: %s" % ( + rpc.criu_req_type.Name(self.typ), s) class CRIUExceptionExternal(CRIUException): - """ + """ Exception class for handling and storing criu RPC errors. 
""" - def __init__(self, req_typ, resp_typ, errno): - self.typ = req_typ - self.resp_typ = resp_typ - self.errno = errno - self._str = self._gen_error_str() + def __init__(self, req_typ, resp_typ, errno): + self.typ = req_typ + self.resp_typ = resp_typ + self.errno = errno + self._str = self._gen_error_str() - def _gen_error_str(self): - s = "%s failed: " % (rpc.criu_req_type.Name(self.typ), ) + def _gen_error_str(self): + s = "%s failed: " % (rpc.criu_req_type.Name(self.typ), ) - if self.typ != self.resp_typ: - s += "Unexpected response type %d: " % (self.resp_typ, ) + if self.typ != self.resp_typ: + s += "Unexpected response type %d: " % (self.resp_typ, ) - s += "Error(%d): " % (self.errno, ) + s += "Error(%d): " % (self.errno, ) - if self.errno == errno.EBADRQC: - s += "Bad options" + if self.errno == errno.EBADRQC: + s += "Bad options" - if self.typ == rpc.DUMP: - if self.errno == errno.ESRCH: - s += "No process with such pid" + if self.typ == rpc.DUMP: + if self.errno == errno.ESRCH: + s += "No process with such pid" - if self.typ == rpc.RESTORE: - if self.errno == errno.EEXIST: - s += "Process with requested pid already exists" + if self.typ == rpc.RESTORE: + if self.errno == errno.EEXIST: + s += "Process with requested pid already exists" - s += "Unknown" + s += "Unknown" - return s + return s class criu: - """ + """ Call criu through RPC. """ - opts = None #CRIU options in pb format + opts = None #CRIU options in pb format - _comm = None #Communication method + _comm = None #Communication method - def __init__(self): - self.use_binary('criu') - self.opts = rpc.criu_opts() - self.sk = None + def __init__(self): + self.use_binary('criu') + self.opts = rpc.criu_opts() + self.sk = None - def use_sk(self, sk_name): - """ + def use_sk(self, sk_name): + """ Access criu using unix socket which that belongs to criu service daemon. 
""" - self._comm = _criu_comm_sk(sk_name) + self._comm = _criu_comm_sk(sk_name) - def use_fd(self, fd): - """ + def use_fd(self, fd): + """ Access criu using provided fd. """ - self._comm = _criu_comm_fd(fd) + self._comm = _criu_comm_fd(fd) - def use_binary(self, bin_name): - """ + def use_binary(self, bin_name): + """ Access criu by execing it using provided path to criu binary. """ - self._comm = _criu_comm_bin(bin_name) + self._comm = _criu_comm_bin(bin_name) - def _send_req_and_recv_resp(self, req): - """ + def _send_req_and_recv_resp(self, req): + """ As simple as send request and receive response. """ - # In case of self-dump we need to spawn criu swrk detached - # from our current process, as criu has a hard time separating - # process resources from its own if criu is located in a same - # process tree it is trying to dump. - daemon = False - if req.type == rpc.DUMP and not req.opts.HasField('pid'): - daemon = True + # In case of self-dump we need to spawn criu swrk detached + # from our current process, as criu has a hard time separating + # process resources from its own if criu is located in a same + # process tree it is trying to dump. 
+ daemon = False + if req.type == rpc.DUMP and not req.opts.HasField('pid'): + daemon = True - try: - if not self.sk: - s = self._comm.connect(daemon) - else: - s = self.sk + try: + if not self.sk: + s = self._comm.connect(daemon) + else: + s = self.sk - if req.keep_open: - self.sk = s + if req.keep_open: + self.sk = s - s.send(req.SerializeToString()) + s.send(req.SerializeToString()) - buf = s.recv(len(s.recv(1, socket.MSG_TRUNC | socket.MSG_PEEK))) + buf = s.recv(len(s.recv(1, socket.MSG_TRUNC | socket.MSG_PEEK))) - if not req.keep_open: - self._comm.disconnect() + if not req.keep_open: + self._comm.disconnect() - resp = rpc.criu_resp() - resp.ParseFromString(buf) - except Exception as e: - raise CRIUExceptionInternal(req.type, str(e)) + resp = rpc.criu_resp() + resp.ParseFromString(buf) + except Exception as e: + raise CRIUExceptionInternal(req.type, str(e)) - return resp + return resp - def check(self): - """ + def check(self): + """ Checks whether the kernel support is up-to-date. """ - req = rpc.criu_req() - req.type = rpc.CHECK + req = rpc.criu_req() + req.type = rpc.CHECK - resp = self._send_req_and_recv_resp(req) + resp = self._send_req_and_recv_resp(req) - if not resp.success: - raise CRIUExceptionExternal(req.type, resp.type, resp.cr_errno) + if not resp.success: + raise CRIUExceptionExternal(req.type, resp.type, resp.cr_errno) - def dump(self): - """ + def dump(self): + """ Checkpoint a process/tree identified by opts.pid. 
""" - req = rpc.criu_req() - req.type = rpc.DUMP - req.opts.MergeFrom(self.opts) + req = rpc.criu_req() + req.type = rpc.DUMP + req.opts.MergeFrom(self.opts) - resp = self._send_req_and_recv_resp(req) + resp = self._send_req_and_recv_resp(req) - if not resp.success: - raise CRIUExceptionExternal(req.type, resp.type, resp.cr_errno) + if not resp.success: + raise CRIUExceptionExternal(req.type, resp.type, resp.cr_errno) - return resp.dump + return resp.dump - def pre_dump(self): - """ + def pre_dump(self): + """ Checkpoint a process/tree identified by opts.pid. """ - req = rpc.criu_req() - req.type = rpc.PRE_DUMP - req.opts.MergeFrom(self.opts) + req = rpc.criu_req() + req.type = rpc.PRE_DUMP + req.opts.MergeFrom(self.opts) - resp = self._send_req_and_recv_resp(req) + resp = self._send_req_and_recv_resp(req) - if not resp.success: - raise CRIUExceptionExternal(req.type, resp.type, resp.cr_errno) + if not resp.success: + raise CRIUExceptionExternal(req.type, resp.type, resp.cr_errno) - return resp.dump + return resp.dump - def restore(self): - """ + def restore(self): + """ Restore a process/tree. 
""" - req = rpc.criu_req() - req.type = rpc.RESTORE - req.opts.MergeFrom(self.opts) + req = rpc.criu_req() + req.type = rpc.RESTORE + req.opts.MergeFrom(self.opts) - resp = self._send_req_and_recv_resp(req) + resp = self._send_req_and_recv_resp(req) - if not resp.success: - raise CRIUExceptionExternal(req.type, resp.type, resp.cr_errno) + if not resp.success: + raise CRIUExceptionExternal(req.type, resp.type, resp.cr_errno) - return resp.restore + return resp.restore - def page_server_chld(self): - req = rpc.criu_req() - req.type = rpc.PAGE_SERVER_CHLD - req.opts.MergeFrom(self.opts) - req.keep_open = True + def page_server_chld(self): + req = rpc.criu_req() + req.type = rpc.PAGE_SERVER_CHLD + req.opts.MergeFrom(self.opts) + req.keep_open = True - resp = self._send_req_and_recv_resp(req) + resp = self._send_req_and_recv_resp(req) - if not resp.success: - raise CRIUExceptionExternal(req.type, resp.type, resp.cr_errno) + if not resp.success: + raise CRIUExceptionExternal(req.type, resp.type, resp.cr_errno) - return resp.ps + return resp.ps - def wait_pid(self, pid): - req = rpc.criu_req() - req.type = rpc.WAIT_PID - req.pid = pid + def wait_pid(self, pid): + req = rpc.criu_req() + req.type = rpc.WAIT_PID + req.pid = pid - resp = self._send_req_and_recv_resp(req) + resp = self._send_req_and_recv_resp(req) - if not resp.success: - raise CRIUExceptionExternal(req.type, resp.type, resp.cr_errno) + if not resp.success: + raise CRIUExceptionExternal(req.type, resp.type, resp.cr_errno) - return resp.status + return resp.status diff --git a/lib/py/images/images.py b/lib/py/images/images.py index 7a9b9da6e..28c6d9e1f 100644 --- a/lib/py/images/images.py +++ b/lib/py/images/images.py @@ -48,8 +48,8 @@ from . import pb from . 
import pb2dict if "encodebytes" not in dir(base64): - base64.encodebytes = base64.encodestring - base64.decodebytes = base64.decodestring + base64.encodebytes = base64.encodestring + base64.decodebytes = base64.decodestring # # Predefined hardcoded constants @@ -57,233 +57,241 @@ sizeof_u16 = 2 sizeof_u32 = 4 sizeof_u64 = 8 + # A helper for rounding -def round_up(x,y): - return (((x - 1) | (y - 1)) + 1) +def round_up(x, y): + return (((x - 1) | (y - 1)) + 1) + class MagicException(Exception): - def __init__(self, magic): - self.magic = magic + def __init__(self, magic): + self.magic = magic + # Generic class to handle loading/dumping criu images entries from/to bin # format to/from dict(json). class entry_handler: - """ + """ Generic class to handle loading/dumping criu images entries from/to bin format to/from dict(json). """ - def __init__(self, payload, extra_handler=None): - """ + + def __init__(self, payload, extra_handler=None): + """ Sets payload class and extra handler class. """ - self.payload = payload - self.extra_handler = extra_handler + self.payload = payload + self.extra_handler = extra_handler - def load(self, f, pretty = False, no_payload = False): - """ + def load(self, f, pretty=False, no_payload=False): + """ Convert criu image entries from binary format to dict(json). Takes a file-like object and returnes a list with entries in dict(json) format. 
""" - entries = [] + entries = [] - while True: - entry = {} + while True: + entry = {} - # Read payload - pbuff = self.payload() - buf = f.read(4) - if buf == b'': - break - size, = struct.unpack('i', buf) - pbuff.ParseFromString(f.read(size)) - entry = pb2dict.pb2dict(pbuff, pretty) + # Read payload + pbuff = self.payload() + buf = f.read(4) + if buf == b'': + break + size, = struct.unpack('i', buf) + pbuff.ParseFromString(f.read(size)) + entry = pb2dict.pb2dict(pbuff, pretty) - # Read extra - if self.extra_handler: - if no_payload: - def human_readable(num): - for unit in ['','K','M','G','T','P','E','Z']: - if num < 1024.0: - if int(num) == num: - return "%d%sB" % (num, unit) - else: - return "%.1f%sB" % (num, unit) - num /= 1024.0 - return "%.1fYB" % num + # Read extra + if self.extra_handler: + if no_payload: - pl_size = self.extra_handler.skip(f, pbuff) - entry['extra'] = '... <%s>' % human_readable(pl_size) - else: - entry['extra'] = self.extra_handler.load(f, pbuff) + def human_readable(num): + for unit in ['', 'K', 'M', 'G', 'T', 'P', 'E', 'Z']: + if num < 1024.0: + if int(num) == num: + return "%d%sB" % (num, unit) + else: + return "%.1f%sB" % (num, unit) + num /= 1024.0 + return "%.1fYB" % num - entries.append(entry) + pl_size = self.extra_handler.skip(f, pbuff) + entry['extra'] = '... <%s>' % human_readable(pl_size) + else: + entry['extra'] = self.extra_handler.load(f, pbuff) - return entries + entries.append(entry) - def loads(self, s, pretty = False): - """ + return entries + + def loads(self, s, pretty=False): + """ Same as load(), but takes a string as an argument. """ - f = io.BytesIO(s) - return self.load(f, pretty) + f = io.BytesIO(s) + return self.load(f, pretty) - def dump(self, entries, f): - """ + def dump(self, entries, f): + """ Convert criu image entries from dict(json) format to binary. Takes a list of entries and a file-like object to write entries in binary format to. 
""" - for entry in entries: - extra = entry.pop('extra', None) + for entry in entries: + extra = entry.pop('extra', None) - # Write payload - pbuff = self.payload() - pb2dict.dict2pb(entry, pbuff) - pb_str = pbuff.SerializeToString() - size = len(pb_str) - f.write(struct.pack('i', size)) - f.write(pb_str) + # Write payload + pbuff = self.payload() + pb2dict.dict2pb(entry, pbuff) + pb_str = pbuff.SerializeToString() + size = len(pb_str) + f.write(struct.pack('i', size)) + f.write(pb_str) - # Write extra - if self.extra_handler and extra: - self.extra_handler.dump(extra, f, pbuff) + # Write extra + if self.extra_handler and extra: + self.extra_handler.dump(extra, f, pbuff) - def dumps(self, entries): - """ + def dumps(self, entries): + """ Same as dump(), but doesn't take file-like object and just returns a string. """ - f = io.BytesIO('') - self.dump(entries, f) - return f.read() + f = io.BytesIO('') + self.dump(entries, f) + return f.read() - def count(self, f): - """ + def count(self, f): + """ Counts the number of top-level object in the image file """ - entries = 0 + entries = 0 - while True: - buf = f.read(4) - if buf == '': - break - size, = struct.unpack('i', buf) - f.seek(size, 1) - entries += 1 + while True: + buf = f.read(4) + if buf == '': + break + size, = struct.unpack('i', buf) + f.seek(size, 1) + entries += 1 + + return entries - return entries # Special handler for pagemap.img class pagemap_handler: - """ + """ Special entry handler for pagemap.img, which is unique in a way that it has a header of pagemap_head type followed by entries of pagemap_entry type. 
""" - def load(self, f, pretty = False, no_payload = False): - entries = [] - pbuff = pb.pagemap_head() - while True: - buf = f.read(4) - if buf == b'': - break - size, = struct.unpack('i', buf) - pbuff.ParseFromString(f.read(size)) - entries.append(pb2dict.pb2dict(pbuff, pretty)) + def load(self, f, pretty=False, no_payload=False): + entries = [] - pbuff = pb.pagemap_entry() + pbuff = pb.pagemap_head() + while True: + buf = f.read(4) + if buf == b'': + break + size, = struct.unpack('i', buf) + pbuff.ParseFromString(f.read(size)) + entries.append(pb2dict.pb2dict(pbuff, pretty)) - return entries + pbuff = pb.pagemap_entry() - def loads(self, s, pretty = False): - f = io.BytesIO(s) - return self.load(f, pretty) + return entries - def dump(self, entries, f): - pbuff = pb.pagemap_head() - for item in entries: - pb2dict.dict2pb(item, pbuff) - pb_str = pbuff.SerializeToString() - size = len(pb_str) - f.write(struct.pack('i', size)) - f.write(pb_str) + def loads(self, s, pretty=False): + f = io.BytesIO(s) + return self.load(f, pretty) - pbuff = pb.pagemap_entry() + def dump(self, entries, f): + pbuff = pb.pagemap_head() + for item in entries: + pb2dict.dict2pb(item, pbuff) + pb_str = pbuff.SerializeToString() + size = len(pb_str) + f.write(struct.pack('i', size)) + f.write(pb_str) - def dumps(self, entries): - f = io.BytesIO('') - self.dump(entries, f) - return f.read() + pbuff = pb.pagemap_entry() + + def dumps(self, entries): + f = io.BytesIO('') + self.dump(entries, f) + return f.read() + + def count(self, f): + return entry_handler(None).count(f) - 1 - def count(self, f): - return entry_handler(None).count(f) - 1 # Special handler for ghost-file.img class ghost_file_handler: - def load(self, f, pretty = False, no_payload = False): - entries = [] + def load(self, f, pretty=False, no_payload=False): + entries = [] - gf = pb.ghost_file_entry() - buf = f.read(4) - size, = struct.unpack('i', buf) - gf.ParseFromString(f.read(size)) - g_entry = pb2dict.pb2dict(gf, pretty) + 
gf = pb.ghost_file_entry() + buf = f.read(4) + size, = struct.unpack('i', buf) + gf.ParseFromString(f.read(size)) + g_entry = pb2dict.pb2dict(gf, pretty) - if gf.chunks: - entries.append(g_entry) - while True: - gc = pb.ghost_chunk_entry() - buf = f.read(4) - if buf == '': - break - size, = struct.unpack('i', buf) - gc.ParseFromString(f.read(size)) - entry = pb2dict.pb2dict(gc, pretty) - if no_payload: - f.seek(gc.len, os.SEEK_CUR) - else: - entry['extra'] = base64.encodebytes(f.read(gc.len)) - entries.append(entry) - else: - if no_payload: - f.seek(0, os.SEEK_END) - else: - g_entry['extra'] = base64.encodebytes(f.read()) - entries.append(g_entry) + if gf.chunks: + entries.append(g_entry) + while True: + gc = pb.ghost_chunk_entry() + buf = f.read(4) + if buf == '': + break + size, = struct.unpack('i', buf) + gc.ParseFromString(f.read(size)) + entry = pb2dict.pb2dict(gc, pretty) + if no_payload: + f.seek(gc.len, os.SEEK_CUR) + else: + entry['extra'] = base64.encodebytes(f.read(gc.len)) + entries.append(entry) + else: + if no_payload: + f.seek(0, os.SEEK_END) + else: + g_entry['extra'] = base64.encodebytes(f.read()) + entries.append(g_entry) - return entries + return entries - def loads(self, s, pretty = False): - f = io.BytesIO(s) - return self.load(f, pretty) + def loads(self, s, pretty=False): + f = io.BytesIO(s) + return self.load(f, pretty) - def dump(self, entries, f): - pbuff = pb.ghost_file_entry() - item = entries.pop(0) - pb2dict.dict2pb(item, pbuff) - pb_str = pbuff.SerializeToString() - size = len(pb_str) - f.write(struct.pack('i', size)) - f.write(pb_str) + def dump(self, entries, f): + pbuff = pb.ghost_file_entry() + item = entries.pop(0) + pb2dict.dict2pb(item, pbuff) + pb_str = pbuff.SerializeToString() + size = len(pb_str) + f.write(struct.pack('i', size)) + f.write(pb_str) - if pbuff.chunks: - for item in entries: - pbuff = pb.ghost_chunk_entry() - pb2dict.dict2pb(item, pbuff) - pb_str = pbuff.SerializeToString() - size = len(pb_str) - 
f.write(struct.pack('i', size)) - f.write(pb_str) - f.write(base64.decodebytes(item['extra'])) - else: - f.write(base64.decodebytes(item['extra'])) + if pbuff.chunks: + for item in entries: + pbuff = pb.ghost_chunk_entry() + pb2dict.dict2pb(item, pbuff) + pb_str = pbuff.SerializeToString() + size = len(pb_str) + f.write(struct.pack('i', size)) + f.write(pb_str) + f.write(base64.decodebytes(item['extra'])) + else: + f.write(base64.decodebytes(item['extra'])) - def dumps(self, entries): - f = io.BytesIO('') - self.dump(entries, f) - return f.read() + def dumps(self, entries): + f = io.BytesIO('') + self.dump(entries, f) + return f.read() # In following extra handlers we use base64 encoding @@ -293,304 +301,317 @@ class ghost_file_handler: # do not store big amounts of binary data. They # are negligible comparing to pages size. class pipes_data_extra_handler: - def load(self, f, pload): - size = pload.bytes - data = f.read(size) - return base64.encodebytes(data) + def load(self, f, pload): + size = pload.bytes + data = f.read(size) + return base64.encodebytes(data) - def dump(self, extra, f, pload): - data = base64.decodebytes(extra) - f.write(data) + def dump(self, extra, f, pload): + data = base64.decodebytes(extra) + f.write(data) + + def skip(self, f, pload): + f.seek(pload.bytes, os.SEEK_CUR) + return pload.bytes - def skip(self, f, pload): - f.seek(pload.bytes, os.SEEK_CUR) - return pload.bytes class sk_queues_extra_handler: - def load(self, f, pload): - size = pload.length - data = f.read(size) - return base64.encodebytes(data) + def load(self, f, pload): + size = pload.length + data = f.read(size) + return base64.encodebytes(data) - def dump(self, extra, f, _unused): - data = base64.decodebytes(extra) - f.write(data) + def dump(self, extra, f, _unused): + data = base64.decodebytes(extra) + f.write(data) - def skip(self, f, pload): - f.seek(pload.length, os.SEEK_CUR) - return pload.length + def skip(self, f, pload): + f.seek(pload.length, os.SEEK_CUR) + return 
pload.length class tcp_stream_extra_handler: - def load(self, f, pbuff): - d = {} + def load(self, f, pbuff): + d = {} - inq = f.read(pbuff.inq_len) - outq = f.read(pbuff.outq_len) + inq = f.read(pbuff.inq_len) + outq = f.read(pbuff.outq_len) - d['inq'] = base64.encodebytes(inq) - d['outq'] = base64.encodebytes(outq) + d['inq'] = base64.encodebytes(inq) + d['outq'] = base64.encodebytes(outq) - return d + return d - def dump(self, extra, f, _unused): - inq = base64.decodebytes(extra['inq']) - outq = base64.decodebytes(extra['outq']) + def dump(self, extra, f, _unused): + inq = base64.decodebytes(extra['inq']) + outq = base64.decodebytes(extra['outq']) - f.write(inq) - f.write(outq) + f.write(inq) + f.write(outq) + + def skip(self, f, pbuff): + f.seek(0, os.SEEK_END) + return pbuff.inq_len + pbuff.outq_len - def skip(self, f, pbuff): - f.seek(0, os.SEEK_END) - return pbuff.inq_len + pbuff.outq_len class ipc_sem_set_handler: - def load(self, f, pbuff): - entry = pb2dict.pb2dict(pbuff) - size = sizeof_u16 * entry['nsems'] - rounded = round_up(size, sizeof_u64) - s = array.array('H') - if s.itemsize != sizeof_u16: - raise Exception("Array size mismatch") - s.fromstring(f.read(size)) - f.seek(rounded - size, 1) - return s.tolist() + def load(self, f, pbuff): + entry = pb2dict.pb2dict(pbuff) + size = sizeof_u16 * entry['nsems'] + rounded = round_up(size, sizeof_u64) + s = array.array('H') + if s.itemsize != sizeof_u16: + raise Exception("Array size mismatch") + s.fromstring(f.read(size)) + f.seek(rounded - size, 1) + return s.tolist() - def dump(self, extra, f, pbuff): - entry = pb2dict.pb2dict(pbuff) - size = sizeof_u16 * entry['nsems'] - rounded = round_up(size, sizeof_u64) - s = array.array('H') - if s.itemsize != sizeof_u16: - raise Exception("Array size mismatch") - s.fromlist(extra) - if len(s) != entry['nsems']: - raise Exception("Number of semaphores mismatch") - f.write(s.tostring()) - f.write('\0' * (rounded - size)) + def dump(self, extra, f, pbuff): + entry = 
pb2dict.pb2dict(pbuff) + size = sizeof_u16 * entry['nsems'] + rounded = round_up(size, sizeof_u64) + s = array.array('H') + if s.itemsize != sizeof_u16: + raise Exception("Array size mismatch") + s.fromlist(extra) + if len(s) != entry['nsems']: + raise Exception("Number of semaphores mismatch") + f.write(s.tostring()) + f.write('\0' * (rounded - size)) + + def skip(self, f, pbuff): + entry = pb2dict.pb2dict(pbuff) + size = sizeof_u16 * entry['nsems'] + f.seek(round_up(size, sizeof_u64), os.SEEK_CUR) + return size - def skip(self, f, pbuff): - entry = pb2dict.pb2dict(pbuff) - size = sizeof_u16 * entry['nsems'] - f.seek(round_up(size, sizeof_u64), os.SEEK_CUR) - return size class ipc_msg_queue_handler: - def load(self, f, pbuff): - entry = pb2dict.pb2dict(pbuff) - messages = [] - for x in range (0, entry['qnum']): - buf = f.read(4) - if buf == '': - break - size, = struct.unpack('i', buf) - msg = pb.ipc_msg() - msg.ParseFromString(f.read(size)) - rounded = round_up(msg.msize, sizeof_u64) - data = f.read(msg.msize) - f.seek(rounded - msg.msize, 1) - messages.append(pb2dict.pb2dict(msg)) - messages.append(base64.encodebytes(data)) - return messages + def load(self, f, pbuff): + entry = pb2dict.pb2dict(pbuff) + messages = [] + for x in range(0, entry['qnum']): + buf = f.read(4) + if buf == '': + break + size, = struct.unpack('i', buf) + msg = pb.ipc_msg() + msg.ParseFromString(f.read(size)) + rounded = round_up(msg.msize, sizeof_u64) + data = f.read(msg.msize) + f.seek(rounded - msg.msize, 1) + messages.append(pb2dict.pb2dict(msg)) + messages.append(base64.encodebytes(data)) + return messages - def dump(self, extra, f, pbuff): - entry = pb2dict.pb2dict(pbuff) - for i in range (0, len(extra), 2): - msg = pb.ipc_msg() - pb2dict.dict2pb(extra[i], msg) - msg_str = msg.SerializeToString() - size = len(msg_str) - f.write(struct.pack('i', size)) - f.write(msg_str) - rounded = round_up(msg.msize, sizeof_u64) - data = base64.decodebytes(extra[i + 1]) - f.write(data[:msg.msize]) 
- f.write('\0' * (rounded - msg.msize)) + def dump(self, extra, f, pbuff): + entry = pb2dict.pb2dict(pbuff) + for i in range(0, len(extra), 2): + msg = pb.ipc_msg() + pb2dict.dict2pb(extra[i], msg) + msg_str = msg.SerializeToString() + size = len(msg_str) + f.write(struct.pack('i', size)) + f.write(msg_str) + rounded = round_up(msg.msize, sizeof_u64) + data = base64.decodebytes(extra[i + 1]) + f.write(data[:msg.msize]) + f.write('\0' * (rounded - msg.msize)) - def skip(self, f, pbuff): - entry = pb2dict.pb2dict(pbuff) - pl_len = 0 - for x in range (0, entry['qnum']): - buf = f.read(4) - if buf == '': - break - size, = struct.unpack('i', buf) - msg = pb.ipc_msg() - msg.ParseFromString(f.read(size)) - rounded = round_up(msg.msize, sizeof_u64) - f.seek(rounded, os.SEEK_CUR) - pl_len += size + msg.msize + def skip(self, f, pbuff): + entry = pb2dict.pb2dict(pbuff) + pl_len = 0 + for x in range(0, entry['qnum']): + buf = f.read(4) + if buf == '': + break + size, = struct.unpack('i', buf) + msg = pb.ipc_msg() + msg.ParseFromString(f.read(size)) + rounded = round_up(msg.msize, sizeof_u64) + f.seek(rounded, os.SEEK_CUR) + pl_len += size + msg.msize + + return pl_len - return pl_len class ipc_shm_handler: - def load(self, f, pbuff): - entry = pb2dict.pb2dict(pbuff) - size = entry['size'] - data = f.read(size) - rounded = round_up(size, sizeof_u32) - f.seek(rounded - size, 1) - return base64.encodebytes(data) + def load(self, f, pbuff): + entry = pb2dict.pb2dict(pbuff) + size = entry['size'] + data = f.read(size) + rounded = round_up(size, sizeof_u32) + f.seek(rounded - size, 1) + return base64.encodebytes(data) - def dump(self, extra, f, pbuff): - entry = pb2dict.pb2dict(pbuff) - size = entry['size'] - data = base64.decodebytes(extra) - rounded = round_up(size, sizeof_u32) - f.write(data[:size]) - f.write('\0' * (rounded - size)) + def dump(self, extra, f, pbuff): + entry = pb2dict.pb2dict(pbuff) + size = entry['size'] + data = base64.decodebytes(extra) + rounded = 
round_up(size, sizeof_u32) + f.write(data[:size]) + f.write('\0' * (rounded - size)) - def skip(self, f, pbuff): - entry = pb2dict.pb2dict(pbuff) - size = entry['size'] - rounded = round_up(size, sizeof_u32) - f.seek(rounded, os.SEEK_CUR) - return size + def skip(self, f, pbuff): + entry = pb2dict.pb2dict(pbuff) + size = entry['size'] + rounded = round_up(size, sizeof_u32) + f.seek(rounded, os.SEEK_CUR) + return size handlers = { - 'INVENTORY' : entry_handler(pb.inventory_entry), - 'CORE' : entry_handler(pb.core_entry), - 'IDS' : entry_handler(pb.task_kobj_ids_entry), - 'CREDS' : entry_handler(pb.creds_entry), - 'UTSNS' : entry_handler(pb.utsns_entry), - 'IPC_VAR' : entry_handler(pb.ipc_var_entry), - 'FS' : entry_handler(pb.fs_entry), - 'GHOST_FILE' : ghost_file_handler(), - 'MM' : entry_handler(pb.mm_entry), - 'CGROUP' : entry_handler(pb.cgroup_entry), - 'TCP_STREAM' : entry_handler(pb.tcp_stream_entry, tcp_stream_extra_handler()), - 'STATS' : entry_handler(pb.stats_entry), - 'PAGEMAP' : pagemap_handler(), # Special one - 'PSTREE' : entry_handler(pb.pstree_entry), - 'REG_FILES' : entry_handler(pb.reg_file_entry), - 'NS_FILES' : entry_handler(pb.ns_file_entry), - 'EVENTFD_FILE' : entry_handler(pb.eventfd_file_entry), - 'EVENTPOLL_FILE' : entry_handler(pb.eventpoll_file_entry), - 'EVENTPOLL_TFD' : entry_handler(pb.eventpoll_tfd_entry), - 'SIGNALFD' : entry_handler(pb.signalfd_entry), - 'TIMERFD' : entry_handler(pb.timerfd_entry), - 'INOTIFY_FILE' : entry_handler(pb.inotify_file_entry), - 'INOTIFY_WD' : entry_handler(pb.inotify_wd_entry), - 'FANOTIFY_FILE' : entry_handler(pb.fanotify_file_entry), - 'FANOTIFY_MARK' : entry_handler(pb.fanotify_mark_entry), - 'VMAS' : entry_handler(pb.vma_entry), - 'PIPES' : entry_handler(pb.pipe_entry), - 'FIFO' : entry_handler(pb.fifo_entry), - 'SIGACT' : entry_handler(pb.sa_entry), - 'NETLINK_SK' : entry_handler(pb.netlink_sk_entry), - 'REMAP_FPATH' : entry_handler(pb.remap_file_path_entry), - 'MNTS' : entry_handler(pb.mnt_entry), - 
'TTY_FILES' : entry_handler(pb.tty_file_entry), - 'TTY_INFO' : entry_handler(pb.tty_info_entry), - 'TTY_DATA' : entry_handler(pb.tty_data_entry), - 'RLIMIT' : entry_handler(pb.rlimit_entry), - 'TUNFILE' : entry_handler(pb.tunfile_entry), - 'EXT_FILES' : entry_handler(pb.ext_file_entry), - 'IRMAP_CACHE' : entry_handler(pb.irmap_cache_entry), - 'FILE_LOCKS' : entry_handler(pb.file_lock_entry), - 'FDINFO' : entry_handler(pb.fdinfo_entry), - 'UNIXSK' : entry_handler(pb.unix_sk_entry), - 'INETSK' : entry_handler(pb.inet_sk_entry), - 'PACKETSK' : entry_handler(pb.packet_sock_entry), - 'ITIMERS' : entry_handler(pb.itimer_entry), - 'POSIX_TIMERS' : entry_handler(pb.posix_timer_entry), - 'NETDEV' : entry_handler(pb.net_device_entry), - 'PIPES_DATA' : entry_handler(pb.pipe_data_entry, pipes_data_extra_handler()), - 'FIFO_DATA' : entry_handler(pb.pipe_data_entry, pipes_data_extra_handler()), - 'SK_QUEUES' : entry_handler(pb.sk_packet_entry, sk_queues_extra_handler()), - 'IPCNS_SHM' : entry_handler(pb.ipc_shm_entry, ipc_shm_handler()), - 'IPCNS_SEM' : entry_handler(pb.ipc_sem_entry, ipc_sem_set_handler()), - 'IPCNS_MSG' : entry_handler(pb.ipc_msg_entry, ipc_msg_queue_handler()), - 'NETNS' : entry_handler(pb.netns_entry), - 'USERNS' : entry_handler(pb.userns_entry), - 'SECCOMP' : entry_handler(pb.seccomp_entry), - 'AUTOFS' : entry_handler(pb.autofs_entry), - 'FILES' : entry_handler(pb.file_entry), - 'CPUINFO' : entry_handler(pb.cpuinfo_entry), - } + 'INVENTORY': entry_handler(pb.inventory_entry), + 'CORE': entry_handler(pb.core_entry), + 'IDS': entry_handler(pb.task_kobj_ids_entry), + 'CREDS': entry_handler(pb.creds_entry), + 'UTSNS': entry_handler(pb.utsns_entry), + 'IPC_VAR': entry_handler(pb.ipc_var_entry), + 'FS': entry_handler(pb.fs_entry), + 'GHOST_FILE': ghost_file_handler(), + 'MM': entry_handler(pb.mm_entry), + 'CGROUP': entry_handler(pb.cgroup_entry), + 'TCP_STREAM': entry_handler(pb.tcp_stream_entry, + tcp_stream_extra_handler()), + 'STATS': 
entry_handler(pb.stats_entry), + 'PAGEMAP': pagemap_handler(), # Special one + 'PSTREE': entry_handler(pb.pstree_entry), + 'REG_FILES': entry_handler(pb.reg_file_entry), + 'NS_FILES': entry_handler(pb.ns_file_entry), + 'EVENTFD_FILE': entry_handler(pb.eventfd_file_entry), + 'EVENTPOLL_FILE': entry_handler(pb.eventpoll_file_entry), + 'EVENTPOLL_TFD': entry_handler(pb.eventpoll_tfd_entry), + 'SIGNALFD': entry_handler(pb.signalfd_entry), + 'TIMERFD': entry_handler(pb.timerfd_entry), + 'INOTIFY_FILE': entry_handler(pb.inotify_file_entry), + 'INOTIFY_WD': entry_handler(pb.inotify_wd_entry), + 'FANOTIFY_FILE': entry_handler(pb.fanotify_file_entry), + 'FANOTIFY_MARK': entry_handler(pb.fanotify_mark_entry), + 'VMAS': entry_handler(pb.vma_entry), + 'PIPES': entry_handler(pb.pipe_entry), + 'FIFO': entry_handler(pb.fifo_entry), + 'SIGACT': entry_handler(pb.sa_entry), + 'NETLINK_SK': entry_handler(pb.netlink_sk_entry), + 'REMAP_FPATH': entry_handler(pb.remap_file_path_entry), + 'MNTS': entry_handler(pb.mnt_entry), + 'TTY_FILES': entry_handler(pb.tty_file_entry), + 'TTY_INFO': entry_handler(pb.tty_info_entry), + 'TTY_DATA': entry_handler(pb.tty_data_entry), + 'RLIMIT': entry_handler(pb.rlimit_entry), + 'TUNFILE': entry_handler(pb.tunfile_entry), + 'EXT_FILES': entry_handler(pb.ext_file_entry), + 'IRMAP_CACHE': entry_handler(pb.irmap_cache_entry), + 'FILE_LOCKS': entry_handler(pb.file_lock_entry), + 'FDINFO': entry_handler(pb.fdinfo_entry), + 'UNIXSK': entry_handler(pb.unix_sk_entry), + 'INETSK': entry_handler(pb.inet_sk_entry), + 'PACKETSK': entry_handler(pb.packet_sock_entry), + 'ITIMERS': entry_handler(pb.itimer_entry), + 'POSIX_TIMERS': entry_handler(pb.posix_timer_entry), + 'NETDEV': entry_handler(pb.net_device_entry), + 'PIPES_DATA': entry_handler(pb.pipe_data_entry, + pipes_data_extra_handler()), + 'FIFO_DATA': entry_handler(pb.pipe_data_entry, pipes_data_extra_handler()), + 'SK_QUEUES': entry_handler(pb.sk_packet_entry, sk_queues_extra_handler()), + 'IPCNS_SHM': 
entry_handler(pb.ipc_shm_entry, ipc_shm_handler()), + 'IPCNS_SEM': entry_handler(pb.ipc_sem_entry, ipc_sem_set_handler()), + 'IPCNS_MSG': entry_handler(pb.ipc_msg_entry, ipc_msg_queue_handler()), + 'NETNS': entry_handler(pb.netns_entry), + 'USERNS': entry_handler(pb.userns_entry), + 'SECCOMP': entry_handler(pb.seccomp_entry), + 'AUTOFS': entry_handler(pb.autofs_entry), + 'FILES': entry_handler(pb.file_entry), + 'CPUINFO': entry_handler(pb.cpuinfo_entry), +} + def __rhandler(f): - # Images v1.1 NOTE: First read "first" magic. - img_magic, = struct.unpack('i', f.read(4)) - if img_magic in (magic.by_name['IMG_COMMON'], magic.by_name['IMG_SERVICE']): - img_magic, = struct.unpack('i', f.read(4)) + # Images v1.1 NOTE: First read "first" magic. + img_magic, = struct.unpack('i', f.read(4)) + if img_magic in (magic.by_name['IMG_COMMON'], + magic.by_name['IMG_SERVICE']): + img_magic, = struct.unpack('i', f.read(4)) - try: - m = magic.by_val[img_magic] - except: - raise MagicException(img_magic) + try: + m = magic.by_val[img_magic] + except: + raise MagicException(img_magic) - try: - handler = handlers[m] - except: - raise Exception("No handler found for image with magic " + m) + try: + handler = handlers[m] + except: + raise Exception("No handler found for image with magic " + m) - return m, handler + return m, handler -def load(f, pretty = False, no_payload = False): - """ + +def load(f, pretty=False, no_payload=False): + """ Convert criu image from binary format to dict(json). Takes a file-like object to read criu image from. Returns criu image in dict(json) format. 
""" - image = {} + image = {} - m, handler = __rhandler(f) + m, handler = __rhandler(f) - image['magic'] = m - image['entries'] = handler.load(f, pretty, no_payload) + image['magic'] = m + image['entries'] = handler.load(f, pretty, no_payload) + + return image - return image def info(f): - res = {} + res = {} - m, handler = __rhandler(f) + m, handler = __rhandler(f) - res['magic'] = m - res['count'] = handler.count(f) + res['magic'] = m + res['count'] = handler.count(f) - return res + return res -def loads(s, pretty = False): - """ + +def loads(s, pretty=False): + """ Same as load(), but takes a string. """ - f = io.BytesIO(s) - return load(f, pretty) + f = io.BytesIO(s) + return load(f, pretty) + def dump(img, f): - """ + """ Convert criu image from dict(json) format to binary. Takes an image in dict(json) format and file-like object to write to. """ - m = img['magic'] - magic_val = magic.by_name[img['magic']] + m = img['magic'] + magic_val = magic.by_name[img['magic']] - # Images v1.1 NOTE: use "second" magic to identify what "first" - # should be written. - if m != 'INVENTORY': - if m in ('STATS', 'IRMAP_CACHE'): - f.write(struct.pack('i', magic.by_name['IMG_SERVICE'])) - else: - f.write(struct.pack('i', magic.by_name['IMG_COMMON'])) + # Images v1.1 NOTE: use "second" magic to identify what "first" + # should be written. + if m != 'INVENTORY': + if m in ('STATS', 'IRMAP_CACHE'): + f.write(struct.pack('i', magic.by_name['IMG_SERVICE'])) + else: + f.write(struct.pack('i', magic.by_name['IMG_COMMON'])) - f.write(struct.pack('i', magic_val)) + f.write(struct.pack('i', magic_val)) - try: - handler = handlers[m] - except: - raise Exception("No handler found for image with such magic") + try: + handler = handlers[m] + except: + raise Exception("No handler found for image with such magic") + + handler.dump(img['entries'], f) - handler.dump(img['entries'], f) def dumps(img): - """ + """ Same as dump(), but takes only an image and returns a string. 
""" - f = io.BytesIO(b'') - dump(img, f) - return f.getvalue() + f = io.BytesIO(b'') + dump(img, f) + return f.getvalue() diff --git a/lib/py/images/pb2dict.py b/lib/py/images/pb2dict.py index 6b4a772c7..daaa7297e 100644 --- a/lib/py/images/pb2dict.py +++ b/lib/py/images/pb2dict.py @@ -9,8 +9,8 @@ import base64 import quopri if "encodebytes" not in dir(base64): - base64.encodebytes = base64.encodestring - base64.decodebytes = base64.decodestring + base64.encodebytes = base64.encodestring + base64.decodebytes = base64.decodestring # pb2dict and dict2pb are methods to convert pb to/from dict. # Inspired by: @@ -29,350 +29,396 @@ if "encodebytes" not in dir(base64): # enums to string value too. (i.e. "march : x86_64" is better then # "march : 1"). - _basic_cast = { - FD.TYPE_FIXED64 : int, - FD.TYPE_FIXED32 : int, - FD.TYPE_SFIXED64 : int, - FD.TYPE_SFIXED32 : int, - - FD.TYPE_INT64 : int, - FD.TYPE_UINT64 : int, - FD.TYPE_SINT64 : int, - - FD.TYPE_INT32 : int, - FD.TYPE_UINT32 : int, - FD.TYPE_SINT32 : int, - - FD.TYPE_BOOL : bool, - - FD.TYPE_STRING : str + FD.TYPE_FIXED64: int, + FD.TYPE_FIXED32: int, + FD.TYPE_SFIXED64: int, + FD.TYPE_SFIXED32: int, + FD.TYPE_INT64: int, + FD.TYPE_UINT64: int, + FD.TYPE_SINT64: int, + FD.TYPE_INT32: int, + FD.TYPE_UINT32: int, + FD.TYPE_SINT32: int, + FD.TYPE_BOOL: bool, + FD.TYPE_STRING: str } + def _marked_as_hex(field): - return field.GetOptions().Extensions[opts_pb2.criu].hex + return field.GetOptions().Extensions[opts_pb2.criu].hex + def _marked_as_ip(field): - return field.GetOptions().Extensions[opts_pb2.criu].ipadd + return field.GetOptions().Extensions[opts_pb2.criu].ipadd + def _marked_as_flags(field): - return field.GetOptions().Extensions[opts_pb2.criu].flags + return field.GetOptions().Extensions[opts_pb2.criu].flags + def _marked_as_dev(field): - return field.GetOptions().Extensions[opts_pb2.criu].dev + return field.GetOptions().Extensions[opts_pb2.criu].dev + def _marked_as_odev(field): - return 
field.GetOptions().Extensions[opts_pb2.criu].odev + return field.GetOptions().Extensions[opts_pb2.criu].odev + def _marked_as_dict(field): - return field.GetOptions().Extensions[opts_pb2.criu].dict + return field.GetOptions().Extensions[opts_pb2.criu].dict + def _custom_conv(field): - return field.GetOptions().Extensions[opts_pb2.criu].conv + return field.GetOptions().Extensions[opts_pb2.criu].conv + mmap_prot_map = [ - ('PROT_READ', 0x1), - ('PROT_WRITE', 0x2), - ('PROT_EXEC', 0x4), + ('PROT_READ', 0x1), + ('PROT_WRITE', 0x2), + ('PROT_EXEC', 0x4), ] mmap_flags_map = [ - ('MAP_SHARED', 0x1), - ('MAP_PRIVATE', 0x2), - ('MAP_ANON', 0x20), - ('MAP_GROWSDOWN', 0x0100), + ('MAP_SHARED', 0x1), + ('MAP_PRIVATE', 0x2), + ('MAP_ANON', 0x20), + ('MAP_GROWSDOWN', 0x0100), ] mmap_status_map = [ - ('VMA_AREA_NONE', 0 << 0), - ('VMA_AREA_REGULAR', 1 << 0), - ('VMA_AREA_STACK', 1 << 1), - ('VMA_AREA_VSYSCALL', 1 << 2), - ('VMA_AREA_VDSO', 1 << 3), - ('VMA_AREA_HEAP', 1 << 5), - - ('VMA_FILE_PRIVATE', 1 << 6), - ('VMA_FILE_SHARED', 1 << 7), - ('VMA_ANON_SHARED', 1 << 8), - ('VMA_ANON_PRIVATE', 1 << 9), - - ('VMA_AREA_SYSVIPC', 1 << 10), - ('VMA_AREA_SOCKET', 1 << 11), - ('VMA_AREA_VVAR', 1 << 12), - ('VMA_AREA_AIORING', 1 << 13), - - ('VMA_UNSUPP', 1 << 31), + ('VMA_AREA_NONE', 0 << 0), + ('VMA_AREA_REGULAR', 1 << 0), + ('VMA_AREA_STACK', 1 << 1), + ('VMA_AREA_VSYSCALL', 1 << 2), + ('VMA_AREA_VDSO', 1 << 3), + ('VMA_AREA_HEAP', 1 << 5), + ('VMA_FILE_PRIVATE', 1 << 6), + ('VMA_FILE_SHARED', 1 << 7), + ('VMA_ANON_SHARED', 1 << 8), + ('VMA_ANON_PRIVATE', 1 << 9), + ('VMA_AREA_SYSVIPC', 1 << 10), + ('VMA_AREA_SOCKET', 1 << 11), + ('VMA_AREA_VVAR', 1 << 12), + ('VMA_AREA_AIORING', 1 << 13), + ('VMA_UNSUPP', 1 << 31), ] rfile_flags_map = [ - ('O_WRONLY', 0o1), - ('O_RDWR', 0o2), - ('O_APPEND', 0o2000), - ('O_DIRECT', 0o40000), - ('O_LARGEFILE', 0o100000), + ('O_WRONLY', 0o1), + ('O_RDWR', 0o2), + ('O_APPEND', 0o2000), + ('O_DIRECT', 0o40000), + ('O_LARGEFILE', 0o100000), ] 
pmap_flags_map = [ - ('PE_PARENT', 1 << 0), - ('PE_LAZY', 1 << 1), - ('PE_PRESENT', 1 << 2), + ('PE_PARENT', 1 << 0), + ('PE_LAZY', 1 << 1), + ('PE_PRESENT', 1 << 2), ] flags_maps = { - 'mmap.prot' : mmap_prot_map, - 'mmap.flags' : mmap_flags_map, - 'mmap.status' : mmap_status_map, - 'rfile.flags' : rfile_flags_map, - 'pmap.flags' : pmap_flags_map, + 'mmap.prot': mmap_prot_map, + 'mmap.flags': mmap_flags_map, + 'mmap.status': mmap_status_map, + 'rfile.flags': rfile_flags_map, + 'pmap.flags': pmap_flags_map, } gen_maps = { - 'task_state' : { 1: 'Alive', 3: 'Zombie', 6: 'Stopped' }, + 'task_state': { + 1: 'Alive', + 3: 'Zombie', + 6: 'Stopped' + }, } sk_maps = { - 'family' : { 1: 'UNIX', - 2: 'INET', - 10: 'INET6', - 16: 'NETLINK', - 17: 'PACKET' }, - 'type' : { 1: 'STREAM', - 2: 'DGRAM', - 3: 'RAW', - 5: 'SEQPACKET', - 10: 'PACKET' }, - 'state' : { 1: 'ESTABLISHED', - 2: 'SYN_SENT', - 3: 'SYN_RECV', - 4: 'FIN_WAIT1', - 5: 'FIN_WAIT2', - 6: 'TIME_WAIT', - 7: 'CLOSE', - 8: 'CLOSE_WAIT', - 9: 'LAST_ACK', - 10: 'LISTEN' }, - 'proto' : { 0: 'IP', - 6: 'TCP', - 17: 'UDP', - 136: 'UDPLITE' }, + 'family': { + 1: 'UNIX', + 2: 'INET', + 10: 'INET6', + 16: 'NETLINK', + 17: 'PACKET' + }, + 'type': { + 1: 'STREAM', + 2: 'DGRAM', + 3: 'RAW', + 5: 'SEQPACKET', + 10: 'PACKET' + }, + 'state': { + 1: 'ESTABLISHED', + 2: 'SYN_SENT', + 3: 'SYN_RECV', + 4: 'FIN_WAIT1', + 5: 'FIN_WAIT2', + 6: 'TIME_WAIT', + 7: 'CLOSE', + 8: 'CLOSE_WAIT', + 9: 'LAST_ACK', + 10: 'LISTEN' + }, + 'proto': { + 0: 'IP', + 6: 'TCP', + 17: 'UDP', + 136: 'UDPLITE' + }, } -gen_rmaps = { k: {v2:k2 for k2,v2 in list(v.items())} for k,v in list(gen_maps.items()) } -sk_rmaps = { k: {v2:k2 for k2,v2 in list(v.items())} for k,v in list(sk_maps.items()) } +gen_rmaps = { + k: {v2: k2 + for k2, v2 in list(v.items())} + for k, v in list(gen_maps.items()) +} +sk_rmaps = { + k: {v2: k2 + for k2, v2 in list(v.items())} + for k, v in list(sk_maps.items()) +} dict_maps = { - 'gen' : ( gen_maps, gen_rmaps ), - 'sk' : ( sk_maps, 
sk_rmaps ), + 'gen': (gen_maps, gen_rmaps), + 'sk': (sk_maps, sk_rmaps), } + def map_flags(value, flags_map): - bs = [x[0] for x in [x for x in flags_map if value & x[1]]] - value &= ~sum([x[1] for x in flags_map]) - if value: - bs.append("0x%x" % value) - return " | ".join(bs) + bs = [x[0] for x in [x for x in flags_map if value & x[1]]] + value &= ~sum([x[1] for x in flags_map]) + if value: + bs.append("0x%x" % value) + return " | ".join(bs) + def unmap_flags(value, flags_map): - if value == '': - return 0 + if value == '': + return 0 - bd = dict(flags_map) - return sum([int(str(bd.get(x, x)), 0) for x in [x.strip() for x in value.split('|')]]) + bd = dict(flags_map) + return sum([ + int(str(bd.get(x, x)), 0) + for x in [x.strip() for x in value.split('|')] + ]) + + +kern_minorbits = 20 # This is how kernel encodes dev_t in new format -kern_minorbits = 20 # This is how kernel encodes dev_t in new format def decode_dev(field, value): - if _marked_as_odev(field): - return "%d:%d" % (os.major(value), os.minor(value)) - else: - return "%d:%d" % (value >> kern_minorbits, value & ((1 << kern_minorbits) - 1)) + if _marked_as_odev(field): + return "%d:%d" % (os.major(value), os.minor(value)) + else: + return "%d:%d" % (value >> kern_minorbits, + value & ((1 << kern_minorbits) - 1)) + def encode_dev(field, value): - dev = [int(x) for x in value.split(':')] - if _marked_as_odev(field): - return os.makedev(dev[0], dev[1]) - else: - return dev[0] << kern_minorbits | dev[1] + dev = [int(x) for x in value.split(':')] + if _marked_as_odev(field): + return os.makedev(dev[0], dev[1]) + else: + return dev[0] << kern_minorbits | dev[1] + def encode_base64(value): - return base64.encodebytes(value) + return base64.encodebytes(value) + + def decode_base64(value): - return base64.decodebytes(value) + return base64.decodebytes(value) + def encode_unix(value): - return quopri.encodestring(value) -def decode_unix(value): - return quopri.decodestring(value) + return 
quopri.encodestring(value) + + +def decode_unix(value): + return quopri.decodestring(value) + + +encode = {'unix_name': encode_unix} +decode = {'unix_name': decode_unix} -encode = { 'unix_name': encode_unix } -decode = { 'unix_name': decode_unix } def get_bytes_enc(field): - c = _custom_conv(field) - if c: - return encode[c] - else: - return encode_base64 + c = _custom_conv(field) + if c: + return encode[c] + else: + return encode_base64 + def get_bytes_dec(field): - c = _custom_conv(field) - if c: - return decode[c] - else: - return decode_base64 + c = _custom_conv(field) + if c: + return decode[c] + else: + return decode_base64 + def is_string(value): - # Python 3 compatibility - if "basestring" in __builtins__: - string_types = basestring # noqa: F821 - else: - string_types = (str, bytes) - return isinstance(value, string_types) + # Python 3 compatibility + if "basestring" in __builtins__: + string_types = basestring # noqa: F821 + else: + string_types = (str, bytes) + return isinstance(value, string_types) -def _pb2dict_cast(field, value, pretty = False, is_hex = False): - if not is_hex: - is_hex = _marked_as_hex(field) - if field.type == FD.TYPE_MESSAGE: - return pb2dict(value, pretty, is_hex) - elif field.type == FD.TYPE_BYTES: - return get_bytes_enc(field)(value) - elif field.type == FD.TYPE_ENUM: - return field.enum_type.values_by_number.get(value, None).name - elif field.type in _basic_cast: - cast = _basic_cast[field.type] - if pretty and (cast == int): - if is_hex: - # Fields that have (criu).hex = true option set - # should be stored in hex string format. 
- return "0x%x" % value +def _pb2dict_cast(field, value, pretty=False, is_hex=False): + if not is_hex: + is_hex = _marked_as_hex(field) - if _marked_as_dev(field): - return decode_dev(field, value) + if field.type == FD.TYPE_MESSAGE: + return pb2dict(value, pretty, is_hex) + elif field.type == FD.TYPE_BYTES: + return get_bytes_enc(field)(value) + elif field.type == FD.TYPE_ENUM: + return field.enum_type.values_by_number.get(value, None).name + elif field.type in _basic_cast: + cast = _basic_cast[field.type] + if pretty and (cast == int): + if is_hex: + # Fields that have (criu).hex = true option set + # should be stored in hex string format. + return "0x%x" % value - flags = _marked_as_flags(field) - if flags: - try: - flags_map = flags_maps[flags] - except: - return "0x%x" % value # flags are better seen as hex anyway - else: - return map_flags(value, flags_map) + if _marked_as_dev(field): + return decode_dev(field, value) - dct = _marked_as_dict(field) - if dct: - return dict_maps[dct][0][field.name].get(value, cast(value)) + flags = _marked_as_flags(field) + if flags: + try: + flags_map = flags_maps[flags] + except Exception: + return "0x%x" % value # flags are better seen as hex anyway + else: + return map_flags(value, flags_map) - return cast(value) - else: - raise Exception("Field(%s) has unsupported type %d" % (field.name, field.type)) + dct = _marked_as_dict(field) + if dct: + return dict_maps[dct][0][field.name].get(value, cast(value)) -def pb2dict(pb, pretty = False, is_hex = False): - """ - Convert protobuf msg to dictionary. - Takes a protobuf message and returns a dict. 
- """ - d = collections.OrderedDict() if pretty else {} - for field, value in pb.ListFields(): - if field.label == FD.LABEL_REPEATED: - d_val = [] - if pretty and _marked_as_ip(field): - if len(value) == 1: - v = socket.ntohl(value[0]) - addr = IPv4Address(v) - else: - v = 0 + (socket.ntohl(value[0]) << (32 * 3)) + \ - (socket.ntohl(value[1]) << (32 * 2)) + \ - (socket.ntohl(value[2]) << (32 * 1)) + \ - (socket.ntohl(value[3])) - addr = IPv6Address(v) + return cast(value) + else: + raise Exception("Field(%s) has unsupported type %d" % + (field.name, field.type)) - d_val.append(addr.compressed) - else: - for v in value: - d_val.append(_pb2dict_cast(field, v, pretty, is_hex)) - else: - d_val = _pb2dict_cast(field, value, pretty, is_hex) - d[field.name] = d_val - return d +def pb2dict(pb, pretty=False, is_hex=False): + """ + Convert protobuf msg to dictionary. + Takes a protobuf message and returns a dict. + """ + d = collections.OrderedDict() if pretty else {} + for field, value in pb.ListFields(): + if field.label == FD.LABEL_REPEATED: + d_val = [] + if pretty and _marked_as_ip(field): + if len(value) == 1: + v = socket.ntohl(value[0]) + addr = IPv4Address(v) + else: + v = 0 + (socket.ntohl(value[0]) << (32 * 3)) + \ + (socket.ntohl(value[1]) << (32 * 2)) + \ + (socket.ntohl(value[2]) << (32 * 1)) + \ + (socket.ntohl(value[3])) + addr = IPv6Address(v) + + d_val.append(addr.compressed) + else: + for v in value: + d_val.append(_pb2dict_cast(field, v, pretty, is_hex)) + else: + d_val = _pb2dict_cast(field, value, pretty, is_hex) + + d[field.name] = d_val + return d + def _dict2pb_cast(field, value): - # Not considering TYPE_MESSAGE here, as repeated - # and non-repeated messages need special treatment - # in this case, and are hadled separately. 
- if field.type == FD.TYPE_BYTES: - return get_bytes_dec(field)(value) - elif field.type == FD.TYPE_ENUM: - return field.enum_type.values_by_name.get(value, None).number - elif field.type in _basic_cast: - cast = _basic_cast[field.type] - if (cast == int) and is_string(value): - if _marked_as_dev(field): - return encode_dev(field, value) + # Not considering TYPE_MESSAGE here, as repeated + # and non-repeated messages need special treatment + # in this case, and are hadled separately. + if field.type == FD.TYPE_BYTES: + return get_bytes_dec(field)(value) + elif field.type == FD.TYPE_ENUM: + return field.enum_type.values_by_name.get(value, None).number + elif field.type in _basic_cast: + cast = _basic_cast[field.type] + if (cast == int) and is_string(value): + if _marked_as_dev(field): + return encode_dev(field, value) - flags = _marked_as_flags(field) - if flags: - try: - flags_map = flags_maps[flags] - except: - pass # Try to use plain string cast - else: - return unmap_flags(value, flags_map) + flags = _marked_as_flags(field) + if flags: + try: + flags_map = flags_maps[flags] + except Exception: + pass # Try to use plain string cast + else: + return unmap_flags(value, flags_map) - dct = _marked_as_dict(field) - if dct: - ret = dict_maps[dct][1][field.name].get(value, None) - if ret == None: - ret = cast(value, 0) - return ret + dct = _marked_as_dict(field) + if dct: + ret = dict_maps[dct][1][field.name].get(value, None) + if ret is None: + ret = cast(value, 0) + return ret + + # Some int or long fields might be stored as hex + # strings. See _pb2dict_cast. + return cast(value, 0) + else: + return cast(value) + else: + raise Exception("Field(%s) has unsupported type %d" % + (field.name, field.type)) - # Some int or long fields might be stored as hex - # strings. See _pb2dict_cast. 
- return cast(value, 0) - else: - return cast(value) - else: - raise Exception("Field(%s) has unsupported type %d" % (field.name, field.type)) def dict2pb(d, pb): - """ - Convert dictionary to protobuf msg. - Takes dict and protobuf message to be merged into. - """ - for field in pb.DESCRIPTOR.fields: - if field.name not in d: - continue - value = d[field.name] - if field.label == FD.LABEL_REPEATED: - pb_val = getattr(pb, field.name, None) - if is_string(value[0]) and _marked_as_ip(field): - val = ip_address(value[0]) - if val.version == 4: - pb_val.append(socket.htonl(int(val))) - elif val.version == 6: - ival = int(val) - pb_val.append(socket.htonl((ival >> (32 * 3)) & 0xFFFFFFFF)) - pb_val.append(socket.htonl((ival >> (32 * 2)) & 0xFFFFFFFF)) - pb_val.append(socket.htonl((ival >> (32 * 1)) & 0xFFFFFFFF)) - pb_val.append(socket.htonl((ival >> (32 * 0)) & 0xFFFFFFFF)) - else: - raise Exception("Unknown IP address version %d" % val.version) - continue + """ + Convert dictionary to protobuf msg. + Takes dict and protobuf message to be merged into. 
+ """ + for field in pb.DESCRIPTOR.fields: + if field.name not in d: + continue + value = d[field.name] + if field.label == FD.LABEL_REPEATED: + pb_val = getattr(pb, field.name, None) + if is_string(value[0]) and _marked_as_ip(field): + val = ip_address(value[0]) + if val.version == 4: + pb_val.append(socket.htonl(int(val))) + elif val.version == 6: + ival = int(val) + pb_val.append(socket.htonl((ival >> (32 * 3)) & 0xFFFFFFFF)) + pb_val.append(socket.htonl((ival >> (32 * 2)) & 0xFFFFFFFF)) + pb_val.append(socket.htonl((ival >> (32 * 1)) & 0xFFFFFFFF)) + pb_val.append(socket.htonl((ival >> (32 * 0)) & 0xFFFFFFFF)) + else: + raise Exception("Unknown IP address version %d" % + val.version) + continue - for v in value: - if field.type == FD.TYPE_MESSAGE: - dict2pb(v, pb_val.add()) - else: - pb_val.append(_dict2pb_cast(field, v)) - else: - if field.type == FD.TYPE_MESSAGE: - # SetInParent method acts just like has_* = true in C, - # and helps to properly treat cases when we have optional - # field with empty repeated inside. - getattr(pb, field.name).SetInParent() + for v in value: + if field.type == FD.TYPE_MESSAGE: + dict2pb(v, pb_val.add()) + else: + pb_val.append(_dict2pb_cast(field, v)) + else: + if field.type == FD.TYPE_MESSAGE: + # SetInParent method acts just like has_* = true in C, + # and helps to properly treat cases when we have optional + # field with empty repeated inside. 
+ getattr(pb, field.name).SetInParent() - dict2pb(value, getattr(pb, field.name, None)) - else: - setattr(pb, field.name, _dict2pb_cast(field, value)) - return pb + dict2pb(value, getattr(pb, field.name, None)) + else: + setattr(pb, field.name, _dict2pb_cast(field, value)) + return pb diff --git a/scripts/crit-setup.py b/scripts/crit-setup.py index f40588142..871e55921 100644 --- a/scripts/crit-setup.py +++ b/scripts/crit-setup.py @@ -1,12 +1,11 @@ from distutils.core import setup -setup(name = "crit", - version = "0.0.1", - description = "CRiu Image Tool", - author = "CRIU team", - author_email = "criu@openvz.org", - url = "https://github.com/checkpoint-restore/criu", - package_dir = {'pycriu': 'lib/py'}, - packages = ["pycriu", "pycriu.images"], - scripts = ["crit/crit"] - ) +setup(name="crit", + version="0.0.1", + description="CRiu Image Tool", + author="CRIU team", + author_email="criu@openvz.org", + url="https://github.com/checkpoint-restore/criu", + package_dir={'pycriu': 'lib/py'}, + packages=["pycriu", "pycriu.images"], + scripts=["crit/crit"]) diff --git a/scripts/magic-gen.py b/scripts/magic-gen.py index 7088f634d..3d9777735 100755 --- a/scripts/magic-gen.py +++ b/scripts/magic-gen.py @@ -1,61 +1,63 @@ #!/bin/env python2 import sys + # This program parses criu magic.h file and produces # magic.py with all *_MAGIC constants except RAW and V1. def main(argv): - if len(argv) != 3: - print("Usage: magic-gen.py path/to/image.h path/to/magic.py") - exit(1) + if len(argv) != 3: + print("Usage: magic-gen.py path/to/image.h path/to/magic.py") + exit(1) - magic_c_header = argv[1] - magic_py = argv[2] + magic_c_header = argv[1] + magic_py = argv[2] - out = open(magic_py, 'w+') + out = open(magic_py, 'w+') - # all_magic is used to parse constructions like: - # #define PAGEMAP_MAGIC 0x56084025 - # #define SHMEM_PAGEMAP_MAGIC PAGEMAP_MAGIC - all_magic = {} - # and magic is used to store only unique magic. 
- magic = {} + # all_magic is used to parse constructions like: + # #define PAGEMAP_MAGIC 0x56084025 + # #define SHMEM_PAGEMAP_MAGIC PAGEMAP_MAGIC + all_magic = {} + # and magic is used to store only unique magic. + magic = {} - f = open(magic_c_header, 'r') - for line in f: - split = line.split() + f = open(magic_c_header, 'r') + for line in f: + split = line.split() - if len(split) < 3: - continue + if len(split) < 3: + continue - if not '#define' in split[0]: - continue + if not '#define' in split[0]: + continue - key = split[1] - value = split[2] + key = split[1] + value = split[2] - if value in all_magic: - value = all_magic[value] - else: - magic[key] = value + if value in all_magic: + value = all_magic[value] + else: + magic[key] = value - all_magic[key] = value + all_magic[key] = value + + out.write('#Autogenerated. Do not edit!\n') + out.write('by_name = {}\n') + out.write('by_val = {}\n') + for k, v in list(magic.items()): + # We don't need RAW or V1 magic, because + # they can't be used to identify images. + if v == '0x0' or v == '1' or k == '0x0' or v == '1': + continue + if k.endswith("_MAGIC"): + # Just cutting _MAGIC suffix + k = k[:-6] + v = int(v, 16) + out.write("by_name['" + k + "'] = " + str(v) + "\n") + out.write("by_val[" + str(v) + "] = '" + k + "'\n") + f.close() + out.close() - out.write('#Autogenerated. Do not edit!\n') - out.write('by_name = {}\n') - out.write('by_val = {}\n') - for k,v in list(magic.items()): - # We don't need RAW or V1 magic, because - # they can't be used to identify images. 
- if v == '0x0' or v == '1' or k == '0x0' or v == '1': - continue - if k.endswith("_MAGIC"): - # Just cutting _MAGIC suffix - k = k[:-6] - v = int(v, 16) - out.write("by_name['"+ k +"'] = "+ str(v) +"\n") - out.write("by_val["+ str(v) +"] = '"+ k +"'\n") - f.close() - out.close() if __name__ == "__main__": - main(sys.argv) + main(sys.argv) diff --git a/soccr/test/run.py b/soccr/test/run.py index a25c29263..446584a71 100644 --- a/soccr/test/run.py +++ b/soccr/test/run.py @@ -13,17 +13,17 @@ sport = os.getenv("TCP_SPORT", "12345") dport = os.getenv("TCP_DPORT", "54321") print(sys.argv[1]) -args = [sys.argv[1], - "--addr", src, "--port", sport, "--seq", "555", - "--next", - "--addr", dst, "--port", dport, "--seq", "666", - "--reverse", "--", "./tcp-test.py"] +args = [ + sys.argv[1], "--addr", src, "--port", sport, "--seq", "555", "--next", + "--addr", dst, "--port", dport, "--seq", "666", "--reverse", "--", + "./tcp-test.py" +] -p1 = Popen(args + ["dst"], stdout = PIPE, stdin = PIPE) +p1 = Popen(args + ["dst"], stdout=PIPE, stdin=PIPE) -args.remove("--reverse"); +args.remove("--reverse") -p2 = Popen(args + ["src"], stdout = PIPE, stdin = PIPE) +p2 = Popen(args + ["src"], stdout=PIPE, stdin=PIPE) p1.stdout.read(5) p2.stdout.read(5) @@ -42,7 +42,7 @@ str2 = m.hexdigest() if str2 != eval(s): print("FAIL", repr(str2), repr(s)) - sys.exit(5); + sys.exit(5) s = p1.stdout.read() m = hashlib.md5() @@ -52,7 +52,7 @@ str1 = m.hexdigest() s = p2.stdout.read() if str1 != eval(s): print("FAIL", repr(str1), s) - sys.exit(5); + sys.exit(5) if p1.wait(): sys.exit(1) diff --git a/test/check_actions.py b/test/check_actions.py index 0e3daf178..ae909e668 100755 --- a/test/check_actions.py +++ b/test/check_actions.py @@ -4,37 +4,38 @@ import sys import os actions = set(['pre-dump', 'pre-restore', 'post-dump', 'setup-namespaces', \ - 'post-setup-namespaces', 'post-restore', 'post-resume', \ - 'network-lock', 'network-unlock' ]) + 'post-setup-namespaces', 'post-restore', 'post-resume', \ + 
'network-lock', 'network-unlock' ]) errors = [] af = os.path.dirname(os.path.abspath(__file__)) + '/actions_called.txt' for act in open(af): - act = act.strip().split() - act.append('EMPTY') - act.append('EMPTY') + act = act.strip().split() + act.append('EMPTY') + act.append('EMPTY') - if act[0] == 'EMPTY': - raise Exception("Error in test, bogus actions line") + if act[0] == 'EMPTY': + raise Exception("Error in test, bogus actions line") - if act[1] == 'EMPTY': - errors.append('Action %s misses CRTOOLS_IMAGE_DIR' % act[0]) + if act[1] == 'EMPTY': + errors.append('Action %s misses CRTOOLS_IMAGE_DIR' % act[0]) - if act[0] in ('post-dump', 'setup-namespaces', 'post-setup-namespaces', \ - 'post-restore', 'post-resume', 'network-lock', 'network-unlock'): - if act[2] == 'EMPTY': - errors.append('Action %s misses CRTOOLS_INIT_PID' % act[0]) - elif not act[2].isdigit() or int(act[2]) == 0: - errors.append('Action %s PID is not number (%s)' % (act[0], act[2])) + if act[0] in ('post-dump', 'setup-namespaces', 'post-setup-namespaces', \ + 'post-restore', 'post-resume', 'network-lock', 'network-unlock'): + if act[2] == 'EMPTY': + errors.append('Action %s misses CRTOOLS_INIT_PID' % act[0]) + elif not act[2].isdigit() or int(act[2]) == 0: + errors.append('Action %s PID is not number (%s)' % + (act[0], act[2])) - actions -= set([act[0]]) + actions -= set([act[0]]) if actions: - errors.append('Not all actions called: %r' % actions) + errors.append('Not all actions called: %r' % actions) if errors: - for x in errors: - print(x) - sys.exit(1) + for x in errors: + print(x) + sys.exit(1) print('PASS') diff --git a/test/crit-recode.py b/test/crit-recode.py index 441f7757e..a7dcc7272 100755 --- a/test/crit-recode.py +++ b/test/crit-recode.py @@ -6,70 +6,72 @@ import sys import os import subprocess -find = subprocess.Popen(['find', 'test/dump/', '-size', '+0', '-name', '*.img'], - stdout = subprocess.PIPE) +find = subprocess.Popen( + ['find', 'test/dump/', '-size', '+0', '-name', 
'*.img'], + stdout=subprocess.PIPE) test_pass = True + def recode_and_check(imgf, o_img, pretty): - try: - pb = pycriu.images.loads(o_img, pretty) - except pycriu.images.MagicException as me: - print("%s magic %x error" % (imgf, me.magic)) - return False - except Exception as e: - print("%s %sdecode fails: %s" % (imgf, pretty and 'pretty ' or '', e)) - return False + try: + pb = pycriu.images.loads(o_img, pretty) + except pycriu.images.MagicException as me: + print("%s magic %x error" % (imgf, me.magic)) + return False + except Exception as e: + print("%s %sdecode fails: %s" % (imgf, pretty and 'pretty ' or '', e)) + return False - try: - r_img = pycriu.images.dumps(pb) - except Exception as e: - r_img = pycriu.images.dumps(pb) - print("%s %s encode fails: %s" % (imgf, pretty and 'pretty ' or '', e)) - return False + try: + r_img = pycriu.images.dumps(pb) + except Exception as e: + r_img = pycriu.images.dumps(pb) + print("%s %s encode fails: %s" % (imgf, pretty and 'pretty ' or '', e)) + return False - if o_img != r_img: - print("%s %s recode mismatch" % (imgf, pretty and 'pretty ' or '')) - return False + if o_img != r_img: + print("%s %s recode mismatch" % (imgf, pretty and 'pretty ' or '')) + return False - return True + return True for imgf in find.stdout.readlines(): - imgf = imgf.strip() - imgf_b = os.path.basename(imgf) + imgf = imgf.strip() + imgf_b = os.path.basename(imgf) - if imgf_b.startswith(b'pages-'): - continue - if imgf_b.startswith(b'iptables-'): - continue - if imgf_b.startswith(b'ip6tables-'): - continue - if imgf_b.startswith(b'route-'): - continue - if imgf_b.startswith(b'route6-'): - continue - if imgf_b.startswith(b'ifaddr-'): - continue - if imgf_b.startswith(b'tmpfs-'): - continue - if imgf_b.startswith(b'netns-ct-'): - continue - if imgf_b.startswith(b'netns-exp-'): - continue - if imgf_b.startswith(b'rule-'): - continue + if imgf_b.startswith(b'pages-'): + continue + if imgf_b.startswith(b'iptables-'): + continue + if 
imgf_b.startswith(b'ip6tables-'): + continue + if imgf_b.startswith(b'route-'): + continue + if imgf_b.startswith(b'route6-'): + continue + if imgf_b.startswith(b'ifaddr-'): + continue + if imgf_b.startswith(b'tmpfs-'): + continue + if imgf_b.startswith(b'netns-ct-'): + continue + if imgf_b.startswith(b'netns-exp-'): + continue + if imgf_b.startswith(b'rule-'): + continue - o_img = open(imgf.decode(), "rb").read() - if not recode_and_check(imgf, o_img, False): - test_pass = False - if not recode_and_check(imgf, o_img, True): - test_pass = False + o_img = open(imgf.decode(), "rb").read() + if not recode_and_check(imgf, o_img, False): + test_pass = False + if not recode_and_check(imgf, o_img, True): + test_pass = False find.wait() if not test_pass: - print("FAIL") - sys.exit(1) + print("FAIL") + sys.exit(1) print("PASS") diff --git a/test/exhaustive/pipe.py b/test/exhaustive/pipe.py index 17e065800..fdadc480c 100755 --- a/test/exhaustive/pipe.py +++ b/test/exhaustive/pipe.py @@ -8,125 +8,127 @@ import time import sys import subprocess -criu_bin='../../criu/criu' +criu_bin = '../../criu/criu' + def mix(nr_tasks, nr_pipes): - # Returned is the list of combinations. - # Each combination is the lists of pipe descriptors. - # Each pipe descriptor is a 2-elemtn tuple, that contains values - # for R and W ends of pipes, each being a bit-field denoting in - # which tasks the respective end should be opened or not. + # Returned is the list of combinations. + # Each combination is the lists of pipe descriptors. + # Each pipe descriptor is a 2-elemtn tuple, that contains values + # for R and W ends of pipes, each being a bit-field denoting in + # which tasks the respective end should be opened or not. - # First -- make a full set of combinations for a single pipe. - max_idx = 1 << nr_tasks - pipe_mix = [[(r, w)] for r in range(0, max_idx) for w in range(0, max_idx)] + # First -- make a full set of combinations for a single pipe. 
+ max_idx = 1 << nr_tasks + pipe_mix = [[(r, w)] for r in range(0, max_idx) for w in range(0, max_idx)] - # Now, for every pipe throw another one into the game making - # all possible combinations of what was seen before with the - # newbie. - pipes_mix = pipe_mix - for t in range(1, nr_pipes): - pipes_mix = [ o + n for o in pipes_mix for n in pipe_mix ] + # Now, for every pipe throw another one into the game making + # all possible combinations of what was seen before with the + # newbie. + pipes_mix = pipe_mix + for t in range(1, nr_pipes): + pipes_mix = [o + n for o in pipes_mix for n in pipe_mix] - return pipes_mix + return pipes_mix # Called by a test sub-process. It just closes the not needed ends # of pipes and sleeps waiting for death. def make_pipes(task_nr, nr_pipes, pipes, comb, status_pipe): - print('\t\tMake pipes for %d' % task_nr) - # We need to make sure that pipes have their - # ends according to comb for task_nr + print('\t\tMake pipes for %d' % task_nr) + # We need to make sure that pipes have their + # ends according to comb for task_nr - for i in range(0, nr_pipes): - # Read end - if not (comb[i][0] & (1 << task_nr)): - os.close(pipes[i][0]) - # Write end - if not (comb[i][1] & (1 << task_nr)): - os.close(pipes[i][1]) + for i in range(0, nr_pipes): + # Read end + if not (comb[i][0] & (1 << task_nr)): + os.close(pipes[i][0]) + # Write end + if not (comb[i][1] & (1 << task_nr)): + os.close(pipes[i][1]) - os.write(status_pipe, '0') - os.close(status_pipe) - while True: - time.sleep(100) + os.write(status_pipe, '0') + os.close(status_pipe) + while True: + time.sleep(100) def get_pipe_ino(pid, fd): - try: - return os.stat('/proc/%d/fd/%d' % (pid, fd)).st_ino - except: - return None + try: + return os.stat('/proc/%d/fd/%d' % (pid, fd)).st_ino + except: + return None def get_pipe_rw(pid, fd): - for l in open('/proc/%d/fdinfo/%d' % (pid, fd)): - if l.startswith('flags:'): - f = l.split(None, 1)[1][-2] - if f == '0': - return 0 # Read - elif f == '1': - 
return 1 # Write - break + for l in open('/proc/%d/fdinfo/%d' % (pid, fd)): + if l.startswith('flags:'): + f = l.split(None, 1)[1][-2] + if f == '0': + return 0 # Read + elif f == '1': + return 1 # Write + break - raise Exception('Unexpected fdinfo contents') + raise Exception('Unexpected fdinfo contents') def check_pipe_y(pid, fd, rw, inos): - ino = get_pipe_ino(pid, fd) - if ino == None: - return 'missing ' - if not inos.has_key(fd): - inos[fd] = ino - elif inos[fd] != ino: - return 'wrong ' - mod = get_pipe_rw(pid, fd) - if mod != rw: - return 'badmode ' - return None + ino = get_pipe_ino(pid, fd) + if ino == None: + return 'missing ' + if not inos.has_key(fd): + inos[fd] = ino + elif inos[fd] != ino: + return 'wrong ' + mod = get_pipe_rw(pid, fd) + if mod != rw: + return 'badmode ' + return None def check_pipe_n(pid, fd): - ino = get_pipe_ino(pid, fd) - if ino == None: - return None - else: - return 'present ' + ino = get_pipe_ino(pid, fd) + if ino == None: + return None + else: + return 'present ' def check_pipe_end(kids, fd, comb, rw, inos): - t_nr = 0 - for t_pid in kids: - if comb & (1 << t_nr): - res = check_pipe_y(t_pid, fd, rw, inos) - else: - res = check_pipe_n(t_pid, fd) - if res != None: - return res + 'kid(%d)' % t_nr - t_nr += 1 - return None + t_nr = 0 + for t_pid in kids: + if comb & (1 << t_nr): + res = check_pipe_y(t_pid, fd, rw, inos) + else: + res = check_pipe_n(t_pid, fd) + if res != None: + return res + 'kid(%d)' % t_nr + t_nr += 1 + return None def check_pipe(kids, fds, comb, inos): - for e in (0, 1): # 0 == R, 1 == W, see get_pipe_rw() - res = check_pipe_end(kids, fds[e], comb[e], e, inos) - if res != None: - return res + 'end(%d)' % e - return None + for e in (0, 1): # 0 == R, 1 == W, see get_pipe_rw() + res = check_pipe_end(kids, fds[e], comb[e], e, inos) + if res != None: + return res + 'end(%d)' % e + return None + def check_pipes(kids, pipes, comb): - # Kids contain pids - # Pipes contain pipe FDs - # Comb contain list of pairs of 
bits for RW ends - p_nr = 0 - p_inos = {} - for p_fds in pipes: - res = check_pipe(kids, p_fds, comb[p_nr], p_inos) - if res != None: - return res + 'pipe(%d)' % p_nr - p_nr += 1 + # Kids contain pids + # Pipes contain pipe FDs + # Comb contain list of pairs of bits for RW ends + p_nr = 0 + p_inos = {} + for p_fds in pipes: + res = check_pipe(kids, p_fds, comb[p_nr], p_inos) + if res != None: + return res + 'pipe(%d)' % p_nr + p_nr += 1 - return None + return None # Run by test main process. It opens pipes, then forks kids that @@ -134,128 +136,134 @@ def check_pipes(kids, pipes, comb): # and waits for a signal (unix socket message) to start checking # the kids' FD tables. def make_comb(comb, opts, status_pipe): - print('\tMake pipes') - # 1st -- make needed pipes - pipes = [] - for p in range(0, opts.pipes): - pipes.append(os.pipe()) + print('\tMake pipes') + # 1st -- make needed pipes + pipes = [] + for p in range(0, opts.pipes): + pipes.append(os.pipe()) - # Fork the kids that'll make pipes - kc_pipe = os.pipe() - kids = [] - for t in range(0, opts.tasks): - pid = os.fork() - if pid == 0: - os.close(status_pipe) - os.close(kc_pipe[0]) - make_pipes(t, opts.pipes, pipes, comb, kc_pipe[1]) - sys.exit(1) - kids.append(pid) + # Fork the kids that'll make pipes + kc_pipe = os.pipe() + kids = [] + for t in range(0, opts.tasks): + pid = os.fork() + if pid == 0: + os.close(status_pipe) + os.close(kc_pipe[0]) + make_pipes(t, opts.pipes, pipes, comb, kc_pipe[1]) + sys.exit(1) + kids.append(pid) - os.close(kc_pipe[1]) - for p in pipes: - os.close(p[0]) - os.close(p[1]) + os.close(kc_pipe[1]) + for p in pipes: + os.close(p[0]) + os.close(p[1]) - # Wait for kids to get ready - k_res = '' - while True: - v = os.read(kc_pipe[0], 16) - if v == '': - break - k_res += v - os.close(kc_pipe[0]) + # Wait for kids to get ready + k_res = '' + while True: + v = os.read(kc_pipe[0], 16) + if v == '': + break + k_res += v + os.close(kc_pipe[0]) - ex_code = 1 - if k_res == '0' * opts.tasks: 
- print('\tWait for C/R') - cmd_sk = socket.socket(socket.AF_UNIX, socket.SOCK_DGRAM, 0) - cmd_sk.bind('\0CRIUPCSK') + ex_code = 1 + if k_res == '0' * opts.tasks: + print('\tWait for C/R') + cmd_sk = socket.socket(socket.AF_UNIX, socket.SOCK_DGRAM, 0) + cmd_sk.bind('\0CRIUPCSK') - # Kids are ready, so is socket for kicking us. Notify the - # parent task that we are good to go. - os.write(status_pipe, '0') - os.close(status_pipe) - v = cmd_sk.recv(16) - if v == '0': - print('\tCheck pipes') - res = check_pipes(kids, pipes, comb) - if res == None: - ex_code = 0 - else: - print('\tFAIL %s' % res) + # Kids are ready, so is socket for kicking us. Notify the + # parent task that we are good to go. + os.write(status_pipe, '0') + os.close(status_pipe) + v = cmd_sk.recv(16) + if v == '0': + print('\tCheck pipes') + res = check_pipes(kids, pipes, comb) + if res == None: + ex_code = 0 + else: + print('\tFAIL %s' % res) - # Just kill kids, all checks are done by us, we don't need'em any more - for t in kids: - os.kill(t, signal.SIGKILL) - os.waitpid(t, 0) + # Just kill kids, all checks are done by us, we don't need'em any more + for t in kids: + os.kill(t, signal.SIGKILL) + os.waitpid(t, 0) - return ex_code + return ex_code def cr_test(pid): - print('C/R test') - img_dir = 'pimg_%d' % pid - try: - os.mkdir(img_dir) - subprocess.check_call([criu_bin, 'dump', '-t', '%d' % pid, '-D', img_dir, '-o', 'dump.log', '-v4', '-j']) - except: - print('`- dump fail') - return False + print('C/R test') + img_dir = 'pimg_%d' % pid + try: + os.mkdir(img_dir) + subprocess.check_call([ + criu_bin, 'dump', '-t', + '%d' % pid, '-D', img_dir, '-o', 'dump.log', '-v4', '-j' + ]) + except: + print('`- dump fail') + return False - try: - os.waitpid(pid, 0) - subprocess.check_call([criu_bin, 'restore', '-D', img_dir, '-o', 'rst.log', '-v4', '-j', '-d', '-S']) - except: - print('`- restore fail') - return False + try: + os.waitpid(pid, 0) + subprocess.check_call([ + criu_bin, 'restore', '-D', img_dir, 
'-o', 'rst.log', '-v4', '-j', + '-d', '-S' + ]) + except: + print('`- restore fail') + return False - return True + return True def run(comb, opts): - print('Checking %r' % comb) - cpipe = os.pipe() - pid = os.fork() - if pid == 0: - os.close(cpipe[0]) - ret = make_comb(comb, opts, cpipe[1]) - sys.exit(ret) + print('Checking %r' % comb) + cpipe = os.pipe() + pid = os.fork() + if pid == 0: + os.close(cpipe[0]) + ret = make_comb(comb, opts, cpipe[1]) + sys.exit(ret) - # Wait for the main process to get ready - os.close(cpipe[1]) - res = os.read(cpipe[0], 16) - os.close(cpipe[0]) + # Wait for the main process to get ready + os.close(cpipe[1]) + res = os.read(cpipe[0], 16) + os.close(cpipe[0]) - if res == '0': - res = cr_test(pid) + if res == '0': + res = cr_test(pid) - print('Wake up test') - s = socket.socket(socket.AF_UNIX, socket.SOCK_DGRAM, 0) - if res: - res = '0' - else: - res = 'X' - try: - # Kick the test to check its state - s.sendto(res, '\0CRIUPCSK') - except: - # Restore might have failed or smth else happened - os.kill(pid, signal.SIGKILL) - s.close() + print('Wake up test') + s = socket.socket(socket.AF_UNIX, socket.SOCK_DGRAM, 0) + if res: + res = '0' + else: + res = 'X' + try: + # Kick the test to check its state + s.sendto(res, '\0CRIUPCSK') + except: + # Restore might have failed or smth else happened + os.kill(pid, signal.SIGKILL) + s.close() - # Wait for the guy to exit and get the result (PASS/FAIL) - p, st = os.waitpid(pid, 0) - if os.WIFEXITED(st): - st = os.WEXITSTATUS(st) + # Wait for the guy to exit and get the result (PASS/FAIL) + p, st = os.waitpid(pid, 0) + if os.WIFEXITED(st): + st = os.WEXITSTATUS(st) - print('Done (%d, pid == %d)' % (st, pid)) - return st == 0 + print('Done (%d, pid == %d)' % (st, pid)) + return st == 0 p = argparse.ArgumentParser("CRIU test suite") -p.add_argument("--tasks", help = "Number of tasks", default = '2') -p.add_argument("--pipes", help = "Number of pipes", default = '2') +p.add_argument("--tasks", 
help="Number of tasks", default='2') +p.add_argument("--pipes", help="Number of pipes", default='2') opts = p.parse_args() opts.tasks = int(opts.tasks) opts.pipes = int(opts.pipes) @@ -263,8 +271,8 @@ opts.pipes = int(opts.pipes) pipe_combs = mix(opts.tasks, opts.pipes) for comb in pipe_combs: - if not run(comb, opts): - print('FAIL') - break + if not run(comb, opts): + print('FAIL') + break else: - print('PASS') + print('PASS') diff --git a/test/exhaustive/unix.py b/test/exhaustive/unix.py index 41053bd0d..98dbbb7b0 100755 --- a/test/exhaustive/unix.py +++ b/test/exhaustive/unix.py @@ -9,11 +9,11 @@ import signal import fcntl import stat -criu_bin='../../criu/criu' +criu_bin = '../../criu/criu' sk_type_s = { - socket.SOCK_STREAM: "S", - socket.SOCK_DGRAM: "D", + socket.SOCK_STREAM: "S", + socket.SOCK_DGRAM: "D", } # Actions that can be done by test. Actions are not only syscall @@ -25,721 +25,739 @@ sk_type_s = { # - do() method, that actually does what's required # - show() method to return the string description of what's done + def mk_socket(st, typ): - st.sk_id += 1 - sk = sock(st.sk_id, typ) - st.add_socket(sk) - return sk + st.sk_id += 1 + sk = sock(st.sk_id, typ) + st.add_socket(sk) + return sk + class act_socket: - def __init__(self, typ): - self.typ = typ + def __init__(self, typ): + self.typ = typ - def act(self, st): - sk = mk_socket(st, self.typ) - self.sk_id = sk.sk_id + def act(self, st): + sk = mk_socket(st, self.typ) + self.sk_id = sk.sk_id - def do(self, st): - sk = socket.socket(socket.AF_UNIX, self.typ, 0) - st.real_sockets[self.sk_id] = sk + def do(self, st): + sk = socket.socket(socket.AF_UNIX, self.typ, 0) + st.real_sockets[self.sk_id] = sk - def show(self): - return 'socket(%s) = %d' % (sk_type_s[self.typ], self.sk_id) + def show(self): + return 'socket(%s) = %d' % (sk_type_s[self.typ], self.sk_id) class act_close: - def __init__(self, sk_id): - self.sk_id = sk_id + def __init__(self, sk_id): + self.sk_id = sk_id - def act(self, st): - sk = 
st.get_socket(self.sk_id) - st.del_socket(sk) - for ic in sk.icons: - sk = st.get_socket(ic) - st.del_socket(sk) + def act(self, st): + sk = st.get_socket(self.sk_id) + st.del_socket(sk) + for ic in sk.icons: + sk = st.get_socket(ic) + st.del_socket(sk) - def do(self, st): - sk = st.real_sockets.pop(self.sk_id) - sk.close() + def do(self, st): + sk = st.real_sockets.pop(self.sk_id) + sk.close() - def show(self): - return 'close(%d)' % self.sk_id + def show(self): + return 'close(%d)' % self.sk_id class act_listen: - def __init__(self, sk_id): - self.sk_id = sk_id + def __init__(self, sk_id): + self.sk_id = sk_id - def act(self, st): - sk = st.get_socket(self.sk_id) - sk.listen = True + def act(self, st): + sk = st.get_socket(self.sk_id) + sk.listen = True - def do(self, st): - sk = st.real_sockets[self.sk_id] - sk.listen(10) + def do(self, st): + sk = st.real_sockets[self.sk_id] + sk.listen(10) - def show(self): - return 'listen(%d)' % self.sk_id + def show(self): + return 'listen(%d)' % self.sk_id class act_bind: - def __init__(self, sk_id, name_id): - self.sk_id = sk_id - self.name_id = name_id + def __init__(self, sk_id, name_id): + self.sk_id = sk_id + self.name_id = name_id - def act(self, st): - sk = st.get_socket(self.sk_id) - sk.name = self.name_id + def act(self, st): + sk = st.get_socket(self.sk_id) + sk.name = self.name_id - def do(self, st): - sk = st.real_sockets[self.sk_id] - sk.bind(sock.real_name_for(self.name_id)) + def do(self, st): + sk = st.real_sockets[self.sk_id] + sk.bind(sock.real_name_for(self.name_id)) - def show(self): - return 'bind(%d, $name-%d)' % (self.sk_id, self.name_id) + def show(self): + return 'bind(%d, $name-%d)' % (self.sk_id, self.name_id) class act_connect: - def __init__(self, sk_id, listen_sk_id): - self.sk_id = sk_id - self.lsk_id = listen_sk_id + def __init__(self, sk_id, listen_sk_id): + self.sk_id = sk_id + self.lsk_id = listen_sk_id - def act(self, st): - sk = st.get_socket(self.sk_id) - if st.sk_type == 
socket.SOCK_STREAM: - lsk = st.get_socket(self.lsk_id) - psk = mk_socket(st, socket.SOCK_STREAM) - psk.visible = False - sk.peer = psk.sk_id - psk.peer = sk.sk_id - psk.name = lsk.name - lsk.icons.append(psk.sk_id) - lsk.icons_seq += 1 - else: - sk.peer = self.lsk_id - psk = st.get_socket(self.lsk_id) - psk.icons_seq += 1 + def act(self, st): + sk = st.get_socket(self.sk_id) + if st.sk_type == socket.SOCK_STREAM: + lsk = st.get_socket(self.lsk_id) + psk = mk_socket(st, socket.SOCK_STREAM) + psk.visible = False + sk.peer = psk.sk_id + psk.peer = sk.sk_id + psk.name = lsk.name + lsk.icons.append(psk.sk_id) + lsk.icons_seq += 1 + else: + sk.peer = self.lsk_id + psk = st.get_socket(self.lsk_id) + psk.icons_seq += 1 - def do(self, st): - sk = st.real_sockets[self.sk_id] - sk.connect(sock.real_name_for(self.lsk_id)) + def do(self, st): + sk = st.real_sockets[self.sk_id] + sk.connect(sock.real_name_for(self.lsk_id)) - def show(self): - return 'connect(%d, $name-%d)' % (self.sk_id, self.lsk_id) + def show(self): + return 'connect(%d, $name-%d)' % (self.sk_id, self.lsk_id) class act_accept: - def __init__(self, sk_id): - self.sk_id = sk_id + def __init__(self, sk_id): + self.sk_id = sk_id - def act(self, st): - lsk = st.get_socket(self.sk_id) - iid = lsk.icons.pop(0) - nsk = st.get_socket(iid) - nsk.visible = True - self.nsk_id = nsk.sk_id + def act(self, st): + lsk = st.get_socket(self.sk_id) + iid = lsk.icons.pop(0) + nsk = st.get_socket(iid) + nsk.visible = True + self.nsk_id = nsk.sk_id - def do(self, st): - sk = st.real_sockets[self.sk_id] - nsk, ai = sk.accept() - if self.nsk_id in st.real_sockets: - raise Exception("SK ID conflict") - st.real_sockets[self.nsk_id] = nsk + def do(self, st): + sk = st.real_sockets[self.sk_id] + nsk, ai = sk.accept() + if self.nsk_id in st.real_sockets: + raise Exception("SK ID conflict") + st.real_sockets[self.nsk_id] = nsk - def show(self): - return 'accept(%d) = %d' % (self.sk_id, self.nsk_id) + def show(self): + return 'accept(%d) = 
%d' % (self.sk_id, self.nsk_id) class act_sendmsg: - def __init__(self, sk_id, to_id): - self.sk_id = sk_id - self.to_id = to_id - self.direct_send = None + def __init__(self, sk_id, to_id): + self.sk_id = sk_id + self.to_id = to_id + self.direct_send = None - def act(self, st): - sk = st.get_socket(self.sk_id) - msg = (sk.sk_id, sk.outseq) - self.msg_id = sk.outseq - sk.outseq += 1 - psk = st.get_socket(self.to_id) - psk.inqueue.append(msg) - self.direct_send = (sk.peer == psk.sk_id) + def act(self, st): + sk = st.get_socket(self.sk_id) + msg = (sk.sk_id, sk.outseq) + self.msg_id = sk.outseq + sk.outseq += 1 + psk = st.get_socket(self.to_id) + psk.inqueue.append(msg) + self.direct_send = (sk.peer == psk.sk_id) - def do(self, st): - sk = st.real_sockets[self.sk_id] - msgv = act_sendmsg.msgval(self.msg_id) - if self.direct_send: - sk.send(msgv) - else: - sk.sendto(msgv, sock.real_name_for(self.to_id)) + def do(self, st): + sk = st.real_sockets[self.sk_id] + msgv = act_sendmsg.msgval(self.msg_id) + if self.direct_send: + sk.send(msgv) + else: + sk.sendto(msgv, sock.real_name_for(self.to_id)) - def show(self): - return 'send(%d, %d, $message-%d)' % (self.sk_id, self.to_id, self.msg_id) + def show(self): + return 'send(%d, %d, $message-%d)' % (self.sk_id, self.to_id, + self.msg_id) + + @staticmethod + def msgval(msgid, pref=''): + return '%sMSG%d' % (pref, msgid) - @staticmethod - def msgval(msgid, pref = ''): - return '%sMSG%d' % (pref, msgid) # # Description of a socket # class sock: - def __init__(self, sk_id, sock_type): - # ID of a socket. Since states and sockets are cloned - # while we scan the tree of states the only valid way - # to address a socket is to find one by ID. - self.sk_id = sk_id - # The socket.SOCK_FOO value - self.sk_type = sock_type - # Sockets that haven't yet been accept()-ed are in the - # state, but user cannot operate on them. 
Also this - # invisibility contributes to state description since - # connection to not accepted socket is not the same - # as connection to accepted one. - self.visible = True - # The listen() was called. - self.listen = False - # The bind() was called. Also set by accept(), the name - # inherits from listener. - self.name = None - # The connect() was called. Set on two sockets when the - # connect() is called. - self.peer = None - # Progress on accepting connections. Used to check when - # it's OK to close the socket (see comment below). - self.icons_seq = 0 - # List of IDs of sockets that can be accept()-ed - self.icons = [] - # Number to generate message contents. - self.outseq = 0 - # Incoming queue of messages. - self.inqueue = [] + def __init__(self, sk_id, sock_type): + # ID of a socket. Since states and sockets are cloned + # while we scan the tree of states the only valid way + # to address a socket is to find one by ID. + self.sk_id = sk_id + # The socket.SOCK_FOO value + self.sk_type = sock_type + # Sockets that haven't yet been accept()-ed are in the + # state, but user cannot operate on them. Also this + # invisibility contributes to state description since + # connection to not accepted socket is not the same + # as connection to accepted one. + self.visible = True + # The listen() was called. + self.listen = False + # The bind() was called. Also set by accept(), the name + # inherits from listener. + self.name = None + # The connect() was called. Set on two sockets when the + # connect() is called. + self.peer = None + # Progress on accepting connections. Used to check when + # it's OK to close the socket (see comment below). + self.icons_seq = 0 + # List of IDs of sockets that can be accept()-ed + self.icons = [] + # Number to generate message contents. + self.outseq = 0 + # Incoming queue of messages. 
+ self.inqueue = [] - def clone(self): - sk = sock(self.sk_id, self.sk_type) - sk.visible = self.visible - sk.listen = self.listen - sk.name = self.name - sk.peer = self.peer - sk.icons_seq = self.icons_seq - sk.icons = list(self.icons) - sk.outseq = self.outseq - sk.inqueue = list(self.inqueue) - return sk + def clone(self): + sk = sock(self.sk_id, self.sk_type) + sk.visible = self.visible + sk.listen = self.listen + sk.name = self.name + sk.peer = self.peer + sk.icons_seq = self.icons_seq + sk.icons = list(self.icons) + sk.outseq = self.outseq + sk.inqueue = list(self.inqueue) + return sk - def get_actions(self, st): - if not self.visible: - return [] + def get_actions(self, st): + if not self.visible: + return [] - if st.sk_type == socket.SOCK_STREAM: - return self.get_stream_actions(st) - else: - return self.get_dgram_actions(st) + if st.sk_type == socket.SOCK_STREAM: + return self.get_stream_actions(st) + else: + return self.get_dgram_actions(st) - def get_send_action(self, to, st): - # However, if peer has a message from us at - # the queue tail, sending a new one doesn't - # really make sense - want_msg = True - if len(to.inqueue) != 0: - lmsg = to.inqueue[-1] - if lmsg[0] == self.sk_id: - want_msg = False - if want_msg: - return [ act_sendmsg(self.sk_id, to.sk_id) ] - else: - return [ ] + def get_send_action(self, to, st): + # However, if peer has a message from us at + # the queue tail, sending a new one doesn't + # really make sense + want_msg = True + if len(to.inqueue) != 0: + lmsg = to.inqueue[-1] + if lmsg[0] == self.sk_id: + want_msg = False + if want_msg: + return [act_sendmsg(self.sk_id, to.sk_id)] + else: + return [] - def get_stream_actions(self, st): - act_list = [] + def get_stream_actions(self, st): + act_list = [] - # Any socket can be closed, but closing a socket - # that hasn't contributed to some new states is - # just waste of time, so we close only connected - # sockets or listeners that has at least one - # incoming connection pendig or 
served + # Any socket can be closed, but closing a socket + # that hasn't contributed to some new states is + # just waste of time, so we close only connected + # sockets or listeners that has at least one + # incoming connection pendig or served - if self.listen: - if self.icons: - act_list.append(act_accept(self.sk_id)) - if self.icons_seq: - act_list.append(act_close(self.sk_id)) - elif self.peer: - act_list.append(act_close(self.sk_id)) - # Connected sockets can send and receive messages - # But receiving seem not to produce any new states, - # so only sending - # Also sending to a closed socket doesn't work - psk = st.get_socket(self.peer, True) - if psk: - act_list += self.get_send_action(psk, st) - else: - for psk in st.sockets: - if psk.listen and psk.name: - act_list.append(act_connect(self.sk_id, psk.sk_id)) + if self.listen: + if self.icons: + act_list.append(act_accept(self.sk_id)) + if self.icons_seq: + act_list.append(act_close(self.sk_id)) + elif self.peer: + act_list.append(act_close(self.sk_id)) + # Connected sockets can send and receive messages + # But receiving seem not to produce any new states, + # so only sending + # Also sending to a closed socket doesn't work + psk = st.get_socket(self.peer, True) + if psk: + act_list += self.get_send_action(psk, st) + else: + for psk in st.sockets: + if psk.listen and psk.name: + act_list.append(act_connect(self.sk_id, psk.sk_id)) - # Listen on not-bound socket is prohibited as - # well as binding a listening socket - if not self.name: - # TODO: support for file paths (see real_name_for) - # TODO: these names can overlap each other - act_list.append(act_bind(self.sk_id, self.sk_id)) - else: - act_list.append(act_listen(self.sk_id)) + # Listen on not-bound socket is prohibited as + # well as binding a listening socket + if not self.name: + # TODO: support for file paths (see real_name_for) + # TODO: these names can overlap each other + act_list.append(act_bind(self.sk_id, self.sk_id)) + else: + 
act_list.append(act_listen(self.sk_id)) - return act_list + return act_list - def get_dgram_actions(self, st): - act_list = [] + def get_dgram_actions(self, st): + act_list = [] - # Dgram socket can bind at any time - if not self.name: - act_list.append(act_bind(self.sk_id, self.sk_id)) + # Dgram socket can bind at any time + if not self.name: + act_list.append(act_bind(self.sk_id, self.sk_id)) - # Can connect to peer-less sockets - for psk in st.sockets: - if psk == self: - continue - if psk.peer != None and psk.peer != self.sk_id: - # Peer by someone else, can do nothing - continue + # Can connect to peer-less sockets + for psk in st.sockets: + if psk == self: + continue + if psk.peer != None and psk.peer != self.sk_id: + # Peer by someone else, can do nothing + continue - # Peer-less psk or having us as peer - # We can connect to or send messages - if psk.name and self.peer != psk.sk_id: - act_list.append(act_connect(self.sk_id, psk.sk_id)) + # Peer-less psk or having us as peer + # We can connect to or send messages + if psk.name and self.peer != psk.sk_id: + act_list.append(act_connect(self.sk_id, psk.sk_id)) - if psk.name or self.peer == psk.sk_id: - act_list += self.get_send_action(psk, st) + if psk.name or self.peer == psk.sk_id: + act_list += self.get_send_action(psk, st) - if self.outseq != 0 or self.icons_seq != 0: - act_list.append(act_close(self.sk_id)) + if self.outseq != 0 or self.icons_seq != 0: + act_list.append(act_close(self.sk_id)) - return act_list + return act_list - @staticmethod - def name_of(sk): - if not sk: - return 'X' - elif not sk.visible: - return 'H' - elif sk.name: - return 'B' - else: - return 'A' + @staticmethod + def name_of(sk): + if not sk: + return 'X' + elif not sk.visible: + return 'H' + elif sk.name: + return 'B' + else: + return 'A' - @staticmethod - def real_name_for(sk_id): - return "\0" + "CRSK%d" % sk_id + @staticmethod + def real_name_for(sk_id): + return "\0" + "CRSK%d" % sk_id - # The describe() generates a string 
that represents - # a state of a socket. Called by state.describe(), see - # comment there about what description is. - def describe(self, st): - dsc = '%s' % sk_type_s[self.sk_type] - dsc += sock.name_of(self) + # The describe() generates a string that represents + # a state of a socket. Called by state.describe(), see + # comment there about what description is. + def describe(self, st): + dsc = '%s' % sk_type_s[self.sk_type] + dsc += sock.name_of(self) - if self.listen: - dsc += 'L' - if self.peer: - psk = st.get_socket(self.peer, True) - dsc += '-C%s' % sock.name_of(psk) - if self.icons: - i_dsc = '' - for c in self.icons: - psk = st.get_socket(c) - psk = st.get_socket(psk.peer, True) - i_dsc += sock.name_of(psk) - dsc += '-I%s' % i_dsc - if self.inqueue: - froms = set() - for m in self.inqueue: - froms.add(m[0]) - q_dsc = '' - for f in froms: - fsk = st.get_socket(f, True) - q_dsc += sock.name_of(fsk) - dsc += '-M%s' % q_dsc - return dsc + if self.listen: + dsc += 'L' + if self.peer: + psk = st.get_socket(self.peer, True) + dsc += '-C%s' % sock.name_of(psk) + if self.icons: + i_dsc = '' + for c in self.icons: + psk = st.get_socket(c) + psk = st.get_socket(psk.peer, True) + i_dsc += sock.name_of(psk) + dsc += '-I%s' % i_dsc + if self.inqueue: + froms = set() + for m in self.inqueue: + froms.add(m[0]) + q_dsc = '' + for f in froms: + fsk = st.get_socket(f, True) + q_dsc += sock.name_of(fsk) + dsc += '-M%s' % q_dsc + return dsc class state: - def __init__(self, max_sockets, sk_type): - self.sockets = [] - self.sk_id = 0 - self.steps = [] - self.real_sockets = {} - self.sockets_left = max_sockets - self.sk_type = sk_type + def __init__(self, max_sockets, sk_type): + self.sockets = [] + self.sk_id = 0 + self.steps = [] + self.real_sockets = {} + self.sockets_left = max_sockets + self.sk_type = sk_type - def add_socket(self, sk): - self.sockets.append(sk) + def add_socket(self, sk): + self.sockets.append(sk) - def del_socket(self, sk): - self.sockets.remove(sk) + 
def del_socket(self, sk): + self.sockets.remove(sk) - def get_socket(self, sk_id, can_be_null = False): - for sk in self.sockets: - if sk.sk_id == sk_id: - return sk + def get_socket(self, sk_id, can_be_null=False): + for sk in self.sockets: + if sk.sk_id == sk_id: + return sk - if not can_be_null: - raise Exception("%d socket not in list" % sk_id) + if not can_be_null: + raise Exception("%d socket not in list" % sk_id) - return None + return None - def get_actions(self): - act_list = [] + def get_actions(self): + act_list = [] - # Any socket in the state we can change it - for sk in self.sockets: - act_list += sk.get_actions(self) + # Any socket in the state we can change it + for sk in self.sockets: + act_list += sk.get_actions(self) - if self.sockets_left > 0: - act_list.append(act_socket(self.sk_type)) - self.sockets_left -= 1 + if self.sockets_left > 0: + act_list.append(act_socket(self.sk_type)) + self.sockets_left -= 1 - return act_list + return act_list - def clone(self): - nst = state(self.sockets_left, self.sk_type) - for sk in self.sockets: - nst.sockets.append(sk.clone()) - nst.sk_id = self.sk_id - nst.steps = list(self.steps) - return nst + def clone(self): + nst = state(self.sockets_left, self.sk_type) + for sk in self.sockets: + nst.sockets.append(sk.clone()) + nst.sk_id = self.sk_id + nst.steps = list(self.steps) + return nst - # Generates textual description of a state. Different states - # may have same descriptions, e.g. if we have two sockets and - # only one of them is in listen state, we don't care which - # one in which. At the same time really different states - # shouldn't map to the same string. - def describe(self): - sks = [x.describe(self) for x in self.sockets] - sks = sorted(sks) - return '_'.join(sks) + # Generates textual description of a state. Different states + # may have same descriptions, e.g. if we have two sockets and + # only one of them is in listen state, we don't care which + # one in which. 
At the same time really different states + # shouldn't map to the same string. + def describe(self): + sks = [x.describe(self) for x in self.sockets] + sks = sorted(sks) + return '_'.join(sks) def set_nonblock(sk): - fd = sk.fileno() - flags = fcntl.fcntl(fd, fcntl.F_GETFL) - fcntl.fcntl(fd, fcntl.F_SETFL, flags | os.O_NONBLOCK) + fd = sk.fileno() + flags = fcntl.fcntl(fd, fcntl.F_GETFL) + fcntl.fcntl(fd, fcntl.F_SETFL, flags | os.O_NONBLOCK) -CHK_FAIL_UNKNOWN = 10 -CHK_FAIL_SOCKET = 11 -CHK_FAIL_STAT = 12 -CHK_FAIL_LISTEN = 13 -CHK_FAIL_NAME = 14 -CHK_FAIL_ACCEPT = 15 -CHK_FAIL_RECV_0 = 16 -CHK_FAIL_RECV_MIX = 17 -CHK_FAIL_CONNECT = 18 -CHK_FAIL_CONNECT2 = 19 -CHK_FAIL_KILLED = 20 -CHK_FAIL_DUMP = 21 -CHK_FAIL_RESTORE = 22 -CHK_PASS = 42 +CHK_FAIL_UNKNOWN = 10 +CHK_FAIL_SOCKET = 11 +CHK_FAIL_STAT = 12 +CHK_FAIL_LISTEN = 13 +CHK_FAIL_NAME = 14 +CHK_FAIL_ACCEPT = 15 +CHK_FAIL_RECV_0 = 16 +CHK_FAIL_RECV_MIX = 17 +CHK_FAIL_CONNECT = 18 +CHK_FAIL_CONNECT2 = 19 +CHK_FAIL_KILLED = 20 +CHK_FAIL_DUMP = 21 +CHK_FAIL_RESTORE = 22 + +CHK_PASS = 42 fail_desc = { - CHK_FAIL_UNKNOWN: 'Aliens invaded the test', - CHK_FAIL_LISTEN: 'Listen state lost on restore', - CHK_FAIL_NAME: 'Name lost on restore', - CHK_FAIL_ACCEPT: 'Incoming connection lost on restore', - CHK_FAIL_RECV_0: 'Message lost on restore', - CHK_FAIL_RECV_MIX: 'Message misorder on restore', - CHK_FAIL_CONNECT: 'Connectivity broken on restore', - CHK_FAIL_CONNECT2: 'Connectivity broken the hard way on restore', - CHK_FAIL_KILLED: 'Test process died unexpectedly', - CHK_FAIL_DUMP: 'Cannot dump', - CHK_FAIL_RESTORE: 'Cannot restore', + CHK_FAIL_UNKNOWN: 'Aliens invaded the test', + CHK_FAIL_LISTEN: 'Listen state lost on restore', + CHK_FAIL_NAME: 'Name lost on restore', + CHK_FAIL_ACCEPT: 'Incoming connection lost on restore', + CHK_FAIL_RECV_0: 'Message lost on restore', + CHK_FAIL_RECV_MIX: 'Message misorder on restore', + CHK_FAIL_CONNECT: 'Connectivity broken on restore', + CHK_FAIL_CONNECT2: 'Connectivity broken 
the hard way on restore', + CHK_FAIL_KILLED: 'Test process died unexpectedly', + CHK_FAIL_DUMP: 'Cannot dump', + CHK_FAIL_RESTORE: 'Cannot restore', } + def chk_real_state(st): - # Before enything else -- check that we still have - # all the sockets at hands - for sk in st.sockets: - if not sk.visible: - continue + # Before enything else -- check that we still have + # all the sockets at hands + for sk in st.sockets: + if not sk.visible: + continue - # In theory we can have key-not-found exception here, - # but this has nothing to do with sockets restore, - # since it's just bytes in memory, so ... we assume - # that we have object here and just check for it in - # the fdtable - rsk = st.real_sockets[sk.sk_id] - try: - s_st = os.fstat(rsk.fileno()) - except: - print('FAIL: Socket %d lost' % sk.sk_id) - return CHK_FAIL_SOCKET - if not stat.S_ISSOCK(s_st.st_mode): - print('FAIL: Not a socket %d at %d' % (sk.sk_id, rsk.fileno())) - return CHK_FAIL_STAT + # In theory we can have key-not-found exception here, + # but this has nothing to do with sockets restore, + # since it's just bytes in memory, so ... 
we assume + # that we have object here and just check for it in + # the fdtable + rsk = st.real_sockets[sk.sk_id] + try: + s_st = os.fstat(rsk.fileno()) + except: + print('FAIL: Socket %d lost' % sk.sk_id) + return CHK_FAIL_SOCKET + if not stat.S_ISSOCK(s_st.st_mode): + print('FAIL: Not a socket %d at %d' % (sk.sk_id, rsk.fileno())) + return CHK_FAIL_STAT - # First -- check the listen states and names - for sk in st.sockets: - if not sk.visible: - continue + # First -- check the listen states and names + for sk in st.sockets: + if not sk.visible: + continue - rsk = st.real_sockets[sk.sk_id] - r_listen = rsk.getsockopt(socket.SOL_SOCKET, socket.SO_ACCEPTCONN) - if (sk.listen and r_listen == 0) or (not sk.listen and r_listen == 1): - print("FAIL: Socket %d listen %d, expected %d" - % (sk.sk_id, r_listen, sk.listen and 1 or 0)) - return CHK_FAIL_LISTEN + rsk = st.real_sockets[sk.sk_id] + r_listen = rsk.getsockopt(socket.SOL_SOCKET, socket.SO_ACCEPTCONN) + if (sk.listen and r_listen == 0) or (not sk.listen and r_listen == 1): + print("FAIL: Socket %d listen %d, expected %d" % + (sk.sk_id, r_listen, sk.listen and 1 or 0)) + return CHK_FAIL_LISTEN - if sk.name: - r_name = rsk.getsockname() - w_name = sock.real_name_for(sk.name) - if r_name != w_name: - print('FAIL: Socket %d name mismatch [%s], want [%s]' - % (sk.sk_id, r_name, w_name)) - return CHK_FAIL_NAME + if sk.name: + r_name = rsk.getsockname() + w_name = sock.real_name_for(sk.name) + if r_name != w_name: + print('FAIL: Socket %d name mismatch [%s], want [%s]' % + (sk.sk_id, r_name, w_name)) + return CHK_FAIL_NAME - # Second -- check (accept) pending connections - for sk in st.sockets: - if not sk.listen: - continue + # Second -- check (accept) pending connections + for sk in st.sockets: + if not sk.listen: + continue - rsk = st.real_sockets[sk.sk_id] - set_nonblock(rsk) + rsk = st.real_sockets[sk.sk_id] + set_nonblock(rsk) - while sk.icons: - # Do act_accept to change the state properly - # and not write the code 
twice - acc = act_accept(sk.sk_id) - acc.act(st) - try: - acc.do(st) - except: - print('FAIL: Cannot accept pending connection for %d' % sk.sk_id) - return CHK_FAIL_ACCEPT + while sk.icons: + # Do act_accept to change the state properly + # and not write the code twice + acc = act_accept(sk.sk_id) + acc.act(st) + try: + acc.do(st) + except: + print('FAIL: Cannot accept pending connection for %d' % + sk.sk_id) + return CHK_FAIL_ACCEPT - print(' `- did %s' % acc.show()) + print(' `- did %s' % acc.show()) - # Third -- check inqueues - for sk in st.sockets: - if not sk.inqueue: - continue + # Third -- check inqueues + for sk in st.sockets: + if not sk.inqueue: + continue - rsk = st.real_sockets[sk.sk_id] - set_nonblock(rsk) + rsk = st.real_sockets[sk.sk_id] + set_nonblock(rsk) - while sk.inqueue: - msg = sk.inqueue.pop(0) - try: - r_msg, m_from = rsk.recvfrom(128) - except: - print('FAIL: No message in queue for %d' % sk.sk_id) - return CHK_FAIL_RECV_0 + while sk.inqueue: + msg = sk.inqueue.pop(0) + try: + r_msg, m_from = rsk.recvfrom(128) + except: + print('FAIL: No message in queue for %d' % sk.sk_id) + return CHK_FAIL_RECV_0 - w_msg = act_sendmsg.msgval(msg[1]) - if r_msg != w_msg: - print('FAIL: Message misorder: %s want %s (from %d)' - %(r_msg, w_msg, msg[0])) - return CHK_FAIL_RECV_MIX + w_msg = act_sendmsg.msgval(msg[1]) + if r_msg != w_msg: + print('FAIL: Message misorder: %s want %s (from %d)' % + (r_msg, w_msg, msg[0])) + return CHK_FAIL_RECV_MIX - # TODO -- check sender - print(' `- recvd %d.%d msg %s -> %d' - % (msg[0], msg[1], m_from, sk.sk_id)) + # TODO -- check sender + print(' `- recvd %d.%d msg %s -> %d' % + (msg[0], msg[1], m_from, sk.sk_id)) - # Finally, after all sockets are visible and all inqueues are - # drained -- check the sockets connectivity - for sk in st.sockets: - if not sk.peer: - continue + # Finally, after all sockets are visible and all inqueues are + # drained -- check the sockets connectivity + for sk in st.sockets: + if not sk.peer: 
+ continue - # Closed connection with one peer alive. Cannot check. - if not sk.peer in st.real_sockets: - continue + # Closed connection with one peer alive. Cannot check. + if not sk.peer in st.real_sockets: + continue - rsk = st.real_sockets[sk.sk_id] - psk = st.real_sockets[sk.peer] - set_nonblock(psk) - msgv = act_sendmsg.msgval(3 * sk.sk_id + 5 * sk.peer, 'C') # just random + rsk = st.real_sockets[sk.sk_id] + psk = st.real_sockets[sk.peer] + set_nonblock(psk) + msgv = act_sendmsg.msgval(3 * sk.sk_id + 5 * sk.peer, + 'C') # just random - try: - rsk.send(msgv) - rmsg = psk.recv(128) - except: - print('FAIL: Connectivity %d -> %d lost' % (sk.sk_id, sk.peer)) - return CHK_FAIL_CONNECT + try: + rsk.send(msgv) + rmsg = psk.recv(128) + except: + print('FAIL: Connectivity %d -> %d lost' % (sk.sk_id, sk.peer)) + return CHK_FAIL_CONNECT - # If sockets are not connected the recv above - # would generate exception and the check would - # fail. But just in case we've screwed the queues - # the hard way -- also check for the message being - # delivered for real - if rmsg != msgv: - print('FAIL: Connectivity %d -> %d not verified' - % (sk.sk_id, sk.peer)) - return CHK_FAIL_CONNECT2 + # If sockets are not connected the recv above + # would generate exception and the check would + # fail. 
But just in case we've screwed the queues + # the hard way -- also check for the message being + # delivered for real + if rmsg != msgv: + print('FAIL: Connectivity %d -> %d not verified' % + (sk.sk_id, sk.peer)) + return CHK_FAIL_CONNECT2 - print(' `- checked %d -> %d with %s' % (sk.sk_id, sk.peer, msgv)) + print(' `- checked %d -> %d with %s' % (sk.sk_id, sk.peer, msgv)) - return CHK_PASS + return CHK_PASS def chk_state(st, opts): - print("Will check state") + print("Will check state") - sigsk_name = "\0" + "CRSIGSKC" - signal_sk = socket.socket(socket.AF_UNIX, socket.SOCK_DGRAM, 0) - signal_sk.bind(sigsk_name) + sigsk_name = "\0" + "CRSIGSKC" + signal_sk = socket.socket(socket.AF_UNIX, socket.SOCK_DGRAM, 0) + signal_sk.bind(sigsk_name) - # FIXME Ideally call to criu should be performed by the run_state's - # pid!=0 branch, but for simplicity we fork the kid which has the - # same set of sockets we do, then dump it. Then restore and notify - # via dgram socket to check its state. Current task still has all - # the same sockets :) so we close them not to produce bind() name - # conflicts on restore + # FIXME Ideally call to criu should be performed by the run_state's + # pid!=0 branch, but for simplicity we fork the kid which has the + # same set of sockets we do, then dump it. Then restore and notify + # via dgram socket to check its state. 
Current task still has all + # the same sockets :) so we close them not to produce bind() name + # conflicts on restore - pid = os.fork() - if pid == 0: - msg = signal_sk.recv(64) - ret = chk_real_state(st) - sys.exit(ret) + pid = os.fork() + if pid == 0: + msg = signal_sk.recv(64) + ret = chk_real_state(st) + sys.exit(ret) - signal_sk.close() - for rsk in st.real_sockets.values(): - rsk.close() + signal_sk.close() + for rsk in st.real_sockets.values(): + rsk.close() - print("`- dump") - img_path = "sti_" + st.describe() - try: - os.mkdir(img_path) - subprocess.check_call([criu_bin, "dump", "-t", "%d" % pid, "-D", img_path, "-v4", "-o", "dump.log", "-j"]) - except: - print("Dump failed") - os.kill(pid, signal.SIGKILL) - return CHK_FAIL_DUMP + print("`- dump") + img_path = "sti_" + st.describe() + try: + os.mkdir(img_path) + subprocess.check_call([ + criu_bin, "dump", "-t", + "%d" % pid, "-D", img_path, "-v4", "-o", "dump.log", "-j" + ]) + except: + print("Dump failed") + os.kill(pid, signal.SIGKILL) + return CHK_FAIL_DUMP - print("`- restore") - try: - os.waitpid(pid, 0) - subprocess.check_call([criu_bin, "restore", "-D", img_path, "-v4", "-o", "rst.log", "-j", "-d", "-S"]) - except: - print("Restore failed") - return CHK_FAIL_RESTORE + print("`- restore") + try: + os.waitpid(pid, 0) + subprocess.check_call([ + criu_bin, "restore", "-D", img_path, "-v4", "-o", "rst.log", "-j", + "-d", "-S" + ]) + except: + print("Restore failed") + return CHK_FAIL_RESTORE - print("`- check") - signal_sk = socket.socket(socket.AF_UNIX, socket.SOCK_DGRAM, 0) - try: - signal_sk.sendto('check', sigsk_name) - except: - # Probably the peer has died before us or smth else went wrong - os.kill(pid, signal.SIGKILL) + print("`- check") + signal_sk = socket.socket(socket.AF_UNIX, socket.SOCK_DGRAM, 0) + try: + signal_sk.sendto('check', sigsk_name) + except: + # Probably the peer has died before us or smth else went wrong + os.kill(pid, signal.SIGKILL) - wp, status = os.waitpid(pid, 0) - if 
os.WIFEXITED(status): - status = os.WEXITSTATUS(status) - if status != CHK_PASS: - print("`- exited with %d" % status) - return status - elif os.WIFSIGNALED(status): - status = os.WTERMSIG(status) - print("`- killed with %d" % status) - return CHK_FAIL_KILLED - else: - return CHK_FAIL_UNKNOWN + wp, status = os.waitpid(pid, 0) + if os.WIFEXITED(status): + status = os.WEXITSTATUS(status) + if status != CHK_PASS: + print("`- exited with %d" % status) + return status + elif os.WIFSIGNALED(status): + status = os.WTERMSIG(status) + print("`- killed with %d" % status) + return CHK_FAIL_KILLED + else: + return CHK_FAIL_UNKNOWN - return CHK_PASS + return CHK_PASS def run_state(st, opts): - print("Will run state") - pid = os.fork() - if pid != 0: - wpid, status = os.wait() - if os.WIFEXITED(status): - status = os.WEXITSTATUS(status) - elif os.WIFSIGNALED(status): - status = CHK_FAIL_KILLED - else: - status = CHK_FAIL_UNKNOWN - return status + print("Will run state") + pid = os.fork() + if pid != 0: + wpid, status = os.wait() + if os.WIFEXITED(status): + status = os.WEXITSTATUS(status) + elif os.WIFSIGNALED(status): + status = CHK_FAIL_KILLED + else: + status = CHK_FAIL_UNKNOWN + return status - # Try the states in subprocess so that once - # it exits the created sockets are removed - for step in st.steps: - step.do(st) + # Try the states in subprocess so that once + # it exits the created sockets are removed + for step in st.steps: + step.do(st) - if not opts.run: - ret = chk_state(st, opts) - else: - ret = chk_real_state(st) + if not opts.run: + ret = chk_state(st, opts) + else: + ret = chk_real_state(st) - sys.exit(ret) + sys.exit(ret) -def proceed(st, seen, failed, opts, depth = 0): - desc = st.describe() - if not desc: - pass - elif not desc in seen: - # When scanning the tree we run and try only states that - # differ, but don't stop tree traversal on them. 
This is - # because sometimes we can get into the already seen state - # using less steps and it's better to proceed as we have - # depth to move forward and generate more states. - seen[desc] = len(st.steps) - print('%s' % desc) - for s in st.steps: - print('\t%s' % s.show()) +def proceed(st, seen, failed, opts, depth=0): + desc = st.describe() + if not desc: + pass + elif not desc in seen: + # When scanning the tree we run and try only states that + # differ, but don't stop tree traversal on them. This is + # because sometimes we can get into the already seen state + # using less steps and it's better to proceed as we have + # depth to move forward and generate more states. + seen[desc] = len(st.steps) + print('%s' % desc) + for s in st.steps: + print('\t%s' % s.show()) - if not opts.gen: - ret = run_state(st, opts) - if ret != CHK_PASS: - failed.add((desc, ret)) - if not opts.keep: - return False - else: - # Don't even proceed with this state if we've already - # seen one but get there with less steps - seen_score = seen[desc] - if len(st.steps) > seen_score: - return True - else: - seen[desc] = len(st.steps) + if not opts.gen: + ret = run_state(st, opts) + if ret != CHK_PASS: + failed.add((desc, ret)) + if not opts.keep: + return False + else: + # Don't even proceed with this state if we've already + # seen one but get there with less steps + seen_score = seen[desc] + if len(st.steps) > seen_score: + return True + else: + seen[desc] = len(st.steps) - if depth >= opts.depth: - return True + if depth >= opts.depth: + return True - actions = st.get_actions() - for act in actions: - nst = st.clone() - act.act(nst) - nst.steps.append(act) - if not proceed(nst, seen, failed, opts, depth + 1): - return False + actions = st.get_actions() + for act in actions: + nst = st.clone() + act.act(nst) + nst.steps.append(act) + if not proceed(nst, seen, failed, opts, depth + 1): + return False - return True + return True p = argparse.ArgumentParser("CRIU test suite") 
-p.add_argument("--depth", help = "Depth of generated tree", default = '8') -p.add_argument("--sockets", help = "Maximum number of sockets", default = '1') -p.add_argument("--dgram", help = "Use SOCK_DGRAM sockets", action = 'store_true') -p.add_argument("--stream", help = "Use SOCK_STREAM sockets", action = 'store_true') -p.add_argument("--gen", help = "Only generate and show states", action = 'store_true') -p.add_argument("--run", help = "Run the states, but don't C/R", action = 'store_true') -p.add_argument("--keep", help = "Don't stop on error", action = 'store_true') +p.add_argument("--depth", help="Depth of generated tree", default='8') +p.add_argument("--sockets", help="Maximum number of sockets", default='1') +p.add_argument("--dgram", help="Use SOCK_DGRAM sockets", action='store_true') +p.add_argument("--stream", help="Use SOCK_STREAM sockets", action='store_true') +p.add_argument("--gen", + help="Only generate and show states", + action='store_true') +p.add_argument("--run", + help="Run the states, but don't C/R", + action='store_true') +p.add_argument("--keep", help="Don't stop on error", action='store_true') opts = p.parse_args() opts.depth = int(opts.depth) # XXX: does it make any sense to mix two types in one go? 
if opts.stream and opts.dgram: - print('Choose only one type') - sys.exit(1) + print('Choose only one type') + sys.exit(1) if opts.stream: - sk_type = socket.SOCK_STREAM + sk_type = socket.SOCK_STREAM elif opts.dgram: - sk_type = socket.SOCK_DGRAM + sk_type = socket.SOCK_DGRAM else: - print('Choose some type') - sys.exit(1) + print('Choose some type') + sys.exit(1) st = state(int(opts.sockets), sk_type) seen = {} @@ -747,8 +765,9 @@ failed = set() proceed(st, seen, failed, opts) if len(failed) == 0: - print('PASS (%d states)' % len(seen)) + print('PASS (%d states)' % len(seen)) else: - print('FAIL %d/%d' % (len(failed), len(seen))) - for f in failed: - print("\t%-50s: %s" % (f[0], fail_desc.get(f[1], 'unknown reason %d' % f[1]))) + print('FAIL %d/%d' % (len(failed), len(seen))) + for f in failed: + print("\t%-50s: %s" % + (f[0], fail_desc.get(f[1], 'unknown reason %d' % f[1]))) diff --git a/test/inhfd/fifo.py b/test/inhfd/fifo.py index 64e5f8f13..2d20e4dbf 100755 --- a/test/inhfd/fifo.py +++ b/test/inhfd/fifo.py @@ -5,35 +5,35 @@ id_str = "" def create_fds(): - tdir = tempfile.mkdtemp("zdtm.inhfd.XXXXXX") - if os.system("mount -t tmpfs zdtm.inhfd %s" % tdir) != 0: - raise Exception("Unable to mount tmpfs") - tfifo = os.path.join(tdir, "test_fifo") - os.mkfifo(tfifo) - fd2 = open(tfifo, "w+b", buffering=0) - fd1 = open(tfifo, "rb") - os.system("umount -l %s" % tdir) - os.rmdir(tdir) + tdir = tempfile.mkdtemp("zdtm.inhfd.XXXXXX") + if os.system("mount -t tmpfs zdtm.inhfd %s" % tdir) != 0: + raise Exception("Unable to mount tmpfs") + tfifo = os.path.join(tdir, "test_fifo") + os.mkfifo(tfifo) + fd2 = open(tfifo, "w+b", buffering=0) + fd1 = open(tfifo, "rb") + os.system("umount -l %s" % tdir) + os.rmdir(tdir) - mnt_id = -1 - with open("/proc/self/fdinfo/%d" % fd1.fileno()) as f: - for line in f: - line = line.split() - if line[0] == "mnt_id:": - mnt_id = int(line[1]) - break - else: - raise Exception("Unable to find mnt_id") + mnt_id = -1 + with 
open("/proc/self/fdinfo/%d" % fd1.fileno()) as f: + for line in f: + line = line.split() + if line[0] == "mnt_id:": + mnt_id = int(line[1]) + break + else: + raise Exception("Unable to find mnt_id") - global id_str - id_str = "file[%x:%x]" % (mnt_id, os.fstat(fd1.fileno()).st_ino) + global id_str + id_str = "file[%x:%x]" % (mnt_id, os.fstat(fd1.fileno()).st_ino) - return [(fd2, fd1)] + return [(fd2, fd1)] def filename(pipef): - return id_str + return id_str def dump_opts(sockf): - return ["--external", id_str] + return ["--external", id_str] diff --git a/test/inhfd/pipe.py b/test/inhfd/pipe.py index 318dc862d..8d8318d5b 100755 --- a/test/inhfd/pipe.py +++ b/test/inhfd/pipe.py @@ -2,16 +2,16 @@ import os def create_fds(): - pipes = [] - for i in range(10): - (fd1, fd2) = os.pipe() - pipes.append((os.fdopen(fd2, "wb"), os.fdopen(fd1, "rb"))) - return pipes + pipes = [] + for i in range(10): + (fd1, fd2) = os.pipe() + pipes.append((os.fdopen(fd2, "wb"), os.fdopen(fd1, "rb"))) + return pipes def filename(pipef): - return 'pipe:[%d]' % os.fstat(pipef.fileno()).st_ino + return 'pipe:[%d]' % os.fstat(pipef.fileno()).st_ino def dump_opts(sockf): - return [] + return [] diff --git a/test/inhfd/socket.py b/test/inhfd/socket.py index feba0e0c6..9cea16ffb 100755 --- a/test/inhfd/socket.py +++ b/test/inhfd/socket.py @@ -3,19 +3,19 @@ import os def create_fds(): - (sk1, sk2) = socket.socketpair(socket.AF_UNIX, socket.SOCK_STREAM) - (sk3, sk4) = socket.socketpair(socket.AF_UNIX, socket.SOCK_STREAM) - return [(sk1.makefile("wb"), sk2.makefile("rb")), - (sk3.makefile("wb"), sk4.makefile("rb"))] + (sk1, sk2) = socket.socketpair(socket.AF_UNIX, socket.SOCK_STREAM) + (sk3, sk4) = socket.socketpair(socket.AF_UNIX, socket.SOCK_STREAM) + return [(sk1.makefile("wb"), sk2.makefile("rb")), + (sk3.makefile("wb"), sk4.makefile("rb"))] def __sock_ino(sockf): - return os.fstat(sockf.fileno()).st_ino + return os.fstat(sockf.fileno()).st_ino def filename(sockf): - return 'socket:[%d]' % 
__sock_ino(sockf) + return 'socket:[%d]' % __sock_ino(sockf) def dump_opts(sockf): - return ['--external', 'unix[%d]' % __sock_ino(sockf)] + return ['--external', 'unix[%d]' % __sock_ino(sockf)] diff --git a/test/inhfd/tty.py b/test/inhfd/tty.py index ae76a96d4..c11a57117 100755 --- a/test/inhfd/tty.py +++ b/test/inhfd/tty.py @@ -4,34 +4,33 @@ import os import pty import termios - ctl = False def child_prep(fd): - global ctl - if ctl: - return - ctl = True - fcntl.ioctl(fd.fileno(), termios.TIOCSCTTY, 1) + global ctl + if ctl: + return + ctl = True + fcntl.ioctl(fd.fileno(), termios.TIOCSCTTY, 1) def create_fds(): - ttys = [] - for i in range(10): - (fd1, fd2) = pty.openpty() - newattr = termios.tcgetattr(fd1) - newattr[3] &= ~termios.ICANON & ~termios.ECHO - termios.tcsetattr(fd1, termios.TCSADRAIN, newattr) - ttys.append((os.fdopen(fd1, "wb"), os.fdopen(fd2, "rb"))) - return ttys + ttys = [] + for i in range(10): + (fd1, fd2) = pty.openpty() + newattr = termios.tcgetattr(fd1) + newattr[3] &= ~termios.ICANON & ~termios.ECHO + termios.tcsetattr(fd1, termios.TCSADRAIN, newattr) + ttys.append((os.fdopen(fd1, "wb"), os.fdopen(fd2, "rb"))) + return ttys def filename(pipef): - st = os.fstat(pipef.fileno()) - return 'tty[%x:%x]' % (st.st_rdev, st.st_dev) + st = os.fstat(pipef.fileno()) + return 'tty[%x:%x]' % (st.st_rdev, st.st_dev) def dump_opts(sockf): - st = os.fstat(sockf.fileno()) - return "--external", 'tty[%x:%x]' % (st.st_rdev, st.st_dev) + st = os.fstat(sockf.fileno()) + return "--external", 'tty[%x:%x]' % (st.st_rdev, st.st_dev) diff --git a/test/others/ext-tty/run.py b/test/others/ext-tty/run.py index f44b1d946..b1dcb4a5a 100755 --- a/test/others/ext-tty/run.py +++ b/test/others/ext-tty/run.py @@ -5,32 +5,41 @@ import os, sys, time, signal, pty master, slave = pty.openpty() p = subprocess.Popen(["setsid", "--ctty", "sleep", "10000"], - stdin = slave, stdout = slave, stderr = slave, close_fds = True) + stdin=slave, + stdout=slave, + stderr=slave, + 
close_fds=True) st = os.stat("/proc/self/fd/%d" % slave) ttyid = "tty[%x:%x]" % (st.st_rdev, st.st_dev) os.close(slave) time.sleep(1) -ret = subprocess.Popen(["../../../criu/criu", "dump", "-t", str(p.pid), "-v4", "--external", ttyid]).wait() +ret = subprocess.Popen([ + "../../../criu/criu", "dump", "-t", + str(p.pid), "-v4", "--external", ttyid +]).wait() if ret: - sys.exit(ret) + sys.exit(ret) p.wait() -new_master, slave = pty.openpty() # get another pty pair +new_master, slave = pty.openpty() # get another pty pair os.close(master) ttyid = "fd[%d]:tty[%x:%x]" % (slave, st.st_rdev, st.st_dev) -ret = subprocess.Popen(["../../../criu/criu", "restore", "-v4", "--inherit-fd", ttyid, "--restore-sibling", "--restore-detach"]).wait() +ret = subprocess.Popen([ + "../../../criu/criu", "restore", "-v4", "--inherit-fd", ttyid, + "--restore-sibling", "--restore-detach" +]).wait() if ret: - sys.exit(ret) + sys.exit(ret) os.close(slave) -os.waitpid(-1, os.WNOHANG) # is the process alive +os.waitpid(-1, os.WNOHANG) # is the process alive os.close(new_master) _, status = os.wait() if not os.WIFSIGNALED(status) or os.WTERMSIG(status) != signal.SIGHUP: - print(status) - sys.exit(1) + print(status) + sys.exit(1) print("PASS") diff --git a/test/others/mounts/mounts.py b/test/others/mounts/mounts.py index dc65ba45c..70b0be5fa 100755 --- a/test/others/mounts/mounts.py +++ b/test/others/mounts/mounts.py @@ -1,31 +1,36 @@ import os import tempfile, random + def mount(src, dst, shared, private, slave): - cmd = "mount" - if shared: - cmd += " --make-shared" - if private: - cmd += " --make-private" - if slave: - cmd += " --make-slave" - if src: - cmd += " --bind '%s' '%s'" % (src, dst) - else: - cmd += " -t tmpfs none '%s'" % (dst) + cmd = "mount" + if shared: + cmd += " --make-shared" + if private: + cmd += " --make-private" + if slave: + cmd += " --make-slave" + if src: + cmd += " --bind '%s' '%s'" % (src, dst) + else: + cmd += " -t tmpfs none '%s'" % (dst) - print(cmd) - ret = 
os.system(cmd) - if ret: - print("failed") + print(cmd) + ret = os.system(cmd) + if ret: + print("failed") -root = tempfile.mkdtemp(prefix = "root.mount", dir = "/tmp") + +root = tempfile.mkdtemp(prefix="root.mount", dir="/tmp") mount(None, root, 1, 0, 0) mounts = [root] for i in range(10): - dstdir = random.choice(mounts) - dst = tempfile.mkdtemp(prefix = "mount", dir = dstdir) - src = random.choice(mounts + [None]) - mount(src, dst, random.randint(0,100) > 50, random.randint(0,100) > 90, random.randint(0,100) > 50) - mounts.append(dst) + dstdir = random.choice(mounts) + dst = tempfile.mkdtemp(prefix="mount", dir=dstdir) + src = random.choice(mounts + [None]) + mount(src, dst, + random.randint(0, 100) > 50, + random.randint(0, 100) > 90, + random.randint(0, 100) > 50) + mounts.append(dst) diff --git a/test/others/rpc/config_file.py b/test/others/rpc/config_file.py index 23a06615f..3579ac76f 100755 --- a/test/others/rpc/config_file.py +++ b/test/others/rpc/config_file.py @@ -14,169 +14,174 @@ does_not_exist = 'does-not.exist' def setup_swrk(): - print('Connecting to CRIU in swrk mode.') - css = socket.socketpair(socket.AF_UNIX, socket.SOCK_SEQPACKET) - swrk = subprocess.Popen(['./criu', "swrk", "%d" % css[0].fileno()]) - css[0].close() - return swrk, css[1] + print('Connecting to CRIU in swrk mode.') + css = socket.socketpair(socket.AF_UNIX, socket.SOCK_SEQPACKET) + swrk = subprocess.Popen(['./criu', "swrk", "%d" % css[0].fileno()]) + css[0].close() + return swrk, css[1] def setup_config_file(content): - # Creating a temporary file which will be used as configuration file. - fd, path = mkstemp() + # Creating a temporary file which will be used as configuration file. 
+ fd, path = mkstemp() - with os.fdopen(fd, 'w') as f: - f.write(content) + with os.fdopen(fd, 'w') as f: + f.write(content) - os.environ['CRIU_CONFIG_FILE'] = path + os.environ['CRIU_CONFIG_FILE'] = path - return path + return path def cleanup_config_file(path): - if os.environ.get('CRIU_CONFIG_FILE', None) is not None: - del os.environ['CRIU_CONFIG_FILE'] - os.unlink(path) + if os.environ.get('CRIU_CONFIG_FILE', None) is not None: + del os.environ['CRIU_CONFIG_FILE'] + os.unlink(path) def cleanup_output(path): - for f in (does_not_exist, log_file): - f = os.path.join(path, f) - if os.access(f, os.F_OK): - os.unlink(f) + for f in (does_not_exist, log_file): + f = os.path.join(path, f) + if os.access(f, os.F_OK): + os.unlink(f) def setup_criu_dump_request(): - # Create criu msg, set it's type to dump request - # and set dump options. Checkout more options in protobuf/rpc.proto - req = rpc.criu_req() - req.type = rpc.DUMP - req.opts.leave_running = True - req.opts.log_level = 4 - req.opts.log_file = log_file - req.opts.images_dir_fd = os.open(args['dir'], os.O_DIRECTORY) - # Not necessary, just for testing - req.opts.tcp_established = True - req.opts.shell_job = True - return req + # Create criu msg, set it's type to dump request + # and set dump options. 
Checkout more options in protobuf/rpc.proto + req = rpc.criu_req() + req.type = rpc.DUMP + req.opts.leave_running = True + req.opts.log_level = 4 + req.opts.log_file = log_file + req.opts.images_dir_fd = os.open(args['dir'], os.O_DIRECTORY) + # Not necessary, just for testing + req.opts.tcp_established = True + req.opts.shell_job = True + return req def do_rpc(s, req): - # Send request - s.send(req.SerializeToString()) + # Send request + s.send(req.SerializeToString()) - # Recv response - resp = rpc.criu_resp() - MAX_MSG_SIZE = 1024 - resp.ParseFromString(s.recv(MAX_MSG_SIZE)) + # Recv response + resp = rpc.criu_resp() + MAX_MSG_SIZE = 1024 + resp.ParseFromString(s.recv(MAX_MSG_SIZE)) - s.close() - return resp + s.close() + return resp def test_broken_configuration_file(): - # Testing RPC configuration file mode with a broken configuration file. - # This should fail - content = 'hopefully-this-option-will-never=exist' - path = setup_config_file(content) - swrk, s = setup_swrk() - s.close() - # This test is only about detecting wrong configuration files. - # If we do not sleep it might happen that we kill CRIU before - # it parses the configuration file. A short sleep makes sure - # that the configuration file has been parsed. Hopefully. - # (I am sure this will fail horribly at some point) - time.sleep(0.3) - swrk.kill() - return_code = swrk.wait() - # delete temporary file again - cleanup_config_file(path) - if return_code != 1: - print('FAIL: CRIU should have returned 1 instead of %d' % return_code) - sys.exit(-1) + # Testing RPC configuration file mode with a broken configuration file. + # This should fail + content = 'hopefully-this-option-will-never=exist' + path = setup_config_file(content) + swrk, s = setup_swrk() + s.close() + # This test is only about detecting wrong configuration files. + # If we do not sleep it might happen that we kill CRIU before + # it parses the configuration file. 
A short sleep makes sure + # that the configuration file has been parsed. Hopefully. + # (I am sure this will fail horribly at some point) + time.sleep(0.3) + swrk.kill() + return_code = swrk.wait() + # delete temporary file again + cleanup_config_file(path) + if return_code != 1: + print('FAIL: CRIU should have returned 1 instead of %d' % return_code) + sys.exit(-1) def search_in_log_file(log, message): - with open(os.path.join(args['dir'], log)) as f: - if message not in f.read(): - print('FAIL: Missing the expected error message (%s) in the log file' % message) - sys.exit(-1) + with open(os.path.join(args['dir'], log)) as f: + if message not in f.read(): + print( + 'FAIL: Missing the expected error message (%s) in the log file' + % message) + sys.exit(-1) def check_results(resp, log): - # Check if the specified log file exists - if not os.path.isfile(os.path.join(args['dir'], log)): - print('FAIL: Expected log file %s does not exist' % log) - sys.exit(-1) - # Dump should have failed with: 'The criu itself is within dumped tree' - if resp.type != rpc.DUMP: - print('FAIL: Unexpected msg type %r' % resp.type) - sys.exit(-1) - if 'The criu itself is within dumped tree' not in resp.cr_errmsg: - print('FAIL: Missing the expected error message in RPC response') - sys.exit(-1) - # Look into the log file for the same message - search_in_log_file(log, 'The criu itself is within dumped tree') + # Check if the specified log file exists + if not os.path.isfile(os.path.join(args['dir'], log)): + print('FAIL: Expected log file %s does not exist' % log) + sys.exit(-1) + # Dump should have failed with: 'The criu itself is within dumped tree' + if resp.type != rpc.DUMP: + print('FAIL: Unexpected msg type %r' % resp.type) + sys.exit(-1) + if 'The criu itself is within dumped tree' not in resp.cr_errmsg: + print('FAIL: Missing the expected error message in RPC response') + sys.exit(-1) + # Look into the log file for the same message + search_in_log_file(log, 'The criu itself is 
within dumped tree') def test_rpc_without_configuration_file(): - # Testing without configuration file - # Just doing a dump and checking for the logfile - req = setup_criu_dump_request() - _, s = setup_swrk() - resp = do_rpc(s, req) - s.close() - check_results(resp, log_file) + # Testing without configuration file + # Just doing a dump and checking for the logfile + req = setup_criu_dump_request() + _, s = setup_swrk() + resp = do_rpc(s, req) + s.close() + check_results(resp, log_file) def test_rpc_with_configuration_file(): - # Testing with configuration file - # Just doing a dump and checking for the logfile + # Testing with configuration file + # Just doing a dump and checking for the logfile - # Setting a different log file via configuration file - # This should not work as RPC settings overwrite configuration - # file settings in the default configuration. - log = does_not_exist - content = 'log-file ' + log + '\n' - content += 'no-tcp-established\nno-shell-job' - path = setup_config_file(content) - req = setup_criu_dump_request() - _, s = setup_swrk() - do_rpc(s, req) - s.close() - cleanup_config_file(path) - # Check if the specified log file exists - # It should not as configuration files do not overwrite RPC values. - if os.path.isfile(os.path.join(args['dir'], log)): - print('FAIL: log file %s should not exist' % log) - sys.exit(-1) + # Setting a different log file via configuration file + # This should not work as RPC settings overwrite configuration + # file settings in the default configuration. + log = does_not_exist + content = 'log-file ' + log + '\n' + content += 'no-tcp-established\nno-shell-job' + path = setup_config_file(content) + req = setup_criu_dump_request() + _, s = setup_swrk() + do_rpc(s, req) + s.close() + cleanup_config_file(path) + # Check if the specified log file exists + # It should not as configuration files do not overwrite RPC values. 
+ if os.path.isfile(os.path.join(args['dir'], log)): + print('FAIL: log file %s should not exist' % log) + sys.exit(-1) def test_rpc_with_configuration_file_overwriting_rpc(): - # Testing with configuration file - # Just doing a dump and checking for the logfile + # Testing with configuration file + # Just doing a dump and checking for the logfile - # Setting a different log file via configuration file - # This should not work as RPC settings overwrite configuration - # file settings in the default configuration. - log = does_not_exist - content = 'log-file ' + log + '\n' - content += 'no-tcp-established\nno-shell-job' - path = setup_config_file(content) - # Only set the configuration file via RPC; - # not via environment variable - del os.environ['CRIU_CONFIG_FILE'] - req = setup_criu_dump_request() - req.opts.config_file = path - _, s = setup_swrk() - resp = do_rpc(s, req) - s.close() - cleanup_config_file(path) - check_results(resp, log) + # Setting a different log file via configuration file + # This should not work as RPC settings overwrite configuration + # file settings in the default configuration. 
+ log = does_not_exist + content = 'log-file ' + log + '\n' + content += 'no-tcp-established\nno-shell-job' + path = setup_config_file(content) + # Only set the configuration file via RPC; + # not via environment variable + del os.environ['CRIU_CONFIG_FILE'] + req = setup_criu_dump_request() + req.opts.config_file = path + _, s = setup_swrk() + resp = do_rpc(s, req) + s.close() + cleanup_config_file(path) + check_results(resp, log) -parser = argparse.ArgumentParser(description="Test config files using CRIU RPC") -parser.add_argument('dir', type = str, help = "Directory where CRIU images should be placed") +parser = argparse.ArgumentParser( + description="Test config files using CRIU RPC") +parser.add_argument('dir', + type=str, + help="Directory where CRIU images should be placed") args = vars(parser.parse_args()) diff --git a/test/others/rpc/errno.py b/test/others/rpc/errno.py index ee9e90d8c..49cb622de 100755 --- a/test/others/rpc/errno.py +++ b/test/others/rpc/errno.py @@ -6,130 +6,136 @@ import rpc_pb2 as rpc import argparse parser = argparse.ArgumentParser(description="Test errno reported by CRIU RPC") -parser.add_argument('socket', type = str, help = "CRIU service socket") -parser.add_argument('dir', type = str, help = "Directory where CRIU images should be placed") +parser.add_argument('socket', type=str, help="CRIU service socket") +parser.add_argument('dir', + type=str, + help="Directory where CRIU images should be placed") args = vars(parser.parse_args()) + # Prepare dir for images class test: - def __init__(self): - self.imgs_fd = os.open(args['dir'], os.O_DIRECTORY) - self.s = -1 - self._MAX_MSG_SIZE = 1024 + def __init__(self): + self.imgs_fd = os.open(args['dir'], os.O_DIRECTORY) + self.s = -1 + self._MAX_MSG_SIZE = 1024 - def connect(self): - self.s = socket.socket(socket.AF_UNIX, socket.SOCK_SEQPACKET) - self.s.connect(args['socket']) + def connect(self): + self.s = socket.socket(socket.AF_UNIX, socket.SOCK_SEQPACKET) + 
self.s.connect(args['socket']) - def get_base_req(self): - req = rpc.criu_req() - req.opts.log_level = 4 - req.opts.images_dir_fd = self.imgs_fd - return req + def get_base_req(self): + req = rpc.criu_req() + req.opts.log_level = 4 + req.opts.images_dir_fd = self.imgs_fd + return req - def send_req(self, req): - self.connect() - self.s.send(req.SerializeToString()) + def send_req(self, req): + self.connect() + self.s.send(req.SerializeToString()) - def recv_resp(self): - resp = rpc.criu_resp() - resp.ParseFromString(self.s.recv(self._MAX_MSG_SIZE)) - return resp + def recv_resp(self): + resp = rpc.criu_resp() + resp.ParseFromString(self.s.recv(self._MAX_MSG_SIZE)) + return resp - def check_resp(self, resp, typ, err): - if resp.type != typ: - raise Exception('Unexpected responce type ' + str(resp.type)) + def check_resp(self, resp, typ, err): + if resp.type != typ: + raise Exception('Unexpected responce type ' + str(resp.type)) - if resp.success: - raise Exception('Unexpected success = True') + if resp.success: + raise Exception('Unexpected success = True') - if err and resp.cr_errno != err: - raise Exception('Unexpected cr_errno ' + str(resp.cr_errno)) + if err and resp.cr_errno != err: + raise Exception('Unexpected cr_errno ' + str(resp.cr_errno)) - def no_process(self): - print('Try to dump unexisting process') - # Get pid of non-existing process. - # Suppose max_pid is not taken by any process. - with open("/proc/sys/kernel/pid_max", "r") as f: - pid = int(f.readline()) - try: - os.kill(pid, 0) - except OSError: - pass - else: - raise Exception('max pid is taken') + def no_process(self): + print('Try to dump unexisting process') + # Get pid of non-existing process. + # Suppose max_pid is not taken by any process. + with open("/proc/sys/kernel/pid_max", "r") as f: + pid = int(f.readline()) + try: + os.kill(pid, 0) + except OSError: + pass + else: + raise Exception('max pid is taken') - # Ask criu to dump non-existing process. 
- req = self.get_base_req() - req.type = rpc.DUMP - req.opts.pid = pid + # Ask criu to dump non-existing process. + req = self.get_base_req() + req.type = rpc.DUMP + req.opts.pid = pid - self.send_req(req) - resp = self.recv_resp() + self.send_req(req) + resp = self.recv_resp() - self.check_resp(resp, rpc.DUMP, errno.ESRCH) + self.check_resp(resp, rpc.DUMP, errno.ESRCH) - print('Success') + print('Success') - def process_exists(self): - print('Try to restore process which pid is already taken by other process') + def process_exists(self): + print( + 'Try to restore process which pid is already taken by other process' + ) - # Perform self-dump - req = self.get_base_req() - req.type = rpc.DUMP - req.opts.leave_running = True + # Perform self-dump + req = self.get_base_req() + req.type = rpc.DUMP + req.opts.leave_running = True - self.send_req(req) - resp = self.recv_resp() + self.send_req(req) + resp = self.recv_resp() - if resp.success != True: - raise Exception('Self-dump failed') + if resp.success != True: + raise Exception('Self-dump failed') - # Ask to restore process from images of ourselves - req = self.get_base_req() - req.type = rpc.RESTORE + # Ask to restore process from images of ourselves + req = self.get_base_req() + req.type = rpc.RESTORE - self.send_req(req) - resp = self.recv_resp() + self.send_req(req) + resp = self.recv_resp() - self.check_resp(resp, rpc.RESTORE, errno.EEXIST) + self.check_resp(resp, rpc.RESTORE, errno.EEXIST) - print('Success') + print('Success') - def bad_options(self): - print('Try to send criu invalid opts') + def bad_options(self): + print('Try to send criu invalid opts') - # Subdirs are not allowed in log_file - req = self.get_base_req() - req.type = rpc.DUMP - req.opts.log_file = "../file.log" + # Subdirs are not allowed in log_file + req = self.get_base_req() + req.type = rpc.DUMP + req.opts.log_file = "../file.log" - self.send_req(req) - resp = self.recv_resp() + self.send_req(req) + resp = self.recv_resp() - 
self.check_resp(resp, rpc.DUMP, errno.EBADRQC) + self.check_resp(resp, rpc.DUMP, errno.EBADRQC) - print('Success') + print('Success') - def bad_request(self): - print('Try to send criu invalid request type') + def bad_request(self): + print('Try to send criu invalid request type') - req = self.get_base_req() - req.type = rpc.NOTIFY + req = self.get_base_req() + req.type = rpc.NOTIFY - self.send_req(req) - resp = self.recv_resp() + self.send_req(req) + resp = self.recv_resp() - self.check_resp(resp, rpc.EMPTY, None) + self.check_resp(resp, rpc.EMPTY, None) - print('Success') + print('Success') + + def run(self): + self.no_process() + self.process_exists() + self.bad_options() + self.bad_request() - def run(self): - self.no_process() - self.process_exists() - self.bad_options() - self.bad_request() t = test() t.run() diff --git a/test/others/rpc/ps_test.py b/test/others/rpc/ps_test.py index 1872120fc..d16efd3f6 100755 --- a/test/others/rpc/ps_test.py +++ b/test/others/rpc/ps_test.py @@ -5,8 +5,10 @@ import rpc_pb2 as rpc import argparse parser = argparse.ArgumentParser(description="Test page-server using CRIU RPC") -parser.add_argument('socket', type = str, help = "CRIU service socket") -parser.add_argument('dir', type = str, help = "Directory where CRIU images should be placed") +parser.add_argument('socket', type=str, help="CRIU service socket") +parser.add_argument('dir', + type=str, + help="Directory where CRIU images should be placed") args = vars(parser.parse_args()) @@ -16,45 +18,45 @@ s.connect(args['socket']) # Start page-server print('Starting page-server') -req = rpc.criu_req() -req.type = rpc.PAGE_SERVER -req.opts.log_file = 'page-server.log' -req.opts.log_level = 4 -req.opts.images_dir_fd = os.open(args['dir'], os.O_DIRECTORY) +req = rpc.criu_req() +req.type = rpc.PAGE_SERVER +req.opts.log_file = 'page-server.log' +req.opts.log_level = 4 +req.opts.images_dir_fd = os.open(args['dir'], os.O_DIRECTORY) s.send(req.SerializeToString()) -resp = rpc.criu_resp() 
+resp = rpc.criu_resp() MAX_MSG_SIZE = 1024 resp.ParseFromString(s.recv(MAX_MSG_SIZE)) if resp.type != rpc.PAGE_SERVER: - print('Unexpected msg type') - sys.exit(1) + print('Unexpected msg type') + sys.exit(1) else: - if resp.success: - # check if pid even exists - try: - os.kill(resp.ps.pid, 0) - except OSError as err: - if err.errno == errno.ESRCH: - print('No process with page-server pid %d' %(resp.ps.pid)) - else: - print('Can\'t check that process %d exists' %(resp.ps.pid)) - sys.exit(1) - print('Success, page-server pid %d started on port %u' %(resp.ps.pid, resp.ps.port)) - else: - print('Failed to start page-server') - sys.exit(1) - + if resp.success: + # check if pid even exists + try: + os.kill(resp.ps.pid, 0) + except OSError as err: + if err.errno == errno.ESRCH: + print('No process with page-server pid %d' % (resp.ps.pid)) + else: + print('Can\'t check that process %d exists' % (resp.ps.pid)) + sys.exit(1) + print('Success, page-server pid %d started on port %u' % + (resp.ps.pid, resp.ps.port)) + else: + print('Failed to start page-server') + sys.exit(1) # Perform self-dump print('Dumping myself using page-server') -req.type = rpc.DUMP -req.opts.ps.port = resp.ps.port -req.opts.ps.address = "127.0.0.1" -req.opts.log_file = 'dump.log' -req.opts.leave_running = True +req.type = rpc.DUMP +req.opts.ps.port = resp.ps.port +req.opts.ps.address = "127.0.0.1" +req.opts.log_file = 'dump.log' +req.opts.leave_running = True s.close() s = socket.socket(socket.AF_UNIX, socket.SOCK_SEQPACKET) @@ -64,11 +66,11 @@ s.send(req.SerializeToString()) resp.ParseFromString(s.recv(MAX_MSG_SIZE)) if resp.type != rpc.DUMP: - print('Unexpected msg type') - sys.exit(1) + print('Unexpected msg type') + sys.exit(1) else: - if resp.success: - print('Success') - else: - print('Fail') - sys.exit(1) + if resp.success: + print('Success') + else: + print('Fail') + sys.exit(1) diff --git a/test/others/rpc/read.py b/test/others/rpc/read.py index bbf69b6cb..ff7e5c1a0 100644 --- 
a/test/others/rpc/read.py +++ b/test/others/rpc/read.py @@ -12,6 +12,6 @@ r = f.read(1) f.close() if r == '\0': - sys.exit(0) + sys.exit(0) sys.exit(-1) diff --git a/test/others/rpc/restore-loop.py b/test/others/rpc/restore-loop.py index ce5786a56..c81567426 100755 --- a/test/others/rpc/restore-loop.py +++ b/test/others/rpc/restore-loop.py @@ -4,9 +4,12 @@ import socket, os, sys import rpc_pb2 as rpc import argparse -parser = argparse.ArgumentParser(description="Test ability to restore a process from images using CRIU RPC") -parser.add_argument('socket', type = str, help = "CRIU service socket") -parser.add_argument('dir', type = str, help = "Directory where CRIU images could be found") +parser = argparse.ArgumentParser( + description="Test ability to restore a process from images using CRIU RPC") +parser.add_argument('socket', type=str, help="CRIU service socket") +parser.add_argument('dir', + type=str, + help="Directory where CRIU images could be found") args = vars(parser.parse_args()) @@ -16,30 +19,30 @@ s.connect(args['socket']) # Create criu msg, set it's type to dump request # and set dump options. Checkout more options in protobuf/rpc.proto -req = rpc.criu_req() -req.type = rpc.RESTORE -req.opts.images_dir_fd = os.open(args['dir'], os.O_DIRECTORY) +req = rpc.criu_req() +req.type = rpc.RESTORE +req.opts.images_dir_fd = os.open(args['dir'], os.O_DIRECTORY) # As the dumped process is running with setsid this should not # be necessary. There seems to be a problem for this testcase # in combination with alpine's setsid. # The dump is now done with -j and the restore also. 
-req.opts.shell_job = True +req.opts.shell_job = True # Send request s.send(req.SerializeToString()) # Recv response -resp = rpc.criu_resp() -MAX_MSG_SIZE = 1024 +resp = rpc.criu_resp() +MAX_MSG_SIZE = 1024 resp.ParseFromString(s.recv(MAX_MSG_SIZE)) if resp.type != rpc.RESTORE: - print('Unexpected msg type') - sys.exit(-1) + print('Unexpected msg type') + sys.exit(-1) else: - if resp.success: - print('Restore success') - else: - print('Restore fail') - sys.exit(-1) - print("PID of the restored program is %d\n" %(resp.restore.pid)) + if resp.success: + print('Restore success') + else: + print('Restore fail') + sys.exit(-1) + print("PID of the restored program is %d\n" % (resp.restore.pid)) diff --git a/test/others/rpc/test.py b/test/others/rpc/test.py index 0addbaedc..9a35e0e97 100755 --- a/test/others/rpc/test.py +++ b/test/others/rpc/test.py @@ -4,9 +4,12 @@ import socket, os, sys import rpc_pb2 as rpc import argparse -parser = argparse.ArgumentParser(description="Test dump/restore using CRIU RPC") -parser.add_argument('socket', type = str, help = "CRIU service socket") -parser.add_argument('dir', type = str, help = "Directory where CRIU images should be placed") +parser = argparse.ArgumentParser( + description="Test dump/restore using CRIU RPC") +parser.add_argument('socket', type=str, help="CRIU service socket") +parser.add_argument('dir', + type=str, + help="Directory where CRIU images should be placed") args = vars(parser.parse_args()) @@ -16,32 +19,32 @@ s.connect(args['socket']) # Create criu msg, set it's type to dump request # and set dump options. 
Checkout more options in protobuf/rpc.proto -req = rpc.criu_req() -req.type = rpc.DUMP -req.opts.leave_running = True -req.opts.log_level = 4 -req.opts.images_dir_fd = os.open(args['dir'], os.O_DIRECTORY) +req = rpc.criu_req() +req.type = rpc.DUMP +req.opts.leave_running = True +req.opts.log_level = 4 +req.opts.images_dir_fd = os.open(args['dir'], os.O_DIRECTORY) # Send request s.send(req.SerializeToString()) # Recv response -resp = rpc.criu_resp() -MAX_MSG_SIZE = 1024 +resp = rpc.criu_resp() +MAX_MSG_SIZE = 1024 resp.ParseFromString(s.recv(MAX_MSG_SIZE)) if resp.type != rpc.DUMP: - print('Unexpected msg type') - sys.exit(-1) + print('Unexpected msg type') + sys.exit(-1) else: - if resp.success: - print('Success') - else: - print('Fail') - sys.exit(-1) + if resp.success: + print('Success') + else: + print('Fail') + sys.exit(-1) - if resp.dump.restored: - print('Restored') + if resp.dump.restored: + print('Restored') # Connect to service socket s = socket.socket(socket.AF_UNIX, socket.SOCK_SEQPACKET) @@ -61,21 +64,21 @@ MAX_MSG_SIZE = 1024 resp.ParseFromString(s.recv(MAX_MSG_SIZE)) if resp.type != rpc.VERSION: - print('RPC: Unexpected msg type') - sys.exit(-1) + print('RPC: Unexpected msg type') + sys.exit(-1) else: - if resp.success: - print('RPC: Success') - print('CRIU major %d' % resp.version.major_number) - print('CRIU minor %d' % resp.version.minor_number) - if resp.version.HasField('gitid'): - print('CRIU gitid %s' % resp.version.gitid) - if resp.version.HasField('sublevel'): - print('CRIU sublevel %s' % resp.version.sublevel) - if resp.version.HasField('extra'): - print('CRIU extra %s' % resp.version.extra) - if resp.version.HasField('name'): - print('CRIU name %s' % resp.version.name) - else: - print('Fail') - sys.exit(-1) + if resp.success: + print('RPC: Success') + print('CRIU major %d' % resp.version.major_number) + print('CRIU minor %d' % resp.version.minor_number) + if resp.version.HasField('gitid'): + print('CRIU gitid %s' % resp.version.gitid) + if 
resp.version.HasField('sublevel'): + print('CRIU sublevel %s' % resp.version.sublevel) + if resp.version.HasField('extra'): + print('CRIU extra %s' % resp.version.extra) + if resp.version.HasField('name'): + print('CRIU name %s' % resp.version.name) + else: + print('Fail') + sys.exit(-1) diff --git a/test/others/rpc/version.py b/test/others/rpc/version.py index 247bc466d..f978c6c37 100755 --- a/test/others/rpc/version.py +++ b/test/others/rpc/version.py @@ -27,21 +27,21 @@ MAX_MSG_SIZE = 1024 resp.ParseFromString(s.recv(MAX_MSG_SIZE)) if resp.type != rpc.VERSION: - print('RPC: Unexpected msg type') - sys.exit(-1) + print('RPC: Unexpected msg type') + sys.exit(-1) else: - if resp.success: - print('RPC: Success') - print('CRIU major %d' % resp.version.major_number) - print('CRIU minor %d' % resp.version.minor_number) - if resp.version.HasField('gitid'): - print('CRIU gitid %s' % resp.version.gitid) - if resp.version.HasField('sublevel'): - print('CRIU sublevel %s' % resp.version.sublevel) - if resp.version.HasField('extra'): - print('CRIU extra %s' % resp.version.extra) - if resp.version.HasField('name'): - print('CRIU name %s' % resp.version.name) - else: - print('Fail') - sys.exit(-1) + if resp.success: + print('RPC: Success') + print('CRIU major %d' % resp.version.major_number) + print('CRIU minor %d' % resp.version.minor_number) + if resp.version.HasField('gitid'): + print('CRIU gitid %s' % resp.version.gitid) + if resp.version.HasField('sublevel'): + print('CRIU sublevel %s' % resp.version.sublevel) + if resp.version.HasField('extra'): + print('CRIU extra %s' % resp.version.extra) + if resp.version.HasField('name'): + print('CRIU name %s' % resp.version.name) + else: + print('Fail') + sys.exit(-1) diff --git a/test/others/shell-job/run.py b/test/others/shell-job/run.py index 4f4dfadef..bd5c42509 100755 --- a/test/others/shell-job/run.py +++ b/test/others/shell-job/run.py @@ -6,15 +6,17 @@ cr_bin = "../../../criu/criu" os.chdir(os.getcwd()) + def create_pty(): - 
(fd1, fd2) = pty.openpty() - return (os.fdopen(fd1, "w+"), os.fdopen(fd2, "w+")) + (fd1, fd2) = pty.openpty() + return (os.fdopen(fd1, "w+"), os.fdopen(fd2, "w+")) + if not os.access("work", os.X_OK): os.mkdir("work", 0755) open("running", "w").close() -m,s = create_pty() +m, s = create_pty() p = os.pipe() pr = os.fdopen(p[0], "r") pw = os.fdopen(p[1], "w") @@ -46,14 +48,15 @@ if ret != 0: os.wait() os.unlink("running") -m,s = create_pty() +m, s = create_pty() cpid = os.fork() if cpid == 0: os.setsid() fcntl.ioctl(m.fileno(), termios.TIOCSCTTY, 1) cmd = [cr_bin, "restore", "-j", "-D", "work", "-v"] print("Run: %s" % " ".join(cmd)) - ret = subprocess.Popen([cr_bin, "restore", "-j", "-D", "work", "-v"]).wait() + ret = subprocess.Popen([cr_bin, "restore", "-j", "-D", "work", + "-v"]).wait() if ret != 0: sys.exit(1) sys.exit(0) diff --git a/test/zdtm.py b/test/zdtm.py index c52964528..0153c6058 100755 --- a/test/zdtm.py +++ b/test/zdtm.py @@ -32,26 +32,26 @@ prev_line = None def alarm(*args): - print("==== ALARM ====") + print("==== ALARM ====") signal.signal(signal.SIGALRM, alarm) def traceit(f, e, a): - if e == "line": - lineno = f.f_lineno - fil = f.f_globals["__file__"] - if fil.endswith("zdtm.py"): - global prev_line - line = linecache.getline(fil, lineno) - if line == prev_line: - print(" ...") - else: - prev_line = line - print("+%4d: %s" % (lineno, line.rstrip())) + if e == "line": + lineno = f.f_lineno + fil = f.f_globals["__file__"] + if fil.endswith("zdtm.py"): + global prev_line + line = linecache.getline(fil, lineno) + if line == prev_line: + print(" ...") + else: + prev_line = line + print("+%4d: %s" % (lineno, line.rstrip())) - return traceit + return traceit # Root dir for ns and uns flavors. 
All tests @@ -60,17 +60,17 @@ tests_root = None def clean_tests_root(): - global tests_root - if tests_root and tests_root[0] == os.getpid(): - os.rmdir(tests_root[1]) + global tests_root + if tests_root and tests_root[0] == os.getpid(): + os.rmdir(tests_root[1]) def make_tests_root(): - global tests_root - if not tests_root: - tests_root = (os.getpid(), tempfile.mkdtemp("", "criu-root-", "/tmp")) - atexit.register(clean_tests_root) - return tests_root[1] + global tests_root + if not tests_root: + tests_root = (os.getpid(), tempfile.mkdtemp("", "criu-root-", "/tmp")) + atexit.register(clean_tests_root) + return tests_root[1] # Report generation @@ -79,60 +79,61 @@ report_dir = None def init_report(path): - global report_dir - report_dir = path - if not os.access(report_dir, os.F_OK): - os.makedirs(report_dir) + global report_dir + report_dir = path + if not os.access(report_dir, os.F_OK): + os.makedirs(report_dir) def add_to_report(path, tgt_name): - global report_dir - if report_dir: - tgt_path = os.path.join(report_dir, tgt_name) - att = 0 - while os.access(tgt_path, os.F_OK): - tgt_path = os.path.join(report_dir, tgt_name + ".%d" % att) - att += 1 + global report_dir + if report_dir: + tgt_path = os.path.join(report_dir, tgt_name) + att = 0 + while os.access(tgt_path, os.F_OK): + tgt_path = os.path.join(report_dir, tgt_name + ".%d" % att) + att += 1 - ignore = shutil.ignore_patterns('*.socket') - if os.path.isdir(path): - shutil.copytree(path, tgt_path, ignore = ignore) - else: - if not os.path.exists(os.path.dirname(tgt_path)): - os.mkdir(os.path.dirname(tgt_path)) - shutil.copy2(path, tgt_path) + ignore = shutil.ignore_patterns('*.socket') + if os.path.isdir(path): + shutil.copytree(path, tgt_path, ignore=ignore) + else: + if not os.path.exists(os.path.dirname(tgt_path)): + os.mkdir(os.path.dirname(tgt_path)) + shutil.copy2(path, tgt_path) def add_to_output(path): - global report_dir - if not report_dir: - return + global report_dir + if not report_dir: + 
return - output_path = os.path.join(report_dir, "output") - with open(path, "r") as fdi, open(output_path, "a") as fdo: - for line in fdi: - fdo.write(line) + output_path = os.path.join(report_dir, "output") + with open(path, "r") as fdi, open(output_path, "a") as fdo: + for line in fdi: + fdo.write(line) prev_crash_reports = set(glob.glob("/tmp/zdtm-core-*.txt")) def check_core_files(): - reports = set(glob.glob("/tmp/zdtm-core-*.txt")) - prev_crash_reports - if not reports: - return False + reports = set(glob.glob("/tmp/zdtm-core-*.txt")) - prev_crash_reports + if not reports: + return False - while subprocess.Popen(r"ps axf | grep 'abrt\.sh'", shell = True).wait() == 0: - time.sleep(1) + while subprocess.Popen(r"ps axf | grep 'abrt\.sh'", + shell=True).wait() == 0: + time.sleep(1) - for i in reports: - add_to_report(i, os.path.basename(i)) - print_sep(i) - with open(i, "r") as report: - print(report.read()) - print_sep(i) + for i in reports: + add_to_report(i, os.path.basename(i)) + print_sep(i) + with open(i, "r") as report: + print(report.read()) + print_sep(i) - return True + return True # Arch we run on @@ -147,148 +148,161 @@ arch = os.uname()[4] class host_flavor: - def __init__(self, opts): - self.name = "host" - self.ns = False - self.root = None + def __init__(self, opts): + self.name = "host" + self.ns = False + self.root = None - def init(self, l_bins, x_bins): - pass + def init(self, l_bins, x_bins): + pass - def fini(self): - pass + def fini(self): + pass - @staticmethod - def clean(): - pass + @staticmethod + def clean(): + pass class ns_flavor: - __root_dirs = ["/bin", "/sbin", "/etc", "/lib", "/lib64", "/dev", "/dev/pts", "/dev/net", "/tmp", "/usr", "/proc", "/run"] + __root_dirs = [ + "/bin", "/sbin", "/etc", "/lib", "/lib64", "/dev", "/dev/pts", + "/dev/net", "/tmp", "/usr", "/proc", "/run" + ] - def __init__(self, opts): - self.name = "ns" - self.ns = True - self.uns = False - self.root = make_tests_root() - self.root_mounted = False + def 
__init__(self, opts): + self.name = "ns" + self.ns = True + self.uns = False + self.root = make_tests_root() + self.root_mounted = False - def __copy_one(self, fname): - tfname = self.root + fname - if not os.access(tfname, os.F_OK): - # Copying should be atomic as tests can be - # run in parallel - try: - os.makedirs(self.root + os.path.dirname(fname)) - except OSError as e: - if e.errno != errno.EEXIST: - raise - dst = tempfile.mktemp(".tso", "", self.root + os.path.dirname(fname)) - shutil.copy2(fname, dst) - os.rename(dst, tfname) + def __copy_one(self, fname): + tfname = self.root + fname + if not os.access(tfname, os.F_OK): + # Copying should be atomic as tests can be + # run in parallel + try: + os.makedirs(self.root + os.path.dirname(fname)) + except OSError as e: + if e.errno != errno.EEXIST: + raise + dst = tempfile.mktemp(".tso", "", + self.root + os.path.dirname(fname)) + shutil.copy2(fname, dst) + os.rename(dst, tfname) - def __copy_libs(self, binary): - ldd = subprocess.Popen(["ldd", binary], stdout = subprocess.PIPE) - xl = re.compile(r'^(linux-gate.so|linux-vdso(64)?.so|not a dynamic|.*\s*ldd\s)') + def __copy_libs(self, binary): + ldd = subprocess.Popen(["ldd", binary], stdout=subprocess.PIPE) + xl = re.compile( + r'^(linux-gate.so|linux-vdso(64)?.so|not a dynamic|.*\s*ldd\s)') - # This Mayakovsky-style code gets list of libraries a binary - # needs minus vdso and gate .so-s - libs = map(lambda x: x[1] == '=>' and x[2] or x[0], - map(lambda x: str(x).split(), - filter(lambda x: not xl.match(x), - map(lambda x: str(x).strip(), - filter(lambda x: str(x).startswith('\t'), ldd.stdout.read().decode('ascii').splitlines()))))) + # This Mayakovsky-style code gets list of libraries a binary + # needs minus vdso and gate .so-s + libs = map( + lambda x: x[1] == '=>' and x[2] or x[0], + map( + lambda x: str(x).split(), + filter( + lambda x: not xl.match(x), + map( + lambda x: str(x).strip(), + filter(lambda x: str(x).startswith('\t'), + 
ldd.stdout.read().decode( + 'ascii').splitlines()))))) - ldd.wait() + ldd.wait() - for lib in libs: - if not os.access(lib, os.F_OK): - raise test_fail_exc("Can't find lib %s required by %s" % (lib, binary)) - self.__copy_one(lib) + for lib in libs: + if not os.access(lib, os.F_OK): + raise test_fail_exc("Can't find lib %s required by %s" % + (lib, binary)) + self.__copy_one(lib) - def __mknod(self, name, rdev = None): - name = "/dev/" + name - if not rdev: - if not os.access(name, os.F_OK): - print("Skipping %s at root" % name) - return - else: - rdev = os.stat(name).st_rdev + def __mknod(self, name, rdev=None): + name = "/dev/" + name + if not rdev: + if not os.access(name, os.F_OK): + print("Skipping %s at root" % name) + return + else: + rdev = os.stat(name).st_rdev - name = self.root + name - os.mknod(name, stat.S_IFCHR, rdev) - os.chmod(name, 0o666) + name = self.root + name + os.mknod(name, stat.S_IFCHR, rdev) + os.chmod(name, 0o666) - def __construct_root(self): - for dir in self.__root_dirs: - os.mkdir(self.root + dir) - os.chmod(self.root + dir, 0o777) + def __construct_root(self): + for dir in self.__root_dirs: + os.mkdir(self.root + dir) + os.chmod(self.root + dir, 0o777) - for ldir in ["/bin", "/sbin", "/lib", "/lib64"]: - os.symlink(".." + ldir, self.root + "/usr" + ldir) + for ldir in ["/bin", "/sbin", "/lib", "/lib64"]: + os.symlink(".." 
+ ldir, self.root + "/usr" + ldir) - self.__mknod("tty", os.makedev(5, 0)) - self.__mknod("null", os.makedev(1, 3)) - self.__mknod("net/tun") - self.__mknod("rtc") - self.__mknod("autofs", os.makedev(10, 235)) + self.__mknod("tty", os.makedev(5, 0)) + self.__mknod("null", os.makedev(1, 3)) + self.__mknod("net/tun") + self.__mknod("rtc") + self.__mknod("autofs", os.makedev(10, 235)) - def __copy_deps(self, deps): - for d in deps.split('|'): - if os.access(d, os.F_OK): - self.__copy_one(d) - self.__copy_libs(d) - return - raise test_fail_exc("Deps check %s failed" % deps) + def __copy_deps(self, deps): + for d in deps.split('|'): + if os.access(d, os.F_OK): + self.__copy_one(d) + self.__copy_libs(d) + return + raise test_fail_exc("Deps check %s failed" % deps) - def init(self, l_bins, x_bins): - subprocess.check_call(["mount", "--make-slave", "--bind", ".", self.root]) - self.root_mounted = True + def init(self, l_bins, x_bins): + subprocess.check_call( + ["mount", "--make-slave", "--bind", ".", self.root]) + self.root_mounted = True - if not os.access(self.root + "/.constructed", os.F_OK): - with open(os.path.abspath(__file__)) as o: - fcntl.flock(o, fcntl.LOCK_EX) - if not os.access(self.root + "/.constructed", os.F_OK): - print("Construct root for %s" % l_bins[0]) - self.__construct_root() - os.mknod(self.root + "/.constructed", stat.S_IFREG | 0o600) + if not os.access(self.root + "/.constructed", os.F_OK): + with open(os.path.abspath(__file__)) as o: + fcntl.flock(o, fcntl.LOCK_EX) + if not os.access(self.root + "/.constructed", os.F_OK): + print("Construct root for %s" % l_bins[0]) + self.__construct_root() + os.mknod(self.root + "/.constructed", stat.S_IFREG | 0o600) - for b in l_bins: - self.__copy_libs(b) - for b in x_bins: - self.__copy_deps(b) + for b in l_bins: + self.__copy_libs(b) + for b in x_bins: + self.__copy_deps(b) - def fini(self): - if self.root_mounted: - subprocess.check_call(["./umount2", self.root]) - self.root_mounted = False + def 
fini(self): + if self.root_mounted: + subprocess.check_call(["./umount2", self.root]) + self.root_mounted = False - @staticmethod - def clean(): - for d in ns_flavor.__root_dirs: - p = './' + d - print('Remove %s' % p) - if os.access(p, os.F_OK): - shutil.rmtree('./' + d) + @staticmethod + def clean(): + for d in ns_flavor.__root_dirs: + p = './' + d + print('Remove %s' % p) + if os.access(p, os.F_OK): + shutil.rmtree('./' + d) - if os.access('./.constructed', os.F_OK): - os.unlink('./.constructed') + if os.access('./.constructed', os.F_OK): + os.unlink('./.constructed') class userns_flavor(ns_flavor): - def __init__(self, opts): - ns_flavor.__init__(self, opts) - self.name = "userns" - self.uns = True + def __init__(self, opts): + ns_flavor.__init__(self, opts) + self.name = "userns" + self.uns = True - def init(self, l_bins, x_bins): - # To be able to create roots_yard in CRIU - os.chmod(".", os.stat(".").st_mode | 0o077) - ns_flavor.init(self, l_bins, x_bins) + def init(self, l_bins, x_bins): + # To be able to create roots_yard in CRIU + os.chmod(".", os.stat(".").st_mode | 0o077) + ns_flavor.init(self, l_bins, x_bins) - @staticmethod - def clean(): - pass + @staticmethod + def clean(): + pass flavors = {'h': host_flavor, 'ns': ns_flavor, 'uns': userns_flavor} @@ -300,47 +314,47 @@ flavors_codes = dict(zip(range(len(flavors)), sorted(flavors.keys()))) def encode_flav(f): - return sorted(flavors.keys()).index(f) + 128 + return sorted(flavors.keys()).index(f) + 128 def decode_flav(i): - return flavors_codes.get(i - 128, "unknown") + return flavors_codes.get(i - 128, "unknown") def tail(path): - p = subprocess.Popen(['tail', '-n1', path], - stdout = subprocess.PIPE) - out = p.stdout.readline() - p.wait() - return out.decode() + p = subprocess.Popen(['tail', '-n1', path], stdout=subprocess.PIPE) + out = p.stdout.readline() + p.wait() + return out.decode() def rpidfile(path): - with open(path) as fd: - return fd.readline().strip() + with open(path) as fd: + return 
fd.readline().strip() -def wait_pid_die(pid, who, tmo = 30): - stime = 0.1 - while stime < tmo: - try: - os.kill(int(pid), 0) - except OSError as e: - if e.errno != errno.ESRCH: - print(e) - break +def wait_pid_die(pid, who, tmo=30): + stime = 0.1 + while stime < tmo: + try: + os.kill(int(pid), 0) + except OSError as e: + if e.errno != errno.ESRCH: + print(e) + break - print("Wait for %s(%d) to die for %f" % (who, pid, stime)) - time.sleep(stime) - stime *= 2 - else: - subprocess.Popen(["ps", "-p", str(pid)]).wait() - subprocess.Popen(["ps", "axf", str(pid)]).wait() - raise test_fail_exc("%s die" % who) + print("Wait for %s(%d) to die for %f" % (who, pid, stime)) + time.sleep(stime) + stime *= 2 + else: + subprocess.Popen(["ps", "-p", str(pid)]).wait() + subprocess.Popen(["ps", "axf", str(pid)]).wait() + raise test_fail_exc("%s die" % who) def test_flag(tdesc, flag): - return flag in tdesc.get('flags', '').split() + return flag in tdesc.get('flags', '').split() + # # Exception thrown when something inside the test goes wrong, @@ -350,16 +364,17 @@ def test_flag(tdesc, flag): class test_fail_exc(Exception): - def __init__(self, step): - self.step = step + def __init__(self, step): + self.step = step - def __str__(self): - return str(self.step) + def __str__(self): + return str(self.step) class test_fail_expected_exc(Exception): - def __init__(self, cr_action): - self.cr_action = cr_action + def __init__(self, cr_action): + self.cr_action = cr_action + # # A test from zdtm/ directory. 
@@ -367,418 +382,440 @@ class test_fail_expected_exc(Exception): class zdtm_test: - def __init__(self, name, desc, flavor, freezer): - self.__name = name - self.__desc = desc - self.__freezer = None - self.__make_action('cleanout') - self.__pid = 0 - self.__flavor = flavor - self.__freezer = freezer - self._bins = [name] - self._env = {} - self._deps = desc.get('deps', []) - self.auto_reap = True - self.__timeout = int(self.__desc.get('timeout') or 30) + def __init__(self, name, desc, flavor, freezer): + self.__name = name + self.__desc = desc + self.__freezer = None + self.__make_action('cleanout') + self.__pid = 0 + self.__flavor = flavor + self.__freezer = freezer + self._bins = [name] + self._env = {} + self._deps = desc.get('deps', []) + self.auto_reap = True + self.__timeout = int(self.__desc.get('timeout') or 30) - def __make_action(self, act, env = None, root = None): - sys.stdout.flush() # Not to let make's messages appear before ours - tpath = self.__name + '.' + act - s_args = ['make', '--no-print-directory', - '-C', os.path.dirname(tpath), - os.path.basename(tpath)] + def __make_action(self, act, env=None, root=None): + sys.stdout.flush() # Not to let make's messages appear before ours + tpath = self.__name + '.' 
+ act + s_args = [ + 'make', '--no-print-directory', '-C', + os.path.dirname(tpath), + os.path.basename(tpath) + ] - if env: - env = dict(os.environ, **env) + if env: + env = dict(os.environ, **env) - s = subprocess.Popen(s_args, env = env, cwd = root, close_fds = True, - preexec_fn = self.__freezer and self.__freezer.attach or None) - if act == "pid": - try_run_hook(self, ["--post-start"]) - if s.wait(): - raise test_fail_exc(str(s_args)) + s = subprocess.Popen( + s_args, + env=env, + cwd=root, + close_fds=True, + preexec_fn=self.__freezer and self.__freezer.attach or None) + if act == "pid": + try_run_hook(self, ["--post-start"]) + if s.wait(): + raise test_fail_exc(str(s_args)) - if self.__freezer: - self.__freezer.freeze() + if self.__freezer: + self.__freezer.freeze() - def __pidfile(self): - return self.__name + '.pid' + def __pidfile(self): + return self.__name + '.pid' - def __wait_task_die(self): - wait_pid_die(int(self.__pid), self.__name, self.__timeout) + def __wait_task_die(self): + wait_pid_die(int(self.__pid), self.__name, self.__timeout) - def __add_wperms(self): - # Add write perms for .out and .pid files - for b in self._bins: - p = os.path.dirname(b) - os.chmod(p, os.stat(p).st_mode | 0o222) + def __add_wperms(self): + # Add write perms for .out and .pid files + for b in self._bins: + p = os.path.dirname(b) + os.chmod(p, os.stat(p).st_mode | 0o222) - def start(self): - self.__flavor.init(self._bins, self._deps) + def start(self): + self.__flavor.init(self._bins, self._deps) - print("Start test") + print("Start test") - env = self._env - if not self.__freezer.kernel: - env['ZDTM_THREAD_BOMB'] = "5" + env = self._env + if not self.__freezer.kernel: + env['ZDTM_THREAD_BOMB'] = "5" - if test_flag(self.__desc, 'pre-dump-notify'): - env['ZDTM_NOTIFY_FDIN'] = "100" - env['ZDTM_NOTIFY_FDOUT'] = "101" + if test_flag(self.__desc, 'pre-dump-notify'): + env['ZDTM_NOTIFY_FDIN'] = "100" + env['ZDTM_NOTIFY_FDOUT'] = "101" - if not test_flag(self.__desc, 
'suid'): - # Numbers should match those in criu - env['ZDTM_UID'] = "18943" - env['ZDTM_GID'] = "58467" - env['ZDTM_GROUPS'] = "27495 48244" - self.__add_wperms() - else: - print("Test is SUID") + if not test_flag(self.__desc, 'suid'): + # Numbers should match those in criu + env['ZDTM_UID'] = "18943" + env['ZDTM_GID'] = "58467" + env['ZDTM_GROUPS'] = "27495 48244" + self.__add_wperms() + else: + print("Test is SUID") - if self.__flavor.ns: - env['ZDTM_NEWNS'] = "1" - env['ZDTM_ROOT'] = self.__flavor.root - env['PATH'] = "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin" + if self.__flavor.ns: + env['ZDTM_NEWNS'] = "1" + env['ZDTM_ROOT'] = self.__flavor.root + env['PATH'] = "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin" - if self.__flavor.uns: - env['ZDTM_USERNS'] = "1" - self.__add_wperms() - if os.getenv("GCOV"): - criu_dir = os.path.dirname(os.getcwd()) - criu_dir_r = "%s%s" % (self.__flavor.root, criu_dir) + if self.__flavor.uns: + env['ZDTM_USERNS'] = "1" + self.__add_wperms() + if os.getenv("GCOV"): + criu_dir = os.path.dirname(os.getcwd()) + criu_dir_r = "%s%s" % (self.__flavor.root, criu_dir) - env['ZDTM_CRIU'] = os.path.dirname(os.getcwd()) - subprocess.check_call(["mkdir", "-p", criu_dir_r]) + env['ZDTM_CRIU'] = os.path.dirname(os.getcwd()) + subprocess.check_call(["mkdir", "-p", criu_dir_r]) - self.__make_action('pid', env, self.__flavor.root) + self.__make_action('pid', env, self.__flavor.root) - try: - os.kill(int(self.getpid()), 0) - except Exception as e: - raise test_fail_exc("start: %s" % e) + try: + os.kill(int(self.getpid()), 0) + except Exception as e: + raise test_fail_exc("start: %s" % e) - if not self.static(): - # Wait less than a second to give the test chance to - # move into some semi-random state - time.sleep(random.random()) + if not self.static(): + # Wait less than a second to give the test chance to + # move into some semi-random state + time.sleep(random.random()) - def kill(self, sig = signal.SIGKILL): - 
self.__freezer.thaw() - if self.__pid: - print("Send the %d signal to %s" % (sig, self.__pid)) - os.kill(int(self.__pid), sig) - self.gone(sig == signal.SIGKILL) + def kill(self, sig=signal.SIGKILL): + self.__freezer.thaw() + if self.__pid: + print("Send the %d signal to %s" % (sig, self.__pid)) + os.kill(int(self.__pid), sig) + self.gone(sig == signal.SIGKILL) - self.__flavor.fini() + self.__flavor.fini() - def pre_dump_notify(self): - env = self._env + def pre_dump_notify(self): + env = self._env - if 'ZDTM_NOTIFY_FDIN' not in env: - return + if 'ZDTM_NOTIFY_FDIN' not in env: + return - if self.__pid == 0: - self.getpid() + if self.__pid == 0: + self.getpid() - notify_fdout_path = "/proc/%s/fd/%s" % (self.__pid, env['ZDTM_NOTIFY_FDOUT']) - notify_fdin_path = "/proc/%s/fd/%s" % (self.__pid, env['ZDTM_NOTIFY_FDIN']) + notify_fdout_path = "/proc/%s/fd/%s" % (self.__pid, + env['ZDTM_NOTIFY_FDOUT']) + notify_fdin_path = "/proc/%s/fd/%s" % (self.__pid, + env['ZDTM_NOTIFY_FDIN']) - print("Send pre-dump notify to %s" % (self.__pid)) - with open(notify_fdout_path, "rb") as fdout: - with open(notify_fdin_path, "wb") as fdin: - fdin.write(struct.pack("i", 0)) - fdin.flush() - print("Wait pre-dump notify reply") - ret = struct.unpack('i', fdout.read(4)) - print("Completed pre-dump notify with %d" % (ret)) + print("Send pre-dump notify to %s" % (self.__pid)) + with open(notify_fdout_path, "rb") as fdout: + with open(notify_fdin_path, "wb") as fdin: + fdin.write(struct.pack("i", 0)) + fdin.flush() + print("Wait pre-dump notify reply") + ret = struct.unpack('i', fdout.read(4)) + print("Completed pre-dump notify with %d" % (ret)) - def stop(self): - self.__freezer.thaw() - self.getpid() # Read the pid from pidfile back - self.kill(signal.SIGTERM) + def stop(self): + self.__freezer.thaw() + self.getpid() # Read the pid from pidfile back + self.kill(signal.SIGTERM) - res = tail(self.__name + '.out') - if 'PASS' not in list(map(lambda s: s.strip(), res.split())): - if 
os.access(self.__name + '.out.inprogress', os.F_OK): - print_sep(self.__name + '.out.inprogress') - with open(self.__name + '.out.inprogress') as fd: - print(fd.read()) - print_sep(self.__name + '.out.inprogress') - raise test_fail_exc("result check") + res = tail(self.__name + '.out') + if 'PASS' not in list(map(lambda s: s.strip(), res.split())): + if os.access(self.__name + '.out.inprogress', os.F_OK): + print_sep(self.__name + '.out.inprogress') + with open(self.__name + '.out.inprogress') as fd: + print(fd.read()) + print_sep(self.__name + '.out.inprogress') + raise test_fail_exc("result check") - def getpid(self): - if self.__pid == 0: - self.__pid = rpidfile(self.__pidfile()) + def getpid(self): + if self.__pid == 0: + self.__pid = rpidfile(self.__pidfile()) - return self.__pid + return self.__pid - def getname(self): - return self.__name + def getname(self): + return self.__name - def __getcropts(self): - opts = self.__desc.get('opts', '').split() + ["--pidfile", os.path.realpath(self.__pidfile())] - if self.__flavor.ns: - opts += ["--root", self.__flavor.root] - if test_flag(self.__desc, 'crlib'): - opts += ["-L", os.path.dirname(os.path.realpath(self.__name)) + '/lib'] - return opts + def __getcropts(self): + opts = self.__desc.get('opts', '').split() + [ + "--pidfile", os.path.realpath(self.__pidfile()) + ] + if self.__flavor.ns: + opts += ["--root", self.__flavor.root] + if test_flag(self.__desc, 'crlib'): + opts += [ + "-L", + os.path.dirname(os.path.realpath(self.__name)) + '/lib' + ] + return opts - def getdopts(self): - return self.__getcropts() + self.__freezer.getdopts() + self.__desc.get('dopts', '').split() + def getdopts(self): + return self.__getcropts() + self.__freezer.getdopts( + ) + self.__desc.get('dopts', '').split() - def getropts(self): - return self.__getcropts() + self.__freezer.getropts() + self.__desc.get('ropts', '').split() + def getropts(self): + return self.__getcropts() + self.__freezer.getropts( + ) + self.__desc.get('ropts', 
'').split() - def unlink_pidfile(self): - self.__pid = 0 - os.unlink(self.__pidfile()) + def unlink_pidfile(self): + self.__pid = 0 + os.unlink(self.__pidfile()) - def gone(self, force = True): - if not self.auto_reap: - pid, status = os.waitpid(int(self.__pid), 0) - if pid != int(self.__pid): - raise test_fail_exc("kill pid mess") + def gone(self, force=True): + if not self.auto_reap: + pid, status = os.waitpid(int(self.__pid), 0) + if pid != int(self.__pid): + raise test_fail_exc("kill pid mess") - self.__wait_task_die() - self.__pid = 0 - if force: - os.unlink(self.__pidfile()) + self.__wait_task_die() + self.__pid = 0 + if force: + os.unlink(self.__pidfile()) - def print_output(self): - if os.access(self.__name + '.out', os.R_OK): - print("Test output: " + "=" * 32) - with open(self.__name + '.out') as output: - print(output.read()) - print(" <<< " + "=" * 32) + def print_output(self): + if os.access(self.__name + '.out', os.R_OK): + print("Test output: " + "=" * 32) + with open(self.__name + '.out') as output: + print(output.read()) + print(" <<< " + "=" * 32) - def static(self): - return self.__name.split('/')[1] == 'static' + def static(self): + return self.__name.split('/')[1] == 'static' - def ns(self): - return self.__flavor.ns + def ns(self): + return self.__flavor.ns - def blocking(self): - return test_flag(self.__desc, 'crfail') + def blocking(self): + return test_flag(self.__desc, 'crfail') - @staticmethod - def available(): - if not os.access("umount2", os.X_OK): - subprocess.check_call(["make", "umount2"]) - if not os.access("zdtm_ct", os.X_OK): - subprocess.check_call(["make", "zdtm_ct"]) - if not os.access("zdtm/lib/libzdtmtst.a", os.F_OK): - subprocess.check_call(["make", "-C", "zdtm/"]) - subprocess.check_call(["flock", "zdtm_mount_cgroups.lock", "./zdtm_mount_cgroups"]) + @staticmethod + def available(): + if not os.access("umount2", os.X_OK): + subprocess.check_call(["make", "umount2"]) + if not os.access("zdtm_ct", os.X_OK): + 
subprocess.check_call(["make", "zdtm_ct"]) + if not os.access("zdtm/lib/libzdtmtst.a", os.F_OK): + subprocess.check_call(["make", "-C", "zdtm/"]) + subprocess.check_call( + ["flock", "zdtm_mount_cgroups.lock", "./zdtm_mount_cgroups"]) - @staticmethod - def cleanup(): - subprocess.check_call(["flock", "zdtm_mount_cgroups.lock", "./zdtm_umount_cgroups"]) + @staticmethod + def cleanup(): + subprocess.check_call( + ["flock", "zdtm_mount_cgroups.lock", "./zdtm_umount_cgroups"]) def load_module_from_file(name, path): - if sys.version_info[0] == 3 and sys.version_info[1] >= 5: - import importlib.util - spec = importlib.util.spec_from_file_location(name, path) - mod = importlib.util.module_from_spec(spec) - spec.loader.exec_module(mod) - else: - import imp - mod = imp.load_source(name, path) - return mod + if sys.version_info[0] == 3 and sys.version_info[1] >= 5: + import importlib.util + spec = importlib.util.spec_from_file_location(name, path) + mod = importlib.util.module_from_spec(spec) + spec.loader.exec_module(mod) + else: + import imp + mod = imp.load_source(name, path) + return mod class inhfd_test: - def __init__(self, name, desc, flavor, freezer): - self.__name = os.path.basename(name) - print("Load %s" % name) - self.__fdtyp = load_module_from_file(self.__name, name) - self.__peer_pid = 0 - self.__files = None - self.__peer_file_names = [] - self.__dump_opts = [] - self.__messages = {} + def __init__(self, name, desc, flavor, freezer): + self.__name = os.path.basename(name) + print("Load %s" % name) + self.__fdtyp = load_module_from_file(self.__name, name) + self.__peer_pid = 0 + self.__files = None + self.__peer_file_names = [] + self.__dump_opts = [] + self.__messages = {} - def __get_message(self, i): - m = self.__messages.get(i, None) - if not m: - m = b"".join([random.choice(string.ascii_letters).encode() for _ in range(10)]) + b"%06d" % i - self.__messages[i] = m - return m + def __get_message(self, i): + m = self.__messages.get(i, None) + if not m: + m = 
b"".join([ + random.choice(string.ascii_letters).encode() for _ in range(10) + ]) + b"%06d" % i + self.__messages[i] = m + return m - def start(self): - self.__files = self.__fdtyp.create_fds() + def start(self): + self.__files = self.__fdtyp.create_fds() - # Check FDs returned for inter-connection - i = 0 - for my_file, peer_file in self.__files: - msg = self.__get_message(i) - my_file.write(msg) - my_file.flush() - data = peer_file.read(len(msg)) - if data != msg: - raise test_fail_exc("FDs screwup: %r %r" % (msg, data)) - i += 1 + # Check FDs returned for inter-connection + i = 0 + for my_file, peer_file in self.__files: + msg = self.__get_message(i) + my_file.write(msg) + my_file.flush() + data = peer_file.read(len(msg)) + if data != msg: + raise test_fail_exc("FDs screwup: %r %r" % (msg, data)) + i += 1 - start_pipe = os.pipe() - self.__peer_pid = os.fork() - if self.__peer_pid == 0: - os.setsid() + start_pipe = os.pipe() + self.__peer_pid = os.fork() + if self.__peer_pid == 0: + os.setsid() - for _, peer_file in self.__files: - getattr(self.__fdtyp, "child_prep", lambda fd: None)(peer_file) + for _, peer_file in self.__files: + getattr(self.__fdtyp, "child_prep", lambda fd: None)(peer_file) - try: - os.unlink(self.__name + ".out") - except Exception as e: - print(e) - fd = os.open(self.__name + ".out", os.O_WRONLY | os.O_APPEND | os.O_CREAT) - os.dup2(fd, 1) - os.dup2(fd, 2) - os.close(fd) - fd = os.open("/dev/null", os.O_RDONLY) - os.dup2(fd, 0) - for my_file, _ in self.__files: - my_file.close() - os.close(start_pipe[0]) - os.close(start_pipe[1]) - i = 0 - for _, peer_file in self.__files: - msg = self.__get_message(i) - my_file.close() - try: - data = peer_file.read(16) - except Exception as e: - print("Unable to read a peer file: %s" % e) - sys.exit(1) + try: + os.unlink(self.__name + ".out") + except Exception as e: + print(e) + fd = os.open(self.__name + ".out", + os.O_WRONLY | os.O_APPEND | os.O_CREAT) + os.dup2(fd, 1) + os.dup2(fd, 2) + os.close(fd) + 
fd = os.open("/dev/null", os.O_RDONLY) + os.dup2(fd, 0) + for my_file, _ in self.__files: + my_file.close() + os.close(start_pipe[0]) + os.close(start_pipe[1]) + i = 0 + for _, peer_file in self.__files: + msg = self.__get_message(i) + my_file.close() + try: + data = peer_file.read(16) + except Exception as e: + print("Unable to read a peer file: %s" % e) + sys.exit(1) - if data != msg: - print("%r %r" % (data, msg)) - i += 1 - sys.exit(data == msg and 42 or 2) + if data != msg: + print("%r %r" % (data, msg)) + i += 1 + sys.exit(data == msg and 42 or 2) - os.close(start_pipe[1]) - os.read(start_pipe[0], 12) - os.close(start_pipe[0]) + os.close(start_pipe[1]) + os.read(start_pipe[0], 12) + os.close(start_pipe[0]) - for _, peer_file in self.__files: - self.__peer_file_names.append(self.__fdtyp.filename(peer_file)) - self.__dump_opts += self.__fdtyp.dump_opts(peer_file) + for _, peer_file in self.__files: + self.__peer_file_names.append(self.__fdtyp.filename(peer_file)) + self.__dump_opts += self.__fdtyp.dump_opts(peer_file) - self.__fds = set(os.listdir("/proc/%s/fd" % self.__peer_pid)) + self.__fds = set(os.listdir("/proc/%s/fd" % self.__peer_pid)) - def stop(self): - fds = set(os.listdir("/proc/%s/fd" % self.__peer_pid)) - if fds != self.__fds: - raise test_fail_exc("File descriptors mismatch: %s %s" % (fds, self.__fds)) - i = 0 - for my_file, _ in self.__files: - msg = self.__get_message(i) - my_file.write(msg) - my_file.flush() - i += 1 - pid, status = os.waitpid(self.__peer_pid, 0) - with open(self.__name + ".out") as output: - print(output.read()) - self.__peer_pid = 0 - if not os.WIFEXITED(status) or os.WEXITSTATUS(status) != 42: - raise test_fail_exc("test failed with %d" % status) + def stop(self): + fds = set(os.listdir("/proc/%s/fd" % self.__peer_pid)) + if fds != self.__fds: + raise test_fail_exc("File descriptors mismatch: %s %s" % + (fds, self.__fds)) + i = 0 + for my_file, _ in self.__files: + msg = self.__get_message(i) + my_file.write(msg) + 
my_file.flush() + i += 1 + pid, status = os.waitpid(self.__peer_pid, 0) + with open(self.__name + ".out") as output: + print(output.read()) + self.__peer_pid = 0 + if not os.WIFEXITED(status) or os.WEXITSTATUS(status) != 42: + raise test_fail_exc("test failed with %d" % status) - def kill(self): - if self.__peer_pid: - os.kill(self.__peer_pid, signal.SIGKILL) + def kill(self): + if self.__peer_pid: + os.kill(self.__peer_pid, signal.SIGKILL) - def getname(self): - return self.__name + def getname(self): + return self.__name - def getpid(self): - return "%s" % self.__peer_pid + def getpid(self): + return "%s" % self.__peer_pid - def gone(self, force = True): - os.waitpid(self.__peer_pid, 0) - wait_pid_die(self.__peer_pid, self.__name) - self.__files = None + def gone(self, force=True): + os.waitpid(self.__peer_pid, 0) + wait_pid_die(self.__peer_pid, self.__name) + self.__files = None - def getdopts(self): - return self.__dump_opts + def getdopts(self): + return self.__dump_opts - def getropts(self): - self.__files = self.__fdtyp.create_fds() - ropts = ["--restore-sibling"] - for i in range(len(self.__files)): - my_file, peer_file = self.__files[i] - fd = peer_file.fileno() - fdflags = fcntl.fcntl(fd, fcntl.F_GETFD) & ~fcntl.FD_CLOEXEC - fcntl.fcntl(fd, fcntl.F_SETFD, fdflags) - peer_file_name = self.__peer_file_names[i] - ropts.extend(["--inherit-fd", "fd[%d]:%s" % (fd, peer_file_name)]) - return ropts + def getropts(self): + self.__files = self.__fdtyp.create_fds() + ropts = ["--restore-sibling"] + for i in range(len(self.__files)): + my_file, peer_file = self.__files[i] + fd = peer_file.fileno() + fdflags = fcntl.fcntl(fd, fcntl.F_GETFD) & ~fcntl.FD_CLOEXEC + fcntl.fcntl(fd, fcntl.F_SETFD, fdflags) + peer_file_name = self.__peer_file_names[i] + ropts.extend(["--inherit-fd", "fd[%d]:%s" % (fd, peer_file_name)]) + return ropts - def print_output(self): - pass + def print_output(self): + pass - def static(self): - return True + def static(self): + return True - def 
blocking(self): - return False + def blocking(self): + return False - @staticmethod - def available(): - pass + @staticmethod + def available(): + pass - @staticmethod - def cleanup(): - pass + @staticmethod + def cleanup(): + pass class groups_test(zdtm_test): - def __init__(self, name, desc, flavor, freezer): - zdtm_test.__init__(self, 'zdtm/lib/groups', desc, flavor, freezer) - if flavor.ns: - self.__real_name = name - with open(name) as fd: - self.__subs = map(lambda x: x.strip(), fd.readlines()) - print("Subs:\n%s" % '\n'.join(self.__subs)) - else: - self.__real_name = '' - self.__subs = [] + def __init__(self, name, desc, flavor, freezer): + zdtm_test.__init__(self, 'zdtm/lib/groups', desc, flavor, freezer) + if flavor.ns: + self.__real_name = name + with open(name) as fd: + self.__subs = map(lambda x: x.strip(), fd.readlines()) + print("Subs:\n%s" % '\n'.join(self.__subs)) + else: + self.__real_name = '' + self.__subs = [] - self._bins += self.__subs - self._deps += get_test_desc('zdtm/lib/groups')['deps'] - self._env = {'ZDTM_TESTS': self.__real_name} + self._bins += self.__subs + self._deps += get_test_desc('zdtm/lib/groups')['deps'] + self._env = {'ZDTM_TESTS': self.__real_name} - def __get_start_cmd(self, name): - tdir = os.path.dirname(name) - tname = os.path.basename(name) + def __get_start_cmd(self, name): + tdir = os.path.dirname(name) + tname = os.path.basename(name) - s_args = ['make', '--no-print-directory', '-C', tdir] - subprocess.check_call(s_args + [tname + '.cleanout']) - s = subprocess.Popen(s_args + ['--dry-run', tname + '.pid'], stdout = subprocess.PIPE) - cmd = s.stdout.readlines().pop().strip() - s.wait() + s_args = ['make', '--no-print-directory', '-C', tdir] + subprocess.check_call(s_args + [tname + '.cleanout']) + s = subprocess.Popen(s_args + ['--dry-run', tname + '.pid'], + stdout=subprocess.PIPE) + cmd = s.stdout.readlines().pop().strip() + s.wait() - return 'cd /' + tdir + ' && ' + cmd + return 'cd /' + tdir + ' && ' + cmd - def 
start(self): - if (self.__subs): - with open(self.__real_name + '.start', 'w') as f: - for test in self.__subs: - cmd = self.__get_start_cmd(test) - f.write(cmd + '\n') + def start(self): + if (self.__subs): + with open(self.__real_name + '.start', 'w') as f: + for test in self.__subs: + cmd = self.__get_start_cmd(test) + f.write(cmd + '\n') - with open(self.__real_name + '.stop', 'w') as f: - for test in self.__subs: - f.write('kill -TERM `cat /%s.pid`\n' % test) + with open(self.__real_name + '.stop', 'w') as f: + for test in self.__subs: + f.write('kill -TERM `cat /%s.pid`\n' % test) - zdtm_test.start(self) + zdtm_test.start(self) - def stop(self): - zdtm_test.stop(self) + def stop(self): + zdtm_test.stop(self) - for test in self.__subs: - res = tail(test + '.out') - if 'PASS' not in res.split(): - raise test_fail_exc("sub %s result check" % test) + for test in self.__subs: + res = tail(test + '.out') + if 'PASS' not in res.split(): + raise test_fail_exc("sub %s result check" % test) test_classes = {'zdtm': zdtm_test, 'inhfd': inhfd_test, 'groups': groups_test} @@ -791,495 +828,543 @@ join_ns_file = '/run/netns/zdtm_netns' class criu_cli: - @staticmethod - def run(action, args, criu_bin, fault = None, strace = [], preexec = None, nowait = False): - env = dict(os.environ, ASAN_OPTIONS = "log_path=asan.log:disable_coredump=0:detect_leaks=0") + @staticmethod + def run(action, + args, + criu_bin, + fault=None, + strace=[], + preexec=None, + nowait=False): + env = dict( + os.environ, + ASAN_OPTIONS="log_path=asan.log:disable_coredump=0:detect_leaks=0") - if fault: - print("Forcing %s fault" % fault) - env['CRIU_FAULT'] = fault + if fault: + print("Forcing %s fault" % fault) + env['CRIU_FAULT'] = fault - cr = subprocess.Popen(strace + [criu_bin, action, "--no-default-config"] + args, - env = env, close_fds = False, preexec_fn = preexec) - if nowait: - return cr - return cr.wait() + cr = subprocess.Popen(strace + + [criu_bin, action, "--no-default-config"] + args, + 
env=env, + close_fds=False, + preexec_fn=preexec) + if nowait: + return cr + return cr.wait() class criu_rpc_process: - def wait(self): - return self.criu.wait_pid(self.pid) + def wait(self): + return self.criu.wait_pid(self.pid) - def terminate(self): - os.kill(self.pid, signal.SIGTERM) + def terminate(self): + os.kill(self.pid, signal.SIGTERM) class criu_rpc: - @staticmethod - def __set_opts(criu, args, ctx): - while len(args) != 0: - arg = args.pop(0) - if arg == '-v4': - criu.opts.log_level = 4 - continue - if arg == '-o': - criu.opts.log_file = args.pop(0) - continue - if arg == '-D': - criu.opts.images_dir_fd = os.open(args.pop(0), os.O_DIRECTORY) - ctx['imgd'] = criu.opts.images_dir_fd - continue - if arg == '-t': - criu.opts.pid = int(args.pop(0)) - continue - if arg == '--pidfile': - ctx['pidf'] = args.pop(0) - continue - if arg == '--timeout': - criu.opts.timeout = int(args.pop(0)) - continue - if arg == '--restore-detached': - # Set by service by default - ctx['rd'] = True - continue - if arg == '--root': - criu.opts.root = args.pop(0) - continue - if arg == '--external': - criu.opts.external.append(args.pop(0)) - continue - if arg == '--status-fd': - fd = int(args.pop(0)) - os.write(fd, b"\0") - fcntl.fcntl(fd, fcntl.F_SETFD, fcntl.FD_CLOEXEC) - continue - if arg == '--port': - criu.opts.ps.port = int(args.pop(0)) - continue - if arg == '--address': - criu.opts.ps.address = args.pop(0) - continue - if arg == '--page-server': - continue - if arg == '--prev-images-dir': - criu.opts.parent_img = args.pop(0) - continue - if arg == '--track-mem': - criu.opts.track_mem = True - continue - if arg == '--tcp-established': - criu.opts.tcp_established = True - continue - if arg == '--restore-sibling': - criu.opts.rst_sibling = True - continue - if arg == "--inherit-fd": - inhfd = criu.opts.inherit_fd.add() - key = args.pop(0) - fd, key = key.split(":", 1) - inhfd.fd = int(fd[3:-1]) - inhfd.key = key - continue + @staticmethod + def __set_opts(criu, args, ctx): + 
while len(args) != 0: + arg = args.pop(0) + if arg == '-v4': + criu.opts.log_level = 4 + continue + if arg == '-o': + criu.opts.log_file = args.pop(0) + continue + if arg == '-D': + criu.opts.images_dir_fd = os.open(args.pop(0), os.O_DIRECTORY) + ctx['imgd'] = criu.opts.images_dir_fd + continue + if arg == '-t': + criu.opts.pid = int(args.pop(0)) + continue + if arg == '--pidfile': + ctx['pidf'] = args.pop(0) + continue + if arg == '--timeout': + criu.opts.timeout = int(args.pop(0)) + continue + if arg == '--restore-detached': + # Set by service by default + ctx['rd'] = True + continue + if arg == '--root': + criu.opts.root = args.pop(0) + continue + if arg == '--external': + criu.opts.external.append(args.pop(0)) + continue + if arg == '--status-fd': + fd = int(args.pop(0)) + os.write(fd, b"\0") + fcntl.fcntl(fd, fcntl.F_SETFD, fcntl.FD_CLOEXEC) + continue + if arg == '--port': + criu.opts.ps.port = int(args.pop(0)) + continue + if arg == '--address': + criu.opts.ps.address = args.pop(0) + continue + if arg == '--page-server': + continue + if arg == '--prev-images-dir': + criu.opts.parent_img = args.pop(0) + continue + if arg == '--track-mem': + criu.opts.track_mem = True + continue + if arg == '--tcp-established': + criu.opts.tcp_established = True + continue + if arg == '--restore-sibling': + criu.opts.rst_sibling = True + continue + if arg == "--inherit-fd": + inhfd = criu.opts.inherit_fd.add() + key = args.pop(0) + fd, key = key.split(":", 1) + inhfd.fd = int(fd[3:-1]) + inhfd.key = key + continue - raise test_fail_exc('RPC for %s required' % arg) + raise test_fail_exc('RPC for %s required' % arg) - @staticmethod - def run(action, args, criu_bin, fault = None, strace = [], preexec = None, nowait = False): - if fault: - raise test_fail_exc('RPC and FAULT not supported') - if strace: - raise test_fail_exc('RPC and SAT not supported') - if preexec: - raise test_fail_exc('RPC and PREEXEC not supported') + @staticmethod + def run(action, + args, + criu_bin, + 
fault=None, + strace=[], + preexec=None, + nowait=False): + if fault: + raise test_fail_exc('RPC and FAULT not supported') + if strace: + raise test_fail_exc('RPC and SAT not supported') + if preexec: + raise test_fail_exc('RPC and PREEXEC not supported') - ctx = {} # Object used to keep info untill action is done - criu = crpc.criu() - criu.use_binary(criu_bin) - criu_rpc.__set_opts(criu, args, ctx) - p = None + ctx = {} # Object used to keep info untill action is done + criu = crpc.criu() + criu.use_binary(criu_bin) + criu_rpc.__set_opts(criu, args, ctx) + p = None - try: - if action == 'dump': - criu.dump() - elif action == 'pre-dump': - criu.pre_dump() - elif action == 'restore': - if 'rd' not in ctx: - raise test_fail_exc('RPC Non-detached restore is impossible') + try: + if action == 'dump': + criu.dump() + elif action == 'pre-dump': + criu.pre_dump() + elif action == 'restore': + if 'rd' not in ctx: + raise test_fail_exc( + 'RPC Non-detached restore is impossible') - res = criu.restore() - pidf = ctx.get('pidf') - if pidf: - with open(pidf, 'w') as fd: - fd.write('%d\n' % res.pid) - elif action == "page-server": - res = criu.page_server_chld() - p = criu_rpc_process() - p.pid = res.pid - p.criu = criu - else: - raise test_fail_exc('RPC for %s required' % action) - except crpc.CRIUExceptionExternal as e: - print("Fail", e) - ret = -1 - else: - ret = 0 + res = criu.restore() + pidf = ctx.get('pidf') + if pidf: + with open(pidf, 'w') as fd: + fd.write('%d\n' % res.pid) + elif action == "page-server": + res = criu.page_server_chld() + p = criu_rpc_process() + p.pid = res.pid + p.criu = criu + else: + raise test_fail_exc('RPC for %s required' % action) + except crpc.CRIUExceptionExternal as e: + print("Fail", e) + ret = -1 + else: + ret = 0 - imgd = ctx.get('imgd') - if imgd: - os.close(imgd) + imgd = ctx.get('imgd') + if imgd: + os.close(imgd) - if nowait and ret == 0: - return p + if nowait and ret == 0: + return p - return ret + return ret class criu: - def 
__init__(self, opts): - self.__test = None - self.__dump_path = None - self.__iter = 0 - self.__prev_dump_iter = None - self.__page_server = bool(opts['page_server']) - self.__remote_lazy_pages = bool(opts['remote_lazy_pages']) - self.__lazy_pages = (self.__remote_lazy_pages or - bool(opts['lazy_pages'])) - self.__lazy_migrate = bool(opts['lazy_migrate']) - self.__restore_sibling = bool(opts['sibling']) - self.__join_ns = bool(opts['join_ns']) - self.__empty_ns = bool(opts['empty_ns']) - self.__fault = opts['fault'] - self.__script = opts['script'] - self.__sat = bool(opts['sat']) - self.__dedup = bool(opts['dedup']) - self.__mdedup = bool(opts['noauto_dedup']) - self.__user = bool(opts['user']) - self.__leave_stopped = bool(opts['stop']) - self.__criu = (opts['rpc'] and criu_rpc or criu_cli) - self.__show_stats = bool(opts['show_stats']) - self.__lazy_pages_p = None - self.__page_server_p = None - self.__dump_process = None - self.__tls = self.__tls_options() if opts['tls'] else [] - self.__criu_bin = opts['criu_bin'] - self.__crit_bin = opts['crit_bin'] + def __init__(self, opts): + self.__test = None + self.__dump_path = None + self.__iter = 0 + self.__prev_dump_iter = None + self.__page_server = bool(opts['page_server']) + self.__remote_lazy_pages = bool(opts['remote_lazy_pages']) + self.__lazy_pages = (self.__remote_lazy_pages or + bool(opts['lazy_pages'])) + self.__lazy_migrate = bool(opts['lazy_migrate']) + self.__restore_sibling = bool(opts['sibling']) + self.__join_ns = bool(opts['join_ns']) + self.__empty_ns = bool(opts['empty_ns']) + self.__fault = opts['fault'] + self.__script = opts['script'] + self.__sat = bool(opts['sat']) + self.__dedup = bool(opts['dedup']) + self.__mdedup = bool(opts['noauto_dedup']) + self.__user = bool(opts['user']) + self.__leave_stopped = bool(opts['stop']) + self.__criu = (opts['rpc'] and criu_rpc or criu_cli) + self.__show_stats = bool(opts['show_stats']) + self.__lazy_pages_p = None + self.__page_server_p = None + 
self.__dump_process = None + self.__tls = self.__tls_options() if opts['tls'] else [] + self.__criu_bin = opts['criu_bin'] + self.__crit_bin = opts['crit_bin'] - def fini(self): - if self.__lazy_migrate: - ret = self.__dump_process.wait() - if self.__lazy_pages_p: - ret = self.__lazy_pages_p.wait() - grep_errors(os.path.join(self.__ddir(), "lazy-pages.log")) - self.__lazy_pages_p = None - if ret: - raise test_fail_exc("criu lazy-pages exited with %s" % ret) - if self.__page_server_p: - ret = self.__page_server_p.wait() - grep_errors(os.path.join(self.__ddir(), "page-server.log")) - self.__page_server_p = None - if ret: - raise test_fail_exc("criu page-server exited with %s" % ret) - if self.__dump_process: - ret = self.__dump_process.wait() - grep_errors(os.path.join(self.__ddir(), "dump.log")) - self.__dump_process = None - if ret: - raise test_fail_exc("criu dump exited with %s" % ret) - return + def fini(self): + if self.__lazy_migrate: + ret = self.__dump_process.wait() + if self.__lazy_pages_p: + ret = self.__lazy_pages_p.wait() + grep_errors(os.path.join(self.__ddir(), "lazy-pages.log")) + self.__lazy_pages_p = None + if ret: + raise test_fail_exc("criu lazy-pages exited with %s" % ret) + if self.__page_server_p: + ret = self.__page_server_p.wait() + grep_errors(os.path.join(self.__ddir(), "page-server.log")) + self.__page_server_p = None + if ret: + raise test_fail_exc("criu page-server exited with %s" % ret) + if self.__dump_process: + ret = self.__dump_process.wait() + grep_errors(os.path.join(self.__ddir(), "dump.log")) + self.__dump_process = None + if ret: + raise test_fail_exc("criu dump exited with %s" % ret) + return - def logs(self): - return self.__dump_path + def logs(self): + return self.__dump_path - def set_test(self, test): - self.__test = test - self.__dump_path = "dump/" + test.getname() + "/" + test.getpid() - if os.path.exists(self.__dump_path): - for i in range(100): - newpath = self.__dump_path + "." 
+ str(i) - if not os.path.exists(newpath): - os.rename(self.__dump_path, newpath) - break - else: - raise test_fail_exc("couldn't find dump dir %s" % self.__dump_path) + def set_test(self, test): + self.__test = test + self.__dump_path = "dump/" + test.getname() + "/" + test.getpid() + if os.path.exists(self.__dump_path): + for i in range(100): + newpath = self.__dump_path + "." + str(i) + if not os.path.exists(newpath): + os.rename(self.__dump_path, newpath) + break + else: + raise test_fail_exc("couldn't find dump dir %s" % + self.__dump_path) - os.makedirs(self.__dump_path) + os.makedirs(self.__dump_path) - def cleanup(self): - if self.__dump_path: - print("Removing %s" % self.__dump_path) - shutil.rmtree(self.__dump_path) + def cleanup(self): + if self.__dump_path: + print("Removing %s" % self.__dump_path) + shutil.rmtree(self.__dump_path) - def __tls_options(self): - pki_dir = os.path.dirname(os.path.abspath(__file__)) + "/pki" - return ["--tls", "--tls-no-cn-verify", - "--tls-key", pki_dir + "/key.pem", - "--tls-cert", pki_dir + "/cert.pem", - "--tls-cacert", pki_dir + "/cacert.pem"] + def __tls_options(self): + pki_dir = os.path.dirname(os.path.abspath(__file__)) + "/pki" + return [ + "--tls", "--tls-no-cn-verify", "--tls-key", pki_dir + "/key.pem", + "--tls-cert", pki_dir + "/cert.pem", "--tls-cacert", + pki_dir + "/cacert.pem" + ] - def __ddir(self): - return os.path.join(self.__dump_path, "%d" % self.__iter) + def __ddir(self): + return os.path.join(self.__dump_path, "%d" % self.__iter) - def set_user_id(self): - # Numbers should match those in zdtm_test - os.setresgid(58467, 58467, 58467) - os.setresuid(18943, 18943, 18943) + def set_user_id(self): + # Numbers should match those in zdtm_test + os.setresgid(58467, 58467, 58467) + os.setresuid(18943, 18943, 18943) - def __criu_act(self, action, opts = [], log = None, nowait = False): - if not log: - log = action + ".log" + def __criu_act(self, action, opts=[], log=None, nowait=False): + if not log: + log = 
action + ".log" - s_args = ["-o", log, "-D", self.__ddir(), "-v4"] + opts + s_args = ["-o", log, "-D", self.__ddir(), "-v4"] + opts - with open(os.path.join(self.__ddir(), action + '.cropt'), 'w') as f: - f.write(' '.join(s_args) + '\n') + with open(os.path.join(self.__ddir(), action + '.cropt'), 'w') as f: + f.write(' '.join(s_args) + '\n') - print("Run criu " + action) + print("Run criu " + action) - strace = [] - if self.__sat: - fname = os.path.join(self.__ddir(), action + '.strace') - print_fname(fname, 'strace') - strace = ["strace", "-o", fname, '-T'] - if action == 'restore': - strace += ['-f'] - s_args += ['--action-script', os.getcwd() + '/../scripts/fake-restore.sh'] + strace = [] + if self.__sat: + fname = os.path.join(self.__ddir(), action + '.strace') + print_fname(fname, 'strace') + strace = ["strace", "-o", fname, '-T'] + if action == 'restore': + strace += ['-f'] + s_args += [ + '--action-script', + os.getcwd() + '/../scripts/fake-restore.sh' + ] - if self.__script: - s_args += ['--action-script', self.__script] + if self.__script: + s_args += ['--action-script', self.__script] - if action == "restore": - preexec = None - else: - preexec = self.__user and self.set_user_id or None + if action == "restore": + preexec = None + else: + preexec = self.__user and self.set_user_id or None - __ddir = self.__ddir() + __ddir = self.__ddir() - status_fds = None - if nowait: - status_fds = os.pipe() - fd = status_fds[1] - fdflags = fcntl.fcntl(fd, fcntl.F_GETFD) - fcntl.fcntl(fd, fcntl.F_SETFD, fdflags & ~fcntl.FD_CLOEXEC) - s_args += ["--status-fd", str(fd)] + status_fds = None + if nowait: + status_fds = os.pipe() + fd = status_fds[1] + fdflags = fcntl.fcntl(fd, fcntl.F_GETFD) + fcntl.fcntl(fd, fcntl.F_SETFD, fdflags & ~fcntl.FD_CLOEXEC) + s_args += ["--status-fd", str(fd)] - with open("/proc/sys/kernel/ns_last_pid") as ns_last_pid_fd: - ns_last_pid = ns_last_pid_fd.read() + with open("/proc/sys/kernel/ns_last_pid") as ns_last_pid_fd: + ns_last_pid = 
ns_last_pid_fd.read() - ret = self.__criu.run(action, s_args, self.__criu_bin, self.__fault, strace, preexec, nowait) + ret = self.__criu.run(action, s_args, self.__criu_bin, self.__fault, + strace, preexec, nowait) - if nowait: - os.close(status_fds[1]) - if os.read(status_fds[0], 1) != b'\0': - ret = ret.wait() - if self.__test.blocking(): - raise test_fail_expected_exc(action) - else: - raise test_fail_exc("criu %s exited with %s" % (action, ret)) - os.close(status_fds[0]) - return ret + if nowait: + os.close(status_fds[1]) + if os.read(status_fds[0], 1) != b'\0': + ret = ret.wait() + if self.__test.blocking(): + raise test_fail_expected_exc(action) + else: + raise test_fail_exc("criu %s exited with %s" % + (action, ret)) + os.close(status_fds[0]) + return ret - grep_errors(os.path.join(__ddir, log)) - if ret != 0: - if self.__fault and int(self.__fault) < 128: - try_run_hook(self.__test, ["--fault", action]) - if action == "dump": - # create a clean directory for images - os.rename(__ddir, __ddir + ".fail") - os.mkdir(__ddir) - os.chmod(__ddir, 0o777) - else: - # on restore we move only a log file, because we need images - os.rename(os.path.join(__ddir, log), os.path.join(__ddir, log + ".fail")) - # restore ns_last_pid to avoid a case when criu gets - # PID of one of restored processes. 
- with open("/proc/sys/kernel/ns_last_pid", "w+") as fd: - fd.write(ns_last_pid) - # try again without faults - print("Run criu " + action) - ret = self.__criu.run(action, s_args, self.__criu_bin, False, strace, preexec) - grep_errors(os.path.join(__ddir, log)) - if ret == 0: - return - rst_succeeded = os.access(os.path.join(__ddir, "restore-succeeded"), os.F_OK) - if self.__test.blocking() or (self.__sat and action == 'restore' and rst_succeeded): - raise test_fail_expected_exc(action) - else: - raise test_fail_exc("CRIU %s" % action) + grep_errors(os.path.join(__ddir, log)) + if ret != 0: + if self.__fault and int(self.__fault) < 128: + try_run_hook(self.__test, ["--fault", action]) + if action == "dump": + # create a clean directory for images + os.rename(__ddir, __ddir + ".fail") + os.mkdir(__ddir) + os.chmod(__ddir, 0o777) + else: + # on restore we move only a log file, because we need images + os.rename(os.path.join(__ddir, log), + os.path.join(__ddir, log + ".fail")) + # restore ns_last_pid to avoid a case when criu gets + # PID of one of restored processes. 
+ with open("/proc/sys/kernel/ns_last_pid", "w+") as fd: + fd.write(ns_last_pid) + # try again without faults + print("Run criu " + action) + ret = self.__criu.run(action, s_args, self.__criu_bin, False, + strace, preexec) + grep_errors(os.path.join(__ddir, log)) + if ret == 0: + return + rst_succeeded = os.access( + os.path.join(__ddir, "restore-succeeded"), os.F_OK) + if self.__test.blocking() or (self.__sat and action == 'restore' and + rst_succeeded): + raise test_fail_expected_exc(action) + else: + raise test_fail_exc("CRIU %s" % action) - def __stats_file(self, action): - return os.path.join(self.__ddir(), "stats-%s" % action) + def __stats_file(self, action): + return os.path.join(self.__ddir(), "stats-%s" % action) - def show_stats(self, action): - if not self.__show_stats: - return + def show_stats(self, action): + if not self.__show_stats: + return - subprocess.Popen([self.__crit_bin, "show", self.__stats_file(action)]).wait() + subprocess.Popen([self.__crit_bin, "show", + self.__stats_file(action)]).wait() - def check_pages_counts(self): - if not os.access(self.__stats_file("dump"), os.R_OK): - return + def check_pages_counts(self): + if not os.access(self.__stats_file("dump"), os.R_OK): + return - stats_written = -1 - with open(self.__stats_file("dump"), 'rb') as stfile: - stats = crpc.images.load(stfile) - stent = stats['entries'][0]['dump'] - stats_written = int(stent['shpages_written']) + int(stent['pages_written']) + stats_written = -1 + with open(self.__stats_file("dump"), 'rb') as stfile: + stats = crpc.images.load(stfile) + stent = stats['entries'][0]['dump'] + stats_written = int(stent['shpages_written']) + int( + stent['pages_written']) - real_written = 0 - for f in os.listdir(self.__ddir()): - if f.startswith('pages-'): - real_written += os.path.getsize(os.path.join(self.__ddir(), f)) + real_written = 0 + for f in os.listdir(self.__ddir()): + if f.startswith('pages-'): + real_written += os.path.getsize(os.path.join(self.__ddir(), f)) - r_pages 
= real_written / mmap.PAGESIZE - r_off = real_written % mmap.PAGESIZE - if (stats_written != r_pages) or (r_off != 0): - print("ERROR: bad page counts, stats = %d real = %d(%d)" % (stats_written, r_pages, r_off)) - raise test_fail_exc("page counts mismatch") + r_pages = real_written / mmap.PAGESIZE + r_off = real_written % mmap.PAGESIZE + if (stats_written != r_pages) or (r_off != 0): + print("ERROR: bad page counts, stats = %d real = %d(%d)" % + (stats_written, r_pages, r_off)) + raise test_fail_exc("page counts mismatch") - def dump(self, action, opts = []): - self.__iter += 1 - os.mkdir(self.__ddir()) - os.chmod(self.__ddir(), 0o777) + def dump(self, action, opts=[]): + self.__iter += 1 + os.mkdir(self.__ddir()) + os.chmod(self.__ddir(), 0o777) - a_opts = ["-t", self.__test.getpid()] - if self.__prev_dump_iter: - a_opts += ["--prev-images-dir", "../%d" % self.__prev_dump_iter, "--track-mem"] - self.__prev_dump_iter = self.__iter + a_opts = ["-t", self.__test.getpid()] + if self.__prev_dump_iter: + a_opts += [ + "--prev-images-dir", + "../%d" % self.__prev_dump_iter, "--track-mem" + ] + self.__prev_dump_iter = self.__iter - if self.__page_server: - print("Adding page server") + if self.__page_server: + print("Adding page server") - ps_opts = ["--port", "12345"] + self.__tls - if self.__dedup: - ps_opts += ["--auto-dedup"] + ps_opts = ["--port", "12345"] + self.__tls + if self.__dedup: + ps_opts += ["--auto-dedup"] - self.__page_server_p = self.__criu_act("page-server", opts = ps_opts, nowait = True) - a_opts += ["--page-server", "--address", "127.0.0.1", "--port", "12345"] + self.__tls + self.__page_server_p = self.__criu_act("page-server", + opts=ps_opts, + nowait=True) + a_opts += [ + "--page-server", "--address", "127.0.0.1", "--port", "12345" + ] + self.__tls - a_opts += self.__test.getdopts() + a_opts += self.__test.getdopts() - if self.__dedup: - a_opts += ["--auto-dedup"] + if self.__dedup: + a_opts += ["--auto-dedup"] - a_opts += ["--timeout", "10"] + 
a_opts += ["--timeout", "10"] - criu_dir = os.path.dirname(os.getcwd()) - if os.getenv("GCOV"): - a_opts.append('--external') - a_opts.append('mnt[%s]:zdtm' % criu_dir) + criu_dir = os.path.dirname(os.getcwd()) + if os.getenv("GCOV"): + a_opts.append('--external') + a_opts.append('mnt[%s]:zdtm' % criu_dir) - if self.__leave_stopped: - a_opts += ['--leave-stopped'] - if self.__empty_ns: - a_opts += ['--empty-ns', 'net'] + if self.__leave_stopped: + a_opts += ['--leave-stopped'] + if self.__empty_ns: + a_opts += ['--empty-ns', 'net'] - nowait = False - if self.__lazy_migrate and action == "dump": - a_opts += ["--lazy-pages", "--port", "12345"] + self.__tls - nowait = True - self.__dump_process = self.__criu_act(action, opts = a_opts + opts, nowait = nowait) - if self.__mdedup and self.__iter > 1: - self.__criu_act("dedup", opts = []) + nowait = False + if self.__lazy_migrate and action == "dump": + a_opts += ["--lazy-pages", "--port", "12345"] + self.__tls + nowait = True + self.__dump_process = self.__criu_act(action, + opts=a_opts + opts, + nowait=nowait) + if self.__mdedup and self.__iter > 1: + self.__criu_act("dedup", opts=[]) - self.show_stats("dump") - self.check_pages_counts() + self.show_stats("dump") + self.check_pages_counts() - if self.__leave_stopped: - pstree_check_stopped(self.__test.getpid()) - pstree_signal(self.__test.getpid(), signal.SIGKILL) + if self.__leave_stopped: + pstree_check_stopped(self.__test.getpid()) + pstree_signal(self.__test.getpid(), signal.SIGKILL) - if self.__page_server_p: - ret = self.__page_server_p.wait() - grep_errors(os.path.join(self.__ddir(), "page-server.log")) - self.__page_server_p = None - if ret: - raise test_fail_exc("criu page-server exited with %d" % ret) + if self.__page_server_p: + ret = self.__page_server_p.wait() + grep_errors(os.path.join(self.__ddir(), "page-server.log")) + self.__page_server_p = None + if ret: + raise test_fail_exc("criu page-server exited with %d" % ret) - def restore(self): - r_opts = [] 
- if self.__restore_sibling: - r_opts = ["--restore-sibling"] - self.__test.auto_reap = False - r_opts += self.__test.getropts() - if self.__join_ns: - r_opts.append("--join-ns") - r_opts.append("net:%s" % join_ns_file) - if self.__empty_ns: - r_opts += ['--empty-ns', 'net'] - r_opts += ['--action-script', os.getcwd() + '/empty-netns-prep.sh'] + def restore(self): + r_opts = [] + if self.__restore_sibling: + r_opts = ["--restore-sibling"] + self.__test.auto_reap = False + r_opts += self.__test.getropts() + if self.__join_ns: + r_opts.append("--join-ns") + r_opts.append("net:%s" % join_ns_file) + if self.__empty_ns: + r_opts += ['--empty-ns', 'net'] + r_opts += ['--action-script', os.getcwd() + '/empty-netns-prep.sh'] - if self.__dedup: - r_opts += ["--auto-dedup"] + if self.__dedup: + r_opts += ["--auto-dedup"] - if self.__dedup: - r_opts += ["--auto-dedup"] + self.__prev_dump_iter = None + criu_dir = os.path.dirname(os.getcwd()) + if os.getenv("GCOV"): + r_opts.append('--external') + r_opts.append('mnt[zdtm]:%s' % criu_dir) - self.__prev_dump_iter = None - criu_dir = os.path.dirname(os.getcwd()) - if os.getenv("GCOV"): - r_opts.append('--external') - r_opts.append('mnt[zdtm]:%s' % criu_dir) + if self.__lazy_pages or self.__lazy_migrate: + lp_opts = [] + if self.__remote_lazy_pages or self.__lazy_migrate: + lp_opts += [ + "--page-server", "--port", "12345", "--address", + "127.0.0.1" + ] + self.__tls - if self.__lazy_pages or self.__lazy_migrate: - lp_opts = [] - if self.__remote_lazy_pages or self.__lazy_migrate: - lp_opts += ["--page-server", "--port", "12345", - "--address", "127.0.0.1"] + self.__tls + if self.__remote_lazy_pages: + ps_opts = [ + "--pidfile", "ps.pid", "--port", "12345", "--lazy-pages" + ] + self.__tls + self.__page_server_p = self.__criu_act("page-server", + opts=ps_opts, + nowait=True) + self.__lazy_pages_p = self.__criu_act("lazy-pages", + opts=lp_opts, + nowait=True) + r_opts += ["--lazy-pages"] - if self.__remote_lazy_pages: - ps_opts = 
["--pidfile", "ps.pid", - "--port", "12345", "--lazy-pages"] + self.__tls - self.__page_server_p = self.__criu_act("page-server", opts = ps_opts, nowait = True) - self.__lazy_pages_p = self.__criu_act("lazy-pages", opts = lp_opts, nowait = True) - r_opts += ["--lazy-pages"] + if self.__leave_stopped: + r_opts += ['--leave-stopped'] - if self.__leave_stopped: - r_opts += ['--leave-stopped'] + self.__criu_act("restore", opts=r_opts + ["--restore-detached"]) + self.show_stats("restore") - self.__criu_act("restore", opts = r_opts + ["--restore-detached"]) - self.show_stats("restore") + if self.__leave_stopped: + pstree_check_stopped(self.__test.getpid()) + pstree_signal(self.__test.getpid(), signal.SIGCONT) - if self.__leave_stopped: - pstree_check_stopped(self.__test.getpid()) - pstree_signal(self.__test.getpid(), signal.SIGCONT) + @staticmethod + def check(feature): + return criu_cli.run( + "check", ["--no-default-config", "-v0", "--feature", feature], + opts['criu_bin']) == 0 - @staticmethod - def check(feature): - return criu_cli.run("check", ["--no-default-config", "-v0", - "--feature", feature], opts['criu_bin']) == 0 + @staticmethod + def available(): + if not os.access(opts['criu_bin'], os.X_OK): + print("CRIU binary not found at %s" % opts['criu_bin']) + sys.exit(1) - @staticmethod - def available(): - if not os.access(opts['criu_bin'], os.X_OK): - print("CRIU binary not found at %s" % opts['criu_bin']) - sys.exit(1) - - def kill(self): - if self.__lazy_pages_p: - self.__lazy_pages_p.terminate() - print("criu lazy-pages exited with %s" % self.__lazy_pages_p.wait()) - grep_errors(os.path.join(self.__ddir(), "lazy-pages.log")) - self.__lazy_pages_p = None - if self.__page_server_p: - self.__page_server_p.terminate() - print("criu page-server exited with %s" % self.__page_server_p.wait()) - grep_errors(os.path.join(self.__ddir(), "page-server.log")) - self.__page_server_p = None - if self.__dump_process: - self.__dump_process.terminate() - print("criu dump exited 
with %s" % self.__dump_process.wait()) - grep_errors(os.path.join(self.__ddir(), "dump.log")) - self.__dump_process = None + def kill(self): + if self.__lazy_pages_p: + self.__lazy_pages_p.terminate() + print("criu lazy-pages exited with %s" % + self.__lazy_pages_p.wait()) + grep_errors(os.path.join(self.__ddir(), "lazy-pages.log")) + self.__lazy_pages_p = None + if self.__page_server_p: + self.__page_server_p.terminate() + print("criu page-server exited with %s" % + self.__page_server_p.wait()) + grep_errors(os.path.join(self.__ddir(), "page-server.log")) + self.__page_server_p = None + if self.__dump_process: + self.__dump_process.terminate() + print("criu dump exited with %s" % self.__dump_process.wait()) + grep_errors(os.path.join(self.__ddir(), "dump.log")) + self.__dump_process = None def try_run_hook(test, args): - hname = test.getname() + '.hook' - if os.access(hname, os.X_OK): - print("Running %s(%s)" % (hname, ', '.join(args))) - hook = subprocess.Popen([hname] + args) - if hook.wait() != 0: - raise test_fail_exc("hook " + " ".join(args)) + hname = test.getname() + '.hook' + if os.access(hname, os.X_OK): + print("Running %s(%s)" % (hname, ', '.join(args))) + hook = subprocess.Popen([hname] + args) + if hook.wait() != 0: + raise test_fail_exc("hook " + " ".join(args)) # @@ -1290,583 +1375,615 @@ do_sbs = False def init_sbs(): - if sys.stdout.isatty(): - global do_sbs - do_sbs = True - else: - print("Can't do step-by-step in this runtime") + if sys.stdout.isatty(): + global do_sbs + do_sbs = True + else: + print("Can't do step-by-step in this runtime") def sbs(what): - if do_sbs: - input("Pause at %s. Press Enter to continue." % what) + if do_sbs: + input("Pause at %s. Press Enter to continue." 
% what) # # Main testing entity -- dump (probably with pre-dumps) and restore # def iter_parm(opt, dflt): - x = ((opt or str(dflt)) + ":0").split(':') - return (range(0, int(x[0])), float(x[1])) + x = ((opt or str(dflt)) + ":0").split(':') + return (range(0, int(x[0])), float(x[1])) def cr(cr_api, test, opts): - if opts['nocr']: - return + if opts['nocr']: + return - cr_api.set_test(test) + cr_api.set_test(test) - iters = iter_parm(opts['iters'], 1) - for i in iters[0]: - pres = iter_parm(opts['pre'], 0) - for p in pres[0]: - if opts['snaps']: - cr_api.dump("dump", opts = ["--leave-running", "--track-mem"]) - else: - cr_api.dump("pre-dump") - try_run_hook(test, ["--post-pre-dump"]) - test.pre_dump_notify() - time.sleep(pres[1]) + iters = iter_parm(opts['iters'], 1) + for i in iters[0]: + pres = iter_parm(opts['pre'], 0) + for p in pres[0]: + if opts['snaps']: + cr_api.dump("dump", opts=["--leave-running", "--track-mem"]) + else: + cr_api.dump("pre-dump") + try_run_hook(test, ["--post-pre-dump"]) + test.pre_dump_notify() + time.sleep(pres[1]) - sbs('pre-dump') + sbs('pre-dump') - os.environ["ZDTM_TEST_PID"] = str(test.getpid()) - if opts['norst']: - try_run_hook(test, ["--pre-dump"]) - cr_api.dump("dump", opts = ["--leave-running"]) - else: - try_run_hook(test, ["--pre-dump"]) - cr_api.dump("dump") - if not opts['lazy_migrate']: - test.gone() - else: - test.unlink_pidfile() - sbs('pre-restore') - try_run_hook(test, ["--pre-restore"]) - cr_api.restore() - os.environ["ZDTM_TEST_PID"] = str(test.getpid()) - os.environ["ZDTM_IMG_DIR"] = cr_api.logs() - try_run_hook(test, ["--post-restore"]) - sbs('post-restore') + os.environ["ZDTM_TEST_PID"] = str(test.getpid()) + if opts['norst']: + try_run_hook(test, ["--pre-dump"]) + cr_api.dump("dump", opts=["--leave-running"]) + else: + try_run_hook(test, ["--pre-dump"]) + cr_api.dump("dump") + if not opts['lazy_migrate']: + test.gone() + else: + test.unlink_pidfile() + sbs('pre-restore') + try_run_hook(test, ["--pre-restore"]) + 
cr_api.restore() + os.environ["ZDTM_TEST_PID"] = str(test.getpid()) + os.environ["ZDTM_IMG_DIR"] = cr_api.logs() + try_run_hook(test, ["--post-restore"]) + sbs('post-restore') - time.sleep(iters[1]) + time.sleep(iters[1]) # Additional checks that can be done outside of test process + def get_visible_state(test): - maps = {} - files = {} - mounts = {} + maps = {} + files = {} + mounts = {} - if not getattr(test, "static", lambda: False)() or \ - not getattr(test, "ns", lambda: False)(): - return ({}, {}, {}) + if not getattr(test, "static", lambda: False)() or \ + not getattr(test, "ns", lambda: False)(): + return ({}, {}, {}) - r = re.compile('^[0-9]+$') - pids = filter(lambda p: r.match(p), os.listdir("/proc/%s/root/proc/" % test.getpid())) - for pid in pids: - files[pid] = set(os.listdir("/proc/%s/root/proc/%s/fd" % (test.getpid(), pid))) + r = re.compile('^[0-9]+$') + pids = filter(lambda p: r.match(p), + os.listdir("/proc/%s/root/proc/" % test.getpid())) + for pid in pids: + files[pid] = set( + os.listdir("/proc/%s/root/proc/%s/fd" % (test.getpid(), pid))) - cmaps = [[0, 0, ""]] - last = 0 - mapsfd = open("/proc/%s/root/proc/%s/maps" % (test.getpid(), pid)) - for mp in mapsfd: - m = list(map(lambda x: int('0x' + x, 0), mp.split()[0].split('-'))) + cmaps = [[0, 0, ""]] + last = 0 + mapsfd = open("/proc/%s/root/proc/%s/maps" % (test.getpid(), pid)) + for mp in mapsfd: + m = list(map(lambda x: int('0x' + x, 0), mp.split()[0].split('-'))) - m.append(mp.split()[1]) + m.append(mp.split()[1]) - f = "/proc/%s/root/proc/%s/map_files/%s" % (test.getpid(), pid, mp.split()[0]) - if os.access(f, os.F_OK): - st = os.lstat(f) - m.append(oct(st.st_mode)) + f = "/proc/%s/root/proc/%s/map_files/%s" % (test.getpid(), pid, + mp.split()[0]) + if os.access(f, os.F_OK): + st = os.lstat(f) + m.append(oct(st.st_mode)) - if cmaps[last][1] == m[0] and cmaps[last][2] == m[2]: - cmaps[last][1] = m[1] - else: - cmaps.append(m) - last += 1 - mapsfd.close() + if cmaps[last][1] == m[0] and 
cmaps[last][2] == m[2]: + cmaps[last][1] = m[1] + else: + cmaps.append(m) + last += 1 + mapsfd.close() - maps[pid] = set(map(lambda x: '%x-%x %s' % (x[0], x[1], " ".join(x[2:])), cmaps)) + maps[pid] = set( + map(lambda x: '%x-%x %s' % (x[0], x[1], " ".join(x[2:])), cmaps)) - cmounts = [] - try: - r = re.compile(r"^\S+\s\S+\s\S+\s(\S+)\s(\S+)\s(\S+)\s[^-]*?(shared)?[^-]*?(master)?[^-]*?-") - with open("/proc/%s/root/proc/%s/mountinfo" % (test.getpid(), pid)) as mountinfo: - for m in mountinfo: - cmounts.append(r.match(m).groups()) - except IOError as e: - if e.errno != errno.EINVAL: - raise e - mounts[pid] = cmounts - return files, maps, mounts + cmounts = [] + try: + r = re.compile( + r"^\S+\s\S+\s\S+\s(\S+)\s(\S+)\s(\S+)\s[^-]*?(shared)?[^-]*?(master)?[^-]*?-" + ) + with open("/proc/%s/root/proc/%s/mountinfo" % + (test.getpid(), pid)) as mountinfo: + for m in mountinfo: + cmounts.append(r.match(m).groups()) + except IOError as e: + if e.errno != errno.EINVAL: + raise e + mounts[pid] = cmounts + return files, maps, mounts def check_visible_state(test, state, opts): - new = get_visible_state(test) + new = get_visible_state(test) - for pid in state[0].keys(): - fnew = new[0][pid] - fold = state[0][pid] - if fnew != fold: - print("%s: Old files lost: %s" % (pid, fold - fnew)) - print("%s: New files appeared: %s" % (pid, fnew - fold)) - raise test_fail_exc("fds compare") + for pid in state[0].keys(): + fnew = new[0][pid] + fold = state[0][pid] + if fnew != fold: + print("%s: Old files lost: %s" % (pid, fold - fnew)) + print("%s: New files appeared: %s" % (pid, fnew - fold)) + raise test_fail_exc("fds compare") - old_maps = state[1][pid] - new_maps = new[1][pid] - if os.getenv("COMPAT_TEST"): - # the vsyscall vma isn't unmapped from x32 processes - vsyscall = u"ffffffffff600000-ffffffffff601000 r-xp" - if vsyscall in new_maps and vsyscall not in old_maps: - new_maps.remove(vsyscall) - if old_maps != new_maps: - print("%s: Old maps lost: %s" % (pid, old_maps - new_maps)) 
- print("%s: New maps appeared: %s" % (pid, new_maps - old_maps)) - if not opts['fault']: # skip parasite blob - raise test_fail_exc("maps compare") + old_maps = state[1][pid] + new_maps = new[1][pid] + if os.getenv("COMPAT_TEST"): + # the vsyscall vma isn't unmapped from x32 processes + vsyscall = u"ffffffffff600000-ffffffffff601000 r-xp" + if vsyscall in new_maps and vsyscall not in old_maps: + new_maps.remove(vsyscall) + if old_maps != new_maps: + print("%s: Old maps lost: %s" % (pid, old_maps - new_maps)) + print("%s: New maps appeared: %s" % (pid, new_maps - old_maps)) + if not opts['fault']: # skip parasite blob + raise test_fail_exc("maps compare") - old_mounts = state[2][pid] - new_mounts = new[2][pid] - for i in range(len(old_mounts)): - m = old_mounts.pop(0) - if m in new_mounts: - new_mounts.remove(m) - else: - old_mounts.append(m) - if old_mounts or new_mounts: - print("%s: Old mounts lost: %s" % (pid, old_mounts)) - print("%s: New mounts appeared: %s" % (pid, new_mounts)) - raise test_fail_exc("mounts compare") + old_mounts = state[2][pid] + new_mounts = new[2][pid] + for i in range(len(old_mounts)): + m = old_mounts.pop(0) + if m in new_mounts: + new_mounts.remove(m) + else: + old_mounts.append(m) + if old_mounts or new_mounts: + print("%s: Old mounts lost: %s" % (pid, old_mounts)) + print("%s: New mounts appeared: %s" % (pid, new_mounts)) + raise test_fail_exc("mounts compare") - if '--link-remap' in test.getdopts(): - import glob - link_remap_list = glob.glob(os.path.dirname(test.getname()) + '/link_remap*') - if link_remap_list: - print("%s: link-remap files left: %s" % (test.getname(), link_remap_list)) - raise test_fail_exc("link remaps left") + if '--link-remap' in test.getdopts(): + import glob + link_remap_list = glob.glob( + os.path.dirname(test.getname()) + '/link_remap*') + if link_remap_list: + print("%s: link-remap files left: %s" % + (test.getname(), link_remap_list)) + raise test_fail_exc("link remaps left") class noop_freezer: - def 
__init__(self): - self.kernel = False + def __init__(self): + self.kernel = False - def attach(self): - pass + def attach(self): + pass - def freeze(self): - pass + def freeze(self): + pass - def thaw(self): - pass + def thaw(self): + pass - def getdopts(self): - return [] + def getdopts(self): + return [] - def getropts(self): - return [] + def getropts(self): + return [] class cg_freezer: - def __init__(self, path, state): - self.__path = '/sys/fs/cgroup/freezer/' + path - self.__state = state - self.kernel = True + def __init__(self, path, state): + self.__path = '/sys/fs/cgroup/freezer/' + path + self.__state = state + self.kernel = True - def attach(self): - if not os.access(self.__path, os.F_OK): - os.makedirs(self.__path) - with open(self.__path + '/tasks', 'w') as f: - f.write('0') + def attach(self): + if not os.access(self.__path, os.F_OK): + os.makedirs(self.__path) + with open(self.__path + '/tasks', 'w') as f: + f.write('0') - def __set_state(self, state): - with open(self.__path + '/freezer.state', 'w') as f: - f.write(state) + def __set_state(self, state): + with open(self.__path + '/freezer.state', 'w') as f: + f.write(state) - def freeze(self): - if self.__state.startswith('f'): - self.__set_state('FROZEN') + def freeze(self): + if self.__state.startswith('f'): + self.__set_state('FROZEN') - def thaw(self): - if self.__state.startswith('f'): - self.__set_state('THAWED') + def thaw(self): + if self.__state.startswith('f'): + self.__set_state('THAWED') - def getdopts(self): - return ['--freeze-cgroup', self.__path, '--manage-cgroups'] + def getdopts(self): + return ['--freeze-cgroup', self.__path, '--manage-cgroups'] - def getropts(self): - return ['--manage-cgroups'] + def getropts(self): + return ['--manage-cgroups'] def get_freezer(desc): - if not desc: - return noop_freezer() + if not desc: + return noop_freezer() - fd = desc.split(':') - fr = cg_freezer(path = fd[0], state = fd[1]) - return fr + fd = desc.split(':') + fr = cg_freezer(path=fd[0], 
state=fd[1]) + return fr def cmp_ns(ns1, match, ns2, msg): - ns1_ino = os.stat(ns1).st_ino - ns2_ino = os.stat(ns2).st_ino - if eval("%r %s %r" % (ns1_ino, match, ns2_ino)): - print("%s match (%r %s %r) fail" % (msg, ns1_ino, match, ns2_ino)) - raise test_fail_exc("%s compare" % msg) + ns1_ino = os.stat(ns1).st_ino + ns2_ino = os.stat(ns2).st_ino + if eval("%r %s %r" % (ns1_ino, match, ns2_ino)): + print("%s match (%r %s %r) fail" % (msg, ns1_ino, match, ns2_ino)) + raise test_fail_exc("%s compare" % msg) def check_joinns_state(t): - cmp_ns("/proc/%s/ns/net" % t.getpid(), "!=", join_ns_file, "join-ns") + cmp_ns("/proc/%s/ns/net" % t.getpid(), "!=", join_ns_file, "join-ns") def pstree_each_pid(root_pid): - f_children_path = "/proc/{0}/task/{0}/children".format(root_pid) - child_pids = [] - try: - with open(f_children_path, "r") as f_children: - pid_line = f_children.readline().strip(" \n") - if pid_line: - child_pids += pid_line.split(" ") - except Exception as e: - print("Unable to read /proc/*/children: %s" % e) - return # process is dead + f_children_path = "/proc/{0}/task/{0}/children".format(root_pid) + child_pids = [] + try: + with open(f_children_path, "r") as f_children: + pid_line = f_children.readline().strip(" \n") + if pid_line: + child_pids += pid_line.split(" ") + except Exception as e: + print("Unable to read /proc/*/children: %s" % e) + return # process is dead - yield root_pid - for child_pid in child_pids: - for pid in pstree_each_pid(child_pid): - yield pid + yield root_pid + for child_pid in child_pids: + for pid in pstree_each_pid(child_pid): + yield pid def is_proc_stopped(pid): - def get_thread_status(thread_dir): - try: - with open(os.path.join(thread_dir, "status")) as f_status: - for line in f_status.readlines(): - if line.startswith("State:"): - return line.split(":", 1)[1].strip().split(" ")[0] - except Exception as e: - print("Unable to read a thread status: %s" % e) - pass # process is dead - return None + def 
get_thread_status(thread_dir): + try: + with open(os.path.join(thread_dir, "status")) as f_status: + for line in f_status.readlines(): + if line.startswith("State:"): + return line.split(":", 1)[1].strip().split(" ")[0] + except Exception as e: + print("Unable to read a thread status: %s" % e) + pass # process is dead + return None - def is_thread_stopped(status): - return (status is None) or (status == "T") or (status == "Z") + def is_thread_stopped(status): + return (status is None) or (status == "T") or (status == "Z") - tasks_dir = "/proc/%s/task" % pid - thread_dirs = [] - try: - thread_dirs = os.listdir(tasks_dir) - except Exception as e: - print("Unable to read threads: %s" % e) - pass # process is dead + tasks_dir = "/proc/%s/task" % pid + thread_dirs = [] + try: + thread_dirs = os.listdir(tasks_dir) + except Exception as e: + print("Unable to read threads: %s" % e) + pass # process is dead - for thread_dir in thread_dirs: - thread_status = get_thread_status(os.path.join(tasks_dir, thread_dir)) - if not is_thread_stopped(thread_status): - return False + for thread_dir in thread_dirs: + thread_status = get_thread_status(os.path.join(tasks_dir, thread_dir)) + if not is_thread_stopped(thread_status): + return False - if not is_thread_stopped(get_thread_status("/proc/%s" % pid)): - return False + if not is_thread_stopped(get_thread_status("/proc/%s" % pid)): + return False - return True + return True def pstree_check_stopped(root_pid): - for pid in pstree_each_pid(root_pid): - if not is_proc_stopped(pid): - raise test_fail_exc("CRIU --leave-stopped %s" % pid) + for pid in pstree_each_pid(root_pid): + if not is_proc_stopped(pid): + raise test_fail_exc("CRIU --leave-stopped %s" % pid) def pstree_signal(root_pid, signal): - for pid in pstree_each_pid(root_pid): - try: - os.kill(int(pid), signal) - except Exception as e: - print("Unable to kill %d: %s" % (pid, e)) - pass # process is dead + for pid in pstree_each_pid(root_pid): + try: + os.kill(int(pid), signal) + 
except Exception as e: + print("Unable to kill %d: %s" % (pid, e)) + pass # process is dead def do_run_test(tname, tdesc, flavs, opts): - tcname = tname.split('/')[0] - tclass = test_classes.get(tcname, None) - if not tclass: - print("Unknown test class %s" % tcname) - return + tcname = tname.split('/')[0] + tclass = test_classes.get(tcname, None) + if not tclass: + print("Unknown test class %s" % tcname) + return - if opts['report']: - init_report(opts['report']) - if opts['sbs']: - init_sbs() + if opts['report']: + init_report(opts['report']) + if opts['sbs']: + init_sbs() - fcg = get_freezer(opts['freezecg']) + fcg = get_freezer(opts['freezecg']) - for f in flavs: - print_sep("Run %s in %s" % (tname, f)) - if opts['dry_run']: - continue - flav = flavors[f](opts) - t = tclass(tname, tdesc, flav, fcg) - cr_api = criu(opts) + for f in flavs: + print_sep("Run %s in %s" % (tname, f)) + if opts['dry_run']: + continue + flav = flavors[f](opts) + t = tclass(tname, tdesc, flav, fcg) + cr_api = criu(opts) - try: - t.start() - s = get_visible_state(t) - try: - cr(cr_api, t, opts) - except test_fail_expected_exc as e: - if e.cr_action == "dump": - t.stop() - else: - check_visible_state(t, s, opts) - if opts['join_ns']: - check_joinns_state(t) - t.stop() - cr_api.fini() - try_run_hook(t, ["--clean"]) - except test_fail_exc as e: - print_sep("Test %s FAIL at %s" % (tname, e.step), '#') - t.print_output() - t.kill() - cr_api.kill() - try_run_hook(t, ["--clean"]) - if cr_api.logs(): - add_to_report(cr_api.logs(), tname.replace('/', '_') + "_" + f + "/images") - if opts['keep_img'] == 'never': - cr_api.cleanup() - # When option --keep-going not specified this exit - # does two things: exits from subprocess and aborts the - # main script execution on the 1st error met - sys.exit(encode_flav(f)) - else: - if opts['keep_img'] != 'always': - cr_api.cleanup() - print_sep("Test %s PASS" % tname) + try: + t.start() + s = get_visible_state(t) + try: + cr(cr_api, t, opts) + except 
test_fail_expected_exc as e: + if e.cr_action == "dump": + t.stop() + else: + check_visible_state(t, s, opts) + if opts['join_ns']: + check_joinns_state(t) + t.stop() + cr_api.fini() + try_run_hook(t, ["--clean"]) + except test_fail_exc as e: + print_sep("Test %s FAIL at %s" % (tname, e.step), '#') + t.print_output() + t.kill() + cr_api.kill() + try_run_hook(t, ["--clean"]) + if cr_api.logs(): + add_to_report(cr_api.logs(), + tname.replace('/', '_') + "_" + f + "/images") + if opts['keep_img'] == 'never': + cr_api.cleanup() + # When option --keep-going not specified this exit + # does two things: exits from subprocess and aborts the + # main script execution on the 1st error met + sys.exit(encode_flav(f)) + else: + if opts['keep_img'] != 'always': + cr_api.cleanup() + print_sep("Test %s PASS" % tname) class Launcher: - def __init__(self, opts, nr_tests): - self.__opts = opts - self.__total = nr_tests - self.__runtest = 0 - self.__nr = 0 - self.__max = int(opts['parallel'] or 1) - self.__subs = {} - self.__fail = False - self.__file_report = None - self.__junit_file = None - self.__junit_test_cases = None - self.__failed = [] - self.__nr_skip = 0 - if self.__max > 1 and self.__total > 1: - self.__use_log = True - elif opts['report']: - self.__use_log = True - else: - self.__use_log = False + def __init__(self, opts, nr_tests): + self.__opts = opts + self.__total = nr_tests + self.__runtest = 0 + self.__nr = 0 + self.__max = int(opts['parallel'] or 1) + self.__subs = {} + self.__fail = False + self.__file_report = None + self.__junit_file = None + self.__junit_test_cases = None + self.__failed = [] + self.__nr_skip = 0 + if self.__max > 1 and self.__total > 1: + self.__use_log = True + elif opts['report']: + self.__use_log = True + else: + self.__use_log = False - if opts['report'] and (opts['keep_going'] or self.__total == 1): - global TestSuite, TestCase - from junit_xml import TestSuite, TestCase - now = datetime.datetime.now() - att = 0 - reportname = 
os.path.join(report_dir, "criu-testreport.tap") - junitreport = os.path.join(report_dir, "criu-testreport.xml") - while os.access(reportname, os.F_OK) or os.access(junitreport, os.F_OK): - reportname = os.path.join(report_dir, "criu-testreport" + ".%d.tap" % att) - junitreport = os.path.join(report_dir, "criu-testreport" + ".%d.xml" % att) - att += 1 + if opts['report'] and (opts['keep_going'] or self.__total == 1): + global TestSuite, TestCase + from junit_xml import TestSuite, TestCase + now = datetime.datetime.now() + att = 0 + reportname = os.path.join(report_dir, "criu-testreport.tap") + junitreport = os.path.join(report_dir, "criu-testreport.xml") + while os.access(reportname, os.F_OK) or os.access( + junitreport, os.F_OK): + reportname = os.path.join(report_dir, + "criu-testreport" + ".%d.tap" % att) + junitreport = os.path.join(report_dir, + "criu-testreport" + ".%d.xml" % att) + att += 1 - self.__junit_file = open(junitreport, 'a') - self.__junit_test_cases = [] + self.__junit_file = open(junitreport, 'a') + self.__junit_test_cases = [] - self.__file_report = open(reportname, 'a') - print(u"TAP version 13", file=self.__file_report) - print(u"# Hardware architecture: " + arch, file=self.__file_report) - print(u"# Timestamp: " + now.strftime("%Y-%m-%d %H:%M") + " (GMT+1)", file=self.__file_report) - print(u"# ", file=self.__file_report) - print(u"1.." 
+ str(nr_tests), file=self.__file_report) - with open("/proc/sys/kernel/tainted") as taintfd: - self.__taint = taintfd.read() - if int(self.__taint, 0) != 0: - print("The kernel is tainted: %r" % self.__taint) - if not opts["ignore_taint"]: - raise Exception("The kernel is tainted: %r" % self.__taint) + self.__file_report = open(reportname, 'a') + print(u"TAP version 13", file=self.__file_report) + print(u"# Hardware architecture: " + arch, file=self.__file_report) + print(u"# Timestamp: " + now.strftime("%Y-%m-%d %H:%M") + + " (GMT+1)", + file=self.__file_report) + print(u"# ", file=self.__file_report) + print(u"1.." + str(nr_tests), file=self.__file_report) + with open("/proc/sys/kernel/tainted") as taintfd: + self.__taint = taintfd.read() + if int(self.__taint, 0) != 0: + print("The kernel is tainted: %r" % self.__taint) + if not opts["ignore_taint"]: + raise Exception("The kernel is tainted: %r" % self.__taint) - def __show_progress(self, msg): - perc = int(self.__nr * 16 / self.__total) - print("=== Run %d/%d %s %s" % (self.__nr, self.__total, '=' * perc + '-' * (16 - perc), msg)) + def __show_progress(self, msg): + perc = int(self.__nr * 16 / self.__total) + print("=== Run %d/%d %s %s" % + (self.__nr, self.__total, '=' * perc + '-' * (16 - perc), msg)) - def skip(self, name, reason): - print("Skipping %s (%s)" % (name, reason)) - self.__nr += 1 - self.__runtest += 1 - self.__nr_skip += 1 + def skip(self, name, reason): + print("Skipping %s (%s)" % (name, reason)) + self.__nr += 1 + self.__runtest += 1 + self.__nr_skip += 1 - if self.__junit_test_cases is not None: - tc = TestCase(name) - tc.add_skipped_info(reason) - self.__junit_test_cases.append(tc) - if self.__file_report: - testline = u"ok %d - %s # SKIP %s" % (self.__runtest, name, reason) - print(testline, file=self.__file_report) + if self.__junit_test_cases is not None: + tc = TestCase(name) + tc.add_skipped_info(reason) + self.__junit_test_cases.append(tc) + if self.__file_report: + testline = u"ok 
%d - %s # SKIP %s" % (self.__runtest, name, reason) + print(testline, file=self.__file_report) - def run_test(self, name, desc, flavor): + def run_test(self, name, desc, flavor): - if len(self.__subs) >= self.__max: - self.wait() + if len(self.__subs) >= self.__max: + self.wait() - with open("/proc/sys/kernel/tainted") as taintfd: - taint = taintfd.read() - if self.__taint != taint: - raise Exception("The kernel is tainted: %r (%r)" % (taint, self.__taint)) + with open("/proc/sys/kernel/tainted") as taintfd: + taint = taintfd.read() + if self.__taint != taint: + raise Exception("The kernel is tainted: %r (%r)" % + (taint, self.__taint)) - if test_flag(desc, 'excl'): - self.wait_all() + if test_flag(desc, 'excl'): + self.wait_all() - self.__nr += 1 - self.__show_progress(name) + self.__nr += 1 + self.__show_progress(name) - nd = ('nocr', 'norst', 'pre', 'iters', 'page_server', 'sibling', 'stop', 'empty_ns', - 'fault', 'keep_img', 'report', 'snaps', 'sat', 'script', 'rpc', 'lazy_pages', - 'join_ns', 'dedup', 'sbs', 'freezecg', 'user', 'dry_run', 'noauto_dedup', - 'remote_lazy_pages', 'show_stats', 'lazy_migrate', 'tls', - 'criu_bin', 'crit_bin') - arg = repr((name, desc, flavor, {d: self.__opts[d] for d in nd})) + nd = ('nocr', 'norst', 'pre', 'iters', 'page_server', 'sibling', + 'stop', 'empty_ns', 'fault', 'keep_img', 'report', 'snaps', + 'sat', 'script', 'rpc', 'lazy_pages', 'join_ns', 'dedup', 'sbs', + 'freezecg', 'user', 'dry_run', 'noauto_dedup', + 'remote_lazy_pages', 'show_stats', 'lazy_migrate', + 'tls', 'criu_bin', 'crit_bin') + arg = repr((name, desc, flavor, {d: self.__opts[d] for d in nd})) - if self.__use_log: - logf = name.replace('/', '_') + ".log" - log = open(logf, "w") - else: - logf = None - log = None + if self.__use_log: + logf = name.replace('/', '_') + ".log" + log = open(logf, "w") + else: + logf = None + log = None - sub = subprocess.Popen(["./zdtm_ct", "zdtm.py"], - env = dict(os.environ, CR_CT_TEST_INFO = arg), - stdout = log, stderr = 
subprocess.STDOUT, close_fds = True) - self.__subs[sub.pid] = {'sub': sub, 'log': logf, 'name': name, "start": time.time()} + sub = subprocess.Popen(["./zdtm_ct", "zdtm.py"], + env=dict(os.environ, CR_CT_TEST_INFO=arg), + stdout=log, + stderr=subprocess.STDOUT, + close_fds=True) + self.__subs[sub.pid] = { + 'sub': sub, + 'log': logf, + 'name': name, + "start": time.time() + } - if test_flag(desc, 'excl'): - self.wait() + if test_flag(desc, 'excl'): + self.wait() - def __wait_one(self, flags): - pid = -1 - status = -1 - signal.alarm(10) - while True: - try: - pid, status = os.waitpid(0, flags) - except OSError as e: - if e.errno == errno.EINTR: - subprocess.Popen(["ps", "axf"]).wait() - continue - signal.alarm(0) - raise e - else: - break - signal.alarm(0) + def __wait_one(self, flags): + pid = -1 + status = -1 + signal.alarm(10) + while True: + try: + pid, status = os.waitpid(0, flags) + except OSError as e: + if e.errno == errno.EINTR: + subprocess.Popen(["ps", "axf"]).wait() + continue + signal.alarm(0) + raise e + else: + break + signal.alarm(0) - self.__runtest += 1 - if pid != 0: - sub = self.__subs.pop(pid) - tc = None - if self.__junit_test_cases is not None: - tc = TestCase(sub['name'], elapsed_sec=time.time() - sub['start']) - self.__junit_test_cases.append(tc) - if status != 0: - self.__fail = True - failed_flavor = decode_flav(os.WEXITSTATUS(status)) - self.__failed.append([sub['name'], failed_flavor]) - if self.__file_report: - testline = u"not ok %d - %s # flavor %s" % (self.__runtest, sub['name'], failed_flavor) - with open(sub['log']) as sublog: - output = sublog.read() - details = {'output': output} - tc.add_error_info(output = output) - print(testline, file=self.__file_report) - print("%s" % yaml.safe_dump(details, explicit_start=True, - explicit_end=True, default_style='|'), file=self.__file_report) - if sub['log']: - add_to_output(sub['log']) - else: - if self.__file_report: - testline = u"ok %d - %s" % (self.__runtest, sub['name']) - 
print(testline, file=self.__file_report) + self.__runtest += 1 + if pid != 0: + sub = self.__subs.pop(pid) + tc = None + if self.__junit_test_cases is not None: + tc = TestCase(sub['name'], + elapsed_sec=time.time() - sub['start']) + self.__junit_test_cases.append(tc) + if status != 0: + self.__fail = True + failed_flavor = decode_flav(os.WEXITSTATUS(status)) + self.__failed.append([sub['name'], failed_flavor]) + if self.__file_report: + testline = u"not ok %d - %s # flavor %s" % ( + self.__runtest, sub['name'], failed_flavor) + with open(sub['log']) as sublog: + output = sublog.read() + details = {'output': output} + tc.add_error_info(output=output) + print(testline, file=self.__file_report) + print("%s" % yaml.safe_dump(details, + explicit_start=True, + explicit_end=True, + default_style='|'), + file=self.__file_report) + if sub['log']: + add_to_output(sub['log']) + else: + if self.__file_report: + testline = u"ok %d - %s" % (self.__runtest, sub['name']) + print(testline, file=self.__file_report) - if sub['log']: - with open(sub['log']) as sublog: - print("%s" % sublog.read().encode('ascii', 'ignore').decode('utf-8')) - os.unlink(sub['log']) + if sub['log']: + with open(sub['log']) as sublog: + print("%s" % sublog.read().encode( + 'ascii', 'ignore').decode('utf-8')) + os.unlink(sub['log']) - return True + return True - return False + return False - def __wait_all(self): - while self.__subs: - self.__wait_one(0) + def __wait_all(self): + while self.__subs: + self.__wait_one(0) - def wait(self): - self.__wait_one(0) - while self.__subs: - if not self.__wait_one(os.WNOHANG): - break - if self.__fail and not opts['keep_going']: - raise test_fail_exc('') + def wait(self): + self.__wait_one(0) + while self.__subs: + if not self.__wait_one(os.WNOHANG): + break + if self.__fail and not opts['keep_going']: + raise test_fail_exc('') - def wait_all(self): - self.__wait_all() - if self.__fail and not opts['keep_going']: - raise test_fail_exc('') + def wait_all(self): + 
self.__wait_all() + if self.__fail and not opts['keep_going']: + raise test_fail_exc('') - def finish(self): - self.__wait_all() - if not opts['fault'] and check_core_files(): - self.__fail = True - if self.__file_report: - ts = TestSuite(opts['title'], self.__junit_test_cases, os.getenv("NODE_NAME")) - self.__junit_file.write(TestSuite.to_xml_string([ts])) - self.__junit_file.close() - self.__file_report.close() + def finish(self): + self.__wait_all() + if not opts['fault'] and check_core_files(): + self.__fail = True + if self.__file_report: + ts = TestSuite(opts['title'], self.__junit_test_cases, + os.getenv("NODE_NAME")) + self.__junit_file.write(TestSuite.to_xml_string([ts])) + self.__junit_file.close() + self.__file_report.close() - if opts['keep_going']: - if self.__fail: - print_sep("%d TEST(S) FAILED (TOTAL %d/SKIPPED %d)" - % (len(self.__failed), self.__total, self.__nr_skip), "#") - for failed in self.__failed: - print(" * %s(%s)" % (failed[0], failed[1])) - else: - print_sep("ALL TEST(S) PASSED (TOTAL %d/SKIPPED %d)" - % (self.__total, self.__nr_skip), "#") + if opts['keep_going']: + if self.__fail: + print_sep( + "%d TEST(S) FAILED (TOTAL %d/SKIPPED %d)" % + (len(self.__failed), self.__total, self.__nr_skip), "#") + for failed in self.__failed: + print(" * %s(%s)" % (failed[0], failed[1])) + else: + print_sep( + "ALL TEST(S) PASSED (TOTAL %d/SKIPPED %d)" % + (self.__total, self.__nr_skip), "#") - if self.__fail: - print_sep("FAIL", "#") - sys.exit(1) + if self.__fail: + print_sep("FAIL", "#") + sys.exit(1) def all_tests(opts): - with open(opts['set'] + '.desc') as fd: - desc = eval(fd.read()) + with open(opts['set'] + '.desc') as fd: + desc = eval(fd.read()) - files = [] - mask = stat.S_IFREG | stat.S_IXUSR - for d in os.walk(desc['dir']): - for f in d[2]: - fp = os.path.join(d[0], f) - st = os.lstat(fp) - if (st.st_mode & mask) != mask: - continue - if stat.S_IFMT(st.st_mode) in [stat.S_IFLNK, stat.S_IFSOCK]: - continue - files.append(fp) - excl = 
list(map(lambda x: os.path.join(desc['dir'], x), desc['exclude'])) - tlist = filter(lambda x: - not x.endswith('.checkskip') and - not x.endswith('.hook') and - x not in excl, - map(lambda x: x.strip(), files) - ) - return tlist + files = [] + mask = stat.S_IFREG | stat.S_IXUSR + for d in os.walk(desc['dir']): + for f in d[2]: + fp = os.path.join(d[0], f) + st = os.lstat(fp) + if (st.st_mode & mask) != mask: + continue + if stat.S_IFMT(st.st_mode) in [stat.S_IFLNK, stat.S_IFSOCK]: + continue + files.append(fp) + excl = list(map(lambda x: os.path.join(desc['dir'], x), desc['exclude'])) + tlist = filter( + lambda x: not x.endswith('.checkskip') and not x.endswith('.hook') and + x not in excl, map(lambda x: x.strip(), files)) + return tlist # Descriptor for abstract test not in list @@ -1874,355 +1991,363 @@ default_test = {} def get_test_desc(tname): - d_path = tname + '.desc' - if os.access(d_path, os.F_OK) and os.path.getsize(d_path) > 0: - with open(d_path) as fd: - return eval(fd.read()) + d_path = tname + '.desc' + if os.access(d_path, os.F_OK) and os.path.getsize(d_path) > 0: + with open(d_path) as fd: + return eval(fd.read()) - return default_test + return default_test def self_checkskip(tname): - chs = tname + '.checkskip' - if os.access(chs, os.X_OK): - ch = subprocess.Popen([chs]) - return not ch.wait() == 0 + chs = tname + '.checkskip' + if os.access(chs, os.X_OK): + ch = subprocess.Popen([chs]) + return not ch.wait() == 0 - return False + return False def print_fname(fname, typ): - print("=[%s]=> %s" % (typ, fname)) + print("=[%s]=> %s" % (typ, fname)) -def print_sep(title, sep = "=", width = 80): - print((" " + title + " ").center(width, sep)) +def print_sep(title, sep="=", width=80): + print((" " + title + " ").center(width, sep)) def print_error(line): - line = line.rstrip() - print(line) - if line.endswith('>'): # combine pie output - return True - return False + line = line.rstrip() + print(line) + if line.endswith('>'): # combine pie output + return 
True + return False def grep_errors(fname): - first = True - print_next = False - before = [] - with open(fname) as fd: - for l in fd: - before.append(l) - if len(before) > 5: - before.pop(0) - if "Error" in l or "Warn" in l: - if first: - print_fname(fname, 'log') - print_sep("grep Error", "-", 60) - first = False - for i in before: - print_next = print_error(i) - before = [] - else: - if print_next: - print_next = print_error(l) - before = [] - if not first: - print_sep("ERROR OVER", "-", 60) + first = True + print_next = False + before = [] + with open(fname) as fd: + for l in fd: + before.append(l) + if len(before) > 5: + before.pop(0) + if "Error" in l or "Warn" in l: + if first: + print_fname(fname, 'log') + print_sep("grep Error", "-", 60) + first = False + for i in before: + print_next = print_error(i) + before = [] + else: + if print_next: + print_next = print_error(l) + before = [] + if not first: + print_sep("ERROR OVER", "-", 60) def run_tests(opts): - excl = None - features = {} + excl = None + features = {} - if opts['pre'] or opts['snaps']: - if not criu.check("mem_dirty_track"): - print("Tracking memory is not available") - return + if opts['pre'] or opts['snaps']: + if not criu.check("mem_dirty_track"): + print("Tracking memory is not available") + return - if opts['all']: - torun = all_tests(opts) - run_all = True - elif opts['tests']: - r = re.compile(opts['tests']) - torun = filter(lambda x: r.match(x), all_tests(opts)) - run_all = True - elif opts['test']: - torun = opts['test'] - run_all = False - elif opts['from']: - if not os.access(opts['from'], os.R_OK): - print("No such file") - return + if opts['all']: + torun = all_tests(opts) + run_all = True + elif opts['tests']: + r = re.compile(opts['tests']) + torun = filter(lambda x: r.match(x), all_tests(opts)) + run_all = True + elif opts['test']: + torun = opts['test'] + run_all = False + elif opts['from']: + if not os.access(opts['from'], os.R_OK): + print("No such file") + return - with 
open(opts['from']) as fd: - torun = map(lambda x: x.strip(), fd) - opts['keep_going'] = False - run_all = True - else: - print("Specify test with -t or -a") - return + with open(opts['from']) as fd: + torun = map(lambda x: x.strip(), fd) + opts['keep_going'] = False + run_all = True + else: + print("Specify test with -t or -a") + return - torun = list(torun) - if opts['keep_going'] and len(torun) < 2: - print("[WARNING] Option --keep-going is more useful when running multiple tests") - opts['keep_going'] = False + torun = list(torun) + if opts['keep_going'] and len(torun) < 2: + print( + "[WARNING] Option --keep-going is more useful when running multiple tests" + ) + opts['keep_going'] = False - if opts['exclude']: - excl = re.compile(".*(" + "|".join(opts['exclude']) + ")") - print("Compiled exclusion list") + if opts['exclude']: + excl = re.compile(".*(" + "|".join(opts['exclude']) + ")") + print("Compiled exclusion list") - if opts['report']: - init_report(opts['report']) + if opts['report']: + init_report(opts['report']) - if opts['parallel'] and opts['freezecg']: - print("Parallel launch with freezer not supported") - opts['parallel'] = None + if opts['parallel'] and opts['freezecg']: + print("Parallel launch with freezer not supported") + opts['parallel'] = None - if opts['join_ns']: - if subprocess.Popen(["ip", "netns", "add", "zdtm_netns"]).wait(): - raise Exception("Unable to create a network namespace") - if subprocess.Popen(["ip", "netns", "exec", "zdtm_netns", "ip", "link", "set", "up", "dev", "lo"]).wait(): - raise Exception("ip link set up dev lo") + if opts['join_ns']: + if subprocess.Popen(["ip", "netns", "add", "zdtm_netns"]).wait(): + raise Exception("Unable to create a network namespace") + if subprocess.Popen([ + "ip", "netns", "exec", "zdtm_netns", "ip", "link", "set", "up", + "dev", "lo" + ]).wait(): + raise Exception("ip link set up dev lo") - if opts['lazy_pages'] or opts['remote_lazy_pages'] or opts['lazy_migrate']: - uffd = 
criu.check("uffd") - uffd_noncoop = criu.check("uffd-noncoop") - if not uffd: - raise Exception("UFFD is not supported, cannot run with --lazy-pages") - if not uffd_noncoop: - # Most tests will work with 4.3 - 4.11 - print("[WARNING] Non-cooperative UFFD is missing, some tests might spuriously fail") + if opts['lazy_pages'] or opts['remote_lazy_pages'] or opts['lazy_migrate']: + uffd = criu.check("uffd") + uffd_noncoop = criu.check("uffd-noncoop") + if not uffd: + raise Exception( + "UFFD is not supported, cannot run with --lazy-pages") + if not uffd_noncoop: + # Most tests will work with 4.3 - 4.11 + print( + "[WARNING] Non-cooperative UFFD is missing, some tests might spuriously fail" + ) - launcher = Launcher(opts, len(torun)) - try: - for t in torun: - global arch + launcher = Launcher(opts, len(torun)) + try: + for t in torun: + global arch - if excl and excl.match(t): - launcher.skip(t, "exclude") - continue + if excl and excl.match(t): + launcher.skip(t, "exclude") + continue - tdesc = get_test_desc(t) - if tdesc.get('arch', arch) != arch: - launcher.skip(t, "arch %s" % tdesc['arch']) - continue + tdesc = get_test_desc(t) + if tdesc.get('arch', arch) != arch: + launcher.skip(t, "arch %s" % tdesc['arch']) + continue - if test_flag(tdesc, 'reqrst') and opts['norst']: - launcher.skip(t, "restore stage is required") - continue + if test_flag(tdesc, 'reqrst') and opts['norst']: + launcher.skip(t, "restore stage is required") + continue - if run_all and test_flag(tdesc, 'noauto'): - launcher.skip(t, "manual run only") - continue + if run_all and test_flag(tdesc, 'noauto'): + launcher.skip(t, "manual run only") + continue - feat_list = tdesc.get('feature', "") - for feat in feat_list.split(): - if feat not in features: - print("Checking feature %s" % feat) - features[feat] = criu.check(feat) + feat_list = tdesc.get('feature', "") + for feat in feat_list.split(): + if feat not in features: + print("Checking feature %s" % feat) + features[feat] = criu.check(feat) - 
if not features[feat]: - launcher.skip(t, "no %s feature" % feat) - feat_list = None - break - if feat_list is None: - continue + if not features[feat]: + launcher.skip(t, "no %s feature" % feat) + feat_list = None + break + if feat_list is None: + continue - if self_checkskip(t): - launcher.skip(t, "checkskip failed") - continue + if self_checkskip(t): + launcher.skip(t, "checkskip failed") + continue - if opts['user']: - if test_flag(tdesc, 'suid'): - launcher.skip(t, "suid test in user mode") - continue - if test_flag(tdesc, 'nouser'): - launcher.skip(t, "criu root prio needed") - continue + if opts['user']: + if test_flag(tdesc, 'suid'): + launcher.skip(t, "suid test in user mode") + continue + if test_flag(tdesc, 'nouser'): + launcher.skip(t, "criu root prio needed") + continue - if opts['join_ns']: - if test_flag(tdesc, 'samens'): - launcher.skip(t, "samens test in the same namespace") - continue + if opts['join_ns']: + if test_flag(tdesc, 'samens'): + launcher.skip(t, "samens test in the same namespace") + continue - if opts['lazy_pages'] or opts['remote_lazy_pages'] or opts['lazy_migrate']: - if test_flag(tdesc, 'nolazy'): - launcher.skip(t, "lazy pages are not supported") - continue + if opts['lazy_pages'] or opts['remote_lazy_pages'] or opts[ + 'lazy_migrate']: + if test_flag(tdesc, 'nolazy'): + launcher.skip(t, "lazy pages are not supported") + continue - if opts['remote_lazy_pages']: - if test_flag(tdesc, 'noremotelazy'): - launcher.skip(t, "remote lazy pages are not supported") - continue + if opts['remote_lazy_pages']: + if test_flag(tdesc, 'noremotelazy'): + launcher.skip(t, "remote lazy pages are not supported") + continue - test_flavs = tdesc.get('flavor', 'h ns uns').split() - opts_flavs = (opts['flavor'] or 'h,ns,uns').split(',') - if opts_flavs != ['best']: - run_flavs = set(test_flavs) & set(opts_flavs) - else: - run_flavs = set([test_flavs.pop()]) - if not criu.check("userns"): - run_flavs -= set(['uns']) - if opts['user']: - # FIXME -- 
probably uns will make sense - run_flavs -= set(['ns', 'uns']) + test_flavs = tdesc.get('flavor', 'h ns uns').split() + opts_flavs = (opts['flavor'] or 'h,ns,uns').split(',') + if opts_flavs != ['best']: + run_flavs = set(test_flavs) & set(opts_flavs) + else: + run_flavs = set([test_flavs.pop()]) + if not criu.check("userns"): + run_flavs -= set(['uns']) + if opts['user']: + # FIXME -- probably uns will make sense + run_flavs -= set(['ns', 'uns']) - # remove ns and uns flavor in join_ns - if opts['join_ns']: - run_flavs -= set(['ns', 'uns']) - if opts['empty_ns']: - run_flavs -= set(['h']) + # remove ns and uns flavor in join_ns + if opts['join_ns']: + run_flavs -= set(['ns', 'uns']) + if opts['empty_ns']: + run_flavs -= set(['h']) - if run_flavs: - launcher.run_test(t, tdesc, run_flavs) - else: - launcher.skip(t, "no flavors") - finally: - launcher.finish() - if opts['join_ns']: - subprocess.Popen(["ip", "netns", "delete", "zdtm_netns"]).wait() + if run_flavs: + launcher.run_test(t, tdesc, run_flavs) + else: + launcher.skip(t, "no flavors") + finally: + launcher.finish() + if opts['join_ns']: + subprocess.Popen(["ip", "netns", "delete", "zdtm_netns"]).wait() sti_fmt = "%-40s%-10s%s" def show_test_info(t): - tdesc = get_test_desc(t) - flavs = tdesc.get('flavor', '') - return sti_fmt % (t, flavs, tdesc.get('flags', '')) + tdesc = get_test_desc(t) + flavs = tdesc.get('flavor', '') + return sti_fmt % (t, flavs, tdesc.get('flags', '')) def list_tests(opts): - tlist = all_tests(opts) - if opts['info']: - print(sti_fmt % ('Name', 'Flavors', 'Flags')) - tlist = map(lambda x: show_test_info(x), tlist) - print('\n'.join(tlist)) + tlist = all_tests(opts) + if opts['info']: + print(sti_fmt % ('Name', 'Flavors', 'Flags')) + tlist = map(lambda x: show_test_info(x), tlist) + print('\n'.join(tlist)) class group: - def __init__(self, tname, tdesc): - self.__tests = [tname] - self.__desc = tdesc - self.__deps = set() + def __init__(self, tname, tdesc): + self.__tests = [tname] + 
self.__desc = tdesc + self.__deps = set() - def __is_mergeable_desc(self, desc): - # For now make it full match - if self.__desc.get('flags') != desc.get('flags'): - return False - if self.__desc.get('flavor') != desc.get('flavor'): - return False - if self.__desc.get('arch') != desc.get('arch'): - return False - if self.__desc.get('opts') != desc.get('opts'): - return False - if self.__desc.get('feature') != desc.get('feature'): - return False - return True + def __is_mergeable_desc(self, desc): + # For now make it full match + if self.__desc.get('flags') != desc.get('flags'): + return False + if self.__desc.get('flavor') != desc.get('flavor'): + return False + if self.__desc.get('arch') != desc.get('arch'): + return False + if self.__desc.get('opts') != desc.get('opts'): + return False + if self.__desc.get('feature') != desc.get('feature'): + return False + return True - def merge(self, tname, tdesc): - if not self.__is_mergeable_desc(tdesc): - return False + def merge(self, tname, tdesc): + if not self.__is_mergeable_desc(tdesc): + return False - self.__deps |= set(tdesc.get('deps', [])) - self.__tests.append(tname) - return True + self.__deps |= set(tdesc.get('deps', [])) + self.__tests.append(tname) + return True - def size(self): - return len(self.__tests) + def size(self): + return len(self.__tests) - # common method to write a "meta" auxiliary script (hook/checkskip) - # which will call all tests' scripts in turn - def __dump_meta(self, fname, ext): - scripts = filter(lambda names: os.access(names[1], os.X_OK), - map(lambda test: (test, test + ext), - self.__tests)) - if scripts: - f = open(fname + ext, "w") - f.write("#!/bin/sh -e\n") + # common method to write a "meta" auxiliary script (hook/checkskip) + # which will call all tests' scripts in turn + def __dump_meta(self, fname, ext): + scripts = filter(lambda names: os.access(names[1], os.X_OK), + map(lambda test: (test, test + ext), self.__tests)) + if scripts: + f = open(fname + ext, "w") + 
f.write("#!/bin/sh -e\n") - for test, script in scripts: - f.write("echo 'Running %s for %s'\n" % (ext, test)) - f.write('%s "$@"\n' % script) + for test, script in scripts: + f.write("echo 'Running %s for %s'\n" % (ext, test)) + f.write('%s "$@"\n' % script) - f.write("echo 'All %s scripts OK'\n" % ext) - f.close() - os.chmod(fname + ext, 0o700) + f.write("echo 'All %s scripts OK'\n" % ext) + f.close() + os.chmod(fname + ext, 0o700) - def dump(self, fname): - f = open(fname, "w") - for t in self.__tests: - f.write(t + '\n') - f.close() - os.chmod(fname, 0o700) + def dump(self, fname): + f = open(fname, "w") + for t in self.__tests: + f.write(t + '\n') + f.close() + os.chmod(fname, 0o700) - if len(self.__desc) or len(self.__deps): - f = open(fname + '.desc', "w") - if len(self.__deps): - self.__desc['deps'] = list(self.__deps) - f.write(repr(self.__desc)) - f.close() + if len(self.__desc) or len(self.__deps): + f = open(fname + '.desc', "w") + if len(self.__deps): + self.__desc['deps'] = list(self.__deps) + f.write(repr(self.__desc)) + f.close() - # write "meta" .checkskip and .hook scripts - self.__dump_meta(fname, '.checkskip') - self.__dump_meta(fname, '.hook') + # write "meta" .checkskip and .hook scripts + self.__dump_meta(fname, '.checkskip') + self.__dump_meta(fname, '.hook') def group_tests(opts): - excl = None - groups = [] - pend_groups = [] - maxs = int(opts['max_size']) + excl = None + groups = [] + pend_groups = [] + maxs = int(opts['max_size']) - if not os.access("groups", os.F_OK): - os.mkdir("groups") + if not os.access("groups", os.F_OK): + os.mkdir("groups") - tlist = all_tests(opts) - random.shuffle(tlist) - if opts['exclude']: - excl = re.compile(".*(" + "|".join(opts['exclude']) + ")") - print("Compiled exclusion list") + tlist = all_tests(opts) + random.shuffle(tlist) + if opts['exclude']: + excl = re.compile(".*(" + "|".join(opts['exclude']) + ")") + print("Compiled exclusion list") - for t in tlist: - if excl and excl.match(t): - continue + 
for t in tlist: + if excl and excl.match(t): + continue - td = get_test_desc(t) + td = get_test_desc(t) - for g in pend_groups: - if g.merge(t, td): - if g.size() == maxs: - pend_groups.remove(g) - groups.append(g) - break - else: - g = group(t, td) - pend_groups.append(g) + for g in pend_groups: + if g.merge(t, td): + if g.size() == maxs: + pend_groups.remove(g) + groups.append(g) + break + else: + g = group(t, td) + pend_groups.append(g) - groups += pend_groups + groups += pend_groups - nr = 0 - suf = opts['name'] or 'group' + nr = 0 + suf = opts['name'] or 'group' - for g in groups: - if maxs > 1 and g.size() == 1: # Not much point in group test for this - continue + for g in groups: + if maxs > 1 and g.size() == 1: # Not much point in group test for this + continue - fn = os.path.join("groups", "%s.%d" % (suf, nr)) - g.dump(fn) - nr += 1 + fn = os.path.join("groups", "%s.%d" % (suf, nr)) + g.dump(fn) + nr += 1 - print("Generated %d group(s)" % nr) + print("Generated %d group(s)" % nr) def clean_stuff(opts): - print("Cleaning %s" % opts['what']) - if opts['what'] == 'nsroot': - for f in flavors: - f = flavors[f] - f.clean() + print("Cleaning %s" % opts['what']) + if opts['what'] == 'nsroot': + for f in flavors: + f = flavors[f] + f.clean() # @@ -2230,103 +2355,167 @@ def clean_stuff(opts): # if 'CR_CT_TEST_INFO' in os.environ: - # Fork here, since we're new pidns init and are supposed to - # collect this namespace's zombies - status = 0 - pid = os.fork() - if pid == 0: - tinfo = eval(os.environ['CR_CT_TEST_INFO']) - do_run_test(tinfo[0], tinfo[1], tinfo[2], tinfo[3]) - else: - while True: - wpid, status = os.wait() - if wpid == pid: - if os.WIFEXITED(status): - status = os.WEXITSTATUS(status) - else: - status = 1 - break + # Fork here, since we're new pidns init and are supposed to + # collect this namespace's zombies + status = 0 + pid = os.fork() + if pid == 0: + tinfo = eval(os.environ['CR_CT_TEST_INFO']) + do_run_test(tinfo[0], tinfo[1], tinfo[2], tinfo[3]) 
+ else: + while True: + wpid, status = os.wait() + if wpid == pid: + if os.WIFEXITED(status): + status = os.WEXITSTATUS(status) + else: + status = 1 + break - sys.exit(status) + sys.exit(status) p = argparse.ArgumentParser("CRIU test suite") -p.add_argument("--debug", help = "Print what's being executed", action = 'store_true') -p.add_argument("--set", help = "Which set of tests to use", default = 'zdtm') +p.add_argument("--debug", + help="Print what's being executed", + action='store_true') +p.add_argument("--set", help="Which set of tests to use", default='zdtm') -sp = p.add_subparsers(help = "Use --help for list of actions") +sp = p.add_subparsers(help="Use --help for list of actions") -rp = sp.add_parser("run", help = "Run test(s)") -rp.set_defaults(action = run_tests) -rp.add_argument("-a", "--all", action = 'store_true') -rp.add_argument("-t", "--test", help = "Test name", action = 'append') -rp.add_argument("-T", "--tests", help = "Regexp") -rp.add_argument("-F", "--from", help = "From file") -rp.add_argument("-f", "--flavor", help = "Flavor to run") -rp.add_argument("-x", "--exclude", help = "Exclude tests from --all run", action = 'append') +rp = sp.add_parser("run", help="Run test(s)") +rp.set_defaults(action=run_tests) +rp.add_argument("-a", "--all", action='store_true') +rp.add_argument("-t", "--test", help="Test name", action='append') +rp.add_argument("-T", "--tests", help="Regexp") +rp.add_argument("-F", "--from", help="From file") +rp.add_argument("-f", "--flavor", help="Flavor to run") +rp.add_argument("-x", + "--exclude", + help="Exclude tests from --all run", + action='append') -rp.add_argument("--sibling", help = "Restore tests as siblings", action = 'store_true') -rp.add_argument("--join-ns", help = "Restore tests and join existing namespace", action = 'store_true') -rp.add_argument("--empty-ns", help = "Restore tests in empty net namespace", action = 'store_true') -rp.add_argument("--pre", help = "Do some pre-dumps before dump (n[:pause])") 
-rp.add_argument("--snaps", help = "Instead of pre-dumps do full dumps", action = 'store_true') -rp.add_argument("--dedup", help = "Auto-deduplicate images on iterations", action = 'store_true') -rp.add_argument("--noauto-dedup", help = "Manual deduplicate images on iterations", action = 'store_true') -rp.add_argument("--nocr", help = "Do not CR anything, just check test works", action = 'store_true') -rp.add_argument("--norst", help = "Don't restore tasks, leave them running after dump", action = 'store_true') -rp.add_argument("--stop", help = "Check that --leave-stopped option stops ps tree.", action = 'store_true') -rp.add_argument("--iters", help = "Do CR cycle several times before check (n[:pause])") -rp.add_argument("--fault", help = "Test fault injection") -rp.add_argument("--sat", help = "Generate criu strace-s for sat tool (restore is fake, images are kept)", action = 'store_true') -rp.add_argument("--sbs", help = "Do step-by-step execution, asking user for keypress to continue", action = 'store_true') -rp.add_argument("--freezecg", help = "Use freeze cgroup (path:state)") -rp.add_argument("--user", help = "Run CRIU as regular user", action = 'store_true') -rp.add_argument("--rpc", help = "Run CRIU via RPC rather than CLI", action = 'store_true') +rp.add_argument("--sibling", + help="Restore tests as siblings", + action='store_true') +rp.add_argument("--join-ns", + help="Restore tests and join existing namespace", + action='store_true') +rp.add_argument("--empty-ns", + help="Restore tests in empty net namespace", + action='store_true') +rp.add_argument("--pre", help="Do some pre-dumps before dump (n[:pause])") +rp.add_argument("--snaps", + help="Instead of pre-dumps do full dumps", + action='store_true') +rp.add_argument("--dedup", + help="Auto-deduplicate images on iterations", + action='store_true') +rp.add_argument("--noauto-dedup", + help="Manual deduplicate images on iterations", + action='store_true') +rp.add_argument("--nocr", + help="Do not CR 
anything, just check test works", + action='store_true') +rp.add_argument("--norst", + help="Don't restore tasks, leave them running after dump", + action='store_true') +rp.add_argument("--stop", + help="Check that --leave-stopped option stops ps tree.", + action='store_true') +rp.add_argument("--iters", + help="Do CR cycle several times before check (n[:pause])") +rp.add_argument("--fault", help="Test fault injection") +rp.add_argument( + "--sat", + help="Generate criu strace-s for sat tool (restore is fake, images are kept)", + action='store_true') +rp.add_argument( + "--sbs", + help="Do step-by-step execution, asking user for keypress to continue", + action='store_true') +rp.add_argument("--freezecg", help="Use freeze cgroup (path:state)") +rp.add_argument("--user", help="Run CRIU as regular user", action='store_true') +rp.add_argument("--rpc", + help="Run CRIU via RPC rather than CLI", + action='store_true') -rp.add_argument("--page-server", help = "Use page server dump", action = 'store_true') -rp.add_argument("-p", "--parallel", help = "Run test in parallel") -rp.add_argument("--dry-run", help="Don't run tests, just pretend to", action='store_true') +rp.add_argument("--page-server", + help="Use page server dump", + action='store_true') +rp.add_argument("--remote", + help="Use remote option for diskless C/R", + action='store_true') +rp.add_argument("-p", "--parallel", help="Run test in parallel") +rp.add_argument("--dry-run", + help="Don't run tests, just pretend to", + action='store_true') rp.add_argument("--script", help="Add script to get notified by criu") -rp.add_argument("-k", "--keep-img", help = "Whether or not to keep images after test", - choices = ['always', 'never', 'failed'], default = 'failed') -rp.add_argument("--report", help = "Generate summary report in directory") -rp.add_argument("--keep-going", help = "Keep running tests in spite of failures", action = 'store_true') -rp.add_argument("--ignore-taint", help = "Don't care about a non-zero 
kernel taint flag", action = 'store_true') -rp.add_argument("--lazy-pages", help = "restore pages on demand", action = 'store_true') -rp.add_argument("--lazy-migrate", help = "restore pages on demand", action = 'store_true') -rp.add_argument("--remote-lazy-pages", help = "simulate lazy migration", action = 'store_true') -rp.add_argument("--tls", help = "use TLS for migration", action = 'store_true') -rp.add_argument("--title", help = "A test suite title", default = "criu") -rp.add_argument("--show-stats", help = "Show criu statistics", action = 'store_true') -rp.add_argument("--criu-bin", help = "Path to criu binary", default = '../criu/criu') -rp.add_argument("--crit-bin", help = "Path to crit binary", default = '../crit/crit') +rp.add_argument("-k", + "--keep-img", + help="Whether or not to keep images after test", + choices=['always', 'never', 'failed'], + default='failed') +rp.add_argument("--report", help="Generate summary report in directory") +rp.add_argument("--keep-going", + help="Keep running tests in spite of failures", + action='store_true') +rp.add_argument("--ignore-taint", + help="Don't care about a non-zero kernel taint flag", + action='store_true') +rp.add_argument("--lazy-pages", + help="restore pages on demand", + action='store_true') +rp.add_argument("--lazy-migrate", + help="restore pages on demand", + action='store_true') +rp.add_argument("--remote-lazy-pages", + help="simulate lazy migration", + action='store_true') +rp.add_argument("--tls", help="use TLS for migration", action='store_true') +rp.add_argument("--title", help="A test suite title", default="criu") +rp.add_argument("--show-stats", + help="Show criu statistics", + action='store_true') +rp.add_argument("--criu-bin", + help="Path to criu binary", + default='../criu/criu') +rp.add_argument("--crit-bin", + help="Path to crit binary", + default='../crit/crit') -lp = sp.add_parser("list", help = "List tests") -lp.set_defaults(action = list_tests) -lp.add_argument('-i', '--info', help = 
"Show more info about tests", action = 'store_true') +lp = sp.add_parser("list", help="List tests") +lp.set_defaults(action=list_tests) +lp.add_argument('-i', + '--info', + help="Show more info about tests", + action='store_true') -gp = sp.add_parser("group", help = "Generate groups") -gp.set_defaults(action = group_tests) -gp.add_argument("-m", "--max-size", help = "Maximum number of tests in group") -gp.add_argument("-n", "--name", help = "Common name for group tests") -gp.add_argument("-x", "--exclude", help = "Exclude tests from --all run", action = 'append') +gp = sp.add_parser("group", help="Generate groups") +gp.set_defaults(action=group_tests) +gp.add_argument("-m", "--max-size", help="Maximum number of tests in group") +gp.add_argument("-n", "--name", help="Common name for group tests") +gp.add_argument("-x", + "--exclude", + help="Exclude tests from --all run", + action='append') -cp = sp.add_parser("clean", help = "Clean something") -cp.set_defaults(action = clean_stuff) -cp.add_argument("what", choices = ['nsroot']) +cp = sp.add_parser("clean", help="Clean something") +cp.set_defaults(action=clean_stuff) +cp.add_argument("what", choices=['nsroot']) opts = vars(p.parse_args()) if opts.get('sat', False): - opts['keep_img'] = 'always' + opts['keep_img'] = 'always' if opts['debug']: - sys.settrace(traceit) + sys.settrace(traceit) if opts['action'] == 'run': - criu.available() + criu.available() for tst in test_classes.values(): - tst.available() + tst.available() opts['action'](opts) for tst in test_classes.values(): - tst.cleanup() + tst.cleanup() From 4c1ee3e227045fc1dc07b10ac7a538a68299693b Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Mon, 29 Jul 2019 22:07:10 +0100 Subject: [PATCH 0172/2030] test/other: Resolve Py3 compatibility issues When Python 2 is not installed we assume that /usr/bin/python refers to version 3 of Python and the executable /usr/bin/python2 does not exist. 
This commit also resolves a compatibility issue with Popen where in Py2 file descriptors will be inherited by the child process and in Py3 they will be closed by default. Signed-off-by: Radostin Stoyanov --- soccr/test/run.py | 2 +- test/check_actions.py | 2 +- test/others/ext-tty/run.py | 2 +- test/others/mounts/mounts.sh | 2 +- test/others/rpc/config_file.py | 15 ++++++++++----- test/others/rpc/errno.py | 2 +- test/others/rpc/ps_test.py | 2 +- test/others/rpc/restore-loop.py | 2 +- test/others/rpc/test.py | 2 +- test/others/rpc/version.py | 17 ++++++++++------- test/others/shell-job/run.py | 6 +++--- 11 files changed, 31 insertions(+), 23 deletions(-) diff --git a/soccr/test/run.py b/soccr/test/run.py index 446584a71..1ffe58a58 100644 --- a/soccr/test/run.py +++ b/soccr/test/run.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python import sys, os import hashlib diff --git a/test/check_actions.py b/test/check_actions.py index ae909e668..4973e3938 100755 --- a/test/check_actions.py +++ b/test/check_actions.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python import sys import os diff --git a/test/others/ext-tty/run.py b/test/others/ext-tty/run.py index b1dcb4a5a..2c0bacc84 100755 --- a/test/others/ext-tty/run.py +++ b/test/others/ext-tty/run.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python import subprocess import os, sys, time, signal, pty diff --git a/test/others/mounts/mounts.sh b/test/others/mounts/mounts.sh index a9a1cc80c..19116d0cf 100755 --- a/test/others/mounts/mounts.sh +++ b/test/others/mounts/mounts.sh @@ -20,7 +20,7 @@ for i in `cat /proc/self/mounts | awk '{ print $2 }'`; do umount -l $i done -python2 mounts.py +python mounts.py kill $INMNTNS_PID while :; do sleep 10 diff --git a/test/others/rpc/config_file.py b/test/others/rpc/config_file.py index 3579ac76f..e4b395e31 100755 --- a/test/others/rpc/config_file.py +++ b/test/others/rpc/config_file.py @@ -1,4 +1,4 @@ -#!/usr/bin/python2 +#!/usr/bin/python import 
os import socket @@ -15,10 +15,15 @@ does_not_exist = 'does-not.exist' def setup_swrk(): print('Connecting to CRIU in swrk mode.') - css = socket.socketpair(socket.AF_UNIX, socket.SOCK_SEQPACKET) - swrk = subprocess.Popen(['./criu', "swrk", "%d" % css[0].fileno()]) - css[0].close() - return swrk, css[1] + s1, s2 = socket.socketpair(socket.AF_UNIX, socket.SOCK_SEQPACKET) + + kwargs = {} + if sys.version_info.major == 3: + kwargs["pass_fds"] = [s1.fileno()] + + swrk = subprocess.Popen(['./criu', "swrk", "%d" % s1.fileno()], **kwargs) + s1.close() + return swrk, s2 def setup_config_file(content): diff --git a/test/others/rpc/errno.py b/test/others/rpc/errno.py index 49cb622de..01a6eee7b 100755 --- a/test/others/rpc/errno.py +++ b/test/others/rpc/errno.py @@ -1,4 +1,4 @@ -#!/usr/bin/python2 +#!/usr/bin/python # Test criu errno import socket, os, errno diff --git a/test/others/rpc/ps_test.py b/test/others/rpc/ps_test.py index d16efd3f6..b51357d42 100755 --- a/test/others/rpc/ps_test.py +++ b/test/others/rpc/ps_test.py @@ -1,4 +1,4 @@ -#!/usr/bin/python2 +#!/usr/bin/python import socket, os, sys, errno import rpc_pb2 as rpc diff --git a/test/others/rpc/restore-loop.py b/test/others/rpc/restore-loop.py index c81567426..84a2ce56d 100755 --- a/test/others/rpc/restore-loop.py +++ b/test/others/rpc/restore-loop.py @@ -1,4 +1,4 @@ -#!/usr/bin/python2 +#!/usr/bin/python import socket, os, sys import rpc_pb2 as rpc diff --git a/test/others/rpc/test.py b/test/others/rpc/test.py index 9a35e0e97..80f6338f4 100755 --- a/test/others/rpc/test.py +++ b/test/others/rpc/test.py @@ -1,4 +1,4 @@ -#!/usr/bin/python2 +#!/usr/bin/python import socket, os, sys import rpc_pb2 as rpc diff --git a/test/others/rpc/version.py b/test/others/rpc/version.py index f978c6c37..3b8f1b961 100755 --- a/test/others/rpc/version.py +++ b/test/others/rpc/version.py @@ -1,4 +1,4 @@ -#!/usr/bin/python2 +#!/usr/bin/python import socket import sys @@ -7,11 +7,14 @@ import subprocess print('Connecting to CRIU in 
swrk mode to check the version:') -css = socket.socketpair(socket.AF_UNIX, socket.SOCK_SEQPACKET) -swrk = subprocess.Popen(['./criu', "swrk", "%d" % css[0].fileno()]) -css[0].close() +s1, s2 = socket.socketpair(socket.AF_UNIX, socket.SOCK_SEQPACKET) -s = css[1] +kwargs = {} +if sys.version_info.major == 3: + kwargs["pass_fds"] = [s2.fileno()] + +swrk = subprocess.Popen(['./criu', "swrk", "%d" % s2.fileno()], **kwargs) +s2.close() # Create criu msg, set it's type to dump request # and set dump options. Checkout more options in protobuf/rpc.proto @@ -19,12 +22,12 @@ req = rpc.criu_req() req.type = rpc.VERSION # Send request -s.send(req.SerializeToString()) +s1.send(req.SerializeToString()) # Recv response resp = rpc.criu_resp() MAX_MSG_SIZE = 1024 -resp.ParseFromString(s.recv(MAX_MSG_SIZE)) +resp.ParseFromString(s1.recv(MAX_MSG_SIZE)) if resp.type != rpc.VERSION: print('RPC: Unexpected msg type') diff --git a/test/others/shell-job/run.py b/test/others/shell-job/run.py index bd5c42509..a59945d6a 100755 --- a/test/others/shell-job/run.py +++ b/test/others/shell-job/run.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python import os, pty, sys, subprocess import termios, fcntl, time @@ -9,11 +9,11 @@ os.chdir(os.getcwd()) def create_pty(): (fd1, fd2) = pty.openpty() - return (os.fdopen(fd1, "w+"), os.fdopen(fd2, "w+")) + return (os.fdopen(fd1, "wb"), os.fdopen(fd2, "wb")) if not os.access("work", os.X_OK): - os.mkdir("work", 0755) + os.mkdir("work", 0o755) open("running", "w").close() m, s = create_pty() From 6b615ca15277fc14b52a09b4eb18314b7c6cbe75 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Wed, 31 Jul 2019 09:46:18 +0100 Subject: [PATCH 0173/2030] test/others: Reuse setup_swrk() Reduce code duplication by taking setup_swrk() function into a separate module that can be reused in multiple places. 
Signed-off-by: Radostin Stoyanov --- test/others/rpc/config_file.py | 17 ++--------------- test/others/rpc/setup_swrk.py | 16 ++++++++++++++++ test/others/rpc/version.py | 13 +++---------- 3 files changed, 21 insertions(+), 25 deletions(-) create mode 100644 test/others/rpc/setup_swrk.py diff --git a/test/others/rpc/config_file.py b/test/others/rpc/config_file.py index e4b395e31..7b07bc145 100755 --- a/test/others/rpc/config_file.py +++ b/test/others/rpc/config_file.py @@ -1,31 +1,18 @@ #!/usr/bin/python import os -import socket import sys import rpc_pb2 as rpc import argparse -import subprocess from tempfile import mkstemp import time +from setup_swrk import setup_swrk + log_file = 'config_file_test.log' does_not_exist = 'does-not.exist' -def setup_swrk(): - print('Connecting to CRIU in swrk mode.') - s1, s2 = socket.socketpair(socket.AF_UNIX, socket.SOCK_SEQPACKET) - - kwargs = {} - if sys.version_info.major == 3: - kwargs["pass_fds"] = [s1.fileno()] - - swrk = subprocess.Popen(['./criu', "swrk", "%d" % s1.fileno()], **kwargs) - s1.close() - return swrk, s2 - - def setup_config_file(content): # Creating a temporary file which will be used as configuration file. 
fd, path = mkstemp() diff --git a/test/others/rpc/setup_swrk.py b/test/others/rpc/setup_swrk.py new file mode 100644 index 000000000..c7f84f952 --- /dev/null +++ b/test/others/rpc/setup_swrk.py @@ -0,0 +1,16 @@ +import sys +import socket +import subprocess + +def setup_swrk(): + print('Connecting to CRIU in swrk mode.') + s1, s2 = socket.socketpair(socket.AF_UNIX, socket.SOCK_SEQPACKET) + + kwargs = {} + if sys.version_info.major == 3: + kwargs["pass_fds"] = [s1.fileno()] + + swrk = subprocess.Popen(['./criu', "swrk", "%d" % s1.fileno()], **kwargs) + s1.close() + return swrk, s2 + diff --git a/test/others/rpc/version.py b/test/others/rpc/version.py index 3b8f1b961..9d7fa745b 100755 --- a/test/others/rpc/version.py +++ b/test/others/rpc/version.py @@ -1,20 +1,13 @@ #!/usr/bin/python -import socket import sys import rpc_pb2 as rpc -import subprocess + +from setup_swrk import setup_swrk print('Connecting to CRIU in swrk mode to check the version:') -s1, s2 = socket.socketpair(socket.AF_UNIX, socket.SOCK_SEQPACKET) - -kwargs = {} -if sys.version_info.major == 3: - kwargs["pass_fds"] = [s2.fileno()] - -swrk = subprocess.Popen(['./criu', "swrk", "%d" % s2.fileno()], **kwargs) -s2.close() +swrk, s1 = setup_swrk() # Create criu msg, set it's type to dump request # and set dump options. 
Checkout more options in protobuf/rpc.proto From 34dbf67b240f6fb97f576a81c96ea6ce46a8e288 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Mon, 1 Jul 2019 17:43:56 +0300 Subject: [PATCH 0174/2030] pyimages: Add pb2dict.py to checked and fix warnings/errors Signed-off-by: Pavel Emelyanov --- Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile b/Makefile index 0b49364fb..0140330e1 100644 --- a/Makefile +++ b/Makefile @@ -384,6 +384,7 @@ lint: flake8 --config=scripts/flake8.cfg test/zdtm.py flake8 --config=scripts/flake8.cfg test/inhfd/*.py flake8 --config=scripts/flake8.cfg test/others/rpc/config_file.py + flake8 --config=scripts/flake8.cfg lib/py/images/pb2dict.py include Makefile.install From 72402c6e7a4a9a70eefd534f91bcbe068736276d Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Fri, 28 Jun 2019 20:17:35 +0300 Subject: [PATCH 0175/2030] py: Fix tabs in code comments These were left by yapf formatter Signed-off-by: Pavel Emelyanov --- coredump/criu_coredump/coredump.py | 130 ++++++++++++++--------------- coredump/criu_coredump/elf.py | 12 +-- lib/py/criu.py | 74 ++++++++-------- lib/py/images/images.py | 92 ++++++++++---------- scripts/magic-gen.py | 4 +- 5 files changed, 156 insertions(+), 156 deletions(-) diff --git a/coredump/criu_coredump/coredump.py b/coredump/criu_coredump/coredump.py index 9b2c6c60c..bc53a7705 100644 --- a/coredump/criu_coredump/coredump.py +++ b/coredump/criu_coredump/coredump.py @@ -9,24 +9,24 @@ # # On my x86_64 systems with fresh kernel ~3.17 core dump looks like: # -# 1) Elf file header; -# 2) PT_NOTE program header describing notes section; -# 3) PT_LOAD program headers for (almost?) 
each vma; -# 4) NT_PRPSINFO note with elf_prpsinfo inside; -# 5) An array of notes for each thread of the process: -# NT_PRSTATUS note with elf_prstatus inside; -# NT_FPREGSET note with elf_fpregset inside; -# NT_X86_XSTATE note with x86 extended state using xsave; -# NT_SIGINFO note with siginfo_t inside; -# 6) NT_AUXV note with auxv; -# 7) NT_FILE note with mapped files; -# 8) VMAs themselves; +# 1) Elf file header; +# 2) PT_NOTE program header describing notes section; +# 3) PT_LOAD program headers for (almost?) each vma; +# 4) NT_PRPSINFO note with elf_prpsinfo inside; +# 5) An array of notes for each thread of the process: +# NT_PRSTATUS note with elf_prstatus inside; +# NT_FPREGSET note with elf_fpregset inside; +# NT_X86_XSTATE note with x86 extended state using xsave; +# NT_SIGINFO note with siginfo_t inside; +# 6) NT_AUXV note with auxv; +# 7) NT_FILE note with mapped files; +# 8) VMAs themselves; # # Or, you can represent it in less details as: -# 1) Elf file header; -# 2) Program table; -# 3) Notes; -# 4) VMAs contents; +# 1) Elf file header; +# 2) Program table; +# 3) Notes; +# 4) VMAs contents; # import io import elf @@ -65,9 +65,9 @@ class elf_note: class coredump: """ - A class to keep elf core dump components inside and - functions to properly write them to file. - """ + A class to keep elf core dump components inside and + functions to properly write them to file. + """ ehdr = None # Elf ehdr; phdrs = [] # Array of Phdrs; notes = [] # Array of elf_notes; @@ -77,8 +77,8 @@ class coredump: def write(self, f): """ - Write core dump to file f. - """ + Write core dump to file f. + """ buf = io.BytesIO() buf.write(self.ehdr) @@ -117,8 +117,8 @@ class coredump: class coredump_generator: """ - Generate core dump from criu images. - """ + Generate core dump from criu images. 
+ """ coredumps = {} # coredumps by pid; pstree = {} # process info by pid; @@ -129,8 +129,8 @@ class coredump_generator: def _img_open_and_strip(self, name, single=False, pid=None): """ - Load criu image and strip it from magic and redundant list. - """ + Load criu image and strip it from magic and redundant list. + """ path = self._imgs_dir + "/" + name if pid: path += "-" + str(pid) @@ -146,8 +146,8 @@ class coredump_generator: def __call__(self, imgs_dir): """ - Parse criu images stored in directory imgs_dir to fill core dumps. - """ + Parse criu images stored in directory imgs_dir to fill core dumps. + """ self._imgs_dir = imgs_dir pstree = self._img_open_and_strip("pstree") @@ -171,9 +171,9 @@ class coredump_generator: def write(self, coredumps_dir, pid=None): """ - Write core dumpt to cores_dir directory. Specify pid to choose - core dump of only one process. - """ + Write core dumpt to cores_dir directory. Specify pid to choose + core dump of only one process. + """ for p in self.coredumps: if pid and p != pid: continue @@ -182,8 +182,8 @@ class coredump_generator: def _gen_coredump(self, pid): """ - Generate core dump for pid. - """ + Generate core dump for pid. + """ cd = coredump() # Generate everything backwards so it is easier to calculate offset. @@ -196,8 +196,8 @@ class coredump_generator: def _gen_ehdr(self, pid, phdrs): """ - Generate elf header for process pid with program headers phdrs. - """ + Generate elf header for process pid with program headers phdrs. + """ ehdr = elf.Elf64_Ehdr() ctypes.memset(ctypes.addressof(ehdr), 0, ctypes.sizeof(ehdr)) @@ -223,8 +223,8 @@ class coredump_generator: def _gen_phdrs(self, pid, notes, vmas): """ - Generate program headers for process pid. - """ + Generate program headers for process pid. + """ phdrs = [] offset = ctypes.sizeof(elf.Elf64_Ehdr()) @@ -272,8 +272,8 @@ class coredump_generator: def _gen_prpsinfo(self, pid): """ - Generate NT_PRPSINFO note for process pid. 
- """ + Generate NT_PRPSINFO note for process pid. + """ pstree = self.pstree[pid] core = self.cores[pid] @@ -324,8 +324,8 @@ class coredump_generator: def _gen_prstatus(self, pid, tid): """ - Generate NT_PRSTATUS note for thread tid of process pid. - """ + Generate NT_PRSTATUS note for thread tid of process pid. + """ core = self.cores[tid] regs = core["thread_info"]["gpregs"] pstree = self.pstree[pid] @@ -382,8 +382,8 @@ class coredump_generator: def _gen_fpregset(self, pid, tid): """ - Generate NT_FPREGSET note for thread tid of process pid. - """ + Generate NT_FPREGSET note for thread tid of process pid. + """ core = self.cores[tid] regs = core["thread_info"]["fpregs"] @@ -402,7 +402,7 @@ class coredump_generator: *regs["st_space"]) fpregset.xmm_space = (ctypes.c_uint * len(regs["xmm_space"]))( *regs["xmm_space"]) - #fpregset.padding = regs["padding"] unused + #fpregset.padding = regs["padding"] unused nhdr = elf.Elf64_Nhdr() nhdr.n_namesz = 5 @@ -418,8 +418,8 @@ class coredump_generator: def _gen_x86_xstate(self, pid, tid): """ - Generate NT_X86_XSTATE note for thread tid of process pid. - """ + Generate NT_X86_XSTATE note for thread tid of process pid. + """ core = self.cores[tid] fpregs = core["thread_info"]["fpregs"] @@ -459,8 +459,8 @@ class coredump_generator: def _gen_siginfo(self, pid, tid): """ - Generate NT_SIGINFO note for thread tid of process pid. - """ + Generate NT_SIGINFO note for thread tid of process pid. + """ siginfo = elf.siginfo_t() # FIXME zeroify everything for now ctypes.memset(ctypes.addressof(siginfo), 0, ctypes.sizeof(siginfo)) @@ -479,8 +479,8 @@ class coredump_generator: def _gen_auxv(self, pid): """ - Generate NT_AUXV note for thread tid of process pid. - """ + Generate NT_AUXV note for thread tid of process pid. + """ mm = self.mms[pid] num_auxv = len(mm["mm_saved_auxv"]) / 2 @@ -506,8 +506,8 @@ class coredump_generator: def _gen_files(self, pid): """ - Generate NT_FILE note for process pid. 
- """ + Generate NT_FILE note for process pid. + """ mm = self.mms[pid] class mmaped_file_info: @@ -597,8 +597,8 @@ class coredump_generator: def _gen_notes(self, pid): """ - Generate notes for core dump of process pid. - """ + Generate notes for core dump of process pid. + """ notes = [] notes.append(self._gen_prpsinfo(pid)) @@ -622,8 +622,8 @@ class coredump_generator: def _get_page(self, pid, page_no): """ - Try to find memory page page_no in pages.img image for process pid. - """ + Try to find memory page page_no in pages.img image for process pid. + """ pagemap = self.pagemaps[pid] # First entry is pagemap_head, we will need it later to open @@ -654,8 +654,8 @@ class coredump_generator: def _gen_mem_chunk(self, pid, vma, size): """ - Obtain vma contents for process pid. - """ + Obtain vma contents for process pid. + """ f = None if size == 0: @@ -749,8 +749,8 @@ class coredump_generator: def _gen_cmdline(self, pid): """ - Generate full command with arguments. - """ + Generate full command with arguments. + """ mm = self.mms[pid] vma = {} @@ -768,8 +768,8 @@ class coredump_generator: def _get_vma_dump_size(self, vma): """ - Calculate amount of vma to put into core dump. - """ + Calculate amount of vma to put into core dump. + """ if vma["status"] & status["VMA_AREA_VVAR"] or \ vma["status"] & status["VMA_AREA_VSYSCALL"] or \ vma["status"] & status["VMA_AREA_VDSO"]: @@ -791,8 +791,8 @@ class coredump_generator: def _get_vma_flags(self, vma): """ - Convert vma flags int elf flags. - """ + Convert vma flags int elf flags. + """ flags = 0 if vma['prot'] & prot["PROT_READ"]: @@ -808,8 +808,8 @@ class coredump_generator: def _gen_vmas(self, pid): """ - Generate vma contents for core dump for process pid. - """ + Generate vma contents for core dump for process pid. 
+ """ mm = self.mms[pid] class vma_class: diff --git a/coredump/criu_coredump/elf.py b/coredump/criu_coredump/elf.py index 65da583c3..e65919e6b 100644 --- a/coredump/criu_coredump/elf.py +++ b/coredump/criu_coredump/elf.py @@ -120,9 +120,9 @@ NT_FPREGSET = 2 # #define NT_FPREGSET 2 /* Contains copy of f NT_PRPSINFO = 3 # #define NT_PRPSINFO 3 /* Contains copy of prpsinfo struct */ NT_AUXV = 6 # #define NT_AUXV 6 /* Contains copy of auxv array */ NT_SIGINFO = 0x53494749 # #define NT_SIGINFO 0x53494749 /* Contains copy of siginfo_t, -# size might increase */ +# size might increase */ NT_FILE = 0x46494c45 # #define NT_FILE 0x46494c45 /* Contains information about mapped -# files */ +# files */ NT_X86_XSTATE = 0x202 # #define NT_X86_XSTATE 0x202 /* x86 extended state using xsave */ @@ -259,7 +259,7 @@ class user_regs_struct(ctypes.Structure): # struct user_regs_struct ] # }; -#elf_greg_t = ctypes.c_ulonglong +#elf_greg_t = ctypes.c_ulonglong #ELF_NGREG = ctypes.sizeof(user_regs_struct)/ctypes.sizeof(elf_greg_t) #elf_gregset_t = elf_greg_t*ELF_NGREG elf_gregset_t = user_regs_struct @@ -450,7 +450,7 @@ class _siginfo_t_U_sigpoll(ctypes.Structure): # struct ] # } _sigpoll; - # /* SIGSYS. */ + # /* SIGSYS. */ class _siginfo_t_U_sigsys(ctypes.Structure): # struct _fields_ = [ # { ("_call_addr", ctypes.c_void_p @@ -515,7 +515,7 @@ class _siginfo_t_U(ctypes.Union): # union # int si_fd; # } _sigpoll; # - # /* SIGSYS. */ + # /* SIGSYS. */ ("_sigsys", _siginfo_t_U_sigpoll) # struct # { # void *_call_addr; /* Calling user insn. */ @@ -587,7 +587,7 @@ class siginfo_t(ctypes.Structure): # typedef struct # int si_fd; # } _sigpoll; # - # /* SIGSYS. */ + # /* SIGSYS. */ # struct # { # void *_call_addr; /* Calling user insn. */ diff --git a/lib/py/criu.py b/lib/py/criu.py index d94fea9e1..f3e018095 100644 --- a/lib/py/criu.py +++ b/lib/py/criu.py @@ -11,8 +11,8 @@ import pycriu.rpc_pb2 as rpc class _criu_comm: """ - Base class for communication classes. 
- """ + Base class for communication classes. + """ COMM_SK = 0 COMM_FD = 1 COMM_BIN = 2 @@ -22,22 +22,22 @@ class _criu_comm: def connect(self, daemon): """ - Connect to criu and return socket object. - daemon -- is for whether or not criu should daemonize if executing criu from binary(comm_bin). - """ + Connect to criu and return socket object. + daemon -- is for whether or not criu should daemonize if executing criu from binary(comm_bin). + """ pass def disconnect(self): """ - Disconnect from criu. - """ + Disconnect from criu. + """ pass class _criu_comm_sk(_criu_comm): """ - Communication class for unix socket. - """ + Communication class for unix socket. + """ def __init__(self, sk_path): self.comm_type = self.COMM_SK @@ -55,8 +55,8 @@ class _criu_comm_sk(_criu_comm): class _criu_comm_fd(_criu_comm): """ - Communication class for file descriptor. - """ + Communication class for file descriptor. + """ def __init__(self, fd): self.comm_type = self.COMM_FD @@ -74,8 +74,8 @@ class _criu_comm_fd(_criu_comm): class _criu_comm_bin(_criu_comm): """ - Communication class for binary. - """ + Communication class for binary. + """ def __init__(self, bin_path): self.comm_type = self.COMM_BIN @@ -139,8 +139,8 @@ class _criu_comm_bin(_criu_comm): class CRIUException(Exception): """ - Exception class for handling and storing criu errors. - """ + Exception class for handling and storing criu errors. + """ typ = None _str = None @@ -150,8 +150,8 @@ class CRIUException(Exception): class CRIUExceptionInternal(CRIUException): """ - Exception class for handling and storing internal errors. - """ + Exception class for handling and storing internal errors. + """ def __init__(self, typ, s): self.typ = typ @@ -161,8 +161,8 @@ class CRIUExceptionInternal(CRIUException): class CRIUExceptionExternal(CRIUException): """ - Exception class for handling and storing criu RPC errors. - """ + Exception class for handling and storing criu RPC errors. 
+ """ def __init__(self, req_typ, resp_typ, errno): self.typ = req_typ @@ -196,8 +196,8 @@ class CRIUExceptionExternal(CRIUException): class criu: """ - Call criu through RPC. - """ + Call criu through RPC. + """ opts = None #CRIU options in pb format _comm = None #Communication method @@ -209,26 +209,26 @@ class criu: def use_sk(self, sk_name): """ - Access criu using unix socket which that belongs to criu service daemon. - """ + Access criu using unix socket which that belongs to criu service daemon. + """ self._comm = _criu_comm_sk(sk_name) def use_fd(self, fd): """ - Access criu using provided fd. - """ + Access criu using provided fd. + """ self._comm = _criu_comm_fd(fd) def use_binary(self, bin_name): """ - Access criu by execing it using provided path to criu binary. - """ + Access criu by execing it using provided path to criu binary. + """ self._comm = _criu_comm_bin(bin_name) def _send_req_and_recv_resp(self, req): """ - As simple as send request and receive response. - """ + As simple as send request and receive response. + """ # In case of self-dump we need to spawn criu swrk detached # from our current process, as criu has a hard time separating # process resources from its own if criu is located in a same @@ -262,8 +262,8 @@ class criu: def check(self): """ - Checks whether the kernel support is up-to-date. - """ + Checks whether the kernel support is up-to-date. + """ req = rpc.criu_req() req.type = rpc.CHECK @@ -274,8 +274,8 @@ class criu: def dump(self): """ - Checkpoint a process/tree identified by opts.pid. - """ + Checkpoint a process/tree identified by opts.pid. + """ req = rpc.criu_req() req.type = rpc.DUMP req.opts.MergeFrom(self.opts) @@ -289,8 +289,8 @@ class criu: def pre_dump(self): """ - Checkpoint a process/tree identified by opts.pid. - """ + Checkpoint a process/tree identified by opts.pid. 
+ """ req = rpc.criu_req() req.type = rpc.PRE_DUMP req.opts.MergeFrom(self.opts) @@ -304,8 +304,8 @@ class criu: def restore(self): """ - Restore a process/tree. - """ + Restore a process/tree. + """ req = rpc.criu_req() req.type = rpc.RESTORE req.opts.MergeFrom(self.opts) diff --git a/lib/py/images/images.py b/lib/py/images/images.py index 28c6d9e1f..f4517d845 100644 --- a/lib/py/images/images.py +++ b/lib/py/images/images.py @@ -12,8 +12,8 @@ # SIZE ::= "32 bit integer, equals the PAYLOAD length" # # Images v1.1 NOTE: MAGIC now consist of 2 32 bit integers, first one is -# MAGIC_COMMON or MAGIC_SERVICE and the second one is same as MAGIC -# in images V1.0. We don't keep "first" magic in json images. +# MAGIC_COMMON or MAGIC_SERVICE and the second one is same as MAGIC +# in images V1.0. We don't keep "first" magic in json images. # # In order to convert images to human-readable format, we use dict(json). # Using json not only allows us to easily read\write images, but also @@ -23,18 +23,18 @@ # Using dict(json) format, criu images can be described like: # # { -# 'magic' : 'FOO', -# 'entries' : [ -# entry, -# ... -# ] +# 'magic' : 'FOO', +# 'entries' : [ +# entry, +# ... +# ] # } # # Entry, in its turn, could be described as: # # { -# pb_msg, -# 'extra' : extra_msg +# pb_msg, +# 'extra' : extra_msg # } # import io @@ -72,23 +72,23 @@ class MagicException(Exception): # format to/from dict(json). class entry_handler: """ - Generic class to handle loading/dumping criu images - entries from/to bin format to/from dict(json). - """ + Generic class to handle loading/dumping criu images + entries from/to bin format to/from dict(json). + """ def __init__(self, payload, extra_handler=None): """ - Sets payload class and extra handler class. - """ + Sets payload class and extra handler class. + """ self.payload = payload self.extra_handler = extra_handler def load(self, f, pretty=False, no_payload=False): """ - Convert criu image entries from binary format to dict(json). 
- Takes a file-like object and returnes a list with entries in - dict(json) format. - """ + Convert criu image entries from binary format to dict(json). + Takes a file-like object and returnes a list with entries in + dict(json) format. + """ entries = [] while True: @@ -128,17 +128,17 @@ class entry_handler: def loads(self, s, pretty=False): """ - Same as load(), but takes a string as an argument. - """ + Same as load(), but takes a string as an argument. + """ f = io.BytesIO(s) return self.load(f, pretty) def dump(self, entries, f): """ - Convert criu image entries from dict(json) format to binary. - Takes a list of entries and a file-like object to write entries - in binary format to. - """ + Convert criu image entries from dict(json) format to binary. + Takes a list of entries and a file-like object to write entries + in binary format to. + """ for entry in entries: extra = entry.pop('extra', None) @@ -156,17 +156,17 @@ class entry_handler: def dumps(self, entries): """ - Same as dump(), but doesn't take file-like object and just - returns a string. - """ + Same as dump(), but doesn't take file-like object and just + returns a string. + """ f = io.BytesIO('') self.dump(entries, f) return f.read() def count(self, f): """ - Counts the number of top-level object in the image file - """ + Counts the number of top-level object in the image file + """ entries = 0 while True: @@ -183,10 +183,10 @@ class entry_handler: # Special handler for pagemap.img class pagemap_handler: """ - Special entry handler for pagemap.img, which is unique in a way - that it has a header of pagemap_head type followed by entries - of pagemap_entry type. - """ + Special entry handler for pagemap.img, which is unique in a way + that it has a header of pagemap_head type followed by entries + of pagemap_entry type. 
+ """ def load(self, f, pretty=False, no_payload=False): entries = [] @@ -547,10 +547,10 @@ def __rhandler(f): def load(f, pretty=False, no_payload=False): """ - Convert criu image from binary format to dict(json). - Takes a file-like object to read criu image from. - Returns criu image in dict(json) format. - """ + Convert criu image from binary format to dict(json). + Takes a file-like object to read criu image from. + Returns criu image in dict(json) format. + """ image = {} m, handler = __rhandler(f) @@ -574,18 +574,18 @@ def info(f): def loads(s, pretty=False): """ - Same as load(), but takes a string. - """ + Same as load(), but takes a string. + """ f = io.BytesIO(s) return load(f, pretty) def dump(img, f): """ - Convert criu image from dict(json) format to binary. - Takes an image in dict(json) format and file-like - object to write to. - """ + Convert criu image from dict(json) format to binary. + Takes an image in dict(json) format and file-like + object to write to. + """ m = img['magic'] magic_val = magic.by_name[img['magic']] @@ -609,9 +609,9 @@ def dump(img, f): def dumps(img): """ - Same as dump(), but takes only an image and returns - a string. - """ + Same as dump(), but takes only an image and returns + a string. + """ f = io.BytesIO(b'') dump(img, f) return f.getvalue() diff --git a/scripts/magic-gen.py b/scripts/magic-gen.py index 3d9777735..3b1f29fb5 100755 --- a/scripts/magic-gen.py +++ b/scripts/magic-gen.py @@ -15,8 +15,8 @@ def main(argv): out = open(magic_py, 'w+') # all_magic is used to parse constructions like: - # #define PAGEMAP_MAGIC 0x56084025 - # #define SHMEM_PAGEMAP_MAGIC PAGEMAP_MAGIC + # #define PAGEMAP_MAGIC 0x56084025 + # #define SHMEM_PAGEMAP_MAGIC PAGEMAP_MAGIC all_magic = {} # and magic is used to store only unique magic. 
magic = {} From c703e3fd8404e506cc6156719b953ea0580d59a4 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 11 Sep 2019 11:29:31 +0300 Subject: [PATCH 0176/2030] criu: Version 3.13 Here we have some bugfixes, huuuge *.py patch for coding style and nice set of new features like 32bit for ARM, TLS for page server and new mode for CGroups. Signed-off-by: Pavel Emelyanov --- Makefile.versions | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Makefile.versions b/Makefile.versions index a7820b3b3..f3adcb0a6 100644 --- a/Makefile.versions +++ b/Makefile.versions @@ -1,10 +1,10 @@ # # CRIU version. CRIU_VERSION_MAJOR := 3 -CRIU_VERSION_MINOR := 12 -CRIU_VERSION_SUBLEVEL := 1 +CRIU_VERSION_MINOR := 13 +CRIU_VERSION_SUBLEVEL := CRIU_VERSION_EXTRA := -CRIU_VERSION_NAME := Ice Penguin +CRIU_VERSION_NAME := Silicon Willet CRIU_VERSION := $(CRIU_VERSION_MAJOR)$(if $(CRIU_VERSION_MINOR),.$(CRIU_VERSION_MINOR))$(if $(CRIU_VERSION_SUBLEVEL),.$(CRIU_VERSION_SUBLEVEL))$(if $(CRIU_VERSION_EXTRA),.$(CRIU_VERSION_EXTRA)) export CRIU_VERSION_MAJOR CRIU_VERSION_MINOR CRIU_VERSION_SUBLEVEL From 08f3b57ab324aea55e7a92ecbe961be60df4983d Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Mon, 1 Jul 2019 17:40:44 +0300 Subject: [PATCH 0177/2030] py: Manual fixlets of code formatting Signed-off-by: Pavel Emelyanov --- coredump/criu_coredump/coredump.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/coredump/criu_coredump/coredump.py b/coredump/criu_coredump/coredump.py index bc53a7705..68dc16bf2 100644 --- a/coredump/criu_coredump/coredump.py +++ b/coredump/criu_coredump/coredump.py @@ -645,8 +645,7 @@ class coredump_generator: ppid = self.pstree[pid]["ppid"] return self._get_page(ppid, page_no) else: - with open(self._imgs_dir + "/" + "pages-" + str(pages_id) + - ".img") as f: + with open(self._imgs_dir + "/pages-%s.img" % pages_id) as f: f.seek(off * PAGESIZE) return f.read(PAGESIZE) From 3eed47223b255c092f3aba31b68a9acefa9be523 Mon Sep 17 
00:00:00 2001 From: Radostin Stoyanov Date: Sun, 1 Sep 2019 12:23:39 +0100 Subject: [PATCH 0178/2030] files-reg: Drop clear_ghost_files() prototype The function clear_ghost_files() has been removed in commit b11eeea "restore: auto-unlink for ghost files (v2)". Signed-off-by: Radostin Stoyanov --- criu/include/files-reg.h | 1 - 1 file changed, 1 deletion(-) diff --git a/criu/include/files-reg.h b/criu/include/files-reg.h index 7a22d4d82..016d76a9f 100644 --- a/criu/include/files-reg.h +++ b/criu/include/files-reg.h @@ -30,7 +30,6 @@ extern int open_reg_by_id(u32 id); extern int open_reg_fd(struct file_desc *); extern int open_path(struct file_desc *, int (*open_cb)(int ns_root_fd, struct reg_file_info *, void *), void *arg); -extern void clear_ghost_files(void); extern const struct fdtype_ops regfile_dump_ops; extern int do_open_reg_noseek_flags(int ns_root_fd, struct reg_file_info *rfi, void *arg); From 8ea953f18b8534be883de3638369e4804771d086 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Tue, 13 Aug 2019 22:11:04 +0100 Subject: [PATCH 0179/2030] cr-dump: Remove redundant if-statement Signed-off-by: Radostin Stoyanov --- criu/cr-dump.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/criu/cr-dump.c b/criu/cr-dump.c index 9273fc0a5..fcbe816e8 100644 --- a/criu/cr-dump.c +++ b/criu/cr-dump.c @@ -782,8 +782,6 @@ static int dump_task_core_all(struct parasite_ctl *ctl, img = img_from_set(cr_imgset, CR_FD_CORE); ret = pb_write_one(img, core, PB_CORE); - if (ret < 0) - goto err; err: pr_info("----------------------------------------\n"); From 0d8e2477e928a1301c0611de233aed6879fdc13b Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Tue, 10 Sep 2019 06:50:58 -0700 Subject: [PATCH 0180/2030] arch/x86: push correct eip on the stack before lretq Right now we use pushq, but it pushes sign-extended value, so if the parasite code is placed higher that 2Gb, we will see something like this: 0xf7efd5b0: pushq $0x23 0xf7efd5b2: pushq $0xfffffffff7efd5b9 => 0xf7efd5b7: 
lretq Actually we want to push 0xf7efd5b9 instead of 0xfffffffff7efd5b9. Fixes: #398 Cc: Dmitry Safonov Cc: Cyrill Gorcunov Signed-off-by: Andrei Vagin Acked-by: Dmitry Safonov <0x7f454c46@gmail.com> Signed-off-by: Andrei Vagin --- compel/arch/x86/src/lib/include/uapi/asm/sigframe.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/compel/arch/x86/src/lib/include/uapi/asm/sigframe.h b/compel/arch/x86/src/lib/include/uapi/asm/sigframe.h index 51ca023f7..486c0c8e0 100644 --- a/compel/arch/x86/src/lib/include/uapi/asm/sigframe.h +++ b/compel/arch/x86/src/lib/include/uapi/asm/sigframe.h @@ -194,7 +194,9 @@ void rt_sigframe_erase_sigset(struct rt_sigframe *sigframe) #define ARCH_RT_SIGRETURN_COMPAT(new_sp) \ asm volatile( \ "pushq $"__stringify(USER32_CS)" \n" \ - "pushq $1f \n" \ + "xor %%rax, %%rax \n" \ + "movl $1f, %%eax \n" \ + "pushq %%rax \n" \ "lretq \n" \ "1: \n" \ ".code32 \n" \ From 3e9dc1c7f5537a860a7332b93e056e7058162578 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Wed, 11 Sep 2019 11:13:51 +0100 Subject: [PATCH 0181/2030] compel/x86: Don't use pushq for a label `pushq` sign-extends the value. Which is a bummer as the label's address may be higher that 2Gb, which means that the sign-bit will be set. As it long-jumps with ia32 selector, %r11 can be scratched. Use %r11 register as a temporary to push the 32-bit address. 
Complements: a9a760278c1a ("arch/x86: push correct eip on the stack before lretq") Cc: Cyrill Gorcunov Reported-by: Andrei Vagin Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- compel/arch/x86/plugins/std/parasite-head.S | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/compel/arch/x86/plugins/std/parasite-head.S b/compel/arch/x86/plugins/std/parasite-head.S index a988de9d4..465cd887b 100644 --- a/compel/arch/x86/plugins/std/parasite-head.S +++ b/compel/arch/x86/plugins/std/parasite-head.S @@ -25,7 +25,9 @@ ENTRY(__export_parasite_head_start_compat) .code64 PARASITE_ENTRY 0 pushq $__USER32_CS - pushq $2f + xor %r11, %r11 + movl $2f, %r11d + pushq %r11 lretq 2: .code32 From ad7e82a30f813b8b902026467434cc2e7421452e Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Mon, 9 Sep 2019 21:57:33 +0100 Subject: [PATCH 0182/2030] scripts: Drop Fedora 28/rawhide fix This change was introduced with c75cb2b and it is no longer necessary. Signed-off-by: Radostin Stoyanov --- scripts/build/Dockerfile.fedora.tmpl | 6 ------ 1 file changed, 6 deletions(-) diff --git a/scripts/build/Dockerfile.fedora.tmpl b/scripts/build/Dockerfile.fedora.tmpl index 280ce1cdd..b1127c9b2 100644 --- a/scripts/build/Dockerfile.fedora.tmpl +++ b/scripts/build/Dockerfile.fedora.tmpl @@ -30,12 +30,6 @@ RUN dnf install -y \ rubygem-asciidoctor \ kmod -# Replace coreutils-single with "traditional" coreutils -# to fix the following error on Fedora 28/rawhide while -# running under QEMU: -# > sh: /usr/bin/sort: /usr/bin/coreutils: bad interpreter: No such file or directory -RUN dnf install -y --allowerasing coreutils - RUN ln -sf python3 /usr/bin/python ENV PYTHON=python3 From 2f337652ad5c40f7a420fdd9a7c57767af4ba8a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20C=C5=82api=C5=84ski?= Date: Thu, 8 Aug 2019 18:49:13 +0200 Subject: [PATCH 0183/2030] Add new command line option: --cgroup-yard MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit Instead of creating cgroup yard in CRIU, now we can create it externally and pass it to CRIU. Useful if somebody doesn't want to grant CAP_SYS_ADMIN to CRIU. Signed-off-by: Michał Cłapiński --- Documentation/criu.txt | 36 ++++++++++++-- criu/cgroup.c | 101 +++++++++++++++++++++++++------------- criu/config.c | 4 ++ criu/cr-service.c | 3 ++ criu/crtools.c | 4 ++ criu/image.c | 2 +- criu/include/cr_options.h | 1 + images/rpc.proto | 1 + lib/c/criu.c | 13 +++++ lib/c/criu.h | 1 + 10 files changed, 126 insertions(+), 40 deletions(-) diff --git a/Documentation/criu.txt b/Documentation/criu.txt index 94fc5428a..28913a7fb 100644 --- a/Documentation/criu.txt +++ b/Documentation/criu.txt @@ -266,10 +266,33 @@ For example, the command line for the above example should look like this: discovered automatically (usually via */proc*). This option is useful when one needs *criu* to skip some controllers. -*--cgroup-props-ignore-default*:: - When combined with *--cgroup-props*, makes *criu* substitute - a predefined controller property with the new one shipped. If the option - is not used, the predefined properties are merged with the provided ones. +*--cgroup-yard* 'path':: + Instead of trying to mount cgroups in CRIU, provide a path to a directory + with already created cgroup yard. Useful if you don't want to grant + CAP_SYS_ADMIN to CRIU. For every cgroup mount there should be exactly one + directory. If there is only one controller in this mount, the dir's name + should be just the name of the controller. If there are multiple controllers + comounted, the directory name should have them be separated by a comma. 
++ +For example, if */proc/cgroups* looks like this: ++ +---------- +#subsys_name hierarchy num_cgroups enabled +cpu 1 1 1 +devices 2 2 1 +freezer 2 2 1 +---------- ++ +then you can create the cgroup yard by the following commands: ++ +---------- +mkdir private_yard +cd private_yard +mkdir cpu +mount -t cgroup -o cpu none cpu +mkdir devices,freezer +mount -t cgroup -o devices,freezer none devices,freezer +---------- *--tcp-established*:: Checkpoint established TCP connections. @@ -442,6 +465,11 @@ The 'mode' may be one of the following: *ignore*::: Don't deal with cgroups and pretend that they don't exist. +*--cgroup-yard* 'path':: + Instead of trying to mount cgroups in CRIU, provide a path to a directory + with already created cgroup yard. For more information look in the *dump* + section. + *--cgroup-root* ['controller'*:*]/'newroot':: Change the root cgroup the controller will be installed into. No controller means that root is the default for all controllers not specified. diff --git a/criu/cgroup.c b/criu/cgroup.c index 332c79fb9..9f3aef10d 100644 --- a/criu/cgroup.c +++ b/criu/cgroup.c @@ -549,8 +549,9 @@ static int collect_cgroups(struct list_head *ctls) int fd = -1; list_for_each_entry(cc, ctls, l) { - char path[PATH_MAX], mopts[1024], *root; + char path[PATH_MAX], *root; char prefix[] = ".criu.cgmounts.XXXXXX"; + const char namestr[] = "name="; struct cg_controller *cg; struct cg_root_opt *o; @@ -568,7 +569,7 @@ static int collect_cgroups(struct list_head *ctls) if (!current_controller) { /* only allow "fake" controllers to be created this way */ - if (!strstartswith(cc->name, "name=")) { + if (!strstartswith(cc->name, namestr)) { pr_err("controller %s not found\n", cc->name); return -1; } else { @@ -586,26 +587,45 @@ static int collect_cgroups(struct list_head *ctls) if (!opts.manage_cgroups) continue; - if (strstartswith(cc->name, "name=")) - snprintf(mopts, sizeof(mopts), "none,%s", cc->name); - else - snprintf(mopts, sizeof(mopts), "%s", cc->name); + 
if (opts.cgroup_yard) { + char dir_path[PATH_MAX]; + int off; + + off = snprintf(dir_path, PATH_MAX, "%s/", opts.cgroup_yard); + if (strstartswith(cc->name, namestr)) + snprintf(dir_path + off, PATH_MAX, "%s", cc->name + strlen(namestr)); + else + snprintf(dir_path + off, PATH_MAX, "%s", cc->name); - if (mkdtemp(prefix) == NULL) { - pr_perror("can't make dir for cg mounts"); - return -1; + fd = open(dir_path, O_RDONLY | O_DIRECTORY, 0); + if (fd < 0) { + pr_perror("couldn't open %s", dir_path); + return -1; + } + } else { + char mopts[1024]; + + if (strstartswith(cc->name, namestr)) + snprintf(mopts, sizeof(mopts), "none,%s", cc->name); + else + snprintf(mopts, sizeof(mopts), "%s", cc->name); + + if (mkdtemp(prefix) == NULL) { + pr_perror("can't make dir for cg mounts"); + return -1; + } + + if (mount("none", prefix, "cgroup", 0, mopts) < 0) { + pr_perror("couldn't mount %s", mopts); + rmdir(prefix); + return -1; + } + + fd = open_detach_mount(prefix); + if (fd < 0) + return -1; } - if (mount("none", prefix, "cgroup", 0, mopts) < 0) { - pr_perror("couldn't mount %s", mopts); - rmdir(prefix); - return -1; - } - - fd = open_detach_mount(prefix); - if (fd < 0) - return -1; - path_pref_len = snprintf(path, PATH_MAX, "/proc/self/fd/%d", fd); root = cc->path; @@ -620,6 +640,7 @@ static int collect_cgroups(struct list_head *ctls) snprintf(path + path_pref_len, PATH_MAX - path_pref_len, "%s", root); ret = ftw(path, add_cgroup, 4); + if (ret < 0) pr_perror("failed walking %s for empty cgroups", path); @@ -1167,10 +1188,12 @@ void fini_cgroup(void) return; close_service_fd(CGROUP_YARD); - if (umount2(cg_yard, MNT_DETACH)) - pr_perror("Unable to umount %s", cg_yard); - if (rmdir(cg_yard)) - pr_perror("Unable to remove %s", cg_yard); + if (!opts.cgroup_yard) { + if (umount2(cg_yard, MNT_DETACH)) + pr_perror("Unable to umount %s", cg_yard); + if (rmdir(cg_yard)) + pr_perror("Unable to remove %s", cg_yard); + } xfree(cg_yard); cg_yard = NULL; } @@ -1652,20 +1675,28 @@ static int 
prepare_cgroup_sfd(CgroupEntry *ce) pr_info("Preparing cgroups yard (cgroups restore mode %#x)\n", opts.manage_cgroups); - off = sprintf(paux, ".criu.cgyard.XXXXXX"); - if (mkdtemp(paux) == NULL) { - pr_perror("Can't make temp cgyard dir"); - return -1; - } + if (opts.cgroup_yard) { + off = sprintf(paux, "%s", opts.cgroup_yard); - cg_yard = xstrdup(paux); - if (!cg_yard) { - rmdir(paux); - return -1; - } + cg_yard = xstrdup(paux); + if (!cg_yard) + return -1; + } else { + off = sprintf(paux, ".criu.cgyard.XXXXXX"); + if (mkdtemp(paux) == NULL) { + pr_perror("Can't make temp cgyard dir"); + return -1; + } - if (make_yard(cg_yard)) - goto err; + cg_yard = xstrdup(paux); + if (!cg_yard) { + rmdir(paux); + return -1; + } + + if (make_yard(cg_yard)) + goto err; + } pr_debug("Opening %s as cg yard\n", cg_yard); i = open(cg_yard, O_DIRECTORY); diff --git a/criu/config.c b/criu/config.c index 39aa071c9..cdea91f02 100644 --- a/criu/config.c +++ b/criu/config.c @@ -516,6 +516,7 @@ int parse_options(int argc, char **argv, bool *usage_error, { "tls-key", required_argument, 0, 1095}, BOOL_OPT("tls", &opts.tls), {"tls-no-cn-verify", no_argument, &opts.tls_no_cn_verify, true}, + { "cgroup-yard", required_argument, 0, 1096 }, { }, }; @@ -814,6 +815,9 @@ int parse_options(int argc, char **argv, bool *usage_error, case 1095: SET_CHAR_OPTS(tls_key, optarg); break; + case 1096: + SET_CHAR_OPTS(cgroup_yard, optarg); + break; case 'V': pr_msg("Version: %s\n", CRIU_VERSION); if (strcmp(CRIU_GITID, "0")) diff --git a/criu/cr-service.c b/criu/cr-service.c index 0938db02b..95ba2e5ce 100644 --- a/criu/cr-service.c +++ b/criu/cr-service.c @@ -608,6 +608,9 @@ static int setup_opts_from_req(int sk, CriuOpts *req) goto err; } + if (req->cgroup_yard) + SET_CHAR_OPTS(cgroup_yard, req->cgroup_yard); + if (req->tls_cacert) SET_CHAR_OPTS(tls_cacert, req->tls_cacert); if (req->tls_cacrl) diff --git a/criu/crtools.c b/criu/crtools.c index a94875684..0799a564c 100644 --- a/criu/crtools.c +++ 
b/criu/crtools.c @@ -366,6 +366,10 @@ usage: " --cgroup-dump-controller NAME\n" " define cgroup controller to be dumped\n" " and skip anything else present in system\n" +" --cgroup-yard PATH\n" +" instead of trying to mount cgroups in CRIU, provide\n" +" a path to a directory with already created cgroup yard.\n" +" Useful if you don't want to grant CAP_SYS_ADMIN to CRIU\n" " --lsm-profile TYPE:NAME\n" " Specify an LSM profile to be used during restore.\n" " The type can be either 'apparmor' or 'selinux'.\n" diff --git a/criu/image.c b/criu/image.c index 2eb926929..0225788b0 100644 --- a/criu/image.c +++ b/criu/image.c @@ -190,7 +190,7 @@ int prepare_inventory(InventoryEntry *he) struct dmp_info d; } crt = { .i.pid = &pid }; - pr_info("Perparing image inventory (version %u)\n", CRTOOLS_IMAGES_V1); + pr_info("Preparing image inventory (version %u)\n", CRTOOLS_IMAGES_V1); he->img_version = CRTOOLS_IMAGES_V1_1; he->fdinfo_per_id = true; diff --git a/criu/include/cr_options.h b/criu/include/cr_options.h index 82f76ad94..da7c10d69 100644 --- a/criu/include/cr_options.h +++ b/criu/include/cr_options.h @@ -106,6 +106,7 @@ struct cr_options { char *cgroup_props; char *cgroup_props_file; struct list_head new_cgroup_roots; + char *cgroup_yard; bool autodetect_ext_mounts; int enable_external_sharing; int enable_external_masters; diff --git a/images/rpc.proto b/images/rpc.proto index 15e677a77..c402259ac 100644 --- a/images/rpc.proto +++ b/images/rpc.proto @@ -120,6 +120,7 @@ message criu_opts { optional string tls_key = 57; optional bool tls = 58; optional bool tls_no_cn_verify = 59; + optional string cgroup_yard = 60; /* optional bool check_mounts = 128; */ } diff --git a/lib/c/criu.c b/lib/c/criu.c index 17d5c3983..14ddff26d 100644 --- a/lib/c/criu.c +++ b/lib/c/criu.c @@ -987,6 +987,19 @@ int criu_local_add_cg_dump_controller(criu_opts *opts, const char *name) return 0; } +int criu_local_add_cg_yard(criu_opts *opts, const char *path) +{ + char *new; + + new = strdup(path); 
+ if (!new) + return -ENOMEM; + + free(opts->rpc->cgroup_yard); + opts->rpc->cgroup_yard = new; + return 0; +} + int criu_add_skip_mnt(const char *mnt) { return criu_local_add_skip_mnt(global_opts, mnt); diff --git a/lib/c/criu.h b/lib/c/criu.h index 76f3547fc..cb37c5291 100644 --- a/lib/c/criu.h +++ b/lib/c/criu.h @@ -207,6 +207,7 @@ int criu_local_add_irmap_path(criu_opts *opts, const char *path); int criu_local_add_cg_props(criu_opts *opts, const char *stream); int criu_local_add_cg_props_file(criu_opts *opts, const char *path); int criu_local_add_cg_dump_controller(criu_opts *opts, const char *name); +int criu_local_add_cg_yard(criu_opts *opts, const char *path); int criu_local_add_inherit_fd(criu_opts *opts, int fd, const char *key); int criu_local_add_external(criu_opts *opts, const char *key); int criu_local_set_page_server_address_port(criu_opts *opts, const char *address, int port); From cf0080505ac3b3194f664d77edccccfa47bf450a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20C=C5=82api=C5=84ski?= Date: Wed, 14 Aug 2019 21:13:34 +0200 Subject: [PATCH 0184/2030] test: implement test for new --cgroup-yard option MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Michał Cłapiński --- test/zdtm.py | 4 +- test/zdtm/static/Makefile | 3 +- test/zdtm/static/cgroup_yard.c | 1 + test/zdtm/static/cgroup_yard.desc | 7 ++++ test/zdtm/static/cgroup_yard.hook | 70 +++++++++++++++++++++++++++++++ 5 files changed, 82 insertions(+), 3 deletions(-) create mode 120000 test/zdtm/static/cgroup_yard.c create mode 100644 test/zdtm/static/cgroup_yard.desc create mode 100755 test/zdtm/static/cgroup_yard.hook diff --git a/test/zdtm.py b/test/zdtm.py index 0153c6058..f0a102413 100755 --- a/test/zdtm.py +++ b/test/zdtm.py @@ -2018,7 +2018,7 @@ def print_sep(title, sep="=", width=80): def print_error(line): line = line.rstrip() - print(line) + print(line.encode('utf-8')) if line.endswith('>'): # combine pie output return True 
return False @@ -2028,7 +2028,7 @@ def grep_errors(fname): first = True print_next = False before = [] - with open(fname) as fd: + with open(fname, errors='replace') as fd: for l in fd: before.append(l) if len(before) > 5: diff --git a/test/zdtm/static/Makefile b/test/zdtm/static/Makefile index d8279d6f8..a38482f44 100644 --- a/test/zdtm/static/Makefile +++ b/test/zdtm/static/Makefile @@ -319,7 +319,8 @@ TST_DIR = \ cgroup03 \ cgroup04 \ cgroup_ifpriomap \ - cgroup_stray \ + cgroup_stray \ + cgroup_yard \ unlink_fstat04 \ unlink_fstat041 \ mntns_remap \ diff --git a/test/zdtm/static/cgroup_yard.c b/test/zdtm/static/cgroup_yard.c new file mode 120000 index 000000000..f3683c2b4 --- /dev/null +++ b/test/zdtm/static/cgroup_yard.c @@ -0,0 +1 @@ +cgroup00.c \ No newline at end of file diff --git a/test/zdtm/static/cgroup_yard.desc b/test/zdtm/static/cgroup_yard.desc new file mode 100644 index 000000000..8736d6780 --- /dev/null +++ b/test/zdtm/static/cgroup_yard.desc @@ -0,0 +1,7 @@ +{ +'flavor': 'h', +'flags': 'suid', +# We create the external cgroup yard in working directory during --pre-dump +# hook. We have to go up a few directories to find the yard. 
+'opts': '--manage-cgroups --cgroup-yard ../../../../../../external_yard' +} diff --git a/test/zdtm/static/cgroup_yard.hook b/test/zdtm/static/cgroup_yard.hook new file mode 100755 index 000000000..7ae53342c --- /dev/null +++ b/test/zdtm/static/cgroup_yard.hook @@ -0,0 +1,70 @@ +#!/usr/bin/env python + +import sys +import os +import subprocess +import tempfile + +yard = "external_yard" + +if sys.argv[1] == "--pre-dump": + ''' + Create external cgroup yard to be passed to CRIU via --cgroup-yard + ''' + os.mkdir(yard) + with open("/proc/self/cgroup") as f: + for line in f: + cgr = line.split(":")[1] + + if cgr == "": + continue + + if cgr.startswith("name="): + ctrl = cgr[len("name="):] + opts = "none," + cgr + else: + ctrl = cgr + opts = cgr + + os.mkdir(yard + "/" + ctrl) + subprocess.check_call(["mount", "-t", "cgroup", "none", yard + "/" + ctrl, "-o", opts]) + +if sys.argv[1] == "--post-restore": + ''' + Clean up the cgroup yard created during `--pre-dump` + ''' + with open("/proc/self/cgroup") as f: + for line in f: + cgr = line.split(":")[1] + + if cgr == "": + continue + + if cgr.startswith("name="): + ctrl = cgr[len("name="):] + else: + ctrl = cgr + + subprocess.check_call(["umount", yard + "/" + ctrl]) + os.rmdir(yard + "/" + ctrl) + os.rmdir(yard) + +if sys.argv[1] in ["--pre-restore", "--clean"]: + ''' + Clean up the leftover cgroups created by the test + ''' + tname = tempfile.mkdtemp() + subprocess.call(["mount", "-t", "cgroup", "none", tname, "-o", "none,name=zdtmtst"]) + + try: + os.rmdir(os.path.join(tname, "subcg00", "subsubcg")) + except: + pass + + try: + os.rmdir(os.path.join(tname, "subcg00")) + except: + pass + + subprocess.call(["umount", tname]) + os.rmdir(tname) From 4f24786b36058ab82e669fd5686cc9f5cfc573db Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Sat, 14 Sep 2019 13:47:06 +0100 Subject: [PATCH 0185/2030] travis: Install missing diffutils dependency The following tests fail in Fedora rawhide because /usr/bin/diff is missing. 
* zdtm/static/bridge(ns) * zdtm/static/cr_veth(uns) * zdtm/static/macvlan(ns) * zdtm/static/netns(uns) * zdtm/static/netns-nf(ns) * zdtm/static/sit(ns) Signed-off-by: Radostin Stoyanov --- scripts/build/Dockerfile.fedora.tmpl | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/build/Dockerfile.fedora.tmpl b/scripts/build/Dockerfile.fedora.tmpl index b1127c9b2..0500a8fc5 100644 --- a/scripts/build/Dockerfile.fedora.tmpl +++ b/scripts/build/Dockerfile.fedora.tmpl @@ -3,6 +3,7 @@ ARG ENV1=FOOBAR RUN dnf install -y \ ccache \ + diffutils \ findutils \ gcc \ git \ From 8bdc60d50e5b990aa8debd06785175da3e0ba34a Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Sat, 14 Sep 2019 10:26:22 +0300 Subject: [PATCH 0186/2030] arch/x86: fpu_state->fpu_state_ia32.xsave has to be 64-byte aligned Before the 5.2 kernel, only fpu_state->fpu_state_64.xsave has to be 64-byte aligned. But starting with the 5.2 kernel, the same is required for fpu_state->fpu_state_ia32.xsave. The behavior was changed in: c2ff9e9a3d9d ("x86/fpu: Merge the two code paths in __fpu__restore_sig()") Signed-off-by: Andrei Vagin --- compel/arch/x86/src/lib/include/uapi/asm/fpu.h | 8 ++++++-- criu/arch/x86/sigframe.c | 6 ++++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/compel/arch/x86/src/lib/include/uapi/asm/fpu.h b/compel/arch/x86/src/lib/include/uapi/asm/fpu.h index 509f4488b..4ff531fb9 100644 --- a/compel/arch/x86/src/lib/include/uapi/asm/fpu.h +++ b/compel/arch/x86/src/lib/include/uapi/asm/fpu.h @@ -263,7 +263,7 @@ struct xsave_struct_ia32 { struct ymmh_struct ymmh; uint8_t extended_state_area[EXTENDED_STATE_AREA_SIZE]; }; -} __aligned(FXSAVE_ALIGN_BYTES); +}; typedef struct { /* @@ -309,7 +309,11 @@ typedef struct { typedef struct { union { fpu_state_64_t fpu_state_64; - fpu_state_ia32_t fpu_state_ia32; + struct { + /* fpu_state_ia32->xsave has to be 64-byte aligned.
*/ + uint32_t __pad[2]; + fpu_state_ia32_t fpu_state_ia32; + }; }; uint8_t has_fpu; diff --git a/criu/arch/x86/sigframe.c b/criu/arch/x86/sigframe.c index 11b0d640d..33ba14387 100644 --- a/criu/arch/x86/sigframe.c +++ b/criu/arch/x86/sigframe.c @@ -28,8 +28,14 @@ int sigreturn_prep_fpu_frame(struct rt_sigframe *sigframe, sigframe->native.uc.uc_mcontext.fpstate = (uint64_t)addr; } else if (!sigframe->is_native) { + unsigned long addr = (unsigned long)(void *)&fpu_state->fpu_state_ia32.xsave; sigframe->compat.uc.uc_mcontext.fpstate = (uint32_t)(unsigned long)(void *)&fpu_state->fpu_state_ia32; + if ((addr % 64ul)) { + pr_err("Unaligned address passed: %lx (native %d)\n", + addr, sigframe->is_native); + return -1; + } } return 0; From a9f974b4951a261063187f0536c01c7f84e5fe56 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Sun, 15 Sep 2019 06:58:15 +0100 Subject: [PATCH 0187/2030] Introduce flush_early_log_to_stderr destructor Prior to log initialisation, CRIU preserves all (early) log messages in a buffer. In case of error, the content of this buffer needs to be printed out (flushed).
Suggested-by: Dmitry Safonov <0x7f454c46@gmail.com> Signed-off-by: Radostin Stoyanov --- criu/crtools.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/criu/crtools.c b/criu/crtools.c index 0799a564c..4625446ad 100644 --- a/criu/crtools.c +++ b/criu/crtools.c @@ -47,6 +47,13 @@ #include "setproctitle.h" #include "sysctl.h" +void flush_early_log_to_stderr() __attribute__((destructor)); + +void flush_early_log_to_stderr(void) +{ + flush_early_log_buffer(STDERR_FILENO); +} + int main(int argc, char *argv[], char *envp[]) { int ret = -1; @@ -95,10 +102,8 @@ int main(int argc, char *argv[], char *envp[]) return cr_service_work(atoi(argv[2])); } - if (check_options()) { - flush_early_log_buffer(STDERR_FILENO); + if (check_options()) return 1; - } if (opts.imgs_dir == NULL) SET_CHAR_OPTS(imgs_dir, "."); From 813bfbeb4f26611f8fd431c6fd65104cbad789d1 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Sun, 15 Sep 2019 07:03:57 +0100 Subject: [PATCH 0188/2030] Convert pr_msg() error messages to pr_err() Print error messages to stderr (instead of stdout). 
Suggested-by: Andrei Vagin Signed-off-by: Radostin Stoyanov --- criu/config.c | 4 ++-- criu/cr-check.c | 2 +- criu/crtools.c | 22 +++++++++++----------- criu/proc_parse.c | 2 +- 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/criu/config.c b/criu/config.c index cdea91f02..2ad2fd43c 100644 --- a/criu/config.c +++ b/criu/config.c @@ -835,10 +835,10 @@ int parse_options(int argc, char **argv, bool *usage_error, bad_arg: if (idx < 0) /* short option */ - pr_msg("Error: invalid argument for -%c: %s\n", + pr_err("invalid argument for -%c: %s\n", opt, optarg); else /* long option */ - pr_msg("Error: invalid argument for --%s: %s\n", + pr_err("invalid argument for --%s: %s\n", long_opts[idx].name, optarg); return 1; } diff --git a/criu/cr-check.c b/criu/cr-check.c index 75a665cfb..729b2dc38 100644 --- a/criu/cr-check.c +++ b/criu/cr-check.c @@ -62,7 +62,7 @@ static int check_tty(void) int ret = -1; if (ARRAY_SIZE(t.c_cc) < TERMIOS_NCC) { - pr_msg("struct termios has %d @c_cc while " + pr_err("struct termios has %d @c_cc while " "at least %d expected.\n", (int)ARRAY_SIZE(t.c_cc), TERMIOS_NCC); diff --git a/criu/crtools.c b/criu/crtools.c index 4625446ad..5740b806d 100644 --- a/criu/crtools.c +++ b/criu/crtools.c @@ -112,7 +112,7 @@ int main(int argc, char *argv[], char *envp[]) SET_CHAR_OPTS(work_dir, opts.imgs_dir); if (optind >= argc) { - pr_msg("Error: command is required\n"); + pr_err("command is required\n"); goto usage; } @@ -120,17 +120,17 @@ int main(int argc, char *argv[], char *envp[]) if (has_exec_cmd) { if (!has_sub_command) { - pr_msg("Error: --exec-cmd requires a command\n"); + pr_err("--exec-cmd requires a command\n"); goto usage; } if (strcmp(argv[optind], "restore")) { - pr_msg("Error: --exec-cmd is available for the restore command only\n"); + pr_err("--exec-cmd is available for the restore command only\n"); goto usage; } if (opts.restore_detach) { - pr_msg("Error: --restore-detached and --exec-cmd cannot be used together\n"); + 
pr_err("--restore-detached and --exec-cmd cannot be used together\n"); goto usage; } @@ -142,7 +142,7 @@ int main(int argc, char *argv[], char *envp[]) } else { /* No subcommands except for cpuinfo and restore --exec-cmd */ if (strcmp(argv[optind], "cpuinfo") && has_sub_command) { - pr_msg("Error: excessive parameter%s for command %s\n", + pr_err("excessive parameter%s for command %s\n", (argc - optind) > 2 ? "s" : "", argv[optind]); goto usage; } @@ -241,7 +241,7 @@ int main(int argc, char *argv[], char *envp[]) if (!strcmp(argv[optind], "cpuinfo")) { if (!argv[optind + 1]) { - pr_msg("Error: cpuinfo requires an action: dump or check\n"); + pr_err("cpuinfo requires an action: dump or check\n"); goto usage; } if (!strcmp(argv[optind + 1], "dump")) @@ -251,17 +251,17 @@ int main(int argc, char *argv[], char *envp[]) } if (!strcmp(argv[optind], "exec")) { - pr_msg("The \"exec\" action is deprecated by the Compel library.\n"); + pr_err("The \"exec\" action is deprecated by the Compel library.\n"); return -1; } if (!strcmp(argv[optind], "show")) { - pr_msg("The \"show\" action is deprecated by the CRIT utility.\n"); - pr_msg("To view an image use the \"crit decode -i $name --pretty\" command.\n"); + pr_err("The \"show\" action is deprecated by the CRIT utility.\n"); + pr_err("To view an image use the \"crit decode -i $name --pretty\" command.\n"); return -1; } - pr_msg("Error: unknown command: %s\n", argv[optind]); + pr_err("unknown command: %s\n", argv[optind]); usage: pr_msg("\n" "Usage:\n" @@ -455,6 +455,6 @@ usage: return 0; opt_pid_missing: - pr_msg("Error: pid not specified\n"); + pr_err("pid not specified\n"); return 1; } diff --git a/criu/proc_parse.c b/criu/proc_parse.c index 0e8b6f209..97f82ee01 100644 --- a/criu/proc_parse.c +++ b/criu/proc_parse.c @@ -932,7 +932,7 @@ int prepare_loginuid(unsigned int value, unsigned int loglevel) if (write(fd, buf, 11) < 0) { print_on_level(loglevel, - "Write %s to /proc/self/loginuid failed: %s", + "Write %s to 
/proc/self/loginuid failed: %s\n", buf, strerror(errno)); ret = -1; } From db40ef5be671dbd78f42bd868a5377e62707c3de Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Sun, 15 Sep 2019 11:49:27 -0700 Subject: [PATCH 0189/2030] test/cgroup_yard: always clean up a test cgroup yard MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Right now it is cleaned up from a post-restore hook, but zdtm.py can be executed with the norst option: $ zdtm.py run -t zdtm/static/cgroup_yard --norst ... OSError: [Errno 17] File exists: 'external_yard' Cc: Michał Cłapiński Signed-off-by: Andrei Vagin --- test/zdtm/static/cgroup_yard.hook | 39 ++++++++----------------------- 1 file changed, 10 insertions(+), 29 deletions(-) diff --git a/test/zdtm/static/cgroup_yard.hook b/test/zdtm/static/cgroup_yard.hook index 7ae53342c..cc3971707 100755 --- a/test/zdtm/static/cgroup_yard.hook +++ b/test/zdtm/static/cgroup_yard.hook @@ -12,6 +12,7 @@ if sys.argv[1] == "--pre-dump": Create external cgroup yard to be passed to CRIU via --cgroup-yard ''' os.mkdir(yard) + subprocess.check_call(["mount", "-t", "tmpfs", "zdtm_yard", yard]) with open("/proc/self/cgroup") as f: for line in f: cgr = line.split(":")[1] @@ -29,26 +30,6 @@ if sys.argv[1] == "--pre-dump": os.mkdir(yard + "/" + ctrl) subprocess.check_call(["mount", "-t", "cgroup", "none", yard + "/" + ctrl, "-o", opts]) -if sys.argv[1] == "--post-restore": - ''' - Clean up the cgroup yard created during `--pre-dump` - ''' - with open("/proc/self/cgroup") as f: - for line in f: - cgr = line.split(":")[1] - - if cgr == "": - continue - - if cgr.startswith("name="): - ctrl = cgr[len("name="):] - else: - ctrl = cgr - - subprocess.check_call(["umount", yard + "/" + ctrl]) - os.rmdir(yard + "/" + ctrl) - os.rmdir(yard) - if sys.argv[1] in ["--pre-restore", "--clean"]: ''' Clean up the leftover cgroups created by the test @@ -56,15 +37,15 @@ if sys.argv[1] in ["--pre-restore", "--clean"]: tname = tempfile.mkdtemp() 
subprocess.call(["mount", "-t", "cgroup", "none", tname, "-o", "none,name=zdtmtst"]) - try: - os.rmdir(os.path.join(tname, "subcg00", "subsubcg")) - except: - pass - - try: - os.rmdir(os.path.join(tname, "subcg00")) - except: - pass + for cg in [os.path.join(tname, "subcg00", "subsubcg"), + os.path.join(tname, "subcg00")]: + if os.access(cg, os.F_OK): + os.rmdir(cg) subprocess.call(["umount", tname]) os.rmdir(tname) + +if sys.argv[1] == "--clean": + if os.access(yard, os.F_OK): + subprocess.call(["umount", "-l", yard]) + os.rmdir(yard) From f44939317f60288874bdc7b8544442a59db0d024 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Thu, 19 Sep 2019 23:37:57 +0300 Subject: [PATCH 0190/2030] zdtm/cgroup_yard: create a test cgroup yard from the post-start hook MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Right now, it is created from the pre-dump hook, but if the --snap option is set, the test fails: $ python test/zdtm.py run -t zdtm/static/cgroup_yard -f h --snap --iter 3 ... 
Running zdtm/static/cgroup_yard.hook(--pre-dump) Traceback (most recent call last): File zdtm/static/cgroup_yard.hook, line 14, in os.mkdir(yard) OSError: [Errno 17] File exists: 'external_yard' Cc: Michał Cłapiński Signed-off-by: Andrei Vagin --- test/zdtm/static/cgroup_yard.hook | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/zdtm/static/cgroup_yard.hook b/test/zdtm/static/cgroup_yard.hook index cc3971707..072b9d38d 100755 --- a/test/zdtm/static/cgroup_yard.hook +++ b/test/zdtm/static/cgroup_yard.hook @@ -7,7 +7,7 @@ import tempfile yard = "external_yard" -if sys.argv[1] == "--pre-dump": +if sys.argv[1] == "--post-start": ''' Create external cgroup yard to be passed to CRIU via --cgroup-yard ''' From b47ef26eac1777396ede638af0d6951a3fc52a1e Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Sat, 21 Sep 2019 13:35:18 +0300 Subject: [PATCH 0191/2030] cgroup: fixup nits 1) s/\s*$// 2) fix snprintf out of bound access Signed-off-by: Pavel Tikhomirov --- criu/cgroup.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/criu/cgroup.c b/criu/cgroup.c index 9f3aef10d..1be8be234 100644 --- a/criu/cgroup.c +++ b/criu/cgroup.c @@ -590,12 +590,12 @@ static int collect_cgroups(struct list_head *ctls) if (opts.cgroup_yard) { char dir_path[PATH_MAX]; int off; - + off = snprintf(dir_path, PATH_MAX, "%s/", opts.cgroup_yard); if (strstartswith(cc->name, namestr)) - snprintf(dir_path + off, PATH_MAX, "%s", cc->name + strlen(namestr)); + snprintf(dir_path + off, PATH_MAX - off, "%s", cc->name + strlen(namestr)); else - snprintf(dir_path + off, PATH_MAX, "%s", cc->name); + snprintf(dir_path + off, PATH_MAX - off, "%s", cc->name); fd = open(dir_path, O_RDONLY | O_DIRECTORY, 0); if (fd < 0) { From 3f1c4a17ad18fca3f95c0f51c5c42fedbb403b89 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Tue, 24 Sep 2019 23:36:29 +0300 Subject: [PATCH 0192/2030] pipe: print pipe_id as unsigned to generate an external pipe name Reported-by: Mr Jenkins 
Signed-off-by: Andrei Vagin --- criu/pipes.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/criu/pipes.c b/criu/pipes.c index fd1a7e6bb..cb5da71de 100644 --- a/criu/pipes.c +++ b/criu/pipes.c @@ -282,8 +282,8 @@ static char *pipe_d_name(struct file_desc *d, char *buf, size_t s) struct pipe_info *pi; pi = container_of(d, struct pipe_info, d); - if (snprintf(buf, s, "pipe:[%d]", pi->pe->pipe_id) >= s) { - pr_err("Not enough room for pipe %d identifier string\n", + if (snprintf(buf, s, "pipe:[%u]", pi->pe->pipe_id) >= s) { + pr_err("Not enough room for pipe %u identifier string\n", pi->pe->pipe_id); return NULL; } From b84f481b55442433f46b5ea4b91a32dd8cffc502 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Tue, 24 Sep 2019 23:48:15 +0300 Subject: [PATCH 0193/2030] unix: print inode numbers as unsigned int Reported-by: Mr Jenkins Signed-off-by: Andrei Vagin --- criu/sk-unix.c | 60 +++++++++++++++++++++++++------------------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/criu/sk-unix.c b/criu/sk-unix.c index f0620e676..f43aa2124 100644 --- a/criu/sk-unix.c +++ b/criu/sk-unix.c @@ -130,7 +130,7 @@ static struct unix_sk_listen_icon *lookup_unix_listen_icons(unsigned int peer_in static void show_one_unix(char *act, const struct unix_sk_desc *sk) { - pr_debug("\t%s: ino %d peer_ino %d family %4d type %4d state %2d name %s\n", + pr_debug("\t%s: ino %u peer_ino %u family %4d type %4d state %2d name %s\n", act, sk->sd.ino, sk->peer_ino, sk->sd.family, sk->type, sk->state, sk->name); if (sk->nr_icons) { @@ -143,7 +143,7 @@ static void show_one_unix(char *act, const struct unix_sk_desc *sk) static void show_one_unix_img(const char *act, const UnixSkEntry *e) { - pr_info("\t%s: id %#x ino %d peer %d type %d state %d name %d bytes\n", + pr_info("\t%s: id %#x ino %u peer %u type %d state %d name %d bytes\n", act, e->id, e->ino, e->peer, e->type, e->state, (int)e->name.len); } @@ -426,7 +426,7 @@ static int dump_one_unix_fd(int lfd, 
uint32_t id, const struct fd_parms *p) if (ue->peer) { peer = (struct unix_sk_desc *)lookup_socket(ue->peer, PF_UNIX, 0); if (IS_ERR_OR_NULL(peer)) { - pr_err("Unix socket %d without peer %d\n", + pr_err("Unix socket %u without peer %u\n", ue->ino, ue->peer); goto err; } @@ -437,7 +437,7 @@ static int dump_one_unix_fd(int lfd, uint32_t id, const struct fd_parms *p) */ if (peer->peer_ino != ue->ino) { if (!peer->name) { - pr_err("Unix socket %d with unreachable peer %d (%d)\n", + pr_err("Unix socket %u with unreachable peer %u (%u)\n", ue->ino, ue->peer, peer->peer_ino); goto err; } @@ -513,7 +513,7 @@ static int dump_one_unix_fd(int lfd, uint32_t id, const struct fd_parms *p) ue->peer = e->sk_desc->sd.ino; - pr_debug("\t\tFixed inflight socket %d peer %d)\n", + pr_debug("\t\tFixed inflight socket %u peer %u)\n", ue->ino, ue->peer); } dump: @@ -1383,7 +1383,7 @@ static int keep_deleted(struct unix_sk_info *ui) { int fd = open(ui->name, O_PATH); if (fd < 0) { - pr_perror("ghost: Can't open id %#x ino %d addr %s", + pr_perror("ghost: Can't open id %#x ino %u addr %s", ui->ue->id, ui->ue->ino, ui->name); return -1; } @@ -1409,7 +1409,7 @@ static int bind_on_deleted(int sk, struct unix_sk_info *ui) int ret; if (ui->ue->name.len >= UNIX_PATH_MAX) { - pr_err("ghost: Too long name for socket id %#x ino %d name %s\n", + pr_err("ghost: Too long name for socket id %#x ino %u name %s\n", ui->ue->id, ui->ue->ino, ui->name); return -ENOSPC; } @@ -1424,14 +1424,14 @@ static int bind_on_deleted(int sk, struct unix_sk_info *ui) ret = access(path, R_OK | W_OK | X_OK); if (ret == 0) { ui->ghost_dir_pos = pos - path; - pr_debug("ghost: socket id %#x ino %d name %s detected F_OK %s\n", + pr_debug("ghost: socket id %#x ino %u name %s detected F_OK %s\n", ui->ue->id, ui->ue->ino, ui->name, path); break; } if (errno != ENOENT) { ret = -errno; - pr_perror("ghost: Can't access %s for socket id %#x ino %d name %s", + pr_perror("ghost: Can't access %s for socket id %#x ino %u name %s", path, 
ui->ue->id, ui->ue->ino, ui->name); return ret; } @@ -1441,7 +1441,7 @@ static int bind_on_deleted(int sk, struct unix_sk_info *ui) path[ui->ue->name.len] = '\0'; pos = dirname(path); - pr_debug("ghost: socket id %#x ino %d name %s creating %s\n", + pr_debug("ghost: socket id %#x ino %u name %s creating %s\n", ui->ue->id, ui->ue->ino, ui->name, pos); ret = mkdirpat(AT_FDCWD, pos, 0755); if (ret) { @@ -1471,15 +1471,15 @@ static int bind_on_deleted(int sk, struct unix_sk_info *ui) * clean it up. */ if (unlinkat(AT_FDCWD, path_parked, 0) == 0) - pr_debug("ghost: Unlinked stale socket id %#x ino %d name %s\n", + pr_debug("ghost: Unlinked stale socket id %#x ino %u name %s\n", ui->ue->id, ui->ue->ino, path_parked); if (rename(ui->name, path_parked)) { ret = -errno; - pr_perror("ghost: Can't rename id %#x ino %d addr %s -> %s", + pr_perror("ghost: Can't rename id %#x ino %u addr %s -> %s", ui->ue->id, ui->ue->ino, ui->name, path_parked); return ret; } - pr_debug("ghost: id %#x ino %d renamed %s -> %s\n", + pr_debug("ghost: id %#x ino %u renamed %s -> %s\n", ui->ue->id, ui->ue->ino, ui->name, path_parked); renamed = true; ret = bind(sk, (struct sockaddr *)&addr, @@ -1487,7 +1487,7 @@ static int bind_on_deleted(int sk, struct unix_sk_info *ui) } if (ret < 0) { ret = -errno; - pr_perror("ghost: Can't bind on socket id %#x ino %d addr %s", + pr_perror("ghost: Can't bind on socket id %#x ino %u addr %s", ui->ue->id, ui->ue->ino, ui->name); return ret; } @@ -1499,7 +1499,7 @@ static int bind_on_deleted(int sk, struct unix_sk_info *ui) ret = keep_deleted(ui); if (ret < 0) { - pr_err("ghost: Can't save socket %#x ino %d addr %s into fdstore\n", + pr_err("ghost: Can't save socket %#x ino %u addr %s into fdstore\n", ui->ue->id, ui->ue->ino, ui->name); return -EIO; } @@ -1511,7 +1511,7 @@ static int bind_on_deleted(int sk, struct unix_sk_info *ui) ret = unlinkat(AT_FDCWD, ui->name, 0); if (ret < 0) { ret = -errno; - pr_perror("ghost: Can't unlink socket %#x ino %d addr %s", + 
pr_perror("ghost: Can't unlink socket %#x ino %u addr %s", ui->ue->id, ui->ue->ino, ui->name); return ret; } @@ -1519,12 +1519,12 @@ static int bind_on_deleted(int sk, struct unix_sk_info *ui) if (renamed) { if (rename(path_parked, ui->name)) { ret = -errno; - pr_perror("ghost: Can't rename id %#x ino %d addr %s -> %s", + pr_perror("ghost: Can't rename id %#x ino %u addr %s -> %s", ui->ue->id, ui->ue->ino, path_parked, ui->name); return ret; } - pr_debug("ghost: id %#x ino %d renamed %s -> %s\n", + pr_debug("ghost: id %#x ino %u renamed %s -> %s\n", ui->ue->id, ui->ue->ino, path_parked, ui->name); } @@ -1542,11 +1542,11 @@ static int bind_on_deleted(int sk, struct unix_sk_info *ui) pos = strrchr(path, '/')) { *pos = '\0'; if (rmdir(path)) { - pr_perror("ghost: Can't remove directory %s on id %#x ino %d", + pr_perror("ghost: Can't remove directory %s on id %#x ino %u", path, ui->ue->id, ui->ue->ino); return -1; } - pr_debug("ghost: Removed %s on id %#x ino %d\n", + pr_debug("ghost: Removed %s on id %#x ino %u\n", path, ui->ue->id, ui->ue->ino); } } @@ -1594,13 +1594,13 @@ static int bind_unix_sk(int sk, struct unix_sk_info *ui) mutex_lock(mutex_ghost); if (ui->flags & USK_GHOST_FDSTORE) { - pr_debug("ghost: bind id %#x ino %d addr %s\n", + pr_debug("ghost: bind id %#x ino %u addr %s\n", ui->ue->id, ui->ue->ino, ui->name); ret = bind_on_deleted(sk, ui); if (ret) errno = -ret; } else { - pr_debug("bind id %#x ino %d addr %s\n", + pr_debug("bind id %#x ino %u addr %s\n", ui->ue->id, ui->ue->ino, ui->name); ret = bind(sk, (struct sockaddr *)&addr, sizeof(addr.sun_family) + ui->ue->name.len); @@ -1608,7 +1608,7 @@ static int bind_unix_sk(int sk, struct unix_sk_info *ui) goto done; } if (ret < 0) { - pr_perror("Can't bind id %#x ino %d addr %s", + pr_perror("Can't bind id %#x ino %u addr %s", ui->ue->id, ui->ue->ino, ui->name); goto done; } @@ -1654,7 +1654,7 @@ static int post_open_interconnected_master(struct unix_sk_info *ui) static void pr_info_opening(const char 
*prefix, struct unix_sk_info *ui, struct fdinfo_list_entry *fle) { - pr_info("Opening %s (stage %d id %#x ino %d peer %d)\n", + pr_info("Opening %s (stage %d id %#x ino %u peer %u)\n", prefix, fle->stage, ui->ue->id, ui->ue->ino, ui->ue->peer); } @@ -1950,7 +1950,7 @@ static char *socket_d_name(struct file_desc *d, char *buf, size_t s) ui = container_of(d, struct unix_sk_info, d); - if (snprintf(buf, s, "socket:[%d]", ui->ue->ino) >= s) { + if (snprintf(buf, s, "socket:[%u]", ui->ue->ino) >= s) { pr_err("Not enough room for unixsk %d identifier string\n", ui->ue->ino); return NULL; @@ -1981,14 +1981,14 @@ static int unlink_sk(struct unix_sk_info *ui) ret = unlinkat(AT_FDCWD, ui->name, 0) ? -1 : 0; if (ret < 0 && errno != ENOENT) { - pr_warn("Can't unlink socket %d peer %d (name %s dir %s)\n", + pr_warn("Can't unlink socket %u peer %u (name %s dir %s)\n", ui->ue->ino, ui->ue->peer, ui->name ? (ui->name[0] ? ui->name : &ui->name[1]) : "-", ui->name_dir ? ui->name_dir : "-"); ret = -errno; goto out; } else if (ret == 0) { - pr_debug("Unlinked socket %d peer %d (name %s dir %s)\n", + pr_debug("Unlinked socket %u peer %u (name %s dir %s)\n", ui->ue->ino, ui->ue->peer, ui->name ? (ui->name[0] ? ui->name : &ui->name[1]) : "-", ui->name_dir ? ui->name_dir : "-"); @@ -2065,7 +2065,7 @@ int unix_prepare_root_shared(void) char tp_name[32]; char st_name[32]; - pr_debug("ghost: id %#x type %s state %s ino %d peer %d address %s\n", + pr_debug("ghost: id %#x type %s state %s ino %u peer %u address %s\n", ui->ue->id, __socket_type_name(ui->ue->type, tp_name), __tcp_state_name(ui->ue->state, st_name), ui->ue->ino, ui->peer ? 
ui->peer->ue->ino : 0, @@ -2113,7 +2113,7 @@ static int collect_one_unixsk(void *o, ProtobufCMessage *base, struct cr_img *i) uname = "-"; } - pr_info(" `- Got id %#x ino %d type %s state %s peer %d (name %s%.*s dir %s)\n", + pr_info(" `- Got id %#x ino %u type %s state %s peer %u (name %s%.*s dir %s)\n", ui->ue->id, ui->ue->ino, ___socket_type_name(ui->ue->type), ___tcp_state_name(ui->ue->state), ui->ue->peer, prefix, ulen, uname, ui->name_dir ? ui->name_dir : "-"); @@ -2128,7 +2128,7 @@ static int collect_one_unixsk(void *o, ProtobufCMessage *base, struct cr_img *i) if (ui->ue->deleted) { if (!ui->name || !ui->ue->name.len || !ui->name[0]) { - pr_err("No name present, ino %d\n", ui->ue->ino); + pr_err("No name present, ino %u\n", ui->ue->ino); return -1; } From 578597299a82f0aea0ef7a3063e6dc6ea6fccb33 Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Mon, 30 Sep 2019 20:57:08 +0000 Subject: [PATCH 0194/2030] Cleanup do_full_int80() 1) Instead of tampering with the nr argument, do_full_int80() returns the value of the system call. It also avoids copying all registers back into the syscall_args32 argument after the syscall. 2) Additionally, the registers r12-r15 were added in the list of clobbers as kernels older than v4.4 do not preserve these. 3) Further, GCC uses a 128-byte red-zone as defined in the x86_64 ABI optimizing away the correct position of the %rsp register in leaf-functions. We now avoid tampering with the red-zone, fixing a SIGSEGV when running mmap_bug_test() in debug mode (DEBUG=1). 
Signed-off-by: Nicolas Viennot Reviewed-by: Dmitry Safonov <0x7f454c46@gmail.com> Signed-off-by: Andrei Vagin --- criu/arch/x86/crtools.c | 6 ++-- criu/arch/x86/include/asm/compat.h | 51 ++++++++++++++++++++---------- criu/arch/x86/kerndat.c | 4 +-- criu/arch/x86/restorer.c | 3 +- criu/arch/x86/sigaction_compat.c | 6 +--- 5 files changed, 40 insertions(+), 30 deletions(-) diff --git a/criu/arch/x86/crtools.c b/criu/arch/x86/crtools.c index efc23e5fe..e4073c27b 100644 --- a/criu/arch/x86/crtools.c +++ b/criu/arch/x86/crtools.c @@ -590,8 +590,7 @@ static int get_robust_list32(pid_t pid, uintptr_t head, uintptr_t len) .arg2 = (uint32_t)len, }; - do_full_int80(&s); - return (int)s.nr; + return do_full_int80(&s); } static int set_robust_list32(uint32_t head, uint32_t len) @@ -602,8 +601,7 @@ static int set_robust_list32(uint32_t head, uint32_t len) .arg1 = len, }; - do_full_int80(&s); - return (int)s.nr; + return do_full_int80(&s); } int get_task_futex_robust_list_compat(pid_t pid, ThreadCoreEntry *info) diff --git a/criu/arch/x86/include/asm/compat.h b/criu/arch/x86/include/asm/compat.h index cd1ae472d..acd552fb3 100644 --- a/criu/arch/x86/include/asm/compat.h +++ b/criu/arch/x86/include/asm/compat.h @@ -38,26 +38,45 @@ struct syscall_args32 { uint32_t nr, arg0, arg1, arg2, arg3, arg4, arg5; }; -static inline void do_full_int80(struct syscall_args32 *args) +static inline uint32_t do_full_int80(struct syscall_args32 *args) { /* - * r8-r11 registers are cleared during returning to userspace - * from syscall - that's x86_64 ABI to avoid leaking kernel - * pointers. + * Kernel older than v4.4 do not preserve r8-r15 registers when + * invoking int80, so we need to preserve them. * - * Other than that - we can't use %rbp in clobbers as GCC's inline - * assembly doesn't allow to do so. So, here is explicitly saving - * %rbp before syscall and restoring it's value afterward. 
+ * Additionally, %rbp is used as the 6th syscall argument, and we need + * to preserve its value when returning from the syscall to avoid + * upsetting GCC. However, we can't use %rbp in the GCC asm clobbers + * due to a GCC limitation. Instead, we explicitly save %rbp on the + * stack before invoking the syscall and restore its value afterward. + * + * Further, GCC may not adjust the %rsp pointer when allocating the + * args and ret variables because 1) do_full_int80() is a leaf + * function, and 2) the local variables (args and ret) are in the + * 128-byte red-zone as defined in the x86_64 ABI. To use the stack + * when preserving %rbp, we must either tell GCC to a) mark the + * function as non-leaf, or b) move away from the red-zone when using + * the stack. It seems that there is no easy way to do a), so we'll go + * with b). + * Note 1: Another workaround would have been to add %rsp in the list + * of clobbers, but this was deprecated in GCC 9. + * Note 2: This red-zone bug only manifests when compiling CRIU with + * DEBUG=1. 
*/ - asm volatile ("pushq %%rbp\n\t" - "mov %6, %%ebp\n\t" - "int $0x80\n\t" - "mov %%ebp, %6\n\t" - "popq %%rbp\n\t" - : "+a" (args->nr), - "+b" (args->arg0), "+c" (args->arg1), "+d" (args->arg2), - "+S" (args->arg3), "+D" (args->arg4), "+g" (args->arg5) - : : "r8", "r9", "r10", "r11"); + uint32_t ret; + + asm volatile ("sub $128, %%rsp\n\t" + "pushq %%rbp\n\t" + "mov %7, %%ebp\n\t" + "int $0x80\n\t" + "popq %%rbp\n\t" + "add $128, %%rsp\n\t" + : "=a" (ret) + : "a" (args->nr), + "b" (args->arg0), "c" (args->arg1), "d" (args->arg2), + "S" (args->arg3), "D" (args->arg4), "g" (args->arg5) + : "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"); + return ret; } #ifndef CR_NOGLIBC diff --git a/criu/arch/x86/kerndat.c b/criu/arch/x86/kerndat.c index f7593251b..94c954e1e 100644 --- a/criu/arch/x86/kerndat.c +++ b/criu/arch/x86/kerndat.c @@ -75,9 +75,7 @@ void *mmap_ia32(void *addr, size_t len, int prot, s.arg4 = fildes; s.arg5 = (uint32_t)off; - do_full_int80(&s); - - return (void *)(uintptr_t)s.nr; + return (void *)(uintptr_t)do_full_int80(&s); } /* diff --git a/criu/arch/x86/restorer.c b/criu/arch/x86/restorer.c index 2d335d5e1..b2c3b3668 100644 --- a/criu/arch/x86/restorer.c +++ b/criu/arch/x86/restorer.c @@ -54,8 +54,7 @@ int set_compat_robust_list(uint32_t head_ptr, uint32_t len) .arg1 = len, }; - do_full_int80(&s); - return (int)s.nr; + return do_full_int80(&s); } static int prepare_stack32(void **stack32) diff --git a/criu/arch/x86/sigaction_compat.c b/criu/arch/x86/sigaction_compat.c index b38ba8011..f467da490 100644 --- a/criu/arch/x86/sigaction_compat.c +++ b/criu/arch/x86/sigaction_compat.c @@ -28,7 +28,6 @@ extern char restore_rt_sigaction; */ int arch_compat_rt_sigaction(void *stack32, int sig, rt_sigaction_t_compat *act) { - int ret; struct syscall_args32 arg = {}; unsigned long act_stack = (unsigned long)stack32; @@ -49,8 +48,5 @@ int arch_compat_rt_sigaction(void *stack32, int sig, rt_sigaction_t_compat *act) arg.arg2 = 0; /* oldact */ arg.arg3 = 
(uint32_t)sizeof(act->rt_sa_mask); /* sigsetsize */ - do_full_int80(&arg); - asm volatile ("\t movl %%eax,%0\n" : "=r"(ret)); - return ret; + return do_full_int80(&arg); } - From 576a99f492b1f55050d4fde8560efe454ab887bc Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Fri, 4 Oct 2019 16:32:48 +0100 Subject: [PATCH 0195/2030] restorer/inotify: Don't overflow PIE stack PATH_MAX == 4096; PATH_MAX*8 == 32k; RESTORE_STACK_SIZE == 32k. Fixes: a3cdf948699c6 ("inotify: cleanup auxiliary events from queue") Cc: Pavel Tikhomirov Cc: Andrei Vagin Co-debugged-with: Andrei Vagin Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- criu/pie/restorer.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c index 390c0e1a9..dab58add6 100644 --- a/criu/pie/restorer.c +++ b/criu/pie/restorer.c @@ -1320,21 +1320,23 @@ static int fd_poll(int inotify_fd) } /* - * note: Actually kernel may want even more space for one event (see - * round_event_name_len), so using buffer of EVENT_BUFF_SIZE size may fail. - * To be on the safe side - take a bigger buffer, and these also allows to - * read more events in one syscall. + * In the worst case buf size should be: + * sizeof(struct inotify_event) * 2 + PATH_MAX + * See round_event_name_len() in kernel. 
*/ -#define EVENT_BUFF_SIZE ((sizeof(struct inotify_event) + PATH_MAX)) +#define EVENT_BUFF_SIZE ((sizeof(struct inotify_event) * 2 + PATH_MAX)) /* * Read all available events from inotify queue */ static int cleanup_inotify_events(int inotify_fd) { - char buf[EVENT_BUFF_SIZE * 8]; + char buf[EVENT_BUFF_SIZE * 3]; int ret; + /* Limit buf to be lesser than half of restorer's stack */ + BUILD_BUG_ON(ARRAY_SIZE(buf) >= RESTORE_STACK_SIZE/2); + while (1) { ret = fd_poll(inotify_fd); if (ret < 0) { From 20d4920a8bf74d1eceebc076bcc00889ba40e9f7 Mon Sep 17 00:00:00 2001 From: Abhishek Dubey Date: Thu, 3 Oct 2019 19:39:18 +0530 Subject: [PATCH 0196/2030] Adding --pre-dump-mode option Two modes of pre-dump algorithm: 1) splicing memory by parasite --pre-dump-mode=splice (default) 2) using process_vm_readv syscall --pre-dump-mode=read Signed-off-by: Abhishek Dubey Signed-off-by: Andrei Vagin --- Documentation/criu.txt | 6 ++++++ criu/config.c | 10 ++++++++++ criu/cr-service.c | 13 +++++++++++++ criu/crtools.c | 2 ++ criu/include/cr_options.h | 7 +++++++ criu/mem.c | 13 ++++++++++++- images/rpc.proto | 6 ++++++ lib/c/criu.c | 15 +++++++++++++++ lib/c/criu.h | 7 +++++++ test/zdtm.py | 9 ++++++++- 10 files changed, 86 insertions(+), 2 deletions(-) diff --git a/Documentation/criu.txt b/Documentation/criu.txt index 28913a7fb..2729bc95a 100644 --- a/Documentation/criu.txt +++ b/Documentation/criu.txt @@ -156,6 +156,12 @@ In addition, *page-server* options may be specified. Turn on memory changes tracker in the kernel. If the option is not passed the memory tracker get turned on implicitly. +*--pre-dump-mode*='mode':: + There are two 'mode' to operate pre-dump algorithm. The 'splice' mode + is parasite based, whereas 'read' mode is based on process_vm_readv + syscall. The 'read' mode incurs reduced frozen time and reduced + memory pressure as compared to 'splice' mode. Default is 'splice' mode. + *dump* ~~~~~~ Performs a checkpoint procedure. 
diff --git a/criu/config.c b/criu/config.c index 2ad2fd43c..e5d42efe4 100644 --- a/criu/config.c +++ b/criu/config.c @@ -276,6 +276,7 @@ void init_opts(void) opts.empty_ns = 0; opts.status_fd = -1; opts.log_level = DEFAULT_LOGLEVEL; + opts.pre_dump_mode = PRE_DUMP_SPLICE; } bool deprecated_ok(char *what) @@ -517,6 +518,7 @@ int parse_options(int argc, char **argv, bool *usage_error, BOOL_OPT("tls", &opts.tls), {"tls-no-cn-verify", no_argument, &opts.tls_no_cn_verify, true}, { "cgroup-yard", required_argument, 0, 1096 }, + { "pre-dump-mode", required_argument, 0, 1097}, { }, }; @@ -818,6 +820,14 @@ int parse_options(int argc, char **argv, bool *usage_error, case 1096: SET_CHAR_OPTS(cgroup_yard, optarg); break; + case 1097: + if (!strcmp("read", optarg)) { + opts.pre_dump_mode = PRE_DUMP_READ; + } else if (strcmp("splice", optarg)) { + pr_err("Unable to parse value of --pre-dump-mode\n"); + return 1; + } + break; case 'V': pr_msg("Version: %s\n", CRIU_VERSION); if (strcmp(CRIU_GITID, "0")) diff --git a/criu/cr-service.c b/criu/cr-service.c index 95ba2e5ce..392e9ac50 100644 --- a/criu/cr-service.c +++ b/criu/cr-service.c @@ -473,6 +473,19 @@ static int setup_opts_from_req(int sk, CriuOpts *req) opts.lazy_pages = req->lazy_pages; } + if (req->has_pre_dump_mode) { + switch (req->pre_dump_mode) { + case CRIU_PRE_DUMP_MODE__SPLICE: + opts.pre_dump_mode = PRE_DUMP_SPLICE; + break; + case CRIU_PRE_DUMP_MODE__READ: + opts.pre_dump_mode = PRE_DUMP_READ; + break; + default: + goto err; + } + } + if (req->ps) { opts.port = (short)req->ps->port; diff --git a/criu/crtools.c b/criu/crtools.c index 5740b806d..700fad994 100644 --- a/criu/crtools.c +++ b/criu/crtools.c @@ -428,6 +428,8 @@ usage: " pages images of previous dump\n" " when used on restore, as soon as page is restored, it\n" " will be punched from the image\n" +" --pre-dump-mode splice - parasite based pre-dumping (default)\n" +" read - process_vm_readv syscall based pre-dumping\n" "\n" "Page/Service server options:\n" " 
--address ADDR address of server or service\n" diff --git a/criu/include/cr_options.h b/criu/include/cr_options.h index da7c10d69..2c1451e86 100644 --- a/criu/include/cr_options.h +++ b/criu/include/cr_options.h @@ -38,6 +38,12 @@ struct cg_root_opt { char *newroot; }; +/* + * Pre-dump variants + */ +#define PRE_DUMP_SPLICE 1 /* Pre-dump using parasite */ +#define PRE_DUMP_READ 2 /* Pre-dump using process_vm_readv syscall */ + /* * Cgroup management options. */ @@ -81,6 +87,7 @@ struct cr_options { int evasive_devices; int link_remap_ok; int log_file_per_pid; + int pre_dump_mode; bool swrk_restore; char *output; char *root; diff --git a/criu/mem.c b/criu/mem.c index de66a6210..911b9d21c 100644 --- a/criu/mem.c +++ b/criu/mem.c @@ -482,7 +482,18 @@ static int __parasite_dump_pages_seized(struct pstree_item *item, if (mdc->lazy) memcpy(pargs_iovs(args), pp->iovs, sizeof(struct iovec) * pp->nr_iovs); - ret = drain_pages(pp, ctl, args); + + /* + * Faking drain_pages for pre-dump here. Actual drain_pages for pre-dump + * will happen after task unfreezing in cr_pre_dump_finish(). This is + * actual optimization which reduces time for which process was frozen + * during pre-dump. 
+ */ + if (mdc->pre_dump && opts.pre_dump_mode == PRE_DUMP_READ) + ret = 0; + else + ret = drain_pages(pp, ctl, args); + if (!ret && !mdc->pre_dump) ret = xfer_pages(pp, &xfer); if (ret) diff --git a/images/rpc.proto b/images/rpc.proto index c402259ac..fc2f1bce2 100644 --- a/images/rpc.proto +++ b/images/rpc.proto @@ -47,6 +47,11 @@ enum criu_cg_mode { DEFAULT = 6; }; +enum criu_pre_dump_mode { + SPLICE = 1; + READ = 2; +}; + message criu_opts { required int32 images_dir_fd = 1; optional int32 pid = 2; /* if not set on dump, will dump requesting process */ @@ -121,6 +126,7 @@ message criu_opts { optional bool tls = 58; optional bool tls_no_cn_verify = 59; optional string cgroup_yard = 60; + optional criu_pre_dump_mode pre_dump_mode = 61 [default = SPLICE]; /* optional bool check_mounts = 128; */ } diff --git a/lib/c/criu.c b/lib/c/criu.c index 14ddff26d..fffb9fd9c 100644 --- a/lib/c/criu.c +++ b/lib/c/criu.c @@ -336,6 +336,21 @@ int criu_set_parent_images(const char *path) return criu_local_set_parent_images(global_opts, path); } +int criu_local_set_pre_dump_mode(criu_opts *opts, enum criu_pre_dump_mode mode) +{ + opts->rpc->has_pre_dump_mode = true; + if (mode == CRIU_PRE_DUMP_SPLICE || mode == CRIU_PRE_DUMP_READ) { + opts->rpc->pre_dump_mode = mode; + return 0; + } + return -1; +} + +int criu_set_pre_dump_mode(enum criu_pre_dump_mode mode) +{ + return criu_local_set_pre_dump_mode(global_opts, mode); +} + void criu_local_set_track_mem(criu_opts *opts, bool track_mem) { opts->rpc->has_track_mem = true; diff --git a/lib/c/criu.h b/lib/c/criu.h index cb37c5291..22db0fdcf 100644 --- a/lib/c/criu.h +++ b/lib/c/criu.h @@ -43,6 +43,11 @@ enum criu_cg_mode { CRIU_CG_MODE_DEFAULT, }; +enum criu_pre_dump_mode { + CRIU_PRE_DUMP_SPLICE = 1, + CRIU_PRE_DUMP_READ = 2 +}; + int criu_set_service_address(const char *path); void criu_set_service_fd(int fd); int criu_set_service_binary(const char *path); @@ -95,6 +100,7 @@ int criu_add_irmap_path(const char *path); int 
criu_add_inherit_fd(int fd, const char *key); int criu_add_external(const char *key); int criu_set_page_server_address_port(const char *address, int port); +int criu_set_pre_dump_mode(enum criu_pre_dump_mode mode); /* * The criu_notify_arg_t na argument is an opaque @@ -211,6 +217,7 @@ int criu_local_add_cg_yard(criu_opts *opts, const char *path); int criu_local_add_inherit_fd(criu_opts *opts, int fd, const char *key); int criu_local_add_external(criu_opts *opts, const char *key); int criu_local_set_page_server_address_port(criu_opts *opts, const char *address, int port); +int criu_local_set_pre_dump_mode(criu_opts *opts, enum criu_pre_dump_mode mode); void criu_local_set_notify_cb(criu_opts *opts, int (*cb)(char *action, criu_notify_arg_t na)); diff --git a/test/zdtm.py b/test/zdtm.py index f0a102413..3c0cee667 100755 --- a/test/zdtm.py +++ b/test/zdtm.py @@ -1019,6 +1019,7 @@ class criu: self.__tls = self.__tls_options() if opts['tls'] else [] self.__criu_bin = opts['criu_bin'] self.__crit_bin = opts['crit_bin'] + self.__pre_dump_mode = opts['pre_dump_mode'] def fini(self): if self.__lazy_migrate: @@ -1249,6 +1250,8 @@ class criu: a_opts += ['--leave-stopped'] if self.__empty_ns: a_opts += ['--empty-ns', 'net'] + if self.__pre_dump_mode: + a_opts += ["--pre-dump-mode", "%s" % self.__pre_dump_mode] nowait = False if self.__lazy_migrate and action == "dump": @@ -1835,7 +1838,7 @@ class Launcher: 'sat', 'script', 'rpc', 'lazy_pages', 'join_ns', 'dedup', 'sbs', 'freezecg', 'user', 'dry_run', 'noauto_dedup', 'remote_lazy_pages', 'show_stats', 'lazy_migrate', - 'tls', 'criu_bin', 'crit_bin') + 'tls', 'criu_bin', 'crit_bin', 'pre_dump_mode') arg = repr((name, desc, flavor, {d: self.__opts[d] for d in nd})) if self.__use_log: @@ -2482,6 +2485,10 @@ rp.add_argument("--criu-bin", rp.add_argument("--crit-bin", help="Path to crit binary", default='../crit/crit') +rp.add_argument("--pre-dump-mode", + help="Use splice or read mode of pre-dumping", + choices=['splice', 'read'], 
+ default='splice') lp = sp.add_parser("list", help="List tests") lp.set_defaults(action=list_tests) From e0ea21ad5ecafadad653f46a0ed8cbef69c7b883 Mon Sep 17 00:00:00 2001 From: Abhishek Dubey Date: Thu, 3 Oct 2019 19:39:19 +0530 Subject: [PATCH 0197/2030] Handling iov generation for non-PROT_READ regions Skip iov-generation for regions not having PROT_READ, since process_vm_readv syscall can't process them during "read" pre-dump. Handle random order of "read" & "splice" pre-dumps. Signed-off-by: Abhishek Dubey Signed-off-by: Andrei Vagin --- criu/cr-dump.c | 5 ++++ criu/mem.c | 56 ++++++++++++++++++++++++++++++++++++++++-- images/inventory.proto | 1 + 3 files changed, 60 insertions(+), 2 deletions(-) diff --git a/criu/cr-dump.c b/criu/cr-dump.c index fcbe816e8..dd5b62dd0 100644 --- a/criu/cr-dump.c +++ b/criu/cr-dump.c @@ -1485,6 +1485,9 @@ static int cr_pre_dump_finish(int status) if (ret) goto err; + he.has_pre_dump_mode = true; + he.pre_dump_mode = opts.pre_dump_mode; + pstree_switch_state(root_item, TASK_ALIVE); timing_stop(TIME_FROZEN); @@ -1914,6 +1917,8 @@ int cr_dump_tasks(pid_t pid) if (ret) goto err; + he.has_pre_dump_mode = false; + ret = write_img_inventory(&he); if (ret) goto err; diff --git a/criu/mem.c b/criu/mem.c index 911b9d21c..a5de23755 100644 --- a/criu/mem.c +++ b/criu/mem.c @@ -351,7 +351,8 @@ static int generate_vma_iovs(struct pstree_item *item, struct vma_area *vma, struct page_pipe *pp, struct page_xfer *xfer, struct parasite_dump_pages_args *args, struct parasite_ctl *ctl, pmc_t *pmc, - bool has_parent, bool pre_dump) + bool has_parent, bool pre_dump, + int parent_predump_mode) { u64 off = 0; u64 *map; @@ -361,6 +362,52 @@ static int generate_vma_iovs(struct pstree_item *item, struct vma_area *vma, !vma_area_is(vma, VMA_ANON_SHARED)) return 0; + /* + * To facilitate any combination of pre-dump modes to run after + * one another, we need to take extra care as discussed below. 
+ * + * The SPLICE mode pre-dump, processes all type of memory regions, + * whereas READ mode pre-dump skips processing those memory regions + * which lacks PROT_READ flag. + * + * Now on mixing pre-dump modes: + * If SPLICE mode follows SPLICE mode : no issue + * -> everything dumped both the times + * + * If READ mode follows READ mode : no issue + * -> non-PROT_READ skipped both the time + * + * If READ mode follows SPLICE mode : no issue + * -> everything dumped at first, + * the non-PROT_READ skipped later + * + * If SPLICE mode follows READ mode : Need special care + * + * If READ pre-dump happens first, then it has skipped processing + * non-PROT_READ regions. Following SPLICE pre-dump expects pagemap + * entries for all mappings in parent pagemap, but last READ mode + * pre-dump cycle has skipped processing & pagemap generation for + * non-PROT_READ regions. So SPLICE mode throws error of missing + * pagemap entry for encountered non-PROT_READ mapping. + * + * To resolve this, the pre-dump-mode is stored in current pre-dump's + * inventory file. This pre-dump mode is read back from this file + * (present in parent pre-dump dir) as parent-pre-dump-mode during + * next pre-dump. + * + * If parent-pre-dump-mode and next-pre-dump-mode are in READ-mode -> + * SPLICE-mode order, then SPLICE mode doesn't expect mappings for + * non-PROT_READ regions in parent-image and marks "has_parent=false". 
+ */ + + if (!(vma->e->prot & PROT_READ)) { + if (opts.pre_dump_mode == PRE_DUMP_READ && pre_dump) + return 0; + if ((parent_predump_mode == PRE_DUMP_READ && + opts.pre_dump_mode == PRE_DUMP_SPLICE) || !pre_dump) + has_parent = false; + } + if (vma_entry_is(vma->e, VMA_AREA_AIORING)) { if (pre_dump) return 0; @@ -406,6 +453,7 @@ static int __parasite_dump_pages_seized(struct pstree_item *item, unsigned long pmc_size; int possible_pid_reuse = 0; bool has_parent; + int parent_predump_mode = -1; pr_info("\n"); pr_info("Dumping pages (type: %d pid: %d)\n", CR_FD_PAGES, item->pid->real); @@ -472,9 +520,13 @@ static int __parasite_dump_pages_seized(struct pstree_item *item, */ args->off = 0; has_parent = !!xfer.parent && !possible_pid_reuse; + if(mdc->parent_ie) + parent_predump_mode = mdc->parent_ie->pre_dump_mode; + list_for_each_entry(vma_area, &vma_area_list->h, list) { ret = generate_vma_iovs(item, vma_area, pp, &xfer, args, ctl, - &pmc, has_parent, mdc->pre_dump); + &pmc, has_parent, mdc->pre_dump, + parent_predump_mode); if (ret < 0) goto out_xfer; } diff --git a/images/inventory.proto b/images/inventory.proto index 7bc2b0c02..d1438e8c8 100644 --- a/images/inventory.proto +++ b/images/inventory.proto @@ -16,4 +16,5 @@ message inventory_entry { optional uint32 root_cg_set = 5; optional lsmtype lsmtype = 6; optional uint64 dump_uptime = 8; + optional uint32 pre_dump_mode = 9; } From 29b63e9a720ec3e996de3bdbeccde5d7c0cb46e3 Mon Sep 17 00:00:00 2001 From: Abhishek Dubey Date: Thu, 3 Oct 2019 19:39:20 +0530 Subject: [PATCH 0198/2030] Skip adding PROT_READ to non-PROT_READ mappings "read" mode pre-dump may fail even after adding PROT_READ flag. Adding PROT_READ works when dumping statically. See added comment for details. 
Signed-off-by: Abhishek Dubey Signed-off-by: Andrei Vagin --- criu/mem.c | 54 +++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 45 insertions(+), 9 deletions(-) diff --git a/criu/mem.c b/criu/mem.c index a5de23755..4e110c9e9 100644 --- a/criu/mem.c +++ b/criu/mem.c @@ -591,13 +591,47 @@ int parasite_dump_pages_seized(struct pstree_item *item, * able to read the memory contents. * * Afterwards -- reprotect memory back. + * + * This step is required for "splice" mode pre-dump and dump. + * Skip this step for "read" mode pre-dump. + * "read" mode pre-dump delegates processing of non-PROT_READ + * regions to dump stage. Adding PROT_READ works fine for + * static processing (target process frozen during pre-dump) + * and fails for dynamic as explained below. + * + * Consider following sequence of instances to reason, why + * not to add PROT_READ in "read" mode pre-dump ? + * + * CRIU- "read" pre-dump Target Process + * + * 1. Creates mapping M + * without PROT_READ + * 2. CRIU freezes target + * process + * 3. Collect the mappings + * 4. Add PROT_READ to M + * (non-PROT_READ region) + * 5. CRIU unfreezes target + * process + * 6. Add flag PROT_READ + * to mapping M + * 7. Revoke flag PROT_READ + * from mapping M + * 8. process_vm_readv tries + * to copy mapping M + * (believing M have + * PROT_READ flag) + * 9. 
syscall fails to copy + * data from M */ - pargs->add_prot = PROT_READ; - ret = compel_rpc_call_sync(PARASITE_CMD_MPROTECT_VMAS, ctl); - if (ret) { - pr_err("Can't dump unprotect vmas with parasite\n"); - return ret; + if (!mdc->pre_dump || opts.pre_dump_mode == PRE_DUMP_SPLICE) { + pargs->add_prot = PROT_READ; + ret = compel_rpc_call_sync(PARASITE_CMD_MPROTECT_VMAS, ctl); + if (ret) { + pr_err("Can't dump unprotect vmas with parasite\n"); + return ret; + } } if (fault_injected(FI_DUMP_PAGES)) { @@ -612,10 +646,12 @@ int parasite_dump_pages_seized(struct pstree_item *item, return ret; } - pargs->add_prot = 0; - if (compel_rpc_call_sync(PARASITE_CMD_MPROTECT_VMAS, ctl)) { - pr_err("Can't rollback unprotected vmas with parasite\n"); - ret = -1; + if (!mdc->pre_dump || opts.pre_dump_mode == PRE_DUMP_SPLICE) { + pargs->add_prot = 0; + if (compel_rpc_call_sync(PARASITE_CMD_MPROTECT_VMAS, ctl)) { + pr_err("Can't rollback unprotected vmas with parasite\n"); + ret = -1; + } } return ret; From 4c774afc18e8af458eeb03ea021ba52d0af4b32c Mon Sep 17 00:00:00 2001 From: Abhishek Dubey Date: Thu, 3 Oct 2019 19:39:21 +0530 Subject: [PATCH 0199/2030] Adding cnt_sub for stats manipulation adding cnt_sub function (complement of cnt_add). cnt_sub is utilized to decrement stats counter according to skipped page count during "read" mode pre-dump. 
Signed-off-by: Abhishek Dubey Signed-off-by: Andrei Vagin --- criu/include/stats.h | 1 + criu/stats.c | 12 ++++++++++++ 2 files changed, 13 insertions(+) diff --git a/criu/include/stats.h b/criu/include/stats.h index bab9a0507..5d408b7b1 100644 --- a/criu/include/stats.h +++ b/criu/include/stats.h @@ -45,6 +45,7 @@ enum { }; extern void cnt_add(int c, unsigned long val); +extern void cnt_sub(int c, unsigned long val); #define DUMP_STATS 1 #define RESTORE_STATS 2 diff --git a/criu/stats.c b/criu/stats.c index 7410b5ced..cb528011a 100644 --- a/criu/stats.c +++ b/criu/stats.c @@ -41,6 +41,18 @@ void cnt_add(int c, unsigned long val) BUG(); } +void cnt_sub(int c, unsigned long val) +{ + if (dstats != NULL) { + BUG_ON(c >= DUMP_CNT_NR_STATS); + dstats->counts[c] -= val; + } else if (rstats != NULL) { + BUG_ON(c >= RESTORE_CNT_NR_STATS); + atomic_sub(val, &rstats->counts[c]); + } else + BUG(); +} + static void timeval_accumulate(const struct timeval *from, const struct timeval *to, struct timeval *res) { From 98608b90de0f853b1c8a6e15b312320e1441c359 Mon Sep 17 00:00:00 2001 From: Abhishek Dubey Date: Thu, 3 Oct 2019 19:39:22 +0530 Subject: [PATCH 0200/2030] read mode pre-dump implementation Pre-dump using the process_vm_readv syscall. During frozen state, only iovecs will be generated and draining of memory happens after the task is unfrozen. Pre-dumping of shared memory remains unmodified. 
Signed-off-by: Abhishek Dubey Signed-off-by: Andrei Vagin --- criu/cr-dump.c | 7 +- criu/include/page-xfer.h | 4 + criu/page-xfer.c | 389 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 399 insertions(+), 1 deletion(-) diff --git a/criu/cr-dump.c b/criu/cr-dump.c index dd5b62dd0..ff05e38d7 100644 --- a/criu/cr-dump.c +++ b/criu/cr-dump.c @@ -1513,7 +1513,12 @@ static int cr_pre_dump_finish(int status) goto err; mem_pp = dmpi(item)->mem_pp; - ret = page_xfer_dump_pages(&xfer, mem_pp); + + if (opts.pre_dump_mode == PRE_DUMP_READ) + ret = page_xfer_predump_pages(item->pid->real, + &xfer, mem_pp); + else + ret = page_xfer_dump_pages(&xfer, mem_pp); xfer.close(&xfer); diff --git a/criu/include/page-xfer.h b/criu/include/page-xfer.h index fa72273ea..98061e2d3 100644 --- a/criu/include/page-xfer.h +++ b/criu/include/page-xfer.h @@ -9,6 +9,9 @@ struct ps_info { extern int cr_page_server(bool daemon_mode, bool lazy_dump, int cfd); +/* User buffer for read-mode pre-dump*/ +#define BUFFER_SIZE (PIPE_MAX_SIZE << PAGE_SHIFT) + /* * page_xfer -- transfer pages into image file. 
* Two images backends are implemented -- local image file @@ -48,6 +51,7 @@ struct page_xfer { extern int open_page_xfer(struct page_xfer *xfer, int fd_type, unsigned long id); struct page_pipe; extern int page_xfer_dump_pages(struct page_xfer *, struct page_pipe *); +extern int page_xfer_predump_pages(int pid, struct page_xfer *, struct page_pipe *); extern int connect_to_page_server_to_send(void); extern int connect_to_page_server_to_recv(int epfd); extern int disconnect_from_page_server(void); diff --git a/criu/page-xfer.c b/criu/page-xfer.c index 75e135c66..8709df745 100644 --- a/criu/page-xfer.c +++ b/criu/page-xfer.c @@ -6,6 +6,7 @@ #include #include #include +#include #undef LOG_PREFIX #define LOG_PREFIX "page-xfer: " @@ -480,6 +481,394 @@ static inline u32 ppb_xfer_flags(struct page_xfer *xfer, struct page_pipe_buf *p return PE_PRESENT; } +/* + * Optimized pre-dump algorithm + * ============================== + * + * Note: Please refer man(2) page of process_vm_readv syscall. + * + * The following discussion covers the possibly faulty-iov + * locations in an iovec, which hinders process_vm_readv from + * dumping the entire iovec in a single invocation. + * + * Memory layout of target process: + * + * Pages: A B C + * +--------+--------+--------+--------+--------+--------+ + * ||||||||||||||||||||||||||||||||||||||||||||||||||||||| + * +--------+--------+--------+--------+--------+--------+ + * + * Single "iov" representation: {starting_address, length_in_bytes} + * An iovec is array of iov-s. + * + * NOTE: For easy representation and discussion purpose, we carry + * out further discussion at "page granularity". + * length_in_bytes will represent page count in iov instead + * of byte count. Same assumption applies for the syscall's + * return value. Instead of returning the number of bytes + * read, it returns a page count. + * + * For above memory mapping, generated iovec: {A,1}{B,1}{C,4} + * + * This iovec remains unmodified once generated. 
At the same + * time some of memory regions listed in iovec may get modified + * (unmap/change protection) by the target process while syscall + * is trying to dump iovec regions. + * + * Case 1: + * A is unmapped, {A,1} become faulty iov + * + * A B C + * +--------+--------+--------+--------+--------+--------+ + * | |||||||||||||||||||||||||||||||||||||||||||||| + * +--------+--------+--------+--------+--------+--------+ + * ^ ^ + * | | + * start | + * (1) | + * start + * (2) + * + * process_vm_readv will return -1. Increment start pointer(2), + * syscall will process {B,1}{C,4} in one go and copy 5 pages + * to userbuf from iov-B and iov-C. + * + * Case 2: + * B is unmapped, {B,1} become faulty iov + * + * A B C + * +--------+--------+--------+--------+--------+--------+ + * ||||||||| ||||||||||||||||||||||||||||||||||||| + * +--------+--------+--------+--------+--------+--------+ + * ^ ^ + * | | + * start | + * (1) | + * start + * (2) + * + * process_vm_readv will return 1, i.e. page A copied to + * userbuf successfully and syscall stopped, since B got + * unmapped. + * + * Increment the start pointer to C(2) and invoke syscall. + * Userbuf contains 5 pages overall from iov-A and iov-C. + * + * Case 3: + * This case deals with partial unmapping of iov representing + * more than one pagesize region. + * + * Syscall can't process such faulty iov as whole. So we + * process such regions part-by-part and form new sub-iovs + * in aux_iov from successfully processed pages. + * + * + * Part 3.1: + * First page of C is unmapped + * + * A B C + * +--------+--------+--------+--------+--------+--------+ + * |||||||||||||||||| |||||||||||||||||||||||||||| + * +--------+--------+--------+--------+--------+--------+ + * ^ ^ + * | | + * start | + * (1) | + * dummy + * (2) + * + * process_vm_readv will return 2, i.e. pages A and B copied. + * We identify length of iov-C is more than 1 page, that is + * where this case differs from Case 2. 
+ * + * dummy-iov is introduced(2) as: {C+1,3}. dummy-iov can be + * directly placed at next page to failing page. This will copy + * remaining 3 pages from iov-C to userbuf. Finally create + * modified iov entry in aux_iov. Complete aux_iov look like: + * + * aux_iov: {A,1}{B,1}{C+1,3}* + * + * + * Part 3.2: + * In between page of C is unmapped, let's say third + * + * A B C + * +--------+--------+--------+--------+--------+--------+ + * |||||||||||||||||||||||||||||||||||| |||||||||| + * +--------+--------+--------+--------+--------+--------+ + * ^ ^ + * | |-----------------| | + * start partial_read_bytes | + * (1) | + * dummy + * (2) + * + * process_vm_readv will return 4, i.e. pages A and B copied + * completely and first two pages of C are also copied. + * + * Since, iov-C is not processed completely, we need to find + * "partial_read_byte" count to place out dummy-iov for + * remaining processing of iov-C. This function is performed by + * analyze_iov function. + * + * dummy-iov will be(2): {C+3,1}. dummy-iov will be placed + * next to first failing address to process remaining iov-C. + * New entries in aux_iov will look like: + * + * aux_iov: {A,1}{B,1}{C,2}*{C+3,1}* + */ + +unsigned long handle_faulty_iov(int pid, struct iovec* riov, + unsigned long faulty_index, + struct iovec *bufvec, struct iovec* aux_iov, + unsigned long* aux_len, + unsigned long partial_read_bytes) +{ + /* Handling Case 2*/ + if (riov[faulty_index].iov_len == PAGE_SIZE) { + cnt_sub(CNT_PAGES_WRITTEN, 1); + return 0; + } + + struct iovec dummy; + ssize_t bytes_read; + unsigned long offset = 0; + unsigned long final_read_cnt = 0; + + /* Handling Case 3-Part 3.2*/ + offset = (partial_read_bytes)?
partial_read_bytes : PAGE_SIZE; + + dummy.iov_base = riov[faulty_index].iov_base + offset; + dummy.iov_len = riov[faulty_index].iov_len - offset; + + if (!partial_read_bytes) + cnt_sub(CNT_PAGES_WRITTEN, 1); + + while (dummy.iov_len) { + + bytes_read = process_vm_readv(pid, bufvec, 1, &dummy, 1, 0); + + if(bytes_read == -1) { + /* Handling faulty page read in faulty iov */ + cnt_sub(CNT_PAGES_WRITTEN, 1); + dummy.iov_base += PAGE_SIZE; + dummy.iov_len -= PAGE_SIZE; + continue; + } + + /* If aux-iov can merge and expand or new entry required */ + if (aux_iov[(*aux_len)-1].iov_base + + aux_iov[(*aux_len)-1].iov_len == dummy.iov_base) + aux_iov[(*aux_len)-1].iov_len += bytes_read; + else { + aux_iov[*aux_len].iov_base = dummy.iov_base; + aux_iov[*aux_len].iov_len = bytes_read; + (*aux_len) += 1; + } + + dummy.iov_base += bytes_read; + dummy.iov_len -= bytes_read; + bufvec->iov_base += bytes_read; + bufvec->iov_len -= bytes_read; + final_read_cnt += bytes_read; + } + + return final_read_cnt; +} + +/* + * This function will position start pointer to the latest + * successfully read iov in iovec. In case of partial read it + * returns partial_read_bytes, otherwise 0. 
+ */ +static unsigned long analyze_iov(ssize_t bytes_read, struct iovec* riov, + unsigned long *index, struct iovec *aux_iov, + unsigned long *aux_len) +{ + ssize_t processed_bytes = 0; + unsigned long partial_read_bytes = 0; + + /* correlating iovs with read bytes */ + while (processed_bytes < bytes_read) { + + processed_bytes += riov[*index].iov_len; + aux_iov[*aux_len].iov_base = riov[*index].iov_base; + aux_iov[*aux_len].iov_len = riov[*index].iov_len; + + (*aux_len) += 1; + (*index) += 1; + } + + /* handling partially processed faulty iov*/ + if (processed_bytes - bytes_read) { + + (*index) -= 1; + + partial_read_bytes = riov[*index].iov_len + - (processed_bytes - bytes_read); + aux_iov[*aux_len-1].iov_len = partial_read_bytes; + } + + return partial_read_bytes; +} + +/* + * This function iterates over complete ppb->iov entries and pass + * them to process_vm_readv syscall. + * + * Since process_vm_readv returns count of successfully read bytes. + * It does not point to iovec entry associated to last successful + * byte read. The correlation between bytes read and corresponding + * iovec is setup through analyze_iov function. + * + * If all iovecs are not processed in one go, it means there exists + * some faulty iov entry(memory mapping modified after it was grabbed) + * in iovec. process_vm_readv syscall stops at such faulty iov and + * skip processing further any entry in iovec. This is handled by + * handle_faulty_iov function. 
+ */ +static long fill_userbuf(int pid, struct page_pipe_buf *ppb, + struct iovec *bufvec, + struct iovec* aux_iov, + unsigned long *aux_len) +{ + struct iovec *riov = ppb->iov; + ssize_t bytes_read; + unsigned long total_read = 0; + unsigned long start = 0; + unsigned long partial_read_bytes = 0; + + while (start < ppb->nr_segs) { + + bytes_read = process_vm_readv(pid, bufvec, 1, &riov[start], + ppb->nr_segs - start, 0); + + if (bytes_read == -1) { + /* Handling Case 1*/ + if (riov[start].iov_len == PAGE_SIZE) { + cnt_sub(CNT_PAGES_WRITTEN, 1); + start += 1; + continue; + } else if (errno == ESRCH) { + pr_debug("Target process PID:%d not found\n", pid); + return ESRCH; + } + } + + partial_read_bytes = 0; + + if (bytes_read > 0) { + partial_read_bytes = analyze_iov(bytes_read, riov, + &start, aux_iov, + aux_len); + bufvec->iov_base += bytes_read; + bufvec->iov_len -= bytes_read; + total_read += bytes_read; + } + + /* + * If all iovs not processed in one go, + * it means some iov in between has failed. + */ + if (start < ppb->nr_segs) + total_read += handle_faulty_iov(pid, riov, start, bufvec, + aux_iov, aux_len, + partial_read_bytes); + + start += 1; + } + + return total_read; +} + +/* + * This function is similar to page_xfer_dump_pages, instead it uses + * auxiliary_iov array for pagemap generation. + * + * The entries of ppb->iov may mismatch with actual process mappings + * present at time of pre-dump. Such entries need to be adjusted as per + * the pages read by process_vm_readv syscall. These adjusted entries + * along with unmodified entries are present in aux_iov array. 
+ */ + +int page_xfer_predump_pages(int pid, struct page_xfer *xfer, + struct page_pipe *pp) +{ + struct page_pipe_buf *ppb; + unsigned int cur_hole = 0, i; + unsigned long ret, bytes_read; + struct iovec bufvec; + + struct iovec aux_iov[PIPE_MAX_SIZE]; + unsigned long aux_len; + + char *userbuf = mmap(NULL, BUFFER_SIZE, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + + if (userbuf == MAP_FAILED) { + pr_perror("Unable to mmap a buffer"); + return -1; + } + + list_for_each_entry(ppb, &pp->bufs, l) { + + aux_len = 0; + bufvec.iov_len = BUFFER_SIZE; + bufvec.iov_base = userbuf; + + bytes_read = fill_userbuf(pid, ppb, &bufvec, aux_iov, &aux_len); + + if (bytes_read == ESRCH) { + munmap(userbuf, BUFFER_SIZE); + return -1; + } + + bufvec.iov_base = userbuf; + bufvec.iov_len = bytes_read; + ret = vmsplice(ppb->p[1], &bufvec, 1, SPLICE_F_NONBLOCK); + + if (ret == -1 || ret != bytes_read) { + pr_err("vmsplice: Failed to splice user buffer to pipe %ld\n", ret); + munmap(userbuf, BUFFER_SIZE); + return -1; + } + + /* generating pagemap */ + for (i = 0; i < aux_len; i++) { + + struct iovec iov = aux_iov[i]; + u32 flags; + + ret = dump_holes(xfer, pp, &cur_hole, iov.iov_base); + if (ret) { + munmap(userbuf, BUFFER_SIZE); + return ret; + } + + BUG_ON(iov.iov_base < (void *)xfer->offset); + iov.iov_base -= xfer->offset; + pr_debug("\t p %p [%u]\n", iov.iov_base, + (unsigned int)(iov.iov_len / PAGE_SIZE)); + + flags = ppb_xfer_flags(xfer, ppb); + + if (xfer->write_pagemap(xfer, &iov, flags)) { + munmap(userbuf, BUFFER_SIZE); + return -1; + } + + if (xfer->write_pages(xfer, ppb->p[0], iov.iov_len)) { + munmap(userbuf, BUFFER_SIZE); + return -1; + } + } + + } + + munmap(userbuf, BUFFER_SIZE); + return dump_holes(xfer, pp, &cur_hole, NULL); +} + int page_xfer_dump_pages(struct page_xfer *xfer, struct page_pipe *pp) { struct page_pipe_buf *ppb; From befbbd9bba013533a05547c9b3702a256904190c Mon Sep 17 00:00:00 2001 From: Abhishek Dubey Date: Thu, 3 Oct 2019 19:39:23 
+0530 Subject: [PATCH 0201/2030] Refactor time accounting macros refactoring time macros as per read mode pre-dump design. Signed-off-by: Abhishek Dubey Signed-off-by: Andrei Vagin --- criu/cr-dump.c | 7 +++++-- criu/page-xfer.c | 8 ++++++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/criu/cr-dump.c b/criu/cr-dump.c index ff05e38d7..f72373d22 100644 --- a/criu/cr-dump.c +++ b/criu/cr-dump.c @@ -1514,11 +1514,14 @@ static int cr_pre_dump_finish(int status) mem_pp = dmpi(item)->mem_pp; - if (opts.pre_dump_mode == PRE_DUMP_READ) + if (opts.pre_dump_mode == PRE_DUMP_READ) { + timing_stop(TIME_MEMWRITE); ret = page_xfer_predump_pages(item->pid->real, &xfer, mem_pp); - else + } + else { ret = page_xfer_dump_pages(&xfer, mem_pp); + } xfer.close(&xfer); diff --git a/criu/page-xfer.c b/criu/page-xfer.c index 8709df745..4d2d046ef 100644 --- a/criu/page-xfer.c +++ b/criu/page-xfer.c @@ -812,6 +812,8 @@ int page_xfer_predump_pages(int pid, struct page_xfer *xfer, list_for_each_entry(ppb, &pp->bufs, l) { + timing_start(TIME_MEMDUMP); + aux_len = 0; bufvec.iov_len = BUFFER_SIZE; bufvec.iov_base = userbuf; @@ -833,6 +835,9 @@ int page_xfer_predump_pages(int pid, struct page_xfer *xfer, return -1; } + timing_stop(TIME_MEMDUMP); + timing_start(TIME_MEMWRITE); + /* generating pagemap */ for (i = 0; i < aux_len; i++) { @@ -863,9 +868,12 @@ int page_xfer_predump_pages(int pid, struct page_xfer *xfer, } } + timing_stop(TIME_MEMWRITE); } munmap(userbuf, BUFFER_SIZE); + timing_start(TIME_MEMWRITE); + return dump_holes(xfer, pp, &cur_hole, NULL); } From d30557699616d27bed6e2e1fd6bbdf45a8c99d7f Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Fri, 4 Oct 2019 19:36:37 +0300 Subject: [PATCH 0202/2030] zdtm: handle --pre-dump-mode in the rpc mode Signed-off-by: Andrei Vagin --- test/zdtm.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/test/zdtm.py b/test/zdtm.py index 3c0cee667..571962241 100755 --- a/test/zdtm.py +++ b/test/zdtm.py @@ -912,6 
+912,13 @@ class criu_rpc: if arg == '--prev-images-dir': criu.opts.parent_img = args.pop(0) continue + if arg == '--pre-dump-mode': + key = args.pop(0) + mode = crpc.rpc.READ + if key == "splice": + mode = crpc.rpc.SPLICE + criu.opts.pre_dump_mode = mode + continue if arg == '--track-mem': criu.opts.track_mem = True continue @@ -929,7 +936,7 @@ class criu_rpc: inhfd.key = key continue - raise test_fail_exc('RPC for %s required' % arg) + raise test_fail_exc('RPC for %s(%s) required' % (arg, args.pop(0))) @staticmethod def run(action, From 7c97cc7eb26c7232ca286e5ad4322c3be2bdcfca Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Fri, 4 Oct 2019 20:02:07 +0300 Subject: [PATCH 0203/2030] lib/c: fix a compile time error lib/c/criu.c:343:30: error: implicit conversion from enumeration type 'enum criu_pre_dump_mode' to different enumeration type 'CriuPreDumpMode' (aka 'enum _CriuPreDumpMode') [-Werror,-Wenum-conversion opts->rpc->pre_dump_mode = mode; ~ ^~~~ Signed-off-by: Andrei Vagin --- lib/c/criu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/c/criu.c b/lib/c/criu.c index fffb9fd9c..1d0a235f4 100644 --- a/lib/c/criu.c +++ b/lib/c/criu.c @@ -340,7 +340,7 @@ int criu_local_set_pre_dump_mode(criu_opts *opts, enum criu_pre_dump_mode mode) { opts->rpc->has_pre_dump_mode = true; if (mode == CRIU_PRE_DUMP_SPLICE || mode == CRIU_PRE_DUMP_READ) { - opts->rpc->pre_dump_mode = mode; + opts->rpc->pre_dump_mode = (CriuPreDumpMode)mode; return 0; } return -1; From 6b264f591f42e0ee3e1ceee103d5c557184829d5 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Sat, 5 Oct 2019 22:46:02 +0300 Subject: [PATCH 0204/2030] criu: use atomic_add instead of atomic_sub atomic_sub isn't defined for all platforms. 
Reported-by: Mr Jenkins Cc: Abhishek Dubey Signed-off-by: Andrei Vagin --- criu/stats.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/criu/stats.c b/criu/stats.c index cb528011a..891c37800 100644 --- a/criu/stats.c +++ b/criu/stats.c @@ -48,7 +48,7 @@ void cnt_sub(int c, unsigned long val) dstats->counts[c] -= val; } else if (rstats != NULL) { BUG_ON(c >= RESTORE_CNT_NR_STATS); - atomic_sub(val, &rstats->counts[c]); + atomic_add(-val, &rstats->counts[c]); } else BUG(); } From 3efe44382fef816ea274ed1833adc1abfa4b4f06 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Sun, 6 Oct 2019 01:01:50 +0300 Subject: [PATCH 0205/2030] image: avoid name conflicts in image files Conflict register for file "sk-opts.proto": READ is already defined in file "rpc.proto". Please fix the conflict by adding package name on the proto file, or use different name for the duplication. Note: enum values appear as siblings of the enum type instead of children of it. https://github.com/checkpoint-restore/criu/issues/815 Signed-off-by: Andrei Vagin --- criu/cr-service.c | 2 +- images/rpc.proto | 2 +- test/zdtm.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/criu/cr-service.c b/criu/cr-service.c index 392e9ac50..a70f99d71 100644 --- a/criu/cr-service.c +++ b/criu/cr-service.c @@ -478,7 +478,7 @@ static int setup_opts_from_req(int sk, CriuOpts *req) case CRIU_PRE_DUMP_MODE__SPLICE: opts.pre_dump_mode = PRE_DUMP_SPLICE; break; - case CRIU_PRE_DUMP_MODE__READ: + case CRIU_PRE_DUMP_MODE__VM_READ: opts.pre_dump_mode = PRE_DUMP_READ; break; default: diff --git a/images/rpc.proto b/images/rpc.proto index fc2f1bce2..df1b5aed2 100644 --- a/images/rpc.proto +++ b/images/rpc.proto @@ -49,7 +49,7 @@ enum criu_cg_mode { enum criu_pre_dump_mode { SPLICE = 1; - READ = 2; + VM_READ = 2; }; message criu_opts { diff --git a/test/zdtm.py b/test/zdtm.py index 571962241..6d3fddfad 100755 --- a/test/zdtm.py +++ b/test/zdtm.py @@ -914,7 +914,7 @@ class criu_rpc: continue if 
arg == '--pre-dump-mode': key = args.pop(0) - mode = crpc.rpc.READ + mode = crpc.rpc.VM_READ if key == "splice": mode = crpc.rpc.SPLICE criu.opts.pre_dump_mode = mode From 71c2a9dc73f679df93f2c749fed39088616a7b16 Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Tue, 1 Oct 2019 20:56:26 +0000 Subject: [PATCH 0206/2030] Guard against empty file lock status The lock status string may be empty. This can happen when the owner of the lock is invisible from our PID namespace. This unfortunate behavior is fixed in kernels v4.19 and up (see commit 1cf8e5de40) Signed-off-by: Nicolas Viennot Signed-off-by: Andrei Vagin --- criu/proc_parse.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/criu/proc_parse.c b/criu/proc_parse.c index 97f82ee01..d67392a12 100644 --- a/criu/proc_parse.c +++ b/criu/proc_parse.c @@ -1669,17 +1669,27 @@ static int parse_fdinfo_pid_s(int pid, int fd, int type, void *arg) if (fdinfo_field(str, "lock")) { struct file_lock *fl; struct fdinfo_common *fdinfo = arg; + char *flock_status = str+sizeof("lock:\t")-1; if (type != FD_TYPES__UND) continue; + /* + * The lock status can be empty when the owner of the + * lock is invisible from our PID namespace. + * This unfortunate behavior is fixed in kernels v4.19 + * and up (see commit 1cf8e5de40). + */ + if (flock_status[0] == '\0') + continue; + fl = alloc_file_lock(); if (!fl) { pr_perror("Alloc file lock failed!"); goto out; } - if (parse_file_lock_buf(str + 6, fl, 0)) { + if (parse_file_lock_buf(flock_status, fl, 0)) { xfree(fl); goto parse_err; } From 5a92f100b88e25981d7d51b3f4db374297fcff3c Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Sat, 28 Sep 2019 06:59:45 +0100 Subject: [PATCH 0207/2030] page-pipe: Resize up to PIPE_MAX_SIZE When performing pre-dump we continuously increase the page-pipe size to fit the max amount memory pages in the pipe's buffer. However, we never actually set the pipe's buffer size to max. 
By doing so, we can reduce the number of pipe-s necessary for pre-dump and improve the performance as shown in the example below. For example, let's consider the following process: #include #include #include void main(void) { int i = 0; void *cache = calloc(1, 1024 * 1024 * 1024); while(1) { printf("%d\n", i++); sleep(1); } } stats-dump before this change: frozen_time: 123538 memdump_time: 95344 memwrite_time: 11980078 pages_scanned: 262721 pages_written: 262169 page_pipes: 513 page_pipe_bufs: 519 stats-dump after this change: frozen_time: 83287 memdump_time: 54587 memwrite_time: 12547466 pages_scanned: 262721 pages_written: 262169 page_pipes: 257 page_pipe_bufs: 263 Signed-off-by: Radostin Stoyanov --- criu/page-pipe.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/criu/page-pipe.c b/criu/page-pipe.c index a8216962d..439c180e4 100644 --- a/criu/page-pipe.c +++ b/criu/page-pipe.c @@ -54,8 +54,12 @@ static inline int ppb_resize_pipe(struct page_pipe_buf *ppb) if (ppb->pages_in + ppb->pipe_off < ppb->pipe_size) return 0; - if (new_size > PIPE_MAX_SIZE) - return 1; + if (new_size > PIPE_MAX_SIZE) { + if (ppb->pipe_size < PIPE_MAX_SIZE) + ppb->pipe_size = PIPE_MAX_SIZE; + else + return 1; + } ret = __ppb_resize_pipe(ppb, new_size); if (ret < 0) From f65b17e976633ad4d4a10dae96e3279157d8e77f Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Tue, 30 Apr 2019 11:35:26 +0300 Subject: [PATCH 0208/2030] cgroup: fix cg_yard leak on error path in prepare_cgroup_sfd Signed-off-by: Pavel Tikhomirov --- criu/cgroup.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/criu/cgroup.c b/criu/cgroup.c index 1be8be234..a66fc960e 100644 --- a/criu/cgroup.c +++ b/criu/cgroup.c @@ -1730,11 +1730,11 @@ static int prepare_cgroup_sfd(CgroupEntry *ce) pr_debug("\tMaking controller dir %s (%s)\n", paux, opt); if (mkdir(paux, 0700)) { pr_perror("\tCan't make controller dir %s", paux); - return -1; + goto err; } if (mount("none", paux, "cgroup", 
0, opt) < 0) { pr_perror("\tCan't mount controller dir %s", paux); - return -1; + goto err; } } From e56401ed3c187150c8b95cb5fd69e0d637c5515c Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Sat, 14 Sep 2019 12:47:14 +0100 Subject: [PATCH 0209/2030] image-desc: Remove CR_FD_FILE_LOCKS_PID The support for per-pid images with locks has been dropped with commit d040219 ("locks: Drop support for per-pid images with locks") and CR_FD_FILE_LOCKS_PID is not used. Signed-off-by: Radostin Stoyanov --- criu/image-desc.c | 5 ----- criu/include/image-desc.h | 1 - 2 files changed, 6 deletions(-) diff --git a/criu/image-desc.c b/criu/image-desc.c index 053e7af21..81cd07484 100644 --- a/criu/image-desc.c +++ b/criu/image-desc.c @@ -112,9 +112,4 @@ struct cr_fd_desc_tmpl imgset_template[CR_FD_MAX] = { .magic = IRMAP_CACHE_MAGIC, .oflags = O_SERVICE | O_FORCE_LOCAL, }, - - [CR_FD_FILE_LOCKS_PID] = { - .fmt = "filelocks-%u.img", - .magic = FILE_LOCKS_MAGIC, - }, }; diff --git a/criu/include/image-desc.h b/criu/include/image-desc.h index 3135f56b4..fea80a719 100644 --- a/criu/include/image-desc.h +++ b/criu/include/image-desc.h @@ -79,7 +79,6 @@ enum { CR_FD_RLIMIT, CR_FD_ITIMERS, CR_FD_POSIX_TIMERS, - CR_FD_FILE_LOCKS_PID, CR_FD_IRMAP_CACHE, CR_FD_CPUINFO, From 477c3a4b0b7a246808afb2f12d0553db14dd74a6 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Tue, 8 Oct 2019 21:37:22 +0100 Subject: [PATCH 0210/2030] service: Use space on stack for msg buffer RPC messages have a fairly small size and using space on the stack might be a better option. This change follows the pattern used with do_pb_read_one() and pb_write_one().
Signed-off-by: Radostin Stoyanov --- criu/cr-service.c | 56 +++++++++++++++++++++++------------------ criu/include/protobuf.h | 7 ++++++ criu/protobuf.c | 7 ------ 3 files changed, 39 insertions(+), 31 deletions(-) diff --git a/criu/cr-service.c b/criu/cr-service.c index a70f99d71..549b3368b 100644 --- a/criu/cr-service.c +++ b/criu/cr-service.c @@ -27,6 +27,7 @@ #include "cr-service.h" #include "cr-service-const.h" #include "page-xfer.h" +#include "protobuf.h" #include "net.h" #include "mount.h" #include "filesystems.h" @@ -49,18 +50,21 @@ unsigned int service_sk_ino = -1; static int recv_criu_msg(int socket_fd, CriuReq **req) { - unsigned char *buf; - int len; + u8 local[PB_PKOBJ_LOCAL_SIZE]; + void *buf = (void *)&local; + int len, exit_code = -1; len = recv(socket_fd, NULL, 0, MSG_TRUNC | MSG_PEEK); if (len == -1) { pr_perror("Can't read request"); - return -1; + goto err; } - buf = xmalloc(len); - if (!buf) - return -ENOMEM; + if (len > sizeof(local)) { + buf = xmalloc(len); + if (!buf) + return -ENOMEM; + } len = recv(socket_fd, buf, len, MSG_TRUNC); if (len == -1) { @@ -80,43 +84,47 @@ static int recv_criu_msg(int socket_fd, CriuReq **req) goto err; } - xfree(buf); - return 0; + exit_code = 0; err: - xfree(buf); - return -1; + if (buf != (void *)&local) + xfree(buf); + return exit_code; } static int send_criu_msg_with_fd(int socket_fd, CriuResp *msg, int fd) { - unsigned char *buf; - int len, ret; + u8 local[PB_PKOBJ_LOCAL_SIZE]; + void *buf = (void *)&local; + int len, exit_code = -1; len = criu_resp__get_packed_size(msg); - buf = xmalloc(len); - if (!buf) - return -ENOMEM; + if (len > sizeof(local)) { + buf = xmalloc(len); + if (!buf) + return -ENOMEM; + } if (criu_resp__pack(msg, buf) != len) { pr_perror("Failed packing response"); goto err; } - if (fd >= 0) { - ret = send_fds(socket_fd, NULL, 0, &fd, 1, buf, len); - } else - ret = write(socket_fd, buf, len); - if (ret < 0) { + if (fd >= 0) + exit_code = send_fds(socket_fd, NULL, 0, &fd, 1, buf, len); + 
else + exit_code = write(socket_fd, buf, len); + + if (exit_code < 0) { pr_perror("Can't send response"); goto err; } - xfree(buf); - return 0; + exit_code = 0; err: - xfree(buf); - return -1; + if (buf != (void *)&local) + xfree(buf); + return exit_code; } static int send_criu_msg(int socket_fd, CriuResp *msg) diff --git a/criu/include/protobuf.h b/criu/include/protobuf.h index fb7489e9d..0b6d8c150 100644 --- a/criu/include/protobuf.h +++ b/criu/include/protobuf.h @@ -52,4 +52,11 @@ static inline int collect_images(struct collect_image_info **array, unsigned siz return 0; } +/* + * To speed up reading of packed objects + * by providing space on stack, this should + * be more than enough for most objects. + */ +#define PB_PKOBJ_LOCAL_SIZE 1024 + #endif /* __CR_PROTOBUF_H__ */ diff --git a/criu/protobuf.c b/criu/protobuf.c index 8eb73e019..e68d42b5c 100644 --- a/criu/protobuf.c +++ b/criu/protobuf.c @@ -20,13 +20,6 @@ #include "protobuf.h" #include "util.h" -/* - * To speed up reading of packed objects - * by providing space on stack, this should - * be more than enough for most objects. - */ -#define PB_PKOBJ_LOCAL_SIZE 1024 - static char *image_name(struct cr_img *img) { int fd = img->_x.fd; From f8125b8bef7bf5a7bbaea5e6e1d29578c45bf53d Mon Sep 17 00:00:00 2001 From: Ashutosh Mehra Date: Fri, 13 Sep 2019 18:47:33 +0000 Subject: [PATCH 0211/2030] Couple of fixes to build and run libcriu tests libcriu tests are currently broken. This patch fixes couple of issues to allow the building and running libcriu tests. 1. lib/c/criu.h got updated to include version.h which is present at "criu/include", but the command to compile libcriu tests is not specifying "criu/include" in the path to be searched for header files. This resulted in compilation error. 
This can be fixed by adding "-I ../../../../../criu/criu/include" however it causes more problems as "criu/include/fcntl.h" would now hide system defined fcntl.h Solution is to use "-iquote ../../../../../criu/criu/include" which applies only to the quote form of include directive. 2. Secondly, libcriu.so major version got updated to 2 but libcriu/run.sh still assumes version 1. Instead of just updating the version in libcriu/run.sh to 2, this patch updates the libcriu/Makefile to use "CRIU_SO_VERSION_MAJOR" so that future changes to major version of libcriu won't cause same problem again. Signed-off-by: Ashutosh Mehra --- test/others/libcriu/Makefile | 14 ++++++++++++-- test/others/libcriu/run.sh | 5 ++--- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/test/others/libcriu/Makefile b/test/others/libcriu/Makefile index 5289ed15a..226396e6a 100644 --- a/test/others/libcriu/Makefile +++ b/test/others/libcriu/Makefile @@ -1,3 +1,5 @@ +include ../../../../criu/Makefile.versions + TESTS += test_sub TESTS += test_self TESTS += test_notify @@ -19,8 +21,16 @@ endef $(foreach t, $(TESTS), $(eval $(call genb, $(t)))) %.o: %.c - gcc -c $^ -I../../../../criu/lib/c/ -I../../../../criu/images/ -o $@ -Werror + gcc -c $^ -iquote ../../../../criu/criu/include -I../../../../criu/lib/c/ -I../../../../criu/images/ -o $@ -Werror -clean: +clean: libcriu_clean rm -rf $(TESTS) $(TESTS:%=%.o) lib.o .PHONY: clean + +libcriu_clean: + rm -f libcriu.so.${CRIU_SO_VERSION_MAJOR} +.PHONY: libcriu_clean + +libcriu: + ln -s ../../../../criu/lib/c/libcriu.so libcriu.so.${CRIU_SO_VERSION_MAJOR} +.PHONY: libcriu diff --git a/test/others/libcriu/run.sh b/test/others/libcriu/run.sh index a99b91e52..5f692db31 100755 --- a/test/others/libcriu/run.sh +++ b/test/others/libcriu/run.sh @@ -5,14 +5,13 @@ source ../env.sh || exit 1 echo "== Clean" make clean +make libcriu rm -rf wdir -rm -f ./libcriu.so.1 echo "== Prepare" mkdir -p wdir/i/ echo "== Run tests" -ln -s
../../../../criu/lib/c/libcriu.so libcriu.so.1 export LD_LIBRARY_PATH=. export PATH="`dirname ${BASH_SOURCE[0]}`/../../:$PATH" @@ -40,6 +39,6 @@ run_test test_iters run_test test_errno echo "== Tests done" -unlink libcriu.so.1 +make libcriu_clean [ $RESULT -eq 0 ] && echo "Success" || echo "FAIL" exit $RESULT From 321f82662129f516573cedba10b4fbbfc12f9f5a Mon Sep 17 00:00:00 2001 From: Ashutosh Mehra Date: Mon, 16 Sep 2019 06:49:07 +0000 Subject: [PATCH 0212/2030] Enable libcriu testing in travis jobs Updated scripts/travis/travis-tests to run libcriu test. Signed-off-by: Ashutosh Mehra --- scripts/travis/travis-tests | 3 +++ 1 file changed, 3 insertions(+) diff --git a/scripts/travis/travis-tests b/scripts/travis/travis-tests index 980d74734..b2ebe969b 100755 --- a/scripts/travis/travis-tests +++ b/scripts/travis/travis-tests @@ -161,6 +161,9 @@ ip net add test ./test/zdtm.py run -t zdtm/static/env00 -k always ./test/crit-recode.py +# libcriu testing +make -C test/others/libcriu run + make -C test/others/shell-job if ! [ -x "$(command -v flake8)" ]; then From 00ce121fd55e5947d477be4601169e8676a2bbbf Mon Sep 17 00:00:00 2001 From: Ashutosh Mehra Date: Mon, 23 Sep 2019 08:36:12 +0000 Subject: [PATCH 0213/2030] Add `criu` to PATH env variable in libcriu tests PATH is pointing to incorrect location for `criu` executable causing libcriu tests to fail when running in travis. Also added statements to display log file contents on failure to help in debugging. Signed-off-by: Ashutosh Mehra --- test/others/libcriu/run.sh | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/test/others/libcriu/run.sh b/test/others/libcriu/run.sh index 5f692db31..bd92f8544 100755 --- a/test/others/libcriu/run.sh +++ b/test/others/libcriu/run.sh @@ -13,7 +13,7 @@ mkdir -p wdir/i/ echo "== Run tests" export LD_LIBRARY_PATH=. 
-export PATH="`dirname ${BASH_SOURCE[0]}`/../../:$PATH" +export PATH="`dirname ${BASH_SOURCE[0]}`/../../../criu:$PATH" RESULT=0 @@ -21,6 +21,19 @@ function run_test { echo "== Build $1" if ! make $1; then echo "FAIL build $1" + echo "** Output of $1/test.log" + cat wdir/i/$1/test.log + echo "---------------" + if [ -f wdir/i/$1/dump.log ]; then + echo "** Contents of dump.log" + cat wdir/i/$1/dump.log + echo "---------------" + fi + if [ -f wdir/i/$1/restore.log ]; then + echo "** Contents of restore.log" + cat wdir/i/$1/restore.log + echo "---------------" + fi RESULT=1; else echo "== Test $1" From 19a24df53c2dba3b2e2457c99965edf43819818d Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Tue, 1 Oct 2019 00:29:14 +0100 Subject: [PATCH 0214/2030] early-log: Print warnings only if the buffer is full I don't see many issues with early-log, so we probably don't need the warning when it was used. Note that after commit 74731d9 ("zdtm: make grep_errors also grep warnings") also warnings are grepped by zdtm.py (and I believe that was an improvement) which prints some bothering lines: > =[log]=> dump/zdtm/static/inotify00/38/1/dump.log > ------------------------ grep Error ------------------------ > (00.000000) Will allow link remaps on FS > (00.000034) Warn (criu/log.c:203): The early log isn't empty > ------------------------ ERROR OVER ------------------------ Instead of decreasing loglevel of the message, improve it by reporting a real issue. 
Cc: Adrian Reber Cc: Pavel Tikhomirov Cc: Radostin Stoyanov Signed-off-by: Dmitry Safonov --- criu/log.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/criu/log.c b/criu/log.c index 8bdf83534..0ee113b91 100644 --- a/criu/log.c +++ b/criu/log.c @@ -199,8 +199,8 @@ void flush_early_log_buffer(int fd) } pos += hdr->len; } - if (early_log_buf_off) - pr_warn("The early log isn't empty\n"); + if (early_log_buf_off == EARLY_LOG_BUF_LEN) + pr_warn("The early log buffer is full, some messages may have been lost\n"); early_log_buf_off = 0; } From 1a28dee52b63eca5adc48c1d6f1dda8d532a0e8e Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Thu, 24 Oct 2019 19:39:39 +0000 Subject: [PATCH 0215/2030] Action scripts should be invoked with normal signal behavior Signal masks propagate through execve, so we need to clear them before invoking the action scripts as it may want to handle SIGCHLD, or SIGSEGV. Signed-off-by: Nicolas Viennot --- criu/util.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/criu/util.c b/criu/util.c index 028f604bb..e47e109ae 100644 --- a/criu/util.c +++ b/criu/util.c @@ -536,7 +536,7 @@ int cr_system_userns(int in, int out, int err, char *cmd, sigemptyset(&blockmask); sigaddset(&blockmask, SIGCHLD); if (sigprocmask(SIG_BLOCK, &blockmask, &oldmask) == -1) { - pr_perror("Can not set mask of blocked signals"); + pr_perror("Cannot set mask of blocked signals"); return -1; } @@ -545,6 +545,12 @@ int cr_system_userns(int in, int out, int err, char *cmd, pr_perror("fork() failed"); goto out; } else if (pid == 0) { + sigemptyset(&blockmask); + if (sigprocmask(SIG_SETMASK, &blockmask, NULL) == -1) { + pr_perror("Cannot clear blocked signals"); + goto out_chld; + } + if (userns_pid > 0) { if (switch_ns(userns_pid, &user_ns_desc, NULL)) goto out_chld; From 3861b334b252ff65acf2c827b5d85cab21de086a Mon Sep 17 00:00:00 2001 From: Sergey Bronnikov Date: Sun, 3 Nov 2019 13:08:09 +0300 Subject: [PATCH 0216/2030] Fix broken 
web-links --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 16e8452b5..558e87160 100644 --- a/README.md +++ b/README.md @@ -63,8 +63,8 @@ Linux kernel supporting checkpoint and restore for all the features it provides. looking for contributors of all kinds -- feedback, bug reports, testing, coding, writing, etc. Here are some useful hints to get involved. -* We have both -- [very simple](https://checkpoint-restore/criu/issues?q=is%3Aissue+is%3Aopen+label%3Aenhancement) and [more sophisticated](https://checkpoint-restore/criu/issues?q=is%3Aissue+is%3Aopen+label%3A%22new+feature%22) coding tasks; -* CRIU does need [extensive testing](https://checkpoint-restore/criu/issues?q=is%3Aissue+is%3Aopen+label%3Atesting); +* We have both -- [very simple](https://github.com/checkpoint-restore/criu/issues?q=is%3Aissue+is%3Aopen+label%3Aenhancement) and [more sophisticated](https://github.com/checkpoint-restore/criu/issues?q=is%3Aissue+is%3Aopen+label%3A%22new+feature%22) coding tasks; +* CRIU does need [extensive testing](https://github.com/checkpoint-restore/criu/issues?q=is%3Aissue+is%3Aopen+label%3Atesting); * Documentation is always hard, we have [some information](https://criu.org/Category:Empty_articles) that is to be extracted from people's heads into wiki pages as well as [some texts](https://criu.org/Category:Editor_help_needed) that all need to be converted into useful articles; * Feedback is expected on the github issues page and on the [mailing list](https://lists.openvz.org/mailman/listinfo/criu); * For historical reasons we do not accept PRs, instead [patches are welcome](http://criu.org/How_to_submit_patches); From a7c625938eb1d472341770699469ca6ddb4d91b1 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Tue, 29 Oct 2019 15:17:40 +0100 Subject: [PATCH 0217/2030] travis: start to use aarch64 hardware With the newly introduced aarch64 at Travis it is possible for the CRIU test-cases to switch to aarch64. 
Travis uses unprivileged LXD containers on aarch64 which blocks many of the kernel interfaces CRIU needs. So for now this only tests building CRIU natively on aarch64 instead of using the Docker+QEMU combination. All tests based on Docker are not working on aarch64 as there currently seems to be a problem with Docker on aarch64. Maybe because of the nesting of Docker in LXD. Signed-off-by: Adrian Reber --- .travis.yml | 11 ++++-- scripts/build/Dockerfile.alpine | 2 +- scripts/build/Dockerfile.centos | 1 + scripts/build/Makefile | 4 +-- scripts/travis/travis-tests | 63 +++++++++++++++++++++------------ 5 files changed, 54 insertions(+), 27 deletions(-) diff --git a/.travis.yml b/.travis.yml index 82ba9fbc8..4cde9c4fb 100644 --- a/.travis.yml +++ b/.travis.yml @@ -14,11 +14,9 @@ env: - TR_ARCH=x86_64 - TR_ARCH=x86_64 CLANG=1 - TR_ARCH=armv7hf - - TR_ARCH=aarch64 - TR_ARCH=ppc64le - TR_ARCH=s390x - TR_ARCH=armv7hf CLANG=1 - - TR_ARCH=aarch64 CLANG=1 - TR_ARCH=ppc64le CLANG=1 - TR_ARCH=alpine CLANG=1 - TR_ARCH=docker-test @@ -27,6 +25,15 @@ env: - TR_ARCH=centos - TR_ARCH=podman-test matrix: + include: + - os: linux + arch: arm64 + env: TR_ARCH=local + dist: bionic + - os: linux + arch: arm64 + env: TR_ARCH=local CLANG=1 + dist: bionic allow_failures: - env: TR_ARCH=docker-test - env: TR_ARCH=fedora-rawhide diff --git a/scripts/build/Dockerfile.alpine b/scripts/build/Dockerfile.alpine index c71a3901f..70fdf480a 100644 --- a/scripts/build/Dockerfile.alpine +++ b/scripts/build/Dockerfile.alpine @@ -42,5 +42,5 @@ RUN apk add \ # The rpc test cases are running as user #1000, let's add the user RUN adduser -u 1000 -D test -RUN pip install protobuf ipaddress junit_xml +RUN pip install protobuf ipaddress junit_xml flake8 RUN make -C test/zdtm diff --git a/scripts/build/Dockerfile.centos b/scripts/build/Dockerfile.centos index 2ce40b179..213be694f 100644 --- a/scripts/build/Dockerfile.centos +++ b/scripts/build/Dockerfile.centos @@ -23,6 +23,7 @@ RUN yum install -y \
protobuf-devel \ protobuf-python \ python \ + python-flake8 \ python-ipaddress \ python2-future \ python2-junit_xml \ diff --git a/scripts/build/Makefile b/scripts/build/Makefile index bb2e9ca9d..3d4d91cd5 100644 --- a/scripts/build/Makefile +++ b/scripts/build/Makefile @@ -1,5 +1,5 @@ -QEMU_ARCHES := armv7hf aarch64 ppc64le s390x fedora-rawhide-aarch64 # require qemu -ARCHES := $(QEMU_ARCHES) x86_64 fedora-asan fedora-rawhide centos +QEMU_ARCHES := armv7hf ppc64le s390x fedora-rawhide-aarch64 # require qemu +ARCHES := $(QEMU_ARCHES) aarch64 x86_64 fedora-asan fedora-rawhide centos TARGETS := $(ARCHES) alpine TARGETS_CLANG := $(addsuffix $(TARGETS),-clang) CONTAINER_RUNTIME := docker diff --git a/scripts/travis/travis-tests b/scripts/travis/travis-tests index b2ebe969b..1f6b19130 100755 --- a/scripts/travis/travis-tests +++ b/scripts/travis/travis-tests @@ -1,17 +1,31 @@ #!/bin/sh set -x -e -TRAVIS_PKGS="protobuf-c-compiler libprotobuf-c-dev libaio-dev +TRAVIS_PKGS="protobuf-c-compiler libprotobuf-c-dev libaio-dev python-future libgnutls28-dev libgnutls30 libprotobuf-dev protobuf-compiler - libcap-dev libnl-3-dev gcc-multilib gdb bash python-protobuf - libnet-dev util-linux asciidoctor libnl-route-3-dev" + libcap-dev libnl-3-dev gdb bash python-protobuf python-yaml + libnet-dev util-linux asciidoctor libnl-route-3-dev + python-junit.xml python-ipaddress time ccache flake8 + libbsd-dev" + +X86_64_PKGS="gcc-multilib" + +UNAME_M=`uname -m` + +if [ "$UNAME_M" != "x86_64" ]; then + # For Travis only x86_64 seems to be baremetal. Other + # architectures are running in unprivileged LXD containers. + # That seems to block most of CRIU's interfaces. 
+ SKIP_TRAVIS_TEST=1 +fi travis_prep () { [ -n "$SKIP_TRAVIS_PREP" ] && return cd ../../ - service apport stop + # This can fail on aarch64 travis + service apport stop || : CC=gcc # clang support @@ -43,24 +57,41 @@ travis_prep () { sed -i '/security/ d' /etc/apt/sources.list fi + + # Do not install x86_64 specific packages on other architectures + if [ "$UNAME_M" = "x86_64" ]; then + TRAVIS_PKGS="$TRAVIS_PKGS $X86_64_PKGS" + fi + apt-get update -qq apt-get install -qq --no-install-recommends $TRAVIS_PKGS - # travis is based on 14.04 and that does not have python - # packages for future and ipaddress (16.04 has those packages) - pip install junit-xml future ipaddress chmod a+x $HOME } travis_prep -ulimit -c unlimited -echo "|`pwd`/test/abrt.sh %P %p %s %e" > /proc/sys/kernel/core_pattern - export GCOV +$CC --version time make CC="$CC" -j4 +./criu/criu -v4 cpuinfo dump || : +./criu/criu -v4 cpuinfo check || : + +make lint + +# Check that help output fits into 80 columns +WIDTH=$(./criu/criu --help | wc --max-line-length) +if [ "$WIDTH" -gt 80 ]; then + echo "criu --help output does not obey 80 characters line width!" + exit 1 +fi + [ -n "$SKIP_TRAVIS_TEST" ] && return +ulimit -c unlimited + +echo "|`pwd`/test/abrt.sh %P %p %s %e" > /proc/sys/kernel/core_pattern + if [ "${COMPAT_TEST}x" = "yx" ] ; then # Dirty hack to keep both ia32 & x86_64 shared libs on a machine: # headers are probably not compatible, so apt-get doesn't allow @@ -165,15 +196,3 @@ ip net add test make -C test/others/libcriu run make -C test/others/shell-job - -if ! [ -x "$(command -v flake8)" ]; then - pip install flake8 -fi -make lint - -# Check that help output fits into 80 columns -WIDTH=$(./criu/criu --help | wc --max-line-length) -if [ "$WIDTH" -gt 80 ]; then - echo "criu --help output does not obey 80 characters line width!" 
- exit 1 -fi From c4006c0034ef2162693d9dd619d025c240affc78 Mon Sep 17 00:00:00 2001 From: Vitaly Ostrosablin Date: Fri, 1 Nov 2019 09:00:23 +0000 Subject: [PATCH 0218/2030] test/static:conntracks: Support nftables Update test to support both iptables and nft to create conntrack rules. Signed-off-by: Vitaly Ostrosablin Signed-off-by: Andrei Vagin --- test/zdtm/static/conntracks | 36 ++++++++++++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/test/zdtm/static/conntracks b/test/zdtm/static/conntracks index a30e0e268..26220f97c 100755 --- a/test/zdtm/static/conntracks +++ b/test/zdtm/static/conntracks @@ -23,7 +23,7 @@ do_or_fail() fail "$failmsg: $output" } -do_start() +do_start_ipt() { [ -f "$statefile" ] && die "state file $statefile aleady exists" @@ -35,7 +35,7 @@ do_start() iptables -L \> "$statefile" } -do_stop() +do_stop_ipt() { do_or_fail "can't compare the iptables" \ iptables -L \| diff -u "$statefile" - @@ -45,6 +45,38 @@ do_stop() echo "PASS" > $outfile } +do_start_nft() +{ + [ -f "$statefile" ] && die "state file $statefile aleady exists" + + do_or_fail "can't install a state match" \ + nft add rule filter INPUT \ + ct state related,established accept + + do_or_fail "can't list the loaded nftables" \ + nft list ruleset \> "$statefile" +} + +do_stop_nft() +{ + do_or_fail "can't compare the nftables" \ + nft list ruleset \| diff -u "$statefile" - + + rm -f "$statefile" + + echo "PASS" > $outfile +} + +do_start() +{ + [ -x "$(command -v nft)" ] && do_start_nft || do_start_ipt +} + +do_stop() +{ + [ -x "$(command -v nft)" ] && do_stop_nft || do_stop_ipt +} + tmpargs="$(../lib/parseargs.sh --name=$0 \ --flags-req=statefile,outfile \ --flags-opt="start,stop" -- "$@")" || From 389bcfef3e8f4be35464da9f94681e6573d6d1d9 Mon Sep 17 00:00:00 2001 From: Nidhi Gupta Date: Fri, 18 Oct 2019 20:09:15 +0530 Subject: [PATCH 0219/2030] test/java: Add FileRead Tests Signed-off-by: Nidhi Gupta --- test/javaTests/README.md | 33 ++ 
test/javaTests/pom.xml | 47 ++ .../criu/java/tests/CheckpointRestore.java | 450 ++++++++++++++++++ .../src/org/criu/java/tests/FileRead.java | 175 +++++++ .../src/org/criu/java/tests/Helper.java | 99 ++++ .../src/org/criu/java/tests/ImgFilter.java | 11 + test/javaTests/test.xml | 13 + 7 files changed, 828 insertions(+) create mode 100644 test/javaTests/README.md create mode 100644 test/javaTests/pom.xml create mode 100644 test/javaTests/src/org/criu/java/tests/CheckpointRestore.java create mode 100644 test/javaTests/src/org/criu/java/tests/FileRead.java create mode 100644 test/javaTests/src/org/criu/java/tests/Helper.java create mode 100644 test/javaTests/src/org/criu/java/tests/ImgFilter.java create mode 100644 test/javaTests/test.xml diff --git a/test/javaTests/README.md b/test/javaTests/README.md new file mode 100644 index 000000000..cb779285e --- /dev/null +++ b/test/javaTests/README.md @@ -0,0 +1,33 @@ +# JavaTests + +Java Functional tests checks the Java File based APIs and Memory mapping APIs by placing the process in various states before checkpointing and validates if these resources are still accessible after restore. It also validates if the file contents are in expected states. + +Tests are to be run by a user having following capabilities: +CAP_DAC_OVERRIDE +CAP_CHOWN +CAP_SETPCAP +CAP_SETGID +CAP_AUDIT_CONTROL +CAP_DAC_READ_SEARCH +CAP_NET_ADMIN +CAP_SYS_ADMIN +CAP_SYS_CHROOT +CAP_SYS_PTRACE +CAP_FOWNER +CAP_KILL +CAP_FSETID +CAP_SYS_RESOURCE +CAP_SETUID + +## File-based Java APIs + +Here we test the File-Based Java APIs by checkpointing the application in the following scenarios and verifying the contents of the file after restore: +- Reading and writing in the same file. 
(FileRead.java) + +### Prerequisites for running the tests: +- Maven + +### To run the tests: +- In the javaTests folder run the command ```sudo mvn test``` +- To keep the img files and logs from previous failures, between different runs of the test, use the ```-DneverCleanFailures=true ``` option in the maven command +as ```sudo mvn -DneverCleanFailures=true test``` diff --git a/test/javaTests/pom.xml b/test/javaTests/pom.xml new file mode 100644 index 000000000..faae44d1b --- /dev/null +++ b/test/javaTests/pom.xml @@ -0,0 +1,47 @@ + + 4.0.0 + criu + criu-javaTests + 1 + criu-javaTests + + + src + + + + org.apache.maven.plugins + maven-surefire-plugin + 2.14.1 + + + + test.xml + + + + + + maven-compiler-plugin + 3.1 + + 1.7 + 1.7 + + + + + + + + org.testng + testng + 6.3.1 + + + + UTF-8 + + diff --git a/test/javaTests/src/org/criu/java/tests/CheckpointRestore.java b/test/javaTests/src/org/criu/java/tests/CheckpointRestore.java new file mode 100644 index 000000000..968488191 --- /dev/null +++ b/test/javaTests/src/org/criu/java/tests/CheckpointRestore.java @@ -0,0 +1,450 @@ +package org.criu.java.tests; + +import org.testng.Assert; +import org.testng.annotations.AfterTest; +import org.testng.annotations.BeforeSuite; +import org.testng.annotations.Parameters; +import org.testng.annotations.Test; + +import java.io.*; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.channels.FileChannel.MapMode; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.nio.file.StandardOpenOption; +import java.text.SimpleDateFormat; +import java.util.Date; + +public class CheckpointRestore { + private MappedByteBuffer mappedByteBuffer = null; + private String testName = ""; + private String logFolder = Helper.LOG_FOLDER + "/"; + private String outputFolder = Helper.OUTPUT_FOLDER_NAME + "/"; + + /** + * Create CRlog and output directory if they don't exist. 
+ * Delete directories containing .img files from failed Checkpoint-Restore if 'neverCleanFailures' property is not set to true. + * + * @throws IOException + */ + @BeforeSuite + void suiteSetup() throws IOException { + System.out.println("Tests are to be run as a privileged user having capabilities mentioned in ReadMe"); + boolean neverCleanFailures = Boolean.getBoolean("neverCleanFailures"); + Path logDir = Paths.get(logFolder); + Path outputDir = Paths.get(outputFolder); + if (!Files.exists(logDir)) { + System.out.println("Logs directory does not exist, creating it"); + Files.createDirectory(logDir); + } + if (!Files.exists(outputDir)) { + System.out.println("Output directory does not exist, creating it"); + Files.createDirectory(outputDir); + } + /* + * Delete the directories containing the img files from failed Checkpoint-Restore. + */ + if (!neverCleanFailures) { + File output = new File(outputFolder); + String[] name = output.list(); + for (int i = 0; null != name && i < name.length; i++) { + File testFolder = new File(outputFolder + name[i]); + if (testFolder.isDirectory()) { + String[] list = testFolder.list(); + File file; + if (null != list) { + for (int j = 0; j < list.length; j++) { + file = new File(outputFolder + name[i] + "/" + list[j]); + if (!file.isDirectory()) { + Files.delete(file.toPath()); + } + } + } + } + Files.delete(testFolder.toPath()); + } + } + } + + /** + * Create the output folder for the test in case it does not exist + * + * @param testName Name of the java test + * @throws IOException + */ + private void testSetup(String testName) throws IOException { + Path testFolderPath = Paths.get(outputFolder + testName + "/"); + if (!Files.exists(testFolderPath)) { + System.out.println("Test Folder does not exist creating it"); + Files.createDirectory(testFolderPath); + } + } + + /** + * Read the pid of process from the pid file of test + * + * @param name Name of the java test + * @return pid Process id of the java test process + * @throws 
IOException + */ + private String getPid(String name) throws IOException { + name = outputFolder + testName + "/" + name + Helper.PID_APPEND; + File pidfile = new File(name); + BufferedReader pidReader = new BufferedReader(new FileReader(pidfile)); + String pid = pidReader.readLine(); + pidReader.close(); + return pid; + } + + /** + * @param testName Name of the java test + * @param checkpointOpt Additional options for checkpoint + * @param restoreOpt Additional options for restore + * @throws Exception + */ + @Test + @Parameters({"testname", "checkpointOpt", "restoreOpt"}) + public void runtest(String testName, String checkpointOpt, String restoreOpt) throws Exception { + this.testName = testName; + String name = Helper.PACKAGE_NAME + "." + testName; + String pid; + int exitCode; + + System.out.println("======= Testing " + testName + " ========"); + + testSetup(testName); + + File f = new File(Helper.MEMORY_MAPPED_FILE_NAME); + if (f.exists()) { + f.delete(); + } + + /* + * Create a new file that will be mapped to memory and used to communicate between + * this process and the java test process. + */ + boolean newFile = f.createNewFile(); + Assert.assertTrue(newFile, "Unable to create a new file to be mapped"); + + /* + * MappedByteBuffer communicates between this process and java process called. 
+ */ + FileChannel channel = FileChannel.open(f.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE); + mappedByteBuffer = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE); + mappedByteBuffer.clear(); + channel.close(); + + /* + * Put MappedByteBuffer in Init state + */ + mappedByteBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_INIT); + + /* + * Run the test as a separate process + */ + System.out.println("Starting the java Test"); + ProcessBuilder builder = new ProcessBuilder("java", "-cp", "target/classes", name); + Process process = builder.start(); + + char currentState = mappedByteBuffer.getChar(Helper.MAPPED_INDEX); + /* + * Loop until the test process changes the state of MappedByteBuffer from init state + */ + while (Helper.STATE_INIT == currentState) { + currentState = mappedByteBuffer.getChar(Helper.MAPPED_INDEX); + } + + /* + * If Mapped Buffer is in Helper.STATE_FAIL state before checkpointing then an exception must + * have occurred in the test. + */ + while (Helper.STATE_FAIL == currentState) { + try { + /* + * We exit the test process with exit code 5 in case of an exception + */ + exitCode = process.exitValue(); + /* + * Reaching here implies that .exitValue() has not thrown an exception, so the process has + * exited, We now check the exitCode. + */ + if (5 == exitCode) { + Assert.fail(testName + ": Exception occurred while running the test: check the log file for details."); + } else { + Assert.fail(testName + ": ERROR: Unexpected value of exit code: " + exitCode + ", expected: 5"); + } + } catch (IllegalThreadStateException e) { + /* + * Do nothing, as an Exception is expected if the process has not exited + * and we try to get its exitValue. 
+ */ + } + + currentState = mappedByteBuffer.getChar(Helper.MAPPED_INDEX); + } + + /* + * Mapped Buffer state should be Helper.STATE_CHECKPOINT for checkpointing or Helper.STATE_END if some error occurs in test + */ + if (Helper.STATE_END != currentState) { + Assert.assertEquals(currentState, Helper.STATE_CHECKPOINT, testName + ": ERROR: Error occurred while running the test: test is not in the excepted 'waiting to be checkpointed state': " + currentState); + } else { + Assert.fail(testName + ": ERROR: Error took place in the test check the log file for more details"); + } + /* + * Reaching here implies that MappedByteBuffer is in To Be Checkpointed state. + * Get the pid of the test process + */ + + pid = getPid(testName); + try { + /* + * Checkpoint the process + */ + checkpoint(pid, checkpointOpt); + + } catch (Exception e) { + /* + * If exception occurs put the MappedByteBuffer to Helper.STATE_TERMINATE-Terminate state. + * On reading the terminate state, the test process terminates, else it + * may go on looping. + */ + mappedByteBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_TERMINATE); + Assert.fail(testName + ": Exception occurred while during checkpointing" + e, e); + } + + /* + * The process has been checkpointed successfully, now restoring the process. 
+ */ + try { + /* + * Restore the process + */ + restore(restoreOpt); + } catch (Exception e) { + mappedByteBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_TERMINATE); + Assert.fail(testName + ": Exception occurred while restoring the test" + e, e); + } + + /* + * Wait for test process to finish + */ + currentState = mappedByteBuffer.getChar(Helper.MAPPED_INDEX); + while (Helper.STATE_RESTORE == currentState) { + currentState = mappedByteBuffer.getChar(Helper.MAPPED_INDEX); + } + + /* + * If a test passes it puts the MappedByteBuffer to Helper.STATE_PASS-Pass state, + * On failing to Helper.STATE_FAIL-Fail state, and if our Buffer is in Helper.STATE_TERMINATE state + * its because the checkpoint-restore of test process failed. + */ + + Assert.assertNotEquals(currentState, Helper.STATE_TERMINATE, testName + ": ERROR: Checkpoint-Restore failed"); + Assert.assertNotEquals(currentState, Helper.STATE_FAIL, testName + ": ERROR: Test Failed, Check Log for details"); + Assert.assertEquals(currentState, Helper.STATE_PASS, testName + " ERROR: Unexpected State of Mapped Buffer"); + System.out.println("-----" + "PASS" + "-----"); + + } + + /** + * Remove .img files, dump.log, restore.log, stats-dump and stats-restore files from Log Directory + * + * @throws IOException + */ + @AfterTest + void cleanup() throws IOException { + int i; + String currentPath = System.getProperty("user.dir"); + currentPath = currentPath + "/" + logFolder; + File deleteFile; + File dir = new File(currentPath); + String[] imgFiles = dir.list(new ImgFilter()); + if (null != imgFiles) { + for (i = 0; i < imgFiles.length; i++) { + deleteFile = new File(currentPath + imgFiles[i]); + Files.delete(deleteFile.toPath()); + } + } + + boolean exists = Files.exists(Paths.get(currentPath + "dump.log")); + if (exists) { + Files.delete(Paths.get(currentPath + "dump.log")); + } + + exists = Files.exists(Paths.get(currentPath + "restore.log")); + if (exists) { + Files.delete(Paths.get(currentPath + "restore.log")); 
+ } + + exists = Files.exists(Paths.get(currentPath + "stats-dump")); + if (exists) { + Files.delete(Paths.get(currentPath + "stats-dump")); + } + + exists = Files.exists(Paths.get(currentPath + "stats-restore")); + if (exists) { + Files.delete(Paths.get(currentPath + "stats-restore")); + } + } + + /** + * Copy .img files, dump.log, restore.log, stats-dump and stats-restore files from Log Directory if they exist + * to another folder. + * + * @throws IOException + */ + String copyFiles() throws IOException { + String currentPath = System.getProperty("user.dir"); + String folderSuffix = new SimpleDateFormat("yyMMddHHmmss").format(new Date()); + String fromPath = currentPath + "/" + logFolder; + File fromDir = new File(fromPath); + Path fromFile, toFile; + boolean exists; + String toPath = currentPath + "/" + outputFolder + testName + folderSuffix + "/"; + Path dirPath = Paths.get(toPath); + Files.createDirectory(dirPath); + + String[] imgFiles = fromDir.list(new ImgFilter()); + if (null != imgFiles) { + for (int i = 0; i < imgFiles.length; i++) { + fromFile = Paths.get(fromPath + imgFiles[i]); + toFile = Paths.get(toPath + imgFiles[i]); + Files.copy(fromFile, toFile); + } + } + + fromFile = Paths.get(fromPath + "dump.log"); + exists = Files.exists(fromFile); + if (exists) { + toFile = Paths.get(toPath + "dump.log"); + Files.copy(fromFile, toFile); + } + + fromFile = Paths.get(fromPath + "restore.log"); + exists = Files.exists(fromFile); + if (exists) { + toFile = Paths.get(toPath + "restore.log"); + Files.copy(fromFile, toFile); + } + + fromFile = Paths.get(fromPath + "stats-dump"); + exists = Files.exists(fromFile); + if (exists) { + toFile = Paths.get(toPath + "stats-dump"); + Files.copy(fromFile, toFile); + } + + fromFile = Paths.get(fromPath + "stats-restore"); + exists = Files.exists(fromFile); + if (exists) { + toFile = Paths.get(toPath + "stats-restore"); + Files.copy(fromFile, toFile); + } + + return folderSuffix; + } + + /** + * Checkpoint the process, if 
process has not been checkpointed correctly + * copy the .img, log and stats files, puts MappedBuffer to 'terminate' state and mark + * test as failed + * + * @param pid Pid of process to be checkpointed + * @param checkpointOpt Additional options for checkpoint + * @throws IOException + * @throws InterruptedException + */ + private void checkpoint(String pid, String checkpointOpt) throws IOException, InterruptedException { + ProcessBuilder builder; + System.out.println("Checkpointing process " + pid); + String command = "../../criu/criu dump --shell-job -t " + pid + " -vvv -D " + logFolder + " -o dump.log"; + if (0 == checkpointOpt.length()) { + String[] cmd = command.split(" "); + builder = new ProcessBuilder(cmd); + } else { + command = command + " " + checkpointOpt; + String[] cmd = command.split(" "); + builder = new ProcessBuilder(cmd); + } + Process process = builder.start(); + BufferedReader stdError = new BufferedReader(new InputStreamReader(process.getErrorStream())); + int exitCode = process.waitFor(); + + if (0 != exitCode) { + /* + * Print the error stream + */ + String line = stdError.readLine(); + while (null != line) { + System.out.println(line); + line = stdError.readLine(); + } + + mappedByteBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_TERMINATE); + /* + * If checkpoint fails copy the img files, dump.log, stats-dump, stats-restore + */ + String folderSuffix = copyFiles(); + + Assert.fail(testName + ": ERROR: Error during checkpoint: exitCode of checkpoint process was not zero.\nFor more details check dump.log in " + outputFolder + testName + folderSuffix); + return; + } + + System.out.println("Checkpoint success"); + process.destroy(); + + } + + /** + * Restore the process, if process has been restored correctly put Mapped Buffer to + * 'restored' state, else copy the .img, log and stats files and put MappedBuffer to 'terminate' + * state and mark test as failed + * + * @param restoreOpt Additional options for restore + * @throws IOException + 
* @throws InterruptedException + */ + private void restore(String restoreOpt) throws IOException, InterruptedException { + ProcessBuilder builder; + System.out.println("Restoring process"); + String command = "../../criu/criu restore -d -vvv --shell-job -D " + logFolder + " -o restore.log"; + if (0 == restoreOpt.length()) { + String[] cmd = command.split(" "); + builder = new ProcessBuilder(cmd); + } else { + command = command + " " + restoreOpt; + String[] cmd = command.split(" "); + builder = new ProcessBuilder(cmd); + } + + Process process = builder.start(); + BufferedReader stdError = new BufferedReader(new InputStreamReader(process.getErrorStream())); + int exitCode = process.waitFor(); + + if (0 != exitCode) { + /* + * Print the error stream + */ + String line = stdError.readLine(); + while (null != line) { + System.out.println(line); + line = stdError.readLine(); + } + mappedByteBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_TERMINATE); + /* + * If restore fails copy img files, dump.log, restore.log, stats-dump, stats-restore + */ + String folderSuffix = copyFiles(); + Assert.fail(testName + ": ERROR: Error during restore: exitCode of restore process was not zero.\nFor more details check restore.log in " + outputFolder + testName + folderSuffix); + + return; + } else { + System.out.println("Restore success"); + mappedByteBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_RESTORE); + } + process.destroy(); + } +} diff --git a/test/javaTests/src/org/criu/java/tests/FileRead.java b/test/javaTests/src/org/criu/java/tests/FileRead.java new file mode 100644 index 000000000..d94a14112 --- /dev/null +++ b/test/javaTests/src/org/criu/java/tests/FileRead.java @@ -0,0 +1,175 @@ +package org.criu.java.tests; + +import java.io.*; +import java.lang.management.ManagementFactory; +import java.lang.management.RuntimeMXBean; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.channels.FileChannel.MapMode; +import 
java.nio.file.StandardOpenOption; +import java.util.logging.Level; +import java.util.logging.Logger; + +class FileRead { + private static String TESTNAME = "FileRead"; + + /** + * @param i int value denoting the line number. + * @return The line as a string. + */ + private static String getLine(int i) { + return "Line No: " + i + "\n"; + } + + /** + * Write in a file, line by line, and read it, checkpoint and restore + * and then continue to read and write the file. + * + * @param args Not used + */ + public static void main(String[] args) { + MappedByteBuffer b = null; + Logger logger = null; + int wi, ri = 0; + try { + File file = new File(Helper.OUTPUT_FOLDER_NAME + "/" + TESTNAME + "/FileRead_write.txt"); + File f = new File(Helper.MEMORY_MAPPED_FILE_NAME); + logger = Logger.getLogger(Helper.PACKAGE_NAME + "." + TESTNAME); + RuntimeMXBean bean = ManagementFactory.getRuntimeMXBean(); + String pid = bean.getName(); + int val = Helper.init(TESTNAME, pid, logger); + if (0 != val) { + logger.log(Level.SEVERE, "Helper.init returned a non-zero code."); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + logger.log(Level.INFO, "Test init done; pid written to pid file; beginning with test"); + FileChannel channel = FileChannel.open(f.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE); + b = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE); + channel.close(); + /* + * Mapped Byte Buffer should be in init state at the beginning of test + */ + if ('I' != b.getChar(Helper.MAPPED_INDEX)) { + logger.log(Level.SEVERE, "Error: Error in memory mapping, test is not in init state"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + + logger.log(Level.INFO, "Checking existence of file to be read and written to."); + if (file.exists()) { + file.delete(); + } + boolean newFile = file.createNewFile(); + if (!newFile) { + logger.log(Level.SEVERE, "Cannot create a new file to read and 
write to."); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + + BufferedWriter brw = new BufferedWriter(new FileWriter(file)); + BufferedReader brr = new BufferedReader(new FileReader(file)); + + logger.log(Level.INFO, "Start writing the lines in file"); + + for (wi = 1; wi <= 5; wi++) { + brw.write(getLine(wi)); + } + + brw.flush(); + String s = "Line No: 0"; + int i; + + for (i = 0; i < 50; i++) { + brw.write(getLine(wi)); + brw.flush(); + wi++; + s = brr.readLine(); + ri = Integer.parseInt(s.replaceAll("[\\D]", "")); + } + + wi--; + logger.log(Level.INFO, "Going to checkpoint"); + + /* + * Checkpoint and wait for restore + */ + Helper.checkpointAndWait(b, logger); + logger.log(Level.INFO, "Test has been restored!"); + + brw.flush(); + + try { + s = brr.readLine(); + + } catch (Exception e) { + logger.log(Level.SEVERE, "Error: Buffered Reader is not reading file"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + + if (null == s || s.isEmpty()) { + logger.log(Level.SEVERE, "Error: Error while reading lines after restore: Line read is null"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + int readLineNo = Integer.parseInt(s.replaceAll("[\\D]", "")); + if (ri + 1 != readLineNo) { + logger.log(Level.SEVERE, "Error: Not reading at correct line"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + String ch = brr.readLine(); + while (null != ch && !ch.isEmpty()) { + s = ch; + ch = brr.readLine(); + } + + readLineNo = Integer.parseInt(s.replaceAll("[\\D]", "")); + + if (readLineNo != wi) { + logger.log(Level.SEVERE, "Error: Data written has been lost"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + + try { + brw.write(getLine(wi + 1)); + brw.flush(); + } catch (IOException e) { + logger.log(Level.SEVERE, "Error: cannot write file after restore"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + + s = 
brr.readLine(); + readLineNo = Integer.parseInt(s.replaceAll("[\\D]", "")); + + if (readLineNo != wi + 1) { + logger.log(Level.SEVERE, "Error: Data not written correctly"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + logger.log(Level.INFO, "File is being read and written to correctly after restore!"); + logger.log(Level.INFO, Helper.PASS_MESSAGE); + brw.close(); + brr.close(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_PASS); + System.exit(0); + } catch (Exception e) { + if (null != logger) { + StringWriter writer = new StringWriter(); + PrintWriter printWriter = new PrintWriter(writer); + e.printStackTrace(printWriter); + logger.log(Level.SEVERE, "Exception occurred:" + e); + logger.log(Level.FINE, writer.toString()); + } + + if (null != b) { + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + + System.exit(5); + } + } +} diff --git a/test/javaTests/src/org/criu/java/tests/Helper.java b/test/javaTests/src/org/criu/java/tests/Helper.java new file mode 100644 index 000000000..d608fba47 --- /dev/null +++ b/test/javaTests/src/org/criu/java/tests/Helper.java @@ -0,0 +1,99 @@ +package org.criu.java.tests; + +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.nio.MappedByteBuffer; +import java.util.logging.FileHandler; +import java.util.logging.Level; +import java.util.logging.Logger; +import java.util.logging.SimpleFormatter; + +class Helper { + static String MEMORY_MAPPED_FILE_NAME = "output/file"; + static String PASS_MESSAGE = "Test was a Success!!!"; + static String OUTPUT_FOLDER_NAME = "output"; + static String PACKAGE_NAME = "org.criu.java.tests"; + static String PID_APPEND = ".pid"; + static String SOURCE_FOLDER = "src/org/criu/java/tests"; + static String LOG_FOLDER = "CRlogs"; + static int MAPPED_REGION_SIZE = 100; + static int MAPPED_INDEX = 1; + static char STATE_RESTORE = 'R'; + static char STATE_CHECKPOINT = 'C'; + static char STATE_INIT = 'I'; 
+ static char STATE_TERMINATE = 'T'; + static char STATE_END = 'E'; + static char STATE_FAIL = 'F'; + static char STATE_PASS = 'P'; + + /** + * Create a new log file and pidfile and write + * the pid to the pidFile. + * + * @param testName Name of the java test + * @param pid Pid of the java test process + * @param logger + * @return 0 or 1 denoting whether the function was successful or not. + * @throws IOException + */ + static int init(String testName, String pid, Logger logger) throws IOException { + File pidfile = new File(OUTPUT_FOLDER_NAME + "/" + testName + "/" + testName + PID_APPEND); + + FileHandler handler = new FileHandler(Helper.OUTPUT_FOLDER_NAME + "/" + testName + "/" + testName + ".log", false); + handler.setFormatter(new SimpleFormatter()); + handler.setLevel(Level.FINE); + logger.addHandler(handler); + logger.setLevel(Level.FINE); + + /* + * Create a pid file and write the process's pid into it. + */ + if (pidfile.exists()) { + pidfile.delete(); + } + boolean newFile = pidfile.createNewFile(); + if (!newFile) { + logger.log(Level.SEVERE, "Cannot create new pid file."); + return 1; + } + BufferedWriter pidWriter = new BufferedWriter(new FileWriter(pidfile)); + pidWriter.write(pid + "\n"); + pidWriter.close(); + return 0; + } + + /** + * Put the Mapped Buffer to 'Ready to be checkpointed' state and wait for restore. + * + * @param b The MappedByteBuffer from the calling process. + * @param logger The Logger from the calling process. + */ + static void checkpointAndWait(MappedByteBuffer b, Logger logger) { + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_CHECKPOINT); + char c = b.getChar(Helper.MAPPED_INDEX); + /* + * Loop while MappedByteBuffer is in 'To be checkpointed' state + */ + while (Helper.STATE_CHECKPOINT == c) { + c = b.getChar(Helper.MAPPED_INDEX); + } + /* + * Test is in 'T' state if some error or exception occurs during checkpoint or restore. 
+ */ + if (Helper.STATE_TERMINATE == c) { + logger.log(Level.SEVERE, "Error during checkpoint-restore, Test terminated"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + /* + * The expected state of MappedByteBuffer is Helper.STATE_RESTORE-restored state. + */ + if (Helper.STATE_RESTORE != c) { + logger.log(Level.INFO, "Error: Test state is not the expected Restored state"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + } +} diff --git a/test/javaTests/src/org/criu/java/tests/ImgFilter.java b/test/javaTests/src/org/criu/java/tests/ImgFilter.java new file mode 100644 index 000000000..97087c2cc --- /dev/null +++ b/test/javaTests/src/org/criu/java/tests/ImgFilter.java @@ -0,0 +1,11 @@ +package org.criu.java.tests; + +import java.io.File; +import java.io.FilenameFilter; + +class ImgFilter implements FilenameFilter { + @Override + public boolean accept(File dir, String fileName) { + return (fileName.endsWith(".img")); + } +} diff --git a/test/javaTests/test.xml b/test/javaTests/test.xml new file mode 100644 index 000000000..8ff67c5e0 --- /dev/null +++ b/test/javaTests/test.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + From 9325339e64e0485e981e880ae460729cd9b9b648 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Sat, 9 Nov 2019 18:31:58 +0000 Subject: [PATCH 0220/2030] travis: Disallow failures on ia32 It seems pretty stable and hasn't add many false-positives during last months. While can reveal some issues for compatible C/R code. 
Signed-off-by: Dmitry Safonov --- .travis.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 4cde9c4fb..7a0c29a55 100644 --- a/.travis.yml +++ b/.travis.yml @@ -40,8 +40,6 @@ matrix: - env: TR_ARCH=fedora-rawhide-aarch64 - env: TR_ARCH=s390x - env: TR_ARCH=local GCOV=1 - - env: TR_ARCH=local COMPAT_TEST=y - - env: TR_ARCH=local CLANG=1 COMPAT_TEST=y script: - sudo make CCACHE=1 -C scripts/travis $TR_ARCH after_success: From d804f70a680b0ba7410e3845aa8179d8ab5c4219 Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Thu, 7 Feb 2019 15:17:48 +0300 Subject: [PATCH 0221/2030] mount: remove useless check in populate_mnt_ns The path: restore_root_task prepare_namespace_before_tasks mntns_maybe_create_roots is always called before the path below: restore_root_task fork_with_pid restore_task_with_children prepare_namespace prepare_mnt_ns populate_mnt_ns So (!!mnt_roots) == (root_ns_mask & CLONE_NEWNS) in populate_mnt_ns, but in prepare_mnt_ns we've already checked that it is true, so there is no need for this check - remove it.
Signed-off-by: Pavel Tikhomirov --- criu/mount.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/criu/mount.c b/criu/mount.c index 486d01719..802295778 100644 --- a/criu/mount.c +++ b/criu/mount.c @@ -3139,15 +3139,12 @@ static int populate_mnt_ns(void) struct ns_id *nsid; int ret; - if (mnt_roots) { - /* mnt_roots is a tmpfs mount and it's private */ - root_yard_mp = mnt_entry_alloc(); - if (!root_yard_mp) - return -1; + root_yard_mp = mnt_entry_alloc(); + if (!root_yard_mp) + return -1; - root_yard_mp->mountpoint = mnt_roots; - root_yard_mp->mounted = true; - } + root_yard_mp->mountpoint = mnt_roots; + root_yard_mp->mounted = true; pms = mnt_build_tree(mntinfo, root_yard_mp); if (!pms) From 71dff54aa474efa105043cd86cc38103c3c21859 Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Thu, 7 Feb 2019 15:17:49 +0300 Subject: [PATCH 0222/2030] ns: make rst_new_ns_id static It's never used outside of namespaces.c Signed-off-by: Pavel Tikhomirov --- criu/include/namespaces.h | 1 - criu/namespaces.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/criu/include/namespaces.h b/criu/include/namespaces.h index 287abb3c8..a9a970a9b 100644 --- a/criu/include/namespaces.h +++ b/criu/include/namespaces.h @@ -166,7 +166,6 @@ extern int restore_ns(int rst, struct ns_desc *nd); extern int dump_task_ns_ids(struct pstree_item *); extern int predump_task_ns_ids(struct pstree_item *); -extern struct ns_id *rst_new_ns_id(unsigned int id, pid_t pid, struct ns_desc *nd, enum ns_type t); extern int rst_add_ns_id(unsigned int id, struct pstree_item *, struct ns_desc *nd); extern struct ns_id *lookup_ns_by_id(unsigned int id, struct ns_desc *nd); diff --git a/criu/namespaces.c b/criu/namespaces.c index a228737ee..57f6bdfef 100644 --- a/criu/namespaces.c +++ b/criu/namespaces.c @@ -290,7 +290,7 @@ static void nsid_add(struct ns_id *ns, struct ns_desc *nd, unsigned int id, pid_ pr_info("Add %s ns %d pid %d\n", nd->str, ns->id, ns->ns_pid); } 
-struct ns_id *rst_new_ns_id(unsigned int id, pid_t pid, +static struct ns_id *rst_new_ns_id(unsigned int id, pid_t pid, struct ns_desc *nd, enum ns_type type) { struct ns_id *nsid; From 7be7260261a1f94b111b9390a2b39179e87d7d8b Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Thu, 7 Feb 2019 15:17:50 +0300 Subject: [PATCH 0223/2030] ns/restore/image: do not read namespace images for non-namespaced case Images for mount and net namespaces are empty if ns does not belong to us, thus we don't need to collect on restore. By adding these checks we will eliminate suspicious messages in logs about lack of images: ./test/zdtm.py run -k always -f h -t zdtm/static/env00 env00/54/2/restore.log:(00.000332) No mountpoints-5.img image env00/54/2/restore.log:(00.000342) No netns-2.img image Signed-off-by: Pavel Tikhomirov --- criu/mount.c | 5 +++++ criu/net.c | 3 +++ 2 files changed, 8 insertions(+) diff --git a/criu/mount.c b/criu/mount.c index 802295778..fdaaa7b31 100644 --- a/criu/mount.c +++ b/criu/mount.c @@ -2989,6 +2989,11 @@ int read_mnt_ns_img(void) struct mount_info *pms = NULL; struct ns_id *nsid; + if (!(root_ns_mask & CLONE_NEWNS)) { + mntinfo = NULL; + return 0; + } + for (nsid = ns_ids; nsid != NULL; nsid = nsid->next) { if (nsid->nd != &mnt_ns_desc) continue; diff --git a/criu/net.c b/criu/net.c index 44b0ce224..9825db10f 100644 --- a/criu/net.c +++ b/criu/net.c @@ -2057,6 +2057,9 @@ int read_net_ns_img(void) { struct ns_id *ns; + if (!(root_ns_mask & CLONE_NEWNET)) + return 0; + for (ns = ns_ids; ns != NULL; ns = ns->next) { struct cr_img *img; int ret; From 35adc08598f78e9845388f235ffe0917d7606779 Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Thu, 7 Feb 2019 15:17:51 +0300 Subject: [PATCH 0224/2030] mount: rework mount tree build step on restore Build each mntns mount tree alone just after reading mounts for it from image. 
These additional step before merging everything to a single mount tree allows us to have pointers to each mntns root mount at hand, also it allows us to remove extra complication from mnt_build_tree. Teach collect_mnt_from_image return a tail pointer, so we can merge lists together later after building each tree. Add separate merge_mount_trees helper to create joint mount tree for all mntns'es and simplify mnt_build_ids_tree. I don't see any place where we use mntinfo_tree on restore, so save the real root of mntns mounts tree in it, instead of root_yard_mp, will need it in next patches for checking restore of these trees. v2: prepend children to the root_yard in merge_mount_trees so that the order in merged tree persists Signed-off-by: Pavel Tikhomirov --- criu/mount.c | 131 +++++++++++++++++++++++++-------------------------- 1 file changed, 65 insertions(+), 66 deletions(-) diff --git a/criu/mount.c b/criu/mount.c index fdaaa7b31..49708ffd5 100644 --- a/criu/mount.c +++ b/criu/mount.c @@ -330,7 +330,7 @@ static bool mounts_equal(struct mount_info *a, struct mount_info *b) */ static char *mnt_roots; -static struct mount_info *mnt_build_ids_tree(struct mount_info *list, struct mount_info *yard_mount) +static struct mount_info *mnt_build_ids_tree(struct mount_info *list) { struct mount_info *m, *root = NULL; @@ -351,41 +351,14 @@ static struct mount_info *mnt_build_ids_tree(struct mount_info *list, struct mou if (!parent) { /* Only a root mount can be without parent */ - if (root == NULL && m->is_ns_root) { + if (!root && m->is_ns_root) { root = m; - if (!yard_mount) - continue; + continue; } - if (!root) { - pr_err("No parent found for mountpoint %d (@%s)\n", - m->mnt_id, m->mountpoint); - return NULL; - } - - pr_debug("Mountpoint %d (@%s) w/o parent %d\n", - m->mnt_id, m->mountpoint, m->parent_mnt_id); - - if (!mounts_sb_equal(root, m) || - strcmp(root->root, m->root)) { - pr_err("Nested mount namespaces with different " - "roots %d (@%s %s) %d (@%s %s) are not 
supported yet\n", - root->mnt_id, root->mountpoint, root->root, - m->mnt_id, m->mountpoint, m->root); - return NULL; - } - - /* Mount all namespace roots into the roots yard. */ - parent = yard_mount; - if (unlikely(!yard_mount)) { - pr_err("Nested mount %d (@%s %s) w/o root insertion detected\n", - m->mnt_id, m->mountpoint, m->root); - return NULL; - } - - pr_debug("Mountpoint %d (@%s) get parent %d (@%s)\n", - m->mnt_id, m->mountpoint, - parent->mnt_id, parent->mountpoint); + pr_err("No parent found for mountpoint %d (@%s)\n", + m->mnt_id, m->mountpoint); + return NULL; } m->parent = parent; @@ -397,9 +370,6 @@ static struct mount_info *mnt_build_ids_tree(struct mount_info *list, struct mou return NULL; } - if (yard_mount) - return yard_mount; - return root; } @@ -997,8 +967,7 @@ static int resolve_shared_mounts(struct mount_info *info, int root_master_id) return 0; } -static struct mount_info *mnt_build_tree(struct mount_info *list, - struct mount_info *root_mp) +static struct mount_info *mnt_build_tree(struct mount_info *list) { struct mount_info *tree; @@ -1007,7 +976,7 @@ static struct mount_info *mnt_build_tree(struct mount_info *list, */ pr_info("Building mountpoints tree\n"); - tree = mnt_build_ids_tree(list, root_mp); + tree = mnt_build_ids_tree(list); if (!tree) return NULL; @@ -1690,7 +1659,7 @@ struct mount_info *collect_mntinfo(struct ns_id *ns, bool for_dump) return NULL; } - ns->mnt.mntinfo_tree = mnt_build_tree(pm, NULL); + ns->mnt.mntinfo_tree = mnt_build_tree(pm); if (ns->mnt.mntinfo_tree == NULL) goto err; @@ -2881,7 +2850,7 @@ static int get_mp_mountpoint(char *mountpoint, struct mount_info *mi, char *root return 0; } -static int collect_mnt_from_image(struct mount_info **pms, struct ns_id *nsid) +static int collect_mnt_from_image(struct mount_info **head, struct mount_info **tail, struct ns_id *nsid) { MntEntry *me = NULL; int ret, root_len = 1; @@ -2909,8 +2878,10 @@ static int collect_mnt_from_image(struct mount_info **pms, struct ns_id 
*nsid) goto err; pm->nsid = nsid; - pm->next = *pms; - *pms = pm; + pm->next = *head; + *head = pm; + if (!*tail) + *tail = pm; pm->mnt_id = me->mnt_id; pm->parent_mnt_id = me->parent_mnt_id; @@ -2995,11 +2966,20 @@ int read_mnt_ns_img(void) } for (nsid = ns_ids; nsid != NULL; nsid = nsid->next) { + struct mount_info *head = NULL, *tail = NULL; + if (nsid->nd != &mnt_ns_desc) continue; - if (collect_mnt_from_image(&pms, nsid)) + if (collect_mnt_from_image(&head, &tail, nsid)) return -1; + + nsid->mnt.mntinfo_tree = mnt_build_tree(head); + if (!nsid->mnt.mntinfo_tree) + return -1; + + tail->next = pms; + pms = head; } mntinfo = pms; @@ -3101,6 +3081,40 @@ void fini_restore_mntns(void) } } +static int merge_mount_trees(struct mount_info *root_yard) +{ + struct mount_info *first = NULL; + struct ns_id *nsid; + + /* Merge mount trees together under root_yard */ + for (nsid = ns_ids; nsid; nsid = nsid->next) { + struct mount_info *root; + + if (nsid->nd != &mnt_ns_desc) + continue; + + root = nsid->mnt.mntinfo_tree; + + if (!first) + first = root; + else if (!mounts_sb_equal(root, first) || + strcmp(root->root, first->root)) { + pr_err("Nested mount namespaces with different " + "roots %d (@%s %s) %d (@%s %s) are not supported yet\n", + root->mnt_id, root->mountpoint, root->root, + first->mnt_id, first->mountpoint, first->root); + return -1; + } + + pr_debug("Mountpoint %d (@%s) moved to the root yard\n", + root->mnt_id, root->mountpoint); + root->parent = root_yard; + list_add(&root->siblings, &root_yard->children); + } + + return 0; +} + /* * All nested mount namespaces are restore as sub-trees of the root namespace. 
*/ @@ -3140,8 +3154,6 @@ static int populate_roots_yard(void) static int populate_mnt_ns(void) { - struct mount_info *pms; - struct ns_id *nsid; int ret; root_yard_mp = mnt_entry_alloc(); @@ -3151,40 +3163,27 @@ static int populate_mnt_ns(void) root_yard_mp->mountpoint = mnt_roots; root_yard_mp->mounted = true; - pms = mnt_build_tree(mntinfo, root_yard_mp); - if (!pms) + if (merge_mount_trees(root_yard_mp)) return -1; #ifdef CONFIG_BINFMT_MISC_VIRTUALIZED if (!opts.has_binfmt_misc && !list_empty(&binfmt_misc_list)) { /* Add to mount tree. Generic code will mount it later */ - ret = add_cr_time_mount(pms, "binfmt_misc", BINFMT_MISC_HOME, 0); + ret = add_cr_time_mount(root_yard_mp, "binfmt_misc", BINFMT_MISC_HOME, 0); if (ret) return -1; } #endif - if (resolve_shared_mounts(mntinfo, pms->master_id)) + if (resolve_shared_mounts(mntinfo, 0)) return -1; - for (nsid = ns_ids; nsid; nsid = nsid->next) { - if (nsid->nd != &mnt_ns_desc) - continue; - - /* - * Make trees of all namespaces look the - * same, so that manual paths resolution - * works on them. - */ - nsid->mnt.mntinfo_tree = pms; - } - if (validate_mounts(mntinfo, false)) return -1; - mnt_tree_for_each(pms, set_is_overmounted); + mnt_tree_for_each(root_yard_mp, set_is_overmounted); - if (find_remap_mounts(pms)) + if (find_remap_mounts(root_yard_mp)) return -1; if (populate_roots_yard()) @@ -3193,8 +3192,8 @@ static int populate_mnt_ns(void) if (mount_clean_path()) return -1; - ret = mnt_tree_for_each(pms, do_mount_one); - mnt_tree_for_each(pms, do_close_one); + ret = mnt_tree_for_each(root_yard_mp, do_mount_one); + mnt_tree_for_each(root_yard_mp, do_close_one); if (ret == 0 && fixup_remap_mounts()) return -1; From f3cca97d80c77a6f2b9702edc8225ea8bb6034e5 Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Thu, 27 Jun 2019 12:43:40 +0300 Subject: [PATCH 0225/2030] mount: make mnt_resort_siblings nonrecursive and reuse friendly Add mnt_subtree_next DFS-next search to remove recursion. 
v5: add this patch, remove recursion from sorting helpers v6: rip out the beautiful yet unused step-part of the DFS-next algorithm Signed-off-by: Pavel Tikhomirov --- criu/mount.c | 57 +++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 48 insertions(+), 9 deletions(-) diff --git a/criu/mount.c b/criu/mount.c index 49708ffd5..974af6eb2 100644 --- a/criu/mount.c +++ b/criu/mount.c @@ -385,13 +385,12 @@ static unsigned int mnt_depth(struct mount_info *m) return depth; } -static void mnt_resort_siblings(struct mount_info *tree) +static void __mnt_resort_children(struct mount_info *parent) { - struct mount_info *m, *p; LIST_HEAD(list); /* - * Put siblings of each node in an order they can be (u)mounted + * Put children mounts in an order they can be (u)mounted * I.e. if we have mounts on foo/bar/, foo/bar/foobar/ and foo/ * we should put them in the foo/bar/foobar/, foo/bar/, foo/ order. * Otherwise we will not be able to (u)mount them in a sequence. @@ -403,11 +402,12 @@ static void mnt_resort_siblings(struct mount_info *tree) * to contain hundreds (or more) elements.
*/ - pr_info("\tResorting siblings on %d\n", tree->mnt_id); - while (!list_empty(&tree->children)) { + pr_info("\tResorting children of %d in mount order\n", parent->mnt_id); + while (!list_empty(&parent->children)) { + struct mount_info *m, *p; unsigned int depth; - m = list_first_entry(&tree->children, struct mount_info, siblings); + m = list_first_entry(&parent->children, struct mount_info, siblings); list_del(&m->siblings); depth = mnt_depth(m); @@ -416,10 +416,31 @@ static void mnt_resort_siblings(struct mount_info *tree) break; list_add_tail(&m->siblings, &p->siblings); - mnt_resort_siblings(m); } - list_splice(&list, &tree->children); + list_splice(&list, &parent->children); +} + +static struct mount_info *mnt_subtree_next(struct mount_info *mi, + struct mount_info *root); + +static void resort_siblings(struct mount_info *root, + void (*resort_children)(struct mount_info *)) { + struct mount_info *mi = root; + while (1) { + /* + * Explanation: sorting the children of the tree like these is + * safe and does not break the tree search in mnt_subtree_next + * (DFS-next search), as we sort children before calling next + * on parent and thus before DFS-next ever touches them, so + * from the perspective of DFS-next all children look like they + * are already sorted. 
+ */ + resort_children(mi); + mi = mnt_subtree_next(mi, root); + if (!mi) + break; + } } static void mnt_tree_show(struct mount_info *tree, int off) @@ -980,7 +1001,7 @@ static struct mount_info *mnt_build_tree(struct mount_info *list) if (!tree) return NULL; - mnt_resort_siblings(tree); + resort_siblings(tree, __mnt_resort_children); pr_info("Done:\n"); mnt_tree_show(tree, 0); return tree; @@ -3821,3 +3842,21 @@ int remount_readonly_mounts(void) */ return call_helper_process(ns_remount_readonly_mounts, NULL); } + +static struct mount_info *mnt_subtree_next(struct mount_info *mi, + struct mount_info *root) +{ + if (!list_empty(&mi->children)) + return list_entry(mi->children.next, + struct mount_info, siblings); + + while (mi->parent && mi != root) { + if (mi->siblings.next == &mi->parent->children) + mi = mi->parent; + else + return list_entry(mi->siblings.next, + struct mount_info, siblings); + } + + return NULL; +} From 2b4e653361ec0c3b827fb4af4e0b8848fb9a26f0 Mon Sep 17 00:00:00 2001 From: Nidhi Gupta Date: Tue, 5 Nov 2019 15:19:25 +0530 Subject: [PATCH 0226/2030] Run java functional tests on travis Signed-off-by: Nidhi Gupta --- .travis.yml | 2 ++ scripts/build/Dockerfile.openj9-alpine | 33 ++++++++++++++++++++++++++ scripts/build/Dockerfile.openj9-ubuntu | 30 +++++++++++++++++++++++ scripts/travis/Makefile | 3 +++ scripts/travis/openj9-test.sh | 22 +++++++++++++++++ 5 files changed, 90 insertions(+) create mode 100644 scripts/build/Dockerfile.openj9-alpine create mode 100644 scripts/build/Dockerfile.openj9-ubuntu create mode 100755 scripts/travis/openj9-test.sh diff --git a/.travis.yml b/.travis.yml index 7a0c29a55..6e854540b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -24,6 +24,8 @@ env: - TR_ARCH=fedora-rawhide-aarch64 - TR_ARCH=centos - TR_ARCH=podman-test + - TR_ARCH=openj9-test + matrix: include: - os: linux diff --git a/scripts/build/Dockerfile.openj9-alpine b/scripts/build/Dockerfile.openj9-alpine new file mode 100644 index 000000000..654e7bf31 --- 
/dev/null +++ b/scripts/build/Dockerfile.openj9-alpine @@ -0,0 +1,33 @@ +FROM adoptopenjdk/openjdk8-openj9:alpine + +RUN apk update && apk add \ + bash \ + build-base \ + ccache \ + coreutils \ + git \ + gnutls-dev \ + libaio-dev \ + libcap-dev \ + libnet-dev \ + libnl3-dev \ + pkgconfig \ + protobuf-c-dev \ + protobuf-dev \ + python \ + sudo \ + maven \ + py-yaml \ + py-pip \ + py2-future \ + ip6tables \ + iptables \ + bash + +COPY . /criu +WORKDIR /criu + +RUN make + +ENTRYPOINT mvn -f test/javaTests/pom.xml test + diff --git a/scripts/build/Dockerfile.openj9-ubuntu b/scripts/build/Dockerfile.openj9-ubuntu new file mode 100644 index 000000000..13d9080ff --- /dev/null +++ b/scripts/build/Dockerfile.openj9-ubuntu @@ -0,0 +1,30 @@ +FROM adoptopenjdk/openjdk8-openj9:latest + +RUN apt-get update && apt-get install -y --no-install-recommends protobuf-c-compiler \ + libprotobuf-c-dev \ + libaio-dev \ + python-future \ + libprotobuf-dev \ + protobuf-compiler \ + libcap-dev \ + libnl-3-dev \ + gdb \ + bash \ + python-protobuf \ + python-yaml \ + libnet-dev \ + libnl-route-3-dev \ + libbsd-dev \ + make \ + git \ + pkg-config \ + gcc \ + maven + +COPY . /criu +WORKDIR /criu + +RUN make + +ENTRYPOINT mvn -f test/javaTests/pom.xml test + diff --git a/scripts/travis/Makefile b/scripts/travis/Makefile index baddd6eb1..c6b67935b 100644 --- a/scripts/travis/Makefile +++ b/scripts/travis/Makefile @@ -41,5 +41,8 @@ docker-test: podman-test: ./podman-test.sh +openj9-test: + ./openj9-test.sh + %: $(MAKE) -C ../build $@$(target-suffix) diff --git a/scripts/travis/openj9-test.sh b/scripts/travis/openj9-test.sh new file mode 100755 index 000000000..968f064f8 --- /dev/null +++ b/scripts/travis/openj9-test.sh @@ -0,0 +1,22 @@ +#!/bin/bash + +cd ../.. + +failures="" + +docker build -t criu-openj9-ubuntu-test:latest -f scripts/build/Dockerfile.openj9-ubuntu . +docker run --rm --privileged criu-openj9-ubuntu-test:latest +if [ $? 
-ne 0 ]; then + failures=`echo "$failures ubuntu"` +fi + +docker build -t criu-openj9-alpine-test:latest -f scripts/build/Dockerfile.openj9-alpine . +docker run --rm --privileged criu-openj9-alpine-test:latest +if [ $? -ne 0 ]; then + failures=`echo "$failures alpine"` +fi + +if [ -n "$failures" ]; then + echo "Tests failed on $failures" + exit 1 +fi From 62953d4334b0294ac90e0ec088267fa99daf9f92 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Mon, 4 Nov 2019 08:52:55 +0100 Subject: [PATCH 0227/2030] travis: fix copy paste error from previous commit In my previous commit I copied a line with a return into the main script body. bash can only return from functions. This changes return to exit. Signed-off-by: Adrian Reber --- scripts/travis/travis-tests | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/travis/travis-tests b/scripts/travis/travis-tests index 1f6b19130..07311511c 100755 --- a/scripts/travis/travis-tests +++ b/scripts/travis/travis-tests @@ -86,7 +86,7 @@ if [ "$WIDTH" -gt 80 ]; then exit 1 fi -[ -n "$SKIP_TRAVIS_TEST" ] && return +[ -n "$SKIP_TRAVIS_TEST" ] && exit 0 ulimit -c unlimited From 6be414bb2be1e8be13b996f60977ecc44b765a2e Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Mon, 4 Nov 2019 08:54:22 +0100 Subject: [PATCH 0228/2030] travis: Do not run privileged containers in LXD Travis uses unprivileged containers for aarch64 in LXD. Docker with '--privileged' fails in such situation. This changes the travis setup to only start docker with '--privileged' if running on x86_64. 
Signed-off-by: Adrian Reber --- scripts/travis/Makefile | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/scripts/travis/Makefile b/scripts/travis/Makefile index c6b67935b..80c7b9230 100644 --- a/scripts/travis/Makefile +++ b/scripts/travis/Makefile @@ -13,6 +13,9 @@ endif TARGETS := alpine fedora-rawhide centos ZDTM_OPTIONS := +UNAME := $(shell uname -m) + +export UNAME alpine: ZDTM_OPTIONS=-x zdtm/static/binfmt_misc -x zdtm/static/netns-nf -x zdtm/static/sched_policy00 -x zdtm/static/seccomp_strict -x zdtm/static/sigaltstack -x zdtm/static/signalfd00 -x zdtm/static/config_inotify_irmap @@ -23,17 +26,29 @@ define DOCKER_JSON endef export DOCKER_JSON -$(TARGETS): - echo "$$DOCKER_JSON" > /etc/docker/daemon.json - systemctl restart docker - $(MAKE) -C ../build $@$(target-suffix) - docker run --env-file docker.env --rm -it --privileged -v /lib/modules:/lib/modules --tmpfs /run criu-$@ scripts/travis/travis-tests -fedora-asan: - echo "$$DOCKER_JSON" > /etc/docker/daemon.json - systemctl restart docker +ifeq ($(UNAME),x86_64) + CONTAINER_OPTS := --rm -it --privileged -v /lib/modules:/lib/modules --tmpfs /run +else + CONTAINER_OPTS := --rm -v /lib/modules:/lib/modules --tmpfs /run +endif + +restart-docker: + if [ "$$UNAME" = "x86_64" ]; then \ + echo "$$DOCKER_JSON" > /etc/docker/daemon.json; \ + cat /etc/docker/daemon.json; \ + systemctl status docker; \ + systemctl restart docker; \ + systemctl status docker; \ + fi + +$(TARGETS): restart-docker $(MAKE) -C ../build $@$(target-suffix) - docker run --rm -it --privileged -v /lib/modules:/lib/modules --tmpfs /run criu-$@ ./scripts/travis/asan.sh $(ZDTM_OPTIONS) + docker run --env-file docker.env $(CONTAINER_OPTS) criu-$@ scripts/travis/travis-tests + +fedora-asan: restart-docker + $(MAKE) -C ../build $@$(target-suffix) + docker run -it $(CONTAINER_OPTS) criu-$@ ./scripts/travis/asan.sh $(ZDTM_OPTIONS) docker-test: ./docker-test.sh From 
075f1beaf7d36cb9ea5030e1faab9661c33290ab Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Mon, 4 Nov 2019 08:56:15 +0100 Subject: [PATCH 0229/2030] Makefile hack for travis aarch64/armv8l For CRIU's compile only tests for armv7hf on Travis we are using 'setarch linux32' which returns armv8l on Travis aarch64. This adds a path in the Makefile to treat armv8l just as armv7hf during compile. This enables us to run armv7hf compile tests on Travis aarch64 hardware. Much faster. Maybe not entirely correct, but probably good enough for compile testing in an armv7hf container. Signed-off-by: Adrian Reber --- Makefile | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 0140330e1..f827e7baa 100644 --- a/Makefile +++ b/Makefile @@ -35,7 +35,6 @@ endif # Architecture specific options. ifeq ($(ARCH),arm) ARMV := $(shell echo $(UNAME-M) | sed -nr 's/armv([[:digit:]]).*/\1/p; t; i7') - DEFINES := -DCONFIG_ARMV$(ARMV) -DCONFIG_VDSO_32 ifeq ($(ARMV),6) USERCFLAGS += -march=armv6 @@ -45,6 +44,16 @@ ifeq ($(ARCH),arm) USERCFLAGS += -march=armv7-a endif + ifeq ($(ARMV),8) + # Running 'setarch linux32 uname -m' returns armv8l on travis aarch64. + # This tells CRIU to handle armv8l just as armv7hf. Right now this is + # only used for compile testing. No further verification of armv8l exists. + USERCFLAGS += -march=armv7-a + ARMV := 7 + endif + + DEFINES := -DCONFIG_ARMV$(ARMV) -DCONFIG_VDSO_32 + PROTOUFIX := y # For simplicity - compile code in Arm mode without interwork. # We could choose Thumb mode as default instead - but a dirty From eab8cf0775ed5569bb22795a86817dae06cf0005 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Mon, 4 Nov 2019 08:58:54 +0100 Subject: [PATCH 0230/2030] travis: switch all arm related tests to real hardware This switches all arm related tests (32bit and 64bit) to the aarch64 systems Travis provides. For arm32 we are running in a armv7hf container on aarch64 with 'setarch linux32'. 
The main changes are that docker on Travis aarch64 cannot use '--privileged' as Travis is using unprivileged LXD containers to setup the testing environment. Signed-off-by: Adrian Reber --- .travis.yml | 50 +++++++++++++++---- scripts/build/Dockerfile.armv7hf.hdr | 4 +- scripts/build/Dockerfile.armv7hf.tmpl | 2 +- .../Dockerfile.fedora-rawhide-aarch64.hdr | 3 -- .../Dockerfile.fedora-rawhide-aarch64.tmpl | 1 - scripts/build/Dockerfile.linux32.tmpl | 47 +++++++++++++++++ scripts/build/Makefile | 4 +- scripts/travis/Makefile | 2 + scripts/travis/podman-test.sh | 2 +- 9 files changed, 93 insertions(+), 22 deletions(-) delete mode 100644 scripts/build/Dockerfile.fedora-rawhide-aarch64.hdr delete mode 120000 scripts/build/Dockerfile.fedora-rawhide-aarch64.tmpl create mode 100644 scripts/build/Dockerfile.linux32.tmpl diff --git a/.travis.yml b/.travis.yml index 6e854540b..85b6b6e07 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,6 +1,6 @@ language: c sudo: required -dist: xenial +dist: bionic cache: ccache services: - docker @@ -9,21 +9,12 @@ env: - TR_ARCH=local CLANG=1 - TR_ARCH=local COMPAT_TEST=y - TR_ARCH=local CLANG=1 COMPAT_TEST=y - - TR_ARCH=alpine - - TR_ARCH=fedora-asan - TR_ARCH=x86_64 - TR_ARCH=x86_64 CLANG=1 - - TR_ARCH=armv7hf - TR_ARCH=ppc64le - TR_ARCH=s390x - - TR_ARCH=armv7hf CLANG=1 - TR_ARCH=ppc64le CLANG=1 - - TR_ARCH=alpine CLANG=1 - TR_ARCH=docker-test - - TR_ARCH=fedora-rawhide - - TR_ARCH=fedora-rawhide-aarch64 - - TR_ARCH=centos - - TR_ARCH=podman-test - TR_ARCH=openj9-test matrix: @@ -36,10 +27,47 @@ matrix: arch: arm64 env: TR_ARCH=local CLANG=1 dist: bionic + - os: linux + arch: arm64 + # This runs on aarch64 with 'setarch linux32' + env: TR_ARCH=armv7hf + dist: bionic + - os: linux + arch: arm64 + # This runs on aarch64 with 'setarch linux32' + env: TR_ARCH=armv7hf CLANG=1 + dist: bionic + - os: linux + arch: arm64 + env: TR_ARCH=fedora-rawhide + dist: bionic + - os: linux + arch: amd64 + env: TR_ARCH=fedora-rawhide + dist: xenial # test 
hangs on bionic + - os: linux + arch: amd64 + env: TR_ARCH=podman-test + dist: bionic + - os: linux + arch: amd64 + env: TR_ARCH=alpine CLANG=1 + dist: xenial # test hangs on bionic + - os: linux + arch: amd64 + env: TR_ARCH=alpine + dist: xenial # test hangs on bionic + - os: linux + arch: amd64 + env: TR_ARCH=centos + dist: xenial # test hangs on bionic + - os: linux + arch: amd64 + env: TR_ARCH=fedora-asan + dist: xenial # test hangs on bionic allow_failures: - env: TR_ARCH=docker-test - env: TR_ARCH=fedora-rawhide - - env: TR_ARCH=fedora-rawhide-aarch64 - env: TR_ARCH=s390x - env: TR_ARCH=local GCOV=1 script: diff --git a/scripts/build/Dockerfile.armv7hf.hdr b/scripts/build/Dockerfile.armv7hf.hdr index d453d6df7..7c66474e5 100644 --- a/scripts/build/Dockerfile.armv7hf.hdr +++ b/scripts/build/Dockerfile.armv7hf.hdr @@ -1,3 +1 @@ -FROM arm32v7/ubuntu:xenial - -COPY scripts/build/qemu-user-static/usr/bin/qemu-arm-static /usr/bin/qemu-arm-static +FROM arm32v7/ubuntu:bionic diff --git a/scripts/build/Dockerfile.armv7hf.tmpl b/scripts/build/Dockerfile.armv7hf.tmpl index cb804790e..7bc6d9cde 120000 --- a/scripts/build/Dockerfile.armv7hf.tmpl +++ b/scripts/build/Dockerfile.armv7hf.tmpl @@ -1 +1 @@ -Dockerfile.tmpl \ No newline at end of file +Dockerfile.linux32.tmpl \ No newline at end of file diff --git a/scripts/build/Dockerfile.fedora-rawhide-aarch64.hdr b/scripts/build/Dockerfile.fedora-rawhide-aarch64.hdr deleted file mode 100644 index 82f29e336..000000000 --- a/scripts/build/Dockerfile.fedora-rawhide-aarch64.hdr +++ /dev/null @@ -1,3 +0,0 @@ -FROM arm64v8/fedora:rawhide - -COPY scripts/build/qemu-user-static/usr/bin/qemu-aarch64-static /usr/bin/qemu-aarch64-static diff --git a/scripts/build/Dockerfile.fedora-rawhide-aarch64.tmpl b/scripts/build/Dockerfile.fedora-rawhide-aarch64.tmpl deleted file mode 120000 index e4c40309c..000000000 --- a/scripts/build/Dockerfile.fedora-rawhide-aarch64.tmpl +++ /dev/null @@ -1 +0,0 @@ -Dockerfile.fedora.tmpl \ No newline at end 
of file diff --git a/scripts/build/Dockerfile.linux32.tmpl b/scripts/build/Dockerfile.linux32.tmpl new file mode 100644 index 000000000..5d3fe5139 --- /dev/null +++ b/scripts/build/Dockerfile.linux32.tmpl @@ -0,0 +1,47 @@ +ARG CC=gcc +ARG ENV1=FOOBAR + +RUN apt-get update && apt-get install -y \ + ccache \ + libnet-dev \ + libnl-route-3-dev \ + $CC \ + bsdmainutils \ + build-essential \ + git-core \ + iptables \ + libaio-dev \ + libcap-dev \ + libgnutls28-dev \ + libgnutls30 \ + libnl-3-dev \ + libprotobuf-c-dev \ + libprotobuf-dev \ + libselinux-dev \ + pkg-config \ + protobuf-c-compiler \ + protobuf-compiler \ + python-minimal \ + python-future + +COPY . /criu +WORKDIR /criu +ENV CC="ccache $CC" CCACHE_DIR=/tmp/.ccache CCACHE_NOCOMPRESS=1 $ENV1=yes + +RUN uname -m && setarch linux32 uname -m && setarch --list + +RUN mv .ccache /tmp && make mrproper && ccache -s && \ + date && \ +# Check single object build + setarch linux32 make -j $(nproc) CC="$CC" criu/parasite-syscall.o && \ +# Compile criu + setarch linux32 make -j $(nproc) CC="$CC" && \ + date && \ +# Check that "make mrproper" works + setarch linux32 make mrproper && ! git clean -ndx --exclude=scripts/build \ + --exclude=.config --exclude=test | grep . 
+ +# Compile tests +RUN date && setarch linux32 make -j $(nproc) CC="$CC" -C test/zdtm && date + +#RUN make test/compel/handle_binary && ./test/compel/handle_binary diff --git a/scripts/build/Makefile b/scripts/build/Makefile index 3d4d91cd5..d7ad82aec 100644 --- a/scripts/build/Makefile +++ b/scripts/build/Makefile @@ -1,5 +1,5 @@ -QEMU_ARCHES := armv7hf ppc64le s390x fedora-rawhide-aarch64 # require qemu -ARCHES := $(QEMU_ARCHES) aarch64 x86_64 fedora-asan fedora-rawhide centos +QEMU_ARCHES := ppc64le s390x # require qemu +ARCHES := $(QEMU_ARCHES) aarch64 x86_64 fedora-asan fedora-rawhide centos armv7hf TARGETS := $(ARCHES) alpine TARGETS_CLANG := $(addsuffix $(TARGETS),-clang) CONTAINER_RUNTIME := docker diff --git a/scripts/travis/Makefile b/scripts/travis/Makefile index 80c7b9230..373171149 100644 --- a/scripts/travis/Makefile +++ b/scripts/travis/Makefile @@ -28,6 +28,8 @@ endef export DOCKER_JSON ifeq ($(UNAME),x86_64) + # On anything besides x86_64 Travis is running unprivileged LXD + # containers which do not support running docker with '--privileged'. CONTAINER_OPTS := --rm -it --privileged -v /lib/modules:/lib/modules --tmpfs /run else CONTAINER_OPTS := --rm -v /lib/modules:/lib/modules --tmpfs /run diff --git a/scripts/travis/podman-test.sh b/scripts/travis/podman-test.sh index 9bd1f3d8b..eafdc73be 100755 --- a/scripts/travis/podman-test.sh +++ b/scripts/travis/podman-test.sh @@ -11,7 +11,7 @@ apt-get install -qq \ apt-get update -qq -apt-get install -qqy podman +apt-get install -qqy podman containernetworking-plugins export SKIP_TRAVIS_TEST=1 From fe668075ad2afe1021f8ff86d774eccb7bd1bef7 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Wed, 13 Nov 2019 08:38:16 +0100 Subject: [PATCH 0231/2030] travis: switch ppc64le and s390x to real hardware Now that Travis also supports ppc64le and s390x we can remove all qemu based docker emulation from our test setup. This now runs ppc64le and s390x tests on real hardware (LXD containers).
Signed-off-by: Adrian Reber --- .travis.yml | 17 +++++++++---- scripts/build/Dockerfile.aarch64.hdr | 3 --- scripts/build/Dockerfile.aarch64.tmpl | 1 - scripts/build/Dockerfile.ppc64le.hdr | 5 ---- scripts/build/Dockerfile.ppc64le.tmpl | 1 - scripts/build/Dockerfile.s390x.hdr | 6 ----- scripts/build/Dockerfile.s390x.tmpl | 1 - scripts/build/Makefile | 21 +--------------- scripts/build/binfmt_misc | 13 ---------- scripts/build/extract-deb-pkg | 36 --------------------------- scripts/travis/travis-tests | 7 ------ 11 files changed, 13 insertions(+), 98 deletions(-) delete mode 100644 scripts/build/Dockerfile.aarch64.hdr delete mode 120000 scripts/build/Dockerfile.aarch64.tmpl delete mode 100644 scripts/build/Dockerfile.ppc64le.hdr delete mode 120000 scripts/build/Dockerfile.ppc64le.tmpl delete mode 100644 scripts/build/Dockerfile.s390x.hdr delete mode 120000 scripts/build/Dockerfile.s390x.tmpl delete mode 100755 scripts/build/binfmt_misc delete mode 100755 scripts/build/extract-deb-pkg diff --git a/.travis.yml b/.travis.yml index 85b6b6e07..3c760d08a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -11,14 +11,22 @@ env: - TR_ARCH=local CLANG=1 COMPAT_TEST=y - TR_ARCH=x86_64 - TR_ARCH=x86_64 CLANG=1 - - TR_ARCH=ppc64le - - TR_ARCH=s390x - - TR_ARCH=ppc64le CLANG=1 - TR_ARCH=docker-test - TR_ARCH=openj9-test - matrix: include: + - os: linux + arch: ppc64le + env: TR_ARCH=local + dist: bionic + - os: linux + arch: ppc64le + env: TR_ARCH=local CLANG=1 + dist: bionic + - os: linux + arch: s390x + env: TR_ARCH=local + dist: bionic - os: linux arch: arm64 env: TR_ARCH=local @@ -68,7 +76,6 @@ matrix: allow_failures: - env: TR_ARCH=docker-test - env: TR_ARCH=fedora-rawhide - - env: TR_ARCH=s390x - env: TR_ARCH=local GCOV=1 script: - sudo make CCACHE=1 -C scripts/travis $TR_ARCH diff --git a/scripts/build/Dockerfile.aarch64.hdr b/scripts/build/Dockerfile.aarch64.hdr deleted file mode 100644 index c90c98088..000000000 --- a/scripts/build/Dockerfile.aarch64.hdr +++ /dev/null @@ 
-1,3 +0,0 @@ -FROM arm64v8/ubuntu:xenial - -COPY scripts/build/qemu-user-static/usr/bin/qemu-aarch64-static /usr/bin/qemu-aarch64-static diff --git a/scripts/build/Dockerfile.aarch64.tmpl b/scripts/build/Dockerfile.aarch64.tmpl deleted file mode 120000 index cb804790e..000000000 --- a/scripts/build/Dockerfile.aarch64.tmpl +++ /dev/null @@ -1 +0,0 @@ -Dockerfile.tmpl \ No newline at end of file diff --git a/scripts/build/Dockerfile.ppc64le.hdr b/scripts/build/Dockerfile.ppc64le.hdr deleted file mode 100644 index ba65901c2..000000000 --- a/scripts/build/Dockerfile.ppc64le.hdr +++ /dev/null @@ -1,5 +0,0 @@ -FROM ppc64le/ubuntu:xenial - -ENV QEMU_CPU POWER8 -COPY scripts/build/qemu-user-static/usr/bin/qemu-ppc64le-static /usr/bin/qemu-ppc64le-static -RUN sed -i '/security/ d' /etc/apt/sources.list diff --git a/scripts/build/Dockerfile.ppc64le.tmpl b/scripts/build/Dockerfile.ppc64le.tmpl deleted file mode 120000 index cb804790e..000000000 --- a/scripts/build/Dockerfile.ppc64le.tmpl +++ /dev/null @@ -1 +0,0 @@ -Dockerfile.tmpl \ No newline at end of file diff --git a/scripts/build/Dockerfile.s390x.hdr b/scripts/build/Dockerfile.s390x.hdr deleted file mode 100644 index e02097f62..000000000 --- a/scripts/build/Dockerfile.s390x.hdr +++ /dev/null @@ -1,6 +0,0 @@ -FROM s390x/debian:latest - -ENV QEMU_CPU z900 -COPY scripts/build/qemu-user-static/usr/bin/qemu-s390x-static /usr/bin/qemu-s390x-static -# The security repository does not seem to exist anymore -RUN sed -i '/security/ d' /etc/apt/sources.list diff --git a/scripts/build/Dockerfile.s390x.tmpl b/scripts/build/Dockerfile.s390x.tmpl deleted file mode 120000 index cb804790e..000000000 --- a/scripts/build/Dockerfile.s390x.tmpl +++ /dev/null @@ -1 +0,0 @@ -Dockerfile.tmpl \ No newline at end of file diff --git a/scripts/build/Makefile b/scripts/build/Makefile index d7ad82aec..a7c78e8bd 100644 --- a/scripts/build/Makefile +++ b/scripts/build/Makefile @@ -1,5 +1,4 @@ -QEMU_ARCHES := ppc64le s390x # require qemu -ARCHES := 
$(QEMU_ARCHES) aarch64 x86_64 fedora-asan fedora-rawhide centos armv7hf +ARCHES := x86_64 fedora-asan fedora-rawhide centos armv7hf TARGETS := $(ARCHES) alpine TARGETS_CLANG := $(addsuffix $(TARGETS),-clang) CONTAINER_RUNTIME := docker @@ -16,15 +15,6 @@ $(foreach arch,$(ARCHES),$(eval $(call ARCH_DEP,$(arch)))) Dockerfile.%: Dockerfile.%.hdr Dockerfile.%.tmpl cat $^ > $@ -qemu-user-static: - ./extract-deb-pkg qemu-user-static - -binfmt_misc: - ./binfmt_misc -.PHONY: binfmt_misc - -$(QEMU_ARCHES): qemu-user-static binfmt_misc - $(TARGETS): mkdir -p $(HOME)/.ccache mv $(HOME)/.ccache ../../ @@ -42,12 +32,3 @@ $(foreach t,$(TARGETS),$(eval $(call CLANG_DEP,$(t)))) %-clang: DB_ENV=--build-arg ENV1=CCACHE_CPP2 s390x-clang: DB_CC=--build-arg CC=clang-3.8 .PHONY: $(TARGETS_CLANG) - -clean: - rm -rf qemu-user-static - for ARCH in $(ARCHES); do \ - FILE=/proc/sys/fs/binfmt_misc/$$ARCH; \ - test -f $$FILE && echo -1 > $$FILE; \ - rm -f Dockerfile.$$ARCH; \ - done -.PHONY: clean diff --git a/scripts/build/binfmt_misc b/scripts/build/binfmt_misc deleted file mode 100755 index bf2a2ecad..000000000 --- a/scripts/build/binfmt_misc +++ /dev/null @@ -1,13 +0,0 @@ -set -e -x - -test -f /proc/sys/fs/binfmt_misc/armv7hf || - echo ':armv7hf:M::\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x28\x00:\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff:/usr/bin/qemu-arm-static:' > /proc/sys/fs/binfmt_misc/register; - -test -f /proc/sys/fs/binfmt_misc/aarch64 || - echo ':aarch64:M::\x7fELF\x02\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\xb7:\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff:/usr/bin/qemu-aarch64-static:' > /proc/sys/fs/binfmt_misc/register - -test -f /proc/sys/fs/binfmt_misc/ppc64le || - echo ':ppc64le:M::\x7fELF\x02\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x15\x00:\xff\xff\xff\xff\xff\xff\xff\xfc\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\x00:/usr/bin/qemu-ppc64le-static:' > 
/proc/sys/fs/binfmt_misc/register - -test -f /proc/sys/fs/binfmt_misc/s390x || - echo ':s390x:M::\x7fELF\x02\x02\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x16:\xff\xff\xff\xff\xff\xff\xff\x00\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff:/usr/bin/qemu-s390x-static:' > /proc/sys/fs/binfmt_misc/register diff --git a/scripts/build/extract-deb-pkg b/scripts/build/extract-deb-pkg deleted file mode 100755 index 44457bc5a..000000000 --- a/scripts/build/extract-deb-pkg +++ /dev/null @@ -1,36 +0,0 @@ -#!/bin/bash - -set -e -set -u -set -o pipefail -MIRROR="https://mirrors.kernel.org/ubuntu" -PKGS="$MIRROR/dists/bionic/universe/binary-amd64/Packages.gz" - -if [ $# -ne 1 ]; then - echo "Usage: $0 package-name" 1>&2 - exit 1 -fi - -if [ -d "$1" ]; then - echo "Directory $1 already exists -- exiting" - exit 0 -fi - -if ! pkg=$(curl -sSL "$PKGS" | zgrep "Filename.*$1" | awk '{ print $2 }'); then - echo "ERROR: no packages matching $1" 1>&2 - exit 1 -fi - -if [ "$(wc -w <<< "$pkg")" -gt 1 ]; then - echo "$pkg" 1>&2 - echo "ERROR: more than one match for $1" 1>&2 - exit 1 -fi - -mkdir "$1" -cd "$1" - -wget "$MIRROR/$pkg" -pkg=$(basename "$pkg") -ar vx "$pkg" -tar xJvf data.tar.xz diff --git a/scripts/travis/travis-tests b/scripts/travis/travis-tests index 07311511c..bc97fd455 100755 --- a/scripts/travis/travis-tests +++ b/scripts/travis/travis-tests @@ -51,13 +51,6 @@ travis_prep () { CC="ccache $CC" fi - # The /etc/apt/sources.list in the current trusty image for ppc64le is - # broken and needs to be fixed - if [ "$TR_ARCH" = "ppc64le" ] ; then - sed -i '/security/ d' /etc/apt/sources.list - fi - - # Do not install x86_64 specific packages on other architectures if [ "$UNAME_M" = "x86_64" ]; then TRAVIS_PKGS="$TRAVIS_PKGS $X86_64_PKGS" From ea018e9a9c78353b8f5532a2e5a36a0d1c5e8769 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Wed, 13 Nov 2019 13:25:30 +0100 Subject: [PATCH 0232/2030] travis: remove group from .travis.yml Tests are successful even after removing 
'group:' from .travis.yml. Apparently it is not necessary. Signed-off-by: Adrian Reber --- .travis.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 3c760d08a..f6f71be48 100644 --- a/.travis.yml +++ b/.travis.yml @@ -82,4 +82,3 @@ script: after_success: - ccache -s - make -C scripts/travis after_success -group: deprecated-2017Q2 From ef277068de3f6f89f394b9f63a3870eddde8c998 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Sat, 9 Nov 2019 22:20:34 +0000 Subject: [PATCH 0233/2030] lib/ptrace: Allow PTRACE_PEEKDATA with errno != 0 >From man ptrace: > On error, all requests return -1, and errno is set appropriately. > Since the value returned by a successful PTRACE_PEEK* request may be > -1, the caller must clear errno before the call, and then check > it afterward to determine whether or not an error occurred. FWIW: if ptrace_peek_area() is called with (errno != 0) it may false-fail if the data is (-1). Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- compel/src/lib/ptrace.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/compel/src/lib/ptrace.c b/compel/src/lib/ptrace.c index 9142bac42..715e564df 100644 --- a/compel/src/lib/ptrace.c +++ b/compel/src/lib/ptrace.c @@ -34,14 +34,20 @@ int ptrace_suspend_seccomp(pid_t pid) int ptrace_peek_area(pid_t pid, void *dst, void *addr, long bytes) { unsigned long w; + int old_errno = errno; + if (bytes & (sizeof(long) - 1)) return -1; + + errno = 0; for (w = 0; w < bytes / sizeof(long); w++) { unsigned long *d = dst, *a = addr; + d[w] = ptrace(PTRACE_PEEKDATA, pid, a + w, NULL); if (d[w] == -1U && errno) goto err; } + errno = old_errno; return 0; err: return -2; From a93117ede1e58db68246f775c00bc21683954c39 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Sat, 9 Nov 2019 22:20:35 +0000 Subject: [PATCH 0234/2030] lib/ptrace: Be more elaborate about failures Also, don't use the magic -2 => return errno on failure. 
Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- compel/src/lib/ptrace.c | 46 ++++++++++++++++++++++++++++++----------- 1 file changed, 34 insertions(+), 12 deletions(-) diff --git a/compel/src/lib/ptrace.c b/compel/src/lib/ptrace.c index 715e564df..4c3530c85 100644 --- a/compel/src/lib/ptrace.c +++ b/compel/src/lib/ptrace.c @@ -36,50 +36,72 @@ int ptrace_peek_area(pid_t pid, void *dst, void *addr, long bytes) unsigned long w; int old_errno = errno; - if (bytes & (sizeof(long) - 1)) + if (bytes & (sizeof(long) - 1)) { + pr_err("Peek request with non-word size %ld\n", bytes); return -1; + } errno = 0; for (w = 0; w < bytes / sizeof(long); w++) { unsigned long *d = dst, *a = addr; d[w] = ptrace(PTRACE_PEEKDATA, pid, a + w, NULL); - if (d[w] == -1U && errno) + if (d[w] == -1U && errno) { + pr_perror("PEEKDATA failed"); goto err; + } } errno = old_errno; return 0; err: - return -2; + return -errno; } int ptrace_poke_area(pid_t pid, void *src, void *addr, long bytes) { unsigned long w; - if (bytes & (sizeof(long) - 1)) + + if (bytes & (sizeof(long) - 1)) { + pr_err("Poke request with non-word size %ld\n", bytes); return -1; + } + for (w = 0; w < bytes / sizeof(long); w++) { unsigned long *s = src, *a = addr; - if (ptrace(PTRACE_POKEDATA, pid, a + w, s[w])) + + if (ptrace(PTRACE_POKEDATA, pid, a + w, s[w])) { + pr_perror("POKEDATA failed"); goto err; + } } return 0; err: - return -2; + return -errno; } /* don't swap big space, it might overflow the stack */ int ptrace_swap_area(pid_t pid, void *dst, void *src, long bytes) { void *t = alloca(bytes); + int err; - if (ptrace_peek_area(pid, t, dst, bytes)) - return -1; + err = ptrace_peek_area(pid, t, dst, bytes); + if (err) + return err; - if (ptrace_poke_area(pid, src, dst, bytes)) { - if (ptrace_poke_area(pid, t, dst, bytes)) - return -2; - return -1; + err = ptrace_poke_area(pid, src, dst, bytes); + if (err) { + int err2; + + pr_err("Can't poke %d @ %p from %p sized %ld\n", + pid, dst, src, bytes); + + err2 
= ptrace_poke_area(pid, t, dst, bytes); + if (err2) { + pr_err("Can't restore the original data with poke\n"); + return err2; + } + return err; } memcpy(src, t, bytes); From c8f16bfacb82b98841a9de49f5f9a15254d7b95f Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Sat, 9 Nov 2019 22:20:36 +0000 Subject: [PATCH 0235/2030] compel/infect: Warn if close() failed on memfd As a preparation for __must_check on compel_syscall(), check it on close() too - maybe not as useful as with other syscalls, but why not. Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- compel/src/lib/infect.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/compel/src/lib/infect.c b/compel/src/lib/infect.c index f0bcaf334..f726a9895 100644 --- a/compel/src/lib/infect.c +++ b/compel/src/lib/infect.c @@ -718,14 +718,25 @@ static int parasite_mmap_exchange(struct parasite_ctl *ctl, unsigned long size) return 0; } +static void parasite_memfd_close(struct parasite_ctl *ctl, int fd) +{ + bool __maybe_unused compat = !compel_mode_native(ctl); + long ret; + int err; + + err = compel_syscall(ctl, __NR(close, compat), &ret, fd, 0, 0, 0, 0, 0); + if (err || ret) + pr_err("Can't close memfd\n"); +} + static int parasite_memfd_exchange(struct parasite_ctl *ctl, unsigned long size) { void *where = (void *)ctl->ictx.syscall_ip + BUILTIN_SYSCALL_SIZE; + bool __maybe_unused compat_task = !compel_mode_native(ctl); uint8_t orig_code[MEMFD_FNAME_SZ] = MEMFD_FNAME; pid_t pid = ctl->rpid; long sret = -ENOSYS; int ret, fd, lfd; - bool __maybe_unused compat_task = !compel_mode_native(ctl); if (ctl->ictx.flags & INFECT_NO_MEMFD) return 1; @@ -741,10 +752,9 @@ static int parasite_memfd_exchange(struct parasite_ctl *ctl, unsigned long size) (unsigned long)where, 0, 0, 0, 0, 0); if (ptrace_poke_area(pid, orig_code, where, sizeof(orig_code))) { - fd = (int)(long)sret; + fd = (int)sret; if (fd >= 0) - compel_syscall(ctl, __NR(close, compat_task), &sret, - fd, 0, 0, 0, 0, 
0); + parasite_memfd_close(ctl, fd); pr_err("Can't restore memfd args (pid: %d)\n", pid); return -1; } @@ -752,7 +762,7 @@ static int parasite_memfd_exchange(struct parasite_ctl *ctl, unsigned long size) if (ret < 0) return ret; - fd = (int)(long)sret; + fd = (int)sret; if (fd == -ENOSYS) return 1; if (fd < 0) { @@ -787,7 +797,7 @@ static int parasite_memfd_exchange(struct parasite_ctl *ctl, unsigned long size) goto err_curef; } - compel_syscall(ctl, __NR(close, compat_task), &sret, fd, 0, 0, 0, 0, 0); + parasite_memfd_close(ctl, fd); close(lfd); pr_info("Set up parasite blob using memfd\n"); @@ -796,7 +806,7 @@ static int parasite_memfd_exchange(struct parasite_ctl *ctl, unsigned long size) err_curef: close(lfd); err_cure: - compel_syscall(ctl, __NR(close, compat_task), &sret, fd, 0, 0, 0, 0, 0); + parasite_memfd_close(ctl, fd); return -1; } From ee449e27c6979291660772db9724474a55d83b12 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Sat, 9 Nov 2019 22:20:37 +0000 Subject: [PATCH 0236/2030] compel: Mark compat argument of __NR() as used And remove __maybe_unused work-around. 
Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- compel/arch/aarch64/src/lib/include/syscall.h | 2 +- compel/arch/aarch64/src/lib/include/uapi/asm/infect-types.h | 2 +- compel/arch/arm/src/lib/include/syscall.h | 2 +- compel/arch/arm/src/lib/include/uapi/asm/infect-types.h | 2 +- compel/arch/ppc64/src/lib/include/syscall.h | 2 +- compel/arch/ppc64/src/lib/include/uapi/asm/infect-types.h | 2 +- compel/arch/s390/src/lib/include/uapi/asm/infect-types.h | 2 +- compel/src/lib/infect.c | 4 ++-- 8 files changed, 9 insertions(+), 9 deletions(-) diff --git a/compel/arch/aarch64/src/lib/include/syscall.h b/compel/arch/aarch64/src/lib/include/syscall.h index e2ec1272e..30290667a 100644 --- a/compel/arch/aarch64/src/lib/include/syscall.h +++ b/compel/arch/aarch64/src/lib/include/syscall.h @@ -1,4 +1,4 @@ #ifndef __COMPEL_SYSCALL_H__ #define __COMPEL_SYSCALL_H__ -#define __NR(syscall, compat) __NR_##syscall +#define __NR(syscall, compat) ({ (void)compat; __NR_##syscall; }) #endif diff --git a/compel/arch/aarch64/src/lib/include/uapi/asm/infect-types.h b/compel/arch/aarch64/src/lib/include/uapi/asm/infect-types.h index 4662f7689..7a33baa8e 100644 --- a/compel/arch/aarch64/src/lib/include/uapi/asm/infect-types.h +++ b/compel/arch/aarch64/src/lib/include/uapi/asm/infect-types.h @@ -27,6 +27,6 @@ typedef struct user_fpsimd_state user_fpregs_struct_t; #define ARCH_SI_TRAP TRAP_BRKPT -#define __NR(syscall, compat) __NR_##syscall +#define __NR(syscall, compat) ({ (void)compat; __NR_##syscall; }) #endif /* UAPI_COMPEL_ASM_TYPES_H__ */ diff --git a/compel/arch/arm/src/lib/include/syscall.h b/compel/arch/arm/src/lib/include/syscall.h index e2ec1272e..30290667a 100644 --- a/compel/arch/arm/src/lib/include/syscall.h +++ b/compel/arch/arm/src/lib/include/syscall.h @@ -1,4 +1,4 @@ #ifndef __COMPEL_SYSCALL_H__ #define __COMPEL_SYSCALL_H__ -#define __NR(syscall, compat) __NR_##syscall +#define __NR(syscall, compat) ({ (void)compat; __NR_##syscall; }) #endif diff --git 
a/compel/arch/arm/src/lib/include/uapi/asm/infect-types.h b/compel/arch/arm/src/lib/include/uapi/asm/infect-types.h index b8286d404..69222b251 100644 --- a/compel/arch/arm/src/lib/include/uapi/asm/infect-types.h +++ b/compel/arch/arm/src/lib/include/uapi/asm/infect-types.h @@ -61,6 +61,6 @@ struct user_vfp_exc { #define ARCH_SI_TRAP TRAP_BRKPT -#define __NR(syscall, compat) __NR_##syscall +#define __NR(syscall, compat) ({ (void)compat; __NR_##syscall; }) #endif /* UAPI_COMPEL_ASM_TYPES_H__ */ diff --git a/compel/arch/ppc64/src/lib/include/syscall.h b/compel/arch/ppc64/src/lib/include/syscall.h index e2ec1272e..30290667a 100644 --- a/compel/arch/ppc64/src/lib/include/syscall.h +++ b/compel/arch/ppc64/src/lib/include/syscall.h @@ -1,4 +1,4 @@ #ifndef __COMPEL_SYSCALL_H__ #define __COMPEL_SYSCALL_H__ -#define __NR(syscall, compat) __NR_##syscall +#define __NR(syscall, compat) ({ (void)compat; __NR_##syscall; }) #endif diff --git a/compel/arch/ppc64/src/lib/include/uapi/asm/infect-types.h b/compel/arch/ppc64/src/lib/include/uapi/asm/infect-types.h index 89fc4aa3c..126fa2ea3 100644 --- a/compel/arch/ppc64/src/lib/include/uapi/asm/infect-types.h +++ b/compel/arch/ppc64/src/lib/include/uapi/asm/infect-types.h @@ -81,6 +81,6 @@ typedef struct { #define ARCH_SI_TRAP TRAP_BRKPT -#define __NR(syscall, compat) __NR_##syscall +#define __NR(syscall, compat) ({ (void)compat; __NR_##syscall; }) #endif /* UAPI_COMPEL_ASM_TYPES_H__ */ diff --git a/compel/arch/s390/src/lib/include/uapi/asm/infect-types.h b/compel/arch/s390/src/lib/include/uapi/asm/infect-types.h index fddf65d3b..8171d3395 100644 --- a/compel/arch/s390/src/lib/include/uapi/asm/infect-types.h +++ b/compel/arch/s390/src/lib/include/uapi/asm/infect-types.h @@ -73,7 +73,7 @@ typedef struct { #define user_regs_native(pregs) true -#define __NR(syscall, compat) __NR_##syscall +#define __NR(syscall, compat) ({ (void)compat; __NR_##syscall; }) struct mmap_arg_struct { unsigned long addr; diff --git a/compel/src/lib/infect.c 
b/compel/src/lib/infect.c index f726a9895..656cc030d 100644 --- a/compel/src/lib/infect.c +++ b/compel/src/lib/infect.c @@ -720,7 +720,7 @@ static int parasite_mmap_exchange(struct parasite_ctl *ctl, unsigned long size) static void parasite_memfd_close(struct parasite_ctl *ctl, int fd) { - bool __maybe_unused compat = !compel_mode_native(ctl); + bool compat = !compel_mode_native(ctl); long ret; int err; @@ -732,7 +732,7 @@ static void parasite_memfd_close(struct parasite_ctl *ctl, int fd) static int parasite_memfd_exchange(struct parasite_ctl *ctl, unsigned long size) { void *where = (void *)ctl->ictx.syscall_ip + BUILTIN_SYSCALL_SIZE; - bool __maybe_unused compat_task = !compel_mode_native(ctl); + bool compat_task = !compel_mode_native(ctl); uint8_t orig_code[MEMFD_FNAME_SZ] = MEMFD_FNAME; pid_t pid = ctl->rpid; long sret = -ENOSYS; From 71738565780552b93fad5dcd42ecfcb4e972471f Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Sat, 9 Nov 2019 22:20:38 +0000 Subject: [PATCH 0237/2030] lib/infect: Check if compel succeed in executing munmap Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- compel/arch/s390/src/lib/infect.c | 4 +++- compel/src/lib/infect.c | 10 +++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/compel/arch/s390/src/lib/infect.c b/compel/arch/s390/src/lib/infect.c index 00e9c36d2..7e7d24ce2 100644 --- a/compel/arch/s390/src/lib/infect.c +++ b/compel/arch/s390/src/lib/infect.c @@ -453,8 +453,10 @@ void *remote_mmap(struct parasite_ctl *ctl, if (ptrace_poke_area(pid, &arg_struct, where, sizeof(arg_struct))) { pr_err("Can't restore mmap args (pid: %d)\n", pid); if (map != 0) { - compel_syscall(ctl, __NR_munmap, NULL, map, + err = compel_syscall(ctl, __NR_munmap, NULL, map, length, 0, 0, 0, 0); + if (err) + pr_err("Can't munmap %d\n", err); map = 0; } } diff --git a/compel/src/lib/infect.c b/compel/src/lib/infect.c index 656cc030d..8b377e7d2 100644 --- a/compel/src/lib/infect.c +++ b/compel/src/lib/infect.c @@ -1303,6 
+1303,7 @@ int compel_stop_daemon(struct parasite_ctl *ctl) int compel_cure_remote(struct parasite_ctl *ctl) { long ret; + int err; if (compel_stop_daemon(ctl)) return -1; @@ -1310,9 +1311,12 @@ int compel_cure_remote(struct parasite_ctl *ctl) if (!ctl->remote_map) return 0; - compel_syscall(ctl, __NR(munmap, !compel_mode_native(ctl)), &ret, - (unsigned long)ctl->remote_map, ctl->map_length, - 0, 0, 0, 0); + err = compel_syscall(ctl, __NR(munmap, !compel_mode_native(ctl)), &ret, + (unsigned long)ctl->remote_map, ctl->map_length, + 0, 0, 0, 0); + if (err) + return err; + if (ret) { pr_err("munmap for remote map %p, %lu returned %lu\n", ctl->remote_map, ctl->map_length, ret); From b5a83623b0327c19a2b9e6da28c434f28e33f7c3 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Sat, 9 Nov 2019 22:20:39 +0000 Subject: [PATCH 0238/2030] cr-dump: Try to cure remote on err-paths On daemon stop or threads dump failures it's still desired to remove parasite from the remote (if possible). Try our best and keep hoping. Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- criu/cr-dump.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/criu/cr-dump.c b/criu/cr-dump.c index f72373d22..56724f9a5 100644 --- a/criu/cr-dump.c +++ b/criu/cr-dump.c @@ -1385,16 +1385,20 @@ static int dump_one_task(struct pstree_item *item, InventoryEntry *parent_ie) ret = compel_stop_daemon(parasite_ctl); if (ret) { - pr_err("Can't cure (pid: %d) from parasite\n", pid); - goto err; + pr_err("Can't stop daemon in parasite (pid: %d)\n", pid); + goto err_cure; } ret = dump_task_threads(parasite_ctl, item); if (ret) { pr_err("Can't dump threads\n"); - goto err; + goto err_cure; } + /* + * On failure local map will be cured in cr_dump_finish() + * for lazy pages.
+ */ if (opts.lazy_pages) ret = compel_cure_remote(parasite_ctl); else @@ -1427,7 +1431,9 @@ err: err_cure: close_cr_imgset(&cr_imgset); err_cure_imgset: - compel_cure(parasite_ctl); + ret = compel_cure(parasite_ctl); + if (ret) + pr_err("Can't cure (pid: %d) from parasite\n", pid); goto err; } From 1038a0ae44971129b3720c4351b788913e7be8f2 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Sat, 9 Nov 2019 22:20:40 +0000 Subject: [PATCH 0239/2030] cr-dump: Warn if unmapping local memfd failed Probably, not the worst that could happen, but still unexpected. Preparing the ground to make compel_cure*() functions __must_check. Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- criu/cr-dump.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/criu/cr-dump.c b/criu/cr-dump.c index 56724f9a5..4b5a01cfd 100644 --- a/criu/cr-dump.c +++ b/criu/cr-dump.c @@ -1537,7 +1537,8 @@ static int cr_pre_dump_finish(int status) timing_stop(TIME_MEMWRITE); destroy_page_pipe(mem_pp); - compel_cure_local(ctl); + if (compel_cure_local(ctl)) + pr_err("Can't cure local: something happened with mapping?\n"); } free_pstree(root_item); @@ -1664,7 +1665,8 @@ static int cr_lazy_mem_dump(void) for_each_pstree_item(item) { if (item->pid->state != TASK_DEAD) { destroy_page_pipe(dmpi(item)->mem_pp); - compel_cure_local(dmpi(item)->parasite_ctl); + if (compel_cure_local(dmpi(item)->parasite_ctl)) + pr_err("Can't cure local: something happened with mapping?\n"); } } From abe48f8c3618113035c3e5ff76747b0342b6c7e7 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Sat, 9 Nov 2019 22:20:41 +0000 Subject: [PATCH 0240/2030] cr-restore: Warn if restorer can't be unmapped Too late to stop restore: it's already printed that restore was successful. Oh, well warn aloud about infection. 
Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- criu/cr-restore.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/criu/cr-restore.c b/criu/cr-restore.c index b4530f8e5..25b820132 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -1952,6 +1952,7 @@ static void finalize_restore(void) for_each_pstree_item(item) { pid_t pid = item->pid->real; struct parasite_ctl *ctl; + unsigned long restorer_addr; if (!task_alive(item)) continue; @@ -1961,7 +1962,9 @@ static void finalize_restore(void) if (ctl == NULL) continue; - compel_unmap(ctl, (unsigned long)rsti(item)->munmap_restorer); + restorer_addr = (unsigned long)rsti(item)->munmap_restorer; + if (compel_unmap(ctl, restorer_addr)) + pr_err("Failed to unmap restorer from %d\n", pid); xfree(ctl); From bd17ee85882033ead401a89e1eb21b18c7cb2afb Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Sat, 9 Nov 2019 22:20:42 +0000 Subject: [PATCH 0241/2030] parasite-syscall: Log if can't cure on failed infection Maybe expected, hopefully never happens - let's warn in any case. Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- criu/parasite-syscall.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/criu/parasite-syscall.c b/criu/parasite-syscall.c index b9788a4c2..e5a8194e5 100644 --- a/criu/parasite-syscall.c +++ b/criu/parasite-syscall.c @@ -565,7 +565,8 @@ struct parasite_ctl *parasite_infect_seized(pid_t pid, struct pstree_item *item, parasite_ensure_args_size(aio_rings_args_size(vma_area_list)); if (compel_infect(ctl, item->nr_threads, parasite_args_size) < 0) { - compel_cure(ctl); + if (compel_cure(ctl)) + pr_warn("Can't cure failed infection\n"); return NULL; } From c21c0aea1bd11b9d5c99803a7413314e0d6a0866 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Sat, 9 Nov 2019 22:20:43 +0000 Subject: [PATCH 0242/2030] compel/infect: Detach but fail compel_resume_task() Unknown state means that the task in the end may be not in wanted state. Return err code. 
Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- compel/src/lib/infect.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/compel/src/lib/infect.c b/compel/src/lib/infect.c index 8b377e7d2..3fad85ed3 100644 --- a/compel/src/lib/infect.c +++ b/compel/src/lib/infect.c @@ -313,6 +313,8 @@ err: int compel_resume_task(pid_t pid, int orig_st, int st) { + int ret = 0; + pr_debug("\tUnseizing %d into %d\n", pid, st); if (st == COMPEL_TASK_DEAD) { @@ -335,15 +337,17 @@ int compel_resume_task(pid_t pid, int orig_st, int st) */ if (orig_st == COMPEL_TASK_STOPPED) kill(pid, SIGSTOP); - } else + } else { pr_err("Unknown final state %d\n", st); + ret = -1; + } if (ptrace(PTRACE_DETACH, pid, NULL, NULL)) { pr_perror("Unable to detach from %d", pid); return -1; } - return 0; + return ret; } static int gen_parasite_saddr(struct sockaddr_un *saddr, int key) From 56bc4189e47c3f356c6f407544f5a88768bd4f00 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Sat, 9 Nov 2019 22:20:44 +0000 Subject: [PATCH 0243/2030] criu: Kill tasks even when the network is unlocked Currently if anything fails after network has been unlocked tasks aren't killed. Which doesn't work anyway: any stage sets `ret` and nothing later gets called. Which means the tasks aren't resumed properly. Furthermore, functions like catch_tasks() and compel_stop_on_syscall() return failure on the first error. Let's do the cleanup even when the network is unlocked. If we want to keep the mess and ignore failures - a cli option should be introduced for that (and existing code should be reworked with decisions what is critical and what can be ignored). Move "Restore finished successfully" message accordingly where everything is evidently good. While at here, any late failure will result not only in cleanup but in criu returning error code. 
Which in result makes tests to fail in such case: > ======================= Run zdtm/static/inotify04 in ns ======================== > Start test > ./inotify04 --pidfile=inotify04.pid --outfile=inotify04.out --dirname=inotify04.test > Run criu dump > =[log]=> dump/zdtm/static/inotify04/84/1/dump.log > ------------------------ grep Error ------------------------ > (00.119763) fsnotify: openable (inode match) as zdtm/static/inotify04.test/inotify-testfile > (00.119766) fsnotify: Dumping /zdtm/static/inotify04.test/inotify-testfile as path for handle > (00.119769) fsnotify: id 0x00000b flags 0x000800 > (00.119787) 88 fdinfo 5: pos: 0 flags: 4000/0 > (00.119796) Warn (criu/fsnotify.c:336): fsnotify: The 0x00000c inotify events will be dropped > ------------------------ ERROR OVER ------------------------ > Run criu restore > =[log]=> dump/zdtm/static/inotify04/84/1/restore.log > ------------------------ grep Error ------------------------ > (00.391582) 123 was stopped > (00.391667) 106 was trapped > (00.391674) 106 (native) is going to execute the syscall 11, required is 11 > (00.391697) 106 was stopped > (00.391720) Error (compel/src/lib/infect.c:1439): Task 123 is in unexpected state: b7f > (00.391736) Error (compel/src/lib/infect.c:1447): Task stopped with 11: Segmentation fault > ------------------------ ERROR OVER ------------------------ > 5: Old maps lost: set([]) > 5: New maps appeared: set([u'10000-1a000 rwxp', u'1a000-24000 rw-p']) > ############### Test zdtm/static/inotify04 FAIL at maps compare ################ > Send the 9 signal to 106 > Wait for zdtm/static/inotify04(106) to die for 0.100000 > ======================= Test zdtm/static/inotify04 PASS ======================== Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- criu/cr-restore.c | 50 ++++++++++++++++++++++++++++++----------------- 1 file changed, 32 insertions(+), 18 deletions(-) diff --git a/criu/cr-restore.c b/criu/cr-restore.c index 25b820132..05a25835a 100644 --- 
a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -1974,7 +1974,7 @@ static void finalize_restore(void) } } -static void finalize_restore_detach(int status) +static int finalize_restore_detach(void) { struct pstree_item *item; @@ -1988,16 +1988,21 @@ static void finalize_restore_detach(int status) for (i = 0; i < item->nr_threads; i++) { pid = item->threads[i].real; if (pid < 0) { - BUG_ON(status >= 0); - break; + pr_err("pstree item has unvalid pid %d\n", pid); + continue; } - if (arch_set_thread_regs_nosigrt(&item->threads[i])) + if (arch_set_thread_regs_nosigrt(&item->threads[i])) { pr_perror("Restoring regs for %d failed", pid); - if (ptrace(PTRACE_DETACH, pid, NULL, 0)) - pr_perror("Unable to execute %d", pid); + return -1; + } + if (ptrace(PTRACE_DETACH, pid, NULL, 0)) { + pr_perror("Unable to detach %d", pid); + return -1; + } } } + return 0; } static void ignore_kids(void) @@ -2255,32 +2260,37 @@ skip_ns_bouncing: /* * ------------------------------------------------------------- - * Below this line nothing should fail, because network is unlocked + * Network is unlocked. If something fails below - we lose data + * or a connection. */ attach_to_tasks(root_seized); - ret = restore_switch_stage(CR_STATE_RESTORE_CREDS); - BUG_ON(ret); + if (restore_switch_stage(CR_STATE_RESTORE_CREDS)) + goto out_kill_network_unlocked; timing_stop(TIME_RESTORE); - ret = catch_tasks(root_seized, &flag); + if (catch_tasks(root_seized, &flag)) { + pr_err("Can't catch all tasks\n"); + goto out_kill_network_unlocked; + } if (lazy_pages_finish_restore()) - goto out_kill; + goto out_kill_network_unlocked; - pr_info("Restore finished successfully. 
Resuming tasks.\n"); __restore_switch_stage(CR_STATE_COMPLETE); - if (ret == 0) - ret = compel_stop_on_syscall(task_entries->nr_threads, - __NR(rt_sigreturn, 0), __NR(rt_sigreturn, 1), flag); + ret = compel_stop_on_syscall(task_entries->nr_threads, + __NR(rt_sigreturn, 0), __NR(rt_sigreturn, 1), flag); + if (ret) { + pr_err("Can't stop all tasks on rt_sigreturn\n"); + goto out_kill_network_unlocked; + } if (clear_breakpoints()) pr_err("Unable to flush breakpoints\n"); - if (ret == 0) - finalize_restore(); + finalize_restore(); ret = run_scripts(ACT_PRE_RESUME); if (ret) @@ -2292,8 +2302,10 @@ skip_ns_bouncing: fini_cgroup(); /* Detaches from processes and they continue run through sigreturn. */ - finalize_restore_detach(ret); + if (finalize_restore_detach()) + goto out_kill_network_unlocked; + pr_info("Restore finished successfully. Tasks resumed.\n"); write_stats(RESTORE_STATS); ret = run_scripts(ACT_POST_RESUME); @@ -2305,6 +2317,8 @@ skip_ns_bouncing: return 0; +out_kill_network_unlocked: + pr_err("Killing processes because of failure on restore.\nThe Network was unlocked so some data or a connection may have been lost.\n"); out_kill: /* * The processes can be killed only when all of them have been created, From 1c0716924bbc1128c478388b70904438a5934e73 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Sat, 9 Nov 2019 22:20:45 +0000 Subject: [PATCH 0244/2030] compel/criu: Add __must_check All those compel functions can fail by various reasons. It may be status of the system, interruption by user or anything else. It's really desired to handle as many PIE related errors as possible otherwise it's hard to analyze statuses of parasite/restorer and the C/R process. At least warning for logs should be produced or even C/R stopped. 
Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- compel/include/uapi/cpu.h | 2 +- compel/include/uapi/infect-rpc.h | 6 ++-- compel/include/uapi/infect-util.h | 5 ++- compel/include/uapi/infect.h | 39 +++++++++++++----------- compel/include/uapi/ptrace.h | 7 +++-- compel/include/uapi/sigframe-common.h | 5 +-- compel/plugins/include/uapi/plugin-fds.h | 2 +- compel/plugins/include/uapi/std/infect.h | 8 +++-- compel/plugins/include/uapi/std/log.h | 1 + criu/seize.c | 2 +- include/common/compiler.h | 27 ++++++++++++++++ 11 files changed, 71 insertions(+), 33 deletions(-) diff --git a/compel/include/uapi/cpu.h b/compel/include/uapi/cpu.h index 6f827d447..72c8a516c 100644 --- a/compel/include/uapi/cpu.h +++ b/compel/include/uapi/cpu.h @@ -6,7 +6,7 @@ #include -extern int compel_cpuid(compel_cpuinfo_t *info); +extern int /* TODO: __must_check */ compel_cpuid(compel_cpuinfo_t *info); extern bool compel_cpu_has_feature(unsigned int feature); extern bool compel_fpu_has_feature(unsigned int feature); extern uint32_t compel_fpu_feature_size(unsigned int feature); diff --git a/compel/include/uapi/infect-rpc.h b/compel/include/uapi/infect-rpc.h index 0176c1142..180dedf1f 100644 --- a/compel/include/uapi/infect-rpc.h +++ b/compel/include/uapi/infect-rpc.h @@ -6,9 +6,9 @@ #include struct parasite_ctl; -extern int compel_rpc_sync(unsigned int cmd, struct parasite_ctl *ctl); -extern int compel_rpc_call(unsigned int cmd, struct parasite_ctl *ctl); -extern int compel_rpc_call_sync(unsigned int cmd, struct parasite_ctl *ctl); +extern int __must_check compel_rpc_sync(unsigned int cmd, struct parasite_ctl *ctl); +extern int __must_check compel_rpc_call(unsigned int cmd, struct parasite_ctl *ctl); +extern int __must_check compel_rpc_call_sync(unsigned int cmd, struct parasite_ctl *ctl); extern int compel_rpc_sock(struct parasite_ctl *ctl); #define PARASITE_USER_CMDS 64 diff --git a/compel/include/uapi/infect-util.h b/compel/include/uapi/infect-util.h index 7307ba57a..4e32d13dc 
100644 --- a/compel/include/uapi/infect-util.h +++ b/compel/include/uapi/infect-util.h @@ -1,6 +1,9 @@ #ifndef __COMPEL_INFECT_UTIL_H__ #define __COMPEL_INFECT_UTIL_H__ + +#include "common/compiler.h" + struct parasite_ctl; -extern int compel_util_send_fd(struct parasite_ctl *ctl, int fd); +extern int __must_check compel_util_send_fd(struct parasite_ctl *ctl, int fd); extern int compel_util_recv_fd(struct parasite_ctl *ctl, int *pfd); #endif diff --git a/compel/include/uapi/infect.h b/compel/include/uapi/infect.h index 08beaffcd..dd672bc1c 100644 --- a/compel/include/uapi/infect.h +++ b/compel/include/uapi/infect.h @@ -13,7 +13,7 @@ #define PARASITE_START_AREA_MIN (4096) -extern int compel_interrupt_task(int pid); +extern int __must_check compel_interrupt_task(int pid); struct seize_task_status { unsigned long long sigpnd; @@ -23,27 +23,28 @@ struct seize_task_status { int seccomp_mode; }; -extern int compel_wait_task(int pid, int ppid, +extern int __must_check compel_wait_task(int pid, int ppid, int (*get_status)(int pid, struct seize_task_status *, void *data), void (*free_status)(int pid, struct seize_task_status *, void *data), struct seize_task_status *st, void *data); -extern int compel_stop_task(int pid); +extern int __must_check compel_stop_task(int pid); extern int compel_resume_task(pid_t pid, int orig_state, int state); struct parasite_ctl; struct parasite_thread_ctl; -extern struct parasite_ctl *compel_prepare(int pid); -extern struct parasite_ctl *compel_prepare_noctx(int pid); -extern int compel_infect(struct parasite_ctl *ctl, unsigned long nr_threads, unsigned long args_size); -extern struct parasite_thread_ctl *compel_prepare_thread(struct parasite_ctl *ctl, int pid); +extern struct parasite_ctl __must_check *compel_prepare(int pid); +extern struct parasite_ctl __must_check *compel_prepare_noctx(int pid); +extern int __must_check compel_infect(struct parasite_ctl *ctl, + unsigned long nr_threads, unsigned long args_size); +extern struct 
parasite_thread_ctl __must_check *compel_prepare_thread(struct parasite_ctl *ctl, int pid); extern void compel_release_thread(struct parasite_thread_ctl *); -extern int compel_stop_daemon(struct parasite_ctl *ctl); -extern int compel_cure_remote(struct parasite_ctl *ctl); -extern int compel_cure_local(struct parasite_ctl *ctl); -extern int compel_cure(struct parasite_ctl *ctl); +extern int __must_check compel_stop_daemon(struct parasite_ctl *ctl); +extern int __must_check compel_cure_remote(struct parasite_ctl *ctl); +extern int __must_check compel_cure_local(struct parasite_ctl *ctl); +extern int __must_check compel_cure(struct parasite_ctl *ctl); #define PARASITE_ARG_SIZE_MIN ( 1 << 12) @@ -58,15 +59,16 @@ extern int compel_cure(struct parasite_ctl *ctl); extern void *compel_parasite_args_p(struct parasite_ctl *ctl); extern void *compel_parasite_args_s(struct parasite_ctl *ctl, unsigned long args_size); -extern int compel_syscall(struct parasite_ctl *ctl, int nr, long *ret, +extern int __must_check compel_syscall(struct parasite_ctl *ctl, + int nr, long *ret, unsigned long arg1, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5, unsigned long arg6); -extern int compel_run_in_thread(struct parasite_thread_ctl *tctl, unsigned int cmd); -extern int compel_run_at(struct parasite_ctl *ctl, unsigned long ip, user_regs_struct_t *ret_regs); +extern int __must_check compel_run_in_thread(struct parasite_thread_ctl *tctl, unsigned int cmd); +extern int __must_check compel_run_at(struct parasite_ctl *ctl, unsigned long ip, user_regs_struct_t *ret_regs); /* * The PTRACE_SYSCALL will trap task twice -- on @@ -80,12 +82,13 @@ enum trace_flags { TRACE_EXIT, }; -extern int compel_stop_on_syscall(int tasks, int sys_nr, +extern int __must_check compel_stop_on_syscall(int tasks, int sys_nr, int sys_nr_compat, enum trace_flags trace); -extern int compel_stop_pie(pid_t pid, void *addr, enum trace_flags *tf, bool no_bp); +extern int __must_check 
compel_stop_pie(pid_t pid, void *addr, + enum trace_flags *tf, bool no_bp); -extern int compel_unmap(struct parasite_ctl *ctl, unsigned long addr); +extern int __must_check compel_unmap(struct parasite_ctl *ctl, unsigned long addr); extern int compel_mode_native(struct parasite_ctl *ctl); @@ -159,7 +162,7 @@ struct parasite_blob_desc { extern struct parasite_blob_desc *compel_parasite_blob_desc(struct parasite_ctl *); -extern int compel_get_thread_regs(struct parasite_thread_ctl *, save_regs_t, void *); +extern int __must_check compel_get_thread_regs(struct parasite_thread_ctl *, save_regs_t, void *); extern void compel_relocs_apply(void *mem, void *vbase, size_t size, compel_reloc_t *elf_relocs, size_t nr_relocs); diff --git a/compel/include/uapi/ptrace.h b/compel/include/uapi/ptrace.h index 4df00b6e1..13eed7232 100644 --- a/compel/include/uapi/ptrace.h +++ b/compel/include/uapi/ptrace.h @@ -1,6 +1,7 @@ #ifndef UAPI_COMPEL_PTRACE_H__ #define UAPI_COMPEL_PTRACE_H__ +#include "common/compiler.h" /* * We'd want to include both sys/ptrace.h and linux/ptrace.h, * hoping that most definitions come from either one or another. 
@@ -75,8 +76,8 @@ typedef struct { extern int ptrace_suspend_seccomp(pid_t pid); -extern int ptrace_peek_area(pid_t pid, void *dst, void *addr, long bytes); -extern int ptrace_poke_area(pid_t pid, void *src, void *addr, long bytes); -extern int ptrace_swap_area(pid_t pid, void *dst, void *src, long bytes); +extern int __must_check ptrace_peek_area(pid_t pid, void *dst, void *addr, long bytes); +extern int __must_check ptrace_poke_area(pid_t pid, void *src, void *addr, long bytes); +extern int __must_check ptrace_swap_area(pid_t pid, void *dst, void *src, long bytes); #endif /* UAPI_COMPEL_PTRACE_H__ */ diff --git a/compel/include/uapi/sigframe-common.h b/compel/include/uapi/sigframe-common.h index fc93c5480..177bf4c48 100644 --- a/compel/include/uapi/sigframe-common.h +++ b/compel/include/uapi/sigframe-common.h @@ -8,6 +8,7 @@ # error "Direct inclusion is forbidden, use instead" #endif +#include "common/compiler.h" #include #include @@ -56,7 +57,7 @@ struct rt_ucontext { unsigned long uc_regspace[128] __attribute__((aligned(8))); }; -extern int sigreturn_prep_fpu_frame(struct rt_sigframe *frame, - struct rt_sigframe *rframe); +extern int __must_check sigreturn_prep_fpu_frame(struct rt_sigframe *frame, + struct rt_sigframe *rframe); #endif /* UAPI_COMPEL_SIGFRAME_COMMON_H__ */ diff --git a/compel/plugins/include/uapi/plugin-fds.h b/compel/plugins/include/uapi/plugin-fds.h index cececb21d..e995b4b66 100644 --- a/compel/plugins/include/uapi/plugin-fds.h +++ b/compel/plugins/include/uapi/plugin-fds.h @@ -1,7 +1,7 @@ #ifndef COMPEL_PLUGIN_STD_STD_H__ #define COMPEL_PLUGIN_STD_STD_H__ -extern int fds_send_fd(int fd); +extern int __must_check fds_send_fd(int fd); extern int fds_recv_fd(void); #endif /* COMPEL_PLUGIN_STD_STD_H__ */ diff --git a/compel/plugins/include/uapi/std/infect.h b/compel/plugins/include/uapi/std/infect.h index 800df2509..1e784f8b4 100644 --- a/compel/plugins/include/uapi/std/infect.h +++ b/compel/plugins/include/uapi/std/infect.h @@ -1,14 +1,16 @@ 
#ifndef COMPEL_PLUGIN_STD_INFECT_H__ #define COMPEL_PLUGIN_STD_INFECT_H__ +#include "common/compiler.h" + extern int parasite_get_rpc_sock(void); -extern int parasite_service(unsigned int cmd, void *args); +extern int __must_check parasite_service(unsigned int cmd, void *args); /* * Must be supplied by user plugins. */ -extern int parasite_daemon_cmd(int cmd, void *args); -extern int parasite_trap_cmd(int cmd, void *args); +extern int __must_check parasite_daemon_cmd(int cmd, void *args); +extern int __must_check parasite_trap_cmd(int cmd, void *args); extern void parasite_cleanup(void); /* diff --git a/compel/plugins/include/uapi/std/log.h b/compel/plugins/include/uapi/std/log.h index f21b6df0d..91462c85b 100644 --- a/compel/plugins/include/uapi/std/log.h +++ b/compel/plugins/include/uapi/std/log.h @@ -2,6 +2,7 @@ #define COMPEL_PLUGIN_STD_LOG_H__ #include "compel/loglevels.h" +#include "common/compiler.h" #define STD_LOG_SIMPLE_CHUNK 256 diff --git a/criu/seize.c b/criu/seize.c index cce8911b9..e1e6b8195 100644 --- a/criu/seize.c +++ b/criu/seize.c @@ -483,7 +483,7 @@ static int collect_children(struct pstree_item *item) if (!opts.freeze_cgroup) /* fails when meets a zombie */ - compel_interrupt_task(pid); + __ignore_value(compel_interrupt_task(pid)); ret = compel_wait_task(pid, item->pid->real, parse_pid_status, NULL, &creds.s, NULL); if (ret < 0) { diff --git a/include/common/compiler.h b/include/common/compiler.h index fc8abcfef..1d431a529 100644 --- a/include/common/compiler.h +++ b/include/common/compiler.h @@ -22,6 +22,7 @@ #define __used __attribute__((__used__)) #define __maybe_unused __attribute__((unused)) #define __always_unused __attribute__((unused)) +#define __must_check __attribute__((__warn_unused_result__)) #define __section(S) __attribute__ ((__section__(#S))) @@ -99,4 +100,30 @@ #define is_log2(v) (((v) & ((v) - 1)) == 0) +/* + * Use "__ignore_value" to avoid a warning when using a function declared with + * gcc's warn_unused_result attribute, 
but for which you really do want to + * ignore the result. Traditionally, people have used a "(void)" cast to + * indicate that a function's return value is deliberately unused. However, + * if the function is declared with __attribute__((warn_unused_result)), + * gcc issues a warning even with the cast. + * + * Caution: most of the time, you really should heed gcc's warning, and + * check the return value. However, in those exceptional cases in which + * you're sure you know what you're doing, use this function. + * + * Normally casting an expression to void discards its value, but GCC + * versions 3.4 and newer have __attribute__ ((__warn_unused_result__)) + * which may cause unwanted diagnostics in that case. Use __typeof__ + * and __extension__ to work around the problem, if the workaround is + * known to be needed. + * Written by Jim Meyering, Eric Blake and Pádraig Brady. + * (See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66425 for the details) + */ +#if 3 < __GNUC__ + (4 <= __GNUC_MINOR__) +# define __ignore_value(x) ({ __typeof__ (x) __x = (x); (void) __x; }) +#else +# define __ignore_value(x) ((void) (x)) +#endif + #endif /* __CR_COMPILER_H__ */ From dc4677123ba03f93deab2d1ec6047d3a35ba694c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20C=C5=82api=C5=84ski?= Date: Wed, 6 Nov 2019 02:15:20 +0100 Subject: [PATCH 0245/2030] Checkpoint only specified controllers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Before this change CRIU would checkpoint all controllers, even the ones not specified in --cgroup-dump-controller. That becomes a problem if there's a cgroup controller on the checkpointing machine that doesn't exist on the restoring machine even if CRIU is instructed not to dump that controller. After that change everything works as expected. 
Signed-off-by: Michał Cłapiński --- criu/proc_parse.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/criu/proc_parse.c b/criu/proc_parse.c index d67392a12..fa7644992 100644 --- a/criu/proc_parse.c +++ b/criu/proc_parse.c @@ -2498,6 +2498,12 @@ int collect_controllers(struct list_head *cgroups, unsigned int *n_cgroups) goto err; } *off = '\0'; + + if (cgp_should_skip_controller(controllers)) { + pr_debug("cg-prop: Skipping controller %s\n", controllers); + continue; + } + while (1) { off = strchr(controllers, ','); if (off) From 8f45330d168df043c400593f2387a92e2b686ef8 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Sun, 17 Nov 2019 16:04:16 +0200 Subject: [PATCH 0246/2030] travis: group lazy-pages options The amount of lazy-pages options keeps growing, let's put the common ones into a variable. Signed-off-by: Mike Rapoport --- scripts/travis/travis-tests | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/scripts/travis/travis-tests b/scripts/travis/travis-tests index bc97fd455..a87ddbaf4 100755 --- a/scripts/travis/travis-tests +++ b/scripts/travis/travis-tests @@ -146,10 +146,11 @@ fi LAZY_EXCLUDE="$LAZY_EXCLUDE -x maps04" LAZY_TESTS=.*\(maps0\|uffd-events\|lazy-thp\|futex\|fork\).* +LAZY_OPTS="-p 2 -T $LAZY_TESTS $LAZY_EXCLUDE $ZDTM_OPTS" -./test/zdtm.py run -p 2 -T $LAZY_TESTS --lazy-pages $LAZY_EXCLUDE $ZDTM_OPTS -./test/zdtm.py run -p 2 -T $LAZY_TESTS --remote-lazy-pages $LAZY_EXCLUDE $ZDTM_OPTS -./test/zdtm.py run -p 2 -T $LAZY_TESTS --remote-lazy-pages --tls $LAZY_EXCLUDE $ZDTM_OPTS +./test/zdtm.py run $LAZY_OPTS --lazy-pages +./test/zdtm.py run $LAZY_OPTS --remote-lazy-pages +./test/zdtm.py run $LAZY_OPTS --remote-lazy-pages --tls bash ./test/jenkins/criu-fault.sh bash ./test/jenkins/criu-fcg.sh From 75fcec0ecbccea5b8258def25adb056e4d02c0c1 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Sun, 17 Nov 2019 16:05:47 +0200 Subject: [PATCH 0247/2030] travis: exclude uns tests for lazy-pages on newer kernels Kernels 5.4 and 
higher will restrict availability of UFFD_EVENT_FORK only for users with CAP_SYS_PTRACE. This prevents running --lazy-pages tests with 'uns' flavor. Disable 'uns' for lazy pages testing in travis for newer kernels. Signed-off-by: Mike Rapoport --- scripts/travis/travis-tests | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/scripts/travis/travis-tests b/scripts/travis/travis-tests index a87ddbaf4..4cb842c97 100755 --- a/scripts/travis/travis-tests +++ b/scripts/travis/travis-tests @@ -145,8 +145,15 @@ else fi LAZY_EXCLUDE="$LAZY_EXCLUDE -x maps04" +# Starting with 5.4 kernel requires SYS_CAP_PTRACE to use uffd events; as such +# we cannot run lazy-pages tests in uns +LAZY_FLAVORS="" +if [ $KERN_MAJ -ge "5" ] && [ $KERN_MIN -ge "4" ]; then + LAZY_FLAVORS = "-f h,ns" +fi + LAZY_TESTS=.*\(maps0\|uffd-events\|lazy-thp\|futex\|fork\).* -LAZY_OPTS="-p 2 -T $LAZY_TESTS $LAZY_EXCLUDE $ZDTM_OPTS" +LAZY_OPTS="-p 2 -T $LAZY_TESTS $LAZY_EXCLUDE $LAZY_FLAVORS $ZDTM_OPTS" ./test/zdtm.py run $LAZY_OPTS --lazy-pages ./test/zdtm.py run $LAZY_OPTS --remote-lazy-pages From b50b6ea09e7a80b91f2bdeb0b5cd444b0ae800ca Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Sun, 3 Nov 2019 20:18:38 +0000 Subject: [PATCH 0248/2030] mount: Add error messages Suggested-by: Andrei Vagin Signed-off-by: Radostin Stoyanov --- criu/mount.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/criu/mount.c b/criu/mount.c index 974af6eb2..6b1adecc6 100644 --- a/criu/mount.c +++ b/criu/mount.c @@ -1325,8 +1325,10 @@ int ns_open_mountpoint(void *arg) } /* Remount all mounts as private to disable propagation */ - if (mount("none", "/", NULL, MS_REC|MS_PRIVATE, NULL)) + if (mount("none", "/", NULL, MS_REC|MS_PRIVATE, NULL)) { + pr_perror("Unable to remount"); goto err; + } if (umount_overmounts(mi)) goto err; @@ -1536,6 +1538,7 @@ static __maybe_unused int mount_cr_time_mount(struct ns_id *ns, unsigned int *s_ ret = mount(source, target, type, 0,
NULL); if (ret < 0) { + pr_perror("Unable to mount %s %s", source, target); exit_code = -errno; goto restore_ns; } else { @@ -2004,7 +2007,10 @@ static int fetch_rt_stat(struct mount_info *m, const char *where) static int do_simple_mount(struct mount_info *mi, const char *src, const char *fstype, unsigned long mountflags) { - return mount(src, mi->mountpoint, fstype, mountflags, mi->options); + int ret = mount(src, mi->mountpoint, fstype, mountflags, mi->options); + if (ret) + pr_perror("Unable to mount %s %s (id=%d)", src, mi->mountpoint, mi->mnt_id); + return ret; } static char *mnt_fsname(struct mount_info *mi) @@ -2491,8 +2497,11 @@ static int do_mount_one(struct mount_info *mi) } /* do_mount_root() is called from populate_mnt_ns() */ - if (mount(opts.root, mi->mountpoint, NULL, MS_BIND | MS_REC, NULL)) + if (mount(opts.root, mi->mountpoint, NULL, MS_BIND | MS_REC, NULL)) { + pr_perror("Unable to mount %s %s (id=%d)", opts.root, mi->mountpoint, mi->mnt_id); return -1; + } + if (do_mount_root(mi)) return -1; mi->mounted = true; From d99ee9753e90df1040dc49341a38357e58d838ee Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Sat, 9 Nov 2019 22:48:32 +0000 Subject: [PATCH 0249/2030] mount: Bind-mount root via userns_call When restoring a runc container with enabled user namespace CRIU fails to mount the specified root directory because the path is under /run/runc which is inaccessible to unprivileged users. 
Signed-off-by: Radostin Stoyanov --- criu/mount.c | 51 +++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 39 insertions(+), 12 deletions(-) diff --git a/criu/mount.c b/criu/mount.c index 6b1adecc6..52e70d376 100644 --- a/criu/mount.c +++ b/criu/mount.c @@ -2020,20 +2020,20 @@ static char *mnt_fsname(struct mount_info *mi) return mi->fstype->name; } -static int apply_sb_flags(void *args, int fd, pid_t pid) +static int userns_mount(char *src, void *args, int fd, pid_t pid) { unsigned long flags = *(unsigned long *) args; int rst = -1, err = -1; - char path[PSFDS]; + char target[PSFDS]; - snprintf(path, sizeof(path), "/proc/self/fd/%d", fd); + snprintf(target, sizeof(target), "/proc/self/fd/%d", fd); if (pid != getpid() && switch_ns(pid, &mnt_ns_desc, &rst)) return -1; - err = mount(NULL, path, NULL, MS_REMOUNT | flags, NULL); + err = mount(src, target, NULL, flags, NULL); if (err) - pr_perror("Unable to remount %s", path); + pr_perror("Unable to mount %s", target); if (rst >= 0 && restore_ns(rst, &mnt_ns_desc)) return -1; @@ -2041,6 +2041,16 @@ static int apply_sb_flags(void *args, int fd, pid_t pid) return err; } +static int apply_sb_flags(void *args, int fd, pid_t pid) +{ + return userns_mount(NULL, args, fd, pid); +} + +static int mount_root(void *args, int fd, pid_t pid) +{ + return userns_mount(opts.root, args, fd, pid); +} + static int do_new_mount(struct mount_info *mi) { unsigned long sflags = mi->sb_flags; @@ -2088,10 +2098,9 @@ static int do_new_mount(struct mount_info *mi) pr_perror("Unable to open %s", mi->mountpoint); return -1; } - sflags |= MS_RDONLY; - if (userns_call(apply_sb_flags, 0, - &sflags, sizeof(sflags), fd)) { - pr_perror("Unable to apply mount flags %d for %s", + sflags |= MS_RDONLY | MS_REMOUNT; + if (userns_call(apply_sb_flags, 0, &sflags, sizeof(sflags), fd)) { + pr_err("Unable to apply mount flags %d for %s", mi->sb_flags, mi->mountpoint); close(fd); return -1; @@ -2491,15 +2500,33 @@ static int do_mount_one(struct 
mount_info *mi) pr_debug("\tMounting %s @%s (%d)\n", mi->fstype->name, mi->mountpoint, mi->need_plugin); if (rst_mnt_is_root(mi)) { + int fd; + unsigned long flags = MS_BIND | MS_REC; + if (opts.root == NULL) { pr_err("The --root option is required to restore a mount namespace\n"); return -1; } /* do_mount_root() is called from populate_mnt_ns() */ - if (mount(opts.root, mi->mountpoint, NULL, MS_BIND | MS_REC, NULL)) { - pr_perror("Unable to mount %s %s (id=%d)", opts.root, mi->mountpoint, mi->mnt_id); - return -1; + if (root_ns_mask & CLONE_NEWUSER) { + fd = open(mi->mountpoint, O_PATH); + if (fd < 0) { + pr_perror("Unable to open %s", mi->mountpoint); + return -1; + } + + if (userns_call(mount_root, 0, &flags, sizeof(flags), fd)) { + pr_err("Unable to mount %s\n", mi->mountpoint); + close(fd); + return -1; + } + close(fd); + } else { + if (mount(opts.root, mi->mountpoint, NULL, flags, NULL)) { + pr_perror("Unable to mount %s %s (id=%d)", opts.root, mi->mountpoint, mi->mnt_id); + return -1; + } } if (do_mount_root(mi)) From 8ab3e40e3e45a4e0337c6715c923fb640e2e8973 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Sun, 10 Nov 2019 07:35:50 +0000 Subject: [PATCH 0250/2030] restore: Create temp proc in /tmp When restoring a container with user namespace, CRIU fails to create a temporary directory for proc. This is because the unprivileged user that has been just restored does not have permissions to access the working directory used by CRIU.
Resolves #828 Signed-off-by: Radostin Stoyanov --- criu/cr-restore.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/criu/cr-restore.c b/criu/cr-restore.c index 05a25835a..5694931f4 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -1588,7 +1588,7 @@ static void restore_pgid(void) static int mount_proc(void) { int fd, ret; - char proc_mountpoint[] = "crtools-proc.XXXXXX"; + char proc_mountpoint[] = "/tmp/crtools-proc.XXXXXX"; if (root_ns_mask == 0) fd = ret = open("/proc", O_DIRECTORY); From 9a50fbce72228404c29642af70af7b42fbc60a7b Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Sun, 3 Nov 2019 20:35:18 +0000 Subject: [PATCH 0251/2030] man: Describe --root option requirements These requirements have been described in https://github.com/opencontainers/runc/blob/b133feae/libcontainer/container_linux.go#L1265 Signed-off-by: Radostin Stoyanov --- Documentation/criu.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/criu.txt b/Documentation/criu.txt index 2729bc95a..133a094c0 100644 --- a/Documentation/criu.txt +++ b/Documentation/criu.txt @@ -414,6 +414,8 @@ usually need to be escaped from shell. *-r*, *--root* 'path':: Change the root filesystem to 'path' (when run in a mount namespace). + This option is required to restore a mount namespace. The directory + 'path' must be a mount point and its parent must not be overmounted. *--external* 'type'*[*'id'*]:*'value':: Restore an instance of an external resource. The generic syntax is From 90cbeadb668d99f9d9557cee7a4c67e593f6e7ad Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Tue, 19 Nov 2019 22:10:39 +0000 Subject: [PATCH 0252/2030] zdtm: Replace if->continue with if->elif->else Replacing the if->continue pattern with if->elif->else reduces the number of lines while preserving the logic. 
Signed-off-by: Radostin Stoyanov --- test/zdtm.py | 63 ++++++++++++++++++---------------------------------- 1 file changed, 22 insertions(+), 41 deletions(-) diff --git a/test/zdtm.py b/test/zdtm.py index 6d3fddfad..17e0540eb 100755 --- a/test/zdtm.py +++ b/test/zdtm.py @@ -867,76 +867,57 @@ class criu_rpc: def __set_opts(criu, args, ctx): while len(args) != 0: arg = args.pop(0) - if arg == '-v4': + if "-v4" == arg: criu.opts.log_level = 4 - continue - if arg == '-o': + elif "-o" == arg: criu.opts.log_file = args.pop(0) - continue - if arg == '-D': + elif "-D" == arg: criu.opts.images_dir_fd = os.open(args.pop(0), os.O_DIRECTORY) ctx['imgd'] = criu.opts.images_dir_fd - continue - if arg == '-t': + elif "-t" == arg: criu.opts.pid = int(args.pop(0)) - continue - if arg == '--pidfile': + elif "--pidfile" == arg: ctx['pidf'] = args.pop(0) - continue - if arg == '--timeout': + elif "--timeout" == arg: criu.opts.timeout = int(args.pop(0)) - continue - if arg == '--restore-detached': - # Set by service by default - ctx['rd'] = True - continue - if arg == '--root': + elif "--restore-detached" == arg: + ctx['rd'] = True # Set by service by default + elif "--root" == arg: criu.opts.root = args.pop(0) - continue - if arg == '--external': + elif "--external" == arg: criu.opts.external.append(args.pop(0)) - continue - if arg == '--status-fd': + elif "--status-fd" == arg: fd = int(args.pop(0)) os.write(fd, b"\0") fcntl.fcntl(fd, fcntl.F_SETFD, fcntl.FD_CLOEXEC) - continue - if arg == '--port': + elif "--port" == arg: criu.opts.ps.port = int(args.pop(0)) - continue - if arg == '--address': + elif "--address" == arg: criu.opts.ps.address = args.pop(0) + elif "--page-server" == arg: continue - if arg == '--page-server': - continue - if arg == '--prev-images-dir': + elif "--prev-images-dir" == arg: criu.opts.parent_img = args.pop(0) - continue - if arg == '--pre-dump-mode': + elif "--pre-dump-mode" == arg: key = args.pop(0) mode = crpc.rpc.VM_READ if key == "splice": mode = 
crpc.rpc.SPLICE criu.opts.pre_dump_mode = mode - continue - if arg == '--track-mem': + elif "--track-mem" == arg: criu.opts.track_mem = True - continue - if arg == '--tcp-established': + elif "--tcp-established" == arg: criu.opts.tcp_established = True - continue - if arg == '--restore-sibling': + elif "--restore-sibling" == arg: criu.opts.rst_sibling = True - continue - if arg == "--inherit-fd": + elif "--inherit-fd" == arg: inhfd = criu.opts.inherit_fd.add() key = args.pop(0) fd, key = key.split(":", 1) inhfd.fd = int(fd[3:-1]) inhfd.key = key - continue - - raise test_fail_exc('RPC for %s(%s) required' % (arg, args.pop(0))) + else: + raise test_fail_exc('RPC for %s(%s) required' % (arg, args.pop(0))) @staticmethod def run(action, From 60bb5c731078ad15b3d9e62782d692d91c5c2db0 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Tue, 19 Nov 2019 22:48:44 +0000 Subject: [PATCH 0253/2030] zdtm: Set --root path to 0700 on restore Update zdtm tests to verify that CRIU does not require the --root path to be accessible to the unprivileged user being restored when restoring user namespace. 
Signed-off-by: Radostin Stoyanov --- test/zdtm.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/test/zdtm.py b/test/zdtm.py index 17e0540eb..16ff0b379 100755 --- a/test/zdtm.py +++ b/test/zdtm.py @@ -62,6 +62,7 @@ tests_root = None def clean_tests_root(): global tests_root if tests_root and tests_root[0] == os.getpid(): + os.rmdir(os.path.join(tests_root[1], "root")) os.rmdir(tests_root[1]) @@ -70,7 +71,9 @@ def make_tests_root(): if not tests_root: tests_root = (os.getpid(), tempfile.mkdtemp("", "criu-root-", "/tmp")) atexit.register(clean_tests_root) - return tests_root[1] + os.mkdir(os.path.join(tests_root[1], "root")) + os.chmod(tests_root[1], 0o777) + return os.path.join(tests_root[1], "root") # Report generation @@ -483,6 +486,13 @@ class zdtm_test: # move into some semi-random state time.sleep(random.random()) + if self.__flavor.ns: + # In the case of runc the path specified with the opts.root + # option is created in /run/runc/ which is inaccessible to + # unprivileged users. The permissions here are set to test + # this use case. + os.chmod(os.path.dirname(self.__flavor.root), 0o700) + def kill(self, sig=signal.SIGKILL): self.__freezer.thaw() if self.__pid: From 25f6d4f72fb995cb776d65a9d4d539d4fdcc6740 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 21 Nov 2019 21:56:34 +0000 Subject: [PATCH 0254/2030] build: Remove SRCARCH SRCARCH is always equal to ARCH. There are no rules when to use one or another and architectures may forget to set one of them up. No need for a second variable meaning the same and confusing people. Remove it completely. Self-correction [after some debug]: SRCARCH was different in one place: zdtm Makefile by some unintentional mistake: > ifeq ($(ARCH),arm64) > ARCH ?= aarch64 > SRCARCH ?= aarch64 > endif That was meant to be "ARCH := aarch64" because "?=" would never work inside that ifeq. Fix up this part of the mess too.
Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- Makefile | 3 +-- Makefile.config | 2 +- compel/plugins/Makefile | 4 ++-- criu/Makefile | 2 +- criu/pie/Makefile | 6 +++--- criu/pie/Makefile.library | 4 ++-- test/zdtm/Makefile.inc | 7 ++----- test/zdtm/static/Makefile | 4 ++-- 8 files changed, 14 insertions(+), 18 deletions(-) diff --git a/Makefile b/Makefile index f827e7baa..2e62f6f39 100644 --- a/Makefile +++ b/Makefile @@ -86,7 +86,6 @@ endif # commit "S/390: Fix 64 bit sibcall". ifeq ($(ARCH),s390) ARCH := s390 - SRCARCH := s390 DEFINES := -DCONFIG_S390 CFLAGS_PIE := -fno-optimize-sibling-calls endif @@ -94,7 +93,7 @@ endif CFLAGS_PIE += -DCR_NOGLIBC export CFLAGS_PIE -LDARCH ?= $(SRCARCH) +LDARCH ?= $(ARCH) export LDARCH export PROTOUFIX DEFINES diff --git a/Makefile.config b/Makefile.config index 1e4352b9d..5af3fed38 100644 --- a/Makefile.config +++ b/Makefile.config @@ -30,7 +30,7 @@ CONFIG_FILE = .config $(CONFIG_FILE): touch $(CONFIG_FILE) -ifeq ($(SRCARCH),x86) +ifeq ($(ARCH),x86) # CONFIG_COMPAT is only for x86 now, no need for compile-test other archs ifeq ($(call try-asm,$(FEATURE_TEST_X86_COMPAT)),true) export CONFIG_COMPAT := y diff --git a/compel/plugins/Makefile b/compel/plugins/Makefile index a326e2a66..197ff1b24 100644 --- a/compel/plugins/Makefile +++ b/compel/plugins/Makefile @@ -53,11 +53,11 @@ std-lib-y += ./$(PLUGIN_ARCH_DIR)/std/parasite-head.o target += fds fds-lib-y += fds/fds.o -ifeq ($(SRCARCH),x86) +ifeq ($(ARCH),x86) std-lib-y += ./$(PLUGIN_ARCH_DIR)/std/memcpy.o endif -ifeq ($(SRCARCH),ppc64) +ifeq ($(ARCH),ppc64) std-lib-y += ./$(PLUGIN_ARCH_DIR)/std/memcpy.o std-lib-y += ./$(PLUGIN_ARCH_DIR)/std/memcmp.o endif diff --git a/criu/Makefile b/criu/Makefile index 4134e5052..ceb49ce09 100644 --- a/criu/Makefile +++ b/criu/Makefile @@ -2,7 +2,7 @@ # 6a8d90f5fec4 "attr: Allow attribute type 0" WRAPFLAGS += -Wl,--wrap=nla_parse,--wrap=nlmsg_parse -ARCH_DIR := criu/arch/$(SRCARCH) +ARCH_DIR := criu/arch/$(ARCH) PIE_DIR := 
criu/pie export ARCH_DIR PIE_DIR diff --git a/criu/pie/Makefile b/criu/pie/Makefile index 1ad456f43..a30747ac3 100644 --- a/criu/pie/Makefile +++ b/criu/pie/Makefile @@ -14,7 +14,7 @@ ifneq ($(filter-out clean mrproper,$(MAKECMDGOALS)),) compel_plugins := $(shell $(COMPEL_BIN) plugins) endif -LDS := compel/arch/$(SRCARCH)/scripts/compel-pack.lds.S +LDS := compel/arch/$(ARCH)/scripts/compel-pack.lds.S restorer-obj-y += parasite-vdso.o ./$(ARCH_DIR)/vdso-pie.o restorer-obj-y += ./$(ARCH_DIR)/restorer.o @@ -26,11 +26,11 @@ ifeq ($(ARCH),x86) endif endif -ifeq ($(SRCARCH),aarch64) +ifeq ($(ARCH),aarch64) restorer-obj-y += ./$(ARCH_DIR)/intraprocedure.o endif -ifeq ($(SRCARCH),ppc64) +ifeq ($(ARCH),ppc64) restorer-obj-y += ./$(ARCH_DIR)/vdso-trampoline.o endif diff --git a/criu/pie/Makefile.library b/criu/pie/Makefile.library index 658c8a4eb..de75b11d4 100644 --- a/criu/pie/Makefile.library +++ b/criu/pie/Makefile.library @@ -9,14 +9,14 @@ lib-name := pie.lib.a lib-y += util.o lib-y += util-vdso.o -ifeq ($(SRCARCH),x86) +ifeq ($(ARCH),x86) ifeq ($(CONFIG_COMPAT),y) lib-y += util-vdso-elf32.o endif CFLAGS_util-vdso-elf32.o += -DCONFIG_VDSO_32 endif -ifeq ($(SRCARCH),arm) +ifeq ($(ARCH),arm) lib-y += ./$(ARCH_DIR)/aeabi-helpers.o lib-y += ./$(ARCH_DIR)/pie-cacheflush.o endif diff --git a/test/zdtm/Makefile.inc b/test/zdtm/Makefile.inc index 170f31632..d5c013a3e 100644 --- a/test/zdtm/Makefile.inc +++ b/test/zdtm/Makefile.inc @@ -15,12 +15,9 @@ ARCH ?= $(shell uname -m | sed \ -e s/aarch64.*/arm64/) ifeq ($(ARCH),arm64) - ARCH ?= aarch64 - SRCARCH ?= aarch64 + ARCH := aarch64 endif -SRCARCH ?= $(ARCH) - ifeq ($(ARCH),arm) ARMV := $(shell echo $(UNAME-M) | sed -nr 's/armv([[:digit:]]).*/\1/p; t; i7') @@ -35,7 +32,7 @@ CC := gcc CFLAGS += -g -O2 -Wall -Werror -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0 CFLAGS += $(USERCFLAGS) CFLAGS += -D_GNU_SOURCE -CPPFLAGS += -iquote $(LIBDIR)/arch/$(SRCARCH)/include +CPPFLAGS += -iquote $(LIBDIR)/arch/$(ARCH)/include ifeq ($(strip $(V)),) E = 
@echo diff --git a/test/zdtm/static/Makefile b/test/zdtm/static/Makefile index a38482f44..e0d4d2c5c 100644 --- a/test/zdtm/static/Makefile +++ b/test/zdtm/static/Makefile @@ -219,13 +219,13 @@ TST_NOFILE := \ child_subreaper_and_reparent \ # jobctl00 \ -ifneq ($(SRCARCH),arm) +ifneq ($(ARCH),arm) ifneq ($(COMPAT_TEST),y) TST_NOFILE += maps03 endif endif -ifeq ($(SRCARCH),s390) +ifeq ($(ARCH),s390) TST_NOFILE += s390x_regs_check \ s390x_gs_threads \ s390x_runtime_instr From a4fa4162d410c0bbc751d92119022f9a1c3a6723 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 21 Nov 2019 21:56:35 +0000 Subject: [PATCH 0255/2030] build/nmk: Remove SRCARCH It's not used anywhere now. Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- scripts/nmk/scripts/include.mk | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/scripts/nmk/scripts/include.mk b/scripts/nmk/scripts/include.mk index e1701103f..ee0e32f62 100644 --- a/scripts/nmk/scripts/include.mk +++ b/scripts/nmk/scripts/include.mk @@ -22,9 +22,8 @@ SUBARCH := $(shell uname -m | sed \ -e s/aarch64.*/aarch64/) ARCH ?= $(SUBARCH) -SRCARCH := $(ARCH) -export SUBARCH ARCH SRCARCH +export SUBARCH ARCH ifndef ____nmk_defined__tools include $(__nmk_dir)tools.mk From df66aa99b6ce59108055759d5ebda69e2fd00669 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 21 Nov 2019 21:56:36 +0000 Subject: [PATCH 0256/2030] build/nmk: Provide proper SUBARCH It's always equal ARCH and not very useful (so nothing actually uses it). Time for a change: SUBARCH now is meaningful and gives a way to detect what kind of ARCH flavor build is dealing with. Also, for cross-compiling sake don't set SUBARCH if the user supplied it. 
(and don't call useless uname during cross compilation) Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- scripts/nmk/scripts/include.mk | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/scripts/nmk/scripts/include.mk b/scripts/nmk/scripts/include.mk index ee0e32f62..c1c1e94af 100644 --- a/scripts/nmk/scripts/include.mk +++ b/scripts/nmk/scripts/include.mk @@ -8,21 +8,20 @@ endif # # Common vars. -SUBARCH := $(shell uname -m | sed \ - -e s/i.86/x86/ \ - -e s/x86_64/x86/ \ - -e s/sun4u/sparc64/ \ - -e s/arm.*/arm/ \ - -e s/sa110/arm/ \ - -e s/s390x/s390/ \ - -e s/parisc64/parisc/ \ - -e s/ppc64.*/ppc64/ \ - -e s/mips.*/mips/ \ - -e s/sh[234].*/sh/ \ +SUBARCH ?= $(shell uname -m) +ARCH ?= $(shell echo $(SUBARCH) | sed \ + -e s/i.86/x86/ \ + -e s/x86_64/x86/ \ + -e s/sun4u/sparc64/ \ + -e s/arm.*/arm/ \ + -e s/sa110/arm/ \ + -e s/s390x/s390/ \ + -e s/parisc64/parisc/ \ + -e s/ppc64.*/ppc64/ \ + -e s/mips.*/mips/ \ + -e s/sh[234].*/sh/ \ -e s/aarch64.*/aarch64/) -ARCH ?= $(SUBARCH) - export SUBARCH ARCH ifndef ____nmk_defined__tools From 1463c41119c8eef8ccf135e71359f579e821a21e Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 21 Nov 2019 21:56:37 +0000 Subject: [PATCH 0257/2030] build: Use SUBARCH Instead of doing additional `uname -m` - use provided $(SUBARCH) to detect what architecture flavour the build should produce the result for. 
Fixes two things: - zdtm make now correctly supplies $(USERCFLAGS) - subtly fixes cross compilation by providing a way to specify $(SUBARCH) Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- Makefile | 6 ++---- test/zdtm/Makefile.inc | 25 +++++++++++++------------ 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/Makefile b/Makefile index 2e62f6f39..ef76d706c 100644 --- a/Makefile +++ b/Makefile @@ -17,8 +17,6 @@ ifeq ($(origin HOSTCFLAGS), undefined) HOSTCFLAGS := $(CFLAGS) $(USERCFLAGS) endif -UNAME-M := $(shell uname -m) - # # Supported Architectures ifneq ($(filter-out x86 arm aarch64 ppc64 s390,$(ARCH)),) @@ -27,14 +25,14 @@ endif # The PowerPC 64 bits architecture could be big or little endian. # They are handled in the same way. -ifeq ($(UNAME-M),ppc64) +ifeq ($(SUBARCH),ppc64) error := $(error ppc64 big endian is not yet supported) endif # # Architecture specific options. ifeq ($(ARCH),arm) - ARMV := $(shell echo $(UNAME-M) | sed -nr 's/armv([[:digit:]]).*/\1/p; t; i7') + ARMV := $(shell echo $(SUBARCH) | sed -nr 's/armv([[:digit:]]).*/\1/p; t; i7') ifeq ($(ARMV),6) USERCFLAGS += -march=armv6 diff --git a/test/zdtm/Makefile.inc b/test/zdtm/Makefile.inc index d5c013a3e..7584d3b06 100644 --- a/test/zdtm/Makefile.inc +++ b/test/zdtm/Makefile.inc @@ -1,17 +1,18 @@ .SUFFIXES: MAKEFLAGS += -r -ARCH ?= $(shell uname -m | sed \ - -e s/i.86/x86/ \ - -e s/x86_64/x86/ \ - -e s/sun4u/sparc64/ \ - -e s/arm.*/arm/ \ - -e s/sa110/arm/ \ - -e s/s390x/s390/ \ - -e s/parisc64/parisc/ \ - -e s/ppc64.*/ppc64/ \ - -e s/mips.*/mips/ \ - -e s/sh[234].*/sh/ \ +SUBARCH ?= $(shell uname -m) +ARCH ?= $(shell echo $(SUBARCH) | sed \ + -e s/i.86/x86/ \ + -e s/x86_64/x86/ \ + -e s/sun4u/sparc64/ \ + -e s/arm.*/arm/ \ + -e s/sa110/arm/ \ + -e s/s390x/s390/ \ + -e s/parisc64/parisc/ \ + -e s/ppc64.*/ppc64/ \ + -e s/mips.*/mips/ \ + -e s/sh[234].*/sh/ \ -e s/aarch64.*/arm64/) ifeq ($(ARCH),arm64) @@ -19,7 +20,7 @@ ifeq ($(ARCH),arm64) endif ifeq ($(ARCH),arm) - 
ARMV := $(shell echo $(UNAME-M) | sed -nr 's/armv([[:digit:]]).*/\1/p; t; i7') + ARMV := $(shell echo $(SUBARCH) | sed -nr 's/armv([[:digit:]]).*/\1/p; t; i7') ifeq ($(ARMV),6) USERCFLAGS += -march=armv6 From 70fae12509d7e0448e00fa0b0aa3a94b2384025f Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 21 Nov 2019 21:56:38 +0000 Subject: [PATCH 0258/2030] build/zdtm: Support cross-build Maybe not that useful, but only little change needed. Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- test/zdtm/Makefile.inc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/test/zdtm/Makefile.inc b/test/zdtm/Makefile.inc index 7584d3b06..8f2650b44 100644 --- a/test/zdtm/Makefile.inc +++ b/test/zdtm/Makefile.inc @@ -29,7 +29,10 @@ ifeq ($(ARCH),arm) endif endif -CC := gcc +HOSTCC ?= gcc +ifeq ($(origin CC), default) + CC := $(CROSS_COMPILE)$(HOSTCC) +endif CFLAGS += -g -O2 -Wall -Werror -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0 CFLAGS += $(USERCFLAGS) CFLAGS += -D_GNU_SOURCE From 3b24574b6d48b386127386b18036767a89ad6d0f Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 21 Nov 2019 21:56:39 +0000 Subject: [PATCH 0259/2030] build/zdtm: Makefile hack for travis aarch64/armv8l The very same hack to build aarch32 zdtm tests on armv8 Travis-CI as in the commit dfa0a1edcbcb ("Makefile hack for travis aarch64/armv8l") Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- test/zdtm/Makefile.inc | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/test/zdtm/Makefile.inc b/test/zdtm/Makefile.inc index 8f2650b44..d132ca981 100644 --- a/test/zdtm/Makefile.inc +++ b/test/zdtm/Makefile.inc @@ -20,13 +20,17 @@ ifeq ($(ARCH),arm64) endif ifeq ($(ARCH),arm) - ARMV := $(shell echo $(SUBARCH) | sed -nr 's/armv([[:digit:]]).*/\1/p; t; i7') + ARMV := $(shell echo $(SUBARCH) | sed -nr 's/armv([[:digit:]]).*/\1/p; t; i7') - ifeq ($(ARMV),6) - USERCFLAGS += -march=armv6 - else ifeq ($(ARMV),7) - USERCFLAGS += -march=armv7-a - 
endif + ifeq ($(ARMV),6) + USERCFLAGS += -march=armv6 + else ifeq ($(ARMV),7) + USERCFLAGS += -march=armv7-a + else ifeq ($(ARMV),8) + # To build aarch32 on armv8 Travis-CI (see criu Makefile) + USERCFLAGS += -march=armv7-a + ARMV := 7 + endif endif HOSTCC ?= gcc From bffa6e0ad005a3e125b9b6c3da527a929ed18c79 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 21 Nov 2019 21:56:40 +0000 Subject: [PATCH 0260/2030] build/zdtm: Use pkg-config to find includes/libs Helps to cross-compile zdtm tests in case somebody needs it. Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- test/zdtm/Makefile.inc | 13 +++++++++++++ test/zdtm/static/Makefile | 4 ++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/test/zdtm/Makefile.inc b/test/zdtm/Makefile.inc index d132ca981..32fc72d32 100644 --- a/test/zdtm/Makefile.inc +++ b/test/zdtm/Makefile.inc @@ -53,12 +53,25 @@ endif RM := rm -f --one-file-system ifeq ($(COMPAT_TEST),y) + # Firstly look for 32-bit libs and then in standard path. 
+ PKG_CONFIG_PATH := $(shell pkg-config --variable pc_path pkg-config) + PKG_CONFIG_PATH := /usr/lib32/pkgconfig:$(PKG_CONFIG_PATH) ifeq ($(ARCH),x86) export CFLAGS += -m32 export LDFLAGS += -m32 + PKG_CONFIG_PATH := /usr/lib/i386-linux-gnu/pkgconfig:$(PKG_CONFIG_PATH) endif + export PKG_CONFIG_PATH endif +define pkg-libs + $(shell PKG_CONFIG_PATH="$(PKG_CONFIG_PATH)" pkg-config --libs $(1)) +endef + +define pkg-cflags + $(shell PKG_CONFIG_PATH="$(PKG_CONFIG_PATH)" pkg-config --cflags $(1)) +endef + %.d: %.c $(E) " DEP " $@ $(Q)$(CC) $(CFLAGS) $(CPPFLAGS) -MM -MP -c $< -o $@ diff --git a/test/zdtm/static/Makefile b/test/zdtm/static/Makefile index e0d4d2c5c..36d00ca5c 100644 --- a/test/zdtm/static/Makefile +++ b/test/zdtm/static/Makefile @@ -528,8 +528,8 @@ stopped12: CFLAGS += -DZDTM_STOPPED_KILL -DZDTM_STOPPED_TKILL clone_fs: LDLIBS += -pthread # As generating dependencies won't work without proper includes, # we have to explicitly specify both .o and .d for this case: -netns_sub_veth.o netns_sub_veth.d: CPPFLAGS += -I/usr/include/libnl3 -netns_sub_veth: LDLIBS += -lnl-3 -l nl-route-3 +netns_sub_veth.o netns_sub_veth.d: CPPFLAGS += $(call pkg-cflags, libnl-3.0) +netns_sub_veth: LDLIBS += $(call pkg-libs, libnl-route-3.0 libnl-3.0) socket-tcp-fin-wait1: CFLAGS += -D ZDTM_TCP_FIN_WAIT1 socket-tcp-fin-wait2: CFLAGS += -D ZDTM_TCP_FIN_WAIT2 From 1dbc835954d9c27ad1edb8184c02cfea1fd414b1 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 21 Nov 2019 21:56:41 +0000 Subject: [PATCH 0261/2030] travis: Add armv7-cross as cross-compile test Fixes: #455 Based-on-patch-by: Andrei Vagin Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- .travis.yml | 4 +++ scripts/build/Dockerfile.armv7-cross | 44 ++++++++++++++++++++++++++++ scripts/build/Makefile | 1 + 3 files changed, 49 insertions(+) create mode 100644 scripts/build/Dockerfile.armv7-cross diff --git a/.travis.yml b/.travis.yml index f6f71be48..b27dbfe7b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -73,6 
+73,10 @@ matrix: arch: amd64 env: TR_ARCH=fedora-asan dist: xenial # test hangs on bionic + - os: linux + arch: amd64 + env: TR_ARCH=armv7-cross + dist: bionic allow_failures: - env: TR_ARCH=docker-test - env: TR_ARCH=fedora-rawhide diff --git a/scripts/build/Dockerfile.armv7-cross b/scripts/build/Dockerfile.armv7-cross new file mode 100644 index 000000000..434934aad --- /dev/null +++ b/scripts/build/Dockerfile.armv7-cross @@ -0,0 +1,44 @@ +FROM dockcross/base:latest + +# Add the cross compiler sources +RUN echo "deb http://ftp.us.debian.org/debian/ jessie main" >> /etc/apt/sources.list && \ + dpkg --add-architecture armhf && \ + apt-get install emdebian-archive-keyring + +RUN apt-get update && apt-get install -y \ + crossbuild-essential-armhf \ + libbz2-dev:armhf \ + libexpat1-dev:armhf \ + ncurses-dev:armhf \ + libssl-dev:armhf \ + protobuf-c-compiler \ + protobuf-compiler \ + python-protobuf \ + libnl-3-dev:armhf \ + libprotobuf-dev:armhf \ + libnet-dev:armhf \ + libprotobuf-c-dev:armhf \ + libcap-dev:armhf \ + libaio-dev:armhf \ + libnl-route-3-dev:armhf + +ENV CROSS_TRIPLE=arm-linux-gnueabihf +ENV CROSS_COMPILE=${CROSS_TRIPLE}- \ + CROSS_ROOT=/usr/${CROSS_TRIPLE} \ + AS=/usr/bin/${CROSS_TRIPLE}-as \ + AR=/usr/bin/${CROSS_TRIPLE}-ar \ + CC=/usr/bin/${CROSS_TRIPLE}-gcc \ + CPP=/usr/bin/${CROSS_TRIPLE}-cpp \ + CXX=/usr/bin/${CROSS_TRIPLE}-g++ \ + LD=/usr/bin/${CROSS_TRIPLE}-ld \ + FC=/usr/bin/${CROSS_TRIPLE}-gfortran + +ENV PATH="${PATH}:${CROSS_ROOT}/bin" \ + PKG_CONFIG_PATH=/usr/lib/${CROSS_TRIPLE}/pkgconfig \ + ARCH=arm \ + SUBARCH=armv7 + +COPY . 
/criu +WORKDIR /criu + +RUN make mrproper && date && make -j $(nproc) zdtm && date diff --git a/scripts/build/Makefile b/scripts/build/Makefile index a7c78e8bd..d093ce76c 100644 --- a/scripts/build/Makefile +++ b/scripts/build/Makefile @@ -2,6 +2,7 @@ ARCHES := x86_64 fedora-asan fedora-rawhide centos armv7hf TARGETS := $(ARCHES) alpine TARGETS_CLANG := $(addsuffix $(TARGETS),-clang) CONTAINER_RUNTIME := docker +TARGETS += armv7-cross all: $(TARGETS) $(TARGETS_CLANG) .PHONY: all From 434e6b92dbcd47354f02e3a992ead6c25a6db16f Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 21 Nov 2019 21:56:42 +0000 Subject: [PATCH 0262/2030] Documentation: Add a hint about docker build The original/old guide probably doesn't work anymore: - the patch isn't accessible; - criu now depends on more libraries not only protobuf Still, keep it as it might be helpful for someone. Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- Documentation/HOWTO.cross-compile | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/Documentation/HOWTO.cross-compile b/Documentation/HOWTO.cross-compile index f1b17842b..44b19dfea 100644 --- a/Documentation/HOWTO.cross-compile +++ b/Documentation/HOWTO.cross-compile @@ -1,4 +1,10 @@ -This HOWTO explains how to cross-compile CRIU on x86 +How to cross-compile CRIU on x86: + +Use the Dockerfile provided: + scripts/build/Dockerfile.armv7-cross + +Historical guide how-to do it without docker container: +[Unsupported, may not work anymore!] 1. Download the protobuf sources. 2. 
Apply the patch http://16918.selcdn.ru/crtools/aarch64/0001-protobuf-added-the-support-for-the-acrchitecture-AAr.patch From 6ab2bdd940c392ba58ebe68b5134d6327381a498 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 21 Nov 2019 21:56:43 +0000 Subject: [PATCH 0263/2030] zdtm/socket-tcp-fin-wait1: Use array index for TEST_MSG Fixes the following compile-error: > CC socket-tcp-fin-wait1.o > socket-tcp-fin-wait1.c:144:26: error: adding 'int' to a string does not append to the string [-Werror,-Wstring-plus-int] > if (write(fd, TEST_MSG + 2, sizeof(TEST_MSG) - 2) != sizeof(TEST_MSG) - 2) { > ~~~~~~~~~^~~ > socket-tcp-fin-wait1.c:144:26: note: use array indexing to silence this warning > if (write(fd, TEST_MSG + 2, sizeof(TEST_MSG) - 2) != sizeof(TEST_MSG) - 2) { > ^ > & [ ] > 1 error generated. Signed-off-by: Dmitry Safonov Signed-off-by: Andrei Vagin --- test/zdtm/static/socket-tcp-fin-wait1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/zdtm/static/socket-tcp-fin-wait1.c b/test/zdtm/static/socket-tcp-fin-wait1.c index 6c7cc93e5..50da9c152 100644 --- a/test/zdtm/static/socket-tcp-fin-wait1.c +++ b/test/zdtm/static/socket-tcp-fin-wait1.c @@ -141,7 +141,7 @@ int main(int argc, char **argv) return 1; } - if (write(fd, TEST_MSG + 2, sizeof(TEST_MSG) - 2) != sizeof(TEST_MSG) - 2) { + if (write(fd, &TEST_MSG[2], sizeof(TEST_MSG) - 2) != sizeof(TEST_MSG) - 2) { pr_err("write"); return 1; } From 37220b3c418d8d09ff2ef147e94c37fc897b3e27 Mon Sep 17 00:00:00 2001 From: Nidhi Gupta Date: Thu, 7 Nov 2019 14:38:42 +0530 Subject: [PATCH 0264/2030] Add File-based Java Functional Tests Signed-off-by: Nidhi Gupta --- test/javaTests/README.md | 8 + .../criu/java/tests/CheckpointRestore.java | 4 +- .../src/org/criu/java/tests/FileRead.java | 2 +- .../src/org/criu/java/tests/Helper.java | 39 +++- .../org/criu/java/tests/MemoryMappings.java | 121 +++++++++++ .../org/criu/java/tests/MultipleFileRead.java | 203 ++++++++++++++++++
.../criu/java/tests/MultipleFileWrite.java | 140 ++++++++++++ .../src/org/criu/java/tests/ReadWrite.java | 119 ++++++++++ test/javaTests/test.xml | 30 +++ 9 files changed, 659 insertions(+), 7 deletions(-) create mode 100644 test/javaTests/src/org/criu/java/tests/MemoryMappings.java create mode 100644 test/javaTests/src/org/criu/java/tests/MultipleFileRead.java create mode 100644 test/javaTests/src/org/criu/java/tests/MultipleFileWrite.java create mode 100644 test/javaTests/src/org/criu/java/tests/ReadWrite.java diff --git a/test/javaTests/README.md b/test/javaTests/README.md index cb779285e..670741677 100644 --- a/test/javaTests/README.md +++ b/test/javaTests/README.md @@ -23,6 +23,14 @@ CAP_SETUID Here we test the File-Based Java APIs by checkpointing the application in the following scenarios and verifying the contents of the file after restore: - Reading and writing in the same file. (FileRead.java) +- Read from a file and write its content to another file. (ReadWrite.java) +- Reading from multiple files and writing their content to another file. (MultipleFileRead) +- Reading from a file and writing its content to multiple files. (MultipleFileWrite) + +## Memory mapping Java APIs + +Here we test the Memory Mapping APIs by checkpointing the application in following scenario and verifying the contents after restore: +- Memory-mapping a file and writing its content to another file. 
(MemoryMappings.java) ### Prerequisites for running the tests: - Maven diff --git a/test/javaTests/src/org/criu/java/tests/CheckpointRestore.java b/test/javaTests/src/org/criu/java/tests/CheckpointRestore.java index 968488191..b848c9938 100644 --- a/test/javaTests/src/org/criu/java/tests/CheckpointRestore.java +++ b/test/javaTests/src/org/criu/java/tests/CheckpointRestore.java @@ -77,7 +77,7 @@ public class CheckpointRestore { private void testSetup(String testName) throws IOException { Path testFolderPath = Paths.get(outputFolder + testName + "/"); if (!Files.exists(testFolderPath)) { - System.out.println("Test Folder does not exist creating it"); + System.out.println("Creating the test folder"); Files.createDirectory(testFolderPath); } } @@ -245,7 +245,7 @@ public class CheckpointRestore { Assert.assertNotEquals(currentState, Helper.STATE_TERMINATE, testName + ": ERROR: Checkpoint-Restore failed"); Assert.assertNotEquals(currentState, Helper.STATE_FAIL, testName + ": ERROR: Test Failed, Check Log for details"); Assert.assertEquals(currentState, Helper.STATE_PASS, testName + " ERROR: Unexpected State of Mapped Buffer"); - System.out.println("-----" + "PASS" + "-----"); + System.out.println("----- " + "PASS" + " -----"); } diff --git a/test/javaTests/src/org/criu/java/tests/FileRead.java b/test/javaTests/src/org/criu/java/tests/FileRead.java index d94a14112..d8851a73e 100644 --- a/test/javaTests/src/org/criu/java/tests/FileRead.java +++ b/test/javaTests/src/org/criu/java/tests/FileRead.java @@ -50,7 +50,7 @@ class FileRead { /* * Mapped Byte Buffer should be in init state at the beginning of test */ - if ('I' != b.getChar(Helper.MAPPED_INDEX)) { + if (Helper.STATE_INIT != b.getChar(Helper.MAPPED_INDEX)) { logger.log(Level.SEVERE, "Error: Error in memory mapping, test is not in init state"); b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); System.exit(1); diff --git a/test/javaTests/src/org/criu/java/tests/Helper.java 
b/test/javaTests/src/org/criu/java/tests/Helper.java index d608fba47..fdf20bb52 100644 --- a/test/javaTests/src/org/criu/java/tests/Helper.java +++ b/test/javaTests/src/org/criu/java/tests/Helper.java @@ -1,9 +1,6 @@ package org.criu.java.tests; -import java.io.BufferedWriter; -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; +import java.io.*; import java.nio.MappedByteBuffer; import java.util.logging.FileHandler; import java.util.logging.Level; @@ -96,4 +93,38 @@ class Helper { System.exit(1); } } + + + /** + * Compare two files and return true if their content is similar. + * + * @param readFile File 1 whose content has to be compared. + * @param writeFile File 2 whose content has to be compared. + * @return true if the files are similar, false otherwise. + * @throws IOException + */ + static boolean compare(File readFile, File writeFile) throws IOException { + BufferedReader bir = new BufferedReader(new FileReader(readFile)); + BufferedReader bor = new BufferedReader(new FileReader(writeFile)); + String si, so; + si = bir.readLine(); + so = bor.readLine(); + while (null != si && null != so) { + if (!si.equals(so)) { + return false; + } + + si = bir.readLine(); + so = bor.readLine(); + } + + if ((null == si) && (null == so)) { + return true; + } + bir.close(); + bor.close(); + + return false; + } + } diff --git a/test/javaTests/src/org/criu/java/tests/MemoryMappings.java b/test/javaTests/src/org/criu/java/tests/MemoryMappings.java new file mode 100644 index 000000000..4ac6f4a17 --- /dev/null +++ b/test/javaTests/src/org/criu/java/tests/MemoryMappings.java @@ -0,0 +1,121 @@ +package org.criu.java.tests; + +import java.io.*; +import java.lang.management.ManagementFactory; +import java.lang.management.RuntimeMXBean; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.channels.FileChannel.MapMode; +import java.nio.file.StandardOpenOption; +import java.util.logging.Level; +import 
java.util.logging.Logger; + +class MemoryMappings { + private static String TESTNAME = "MemoryMappings"; + + /** + * Map a file to memory and write the mapped data into a file, + * checkpointing and restoring in between. + * + * @param args Not used + */ + public static void main(String[] args) { + MappedByteBuffer b = null; + Logger logger = null; + + try { + MappedByteBuffer testBuffer; + char ch; + int i = 1; + boolean similar; + logger = Logger.getLogger(Helper.PACKAGE_NAME + "." + TESTNAME); + File f = new File(Helper.MEMORY_MAPPED_FILE_NAME); + File readFile = new File(Helper.SOURCE_FOLDER + "/" + "ReadWrite.java"); + File writeFile = new File(Helper.OUTPUT_FOLDER_NAME + "/" + TESTNAME + "/" + "MemoryMappings_file.txt"); + RuntimeMXBean bean = ManagementFactory.getRuntimeMXBean(); + String pid = bean.getName(); + int val = Helper.init(TESTNAME, pid, logger); + if (0 != val) { + logger.log(Level.SEVERE, "Helper.init returned a non-zero code."); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + logger.log(Level.INFO, "Test init done; pid written to pid file; beginning with test"); + + FileChannel channel = FileChannel.open(f.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE); + b = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE); + channel.close(); + /* + * Mapped Byte Buffer should be in init state at the beginning of test + */ + if (Helper.STATE_INIT != b.getChar(Helper.MAPPED_INDEX)) { + logger.log(Level.SEVERE, "Error: Error in memory mapping, test is not in init state"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + logger.log(Level.INFO, "Checking existence of file to be memory mapped"); + if (!readFile.exists()) { + logger.log(Level.SEVERE, "Error: File from which to read does not exist"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + + channel = FileChannel.open(readFile.toPath(), StandardOpenOption.READ, 
StandardOpenOption.WRITE, StandardOpenOption.CREATE); + testBuffer = channel.map(MapMode.READ_WRITE, 0, readFile.length()); + channel.close(); + + if (writeFile.exists()) { + writeFile.delete(); + } + boolean newFile = writeFile.createNewFile(); + if (!newFile) { + logger.log(Level.SEVERE, "Error: Cannot create a new file to write to."); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + + BufferedWriter brw = new BufferedWriter(new FileWriter(writeFile)); + + while (testBuffer.hasRemaining()) { + ch = (char) testBuffer.get(); + brw.write(ch); + i++; + if (200 == i) { + logger.log(Level.INFO, "Going to checkpoint"); + Helper.checkpointAndWait(b, logger); + logger.log(Level.INFO, "Test has been restored!"); + } + } + + brw.close(); + logger.log(Level.INFO, "Comparing contents of the file"); + + similar = Helper.compare(readFile, writeFile); + if (!similar) { + logger.log(Level.SEVERE, "Error: Files are not similar after writing"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + logger.log(Level.INFO, "Data was read and written correctly!"); + logger.log(Level.INFO, Helper.PASS_MESSAGE); + brw.close(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_PASS); + System.exit(0); + + } catch (Exception e) { + if (null != logger) { + + StringWriter writer = new StringWriter(); + PrintWriter printWriter = new PrintWriter(writer); + e.printStackTrace(printWriter); + logger.log(Level.SEVERE, "Exception occurred:" + e); + logger.log(Level.FINE, writer.toString()); + } + + if (null != b) { + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + System.exit(5); + } + } +} diff --git a/test/javaTests/src/org/criu/java/tests/MultipleFileRead.java b/test/javaTests/src/org/criu/java/tests/MultipleFileRead.java new file mode 100644 index 000000000..7b023673e --- /dev/null +++ b/test/javaTests/src/org/criu/java/tests/MultipleFileRead.java @@ -0,0 +1,203 @@ +package org.criu.java.tests; + +import java.io.*; +import 
java.lang.management.ManagementFactory; +import java.lang.management.RuntimeMXBean; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.channels.FileChannel.MapMode; +import java.nio.file.StandardOpenOption; +import java.util.logging.Level; +import java.util.logging.Logger; + +class MultipleFileRead { + private static String TESTNAME = "MultipleFileRead"; + + /** + * @param readFile1 File 1 whose contents are read. + * @param readFile2 File 2 whose contents are read. + * @param writeFile File in which data has been written to. + * @return true if the data written is as expected, false otherwise. + * @throws IOException + */ + private static boolean compare(File readFile1, File readFile2, File writeFile) throws IOException { + BufferedReader br1 = new BufferedReader(new FileReader(readFile1)); + BufferedReader br2 = new BufferedReader(new FileReader(readFile2)); + BufferedReader brw = new BufferedReader(new FileReader(writeFile)); + boolean eof1, eof2; + eof1 = false; + eof2 = false; + String inpString, wrtString; + + while (!eof1 || !eof2) { + if (!eof1) { + inpString = br1.readLine(); + if (null == inpString) { + eof1 = true; + } else { + wrtString = brw.readLine(); + if (null == wrtString) { + return false; + } + if (!wrtString.equals(inpString)) { + return false; + } + } + } + if (!eof2) { + inpString = br2.readLine(); + if (null == inpString) { + eof2 = true; + } else { + wrtString = brw.readLine(); + if (null == wrtString) { + return false; + } + if (!wrtString.equals(inpString)) { + return false; + } + } + } + } + + wrtString = brw.readLine(); + if (null != wrtString) { + return false; + } + + br1.close(); + br2.close(); + brw.close(); + + return true; + } + + /** + * Read from multiple files and write their content into another file, + * checkpointing and restoring in between. + * + * @param args Not used. 
+ */ + public static void main(String[] args) { + MappedByteBuffer b = null; + String s; + int i = 0; + Logger logger = null; + try { + logger = Logger.getLogger(Helper.PACKAGE_NAME + "." + TESTNAME); + File f = new File(Helper.MEMORY_MAPPED_FILE_NAME); + File readFile1 = new File(Helper.SOURCE_FOLDER + "/" + "FileRead.java"); + File readFile2 = new File(Helper.SOURCE_FOLDER + "/" + "ReadWrite.java"); + File writeFile = new File(Helper.OUTPUT_FOLDER_NAME + "/" + TESTNAME + "/" + "MultipleFileRead_file.txt"); + boolean eofFile1 = false, eofFile2 = false, check; + RuntimeMXBean bean = ManagementFactory.getRuntimeMXBean(); + String pid = bean.getName(); + int val = Helper.init(TESTNAME, pid, logger); + if (0 != val) { + logger.log(Level.SEVERE, "Helper.init returned a non-zero code."); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + logger.log(Level.INFO, "Test init done; pid written to pid file; beginning with test"); + + FileChannel channel = FileChannel.open(f.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE); + b = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE); + channel.close(); + /* + * Mapped Byte Buffer should be in init state at the beginning of test + */ + if (b.getChar(Helper.MAPPED_INDEX) != Helper.STATE_INIT) { + logger.log(Level.SEVERE, "Error: Error in memory mapping, test is not in init state"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + logger.log(Level.INFO, "Checking existence of the read files"); + + if (!readFile1.exists()) { + logger.log(Level.SEVERE, "Error: File from which to read does not exist"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + if (!readFile2.exists()) { + logger.log(Level.SEVERE, "Error: File from which to read does not exist"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + if (writeFile.exists()) { + writeFile.delete(); + } + logger.log(Level.INFO, 
"Creating writeFile"); + boolean newFile = writeFile.createNewFile(); + if (!newFile) { + logger.log(Level.SEVERE, "Error: Cannot create a new file to write to."); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + + BufferedReader br1 = new BufferedReader(new FileReader(readFile1)); + BufferedReader br2 = new BufferedReader(new FileReader(readFile2)); + BufferedWriter brw = new BufferedWriter(new FileWriter(writeFile)); + + logger.log(Level.INFO, "Writing in file"); + + while (!eofFile1 || !eofFile2) { + if (!eofFile1) { + s = br1.readLine(); + i++; + if (null == s) { + eofFile1 = true; + } else { + brw.write(s + "\n"); + } + } + if (!eofFile2) { + s = br2.readLine(); + i++; + if (null == s) { + eofFile2 = true; + } else { + brw.write(s + "\n"); + } + } + if (10 == i) { + /* + * Checkpoint and Restore + */ + logger.log(Level.INFO, "Going to checkpoint"); + Helper.checkpointAndWait(b, logger); + logger.log(Level.INFO, "Test has been restored!"); + } + } + brw.flush(); + logger.log(Level.INFO, "Checking the content of the file"); + check = compare(readFile1, readFile2, writeFile); + + if (!check) { + logger.log(Level.SEVERE, "Error: Files are not similar after writing"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + logger.log(Level.INFO, "The file has been written as expected"); + logger.log(Level.INFO, Helper.PASS_MESSAGE); + br1.close(); + br2.close(); + brw.close(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_PASS); + System.exit(0); + + } catch (Exception e) { + if (null != logger) { + StringWriter writer = new StringWriter(); + PrintWriter printWriter = new PrintWriter(writer); + e.printStackTrace(printWriter); + logger.log(Level.SEVERE, "Exception occurred:" + e); + logger.log(Level.FINE, writer.toString()); + } + + if (null != b) { + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + System.exit(5); + } + } +} diff --git a/test/javaTests/src/org/criu/java/tests/MultipleFileWrite.java 
b/test/javaTests/src/org/criu/java/tests/MultipleFileWrite.java new file mode 100644 index 000000000..76d287a07 --- /dev/null +++ b/test/javaTests/src/org/criu/java/tests/MultipleFileWrite.java @@ -0,0 +1,140 @@ +package org.criu.java.tests; + +import java.io.*; +import java.lang.management.ManagementFactory; +import java.lang.management.RuntimeMXBean; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.channels.FileChannel.MapMode; +import java.nio.file.StandardOpenOption; +import java.util.logging.Level; +import java.util.logging.Logger; + +class MultipleFileWrite { + private static String TESTNAME = "MultipleFileWrite"; + + /** + * Reads from a file and write its content into multiple files, + * checkpointing and restoring in between. + * + * @param args Not used. + */ + public static void main(String[] args) { + MappedByteBuffer b = null; + String s, pid; + int i = 1; + Logger logger = null; + boolean similar1, similar2; + try { + File readFile = new File(Helper.SOURCE_FOLDER + "/" + "FileRead.java"); + File writeFile1 = new File(Helper.OUTPUT_FOLDER_NAME + "/" + TESTNAME + "/" + TESTNAME + "1_file.txt"); + File writeFile2 = new File(Helper.OUTPUT_FOLDER_NAME + "/" + TESTNAME + "/" + TESTNAME + "2_file.txt"); + logger = Logger.getLogger(Helper.PACKAGE_NAME + "." 
+ TESTNAME); + File f = new File(Helper.MEMORY_MAPPED_FILE_NAME); + RuntimeMXBean bean = ManagementFactory.getRuntimeMXBean(); + pid = bean.getName(); + int val = Helper.init(TESTNAME, pid, logger); + if (0 != val) { + logger.log(Level.SEVERE, "Helper.init returned a non-zero code."); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + logger.log(Level.INFO, "Test init done; pid written to pid file; beginning with test"); + + FileChannel channel = FileChannel.open(f.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE); + b = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE); + channel.close(); + /* + * Mapped Byte Buffer should be in init state at the beginning of test + */ + if (Helper.STATE_INIT != b.getChar(Helper.MAPPED_INDEX)) { + logger.log(Level.SEVERE, "Error: Error in memory mapping, test is not in init state"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + logger.log(Level.INFO, "Checking existence of read files!"); + + if (!readFile.exists()) { + logger.log(Level.SEVERE, "Error: File from which to read does not exist"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + if (writeFile1.exists()) { + writeFile1.delete(); + } + boolean newFile = writeFile1.createNewFile(); + if (!newFile) { + logger.log(Level.SEVERE, "Error: Cannot create a new file to write to."); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + + if (writeFile2.exists()) { + writeFile2.delete(); + } + newFile = writeFile2.createNewFile(); + if (!newFile) { + logger.log(Level.SEVERE, "Error: Cannot create a new file to write to."); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + logger.log(Level.INFO, "Created write files"); + + BufferedReader br = new BufferedReader(new FileReader(readFile)); + BufferedWriter bw1 = new BufferedWriter(new FileWriter(writeFile1)); + BufferedWriter bw2 = new BufferedWriter(new 
FileWriter(writeFile2)); + + s = br.readLine(); + + while (null != s) { + bw1.write(s + "\n"); + bw2.write(s + "\n"); + if (90 == i) { + /* + * Checkpoint and Restore + */ + logger.log(Level.INFO, "Going to checkpoint"); + Helper.checkpointAndWait(b, logger); + logger.log(Level.INFO, "Test has been restored!"); + } + + i++; + s = br.readLine(); + } + + bw1.flush(); + bw2.flush(); + logger.log(Level.INFO, "Checking files have been written correctly"); + + similar1 = Helper.compare(readFile, writeFile1); + similar2 = Helper.compare(readFile, writeFile2); + + if (!similar1 || !similar2) { + logger.log(Level.SEVERE, "Error: Written data is not identical to the data read"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + logger.log(Level.INFO, "Content of files is as expected"); + logger.log(Level.INFO, Helper.PASS_MESSAGE); + br.close(); + bw1.close(); + bw2.close(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_PASS); + System.exit(0); + + } catch (Exception e) { + if (null != logger) { + StringWriter writer = new StringWriter(); + PrintWriter printWriter = new PrintWriter(writer); + e.printStackTrace(printWriter); + logger.log(Level.SEVERE, "Exception occurred:" + e); + logger.log(Level.FINE, writer.toString()); + } + + if (null != b) { + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + System.exit(5); + } + } +} diff --git a/test/javaTests/src/org/criu/java/tests/ReadWrite.java b/test/javaTests/src/org/criu/java/tests/ReadWrite.java new file mode 100644 index 000000000..fa98447ed --- /dev/null +++ b/test/javaTests/src/org/criu/java/tests/ReadWrite.java @@ -0,0 +1,119 @@ +package org.criu.java.tests; + +import java.io.*; +import java.lang.management.ManagementFactory; +import java.lang.management.RuntimeMXBean; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.channels.FileChannel.MapMode; +import java.nio.file.StandardOpenOption; +import java.util.logging.Level; +import 
java.util.logging.Logger; + +class ReadWrite { + private static String TESTNAME = "ReadWrite"; + + /** + * Read from a file and write its content into another file, + * checkpointing and restoring in between. + * + * @param args Not used. + */ + public static void main(String[] args) { + int i = 0; + String s, pid; + boolean similar; + MappedByteBuffer b = null; + Logger logger = null; + try { + File readFile = new File(Helper.SOURCE_FOLDER + "/" + "FileRead.java"); + File writeFile = new File(Helper.OUTPUT_FOLDER_NAME + "/" + TESTNAME + "/" + "ReadWrite_file.txt"); + logger = Logger.getLogger(Helper.PACKAGE_NAME + "." + TESTNAME); + File f = new File(Helper.MEMORY_MAPPED_FILE_NAME); + RuntimeMXBean bean = ManagementFactory.getRuntimeMXBean(); + pid = bean.getName(); + int val = Helper.init(TESTNAME, pid, logger); + if (0 != val) { + logger.log(Level.SEVERE, "Helper.init returned a non-zero code."); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + logger.log(Level.INFO, "Test init done; pid written to pid file; beginning with test"); + FileChannel channel = FileChannel.open(f.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE); + b = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE); + channel.close(); + /* + * Mapped Byte Buffer should be in init state at the beginning of test + */ + if (Helper.STATE_INIT != b.getChar(Helper.MAPPED_INDEX)) { + logger.log(Level.SEVERE, "Error: Error in memory mapping, test is not in init state"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + logger.log(Level.INFO, "Checking existence of files to be read!"); + if (!readFile.exists()) { + logger.log(Level.SEVERE, "Error: File from which to read does not exist"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + + if (writeFile.exists()) { + writeFile.delete(); + } + logger.log(Level.INFO, "Creating the writeFile"); + boolean newFile = 
writeFile.createNewFile(); + if (!newFile) { + logger.log(Level.SEVERE, "Error: Cannot create a new file to write to."); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + + BufferedReader brr = new BufferedReader(new FileReader(readFile)); + BufferedWriter brw = new BufferedWriter(new FileWriter(writeFile)); + logger.log(Level.INFO, "Start writing"); + + s = brr.readLine(); + + while (null != s) { + i++; + brw.write(s + "\n"); + + if (50 == i) { + /* + * Checkpoint and Restore + */ + logger.log(Level.INFO, "Going to checkpoint"); + Helper.checkpointAndWait(b, logger); + logger.log(Level.INFO, "Test has been restored!"); + } + s = brr.readLine(); + } + + brw.flush(); + logger.log(Level.INFO, "Checking content of the files."); + similar = Helper.compare(readFile, writeFile); + + if (!similar) { + logger.log(Level.SEVERE, "Error: Files are not similar after writing"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + logger.log(Level.INFO, "Content of file is as expected"); + logger.log(Level.INFO, Helper.PASS_MESSAGE); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_PASS); + System.exit(0); + } catch (Exception e) { + if (null != logger) { + StringWriter writer = new StringWriter(); + PrintWriter printWriter = new PrintWriter(writer); + e.printStackTrace(printWriter); + logger.log(Level.SEVERE, "Exception occurred:" + e); + logger.log(Level.FINE, writer.toString()); + } + if (null != b) { + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + System.exit(5); + } + } +} diff --git a/test/javaTests/test.xml b/test/javaTests/test.xml index 8ff67c5e0..b73a31db2 100644 --- a/test/javaTests/test.xml +++ b/test/javaTests/test.xml @@ -4,10 +4,40 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + From 3ca09b191429a4260a12daf6bbaf58da2aebd656 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Wed, 20 Nov 2019 11:01:33 +0300 Subject: [PATCH 0265/2030] travis: ignore fails of podman-test until it will not be 
fixed. Signed-off-by: Andrei Vagin --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index b27dbfe7b..e6e410191 100644 --- a/.travis.yml +++ b/.travis.yml @@ -81,6 +81,7 @@ matrix: - env: TR_ARCH=docker-test - env: TR_ARCH=fedora-rawhide - env: TR_ARCH=local GCOV=1 + - env: TR_ARCH=podman-test script: - sudo make CCACHE=1 -C scripts/travis $TR_ARCH after_success: From b5b1c4ec4506df7cee6a9ba8ffff36f43e0cd8e3 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Thu, 21 Nov 2019 01:24:44 +0300 Subject: [PATCH 0266/2030] kerndat: check whether the new mount API is supported of not Signed-off-by: Andrei Vagin --- Makefile.config | 2 +- .../arch/arm/plugins/std/syscalls/syscall.def | 3 ++ .../plugins/std/syscalls/syscall-ppc64.tbl | 3 ++ .../plugins/std/syscalls/syscall-s390.tbl | 3 ++ .../x86/plugins/std/syscalls/syscall_32.tbl | 3 ++ .../x86/plugins/std/syscalls/syscall_64.tbl | 3 ++ criu/include/kerndat.h | 1 + criu/include/linux/mount.h | 35 +++++++++++++++++++ criu/kerndat.c | 16 +++++++++ scripts/feature-tests.mak | 12 +++++++ 10 files changed, 80 insertions(+), 1 deletion(-) create mode 100644 criu/include/linux/mount.h diff --git a/Makefile.config b/Makefile.config index 5af3fed38..81aae24f8 100644 --- a/Makefile.config +++ b/Makefile.config @@ -47,7 +47,7 @@ export DEFINES += $(FEATURE_DEFINES) export CFLAGS += $(FEATURE_DEFINES) FEATURES_LIST := TCP_REPAIR STRLCPY STRLCAT PTRACE_PEEKSIGINFO \ - SETPROCTITLE_INIT MEMFD TCP_REPAIR_WINDOW + SETPROCTITLE_INIT MEMFD TCP_REPAIR_WINDOW FSCONFIG # $1 - config name define gen-feature-test diff --git a/compel/arch/arm/plugins/std/syscalls/syscall.def b/compel/arch/arm/plugins/std/syscalls/syscall.def index 721ff16dc..d5bdc677e 100644 --- a/compel/arch/arm/plugins/std/syscalls/syscall.def +++ b/compel/arch/arm/plugins/std/syscalls/syscall.def @@ -112,3 +112,6 @@ userfaultfd 282 388 (int flags) fallocate 47 352 (int fd, int mode, loff_t offset, loff_t len) cacheflush ! 
983042 (void *start, void *end, int flags) ppoll 73 336 (struct pollfd *fds, unsigned int nfds, const struct timespec *tmo, const sigset_t *sigmask, size_t sigsetsize) +fsopen 430 430 (char *fsname, unsigned int flags) +fsconfig 431 431 (int fd, unsigned int cmd, const char *key, const char *value, int aux) +fsmount 432 432 (int fd, unsigned int flags, unsigned int attr_flags) diff --git a/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl b/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl index 3b3079040..4e283d5e9 100644 --- a/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl +++ b/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl @@ -108,3 +108,6 @@ __NR_gettimeofday 78 sys_gettimeofday (struct timeval *tv, struct timezone *tz) __NR_preadv 320 sys_preadv_raw (int fd, struct iovec *iov, unsigned long nr, unsigned long pos_l, unsigned long pos_h) __NR_userfaultfd 364 sys_userfaultfd (int flags) __NR_ppoll 281 sys_ppoll (struct pollfd *fds, unsigned int nfds, const struct timespec *tmo, const sigset_t *sigmask, size_t sigsetsize) +__NR_fsopen 430 sys_fsopen (char *fsname, unsigned int flags) +__NR_fsconfig 431 sys_fsconfig (int fd, unsigned int cmd, const char *key, const char *value, int aux) +__NR_fsmount 432 sys_fsmount (int fd, unsigned int flags, unsigned int attr_flags) diff --git a/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl b/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl index cc13a63dd..fd48e3950 100644 --- a/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl +++ b/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl @@ -108,3 +108,6 @@ __NR_userfaultfd 355 sys_userfaultfd (int flags) __NR_preadv 328 sys_preadv_raw (int fd, struct iovec *iov, unsigned long nr, unsigned long pos_l, unsigned long pos_h) __NR_gettimeofday 78 sys_gettimeofday (struct timeval *tv, struct timezone *tz) __NR_ppoll 302 sys_ppoll (struct pollfd *fds, unsigned int nfds, const struct timespec *tmo, const sigset_t *sigmask, size_t 
sigsetsize) +__NR_fsopen 430 sys_fsopen (char *fsname, unsigned int flags) +__NR_fsconfig 431 sys_fsconfig (int fd, unsigned int cmd, const char *key, const char *value, int aux) +__NR_fsmount 432 sys_fsmount (int fd, unsigned int flags, unsigned int attr_flags) diff --git a/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl b/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl index 7903ab150..038aeb4f7 100644 --- a/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl +++ b/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl @@ -96,3 +96,6 @@ __NR_seccomp 354 sys_seccomp (unsigned int op, unsigned int flags, const char __NR_memfd_create 356 sys_memfd_create (const char *name, unsigned int flags) __NR_userfaultfd 374 sys_userfaultfd (int flags) __NR_ppoll 309 sys_ppoll (struct pollfd *fds, unsigned int nfds, const struct timespec *tmo, const sigset_t *sigmask, size_t sigsetsize) +__NR_fsopen 430 sys_fsopen (char *fsname, unsigned int flags) +__NR_fsconfig 431 sys_fsconfig (int fd, unsigned int cmd, const char *key, const char *value, int aux) +__NR_fsmount 432 sys_fsmount (int fd, unsigned int flags, unsigned int attr_flags) diff --git a/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl b/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl index 4ac9164ea..215f32026 100644 --- a/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl +++ b/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl @@ -107,3 +107,6 @@ __NR_kcmp 312 sys_kcmp (pid_t pid1, pid_t pid2, int type, unsigned long idx1 __NR_memfd_create 319 sys_memfd_create (const char *name, unsigned int flags) __NR_userfaultfd 323 sys_userfaultfd (int flags) __NR_ppoll 271 sys_ppoll (struct pollfd *fds, unsigned int nfds, const struct timespec *tmo, const sigset_t *sigmask, size_t sigsetsize) +__NR_fsopen 430 sys_fsopen (char *fsname, unsigned int flags) +__NR_fsconfig 431 sys_fsconfig (int fd, unsigned int cmd, const char *key, const char *value, int aux) +__NR_fsmount 432 sys_fsmount (int fd, unsigned int flags, 
unsigned int attr_flags) diff --git a/criu/include/kerndat.h b/criu/include/kerndat.h index d93e07813..771195860 100644 --- a/criu/include/kerndat.h +++ b/criu/include/kerndat.h @@ -65,6 +65,7 @@ struct kerndat_s { bool x86_has_ptrace_fpu_xsave_bug; bool has_inotify_setnextwd; bool has_kcmp_epoll_tfd; + bool has_fsopen; }; extern struct kerndat_s kdat; diff --git a/criu/include/linux/mount.h b/criu/include/linux/mount.h new file mode 100644 index 000000000..aa6be69ec --- /dev/null +++ b/criu/include/linux/mount.h @@ -0,0 +1,35 @@ +#ifndef _CRIU_LINUX_MOUNT_H +#define _CRIU_LINUX_MOUNT_H + +#include "common/config.h" +#include "compel/plugins/std/syscall-codes.h" + +#ifdef CONFIG_HAS_FSCONFIG +#include +#else +enum fsconfig_command { + FSCONFIG_SET_FLAG = 0, /* Set parameter, supplying no value */ + FSCONFIG_SET_STRING = 1, /* Set parameter, supplying a string value */ + FSCONFIG_SET_BINARY = 2, /* Set parameter, supplying a binary blob value */ + FSCONFIG_SET_PATH = 3, /* Set parameter, supplying an object by path */ + FSCONFIG_SET_PATH_EMPTY = 4, /* Set parameter, supplying an object by (empty) path */ + FSCONFIG_SET_FD = 5, /* Set parameter, supplying an object by fd */ + FSCONFIG_CMD_CREATE = 6, /* Invoke superblock creation */ + FSCONFIG_CMD_RECONFIGURE = 7, /* Invoke superblock reconfiguration */ +}; +#endif + +static inline int sys_fsopen(const char *fsname, unsigned int flags) +{ + return syscall(__NR_fsopen, fsname, flags); +} +static inline int sys_fsconfig(int fd, unsigned int cmd, const char *key, const char *value, int aux) +{ + return syscall(__NR_fsconfig, fd, cmd, key, value, aux); +} +static inline int sys_fsmount(int fd, unsigned int flags, unsigned int attr_flags) +{ + return syscall(__NR_fsmount, fd, flags, attr_flags); +} + +#endif diff --git a/criu/kerndat.c b/criu/kerndat.c index 39cacb8fe..b0dd83135 100644 --- a/criu/kerndat.c +++ b/criu/kerndat.c @@ -723,6 +723,20 @@ static int kerndat_has_inotify_setnextwd(void) return ret; } +static int 
kerndat_has_fsopen(void) +{ + if (syscall(__NR_fsopen, NULL, -1) != -1) { + pr_err("fsopen should fail\n"); + return -1; + } + if (errno == ENOSYS) + pr_info("The new mount API (fsopen, fsmount) isn't supported\n"); + else + kdat.has_fsopen = true; + + return 0; +} + static int has_kcmp_epoll_tfd(void) { kcmp_epoll_slot_t slot = { }; @@ -1043,6 +1057,8 @@ int kerndat_init(void) ret = kerndat_has_inotify_setnextwd(); if (!ret) ret = has_kcmp_epoll_tfd(); + if (!ret) + ret = kerndat_has_fsopen(); kerndat_lsm(); kerndat_mmap_min_addr(); diff --git a/scripts/feature-tests.mak b/scripts/feature-tests.mak index e39d97bb1..39ddfd053 100644 --- a/scripts/feature-tests.mak +++ b/scripts/feature-tests.mak @@ -136,3 +136,15 @@ ENTRY(main) nop END(main) endef + +define FEATURE_TEST_FSCONFIG + +#include + +int main(void) +{ + if (FSCONFIG_CMD_CREATE > 0) + return 0; + return 0; +} +endef From 4997a096e4ffad4778a24f903e4450842171e576 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Mon, 25 Nov 2019 09:50:08 +0300 Subject: [PATCH 0267/2030] util: introduce the mount_detached_fs helper Signed-off-by: Andrei Vagin --- criu/include/util.h | 2 ++ criu/util.c | 26 ++++++++++++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/criu/include/util.h b/criu/include/util.h index 313aacd8c..45bebf673 100644 --- a/criu/include/util.h +++ b/criu/include/util.h @@ -380,4 +380,6 @@ static inline void print_stack_trace(pid_t pid) {} ___ret; \ }) +extern int mount_detached_fs(const char *fsname); + #endif /* __CR_UTIL_H__ */ diff --git a/criu/util.c b/criu/util.c index e47e109ae..3bae18ab2 100644 --- a/criu/util.c +++ b/criu/util.c @@ -28,6 +28,8 @@ #include #include +#include "linux/mount.h" + #include "kerndat.h" #include "page.h" #include "util.h" @@ -1423,3 +1425,27 @@ void print_stack_trace(pid_t pid) free(strings); } #endif + +int mount_detached_fs(const char *fsname) +{ + int fsfd, fd; + + fsfd = sys_fsopen(fsname, 0); + if (fsfd < 0) { + pr_perror("Unable to open the %s file 
system", fsname); + return -1; + } + + if (sys_fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0) < 0) { + pr_perror("Unable to create the %s file system", fsname); + close(fsfd); + return -1; + } + + fd = sys_fsmount(fsfd, 0, 0); + if (fd < 0) + pr_perror("Unable to mount the %s file system", fsname); + close(fsfd); + return fd; +} + From 1a2d8ad7e162adf95124064109c959c7f7beb77a Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Thu, 21 Nov 2019 01:26:38 +0300 Subject: [PATCH 0268/2030] mount: use new mount API to open the proc file system It doesn't require to create a temporary directory and mount the proc file system in it. Signed-off-by: Andrei Vagin --- criu/cr-restore.c | 42 ++++++++++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/criu/cr-restore.c b/criu/cr-restore.c index 5694931f4..b920ce262 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -23,6 +23,8 @@ #include #include "common/compiler.h" +#include "linux/mount.h" + #include "clone-noasan.h" #include "cr_options.h" #include "servicefd.h" @@ -1585,27 +1587,39 @@ static void restore_pgid(void) futex_set_and_wake(&rsti(current)->pgrp_set, 1); } +static int __legacy_mount_proc() +{ + char proc_mountpoint[] = "/tmp/crtools-proc.XXXXXX"; + int fd; + + if (mkdtemp(proc_mountpoint) == NULL) { + pr_perror("mkdtemp failed %s", proc_mountpoint); + return -1; + } + + pr_info("Mount procfs in %s\n", proc_mountpoint); + if (mount("proc", proc_mountpoint, "proc", MS_MGC_VAL | MS_NOSUID | MS_NOEXEC | MS_NODEV, NULL)) { + pr_perror("mount failed"); + if (rmdir(proc_mountpoint)) + pr_perror("Unable to remove %s", proc_mountpoint); + return -1; + } + + fd = open_detach_mount(proc_mountpoint); + return fd; +} + static int mount_proc(void) { int fd, ret; - char proc_mountpoint[] = "/tmp/crtools-proc.XXXXXX"; if (root_ns_mask == 0) fd = ret = open("/proc", O_DIRECTORY); else { - if (mkdtemp(proc_mountpoint) == NULL) { - pr_perror("mkdtemp failed %s", proc_mountpoint); 
- return -1; - } - - pr_info("Mount procfs in %s\n", proc_mountpoint); - if (mount("proc", proc_mountpoint, "proc", MS_MGC_VAL | MS_NOSUID | MS_NOEXEC | MS_NODEV, NULL)) { - pr_perror("mount failed"); - rmdir(proc_mountpoint); - return -1; - } - - ret = fd = open_detach_mount(proc_mountpoint); + if (kdat.has_fsopen) + fd = ret = mount_detached_fs("proc"); + else + fd = ret = __legacy_mount_proc(); } if (fd >= 0) { From 76e4d31a3fa6a8d4ccee9a111c212c27ab69474f Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Mon, 25 Nov 2019 09:51:40 +0300 Subject: [PATCH 0269/2030] net: use new mount API to open the sysfs file system It doesn't require to create a temporary directory and mount the proc file system in it. Signed-off-by: Andrei Vagin --- criu/net.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/criu/net.c b/criu/net.c index 9825db10f..5822de629 100644 --- a/criu/net.c +++ b/criu/net.c @@ -2133,6 +2133,11 @@ static int mount_ns_sysfs(void) BUG_ON(ns_sysfs_fd != -1); + if (kdat.has_fsopen) { + ns_sysfs_fd = mount_detached_fs("sysfs"); + return ns_sysfs_fd >= 0 ? 0 : -1; + } + /* * A new mntns is required to avoid the race between * open_detach_mount and creating mntns. From be43c3b840b657a6a31a6885ca6e03da70de1b04 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Mon, 25 Nov 2019 09:52:25 +0300 Subject: [PATCH 0270/2030] cgroup: use new mount API to open the cgroup file system It doesn't require to create a temporary directory and mount the proc file system in it. 
Signed-off-by: Andrei Vagin --- criu/cgroup.c | 105 +++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 82 insertions(+), 23 deletions(-) diff --git a/criu/cgroup.c b/criu/cgroup.c index a66fc960e..d4c712167 100644 --- a/criu/cgroup.c +++ b/criu/cgroup.c @@ -8,6 +8,7 @@ #include #include #include + #include "common/list.h" #include "xmalloc.h" #include "cgroup.h" @@ -24,6 +25,8 @@ #include "protobuf.h" #include "images/core.pb-c.h" #include "images/cgroup.pb-c.h" +#include "kerndat.h" +#include "linux/mount.h" /* * This structure describes set of controller groups @@ -542,6 +545,84 @@ static int add_freezer_state(struct cg_controller *controller) return 0; } +static const char namestr[] = "name="; +static int __new_open_cgroupfs(struct cg_ctl *cc) +{ + int fsfd, fd; + char *name; + + fsfd = sys_fsopen("cgroup", 0); + if (fsfd < 0) { + pr_perror("Unable to open the cgroup file system"); + return -1; + } + + if (strstartswith(cc->name, namestr)) { + if (sys_fsconfig(fsfd, FSCONFIG_SET_STRING, + "name", cc->name + strlen(namestr), 0)) { + pr_perror("Unable to configure the cgroup (%s) file system", cc->name); + goto err; + } + } else { + char *saveptr = NULL, *buf = strdupa(cc->name); + name = strtok_r(buf, ",", &saveptr); + while (name) { + if (sys_fsconfig(fsfd, FSCONFIG_SET_FLAG, name, NULL, 0)) { + pr_perror("Unable to configure the cgroup (%s) file system", name); + goto err; + } + name = strtok_r(NULL, ",", &saveptr); + } + } + + if (sys_fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0)) { + pr_perror("Unable to create the cgroup (%s) file system", cc->name); + goto err; + } + + fd = sys_fsmount(fsfd, 0, 0); + if (fd < 0) + pr_perror("Unable to mount the cgroup (%s) file system", cc->name); + close(fsfd); + + return fd; +err: + close(fsfd); + return -1; +} + +static int open_cgroupfs(struct cg_ctl *cc) +{ + char prefix[] = ".criu.cgmounts.XXXXXX"; + char mopts[1024]; + int fd; + + if (kdat.has_fsopen) + return __new_open_cgroupfs(cc); + + if 
(strstartswith(cc->name, namestr)) + snprintf(mopts, sizeof(mopts), "none,%s", cc->name); + else + snprintf(mopts, sizeof(mopts), "%s", cc->name); + + if (mkdtemp(prefix) == NULL) { + pr_perror("can't make dir for cg mounts"); + return -1; + } + + if (mount("none", prefix, "cgroup", 0, mopts) < 0) { + pr_perror("Unable to mount %s", mopts); + rmdir(prefix); + return -1; + } + + fd = open_detach_mount(prefix); + if (fd < 0) + return -1; + + return fd; +} + static int collect_cgroups(struct list_head *ctls) { struct cg_ctl *cc; @@ -550,8 +631,6 @@ static int collect_cgroups(struct list_head *ctls) list_for_each_entry(cc, ctls, l) { char path[PATH_MAX], *root; - char prefix[] = ".criu.cgmounts.XXXXXX"; - const char namestr[] = "name="; struct cg_controller *cg; struct cg_root_opt *o; @@ -603,27 +682,7 @@ static int collect_cgroups(struct list_head *ctls) return -1; } } else { - char mopts[1024]; - - if (strstartswith(cc->name, namestr)) - snprintf(mopts, sizeof(mopts), "none,%s", cc->name); - else - snprintf(mopts, sizeof(mopts), "%s", cc->name); - - if (mkdtemp(prefix) == NULL) { - pr_perror("can't make dir for cg mounts"); - return -1; - } - - if (mount("none", prefix, "cgroup", 0, mopts) < 0) { - pr_perror("couldn't mount %s", mopts); - rmdir(prefix); - return -1; - } - - fd = open_detach_mount(prefix); - if (fd < 0) - return -1; + fd = open_cgroupfs(cc); } path_pref_len = snprintf(path, PATH_MAX, "/proc/self/fd/%d", fd); From af7e5f994b4d2221af1a0110dbfe5bdadd67f964 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Tue, 26 Nov 2019 07:26:31 +0300 Subject: [PATCH 0271/2030] readme: github pull-requests is the preferred way to contribute We will continue accepting patches. Signed-off-by: Andrei Vagin --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 558e87160..6a578b953 100644 --- a/README.md +++ b/README.md @@ -67,7 +67,7 @@ Here are some useful hints to get involved. 
* CRIU does need [extensive testing](https://github.com/checkpoint-restore/criu/issues?q=is%3Aissue+is%3Aopen+label%3Atesting); * Documentation is always hard, we have [some information](https://criu.org/Category:Empty_articles) that is to be extracted from people's heads into wiki pages as well as [some texts](https://criu.org/Category:Editor_help_needed) that all need to be converted into useful articles; * Feedback is expected on the github issues page and on the [mailing list](https://lists.openvz.org/mailman/listinfo/criu); -* For historical reasons we do not accept PRs, instead [patches are welcome](http://criu.org/How_to_submit_patches); +* We accept github pull requests and this is the preferred way to contribute to CRIU. If you prefer to send patches by email, you are welcome to send them to [the devel list](http://criu.org/How_to_submit_patches); * Spread the word about CRIU in [social networks](http://criu.org/Contacts); * If you're giving a talk about CRIU -- let us know, we'll mention it on the [wiki main page](https://criu.org/News/events); From 2237666ac1d277051d2bb90796fb1a0c5febb885 Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Tue, 12 Nov 2019 14:31:08 +0300 Subject: [PATCH 0272/2030] restorer/inotify: reorder inotify cleanup after waiting helpers and zombies We've seen ppoll interrupted with signal in VZ7 CT migration tests, that is because in the beggining of CR_STATE_RESTORE_SIGCHLD zombies and helpers die, and that can trigger SIGCHILDs sent to their parents. Adding additional debug (printing "Task..." 
for zombies and helpers) in sigchld_handler I see: (15.644339) pie: 1: Task 10718 exited, status= 0 (15.644349) pie: 1: Cleaning inotify events from 29 (15.644359) pie: 1: Cleaning inotify events from 19 (15.644367) pie: 1: Cleaning inotify events from 10 And previously we had: (05.718449) pie: 104: Cleaning inotify events from 5 (05.718835) pie: 330: Cleaning inotify events from 3 (05.719046) pie: 1: Cleaning inotify events from 23 (05.719164) pie: 80: Cleaning inotify events from 7 (05.719185) pie: 1: Error (criu/pie/restorer.c:1287): Failed to poll from inotify fd: -4 (05.719202) pie: 95: Cleaning inotify events from 6 (05.719269) pie: 1: Error (criu/pie/restorer.c:1890): Restorer fail 1 So reordering cleanup and wait should fix it. Signed-off-by: Pavel Tikhomirov --- criu/pie/restorer.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c index dab58add6..888eb8e65 100644 --- a/criu/pie/restorer.c +++ b/criu/pie/restorer.c @@ -1839,9 +1839,6 @@ long __export_restore_task(struct task_restore_args *args) restore_finish_stage(task_entries_local, CR_STATE_RESTORE); - if (cleanup_current_inotify_events(args)) - goto core_restore_end; - if (wait_helpers(args) < 0) goto core_restore_end; if (wait_zombies(args) < 0) @@ -1854,6 +1851,9 @@ long __export_restore_task(struct task_restore_args *args) goto core_restore_end; } + if (cleanup_current_inotify_events(args)) + goto core_restore_end; + if (!args->compatible_mode) { ret = sys_sigaction(SIGCHLD, &args->sigchld_act, NULL, sizeof(k_rtsigset_t)); From 1d23dc4a3042599cabea90a81c063db453b89abb Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 29 Nov 2019 10:57:29 +0300 Subject: [PATCH 0273/2030] mount: Order call_helper_process calls When we do clone threads in a later stage of restore procedure it may race with helpers which do call clone_noasan by self.
Thus we need to walk over each clone_noasan call and figure out if calling it without last_pid lock is safe. - open_mountpoint: called by fusectl_dump, dump_empty_fs, binfmt_misc_dump, tmpfs_dump -- they all are processing dump stage, thus safe - call_helper_process: try_remount_writable -- called from various places in reg-files.c, in particular open_reg_by_id called in parallel with other threads, needs a lock remount_readonly_mounts -- called from sigreturn_restore, so in parallel, needs a lock - call_in_child_process: prepare_net_namespaces -- called from prepare_namespace which runs before we start forking, no need for lock Thus call_helper_process should use lock_last_pid and unlock_last_pid helpers and wait for subprocess to finish. At the same time, put a warning text into clone_noasan comment so next time we need to use it we would recall the pitfalls. v2: - fix uninitialized ret variable v3: - use exit_code instead of ret Signed-off-by: Cyrill Gorcunov --- criu/clone-noasan.c | 9 +++++++++ criu/mount.c | 21 ++++++++++++++++----- 2 files changed, 25 insertions(+), 5 deletions(-) diff --git a/criu/clone-noasan.c b/criu/clone-noasan.c index 5ca280eb8..5f1858d4d 100644 --- a/criu/clone-noasan.c +++ b/criu/clone-noasan.c @@ -18,6 +18,15 @@ * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69863 * * So the only way is to put this wrapper in separate non-instrumented file + * + * WARNING: When calling clone_noasan make sure your not sitting in a later + * __restore__ phase where other tasks might be creating threads, otherwise + * all calls to clone_noasan should be guarder with + * + * lock_last_pid + * clone_noasan + * ... wait for process to finish ...
+ * unlock_last_pid */ int clone_noasan(int (*fn)(void *), int flags, void *arg) { diff --git a/criu/mount.c b/criu/mount.c index 52e70d376..24a8516c6 100644 --- a/criu/mount.c +++ b/criu/mount.c @@ -3738,27 +3738,38 @@ struct ns_desc mnt_ns_desc = NS_DESC_ENTRY(CLONE_NEWNS, "mnt"); static int call_helper_process(int (*call)(void *), void *arg) { - int pid, status; + int pid, status, exit_code = -1; + + /* + * Running new helper process on the restore must be + * done under last_pid mutex: other tasks may be restoring + * threads and the PID we need there might be occupied by + * this clone() call. + */ + lock_last_pid(); pid = clone_noasan(call, CLONE_VFORK | CLONE_VM | CLONE_FILES | CLONE_IO | CLONE_SIGHAND | CLONE_SYSVSEM, arg); if (pid == -1) { pr_perror("Can't clone helper process"); - return -1; + goto out; } errno = 0; if (waitpid(pid, &status, __WALL) != pid) { pr_perror("Unable to wait %d", pid); - return -1; + goto out; } if (status) { pr_err("Bad child exit status: %d\n", status); - return -1; + goto out; } - return 0; + exit_code = 0; +out: + unlock_last_pid(); + return exit_code; } static int ns_remount_writable(void *arg) From ebe3b52353c5d380d01c332e7d57594995258c18 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Thu, 14 Nov 2019 14:41:04 +0300 Subject: [PATCH 0274/2030] unix: sysctl -- Preserve max_dgram_qlen value The /proc/sys/net/unix/max_dgram_qlen is a per-net variable and we already noticed that systemd inside a container may change its value (for example it sets it to 512 by now instead of kernel's default value 10), thus we need keep it inside image and restore then. 
Signed-off-by: Cyrill Gorcunov Signed-off-by: Alexander Mikhalitsyn Signed-off-by: Alexander Mikhalitsyn --- criu/net.c | 104 +++++++++++++++++++++++++++++++++++++++++++- images/netdev.proto | 1 + 2 files changed, 104 insertions(+), 1 deletion(-) diff --git a/criu/net.c b/criu/net.c index 5822de629..e960a34f9 100644 --- a/criu/net.c +++ b/criu/net.c @@ -210,6 +210,19 @@ char *devconfs6[] = { #define MAX_CONF_OPT_PATH IFNAMSIZ+60 #define MAX_STR_CONF_LEN 200 +static const char *unix_conf_entries[] = { + "max_dgram_qlen", +}; + +/* + * MAX_CONF_UNIX_PATH = (sizeof(CONF_UNIX_FMT) - strlen("%s")) + * + MAX_CONF_UNIX_OPT_PATH + */ +#define CONF_UNIX_BASE "net/unix" +#define CONF_UNIX_FMT CONF_UNIX_BASE"/%s" +#define MAX_CONF_UNIX_OPT_PATH 32 +#define MAX_CONF_UNIX_PATH (sizeof(CONF_UNIX_FMT) + MAX_CONF_UNIX_OPT_PATH - 2) + static int net_conf_op(char *tgt, SysctlEntry **conf, int n, int op, char *proto, struct sysctl_req *req, char (*path)[MAX_CONF_OPT_PATH], int size, char **devconfs, SysctlEntry **def_conf) @@ -339,6 +352,72 @@ static int ipv6_conf_op(char *tgt, SysctlEntry **conf, int n, int op, SysctlEntr devconfs6, def_conf); } +static int unix_conf_op(SysctlEntry ***rconf, size_t *n, int op) +{ + int i, ret = -1, flags = 0; + char path[ARRAY_SIZE(unix_conf_entries)][MAX_CONF_UNIX_PATH] = { }; + struct sysctl_req req[ARRAY_SIZE(unix_conf_entries)] = { }; + SysctlEntry **conf = *rconf; + + if (*n != ARRAY_SIZE(unix_conf_entries)) { + pr_err("unix: Unexpected entries in config (%zu %zu)\n", + *n, ARRAY_SIZE(unix_conf_entries)); + return -EINVAL; + } + + if (opts.weak_sysctls || op == CTL_READ) + flags = CTL_FLAGS_OPTIONAL; + + for (i = 0; i < *n; i++) { + snprintf(path[i], MAX_CONF_UNIX_PATH, CONF_UNIX_FMT, + unix_conf_entries[i]); + req[i].name = path[i]; + req[i].flags = flags; + + switch (conf[i]->type) { + case SYSCTL_TYPE__CTL_32: + req[i].type = CTL_32; + req[i].arg = &conf[i]->iarg; + break; + default: + pr_err("unix: Unknown config type %d\n", + 
conf[i]->type); + return -1; + } + } + + ret = sysctl_op(req, *n, op, CLONE_NEWNET); + if (ret < 0) { + pr_err("unix: Failed to %s %s/\n", + (op == CTL_READ) ? "read" : "write", + CONF_UNIX_BASE); + return -1; + } + + if (op == CTL_READ) { + bool has_entries = false; + + for (i = 0; i < *n; i++) { + if (req[i].flags & CTL_FLAGS_HAS) { + conf[i]->has_iarg = true; + if (!has_entries) + has_entries = true; + } + } + + /* + * Zap the whole section of data. + * Unix conf is optional. + */ + if (!has_entries) { + *n = 0; + *rconf = NULL; + } + } + + return 0; +} + /* * I case if some entry is missing in * the kernel, simply write DEVCONFS_UNUSED @@ -1824,6 +1903,8 @@ static int dump_netns_conf(struct ns_id *ns, struct cr_imgset *fds) int ret = -1; int i; NetnsEntry netns = NETNS_ENTRY__INIT; + SysctlEntry *unix_confs = NULL; + size_t sizex = ARRAY_SIZE(unix_conf_entries); SysctlEntry *def_confs4 = NULL, *all_confs4 = NULL; int size4 = ARRAY_SIZE(devconfs4); SysctlEntry *def_confs6 = NULL, *all_confs6 = NULL; @@ -1840,7 +1921,8 @@ static int dump_netns_conf(struct ns_id *ns, struct cr_imgset *fds) o_buf = buf = xmalloc( i * (sizeof(NetnsId*) + sizeof(NetnsId)) + size4 * (sizeof(SysctlEntry*) + sizeof(SysctlEntry)) * 2 + - size6 * (sizeof(SysctlEntry*) + sizeof(SysctlEntry)) * 2 + size6 * (sizeof(SysctlEntry*) + sizeof(SysctlEntry)) * 2 + + sizex * (sizeof(SysctlEntry*) + sizeof(SysctlEntry)) ); if (!buf) goto out; @@ -1896,6 +1978,16 @@ static int dump_netns_conf(struct ns_id *ns, struct cr_imgset *fds) } } + netns.n_unix_conf = sizex; + netns.unix_conf = xptr_pull_s(&buf, sizex * sizeof(SysctlEntry*)); + unix_confs = xptr_pull_s(&buf, sizex * sizeof(SysctlEntry)); + + for (i = 0; i < sizex; i++) { + sysctl_entry__init(&unix_confs[i]); + netns.unix_conf[i] = &unix_confs[i]; + netns.unix_conf[i]->type = SYSCTL_TYPE__CTL_32; + } + ret = ipv4_conf_op("default", netns.def_conf4, size4, CTL_READ, NULL); if (ret < 0) goto err_free; @@ -1910,6 +2002,10 @@ static int 
dump_netns_conf(struct ns_id *ns, struct cr_imgset *fds) if (ret < 0) goto err_free; + ret = unix_conf_op(&netns.unix_conf, &netns.n_unix_conf, CTL_READ); + if (ret < 0) + goto err_free; + ret = pb_write_one(img_from_set(fds, CR_FD_NETNS), &netns, PB_NETNS); err_free: xfree(o_buf); @@ -2122,6 +2218,12 @@ static int restore_netns_conf(struct ns_id *ns) ret = ipv6_conf_op("default", (netns)->def_conf6, (netns)->n_def_conf6, CTL_WRITE, NULL); } + if ((netns)->unix_conf) { + ret = unix_conf_op(&(netns)->unix_conf, &(netns)->n_unix_conf, CTL_WRITE); + if (ret) + goto out; + } + ns->net.netns = netns; out: return ret; diff --git a/images/netdev.proto b/images/netdev.proto index 476a92ced..ae9c99531 100644 --- a/images/netdev.proto +++ b/images/netdev.proto @@ -71,4 +71,5 @@ message netns_entry { repeated netns_id nsids = 7; optional string ext_key = 8; + repeated sysctl_entry unix_conf = 9; } From 55f7a571f286baa6eac6fe7a020914505a0eb464 Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Thu, 14 Nov 2019 14:50:43 +0300 Subject: [PATCH 0275/2030] zdtm: sysctl net.unix.max_dgram_qlen value preservation test Test checks that if the /proc/sys/net/unix/max_dgram_qlen value has been changed in process net namespace, then it is saved after c/r. 
Signed-off-by: Alexander Mikhalitsyn Signed-off-by: Alexander Mikhalitsyn --- test/zdtm/lib/Makefile | 2 +- test/zdtm/lib/sysctl.c | 59 ++++++++++++++++++++++++++ test/zdtm/lib/sysctl.h | 7 +++ test/zdtm/static/Makefile | 1 + test/zdtm/static/netns_sub_sysctl.c | 56 ++++++++++++++++++++++++ test/zdtm/static/netns_sub_sysctl.desc | 4 ++ 6 files changed, 128 insertions(+), 1 deletion(-) create mode 100644 test/zdtm/lib/sysctl.c create mode 100644 test/zdtm/lib/sysctl.h create mode 100644 test/zdtm/static/netns_sub_sysctl.c create mode 100644 test/zdtm/static/netns_sub_sysctl.desc diff --git a/test/zdtm/lib/Makefile b/test/zdtm/lib/Makefile index d2d9f1cc3..b87f36e8f 100644 --- a/test/zdtm/lib/Makefile +++ b/test/zdtm/lib/Makefile @@ -4,7 +4,7 @@ CFLAGS += $(USERCFLAGS) LIB := libzdtmtst.a -LIBSRC := datagen.c msg.c parseargs.c test.c streamutil.c lock.c ns.c tcp.c fs.c +LIBSRC := datagen.c msg.c parseargs.c test.c streamutil.c lock.c ns.c tcp.c fs.c sysctl.c LIBOBJ := $(LIBSRC:%.c=%.o) BIN := groups diff --git a/test/zdtm/lib/sysctl.c b/test/zdtm/lib/sysctl.c new file mode 100644 index 000000000..9583ec3df --- /dev/null +++ b/test/zdtm/lib/sysctl.c @@ -0,0 +1,59 @@ +#include + +#include "zdtmtst.h" +#include "sysctl.h" + +int sysctl_read_int(const char *name, int *data) +{ + int fd; + int ret; + char buf[16]; + + fd = open(name, O_RDONLY); + if (fd < 0) { + pr_perror("Can't open %s", name); + return fd; + } + + ret = read(fd, buf, sizeof(buf) - 1); + if (ret < 0) { + pr_perror("Can't read %s", name); + ret = -errno; + goto err; + } + + buf[ret] = '\0'; + + *data = (int)strtoul(buf, NULL, 10); + ret = 0; +err: + close(fd); + return ret; +} + +int sysctl_write_int(const char *name, int val) +{ + int fd; + int ret; + char buf[16]; + + fd = open(name, O_WRONLY); + if (fd < 0) { + pr_perror("Can't open %s", name); + return fd; + } + + sprintf(buf, "%d\n", val); + + ret = write(fd, buf, strlen(buf)); + if (ret < 0) { + pr_perror("Can't write %d into %s", val, name); + ret 
= -errno; + goto err; + } + + ret = 0; +err: + close(fd); + return ret; +} diff --git a/test/zdtm/lib/sysctl.h b/test/zdtm/lib/sysctl.h new file mode 100644 index 000000000..67129102f --- /dev/null +++ b/test/zdtm/lib/sysctl.h @@ -0,0 +1,7 @@ +#ifndef __ZDTM_SYSCTL__ +#define __ZDTM_SYSCTL__ + +extern int sysctl_read_int(const char *name, int *data); +extern int sysctl_write_int(const char *name, int val); + +#endif diff --git a/test/zdtm/static/Makefile b/test/zdtm/static/Makefile index 36d00ca5c..f9d2efe74 100644 --- a/test/zdtm/static/Makefile +++ b/test/zdtm/static/Makefile @@ -207,6 +207,7 @@ TST_NOFILE := \ pipe03 \ netns_sub \ netns_sub_veth \ + netns_sub_sysctl \ unlink_multiple_largefiles \ config_inotify_irmap \ thp_disable \ diff --git a/test/zdtm/static/netns_sub_sysctl.c b/test/zdtm/static/netns_sub_sysctl.c new file mode 100644 index 000000000..bf828e08e --- /dev/null +++ b/test/zdtm/static/netns_sub_sysctl.c @@ -0,0 +1,56 @@ +#include + +#include "zdtmtst.h" +#include "sysctl.h" + +const char *test_doc = "Check dump and restore a net.unix.max_dgram_qlen sysctl parameter in subns"; +const char *test_author = "Alexander Mikhalitsyn "; + +typedef struct { + const char *path; + int old; + int new; +} sysctl_opt_t; + +#define CONF_UNIX_BASE "/proc/sys/net/unix" + +static sysctl_opt_t net_unix_params[] = { + {CONF_UNIX_BASE"/max_dgram_qlen", 0, 0}, + {NULL, 0, 0} +}; + +int main(int argc, char **argv) +{ + int ret = 0; + sysctl_opt_t *p; + test_init(argc, argv); + + for (p = net_unix_params; p->path != NULL; p++) { + p->old = (((unsigned)lrand48()) % 1023) + 1; + if (sysctl_write_int(p->path, p->old)) { + pr_perror("Can't change %s", p->path); + return -1; + } + } + + test_daemon(); + test_waitsig(); + + for (p = net_unix_params; p->path != NULL; p++) { + if (sysctl_read_int(p->path, &p->new)) + ret = 1; + + if (p->old != p->new) { + errno = EINVAL; + pr_perror("%s changed: %d ---> %d", p->path, p->old, p->new); + ret = 1; + } + } + + if (ret) + fail(); + 
else + pass(); + + return ret; +} diff --git a/test/zdtm/static/netns_sub_sysctl.desc b/test/zdtm/static/netns_sub_sysctl.desc new file mode 100644 index 000000000..535842668 --- /dev/null +++ b/test/zdtm/static/netns_sub_sysctl.desc @@ -0,0 +1,4 @@ +{ + 'flavor': 'ns', + 'flags': 'suid' +} From 4c46cbc4d86c7578b98e64b8f664cf9c0b0fe978 Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Mon, 16 Dec 2019 15:34:10 +0300 Subject: [PATCH 0276/2030] x86/cpu: cleanup and improve xfeatures_mask check Make xfeatures_mask check explicit. We were relying on our guess about hardware "backward compatibility" and used ">" check here for a long time. But it looks better to explicitly check that all xfeature bits available on the source are also available on the destination. For xsave_size we need to have smaller size on destination than on source, because xsave operation on small allocated buffer may corrupt the nearby data. So split up comments about xfeatures_mask and xsave_size, as having a single comment for quite different cases is less understandable. v2: improve comments, remove extra else-ifs, remove extra typecast Signed-off-by: Pavel Tikhomirov --- criu/arch/x86/cpu.c | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/criu/arch/x86/cpu.c b/criu/arch/x86/cpu.c index 3808b9d33..72c5bd59c 100644 --- a/criu/arch/x86/cpu.c +++ b/criu/arch/x86/cpu.c @@ -236,6 +236,7 @@ static int cpu_validate_features(compel_cpuinfo_t *cpu_info) return -1; if (opts.cpu_cap & CPU_CAP_FPU) { + uint64_t m; /* * If we're requested to check FPU only ignore * any other bit. It's up to a user if the @@ -261,24 +262,33 @@ static int cpu_validate_features(compel_cpuinfo_t *cpu_info) #undef __mismatch_fpu_bit /* - * Make sure the xsave features are compatible.
We already hit the - * issue with libc where we've checkpointed the container on old - * machine but restored on more modern one and libc fetched new - * xsave frame size directly by xsave instruction with greedy - * feature mask causing programs to misbehave. + * Make sure the xsave features are compatible. Check that on + * the destination there are all the features which were on the + * source. */ - if (cpu_info->xfeatures_mask > rt_cpu_info.xfeatures_mask) { - uint64_t m = cpu_info->xfeatures_mask & ~rt_cpu_info.xfeatures_mask; - pr_err("CPU xfeatures has unsupported bits (%#llx)\n", - (unsigned long long)m); + if ((m = cpu_info->xfeatures_mask & + ~rt_cpu_info.xfeatures_mask)) { + pr_err("CPU xfeatures has unsupported bits (%#" + PRIx64")\n", m); return -1; - } else if (cpu_info->xsave_size != rt_cpu_info.xsave_size) { + } + + /* + * Make sure the xsave sizes are compatible. We already hit the + * issue with libc where we've checkpointed the container on + * old machine but restored on more modern one and libc fetched + * new xsave frame size directly by xsave instruction with + * greedy feature mask causing programs to misbehave. 
+ */ + if (cpu_info->xsave_size != rt_cpu_info.xsave_size) { pr_err("CPU xsave size mismatch (%u/%u)\n", cpu_info->xsave_size, rt_cpu_info.xsave_size); return -1; - } else if (cpu_info->xsave_size_max != rt_cpu_info.xsave_size_max) { + } + if (cpu_info->xsave_size_max != rt_cpu_info.xsave_size_max) { pr_err("CPU xsave max size mismatch (%u/%u)\n", - cpu_info->xsave_size_max, rt_cpu_info.xsave_size_max); + cpu_info->xsave_size_max, + rt_cpu_info.xsave_size_max); return -1; } } From 2e656222d78fecc1bf6490bed59078083bdb4351 Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Wed, 18 Dec 2019 23:32:32 +0000 Subject: [PATCH 0277/2030] crit: fix python3 encoding issues Signed-off-by: Nicolas Viennot --- lib/py/images/images.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/py/images/images.py b/lib/py/images/images.py index f4517d845..3eedfca69 100644 --- a/lib/py/images/images.py +++ b/lib/py/images/images.py @@ -244,7 +244,7 @@ class ghost_file_handler: while True: gc = pb.ghost_chunk_entry() buf = f.read(4) - if buf == '': + if len(buf) == 0: break size, = struct.unpack('i', buf) gc.ParseFromString(f.read(size)) @@ -252,13 +252,13 @@ class ghost_file_handler: if no_payload: f.seek(gc.len, os.SEEK_CUR) else: - entry['extra'] = base64.encodebytes(f.read(gc.len)) + entry['extra'] = base64.encodebytes(f.read(gc.len)).decode('utf-8') entries.append(entry) else: if no_payload: f.seek(0, os.SEEK_END) else: - g_entry['extra'] = base64.encodebytes(f.read()) + g_entry['extra'] = base64.encodebytes(f.read()).decode('utf-8') entries.append(g_entry) return entries From 00bb068785a8b1a7c4481e2e7f2c0b9f903d941b Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Fri, 20 Dec 2019 18:09:15 +0000 Subject: [PATCH 0278/2030] scripts: alpine: Install py2 packages with pip The py-future package has been renamed to py3-future [1] and py2 package for yaml has been dropped [2]. 
[1] https://git.alpinelinux.org/aports/commit/main?id=316d44abaed13964e97eb43c095cd1b64e3943ad [2] https://git.alpinelinux.org/aports/commit/main?id=e369c1fd7707a73f2c3e2b11b613198d9a4106de Signed-off-by: Radostin Stoyanov --- scripts/build/Dockerfile.alpine | 4 +--- scripts/build/Dockerfile.openj9-alpine | 3 --- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/scripts/build/Dockerfile.alpine b/scripts/build/Dockerfile.alpine index 70fdf480a..a1d1d9191 100644 --- a/scripts/build/Dockerfile.alpine +++ b/scripts/build/Dockerfile.alpine @@ -27,9 +27,7 @@ RUN mv .ccache /tmp && make mrproper && ccache -sz && \ date && make -j $(nproc) CC="$CC" && date && ccache -s RUN apk add \ - py-yaml \ py-pip \ - py2-future \ ip6tables \ iptables \ iproute2 \ @@ -42,5 +40,5 @@ RUN apk add \ # The rpc test cases are running as user #1000, let's add the user RUN adduser -u 1000 -D test -RUN pip install protobuf ipaddress junit_xml flake8 +RUN pip install PyYAML future protobuf ipaddress junit_xml flake8 RUN make -C test/zdtm diff --git a/scripts/build/Dockerfile.openj9-alpine b/scripts/build/Dockerfile.openj9-alpine index 654e7bf31..43a993444 100644 --- a/scripts/build/Dockerfile.openj9-alpine +++ b/scripts/build/Dockerfile.openj9-alpine @@ -17,9 +17,6 @@ RUN apk update && apk add \ python \ sudo \ maven \ - py-yaml \ - py-pip \ - py2-future \ ip6tables \ iptables \ bash From 0980617e24004ea00e4e0841c97b138f0a4e0073 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Thu, 12 Dec 2019 23:04:30 +0000 Subject: [PATCH 0279/2030] sockets: Remove duplicate variable assignment Signed-off-by: Radostin Stoyanov --- criu/sockets.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/criu/sockets.c b/criu/sockets.c index 312b55c6d..80f3153ba 100644 --- a/criu/sockets.c +++ b/criu/sockets.c @@ -524,7 +524,7 @@ int restore_prepare_socket(int sk) int restore_socket_opts(int sk, SkOptsEntry *soe) { - int ret = 0, val; + int ret = 0, val = 1; struct timeval tv; /* 
In kernel a bufsize value is doubled. */ u32 bufs[2] = { soe->so_sndbuf / 2, soe->so_rcvbuf / 2}; @@ -547,27 +547,22 @@ int restore_socket_opts(int sk, SkOptsEntry *soe) ret |= restore_opt(sk, SOL_SOCKET, SO_MARK, &soe->so_mark); } if (soe->has_so_passcred && soe->so_passcred) { - val = 1; pr_debug("\tset passcred for socket\n"); ret |= restore_opt(sk, SOL_SOCKET, SO_PASSCRED, &val); } if (soe->has_so_passsec && soe->so_passsec) { - val = 1; pr_debug("\tset passsec for socket\n"); ret |= restore_opt(sk, SOL_SOCKET, SO_PASSSEC, &val); } if (soe->has_so_dontroute && soe->so_dontroute) { - val = 1; pr_debug("\tset dontroute for socket\n"); ret |= restore_opt(sk, SOL_SOCKET, SO_DONTROUTE, &val); } if (soe->has_so_no_check && soe->so_no_check) { - val = 1; pr_debug("\tset no_check for socket\n"); ret |= restore_opt(sk, SOL_SOCKET, SO_NO_CHECK, &val); } if (soe->has_so_broadcast && soe->so_broadcast) { - val = 1; pr_debug("\tset broadcast for socket\n"); ret |= restore_opt(sk, SOL_SOCKET, SO_BROADCAST, &val); } From d4e6fc2a0dcff62ff246544d3d9a78d6961f253a Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Fri, 13 Dec 2019 00:10:28 +0000 Subject: [PATCH 0280/2030] socket: c/r support for SO_KEEPALIVE TCP keepalive packets can be used to determine if a connection is still valid. When the SO_KEEPALIVE option is set, TCP packets are periodically sent to keep the connection alive. This patch implements checkpoint/restore support for SO_KEEPALIVE, TCP_KEEPIDLE, TCP_KEEPINTVL and TCP_KEEPCNT options. 
Signed-off-by: Radostin Stoyanov --- criu/include/sk-inet.h | 2 +- criu/sk-inet.c | 6 +++++- criu/sk-tcp.c | 20 +++++++++++++++++++- criu/sockets.c | 20 ++++++++++++++++++++ images/sk-opts.proto | 4 ++++ 5 files changed, 49 insertions(+), 3 deletions(-) diff --git a/criu/include/sk-inet.h b/criu/include/sk-inet.h index 79966517b..dec67ca6c 100644 --- a/criu/include/sk-inet.h +++ b/criu/include/sk-inet.h @@ -83,7 +83,7 @@ extern void tcp_locked_conn_add(struct inet_sk_info *); extern void rst_unlock_tcp_connections(void); extern void cpt_unlock_tcp_connections(void); -extern int dump_one_tcp(int sk, struct inet_sk_desc *sd); +extern int dump_one_tcp(int sk, struct inet_sk_desc *sd, SkOptsEntry *soe); extern int restore_one_tcp(int sk, struct inet_sk_info *si); #define SK_EST_PARAM "tcp-established" diff --git a/criu/sk-inet.c b/criu/sk-inet.c index f9c64c7af..342548585 100644 --- a/criu/sk-inet.c +++ b/criu/sk-inet.c @@ -551,7 +551,7 @@ static int do_dump_one_inet_fd(int lfd, u32 id, const struct fd_parms *p, int fa switch (proto) { case IPPROTO_TCP: - err = (type != SOCK_RAW) ? dump_one_tcp(lfd, sk) : 0; + err = (type != SOCK_RAW) ? 
dump_one_tcp(lfd, sk, &skopts) : 0; break; case IPPROTO_UDP: case IPPROTO_UDPLITE: @@ -747,6 +747,10 @@ static int post_open_inet_sk(struct file_desc *d, int sk) if (!val && restore_opt(sk, SOL_SOCKET, SO_BROADCAST, &val)) return -1; + val = ii->ie->opts->so_keepalive; + if (!val && restore_opt(sk, SOL_SOCKET, SO_KEEPALIVE, &val)) + return -1; + return 0; } diff --git a/criu/sk-tcp.c b/criu/sk-tcp.c index 4fd2eb8e6..7ee603818 100644 --- a/criu/sk-tcp.c +++ b/criu/sk-tcp.c @@ -218,8 +218,26 @@ err_r: return ret; } -int dump_one_tcp(int fd, struct inet_sk_desc *sk) +int dump_one_tcp(int fd, struct inet_sk_desc *sk, SkOptsEntry *soe) { + soe->has_tcp_keepcnt = true; + if (dump_opt(fd, SOL_TCP, TCP_KEEPCNT, &soe->tcp_keepcnt)) { + pr_perror("Can't read TCP_KEEPCNT"); + return -1; + } + + soe->has_tcp_keepidle = true; + if (dump_opt(fd, SOL_TCP, TCP_KEEPIDLE, &soe->tcp_keepidle)) { + pr_perror("Can't read TCP_KEEPIDLE"); + return -1; + } + + soe->has_tcp_keepintvl = true; + if (dump_opt(fd, SOL_TCP, TCP_KEEPINTVL, &soe->tcp_keepintvl)) { + pr_perror("Can't read TCP_KEEPINTVL"); + return -1; + } + if (sk->dst_port == 0) return 0; diff --git a/criu/sockets.c b/criu/sockets.c index 80f3153ba..2e1ce9d7b 100644 --- a/criu/sockets.c +++ b/criu/sockets.c @@ -566,6 +566,22 @@ int restore_socket_opts(int sk, SkOptsEntry *soe) pr_debug("\tset broadcast for socket\n"); ret |= restore_opt(sk, SOL_SOCKET, SO_BROADCAST, &val); } + if (soe->has_so_keepalive && soe->so_keepalive) { + pr_debug("\tset keepalive for socket\n"); + ret |= restore_opt(sk, SOL_SOCKET, SO_KEEPALIVE, &val); + } + if (soe->has_tcp_keepcnt) { + pr_debug("\tset keepcnt for socket\n"); + ret |= restore_opt(sk, SOL_TCP, TCP_KEEPCNT, &soe->tcp_keepcnt); + } + if (soe->has_tcp_keepidle) { + pr_debug("\tset keepidle for socket\n"); + ret |= restore_opt(sk, SOL_TCP, TCP_KEEPIDLE, &soe->tcp_keepidle); + } + if (soe->has_tcp_keepintvl) { + pr_debug("\tset keepintvl for socket\n"); + ret |= restore_opt(sk, SOL_TCP, 
TCP_KEEPINTVL, &soe->tcp_keepintvl); + } tv.tv_sec = soe->so_snd_tmo_sec; tv.tv_usec = soe->so_snd_tmo_usec; @@ -651,6 +667,10 @@ int dump_socket_opts(int sk, SkOptsEntry *soe) soe->has_so_broadcast = true; soe->so_broadcast = val ? true : false; + ret |= dump_opt(sk, SOL_SOCKET, SO_KEEPALIVE, &val); + soe->has_so_keepalive = true; + soe->so_keepalive = val ? true : false; + ret |= dump_bound_dev(sk, soe); ret |= dump_socket_filter(sk, soe); diff --git a/images/sk-opts.proto b/images/sk-opts.proto index c93ec5fd5..336cca22a 100644 --- a/images/sk-opts.proto +++ b/images/sk-opts.proto @@ -23,6 +23,10 @@ message sk_opts_entry { repeated fixed64 so_filter = 16; optional bool so_reuseport = 17; optional bool so_broadcast = 18; + optional bool so_keepalive = 19; + optional uint32 tcp_keepcnt = 20; + optional uint32 tcp_keepidle = 21; + optional uint32 tcp_keepintvl = 22; } enum sk_shutdown { From 8b467dd944f6b3bed0a468800b041efdb218d6e8 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Fri, 13 Dec 2019 04:01:36 +0000 Subject: [PATCH 0281/2030] zdtm: Add test for SO_KEEPALIVE Signed-off-by: Radostin Stoyanov --- test/zdtm/static/Makefile | 3 +- test/zdtm/static/socket-tcp-keepalive.c | 97 +++++++++++++++++++++++++ 2 files changed, 99 insertions(+), 1 deletion(-) create mode 100644 test/zdtm/static/socket-tcp-keepalive.c diff --git a/test/zdtm/static/Makefile b/test/zdtm/static/Makefile index f9d2efe74..ea5d3c42e 100644 --- a/test/zdtm/static/Makefile +++ b/test/zdtm/static/Makefile @@ -105,7 +105,8 @@ TST_NOFILE := \ socket-tcp-unconn \ socket-tcp6-unconn \ socket-tcp-syn-sent \ - socket-tcp-skip-in-flight \ + socket-tcp-skip-in-flight \ + socket-tcp-keepalive \ sock_opts00 \ sock_opts01 \ sk-unix-unconn \ diff --git a/test/zdtm/static/socket-tcp-keepalive.c b/test/zdtm/static/socket-tcp-keepalive.c new file mode 100644 index 000000000..a977a03b5 --- /dev/null +++ b/test/zdtm/static/socket-tcp-keepalive.c @@ -0,0 +1,97 @@ +#include +#include +#include +#include + 
+#include "zdtmtst.h" + +const char *test_doc = "test checkpoint/restore of SO_KEEPALIVE\n"; +const char *test_author = "Radostin Stoyanov \n"; + +int main(int argc, char **argv) +{ + int sk; + int alive = 1; + int cnt = 5; + int idle = 10; + int intvl = 15; + int optval; + socklen_t optlen; + + test_init(argc, argv); + + sk = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP); + if (sk < 0) { + pr_perror("Can't create socket"); + return 1; + } + + /* Set the option active */ + if (setsockopt(sk, SOL_SOCKET, SO_KEEPALIVE, &alive, sizeof(alive)) < 0) { + pr_perror("setsockopt SO_KEEPALIVE"); + return 1; + } + + if (setsockopt(sk, SOL_TCP, TCP_KEEPCNT, &cnt, sizeof(cnt)) < 0) { + pr_perror("setsockopt TCP_KEEPCNT"); + return 1; + } + + if (setsockopt(sk, SOL_TCP, TCP_KEEPIDLE, &idle, sizeof(idle)) < 0) { + pr_perror("setsockopt TCP_KEEPIDLE"); + return 1; + } + + optval = 5; + optlen = sizeof(optval); + if (setsockopt(sk, SOL_TCP, TCP_KEEPINTVL, &intvl, sizeof(intvl)) < 0) { + pr_perror("setsockopt TCP_KEEPINTVL"); + return 1; + } + + test_daemon(); + test_waitsig(); + + if (getsockopt(sk, SOL_SOCKET, SO_KEEPALIVE, &optval, &optlen)) { + pr_perror("getsockopt SO_KEEPALIVE"); + return 1; + } + + if (optlen != sizeof(optval) || optval != alive) { + fail("SO_KEEPALIVE not set"); + return 1; + } + + if (getsockopt(sk, SOL_TCP, TCP_KEEPCNT, &optval, &optlen) < 0) { + pr_perror("getsockopt TCP_KEEPCNT"); + return 1; + } + + if (optval != cnt) { + fail("TCP_KEEPCNT has incorrect value (%d != %d)", cnt, optval); + return 1; + } + + if (getsockopt(sk, SOL_TCP, TCP_KEEPIDLE, &optval, &optlen) < 0) { + pr_perror("getsockopt TCP_KEEPIDLE"); + return 1; + } + + if (optval != idle) { + fail("TCP_KEEPIDLE has incorrect value (%d != %d)", idle, optval); + return 1; + } + + if (getsockopt(sk, SOL_TCP, TCP_KEEPINTVL, &optval, &optlen) < 0) { + pr_perror("getsockopt TCP_KEEPINTVL"); + return 1; + } + + if (optval != intvl) { + fail("TCP_KEEPINTVL has incorrect value (%d != %d)", intvl, optval); 
+ return 1; + } + + pass(); + return 0; +} \ No newline at end of file From 79559bef92b524911b766d674ac8bc4470b8b378 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Fri, 20 Dec 2019 17:50:37 +0100 Subject: [PATCH 0282/2030] Fix tests on Ubuntu It seems like Ubuntu introduced an overlayfs change which breaks CRIU: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1857257 This disables overlayfs (unfortunately) in most tests by switching to devicemapper or vfs. Upstream kernels do not seem to have this problem. This also adds the 'docker-test' for xenial which still has a working overlayfs from CRIU's point of view. Also adjust Podman Ubuntu package location Podman Ubuntu packages are now available via OBS and no longer via PPA. Signed-off-by: Adrian Reber --- .travis.yml | 13 +++++++++++-- scripts/travis/Makefile | 5 ++++- scripts/travis/docker-test.sh | 15 ++++++++++----- scripts/travis/podman-test.sh | 13 ++++++++++--- 4 files changed, 35 insertions(+), 11 deletions(-) diff --git a/.travis.yml b/.travis.yml index e6e410191..25dd6a29b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -11,7 +11,6 @@ env: - TR_ARCH=local CLANG=1 COMPAT_TEST=y - TR_ARCH=x86_64 - TR_ARCH=x86_64 CLANG=1 - - TR_ARCH=docker-test - TR_ARCH=openj9-test matrix: include: @@ -57,6 +56,16 @@ matrix: arch: amd64 env: TR_ARCH=podman-test dist: bionic + - os: linux + arch: amd64 + env: TR_ARCH=docker-test + dist: bionic + - os: linux + arch: amd64 + env: TR_ARCH=docker-test DIST=xenial + # On xenial it should be possible to test overlayfs; + # broken on the latest bionic kernel + dist: xenial - os: linux arch: amd64 env: TR_ARCH=alpine CLANG=1 @@ -79,9 +88,9 @@ matrix: dist: bionic allow_failures: - env: TR_ARCH=docker-test + - env: TR_ARCH=docker-test DIST=xenial - env: TR_ARCH=fedora-rawhide - env: TR_ARCH=local GCOV=1 - - env: TR_ARCH=podman-test script: - sudo make CCACHE=1 -C scripts/travis $TR_ARCH after_success: diff --git a/scripts/travis/Makefile b/scripts/travis/Makefile index
373171149..17abb703a 100644 --- a/scripts/travis/Makefile +++ b/scripts/travis/Makefile @@ -58,7 +58,10 @@ docker-test: podman-test: ./podman-test.sh -openj9-test: +# overlayfs behaves differently on Ubuntu and breaks CRIU +# https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1857257 +# Switch to devicemapper +openj9-test: restart-docker ./openj9-test.sh %: diff --git a/scripts/travis/docker-test.sh b/scripts/travis/docker-test.sh index ee96fef48..ac420a445 100755 --- a/scripts/travis/docker-test.sh +++ b/scripts/travis/docker-test.sh @@ -19,11 +19,16 @@ apt-get update -qq apt-get install -qq docker-ce -cat > /etc/docker/daemon.json < /etc/docker/daemon.json +else + echo '{ "experimental": true }' > /etc/docker/daemon.json +fi service docker restart diff --git a/scripts/travis/podman-test.sh b/scripts/travis/podman-test.sh index eafdc73be..5189477cd 100755 --- a/scripts/travis/podman-test.sh +++ b/scripts/travis/podman-test.sh @@ -1,7 +1,13 @@ #!/bin/bash set -x -e -o pipefail -add-apt-repository -y ppa:projectatomic/ppa +echo 'deb http://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable/xUbuntu_18.04/ /' > /etc/apt/sources.list.d/devel:kubic:libcontainers:stable.list + +wget -nv https://download.opensuse.org/repositories/devel:kubic:libcontainers:stable/xUbuntu_18.04/Release.key -O- | apt-key add - + +# podman conflicts with a man page from docker-ce +# this is a podman packaging bug (https://github.com/containers/libpod/issues/4747) +apt-get -y purge docker-ce apt-get install -qq \ apt-transport-https \ @@ -10,7 +16,6 @@ apt-get install -qq \ software-properties-common apt-get update -qq - apt-get install -qqy podman containernetworking-plugins export SKIP_TRAVIS_TEST=1 @@ -21,7 +26,9 @@ cd ../../ make install -podman info +# overlaysfs behaves differently on Ubuntu and breaks CRIU +# https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1857257 +podman --storage-driver vfs info criu --version From 8bb3c17a0f7f14baaed8d9b6ebf953c24a793ccc 
Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Sat, 21 Dec 2019 18:08:23 +0000 Subject: [PATCH 0283/2030] style: Enforce kernel style -Wdeclaration-after-statement Include warnings that the kernel uses during compilation: -Wdeclaration-after-statement: enforces having variables declared at the top of scopes Signed-off-by: Nicolas Viennot [Generated a commit message from the pull request] Signed-off-by: Dmitry Safonov --- Makefile | 2 +- criu/net.c | 7 +++++-- criu/page-xfer.c | 10 +++++----- criu/pie/util-vdso.c | 3 ++- test/zdtm/Makefile.inc | 1 + test/zdtm/static/arm-neon00.c | 11 +++++----- test/zdtm/static/child_subreaper.c | 6 +++--- test/zdtm/static/config_inotify_irmap.c | 3 ++- test/zdtm/static/inotify00.c | 3 ++- test/zdtm/static/maps03.c | 3 ++- test/zdtm/static/mnt_ext_dev.c | 3 ++- test/zdtm/static/mntns_link_remap.c | 2 +- test/zdtm/static/mntns_open.c | 2 +- test/zdtm/static/mountpoints.c | 2 +- test/zdtm/static/remap_dead_pid.c | 4 ++-- test/zdtm/static/selinux01.c | 3 ++- test/zdtm/static/sigaltstack.c | 20 +++++++++---------- test/zdtm/static/socket-tcp-syn-sent.c | 4 ++-- test/zdtm/static/unlink_multiple_largefiles.c | 3 ++- test/zdtm/transition/file_aio.c | 3 ++- test/zdtm/transition/file_read.c | 5 ++++- test/zdtm/transition/maps008.c | 14 +++++++------ 22 files changed, 66 insertions(+), 48 deletions(-) diff --git a/Makefile b/Makefile index ef76d706c..133390f17 100644 --- a/Makefile +++ b/Makefile @@ -100,7 +100,7 @@ export PROTOUFIX DEFINES DEFINES += -D_FILE_OFFSET_BITS=64 DEFINES += -D_GNU_SOURCE -WARNINGS := -Wall -Wformat-security +WARNINGS := -Wall -Wformat-security -Wdeclaration-after-statement CFLAGS-GCOV := --coverage -fno-exceptions -fno-inline -fprofile-update=atomic export CFLAGS-GCOV diff --git a/criu/net.c b/criu/net.c index e960a34f9..712837782 100644 --- a/criu/net.c +++ b/criu/net.c @@ -2817,6 +2817,9 @@ int macvlan_ext_add(struct external *ext) static int prep_ns_sockets(struct ns_id *ns, bool for_dump) { int nsret = 
-1, ret; +#ifdef CONFIG_HAS_SELINUX + security_context_t ctx; +#endif if (ns->type != NS_CRIU) { pr_info("Switching to %d's net for collecting sockets\n", ns->ns_pid); @@ -2854,7 +2857,6 @@ static int prep_ns_sockets(struct ns_id *ns, bool for_dump) * policies installed. For Fedora based systems this is part * of the container-selinux package. */ - security_context_t ctx; /* * This assumes that all processes CRIU wants to dump are labeled @@ -3294,6 +3296,7 @@ int kerndat_link_nsid() } if (pid == 0) { + bool has_link_nsid; NetDeviceEntry nde = NET_DEVICE_ENTRY__INIT; struct net_link link = { .created = false, @@ -3336,7 +3339,7 @@ int kerndat_link_nsid() exit(1); } - bool has_link_nsid = false; + has_link_nsid = false; if (check_link_nsid(sk, &has_link_nsid)) exit(1); diff --git a/criu/page-xfer.c b/criu/page-xfer.c index 4d2d046ef..9affc2706 100644 --- a/criu/page-xfer.c +++ b/criu/page-xfer.c @@ -624,17 +624,17 @@ unsigned long handle_faulty_iov(int pid, struct iovec* riov, unsigned long* aux_len, unsigned long partial_read_bytes) { + struct iovec dummy; + ssize_t bytes_read; + unsigned long offset = 0; + unsigned long final_read_cnt = 0; + /* Handling Case 2*/ if (riov[faulty_index].iov_len == PAGE_SIZE) { cnt_sub(CNT_PAGES_WRITTEN, 1); return 0; } - struct iovec dummy; - ssize_t bytes_read; - unsigned long offset = 0; - unsigned long final_read_cnt = 0; - /* Handling Case 3-Part 3.2*/ offset = (partial_read_bytes)? 
partial_read_bytes : PAGE_SIZE; diff --git a/criu/pie/util-vdso.c b/criu/pie/util-vdso.c index 104da0633..58b27680c 100644 --- a/criu/pie/util-vdso.c +++ b/criu/pie/util-vdso.c @@ -243,10 +243,11 @@ static void parse_elf_symbols(uintptr_t mem, size_t size, Phdr_t *load, k = elf_hash((const unsigned char *)symbol); for (j = bucket[k % nbucket]; j < nchain && j != STN_UNDEF; j = chain[j]) { - addr = mem + dyn_symtab->d_un.d_ptr - load->p_vaddr; Sym_t *sym; char *name; + addr = mem + dyn_symtab->d_un.d_ptr - load->p_vaddr; + addr += sizeof(Sym_t)*j; if (__ptr_struct_oob(addr, sizeof(Sym_t), mem, size)) continue; diff --git a/test/zdtm/Makefile.inc b/test/zdtm/Makefile.inc index 32fc72d32..6958d128e 100644 --- a/test/zdtm/Makefile.inc +++ b/test/zdtm/Makefile.inc @@ -38,6 +38,7 @@ ifeq ($(origin CC), default) CC := $(CROSS_COMPILE)$(HOSTCC) endif CFLAGS += -g -O2 -Wall -Werror -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0 +CFLAGS += -Wdeclaration-after-statement CFLAGS += $(USERCFLAGS) CFLAGS += -D_GNU_SOURCE CPPFLAGS += -iquote $(LIBDIR)/arch/$(ARCH)/include diff --git a/test/zdtm/static/arm-neon00.c b/test/zdtm/static/arm-neon00.c index 96da16c6b..ce8123e51 100644 --- a/test/zdtm/static/arm-neon00.c +++ b/test/zdtm/static/arm-neon00.c @@ -12,13 +12,14 @@ const char *test_author = "Alexander Karatshov "; int main(int argc, char ** argv) { + int a, b, c, y1, y2; + srand(time(0)); - int a = rand() % 100; - int b = rand() % 100; - int c = rand() % 100; - int y1 = a + b*c; - int y2; + a = rand() % 100; + b = rand() % 100; + c = rand() % 100; + y1 = a + b*c; test_init(argc, argv); diff --git a/test/zdtm/static/child_subreaper.c b/test/zdtm/static/child_subreaper.c index 267795249..6d02c9f93 100644 --- a/test/zdtm/static/child_subreaper.c +++ b/test/zdtm/static/child_subreaper.c @@ -8,10 +8,11 @@ const char *test_author = "Michał Cłapiński "; int main(int argc, char **argv) { + int cs_before = 1, cs_after, ret; + test_init(argc, argv); - int cs_before = 1; - int ret = 
prctl(PR_SET_CHILD_SUBREAPER, cs_before, 0, 0, 0); + ret = prctl(PR_SET_CHILD_SUBREAPER, cs_before, 0, 0, 0); if (ret) { pr_perror("Can't set child subreaper attribute, err = %d", ret); exit(1); @@ -20,7 +21,6 @@ int main(int argc, char **argv) test_daemon(); test_waitsig(); - int cs_after; ret = prctl(PR_GET_CHILD_SUBREAPER, (unsigned long)&cs_after, 0, 0, 0); if (ret) { pr_perror("Can't get child subreaper attribute, err = %d", ret); diff --git a/test/zdtm/static/config_inotify_irmap.c b/test/zdtm/static/config_inotify_irmap.c index 831dc1974..3cbeba7d3 100644 --- a/test/zdtm/static/config_inotify_irmap.c +++ b/test/zdtm/static/config_inotify_irmap.c @@ -31,6 +31,7 @@ char test_files[2][128] = {TDIR"/zdtm-test", TDIR"/zdtm-test1",}; int main (int argc, char *argv[]) { + FILE *configfile; char buf[BUFF_SIZE]; int fd, wd, i; @@ -56,7 +57,7 @@ int main (int argc, char *argv[]) } } - FILE *configfile = fopen(CONFIG_PATH, "w"); + configfile = fopen(CONFIG_PATH, "w"); if (configfile == NULL) { pr_perror("Unable to create configuration file %s", CONFIG_PATH); goto err; diff --git a/test/zdtm/static/inotify00.c b/test/zdtm/static/inotify00.c index 67088edd8..635c05047 100644 --- a/test/zdtm/static/inotify00.c +++ b/test/zdtm/static/inotify00.c @@ -125,9 +125,10 @@ int main (int argc, char *argv[]) { pid_t pid; task_waiter_t t; - task_waiter_init(&t); static char buf[PATH_MAX]; + task_waiter_init(&t); + if (mount(NULL, "/", NULL, MS_PRIVATE | MS_REC, NULL)) { pr_perror("Unable to remount /"); return 1; diff --git a/test/zdtm/static/maps03.c b/test/zdtm/static/maps03.c index f2bf7957a..0e0a5b8f2 100644 --- a/test/zdtm/static/maps03.c +++ b/test/zdtm/static/maps03.c @@ -16,9 +16,10 @@ const char *test_author = "Cyrill Gorcunov "; int main(int argc, char **argv) { - test_init(argc, argv); unsigned char *mem; + test_init(argc, argv); + test_msg("Alloc huge VMA\n"); mem = (void *)mmap(NULL, (10L << 30), PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); diff --git 
a/test/zdtm/static/mnt_ext_dev.c b/test/zdtm/static/mnt_ext_dev.c index a9ac01333..1d60fc92f 100644 --- a/test/zdtm/static/mnt_ext_dev.c +++ b/test/zdtm/static/mnt_ext_dev.c @@ -20,10 +20,11 @@ TEST_OPTION(dirname, string, "directory name", 1); int main(int argc, char **argv) { char *loop, fd, dfd, fd2; - test_init(argc, argv); struct stat st, stp, st2; char dname[PATH_MAX], dname2[PATH_MAX]; + test_init(argc, argv); + snprintf(dname, sizeof(dname), "%s/test_dir", dirname); snprintf(dname2, sizeof(dname2), "%s/test_dir2", dirname); diff --git a/test/zdtm/static/mntns_link_remap.c b/test/zdtm/static/mntns_link_remap.c index 642641b16..6ac08191a 100644 --- a/test/zdtm/static/mntns_link_remap.c +++ b/test/zdtm/static/mntns_link_remap.c @@ -230,8 +230,8 @@ int main(int argc, char **argv) if (pid > 0) { - kill(pid, SIGTERM); int status = 1; + kill(pid, SIGTERM); wait(&status); if (WIFEXITED(status)) { if (WEXITSTATUS(status) == AWK_OK) diff --git a/test/zdtm/static/mntns_open.c b/test/zdtm/static/mntns_open.c index e19c4ea72..c687080a7 100644 --- a/test/zdtm/static/mntns_open.c +++ b/test/zdtm/static/mntns_open.c @@ -119,8 +119,8 @@ int main(int argc, char **argv) test_waitsig(); if (pid > 0) { - kill(pid, SIGTERM); int status = 1; + kill(pid, SIGTERM); wait(&status); if (WIFEXITED(status)) { if (WEXITSTATUS(status) == AWK_OK) diff --git a/test/zdtm/static/mountpoints.c b/test/zdtm/static/mountpoints.c index 00475cdc5..cf54d1096 100644 --- a/test/zdtm/static/mountpoints.c +++ b/test/zdtm/static/mountpoints.c @@ -292,8 +292,8 @@ int main(int argc, char **argv) } if (pid > 0) { - kill(pid, SIGTERM); int status = 1; + kill(pid, SIGTERM); wait(&status); if (status) return 1; diff --git a/test/zdtm/static/remap_dead_pid.c b/test/zdtm/static/remap_dead_pid.c index 261c591b7..5d4241fc6 100644 --- a/test/zdtm/static/remap_dead_pid.c +++ b/test/zdtm/static/remap_dead_pid.c @@ -40,12 +40,12 @@ int main(int argc, char **argv) while(1) sleep(10); } else { - test_msg("child is 
%d\n", pid); - int fd, ret; char path[PATH_MAX]; pid_t result; + test_msg("child is %d\n", pid); + sprintf(path, proc_path, pid); fd = open(path, O_RDONLY); if (fd < 0) { diff --git a/test/zdtm/static/selinux01.c b/test/zdtm/static/selinux01.c index 9966455c4..cec5980e8 100644 --- a/test/zdtm/static/selinux01.c +++ b/test/zdtm/static/selinux01.c @@ -133,6 +133,7 @@ int check_sockcreate_empty() int main(int argc, char **argv) { + int sk; char ctx[1024]; test_init(argc, argv); @@ -159,7 +160,7 @@ int main(int argc, char **argv) #endif /* Open our test socket */ - int sk = socket(AF_INET, SOCK_STREAM, 0); + sk = socket(AF_INET, SOCK_STREAM, 0); memset(ctx, 0, 1024); /* Read out the socket label */ if (fgetxattr(sk, "security.selinux", ctx, 1024) == -1) { diff --git a/test/zdtm/static/sigaltstack.c b/test/zdtm/static/sigaltstack.c index d324b0d37..f36d409f5 100644 --- a/test/zdtm/static/sigaltstack.c +++ b/test/zdtm/static/sigaltstack.c @@ -61,17 +61,17 @@ void thread_sigaction(int signo, siginfo_t *info, void *context) static void *thread_func(void *arg) { + struct sigaction sa = { + .sa_sigaction = thread_sigaction, + .sa_flags = SA_RESTART | SA_ONSTACK, + }; + sas_state[SAS_THRD_OLD] = (stack_t) { .ss_size = sizeof(stack_thread) - 8, .ss_sp = stack_thread, .ss_flags = 0, }; - struct sigaction sa = { - .sa_sigaction = thread_sigaction, - .sa_flags = SA_RESTART | SA_ONSTACK, - }; - sigemptyset(&sa.sa_mask); if (sigaction(SIGUSR2, &sa, NULL)) { @@ -103,17 +103,17 @@ int main(int argc, char *argv[]) { pthread_t thread; + struct sigaction sa = { + .sa_sigaction = leader_sigaction, + .sa_flags = SA_RESTART | SA_ONSTACK, + }; + sas_state[SAS_MAIN_OLD] = (stack_t) { .ss_size = sizeof(stack_main) - 8, .ss_sp = stack_main, .ss_flags = 0, }; - struct sigaction sa = { - .sa_sigaction = leader_sigaction, - .sa_flags = SA_RESTART | SA_ONSTACK, - }; - sigemptyset(&sa.sa_mask); test_init(argc, argv); diff --git a/test/zdtm/static/socket-tcp-syn-sent.c 
b/test/zdtm/static/socket-tcp-syn-sent.c index cf4c3bb46..755532a8a 100644 --- a/test/zdtm/static/socket-tcp-syn-sent.c +++ b/test/zdtm/static/socket-tcp-syn-sent.c @@ -37,7 +37,7 @@ int main(int argc, char **argv) { int fd, fd_s, sock, sk; union sockaddr_inet addr; - char cmd[4096]; + char c, cmd[4096]; test_init(argc, argv); @@ -113,7 +113,7 @@ int main(int argc, char **argv) fcntl(sock, F_SETFL, 0); - char c = 5; + c = 5; if (write(sock, &c, 1) != 1) { fail("Unable to send data"); return 1; diff --git a/test/zdtm/static/unlink_multiple_largefiles.c b/test/zdtm/static/unlink_multiple_largefiles.c index 7cf628606..2f9248c2f 100644 --- a/test/zdtm/static/unlink_multiple_largefiles.c +++ b/test/zdtm/static/unlink_multiple_largefiles.c @@ -30,10 +30,11 @@ void create_check_pattern(char *buf, size_t count, unsigned char seed) struct fiemap *read_fiemap(int fd) { - test_msg("Obtaining fiemap for fd %d\n", fd); struct fiemap *fiemap, *tmp; int extents_size; + test_msg("Obtaining fiemap for fd %d\n", fd); + fiemap = malloc(sizeof(struct fiemap)); if (fiemap == NULL) { pr_perror("Cannot allocate fiemap"); diff --git a/test/zdtm/transition/file_aio.c b/test/zdtm/transition/file_aio.c index a16010158..4a76c9390 100644 --- a/test/zdtm/transition/file_aio.c +++ b/test/zdtm/transition/file_aio.c @@ -17,7 +17,6 @@ const char *test_author = "Andrew Vagin "; int main(int argc, char **argv) { - test_init(argc, argv); char buf[BUF_SIZE]; int fd; struct aiocb aiocb; @@ -25,6 +24,8 @@ int main(int argc, char **argv) char tmpfname[256]="/tmp/file_aio.XXXXXX"; int ret; + test_init(argc, argv); + fd = mkstemp(tmpfname); if (fd == -1) { pr_perror("mkstemp() failed"); diff --git a/test/zdtm/transition/file_read.c b/test/zdtm/transition/file_read.c index 50dffd8c4..5d6e4dbba 100644 --- a/test/zdtm/transition/file_read.c +++ b/test/zdtm/transition/file_read.c @@ -158,9 +158,11 @@ static void chew_some_file(int num) rv = SEEK_FAILED; goto out_exit; case 1: - rv = FILE_CORRUPTED; + { int fd1; 
char str[PATH_MAX]; + + rv = FILE_CORRUPTED; // create standard file sprintf(str, "standard_%s.%d", filename, num); fd1 = open(str, O_WRONLY | O_CREAT | O_TRUNC, 0666); @@ -168,6 +170,7 @@ static void chew_some_file(int num) pr_perror("can't write %s", str); close(fd1); goto out_exit; + } } } rv = SUCCESS; diff --git a/test/zdtm/transition/maps008.c b/test/zdtm/transition/maps008.c index 5f6eb0887..7ed7c10a5 100644 --- a/test/zdtm/transition/maps008.c +++ b/test/zdtm/transition/maps008.c @@ -348,6 +348,7 @@ static int proc11_func(task_waiter_t *setup_waiter) void *mem3_old = mem3; size_t mem3_size_old = mem3_size; uint32_t crc_epoch = 0; + uint8_t *proc1_mem3; pstree->proc11 = getpid(); xmunmap(mem3, MEM3_START_CUT); @@ -382,7 +383,7 @@ static int proc11_func(task_waiter_t *setup_waiter) chk_proc_mem_eq(pstree->proc11, mem3, mem3_size, pstree->proc112, mem3, mem3_size + MEM3_END_CUT); - uint8_t *proc1_mem3 = mmap_proc_mem(pstree->proc1, + proc1_mem3 = mmap_proc_mem(pstree->proc1, (unsigned long)mem3_old, mem3_size_old); check_mem_eq(mem3, mem3_size, proc1_mem3 + MEM3_START_CUT, mem3_size); xmunmap(proc1_mem3, mem3_size_old); @@ -489,16 +490,17 @@ static void sigchld_hand(int signo, siginfo_t *info, void *ucontext) int main(int argc, char **argv) { - test_init(argc, argv); - - pstree = (struct pstree *)mmap_ashmem(PAGE_SIZE); - test_sync = (struct test_sync *)mmap_ashmem(sizeof(*test_sync)); - struct sigaction sa = { .sa_sigaction = sigchld_hand, .sa_flags = SA_RESTART | SA_SIGINFO | SA_NOCLDSTOP }; sigemptyset(&sa.sa_mask); + + test_init(argc, argv); + + pstree = (struct pstree *)mmap_ashmem(PAGE_SIZE); + test_sync = (struct test_sync *)mmap_ashmem(sizeof(*test_sync)); + if (sigaction(SIGCHLD, &sa, NULL)) { pr_perror("SIGCHLD handler setup"); exit(1); From 17c4a8b24507d1bd1a906aa4a9d5ea3054072141 Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Sat, 21 Dec 2019 18:13:06 +0000 Subject: [PATCH 0284/2030] style: Enforce kernel style -Wstrict-prototypes Include 
warnings that the kernel uses during compilation: -Wstrict-prototypes: enforces full declaration of functions. Previously, when declaring extern void func(), one can call func(123) and have no compilation error. This is dangerous. The correct declaration is extern void func(void). Signed-off-by: Nicolas Viennot [Generated a commit message from the pull request] Signed-off-by: Dmitry Safonov --- Makefile | 2 +- criu/config.c | 2 +- criu/cr-check.c | 10 ++++----- criu/cr-dump.c | 4 ++-- criu/cr-restore.c | 8 +++---- criu/cr-service.c | 4 ++-- criu/crtools.c | 2 +- criu/fault-injection.c | 2 +- criu/include/cr_options.h | 4 ++-- criu/include/lsm.h | 4 ++-- criu/include/mount.h | 2 +- criu/include/net.h | 8 +++---- criu/include/tls.h | 2 +- criu/kerndat.c | 6 ++--- criu/lsm.c | 2 +- criu/mount.c | 6 ++--- criu/namespaces.c | 2 +- criu/net.c | 10 ++++----- criu/pstree.c | 2 +- criu/seize.c | 2 +- criu/tls.c | 6 ++--- criu/util.c | 4 ++-- soccr/test/tcp-conn.c | 2 +- soccr/test/tcp-constructor.c | 2 +- test/others/unix-callback/unix-client.c | 2 +- test/others/unix-callback/unix-server.c | 2 +- test/zdtm/Makefile.inc | 2 +- test/zdtm/lib/test.c | 6 ++--- test/zdtm/static/apparmor.c | 4 ++-- .../static/child_subreaper_and_reparent.c | 6 ++--- .../static/child_subreaper_existing_child.c | 6 ++--- test/zdtm/static/dumpable02.c | 2 +- test/zdtm/static/fdt_shared.c | 4 ++-- test/zdtm/static/file_locks00.c | 2 +- test/zdtm/static/inotify_system.c | 2 +- test/zdtm/static/maps00.c | 7 +++--- test/zdtm/static/selinux00.c | 8 +++---- test/zdtm/static/selinux01.c | 10 ++++----- test/zdtm/static/session02.c | 8 +++---- test/zdtm/static/session03.c | 10 ++++----- test/zdtm/transition/netlink00.c | 22 +++++++++---------- 41 files changed, 101 insertions(+), 100 deletions(-) diff --git a/Makefile b/Makefile index 133390f17..00e563c11 100644 --- a/Makefile +++ b/Makefile @@ -100,7 +100,7 @@ export PROTOUFIX DEFINES DEFINES += -D_FILE_OFFSET_BITS=64 DEFINES += -D_GNU_SOURCE -WARNINGS := 
-Wall -Wformat-security -Wdeclaration-after-statement +WARNINGS := -Wall -Wformat-security -Wdeclaration-after-statement -Wstrict-prototypes CFLAGS-GCOV := --coverage -fno-exceptions -fno-inline -fprofile-update=atomic export CFLAGS-GCOV diff --git a/criu/config.c b/criu/config.c index e5d42efe4..73c62f5bb 100644 --- a/criu/config.c +++ b/criu/config.c @@ -853,7 +853,7 @@ bad_arg: return 1; } -int check_options() +int check_options(void) { if (opts.tcp_established_ok) pr_info("Will dump/restore TCP connections\n"); diff --git a/criu/cr-check.c b/criu/cr-check.c index 729b2dc38..17dd29b42 100644 --- a/criu/cr-check.c +++ b/criu/cr-check.c @@ -51,7 +51,7 @@ #include "restorer.h" #include "uffd.h" -static char *feature_name(int (*func)()); +static char *feature_name(int (*func)(void)); static int check_tty(void) { @@ -513,7 +513,7 @@ static int check_ipc(void) return -1; } -static int check_sigqueuinfo() +static int check_sigqueuinfo(void) { siginfo_t info = { .si_code = 1 }; @@ -960,7 +960,7 @@ static int clone_cb(void *_arg) { exit(0); } -static int check_clone_parent_vs_pid() +static int check_clone_parent_vs_pid(void) { struct clone_arg ca; pid_t pid; @@ -1447,7 +1447,7 @@ static int check_external_net_ns(void) struct feature_list { char *name; - int (*func)(); + int (*func)(void); }; static struct feature_list feature_list[] = { @@ -1517,7 +1517,7 @@ int check_add_feature(char *feat) return -1; } -static char *feature_name(int (*func)()) +static char *feature_name(int (*func)(void)) { struct feature_list *fl; diff --git a/criu/cr-dump.c b/criu/cr-dump.c index 4b5a01cfd..88323af92 100644 --- a/criu/cr-dump.c +++ b/criu/cr-dump.c @@ -1439,7 +1439,7 @@ err_cure_imgset: static int alarm_attempts = 0; -bool alarm_timeouted() { +bool alarm_timeouted(void) { return alarm_attempts > 0; } @@ -1456,7 +1456,7 @@ static void alarm_handler(int signo) BUG(); } -static int setup_alarm_handler() +static int setup_alarm_handler(void) { struct sigaction sa = { .sa_handler = 
alarm_handler, diff --git a/criu/cr-restore.c b/criu/cr-restore.c index b920ce262..687cd6c68 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -182,13 +182,13 @@ static int __restore_wait_inprogress_tasks(int participants) return 0; } -static int restore_wait_inprogress_tasks() +static int restore_wait_inprogress_tasks(void) { return __restore_wait_inprogress_tasks(0); } /* Wait all tasks except the current one */ -static int restore_wait_other_tasks() +static int restore_wait_other_tasks(void) { int participants, stage; @@ -1587,7 +1587,7 @@ static void restore_pgid(void) futex_set_and_wake(&rsti(current)->pgrp_set, 1); } -static int __legacy_mount_proc() +static int __legacy_mount_proc(void) { char proc_mountpoint[] = "/tmp/crtools-proc.XXXXXX"; int fd; @@ -1941,7 +1941,7 @@ static int catch_tasks(bool root_seized, enum trace_flags *flag) return 0; } -static int clear_breakpoints() +static int clear_breakpoints(void) { struct pstree_item *item; int ret = 0, i; diff --git a/criu/cr-service.c b/criu/cr-service.c index 549b3368b..279016bcd 100644 --- a/criu/cr-service.c +++ b/criu/cr-service.c @@ -1278,7 +1278,7 @@ static void reap_worker(int signo) } } -static int setup_sigchld_handler() +static int setup_sigchld_handler(void) { struct sigaction action; @@ -1295,7 +1295,7 @@ static int setup_sigchld_handler() return 0; } -static int restore_sigchld_handler() +static int restore_sigchld_handler(void) { struct sigaction action; diff --git a/criu/crtools.c b/criu/crtools.c index 700fad994..9b6e94809 100644 --- a/criu/crtools.c +++ b/criu/crtools.c @@ -47,7 +47,7 @@ #include "setproctitle.h" #include "sysctl.h" -void flush_early_log_to_stderr() __attribute__((destructor)); +void flush_early_log_to_stderr(void) __attribute__((destructor)); void flush_early_log_to_stderr(void) { diff --git a/criu/fault-injection.c b/criu/fault-injection.c index 4128814d5..4b0650008 100644 --- a/criu/fault-injection.c +++ b/criu/fault-injection.c @@ -3,7 +3,7 @@ enum faults 
fi_strategy; -int fault_injection_init() +int fault_injection_init(void) { char *val; int start; diff --git a/criu/include/cr_options.h b/criu/include/cr_options.h index 2c1451e86..c5af33186 100644 --- a/criu/include/cr_options.h +++ b/criu/include/cr_options.h @@ -158,7 +158,7 @@ extern struct cr_options opts; char *rpc_cfg_file; extern int parse_options(int argc, char **argv, bool *usage_error, bool *has_exec_cmd, int state); -extern int check_options(); -extern void init_opts(); +extern int check_options(void); +extern void init_opts(void); #endif /* __CR_OPTIONS_H__ */ diff --git a/criu/include/lsm.h b/criu/include/lsm.h index 3b8271282..a41915a4c 100644 --- a/criu/include/lsm.h +++ b/criu/include/lsm.h @@ -39,7 +39,7 @@ extern int lsm_check_opts(void); #ifdef CONFIG_HAS_SELINUX int dump_xattr_security_selinux(int fd, FdinfoEntry *e); int run_setsockcreatecon(FdinfoEntry *e); -int reset_setsockcreatecon(); +int reset_setsockcreatecon(void); #else static inline int dump_xattr_security_selinux(int fd, FdinfoEntry *e) { return 0; @@ -47,7 +47,7 @@ static inline int dump_xattr_security_selinux(int fd, FdinfoEntry *e) { static inline int run_setsockcreatecon(FdinfoEntry *e) { return 0; } -static inline int reset_setsockcreatecon() { +static inline int reset_setsockcreatecon(void) { return 0; } #endif diff --git a/criu/include/mount.h b/criu/include/mount.h index d9b375f5d..8bf19b266 100644 --- a/criu/include/mount.h +++ b/criu/include/mount.h @@ -96,7 +96,7 @@ extern int collect_binfmt_misc(void); static inline int collect_binfmt_misc(void) { return 0; } #endif -extern struct mount_info *mnt_entry_alloc(); +extern struct mount_info *mnt_entry_alloc(void); extern void mnt_entry_free(struct mount_info *mi); extern int __mntns_get_root_fd(pid_t pid); diff --git a/criu/include/net.h b/criu/include/net.h index 9976f6eb0..0a556f3da 100644 --- a/criu/include/net.h +++ b/criu/include/net.h @@ -31,7 +31,7 @@ extern int collect_net_namespaces(bool for_dump); extern int 
network_lock(void); extern void network_unlock(void); -extern int network_lock_internal(); +extern int network_lock_internal(void); extern struct ns_desc net_ns_desc; @@ -47,11 +47,11 @@ extern int move_veth_to_bridge(void); extern int kerndat_link_nsid(void); extern int net_get_nsid(int rtsk, int fd, int *nsid); -extern struct ns_id *net_get_root_ns(); +extern struct ns_id *net_get_root_ns(void); extern int kerndat_nsid(void); extern void check_has_netns_ioc(int fd, bool *kdat_val, const char *name); extern int net_set_ext(struct ns_id *ns); -extern struct ns_id *get_root_netns(); -extern int read_net_ns_img(); +extern struct ns_id *get_root_netns(void); +extern int read_net_ns_img(void); #endif /* __CR_NET_H__ */ diff --git a/criu/include/tls.h b/criu/include/tls.h index aa2517887..b48e4b480 100644 --- a/criu/include/tls.h +++ b/criu/include/tls.h @@ -4,7 +4,7 @@ # ifdef CONFIG_GNUTLS int tls_x509_init(int sockfd, bool is_server); -void tls_terminate_session(); +void tls_terminate_session(void); ssize_t tls_send(const void *buf, size_t len, int flags); ssize_t tls_recv(void *buf, size_t len, int flags); diff --git a/criu/kerndat.c b/criu/kerndat.c index b0dd83135..d1afde71d 100644 --- a/criu/kerndat.c +++ b/criu/kerndat.c @@ -364,7 +364,7 @@ no_dt: } /* The page frame number (PFN) is constant for the zero page */ -static int init_zero_page_pfn() +static int init_zero_page_pfn(void) { void *addr; int ret = 0; @@ -429,7 +429,7 @@ static int get_task_size(void) return 0; } -static int kerndat_fdinfo_has_lock() +static int kerndat_fdinfo_has_lock(void) { int fd, pfd = -1, exit_code = -1, len; char buf[PAGE_SIZE]; @@ -464,7 +464,7 @@ out: return exit_code; } -static int get_ipv6() +static int get_ipv6(void) { if (access("/proc/sys/net/ipv6", F_OK) < 0) { if (errno == ENOENT) { diff --git a/criu/lsm.c b/criu/lsm.c index 9d7e55c11..060f10259 100644 --- a/criu/lsm.c +++ b/criu/lsm.c @@ -133,7 +133,7 @@ static int selinux_get_sockcreate_label(pid_t pid, char **output) 
return 0; } -int reset_setsockcreatecon() +int reset_setsockcreatecon(void) { /* Currently this only works for SELinux. */ if (kdat.lsm != LSMTYPE__SELINUX) diff --git a/criu/mount.c b/criu/mount.c index 24a8516c6..180f2a62d 100644 --- a/criu/mount.c +++ b/criu/mount.c @@ -2140,7 +2140,7 @@ static int restore_ext_mount(struct mount_info *mi) static char mnt_clean_path[] = "/tmp/cr-tmpfs.XXXXXX"; -static int mount_clean_path() +static int mount_clean_path(void) { /* * To make a bind mount, we need to have access to a source directory, @@ -2167,7 +2167,7 @@ static int mount_clean_path() return 0; } -static int umount_clean_path() +static int umount_clean_path(void) { if (umount2(mnt_clean_path, MNT_DETACH)) { pr_perror("Unable to umount %s", mnt_clean_path); @@ -2659,7 +2659,7 @@ static int find_remap_mounts(struct mount_info *root) } /* Move remapped mounts to places where they have to be */ -static int fixup_remap_mounts() +static int fixup_remap_mounts(void) { struct mnt_remap_entry *r; diff --git a/criu/namespaces.c b/criu/namespaces.c index 57f6bdfef..21266df7c 100644 --- a/criu/namespaces.c +++ b/criu/namespaces.c @@ -976,7 +976,7 @@ err: return exit_code; } -void free_userns_maps() +void free_userns_maps(void) { if (userns_entry.n_uid_map > 0) { xfree(userns_entry.uid_map[0]); diff --git a/criu/net.c b/criu/net.c index 712837782..8e6cfaff1 100644 --- a/criu/net.c +++ b/criu/net.c @@ -1765,7 +1765,7 @@ static int __restore_links(struct ns_id *nsid, int *nrlinks, int *nrcreated) return 0; } -static int restore_links() +static int restore_links(void) { int nrcreated, nrlinks; struct ns_id *nsid; @@ -2080,7 +2080,7 @@ out: * iptables-restore is executed from a target userns and it may have not enough * rights to open /run/xtables.lock. Here we try to workaround this problem. 
*/ -static int prepare_xtable_lock() +static int prepare_xtable_lock(void) { int fd; @@ -2700,7 +2700,7 @@ err: return ret; } -int network_lock_internal() +int network_lock_internal(void) { char conf[] = "*filter\n" ":CRIU - [0:0]\n" @@ -2731,7 +2731,7 @@ int network_lock_internal() return ret; } -static int network_unlock_internal() +static int network_unlock_internal(void) { char conf[] = "*filter\n" ":CRIU - [0:0]\n" @@ -3284,7 +3284,7 @@ static int check_link_nsid(int rtsk, void *args) return do_rtnl_req(rtsk, &req, sizeof(req), check_one_link_nsid, NULL, NULL, args); } -int kerndat_link_nsid() +int kerndat_link_nsid(void) { int status; pid_t pid; diff --git a/criu/pstree.c b/criu/pstree.c index 92b4167aa..19cf5ad38 100644 --- a/criu/pstree.c +++ b/criu/pstree.c @@ -608,7 +608,7 @@ err: } #define RESERVED_PIDS 300 -static int get_free_pid() +static int get_free_pid(void) { static struct pid *prev, *next; diff --git a/criu/seize.c b/criu/seize.c index e1e6b8195..fd314666f 100644 --- a/criu/seize.c +++ b/criu/seize.c @@ -194,7 +194,7 @@ static int seize_cgroup_tree(char *root_path, const char *state) * A freezer cgroup can contain tasks which will not be dumped * and we need to wait them, because the are interrupted them by ptrace. 
*/ -static int freezer_wait_processes() +static int freezer_wait_processes(void) { int i; diff --git a/criu/tls.c b/criu/tls.c index db9cc4f5a..f7b94dee8 100644 --- a/criu/tls.c +++ b/criu/tls.c @@ -31,7 +31,7 @@ static gnutls_certificate_credentials_t x509_cred; static int tls_sk = -1; static int tls_sk_flags = 0; -void tls_terminate_session() +void tls_terminate_session(void) { int ret; @@ -227,7 +227,7 @@ static int tls_x509_verify_peer_cert(void) return 0; } -static int tls_handshake() +static int tls_handshake(void) { int ret = -1; while (ret != GNUTLS_E_SUCCESS) { @@ -241,7 +241,7 @@ static int tls_handshake() return 0; } -static int tls_x509_setup_creds() +static int tls_x509_setup_creds(void) { int ret; char *cacert = CRIU_CACERT; diff --git a/criu/util.c b/criu/util.c index 3bae18ab2..1646ce1c4 100644 --- a/criu/util.c +++ b/criu/util.c @@ -326,7 +326,7 @@ int close_pid_proc(void) return 0; } -void close_proc() +void close_proc(void) { close_pid_proc(); close_service_fd(PROC_FD_OFF); @@ -690,7 +690,7 @@ int cr_daemon(int nochdir, int noclose, int close_fd) return 0; } -int is_root_user() +int is_root_user(void) { if (geteuid() != 0) { pr_err("You need to be root to run this command\n"); diff --git a/soccr/test/tcp-conn.c b/soccr/test/tcp-conn.c index 1a1a5bb39..e31f58e7e 100644 --- a/soccr/test/tcp-conn.c +++ b/soccr/test/tcp-conn.c @@ -23,7 +23,7 @@ static void pr_printf(unsigned int level, const char *fmt, ...) 
va_end(args); } -int main() +int main(void) { union libsoccr_addr addr, dst; int srv, sock, clnt, rst; diff --git a/soccr/test/tcp-constructor.c b/soccr/test/tcp-constructor.c index 89f201000..973dbf10c 100644 --- a/soccr/test/tcp-constructor.c +++ b/soccr/test/tcp-constructor.c @@ -20,7 +20,7 @@ struct tcp { uint16_t wscale; }; -static void usage() +static void usage(void) { printf( "Usage: --addr ADDR -port PORT --seq SEQ --next --addr ADDR -port PORT --seq SEQ -- CMD ...\n" diff --git a/test/others/unix-callback/unix-client.c b/test/others/unix-callback/unix-client.c index 69808b53c..676c4adbc 100644 --- a/test/others/unix-callback/unix-client.c +++ b/test/others/unix-callback/unix-client.c @@ -86,7 +86,7 @@ static int check_sock(int i) return 0; } -int main() +int main(void) { int i, fd; sigset_t set; diff --git a/test/others/unix-callback/unix-server.c b/test/others/unix-callback/unix-server.c index 8f32f53dd..47bebd05d 100644 --- a/test/others/unix-callback/unix-server.c +++ b/test/others/unix-callback/unix-server.c @@ -19,7 +19,7 @@ struct ticket *tickets; #define SK_NAME "/tmp/criu.unix.callback.test" -int main() +int main(void) { int sk, ret, id; char buf[4096]; diff --git a/test/zdtm/Makefile.inc b/test/zdtm/Makefile.inc index 6958d128e..43763321f 100644 --- a/test/zdtm/Makefile.inc +++ b/test/zdtm/Makefile.inc @@ -38,7 +38,7 @@ ifeq ($(origin CC), default) CC := $(CROSS_COMPILE)$(HOSTCC) endif CFLAGS += -g -O2 -Wall -Werror -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0 -CFLAGS += -Wdeclaration-after-statement +CFLAGS += -Wdeclaration-after-statement -Wstrict-prototypes CFLAGS += $(USERCFLAGS) CFLAGS += -D_GNU_SOURCE CPPFLAGS += -iquote $(LIBDIR)/arch/$(ARCH)/include diff --git a/test/zdtm/lib/test.c b/test/zdtm/lib/test.c index a1bdfc1b4..630476de0 100644 --- a/test/zdtm/lib/test.c +++ b/test/zdtm/lib/test.c @@ -71,7 +71,7 @@ static void test_fini(void) unlinkat(cwd, pidfile, 0); } -static void setup_outfile() +static void setup_outfile(void) { if 
(!access(outfile, F_OK) || errno != ENOENT) { fprintf(stderr, "Output file %s appears to exist, aborting\n", @@ -93,7 +93,7 @@ static void setup_outfile() exit(1); } -static void redir_stdfds() +static void redir_stdfds(void) { int nullfd; @@ -346,7 +346,7 @@ void test_init(int argc, char **argv) srand48(time(NULL)); /* just in case we need it */ } -void test_daemon() +void test_daemon(void) { futex_set_and_wake(&test_shared_state->stage, TEST_RUNNING_STAGE); } diff --git a/test/zdtm/static/apparmor.c b/test/zdtm/static/apparmor.c index 15930c761..b3a4d7549 100644 --- a/test/zdtm/static/apparmor.c +++ b/test/zdtm/static/apparmor.c @@ -15,7 +15,7 @@ const char *test_author = "Tycho Andersen "; #define PROFILE "criu_test" -int setprofile() +int setprofile(void) { char profile[1024]; int fd, len; @@ -45,7 +45,7 @@ int setprofile() return 0; } -int checkprofile() +int checkprofile(void) { FILE *f; char path[PATH_MAX], profile[1024]; diff --git a/test/zdtm/static/child_subreaper_and_reparent.c b/test/zdtm/static/child_subreaper_and_reparent.c index 57943a67b..e3955d3d9 100644 --- a/test/zdtm/static/child_subreaper_and_reparent.c +++ b/test/zdtm/static/child_subreaper_and_reparent.c @@ -25,7 +25,7 @@ struct shared { int parent_after_cr; } *sh; -int orphan() +int orphan(void) { /* * Wait until reparented to the pidns init. 
(By waiting @@ -45,7 +45,7 @@ int orphan() return 0; } -int helper() +int helper(void) { int pid; @@ -59,7 +59,7 @@ int helper() return 0; } -int subreaper() +int subreaper(void) { int pid, ret, status; diff --git a/test/zdtm/static/child_subreaper_existing_child.c b/test/zdtm/static/child_subreaper_existing_child.c index 28e9dbb8a..8291aba08 100644 --- a/test/zdtm/static/child_subreaper_existing_child.c +++ b/test/zdtm/static/child_subreaper_existing_child.c @@ -24,7 +24,7 @@ struct shared { } *sh; -int orphan() +int orphan(void) { /* Return the control back to MAIN worker to do C/R */ futex_set_and_wake(&sh->fstate, TEST_CRIU); @@ -36,7 +36,7 @@ int orphan() return 0; } -int helper() +int helper(void) { int pid; @@ -52,7 +52,7 @@ int helper() return 0; } -int subreaper() +int subreaper(void) { int pid, ret, status; diff --git a/test/zdtm/static/dumpable02.c b/test/zdtm/static/dumpable02.c index 024371bd8..7e2eee2d1 100644 --- a/test/zdtm/static/dumpable02.c +++ b/test/zdtm/static/dumpable02.c @@ -13,7 +13,7 @@ const char *test_doc = "Check dumpable flag handling (non-dumpable case)"; const char *test_author = "Filipe Brandenburger "; -int dumpable_server() { +int dumpable_server(void) { char buf[256]; int ret; diff --git a/test/zdtm/static/fdt_shared.c b/test/zdtm/static/fdt_shared.c index 2111356f5..a84444af5 100644 --- a/test/zdtm/static/fdt_shared.c +++ b/test/zdtm/static/fdt_shared.c @@ -22,7 +22,7 @@ TEST_OPTION(filename, string, "file name", 1); #define CHILDREN 4 static int fork_pfd[2]; -static void forked() +static void forked(void) { char c = 0; @@ -32,7 +32,7 @@ static void forked() } } -static void wait_children() +static void wait_children(void) { int i; char c; diff --git a/test/zdtm/static/file_locks00.c b/test/zdtm/static/file_locks00.c index 59e19cfe1..fa98a31b3 100644 --- a/test/zdtm/static/file_locks00.c +++ b/test/zdtm/static/file_locks00.c @@ -101,7 +101,7 @@ static int check_write_lock(int fd, int whence, off_t offset, off_t len) return -1; } 
-static int check_file_locks() +static int check_file_locks(void) { int fd_0, fd_1; int ret0, ret1; diff --git a/test/zdtm/static/inotify_system.c b/test/zdtm/static/inotify_system.c index 59f47c41c..3e6b2ad48 100644 --- a/test/zdtm/static/inotify_system.c +++ b/test/zdtm/static/inotify_system.c @@ -68,7 +68,7 @@ typedef struct { int dir; } desc; -void do_wait() { +void do_wait(void) { test_daemon(); test_waitsig(); } diff --git a/test/zdtm/static/maps00.c b/test/zdtm/static/maps00.c index a6c68cd25..f2da9b975 100644 --- a/test/zdtm/static/maps00.c +++ b/test/zdtm/static/maps00.c @@ -123,7 +123,7 @@ static void segfault(int signo) * after test func should be placed check map, because size of test_func * is calculated as (check_map-test_func) */ -int test_func() +int test_func(void) { return 1; } @@ -176,8 +176,9 @@ static int check_map(struct map *map) memcpy(map->ptr,test_func, getpagesize()); } else { if (!(map->flag & MAP_ANONYMOUS)) { + uint8_t funlen = (uint8_t *)check_map - (uint8_t *)test_func; lseek(map->fd,0,SEEK_SET); - if (write(map->fd,test_func,check_map - test_func)fd,test_func,funlen)filename); return -1; } @@ -185,7 +186,7 @@ static int check_map(struct map *map) } if (!(map->flag & MAP_ANONYMOUS) || map->prot & PROT_WRITE) /* Function body has been copied into the mapping */ - ((int (*)())map->ptr)(); /* perform exec access */ + ((int (*)(void))map->ptr)(); /* perform exec access */ else /* No way to copy function body into mapping, * clear exec bit from effective protection diff --git a/test/zdtm/static/selinux00.c b/test/zdtm/static/selinux00.c index db8420eac..b5b3e3cc0 100644 --- a/test/zdtm/static/selinux00.c +++ b/test/zdtm/static/selinux00.c @@ -26,14 +26,14 @@ const char *test_author = "Adrian Reber "; */ char state; -int check_for_selinux() +int check_for_selinux(void) { if (access("/sys/fs/selinux", F_OK) == 0) return 0; return 1; } -int setprofile() +int setprofile(void) { int fd, len; @@ -54,7 +54,7 @@ int setprofile() return 0; } -int 
checkprofile() +int checkprofile(void) { int fd; char context[1024]; @@ -83,7 +83,7 @@ int checkprofile() return 0; } -int check_sockcreate() +int check_sockcreate(void) { char *output = NULL; FILE *f = fopen("/proc/self/attr/sockcreate", "r"); diff --git a/test/zdtm/static/selinux01.c b/test/zdtm/static/selinux01.c index cec5980e8..cbf145d2a 100644 --- a/test/zdtm/static/selinux01.c +++ b/test/zdtm/static/selinux01.c @@ -28,14 +28,14 @@ const char *test_author = "Adrian Reber "; */ char state; -int check_for_selinux() +int check_for_selinux(void) { if (access("/sys/fs/selinux", F_OK) == 0) return 0; return 1; } -int setprofile() +int setprofile(void) { int fd, len; @@ -56,7 +56,7 @@ int setprofile() return 0; } -int set_sockcreate() +int set_sockcreate(void) { int fd, len; @@ -77,7 +77,7 @@ int set_sockcreate() return 0; } -int check_sockcreate() +int check_sockcreate(void) { int fd; char context[1024]; @@ -106,7 +106,7 @@ int check_sockcreate() return 0; } -int check_sockcreate_empty() +int check_sockcreate_empty(void) { char *output = NULL; FILE *f = fopen("/proc/self/attr/sockcreate", "r"); diff --git a/test/zdtm/static/session02.c b/test/zdtm/static/session02.c index 37f245d2e..f5c81df16 100644 --- a/test/zdtm/static/session02.c +++ b/test/zdtm/static/session02.c @@ -25,7 +25,7 @@ struct process *processes; int nr_processes = 20; int current = 0; -static void cleanup() +static void cleanup(void) { int i; @@ -55,9 +55,9 @@ struct command int arg2; }; -static void handle_command(); +static void handle_command(void); -static void mainloop() +static void mainloop(void) { while (1) handle_command(); @@ -100,7 +100,7 @@ static int make_child(int id, int flags) return cid; } -static void handle_command() +static void handle_command(void) { int sk = processes[current].sks[0], ret, status = 0; struct command cmd; diff --git a/test/zdtm/static/session03.c b/test/zdtm/static/session03.c index 2b3c46c32..8ca16e410 100644 --- a/test/zdtm/static/session03.c +++ 
b/test/zdtm/static/session03.c @@ -36,7 +36,7 @@ static void sigchld_handler(int signal, siginfo_t *siginfo, void *data) waitpid(pid, NULL, WNOHANG); } -static void cleanup() +static void cleanup(void) { int i, ret; @@ -72,7 +72,7 @@ enum commands int cmd_weght[TEST_MAX] = {10, 3, 1, 10, 7}; int sum_weight = 0; -static int get_rnd_op() +static int get_rnd_op(void) { int i, m; if (sum_weight == 0) { @@ -97,9 +97,9 @@ struct command int arg2; }; -static void handle_command(); +static void handle_command(void); -static void mainloop() +static void mainloop(void) { while (1) handle_command(); @@ -142,7 +142,7 @@ static int make_child(int id, int flags) return cid; } -static void handle_command() +static void handle_command(void) { int sk = processes[current].sks[0], ret, status = 0; struct command cmd; diff --git a/test/zdtm/transition/netlink00.c b/test/zdtm/transition/netlink00.c index c9b2303e8..3504a48a1 100644 --- a/test/zdtm/transition/netlink00.c +++ b/test/zdtm/transition/netlink00.c @@ -56,12 +56,12 @@ struct rtmsg *rtp; int rtl; struct rtattr *rtap; -int send_request(); -int recv_reply(); -int form_request_add(); -int form_request_del(); -int read_reply(); -typedef int (*cmd_t)(); +int send_request(void); +int recv_reply(void); +int form_request_add(void); +int form_request_del(void); +int read_reply(void); +typedef int (*cmd_t)(void); #define CMD_NUM 2 cmd_t cmd[CMD_NUM]={form_request_add, form_request_del}; @@ -120,7 +120,7 @@ out: return 0; } -int send_request() +int send_request(void) { // create the remote address // to communicate @@ -145,7 +145,7 @@ int send_request() } return 0; } -int recv_reply() +int recv_reply(void) { char *p; // initialize the socket read buffer @@ -191,7 +191,7 @@ int recv_reply() return 0; } -int read_reply() +int read_reply(void) { //string to hold content of the route // table (i.e. 
one entry) @@ -250,7 +250,7 @@ int read_reply() #define NLMSG_TAIL(nmsg) \ ((struct rtattr *) (((void *) (nmsg)) + NLMSG_ALIGN((nmsg)->nlmsg_len))) -int form_request_del() +int form_request_del(void) { bzero(&req, sizeof(req)); req.nl.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)); @@ -272,7 +272,7 @@ int form_request_del() return 0; } -int form_request_add() +int form_request_add(void) { int ifcn = 1; //interface number From e1c4871759d6edb4d7c2d3129981060b873ec912 Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Mon, 11 Nov 2019 19:07:52 +0300 Subject: [PATCH 0285/2030] net: add nftables c/r Starting with CentOS 8, nft is used instead of iptables. CRIU has never supported nft rules, so after c/r all rules are flushed. Co-developed-by: Pavel Tikhomirov Signed-off-by: Pavel Tikhomirov Signed-off-by: Alexander Mikhalitsyn Signed-off-by: Alexander Mikhalitsyn Signed-off-by: Dmitry Safonov --- Makefile.config | 17 ++++ criu/image-desc.c | 1 + criu/include/image-desc.h | 1 + criu/include/magic.h | 1 + criu/net.c | 115 +++++++++++++++++++++++++++ scripts/build/Dockerfile.alpine | 1 + scripts/build/Dockerfile.fedora.tmpl | 2 + scripts/feature-tests.mak | 23 ++++++ 8 files changed, 161 insertions(+) diff --git a/Makefile.config b/Makefile.config index 81aae24f8..161365960 100644 --- a/Makefile.config +++ b/Makefile.config @@ -23,6 +23,23 @@ else $(info Note: Building without GnuTLS support) endif +ifeq ($(call pkg-config-check,libnftables),y) + LIB_NFTABLES := $(shell pkg-config --libs libnftables) + ifeq ($(call try-cc,$(FEATURE_TEST_NFTABLES_LIB_API_0),$(LIB_NFTABLES)),true) + LIBS_FEATURES += $(LIB_NFTABLES) + FEATURE_DEFINES += -DCONFIG_HAS_NFTABLES_LIB_API_0 + else ifeq ($(call try-cc,$(FEATURE_TEST_NFTABLES_LIB_API_1),$(LIB_NFTABLES)),true) + LIBS_FEATURES += $(LIB_NFTABLES) + FEATURE_DEFINES += -DCONFIG_HAS_NFTABLES_LIB_API_1 + else + $(warning Warn: you have libnftables installed but it has incompatible API) + $(warning Warn: Building without nftables
support) + endif +else + $(warning Warn: you have no libnftables installed) + $(warning Warn: Building without nftables support) +endif + export LIBS += $(LIBS_FEATURES) CONFIG_FILE = .config diff --git a/criu/image-desc.c b/criu/image-desc.c index 81cd07484..ae5d817fe 100644 --- a/criu/image-desc.c +++ b/criu/image-desc.c @@ -76,6 +76,7 @@ struct cr_fd_desc_tmpl imgset_template[CR_FD_MAX] = { FD_ENTRY_F(RULE, "rule-%u", O_NOBUF), FD_ENTRY_F(IPTABLES, "iptables-%u", O_NOBUF), FD_ENTRY_F(IP6TABLES, "ip6tables-%u", O_NOBUF), + FD_ENTRY_F(NFTABLES, "nftables-%u", O_NOBUF), FD_ENTRY_F(TMPFS_IMG, "tmpfs-%u.tar.gz", O_NOBUF), FD_ENTRY_F(TMPFS_DEV, "tmpfs-dev-%u.tar.gz", O_NOBUF), FD_ENTRY_F(AUTOFS, "autofs-%u", O_NOBUF), diff --git a/criu/include/image-desc.h b/criu/include/image-desc.h index fea80a719..6db8bf94f 100644 --- a/criu/include/image-desc.h +++ b/criu/include/image-desc.h @@ -42,6 +42,7 @@ enum { CR_FD_RULE, CR_FD_IPTABLES, CR_FD_IP6TABLES, + CR_FD_NFTABLES, CR_FD_NETNS, CR_FD_NETNF_CT, CR_FD_NETNF_EXP, diff --git a/criu/include/magic.h b/criu/include/magic.h index 05101f436..1a583f4ed 100644 --- a/criu/include/magic.h +++ b/criu/include/magic.h @@ -103,6 +103,7 @@ #define TMPFS_DEV_MAGIC RAW_IMAGE_MAGIC #define IPTABLES_MAGIC RAW_IMAGE_MAGIC #define IP6TABLES_MAGIC RAW_IMAGE_MAGIC +#define NFTABLES_MAGIC RAW_IMAGE_MAGIC #define NETNF_CT_MAGIC RAW_IMAGE_MAGIC #define NETNF_EXP_MAGIC RAW_IMAGE_MAGIC diff --git a/criu/net.c b/criu/net.c index 8e6cfaff1..762f9b547 100644 --- a/criu/net.c +++ b/criu/net.c @@ -17,6 +17,10 @@ #include #include +#if defined(CONFIG_HAS_NFTABLES_LIB_API_0) || defined(CONFIG_HAS_NFTABLES_LIB_API_1) +#include +#endif + #ifdef CONFIG_HAS_SELINUX #include #endif @@ -1897,6 +1901,55 @@ static inline int dump_iptables(struct cr_imgset *fds) return 0; } +#if defined(CONFIG_HAS_NFTABLES_LIB_API_0) || defined(CONFIG_HAS_NFTABLES_LIB_API_1) +static inline int dump_nftables(struct cr_imgset *fds) +{ + int ret = -1; + struct cr_img *img; + int 
img_fd; + FILE *fp; + struct nft_ctx *nft; + + nft = nft_ctx_new(NFT_CTX_DEFAULT); + if (!nft) + return -1; + + img = img_from_set(fds, CR_FD_NFTABLES); + img_fd = dup(img_raw_fd(img)); + if (img_fd < 0) { + pr_perror("dup() failed"); + goto nft_ctx_free_out; + } + + fp = fdopen(img_fd, "w"); + if (!fp) { + pr_perror("fdopen() failed"); + close(img_fd); + goto nft_ctx_free_out; + } + + nft_ctx_set_output(nft, fp); +#define DUMP_NFTABLES_CMD "list ruleset" +#if defined(CONFIG_HAS_NFTABLES_LIB_API_0) + if (nft_run_cmd_from_buffer(nft, DUMP_NFTABLES_CMD, strlen(DUMP_NFTABLES_CMD))) +#elif defined(CONFIG_HAS_NFTABLES_LIB_API_1) + if (nft_run_cmd_from_buffer(nft, DUMP_NFTABLES_CMD)) +#else + BUILD_BUG_ON(1); +#endif + goto fp_close_out; + + ret = 0; + +fp_close_out: + fclose(fp); +nft_ctx_free_out: + nft_ctx_free(nft); + + return ret; +} +#endif + static int dump_netns_conf(struct ns_id *ns, struct cr_imgset *fds) { void *buf, *o_buf; @@ -2149,6 +2202,60 @@ out: return ret; } +#if defined(CONFIG_HAS_NFTABLES_LIB_API_0) || defined(CONFIG_HAS_NFTABLES_LIB_API_1) +static inline int restore_nftables(int pid) +{ + int ret = -1; + struct cr_img *img; + struct nft_ctx *nft; + off_t img_data_size; + char *buf; + + img = open_image(CR_FD_NFTABLES, O_RSTR, pid); + if (img == NULL) + return -1; + if (empty_image(img)) { + /* Backward compatibility */ + pr_info("Skipping nft restore, no image"); + ret = 0; + goto image_close_out; + } + + if ((img_data_size = img_raw_size(img)) < 0) + goto image_close_out; + + if (read_img_str(img, &buf, img_data_size) < 0) + goto image_close_out; + + nft = nft_ctx_new(NFT_CTX_DEFAULT); + if (!nft) + goto buf_free_out; + + if (nft_ctx_buffer_output(nft) || nft_ctx_buffer_error(nft) || +#if defined(CONFIG_HAS_NFTABLES_LIB_API_0) + nft_run_cmd_from_buffer(nft, buf, strlen(buf))) +#elif defined(CONFIG_HAS_NFTABLES_LIB_API_1) + nft_run_cmd_from_buffer(nft, buf)) +#else + { + BUILD_BUG_ON(1); + } +#endif + goto nft_ctx_free_out; + + ret = 0; + 
+nft_ctx_free_out: + nft_ctx_free(nft); +buf_free_out: + xfree(buf); +image_close_out: + close_image(img); + + return ret; +} +#endif + int read_net_ns_img(void) { struct ns_id *ns; @@ -2380,6 +2487,10 @@ int dump_net_ns(struct ns_id *ns) ret = dump_rule(fds); if (!ret) ret = dump_iptables(fds); +#if defined(CONFIG_HAS_NFTABLES_LIB_API_0) || defined(CONFIG_HAS_NFTABLES_LIB_API_1) + if (!ret) + ret = dump_nftables(fds); +#endif if (!ret) ret = dump_netns_conf(ns, fds); } else if (ns->type != NS_ROOT) { @@ -2473,6 +2584,10 @@ static int prepare_net_ns_second_stage(struct ns_id *ns) ret = restore_rule(nsid); if (!ret) ret = restore_iptables(nsid); +#if defined(CONFIG_HAS_NFTABLES_LIB_API_0) || defined(CONFIG_HAS_NFTABLES_LIB_API_1) + if (!ret) + ret = restore_nftables(nsid); +#endif } if (!ret) diff --git a/scripts/build/Dockerfile.alpine b/scripts/build/Dockerfile.alpine index a1d1d9191..29a754058 100644 --- a/scripts/build/Dockerfile.alpine +++ b/scripts/build/Dockerfile.alpine @@ -14,6 +14,7 @@ RUN apk update && apk add \ libcap-dev \ libnet-dev \ libnl3-dev \ + nftables \ pkgconfig \ protobuf-c-dev \ protobuf-dev \ diff --git a/scripts/build/Dockerfile.fedora.tmpl b/scripts/build/Dockerfile.fedora.tmpl index 0500a8fc5..138588bce 100644 --- a/scripts/build/Dockerfile.fedora.tmpl +++ b/scripts/build/Dockerfile.fedora.tmpl @@ -10,6 +10,8 @@ RUN dnf install -y \ gnutls-devel \ iproute \ iptables \ + nftables \ + nftables-devel \ libaio-devel \ libasan \ libcap-devel \ diff --git a/scripts/feature-tests.mak b/scripts/feature-tests.mak index 39ddfd053..6f67c6035 100644 --- a/scripts/feature-tests.mak +++ b/scripts/feature-tests.mak @@ -147,4 +147,27 @@ int main(void) return 0; return 0; } + +endef + +define FEATURE_TEST_NFTABLES_LIB_API_0 + +#include + +int main(int argc, char **argv) +{ + return nft_run_cmd_from_buffer(nft_ctx_new(NFT_CTX_DEFAULT), \"cmd\", strlen(\"cmd\")); +} + +endef + +define FEATURE_TEST_NFTABLES_LIB_API_1 + +#include + +int main(int argc, char 
**argv) +{ + return nft_run_cmd_from_buffer(nft_ctx_new(NFT_CTX_DEFAULT), \"cmd\"); +} + endef From acb42456dc707a303b308fef67b2be92c81ab427 Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Mon, 11 Nov 2019 19:20:22 +0300 Subject: [PATCH 0286/2030] zdtm: nft tables preservation test Signed-off-by: Alexander Mikhalitsyn Signed-off-by: Alexander Mikhalitsyn [Added test_author to zdtm test] Signed-off-by: Dmitry Safonov --- scripts/build/Dockerfile.alpine | 1 + test/zdtm/static/Makefile | 1 + test/zdtm/static/netns-nft.c | 64 ++++++++++++++++++++++++++++ test/zdtm/static/netns-nft.checkskip | 3 ++ test/zdtm/static/netns-nft.desc | 5 +++ 5 files changed, 74 insertions(+) create mode 100644 test/zdtm/static/netns-nft.c create mode 100755 test/zdtm/static/netns-nft.checkskip create mode 100644 test/zdtm/static/netns-nft.desc diff --git a/scripts/build/Dockerfile.alpine b/scripts/build/Dockerfile.alpine index 29a754058..601a8693a 100644 --- a/scripts/build/Dockerfile.alpine +++ b/scripts/build/Dockerfile.alpine @@ -31,6 +31,7 @@ RUN apk add \ py-pip \ ip6tables \ iptables \ + nftables \ iproute2 \ tar \ bash \ diff --git a/test/zdtm/static/Makefile b/test/zdtm/static/Makefile index ea5d3c42e..19d93e315 100644 --- a/test/zdtm/static/Makefile +++ b/test/zdtm/static/Makefile @@ -288,6 +288,7 @@ TST_FILE = \ file_locks07 \ file_locks08 \ netns-nf \ + netns-nft \ maps_file_prot \ socket_close_data01 \ diff --git a/test/zdtm/static/netns-nft.c b/test/zdtm/static/netns-nft.c new file mode 100644 index 000000000..f4991afda --- /dev/null +++ b/test/zdtm/static/netns-nft.c @@ -0,0 +1,64 @@ +#include +#include +#include +#include + +#include "zdtmtst.h" + +const char *test_doc = "Check that nft rules (some) are kept"; +const char *test_author = "Alexander Mikhalitsyn "; + +char *filename; +TEST_OPTION(filename, string, "file name", 1); + +int main(int argc, char **argv) +{ + char cmd[128]; + + test_init(argc, argv); + + /* create nft table */ + if (system("nft add table 
inet netns-nft-zdtm-test")) { + pr_perror("Can't create nft table"); + return -1; + } + + /* create input chain in table */ + if (system("nft add chain inet netns-nft-zdtm-test input { type filter hook input priority 0 \\; }")) { + pr_perror("Can't create input chain in nft table"); + return -1; + } + + /* block ICMPv4 traffic */ + if (system("nft add rule inet netns-nft-zdtm-test input meta nfproto ipv4 icmp type { echo-request } reject")) { + pr_perror("Can't set input rule"); + return -1; + } + + /* save resulting nft table */ + sprintf(cmd, "nft list table inet netns-nft-zdtm-test > pre-%s", filename); + if (system(cmd)) { + pr_perror("Can't get nft table"); + return -1; + } + + test_daemon(); + test_waitsig(); + + /* get nft table */ + sprintf(cmd, "nft list table inet netns-nft-zdtm-test > post-%s", filename); + if (system(cmd)) { + fail("Can't get nft table"); + return -1; + } + + /* compare nft table before/after c/r */ + sprintf(cmd, "diff pre-%s post-%s", filename, filename); + if (system(cmd)) { + fail("nft table differ"); + return -1; + } + + pass(); + return 0; +} diff --git a/test/zdtm/static/netns-nft.checkskip b/test/zdtm/static/netns-nft.checkskip new file mode 100755 index 000000000..270cafeb5 --- /dev/null +++ b/test/zdtm/static/netns-nft.checkskip @@ -0,0 +1,3 @@ +#!/bin/bash + +test -f /usr/sbin/nft || exit 1 diff --git a/test/zdtm/static/netns-nft.desc b/test/zdtm/static/netns-nft.desc new file mode 100644 index 000000000..f53890a24 --- /dev/null +++ b/test/zdtm/static/netns-nft.desc @@ -0,0 +1,5 @@ +{ 'deps': [ '/bin/sh', + '/usr/sbin/nft', + '/usr/bin/diff'], + 'flags': 'suid', + 'flavor': 'ns uns'} From 7622b7a70eb7a00b2ba542f9118f4351b3df2538 Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Wed, 18 Dec 2019 23:32:32 +0000 Subject: [PATCH 0287/2030] files: fix ghost file error path Signed-off-by: Nicolas Viennot --- criu/files-reg.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/criu/files-reg.c 
b/criu/files-reg.c index 2f68bc03f..90fb7dd7f 100644 --- a/criu/files-reg.c +++ b/criu/files-reg.c @@ -844,10 +844,13 @@ static int dump_ghost_remap(char *path, const struct stat *st, gf->dev = phys_dev; gf->ino = st->st_ino; gf->id = ghost_file_ids++; - list_add_tail(&gf->list, &ghost_files); - if (dump_ghost_file(lfd, gf->id, st, phys_dev)) + if (dump_ghost_file(lfd, gf->id, st, phys_dev)) { + xfree(gf); return -1; + } + + list_add_tail(&gf->list, &ghost_files); dump_entry: rpe.orig_id = id; From 2ac43cd426badfc6d68582b203ace318e5f79427 Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Mon, 30 Dec 2019 20:27:40 +0000 Subject: [PATCH 0288/2030] python: Improve decoding of file flags Signed-off-by: Nicolas Viennot --- lib/py/images/pb2dict.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/lib/py/images/pb2dict.py b/lib/py/images/pb2dict.py index daaa7297e..6fce4be22 100644 --- a/lib/py/images/pb2dict.py +++ b/lib/py/images/pb2dict.py @@ -105,11 +105,22 @@ mmap_status_map = [ ] rfile_flags_map = [ - ('O_WRONLY', 0o1), - ('O_RDWR', 0o2), - ('O_APPEND', 0o2000), - ('O_DIRECT', 0o40000), - ('O_LARGEFILE', 0o100000), + ('O_WRONLY', 0o00000001), + ('O_RDWR', 0o00000002), + ('O_CREAT', 0o00000100), + ('O_EXCL', 0o00000200), + ('O_NOCTTY', 0o00000400), + ('O_TRUNC', 0o00001000), + ('O_APPEND', 0o00002000), + ('O_NONBLOCK', 0o00004000), + ('O_DSYNC', 0o00010000), + ('FASYNC', 0o00020000), + ('O_DIRECT', 0o00040000), + ('O_LARGEFILE', 0o00100000), + ('O_DIRECTORY', 0o00200000), + ('O_NOFOLLOW', 0o00400000), + ('O_NOATIME', 0o01000000), + ('O_CLOEXEC', 0o02000000), ] pmap_flags_map = [ From 8255caf27b5e2bb96af6affc161b8d0d3bbdccbe Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Mon, 30 Dec 2019 20:29:27 +0000 Subject: [PATCH 0289/2030] files: Remove O_CLOEXEC from file flags The kernel artificially adds the O_CLOEXEC flag when reading from the /proc/fdinfo/fd interface if FD_CLOEXEC is set on the file descriptor used to access 
the file. This commit removes the O_CLOEXEC flag in our file flags. To restore the proper FD_CLOEXEC value in each of the file descriptors, CRIU uses fcntl(F_GETFD) to retrieve the FD_CLOEXEC status, and restore it later with fcntl(F_SETFD). This is necessary because multiple file descriptors may point to the same open file. --- criu/files.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/criu/files.c b/criu/files.c index ffdaa459f..e26897870 100644 --- a/criu/files.c +++ b/criu/files.c @@ -382,7 +382,13 @@ static int fill_fd_params(struct pid *owner_pid, int fd, int lfd, p->fs_type = fsbuf.f_type; p->fd = fd; p->pos = fdinfo.pos; - p->flags = fdinfo.flags; + /* + * The kernel artificially adds the O_CLOEXEC flag on the file pointer + * flags by looking at the flags on the file descriptor (see kernel + * code fs/proc/fd.c). FD_CLOEXEC is a file descriptor property, which + * is saved in fd_flags. + */ + p->flags = fdinfo.flags & ~O_CLOEXEC; p->mnt_id = fdinfo.mnt_id; p->pid = owner_pid->real; p->fd_flags = opts->flags; From 75a74423801a83ef7657e67e0b016a76f741db11 Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Mon, 30 Dec 2019 20:21:03 +0000 Subject: [PATCH 0290/2030] files: Add FD_CLOEXEC test --- test/zdtm/static/Makefile | 1 + test/zdtm/static/file_cloexec.c | 63 +++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+) create mode 100644 test/zdtm/static/file_cloexec.c diff --git a/test/zdtm/static/Makefile b/test/zdtm/static/Makefile index 19d93e315..5ca05ee9e 100644 --- a/test/zdtm/static/Makefile +++ b/test/zdtm/static/Makefile @@ -122,6 +122,7 @@ TST_NOFILE := \ groups \ pdeath_sig \ file_fown \ + file_cloexec \ proc-self \ eventfs00 \ epoll \ diff --git a/test/zdtm/static/file_cloexec.c b/test/zdtm/static/file_cloexec.c new file mode 100644 index 000000000..b8eba39e5 --- /dev/null +++ b/test/zdtm/static/file_cloexec.c @@ -0,0 +1,63 @@ +#include +#include +#include +#include +#include +#include +#include +#include 
+#include + +#include "zdtmtst.h" + +const char *test_doc = "Check FD_CLOEXEC flag"; +const char *test_author = "Nicolas Viennot "; + +#define err(exitcode, msg, ...) ({ pr_perror(msg, ##__VA_ARGS__); exit(exitcode); }) + +static void assert_fd_flags(int fd, int mask, int value) +{ + int flags = fcntl(fd, F_GETFD); + if (flags == -1) + err(1, "Can't get fd flags"); + + if ((flags & mask) != value) { + fail("fd flags mismatch"); + exit(1); + } +} + +int main(int argc, char *argv[]) +{ + int fd1, fd2, fd3, fd4; + + test_init(argc, argv); + + fd1 = open("/", O_RDONLY | O_CLOEXEC); + if (fd1 < 0) + err(1, "Can't open()"); + + fd2 = open("/", O_RDONLY); + if (fd2 < 0) + err(1, "Can't open()"); + + fd3 = dup(fd1); + if (fd3 < 0) + err(1, "Can't dup()"); + + fd4 = fcntl(fd2, F_DUPFD_CLOEXEC, 0); + if (fd4 < 0) + err(1, "Can't dup()"); + + test_daemon(); + test_waitsig(); + + assert_fd_flags(fd1, FD_CLOEXEC, FD_CLOEXEC); + assert_fd_flags(fd2, FD_CLOEXEC, 0); + assert_fd_flags(fd3, FD_CLOEXEC, 0); + assert_fd_flags(fd4, FD_CLOEXEC, FD_CLOEXEC); + + pass(); + + return 0; +} From 8b5dea33f6bff8e8d35e709e026218caf7d4a2d8 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Thu, 9 Jan 2020 10:31:41 +0000 Subject: [PATCH 0291/2030] travis: switch alpine to python3 Now that Python 2 has officially reached its end of life also switch the Alpine based test to Python 3. Signed-off-by: Adrian Reber --- scripts/build/Dockerfile.alpine | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/scripts/build/Dockerfile.alpine b/scripts/build/Dockerfile.alpine index 601a8693a..5785102da 100644 --- a/scripts/build/Dockerfile.alpine +++ b/scripts/build/Dockerfile.alpine @@ -18,7 +18,7 @@ RUN apk update && apk add \ pkgconfig \ protobuf-c-dev \ protobuf-dev \ - python \ + python3 \ sudo COPY . 
/criu @@ -28,7 +28,6 @@ RUN mv .ccache /tmp && make mrproper && ccache -sz && \ date && make -j $(nproc) CC="$CC" && date && ccache -s RUN apk add \ - py-pip \ ip6tables \ iptables \ nftables \ @@ -37,10 +36,16 @@ RUN apk add \ bash \ go \ e2fsprogs \ + py-yaml \ + py3-flake8 \ asciidoctor # The rpc test cases are running as user #1000, let's add the user RUN adduser -u 1000 -D test -RUN pip install PyYAML future protobuf ipaddress junit_xml flake8 +RUN pip3 install protobuf junit_xml + +# For zdtm we need an unversioned python binary +RUN ln -s /usr/bin/python3 /usr/bin/python + RUN make -C test/zdtm From 1e9ff2aa03206102a7aeaf1d32f61056d3d05e46 Mon Sep 17 00:00:00 2001 From: Nidhi Gupta Date: Sun, 8 Dec 2019 10:14:40 +0530 Subject: [PATCH 0292/2030] Add Socket-based Java Functional Tests Signed-off-by: Nidhi Gupta --- scripts/build/Dockerfile.openj9-ubuntu | 1 + test/javaTests/README.md | 11 +- .../criu/java/tests/CheckpointRestore.java | 1 + .../src/org/criu/java/tests/Helper.java | 2 +- .../src/org/criu/java/tests/SocketHelper.java | 100 ++++++++ .../src/org/criu/java/tests/Sockets.java | 141 ++++++++++++ .../org/criu/java/tests/SocketsClient.java | 133 +++++++++++ .../org/criu/java/tests/SocketsConnect.java | 157 +++++++++++++ .../criu/java/tests/SocketsConnectClient.java | 130 +++++++++++ .../criu/java/tests/SocketsConnectServer.java | 151 ++++++++++++ .../src/org/criu/java/tests/SocketsData.java | 156 +++++++++++++ .../criu/java/tests/SocketsDataClient.java | 141 ++++++++++++ .../criu/java/tests/SocketsDataServer.java | 124 ++++++++++ .../org/criu/java/tests/SocketsListen.java | 153 +++++++++++++ .../criu/java/tests/SocketsListenClient.java | 136 +++++++++++ .../criu/java/tests/SocketsListenServer.java | 160 +++++++++++++ .../org/criu/java/tests/SocketsMultiple.java | 152 +++++++++++++ .../java/tests/SocketsMultipleClient.java | 174 ++++++++++++++ .../java/tests/SocketsMultipleServer.java | 215 ++++++++++++++++++ .../org/criu/java/tests/SocketsServer.java | 
142 ++++++++++++ test/javaTests/test.xml | 46 ++++ 21 files changed, 2424 insertions(+), 2 deletions(-) create mode 100644 test/javaTests/src/org/criu/java/tests/SocketHelper.java create mode 100644 test/javaTests/src/org/criu/java/tests/Sockets.java create mode 100644 test/javaTests/src/org/criu/java/tests/SocketsClient.java create mode 100644 test/javaTests/src/org/criu/java/tests/SocketsConnect.java create mode 100644 test/javaTests/src/org/criu/java/tests/SocketsConnectClient.java create mode 100644 test/javaTests/src/org/criu/java/tests/SocketsConnectServer.java create mode 100644 test/javaTests/src/org/criu/java/tests/SocketsData.java create mode 100644 test/javaTests/src/org/criu/java/tests/SocketsDataClient.java create mode 100644 test/javaTests/src/org/criu/java/tests/SocketsDataServer.java create mode 100644 test/javaTests/src/org/criu/java/tests/SocketsListen.java create mode 100644 test/javaTests/src/org/criu/java/tests/SocketsListenClient.java create mode 100644 test/javaTests/src/org/criu/java/tests/SocketsListenServer.java create mode 100644 test/javaTests/src/org/criu/java/tests/SocketsMultiple.java create mode 100644 test/javaTests/src/org/criu/java/tests/SocketsMultipleClient.java create mode 100644 test/javaTests/src/org/criu/java/tests/SocketsMultipleServer.java create mode 100644 test/javaTests/src/org/criu/java/tests/SocketsServer.java diff --git a/scripts/build/Dockerfile.openj9-ubuntu b/scripts/build/Dockerfile.openj9-ubuntu index 13d9080ff..f235cc004 100644 --- a/scripts/build/Dockerfile.openj9-ubuntu +++ b/scripts/build/Dockerfile.openj9-ubuntu @@ -18,6 +18,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends protobuf-c-comp make \ git \ pkg-config \ + iptables \ gcc \ maven diff --git a/test/javaTests/README.md b/test/javaTests/README.md index 670741677..4315b9b12 100644 --- a/test/javaTests/README.md +++ b/test/javaTests/README.md @@ -30,7 +30,16 @@ Here we test the File-Based Java APIs by checkpointing the application 
in the fo ## Memory mapping Java APIs Here we test the Memory Mapping APIs by checkpointing the application in following scenario and verifying the contents after restore: -- Memory-mapping a file and writing its content to another file. (MemoryMappings.java) +- Memory-mapping a file and writing its content to another file. (MemoryMappings.java) + +## Socket-based Java APIs + +Here we test the Socket-based APIs by checkpointing the application in the following scenarios and verifying the state after restore: +- Checkpointing the server process in the middle of data transfer. (Sockets.java) +- Checkpointing the server process after it has bound to a port but is not listening for client connections. (SocketsListen.java) +- Checkpointing the server process while it is listening for client connections, and no client has connected yet. (SocketsConnect.java) +- Checkpointing the server process when it has multiple clients in multiple states connected to it. (SocketsMultiple.java) +- Checkpointing the client process in the middle of data transfer. (SocketsData.java) ### Prerequisites for running the tests: - Maven diff --git a/test/javaTests/src/org/criu/java/tests/CheckpointRestore.java b/test/javaTests/src/org/criu/java/tests/CheckpointRestore.java index b848c9938..9d61e126f 100644 --- a/test/javaTests/src/org/criu/java/tests/CheckpointRestore.java +++ b/test/javaTests/src/org/criu/java/tests/CheckpointRestore.java @@ -154,6 +154,7 @@ public class CheckpointRestore { */ while (Helper.STATE_INIT == currentState) { currentState = mappedByteBuffer.getChar(Helper.MAPPED_INDEX); + Thread.sleep(100); } /* diff --git a/test/javaTests/src/org/criu/java/tests/Helper.java b/test/javaTests/src/org/criu/java/tests/Helper.java index fdf20bb52..9a1b33328 100644 --- a/test/javaTests/src/org/criu/java/tests/Helper.java +++ b/test/javaTests/src/org/criu/java/tests/Helper.java @@ -30,7 +30,7 @@ class Helper { * the pid to the pidFile.
* * @param testName Name of the java test - * @param pid Pid of the java test process + * @param pid Pid of the java test process * @param logger * @return 0 or 1 denoting whether the function was successful or not. * @throws IOException diff --git a/test/javaTests/src/org/criu/java/tests/SocketHelper.java b/test/javaTests/src/org/criu/java/tests/SocketHelper.java new file mode 100644 index 000000000..684125019 --- /dev/null +++ b/test/javaTests/src/org/criu/java/tests/SocketHelper.java @@ -0,0 +1,100 @@ +package org.criu.java.tests; + +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.nio.MappedByteBuffer; +import java.util.logging.FileHandler; +import java.util.logging.Level; +import java.util.logging.Logger; +import java.util.logging.SimpleFormatter; + +class SocketHelper { + + static char STATE_LISTEN = 'S'; + static char STATE_SUCCESS = 'Z'; + static String IP_ADDRESS = "127.0.0.1"; + + /** + * Creates a new log file, for the logger to log in. 
+ * + * @param testName Name of the server or client program + * @param parentTestName Name of the test + * @param logger + * @throws IOException + */ + static void init(String testName, String parentTestName, Logger logger) throws IOException { + FileHandler handler = new FileHandler(Helper.OUTPUT_FOLDER_NAME + "/" + parentTestName + "/" + testName + ".log", false); + handler.setFormatter(new SimpleFormatter()); + handler.setLevel(Level.FINE); + logger.addHandler(handler); + logger.setLevel(Level.FINE); + } + + /** + * Writes pid of the process to be checkpointed in the file + * + * @param parentTestName Name of the test + * @param pid Pid of the process to be checkpointed + * @throws IOException + */ + static void writePid(String parentTestName, String pid) throws IOException { + File pidfile = new File(Helper.OUTPUT_FOLDER_NAME + "/" + parentTestName + "/" + parentTestName + Helper.PID_APPEND); + BufferedWriter pidwriter = new BufferedWriter(new FileWriter(pidfile)); + /* + * Overwriting pid to be checkpointed + */ + pidwriter.write(pid + "\n"); + pidwriter.close(); + } + + /** + * Waits for the MappedByteBuffer to change state from STATE_CHECKPOINT to STATE_RESTORE + * + * @param socketMappedBuffer MappedByteBuffer between the client, server and the controller process. 
+ * @param logger + */ + static void socketWaitForRestore(MappedByteBuffer socketMappedBuffer, Logger logger) { + while (Helper.STATE_CHECKPOINT == socketMappedBuffer.getChar(Helper.MAPPED_INDEX)) { + ; + } + if (Helper.STATE_RESTORE != socketMappedBuffer.getChar(Helper.MAPPED_INDEX)) { + logger.log(Level.SEVERE, "Server socket was not in expected restore state " + socketMappedBuffer.getChar(Helper.MAPPED_INDEX)); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } else { + logger.log(Level.INFO, "Restored!!!"); + } + } + + /** + * Puts the MappedByteBuffer to Helper.STATE_CHECKPOINT and waits for CheckpointRestore.java to change its state to Helper.STATE_RESTORE + * + * @param b MappedByteBuffer between the controller process and CheckpointRestore.java + * @param logger Logger to log the messages + * @param p1 Process object for the client process + * @param p2 Process object for the server process + */ + static void checkpointAndWait(MappedByteBuffer b, Logger logger, Process p1, Process p2) { + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_CHECKPOINT); + char c = b.getChar(Helper.MAPPED_INDEX); + while (Helper.STATE_CHECKPOINT == c) { + c = b.getChar(Helper.MAPPED_INDEX); + } + if (Helper.STATE_TERMINATE == c) { + logger.log(Level.SEVERE, "Error during checkpoint-restore, Test terminated"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + p1.destroy(); + p2.destroy(); + System.exit(1); + } + if (Helper.STATE_RESTORE != c) { + logger.log(Level.SEVERE, "Error: Test state is not the expected Restored state"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + p1.destroy(); + p2.destroy(); + System.exit(1); + } + } +} diff --git a/test/javaTests/src/org/criu/java/tests/Sockets.java b/test/javaTests/src/org/criu/java/tests/Sockets.java new file mode 100644 index 000000000..94cc217c4 --- /dev/null +++ b/test/javaTests/src/org/criu/java/tests/Sockets.java @@ -0,0 +1,141 @@ +package org.criu.java.tests; + +import 
java.io.File; +import java.io.PrintWriter; +import java.io.StringWriter; +import java.lang.management.ManagementFactory; +import java.lang.management.RuntimeMXBean; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.channels.FileChannel.MapMode; +import java.nio.file.StandardOpenOption; +import java.util.logging.Level; +import java.util.logging.Logger; + +class Sockets { + static String TESTNAME = "Sockets"; + + /** + * Runs the client and server process, checkpoints the server process while its in the middle of data transfer + * + * @param args Not used + */ + public static void main(String[] args) { + MappedByteBuffer b = null, socketMappedBuffer = null; + FileChannel channel; + String pid; + String port = "49200"; + Logger logger = null; + try { + logger = Logger.getLogger(Helper.PACKAGE_NAME + "." + TESTNAME); + + /* + * Mapped buffer 'b' to communicate between CheckpointRestore.java and this process. + */ + File f = new File(Helper.MEMORY_MAPPED_FILE_NAME); + channel = FileChannel.open(f.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE); + b = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE); + channel.close(); + + RuntimeMXBean bean = ManagementFactory.getRuntimeMXBean(); + pid = bean.getName(); + Helper.init(TESTNAME, pid, logger); + logger.log(Level.INFO, "Test init done; pid written to pid file; beginning with test"); + + if (Helper.STATE_INIT != b.getChar(Helper.MAPPED_INDEX)) { + logger.log(Level.SEVERE, "Error: Error in memory mapping, test is not in init state"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + logger.log(Level.INFO, "Creating socketBufferFile and setting the init value of buffer"); + + /* + * Socket Mapped Buffer to communicate between server process, client process and this process. 
+ */ + File socketfile = new File(Helper.OUTPUT_FOLDER_NAME + "/" + TESTNAME + "/SocketsFile"); + channel = FileChannel.open(socketfile.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE); + socketMappedBuffer = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE); + channel.close(); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_INIT); + + logger.log(Level.INFO, "Starting server and client process"); + ProcessBuilder builder = new ProcessBuilder("java", "-cp", "target/classes", Helper.PACKAGE_NAME + "." + "SocketsServer", TESTNAME, port); + Process serverProcess = builder.start(); + logger.log(Level.INFO, "Server process started"); + builder = new ProcessBuilder("java", "-cp", "target/classes", Helper.PACKAGE_NAME + "." + "SocketsClient", TESTNAME, port); + Process clientProcess = builder.start(); + + while (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_INIT || socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == SocketHelper.STATE_LISTEN) { + ; + } + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_END) { + logger.log(Level.SEVERE, "Killing the server and client process"); + logger.log(Level.SEVERE, "Error took place in the client or server process; check their log for details"); + serverProcess.destroy(); + clientProcess.destroy(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != Helper.STATE_CHECKPOINT) { + logger.log(Level.SEVERE, "Killing the server and client process"); + logger.log(Level.SEVERE, "State is not the expected 'to be checkpointed' state"); + serverProcess.destroy(); + clientProcess.destroy(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_CHECKPOINT) { + logger.log(Level.INFO, "Going to checkpoint server process"); + SocketHelper.checkpointAndWait(b, logger, serverProcess, 
clientProcess); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_RESTORE); + logger.log(Level.INFO, "Process has been restored"); + } + /* + * Loop while test is running. + */ + while (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_RESTORE || socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == SocketHelper.STATE_SUCCESS) { + ; + } + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != Helper.STATE_FAIL && socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != Helper.STATE_PASS) { + logger.log(Level.SEVERE, "Killing the server and client process"); + logger.log(Level.SEVERE, "Received wrong message from the child process: not the expected finish message"); + logger.log(Level.SEVERE, "Check their log files for more details"); + clientProcess.destroy(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_FAIL) { + logger.log(Level.SEVERE, "Killing the server and client process"); + logger.log(Level.SEVERE, "Error in the client or server process: check their log for details"); + clientProcess.destroy(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + + /* + * Client process puts socketMappedBuffer to Pass state if the test passed. 
+ */ + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_PASS) { + logger.log(Level.INFO, Helper.PASS_MESSAGE); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_PASS); + } else { + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + + System.exit(0); + + } catch (Exception e) { + if (null != logger) { + StringWriter writer = new StringWriter(); + PrintWriter printWriter = new PrintWriter(writer); + e.printStackTrace(printWriter); + logger.log(Level.SEVERE, "Exception occured:" + e); + logger.log(Level.FINE, writer.toString()); + } + if (b != null) { + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + System.exit(5); + } + } +} diff --git a/test/javaTests/src/org/criu/java/tests/SocketsClient.java b/test/javaTests/src/org/criu/java/tests/SocketsClient.java new file mode 100644 index 000000000..1c8e7b9a1 --- /dev/null +++ b/test/javaTests/src/org/criu/java/tests/SocketsClient.java @@ -0,0 +1,133 @@ +package org.criu.java.tests; + +import java.io.*; +import java.net.Socket; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.channels.FileChannel.MapMode; +import java.nio.file.StandardOpenOption; +import java.util.logging.Level; +import java.util.logging.Logger; + +class SocketsClient { + static String TESTNAME = "SocketsClient"; + + public static void main(String[] args) { + MappedByteBuffer socketMappedBuffer = null; + FileChannel channel; + Socket socket = null; + Logger logger = null; + String msg1 = "Ch@ckM@$$@Ge!1", msg2 = "cH@C!m$SG!!2", + readMssg, msg3 = "@Ft@rCPM$$g3", msg4 = "Aft@rCPM$$g4"; + String parentTestName, portArg; + int port; + + try { + parentTestName = args[0]; + portArg = args[1]; + port = Integer.parseInt(portArg); + + /* + * Socket Mapped Buffer to communicate between server process, client process and the calling parent process. 
+ */ + File socketfile = new File(Helper.OUTPUT_FOLDER_NAME + "/" + parentTestName + "/SocketsFile"); + channel = FileChannel.open(socketfile.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE); + socketMappedBuffer = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE); + channel.close(); + + logger = Logger.getLogger(Helper.PACKAGE_NAME + "." + TESTNAME); + SocketHelper.init(TESTNAME, parentTestName, logger); + + logger.log(Level.INFO, "Begin"); + logger.log(Level.INFO, "Parent name: " + parentTestName); + + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != Helper.STATE_INIT && socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != SocketHelper.STATE_LISTEN) { + logger.log(Level.SEVERE, "Error: Socket-buffer not in expected Init state"); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + } + logger.log(Level.INFO, "Client socket sending req to server at IP: 127.0.0.1 port:" + port); + + /* + * Ensure client does not try to connect to port before server has bound itself. + */ + while (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_INIT) { + ; + } + /* + * Socket Buffer should be put in SocketHelper.STATE_LISTEN state by server process, just before + * it starts listening for client connections. 
+ */ + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != SocketHelper.STATE_LISTEN) { + logger.log(Level.SEVERE, "Error: Buffer does not contain the expected 'server bound to port and listening' state"); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + /* + * Ensure server has bound to port + */ + try { + Thread.sleep(10); + } catch (InterruptedException e) { + logger.log(Level.WARNING, "InterruptedException occurred!"); + } + + socket = new Socket(SocketHelper.IP_ADDRESS, port); + + PrintStream out = new PrintStream(socket.getOutputStream()); + BufferedReader br = new BufferedReader(new InputStreamReader(socket.getInputStream())); + + logger.log(Level.INFO, "Sending message to server " + msg1); + out.println(msg1); + + readMssg = br.readLine(); + logger.log(Level.INFO, "Message received from server " + readMssg); + if (!msg2.equals(readMssg)) { + logger.log(Level.SEVERE, "Error: wrong message received; message expected " + msg2); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + } + + logger.log(Level.INFO, "Sending message to server " + msg3); + out.println(msg3); + + readMssg = br.readLine(); + logger.log(Level.INFO, "Message received from server " + readMssg); + if (!msg4.equals(readMssg)) { + logger.log(Level.SEVERE, "Error: wrong message received; message expected " + msg4); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + + socket.close(); + /* + * Wait for server process to end and then check whether it ended successfully or not + * If it has finished properly the socketMappedBuffer will contain SocketHelper.STATE_SUCCESS + */ + logger.log(Level.INFO, "Waiting for server process to end...."); + while (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_RESTORE) { + ; + } + /* + * Check the server process has ended successfully, if it was a success put Mapped Buffer to pass state, else to failed state + */ + if 
(socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == SocketHelper.STATE_SUCCESS) { + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_PASS); + } else { + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + logger.log(Level.INFO, "Test ends"); + + } catch (Exception exception) { + if (null != logger) { + StringWriter writer = new StringWriter(); + PrintWriter printWriter = new PrintWriter(writer); + exception.printStackTrace(printWriter); + logger.log(Level.SEVERE, "Exception occured:" + exception); + logger.log(Level.FINE, writer.toString()); + } + + if (socketMappedBuffer != null) { + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + } + } +} diff --git a/test/javaTests/src/org/criu/java/tests/SocketsConnect.java b/test/javaTests/src/org/criu/java/tests/SocketsConnect.java new file mode 100644 index 000000000..164c21089 --- /dev/null +++ b/test/javaTests/src/org/criu/java/tests/SocketsConnect.java @@ -0,0 +1,157 @@ +package org.criu.java.tests; + +import java.io.File; +import java.io.PrintWriter; +import java.io.StringWriter; +import java.lang.management.ManagementFactory; +import java.lang.management.RuntimeMXBean; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.channels.FileChannel.MapMode; +import java.nio.file.StandardOpenOption; +import java.util.logging.Level; +import java.util.logging.Logger; + +class SocketsConnect { + static String TESTNAME = "SocketsConnect"; + + /** + * Runs the client and server process, checkpoints the server when its listening for incoming client connection requests on a port but no client has connected yet + * + * @param args Not used + */ + public static void main(String[] args) { + MappedByteBuffer b = null, socketMappedBuffer = null; + FileChannel channel; + String pid; + String port = "49200"; + Logger logger = null; + try { + logger = Logger.getLogger(Helper.PACKAGE_NAME + "." 
+ TESTNAME); + + /* + * Mapped buffer 'b' to communicate between CheckpointRestore.java and this process. + */ + File f = new File(Helper.MEMORY_MAPPED_FILE_NAME); + channel = FileChannel.open(f.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE); + b = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE); + channel.close(); + + RuntimeMXBean bean = ManagementFactory.getRuntimeMXBean(); + pid = bean.getName(); + Helper.init(TESTNAME, pid, logger); + logger.log(Level.INFO, "Test init done; pid written to pid file; beginning with test"); + + if (b.getChar(Helper.MAPPED_INDEX) != Helper.STATE_INIT) { + logger.log(Level.SEVERE, "Error: Error in memory mapping, test is not in init state"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + + /* + * Socket Mapped Buffer to communicate between server process, client process and this process. + */ + logger.log(Level.INFO, "Creating socketbufferfile and setting the init value of buffer"); + File socketfile = new File(Helper.OUTPUT_FOLDER_NAME + "/" + TESTNAME + "/SocketsConnectFile"); + channel = FileChannel.open(socketfile.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE); + socketMappedBuffer = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE); + channel.close(); + + /* + * Set socketMappedBuffer to init state. + */ + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_INIT); + logger.log(Level.INFO, "Starting server and client process"); + ProcessBuilder builder = new ProcessBuilder("java", "-cp", "target/classes", Helper.PACKAGE_NAME + "." + "SocketsConnectServer", TESTNAME, port); + Process serverProcess = builder.start(); + builder = new ProcessBuilder("java", "-cp", "target/classes", Helper.PACKAGE_NAME + "." 
+ "SocketsConnectClient", TESTNAME, port); + Process clientProcess = builder.start(); + + while (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_INIT) { + ; + } + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_END) { + logger.log(Level.SEVERE, "Killing the server process and client process"); + logger.log(Level.SEVERE, "Some error took place in the client or server process: check their log for details"); + serverProcess.destroy(); + clientProcess.destroy(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_FAIL) { + logger.log(Level.SEVERE, "Killing the server process and client process"); + logger.log(Level.SEVERE, "Exception occured in the client or server process: check their log for details"); + serverProcess.destroy(); + clientProcess.destroy(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != Helper.STATE_CHECKPOINT) { + logger.log(Level.SEVERE, "Killing the server process and client process"); + logger.log(Level.SEVERE, "State is not the expected 'to be checkpointed' state"); + serverProcess.destroy(); + clientProcess.destroy(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_CHECKPOINT) { + logger.log(Level.INFO, "Going to checkpoint server process"); + try { + Thread.sleep(10); + } catch (InterruptedException e) { + logger.log(Level.WARNING, "Thread was interrupted"); + } + SocketHelper.checkpointAndWait(b, logger, serverProcess, clientProcess); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_RESTORE); + logger.log(Level.INFO, "Process has been restored!"); + } + + while (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_RESTORE || socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == SocketHelper.STATE_LISTEN) { + ; + } + char 
bufchar = socketMappedBuffer.getChar(Helper.MAPPED_INDEX); + if (bufchar != Helper.STATE_FAIL && bufchar != Helper.STATE_PASS && bufchar != SocketHelper.STATE_SUCCESS) { + logger.log(Level.SEVERE, "Received wrong message from the child process: not the expected finish message"); + logger.log(Level.SEVERE, "Check their log files for more details"); + clientProcess.destroy(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_FAIL || socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_END) { + logger.log(Level.SEVERE, "Error in the client or server process: check their log for details"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + while (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == SocketHelper.STATE_SUCCESS) { + ; + } + + /* + * Client process puts socketMappedBuffer to 'P'-Pass state if the test passed. + * Send pass message to Checkpoint-restore.java + */ + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_PASS) { + logger.log(Level.INFO, Helper.PASS_MESSAGE); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_PASS); + } else { + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + + System.exit(0); + + } catch (Exception e) { + if (null != logger) { + StringWriter writer = new StringWriter(); + PrintWriter printWriter = new PrintWriter(writer); + e.printStackTrace(printWriter); + logger.log(Level.SEVERE, "Exception occured:" + e); + logger.log(Level.FINE, writer.toString()); + } + if (b != null) { + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + System.exit(5); + } + } +} diff --git a/test/javaTests/src/org/criu/java/tests/SocketsConnectClient.java b/test/javaTests/src/org/criu/java/tests/SocketsConnectClient.java new file mode 100644 index 000000000..ed1c7fab3 --- /dev/null +++ b/test/javaTests/src/org/criu/java/tests/SocketsConnectClient.java @@ -0,0 +1,130 @@ +package org.criu.java.tests; 
+ +import java.io.*; +import java.net.Socket; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.channels.FileChannel.MapMode; +import java.nio.file.StandardOpenOption; +import java.util.logging.Level; +import java.util.logging.Logger; + +class SocketsConnectClient { + static String TESTNAME = "SocketsConnectClient"; + + public static void main(String[] args) { + MappedByteBuffer socketMappedBuffer = null; + FileChannel channel; + Socket socket = null; + String parentTestName, portArg; + int port; + Logger logger = null; + try { + parentTestName = args[0]; + portArg = args[1]; + port = Integer.parseInt(portArg); + + String msg1 = "Ch@ckM@$$@Ge!1", msg2 = "cH@C!m$SG!!2", + readMssg, msg3 = "@Ft@rCPM$$g3", msg4 = "Aft@rCPM$$g4"; + + /* + * Socket Mapped Buffer to communicate between server process, client process and the calling parent process. + */ + File socketfile = new File(Helper.OUTPUT_FOLDER_NAME + "/" + parentTestName + "/SocketsConnectFile"); + channel = FileChannel.open(socketfile.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE); + socketMappedBuffer = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE); + channel.close(); + + logger = Logger.getLogger(Helper.PACKAGE_NAME + "." 
+ TESTNAME); + SocketHelper.init(TESTNAME, parentTestName, logger); + + logger.log(Level.INFO, "Begin"); + logger.log(Level.INFO, "Parent name: " + parentTestName); + + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != Helper.STATE_INIT && socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != Helper.STATE_CHECKPOINT && socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != Helper.STATE_RESTORE) { + logger.log(Level.SEVERE, "Error: Socket-buffer not in expected Init state"); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + logger.log(Level.INFO, "Waiting for CR"); + /* + * Wait for Checkpoint-Restore to occur + */ + while (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_INIT || socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_CHECKPOINT) { + ; + } + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != Helper.STATE_RESTORE) { + logger.log(Level.SEVERE, "Error:Buffer does not contain the expected restored state: " + socketMappedBuffer.getChar(Helper.MAPPED_INDEX)); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + + logger.log(Level.INFO, "Restored"); + logger.log(Level.INFO, "Client socket sending req to server at IP: 127.0.0.1 port:" + port); + + /* + * Server should has have been listening for client connections when it was checkpointed, and it should continue to listen after restore. 
+ */ + try { + socket = new Socket(SocketHelper.IP_ADDRESS, port); + } catch (Exception e) { + logger.log(Level.SEVERE, "Exception occured when connecting to port: " + e); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + PrintStream out = new PrintStream(socket.getOutputStream()); + BufferedReader br = new BufferedReader(new InputStreamReader(socket.getInputStream())); + + logger.log(Level.INFO, "Sending message to server " + msg1); + out.println(msg1); + + readMssg = br.readLine(); + logger.log(Level.INFO, "message received from server " + readMssg); + if (!msg2.equals(readMssg)) { + logger.log(Level.SEVERE, "wrong message received; Expected " + msg2); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + logger.log(Level.INFO, "Sending message to server " + msg3); + out.println(msg3); + + readMssg = br.readLine(); + logger.log(Level.INFO, "message received from server " + readMssg); + if (!msg4.equals(readMssg)) { + logger.log(Level.SEVERE, "wrong message received; Expected " + msg4); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + + socket.close(); + + /* + * Wait for server process to end. 
+ */ + while (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_RESTORE) { + ; + } + /* + * Check the server process has ended successfully, if it was a success put Mapped Buffer to pass state, else to failed state + */ + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == SocketHelper.STATE_SUCCESS) { + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_PASS); + } else { + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + } catch (Exception exception) { + if (null != logger) { + StringWriter writer = new StringWriter(); + PrintWriter printWriter = new PrintWriter(writer); + exception.printStackTrace(printWriter); + logger.log(Level.SEVERE, "Exception occured:" + exception); + logger.log(Level.FINE, writer.toString()); + } + + if (socketMappedBuffer != null) { + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + } + } +} diff --git a/test/javaTests/src/org/criu/java/tests/SocketsConnectServer.java b/test/javaTests/src/org/criu/java/tests/SocketsConnectServer.java new file mode 100644 index 000000000..1e4cf3aeb --- /dev/null +++ b/test/javaTests/src/org/criu/java/tests/SocketsConnectServer.java @@ -0,0 +1,151 @@ +package org.criu.java.tests; + +import java.io.*; +import java.lang.management.ManagementFactory; +import java.lang.management.RuntimeMXBean; +import java.net.ServerSocket; +import java.net.Socket; +import java.net.SocketException; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.channels.FileChannel.MapMode; +import java.nio.file.StandardOpenOption; +import java.util.logging.Level; +import java.util.logging.Logger; + +class SocketsConnectServer { + static String TESTNAME = "SocketsConnectServer"; + + public static void main(String[] args) { + MappedByteBuffer socketMappedBuffer = null; + FileChannel channel; + Socket socket = null; + String msg1 = "Ch@ckM@$$@Ge!1", msg2 = "cH@C!m$SG!!2", msg3 = "@Ft@rCPM$$g3", + msg4 = 
"Aft@rCPM$$g4", readMssg; + Logger logger = null; + String parentTestName, portArg; + int port; + try { + parentTestName = args[0]; + portArg = args[1]; + port = Integer.parseInt(portArg); + + /* + * Socket Mapped Buffer to communicate between server process, client process and the calling parent process. + */ + File socketfile = new File(Helper.OUTPUT_FOLDER_NAME + "/" + parentTestName + "/SocketsConnectFile"); + channel = FileChannel.open(socketfile.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE); + socketMappedBuffer = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE); + channel.close(); + + logger = Logger.getLogger(Helper.PACKAGE_NAME + "." + TESTNAME); + SocketHelper.init(TESTNAME, parentTestName, logger); + + RuntimeMXBean bean = ManagementFactory.getRuntimeMXBean(); + String pid = bean.getName(); + SocketHelper.writePid(parentTestName, pid); + + logger.log(Level.INFO, "Begin"); + logger.log(Level.INFO, "Parent name: " + parentTestName); + logger.log(Level.INFO, "Server pid: " + pid); + logger.log(Level.INFO, "socket buffer connection opened"); + + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != Helper.STATE_INIT) { + logger.log(Level.SEVERE, "Socket-buffer not in expected Init state"); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + } + + ServerSocket ser = new ServerSocket(port); + logger.log(Level.INFO, "Server will be listening on Port: " + port); + + /* + * Timeout after 7 sec if client does not connect + */ + try { + ser.setSoTimeout(7 * 1000); + + } catch (SocketException e) { + logger.log(Level.SEVERE, "Cannot set timeout!"); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + logger.log(Level.INFO, "Waiting for client to connect"); + logger.log(Level.INFO, "Going to checkpoint"); + + try { + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_FAIL || socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_END) { + 
ser.close(); + System.exit(1); + } + /* + * Checkpoint when server is listening for connections, and no client has connected to the server. + */ + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_CHECKPOINT); + socket = ser.accept(); + SocketHelper.socketWaitForRestore(socketMappedBuffer, logger); + + } catch (Exception e) { + logger.log(Level.SEVERE, "Timed out while waiting for client to connect\n" + e); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + + if (!ser.isBound()) { + logger.log(Level.SEVERE, "Server is not bound to a port"); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + + if (ser.getLocalPort() != port) { + logger.log(Level.SEVERE, "Server is not listening on correct port"); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + + BufferedReader br = new BufferedReader(new InputStreamReader(socket.getInputStream())); + PrintStream outstream = new PrintStream(socket.getOutputStream()); + + readMssg = br.readLine(); + logger.log(Level.INFO, "Read message 1: " + readMssg); + if (!msg1.equals(readMssg)) { + logger.log(Level.SEVERE, "Message 1 received was wrong,received: " + readMssg + " expected: " + msg1); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + logger.log(Level.INFO, "Sending message: " + msg2); + outstream.println(msg2); + + readMssg = br.readLine(); + logger.log(Level.INFO, "Read message 3: " + readMssg); + + if (!msg3.equals(readMssg)) { + logger.log(Level.SEVERE, "Message 3 received was wrong, received: " + readMssg + " expected: " + msg3); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + outstream.println(msg4); + logger.log(Level.INFO, "Sent message 4 " + msg4); + + socket.close(); + + /* + * Put Socket-MappedBuffer to state SocketHelper.STATE_SUCCESS telling the server process has ended successfully. 
+ */ + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_FAIL || socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_END) { + System.exit(1); + } else { + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, SocketHelper.STATE_SUCCESS); + } + } catch (Exception exception) { + if (null != logger) { + StringWriter writer = new StringWriter(); + PrintWriter printWriter = new PrintWriter(writer); + exception.printStackTrace(printWriter); + logger.log(Level.SEVERE, "Exception occurred:" + exception); + logger.log(Level.FINE, writer.toString()); + } + + if (socketMappedBuffer != null) { + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + } + } +} diff --git a/test/javaTests/src/org/criu/java/tests/SocketsData.java b/test/javaTests/src/org/criu/java/tests/SocketsData.java new file mode 100644 index 000000000..67d8cef0e --- /dev/null +++ b/test/javaTests/src/org/criu/java/tests/SocketsData.java @@ -0,0 +1,156 @@ +package org.criu.java.tests; + +import java.io.File; +import java.io.PrintWriter; +import java.io.StringWriter; +import java.lang.management.ManagementFactory; +import java.lang.management.RuntimeMXBean; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.channels.FileChannel.MapMode; +import java.nio.file.StandardOpenOption; +import java.util.logging.Level; +import java.util.logging.Logger; + +class SocketsData { + static String TESTNAME = "SocketsData"; + + /** + * Runs the server and client processes, checkpoints the client process when its in the middle of data transfer + * + * @param args Not used + */ + public static void main(String[] args) { + MappedByteBuffer b = null, socketMappedBuffer = null; + FileChannel channel; + String pid; + Logger logger = null; + String port = "49200"; + try { + /* + * Mapped buffer 'b' to communicate between CheckpointRestore.java and this process. 
+ */ + File f = new File(Helper.MEMORY_MAPPED_FILE_NAME); + channel = FileChannel.open(f.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE); + b = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE); + channel.close(); + + logger = Logger.getLogger(Helper.PACKAGE_NAME + "." + TESTNAME); + + RuntimeMXBean bean = ManagementFactory.getRuntimeMXBean(); + pid = bean.getName(); + Helper.init(TESTNAME, pid, logger); + logger.log(Level.INFO, "Test init done; pid written to pid file; beginning with test"); + + if (b.getChar(Helper.MAPPED_INDEX) != Helper.STATE_INIT) { + logger.log(Level.SEVERE, "Error: Error in memory mapping, test is not in init state"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + + /* + * Socket Mapped Buffer to communicate between server process, client process and this process. + */ + logger.log(Level.INFO, "Creating socketbufferfile and setting the init value of buffer"); + File socketfile = new File(Helper.OUTPUT_FOLDER_NAME + "/" + TESTNAME + "/SocketsDataFile"); + channel = FileChannel.open(socketfile.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE); + socketMappedBuffer = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE); + channel.close(); + + /* + * Set socketMappedBuffer to init state. + */ + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_INIT); + logger.log(Level.INFO, "Starting server and client process"); + ProcessBuilder builder = new ProcessBuilder("java", "-cp", "target/classes", Helper.PACKAGE_NAME + "." + "SocketsDataServer", TESTNAME, port); + Process serverProcess = builder.start(); + builder = new ProcessBuilder("java", "-cp", "target/classes", Helper.PACKAGE_NAME + "." 
+ "SocketsDataClient", TESTNAME, port); + Process clientProcess = builder.start(); + + while (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_INIT || socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == SocketHelper.STATE_LISTEN) { + ; + } + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_END) { + logger.log(Level.SEVERE, "Killing the server process and client process"); + logger.log(Level.SEVERE, "Some error took place in the client or server process: check their log for details"); + serverProcess.destroy(); + clientProcess.destroy(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_FAIL) { + logger.log(Level.SEVERE, "Killing the server process and client process"); + logger.log(Level.SEVERE, "Exception occured in the client or server process: check their log for details"); + serverProcess.destroy(); + clientProcess.destroy(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != Helper.STATE_CHECKPOINT) { + logger.log(Level.SEVERE, "Killing the server process and client process"); + logger.log(Level.SEVERE, "State is not the expected 'to be checkpointed' state"); + serverProcess.destroy(); + clientProcess.destroy(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_CHECKPOINT) { + logger.log(Level.INFO, "Going to checkpoint client process"); + try { + Thread.sleep(10); + } catch (InterruptedException e) { + logger.log(Level.WARNING, "Thread was interrupted"); + } + SocketHelper.checkpointAndWait(b, logger, serverProcess, clientProcess); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_RESTORE); + logger.log(Level.INFO, "Process has been restored!"); + } + + while (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_RESTORE) { + ; + } + char 
bufchar = socketMappedBuffer.getChar(Helper.MAPPED_INDEX); + if (bufchar != Helper.STATE_FAIL && bufchar != Helper.STATE_PASS && bufchar != SocketHelper.STATE_SUCCESS) { + logger.log(Level.SEVERE, "Received wrong message from the child process: not the expected finish message"); + logger.log(Level.SEVERE, "Check their log files for more details"); + serverProcess.destroy(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_FAIL || socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_END) { + logger.log(Level.SEVERE, "Error in the client or server process: check their log for details"); + serverProcess.destroy(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + while (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == SocketHelper.STATE_SUCCESS) { + ; + } + + /* + * Client process puts socketMappedBuffer to STATE_PASS if the test passed. + */ + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_PASS) { + logger.log(Level.INFO, Helper.PASS_MESSAGE); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_PASS); + } else { + logger.log(Level.INFO, "Did not receive pass message from the client process"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + + } catch (Exception e) { + if (null != logger) { + StringWriter writer = new StringWriter(); + PrintWriter printWriter = new PrintWriter(writer); + e.printStackTrace(printWriter); + logger.log(Level.SEVERE, "Exception occured:" + e); + logger.log(Level.FINE, writer.toString()); + } + if (b != null) { + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + System.exit(5); + } + } +} diff --git a/test/javaTests/src/org/criu/java/tests/SocketsDataClient.java b/test/javaTests/src/org/criu/java/tests/SocketsDataClient.java new file mode 100644 index 000000000..49885a886 --- /dev/null +++ b/test/javaTests/src/org/criu/java/tests/SocketsDataClient.java @@ -0,0 +1,141 @@ 
+package org.criu.java.tests; + +import java.io.*; +import java.lang.management.ManagementFactory; +import java.lang.management.RuntimeMXBean; +import java.net.Socket; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.channels.FileChannel.MapMode; +import java.nio.file.StandardOpenOption; +import java.util.logging.Level; +import java.util.logging.Logger; + +class SocketsDataClient { + static String TESTNAME = "SocketsDataClient"; + + public static void main(String[] args) { + MappedByteBuffer socketMappedBuffer = null; + FileChannel channel; + Socket socket = null; + String parentTestName, portArg; + int port; + Logger logger = null; + + try { + parentTestName = args[0]; + portArg = args[1]; + port = Integer.parseInt(portArg); + String msg1 = "Ch@ckM@$$@Ge!1", msg2 = "cH@C!m$SG!!2", + readMssg, msg3 = "@Ft@rCPM$$g3", msg4 = "Aft@rCPM$$g4"; + + /* + * Socket Mapped Buffer to communicate between server process, client process and the calling parent process. + */ + File socketfile = new File(Helper.OUTPUT_FOLDER_NAME + "/" + parentTestName + "/SocketsDataFile"); + channel = FileChannel.open(socketfile.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE); + socketMappedBuffer = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE); + channel.close(); + + logger = Logger.getLogger(Helper.PACKAGE_NAME + "." 
+ TESTNAME); + SocketHelper.init(TESTNAME, parentTestName, logger); + + logger.log(Level.INFO, "Begin"); + logger.log(Level.INFO, "Parent name: " + parentTestName); + + RuntimeMXBean bean = ManagementFactory.getRuntimeMXBean(); + String pid = bean.getName(); + + logger.log(Level.INFO, "Client pid: " + pid); + SocketHelper.writePid(parentTestName, pid); + + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != Helper.STATE_INIT && socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != SocketHelper.STATE_LISTEN) { + logger.log(Level.SEVERE, "Error: Socket-buffer not in expected Init state"); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + while (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_INIT) { + ; + } + /* + * Socket Mapped Buffer should be in 'Server listening for connections' state + */ + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != SocketHelper.STATE_LISTEN) { + logger.log(Level.SEVERE, "socket-buffer not in expected state, current state: " + socketMappedBuffer.getChar(Helper.MAPPED_INDEX)); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + + /* + * Server starts accepting connections on the port after putting the Mapped Buffer in the SocketHelper.STATE_LISTEN state + */ + logger.log(Level.INFO, "Client socket sending req to server at IP: 127.0.0.1 port:" + port); + + try { + socket = new Socket(SocketHelper.IP_ADDRESS, port); + } catch (IOException e) { + logger.log(Level.SEVERE, "Exception occured when connecting to port: " + e); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + PrintStream out = new PrintStream(socket.getOutputStream()); + BufferedReader br = new BufferedReader(new InputStreamReader(socket.getInputStream())); + + logger.log(Level.INFO, "Sending message to server " + msg1); + out.println(msg1); + + readMssg = br.readLine(); + logger.log(Level.INFO, "message received from server " + readMssg); + if
(!msg2.equals(readMssg)) { + logger.log(Level.SEVERE, "wrong message received; Expected " + msg2); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + + /* + * Checkpoint and wait for restore + */ + logger.log(Level.INFO, "Going to checkpoint"); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_CHECKPOINT); + SocketHelper.socketWaitForRestore(socketMappedBuffer, logger); + + logger.log(Level.INFO, "Sending message to server " + msg3); + out.println(msg3); + + readMssg = br.readLine(); + logger.log(Level.INFO, "message received from server " + readMssg); + if (!msg4.equals(readMssg)) { + logger.log(Level.SEVERE, "wrong message received; Expected " + msg4); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + + socket.close(); + /* + * Wait for server process to end. + */ + while (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_RESTORE) { + ; + } + /* + * Check whether the server process has ended successfully: on success put the Mapped Buffer into the pass state, otherwise into the failed state + */ + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == SocketHelper.STATE_SUCCESS) { + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_PASS); + } else { + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + } catch (Exception exception) { + if (null != logger) { + StringWriter writer = new StringWriter(); + PrintWriter printWriter = new PrintWriter(writer); + exception.printStackTrace(printWriter); + logger.log(Level.SEVERE, "Exception occured:" + exception); + logger.log(Level.FINE, writer.toString()); + } + if (socketMappedBuffer != null) { + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + } + } +} diff --git a/test/javaTests/src/org/criu/java/tests/SocketsDataServer.java b/test/javaTests/src/org/criu/java/tests/SocketsDataServer.java new file mode 100644 index 000000000..65fe92a9d --- /dev/null +++
b/test/javaTests/src/org/criu/java/tests/SocketsDataServer.java @@ -0,0 +1,124 @@ +package org.criu.java.tests; + +import java.io.*; +import java.net.ServerSocket; +import java.net.Socket; +import java.net.SocketException; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.channels.FileChannel.MapMode; +import java.nio.file.StandardOpenOption; +import java.util.logging.Level; +import java.util.logging.Logger; + +class SocketsDataServer { + static String TESTNAME = "SocketsDataServer"; + + public static void main(String[] args) { + MappedByteBuffer socketMappedBuffer = null; + FileChannel channel; + String parentTestName, portArg; + int port; + Socket socket = null; + Logger logger = null; + String msg1 = "Ch@ckM@$$@Ge!1", msg2 = "cH@C!m$SG!!2", + msg3 = "@Ft@rCPM$$g3", msg4 = "Aft@rCPM$$g4", readMssg; + + try { + parentTestName = args[0]; + portArg = args[1]; + port = Integer.parseInt(portArg); + + /* + * Socket Mapped Buffer to communicate between server process, client process and the calling parent process. + */ + File socketfile = new File(Helper.OUTPUT_FOLDER_NAME + "/" + parentTestName + "/SocketsDataFile"); + channel = FileChannel.open(socketfile.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE); + socketMappedBuffer = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE); + channel.close(); + + logger = Logger.getLogger(Helper.PACKAGE_NAME + "." 
+ TESTNAME); + SocketHelper.init(TESTNAME, parentTestName, logger); + + logger.log(Level.INFO, "Begin"); + logger.log(Level.INFO, "Parent name: " + parentTestName); + logger.log(Level.INFO, "socket buffer connection opened"); + + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != Helper.STATE_INIT) { + logger.log(Level.SEVERE, "Socket-buffer not in expected Init state"); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + + ServerSocket ser = new ServerSocket(port); + logger.log(Level.INFO, "Server will be listening on Port " + port); + + /* + * Wait for 7 seconds for client to connect, else throw a timeout exception + */ + try { + ser.setSoTimeout(7 * 1000); + + } catch (SocketException e) { + logger.log(Level.SEVERE, "cannot set timeout"); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + + logger.log(Level.INFO, "Waiting for client to connect"); + /* + * Put Socket Mapped Buffer to SocketHelper.STATE_LISTEN state - server has bound to port and + * begin listening for connections. 
+ */ + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, SocketHelper.STATE_LISTEN); + socket = ser.accept(); + BufferedReader br = new BufferedReader(new InputStreamReader(socket.getInputStream())); + PrintStream outstream = new PrintStream(socket.getOutputStream()); + + readMssg = br.readLine(); + logger.log(Level.INFO, "Read message 1: " + readMssg); + + if (!msg1.equals(readMssg)) { + logger.log(Level.SEVERE, "Message 1 received was wrong:rec " + readMssg + " expected: " + msg1); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + + logger.log(Level.INFO, "Sending message: " + msg2); + outstream.println(msg2); + + readMssg = br.readLine(); + logger.log(Level.INFO, "Read message 3: " + readMssg); + + if (!msg3.equals(readMssg)) { + logger.log(Level.SEVERE, "Message 3 received was wrong:rec " + readMssg + " expected: " + msg3); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + + outstream.println(msg4); + logger.log(Level.INFO, "Sent message 4 " + msg4); + + socket.close(); + /* + * Put Socket-MappedBuffer to state SocketHelper.STATE_SUCCESS telling the server process has ended successfully. 
+ */ + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_FAIL || socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_END) { + System.exit(1); + } else { + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, SocketHelper.STATE_SUCCESS); + } + } catch (Exception exception) { + if (null != logger) { + StringWriter writer = new StringWriter(); + PrintWriter printWriter = new PrintWriter(writer); + exception.printStackTrace(printWriter); + logger.log(Level.SEVERE, "Exception occurred:" + exception); + logger.log(Level.FINE, writer.toString()); + } + + if (socketMappedBuffer != null) { + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + } + } +} diff --git a/test/javaTests/src/org/criu/java/tests/SocketsListen.java b/test/javaTests/src/org/criu/java/tests/SocketsListen.java new file mode 100644 index 000000000..3fad38549 --- /dev/null +++ b/test/javaTests/src/org/criu/java/tests/SocketsListen.java @@ -0,0 +1,153 @@ +package org.criu.java.tests; + +import java.io.File; +import java.io.PrintWriter; +import java.io.StringWriter; +import java.lang.management.ManagementFactory; +import java.lang.management.RuntimeMXBean; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.channels.FileChannel.MapMode; +import java.nio.file.StandardOpenOption; +import java.util.logging.Level; +import java.util.logging.Logger; + +class SocketsListen { + static String TESTNAME = "SocketsListen"; + + /** + * Runs the client and server process, checkpoints the server process when the server has bound to a port, but has not yet started listening + * + * @param args Not used + */ + public static void main(String[] args) { + MappedByteBuffer b = null, socketMappedBuffer = null; + FileChannel channel; + String pid; + String port = "49200"; + Logger logger = null; + try { + logger = Logger.getLogger(Helper.PACKAGE_NAME + "." 
+ TESTNAME); + /* + * Mapped buffer 'b' to communicate between CheckpointRestore.java and this process. + */ + File f = new File(Helper.MEMORY_MAPPED_FILE_NAME); + channel = FileChannel.open(f.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE); + b = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE); + channel.close(); + + RuntimeMXBean bean = ManagementFactory.getRuntimeMXBean(); + pid = bean.getName(); + Helper.init(TESTNAME, pid, logger); + logger.log(Level.INFO, "Test init done; pid written to pid file; beginning with test"); + + + if (b.getChar(Helper.MAPPED_INDEX) != Helper.STATE_INIT) { + logger.log(Level.SEVERE, "Error: Error in memory mapping, test is not in init state"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + logger.log(Level.INFO, "Creating socketbufferfile and setting the init value of buffer"); + + /* + * Socket Mapped Buffer to communicate between server process, client process and this process. + */ + File socketfile = new File(Helper.OUTPUT_FOLDER_NAME + "/" + TESTNAME + "/SocketsListenFile"); + channel = FileChannel.open(socketfile.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE); + socketMappedBuffer = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE); + channel.close(); + + /* + * Set socketMappedBuffer to init state. + */ + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_INIT); + + logger.log(Level.INFO, "Starting server and client process"); + ProcessBuilder builder = new ProcessBuilder("java", "-cp", "target/classes", Helper.PACKAGE_NAME + "." + "SocketsListenServer", TESTNAME, port); + Process serverProcess = builder.start(); + builder = new ProcessBuilder("java", "-cp", "target/classes", Helper.PACKAGE_NAME + "." 
+ "SocketsListenClient", TESTNAME, port); + Process clientProcess = builder.start(); + + while (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_INIT) { + ; + } + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_END) { + logger.log(Level.SEVERE, "Killing the server process and client process"); + logger.log(Level.SEVERE, "Some error took place in the client or server process: check their log for details"); + serverProcess.destroy(); + clientProcess.destroy(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_FAIL) { + logger.log(Level.SEVERE, "Killing the server process and client process"); + logger.log(Level.SEVERE, "Exception occured in the client or server process: check their log for details"); + serverProcess.destroy(); + clientProcess.destroy(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != Helper.STATE_CHECKPOINT) { + logger.log(Level.SEVERE, "Killing the server process and client process"); + logger.log(Level.SEVERE, "State is not the expected 'to be checkpointed' state"); + serverProcess.destroy(); + clientProcess.destroy(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_CHECKPOINT) { + logger.log(Level.INFO, "Going to checkpoint server process"); + SocketHelper.checkpointAndWait(b, logger, serverProcess, clientProcess); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_RESTORE); + logger.log(Level.INFO, "Process has been restored!"); + } + + while (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_RESTORE || socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == SocketHelper.STATE_LISTEN) { + ; + } + char bufchar = socketMappedBuffer.getChar(Helper.MAPPED_INDEX); + if (bufchar != Helper.STATE_FAIL && bufchar != 
Helper.STATE_PASS && bufchar != SocketHelper.STATE_SUCCESS) { + logger.log(Level.SEVERE, "Received wrong message from the child process: not the expected finish message"); + logger.log(Level.SEVERE, "Check their log files for more details"); + clientProcess.destroy(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_FAIL || socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_END) { + logger.log(Level.SEVERE, "Error in the client or server process: check their log for details"); + clientProcess.destroy(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + while (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == SocketHelper.STATE_SUCCESS) { + ; + } + + /* + * Client process puts socketMappedBuffer to Helper.STATE_PASS-Pass state if the test passed. + */ + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_PASS) { + logger.log(Level.INFO, Helper.PASS_MESSAGE); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_PASS); + } else { + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + + System.exit(0); + + } catch (Exception e) { + if (null != logger) { + StringWriter writer = new StringWriter(); + PrintWriter printWriter = new PrintWriter(writer); + e.printStackTrace(printWriter); + logger.log(Level.SEVERE, "Exception occured:" + e); + logger.log(Level.FINE, writer.toString()); + } + if (b != null) { + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + System.exit(5); + } + } +} diff --git a/test/javaTests/src/org/criu/java/tests/SocketsListenClient.java b/test/javaTests/src/org/criu/java/tests/SocketsListenClient.java new file mode 100644 index 000000000..efcb3d545 --- /dev/null +++ b/test/javaTests/src/org/criu/java/tests/SocketsListenClient.java @@ -0,0 +1,136 @@ +package org.criu.java.tests; + +import java.io.*; +import java.net.Socket; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; 
+import java.nio.channels.FileChannel.MapMode; +import java.nio.file.StandardOpenOption; +import java.util.logging.Level; +import java.util.logging.Logger; + +class SocketsListenClient { + static String TESTNAME = "SocketsListenClient"; + + public static void main(String[] args) { + MappedByteBuffer socketMappedBuffer = null; + FileChannel channel; + Socket socket = null; + String parentTestName, portArg; + int port; + Logger logger = null; + try { + parentTestName = args[0]; + portArg = args[1]; + port = Integer.parseInt(portArg); + String msg1 = "Ch@ckM@$$@Ge!1", msg2 = "cH@C!m$SG!!2", readMssg, + msg3 = "@Ft@rCPM$$g3", msg4 = "Aft@rCPM$$g4"; + + /* + * Socket Mapped Buffer to communicate between server process, client process and the calling parent process. + */ + File socketfile = new File(Helper.OUTPUT_FOLDER_NAME + "/" + parentTestName + "/SocketsListenFile"); + channel = FileChannel.open(socketfile.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE); + socketMappedBuffer = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE); + channel.close(); + + logger = Logger.getLogger(Helper.PACKAGE_NAME + "." 
+ TESTNAME); + SocketHelper.init(TESTNAME, parentTestName, logger); + + logger.log(Level.INFO, "Begin"); + logger.log(Level.INFO, "Parent name: " + parentTestName); + + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != Helper.STATE_INIT && socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != Helper.STATE_CHECKPOINT && socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != Helper.STATE_RESTORE && socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != SocketHelper.STATE_LISTEN) { + logger.log(Level.SEVERE, "Error: Socket-buffer not in expected Init state"); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + logger.log(Level.INFO, "Waiting for CR"); + + while (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_INIT || socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_CHECKPOINT) { + ; + } + + logger.log(Level.INFO, "Restored"); + while (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_RESTORE) { + ; + } + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != SocketHelper.STATE_LISTEN) { + logger.log(Level.SEVERE, "Buffer does not contain the expected 'server bound to port' state" + socketMappedBuffer.getChar(Helper.MAPPED_INDEX)); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + /* + * Make the thread sleep to ensure server is listening on the port for client connections. 
+ */ + logger.log(Level.INFO, "Put thread to sleep"); + try { + Thread.sleep(10); + } catch (InterruptedException e) { + logger.log(Level.WARNING, "Thread was interuptedp"); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + + logger.log(Level.INFO, "Client socket sending req to server at IP: 127.0.0.1 port:" + port); + try { + socket = new Socket(SocketHelper.IP_ADDRESS, port); + } catch (Exception e) { + logger.log(Level.SEVERE, "Exception occured when connecting to port: " + e); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + PrintStream out = new PrintStream(socket.getOutputStream()); + BufferedReader br = new BufferedReader(new InputStreamReader(socket.getInputStream())); + + logger.log(Level.INFO, "Sending message to server " + msg1); + out.println(msg1); + + readMssg = br.readLine(); + logger.log(Level.INFO, "message received from server " + readMssg); + if (!msg2.equals(readMssg)) { + logger.log(Level.SEVERE, "wrong message received; Expected " + msg2); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + + logger.log(Level.INFO, "Sending message to server " + msg3); + out.println(msg3); + + readMssg = br.readLine(); + logger.log(Level.INFO, "message received from server " + readMssg); + if (!msg4.equals(readMssg)) { + logger.log(Level.SEVERE, "wrong message received; Expected " + msg4); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + + socket.close(); + + /* + * Wait for server process to end. 
+ */ + while (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == SocketHelper.STATE_LISTEN) { + ; + } + /* + * Check the server process has ended successfully, if it was a success put MappedBuffer to STATE_PASS, else to STATE_FAIL + */ + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == SocketHelper.STATE_SUCCESS) { + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_PASS); + } else { + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + } catch (Exception exception) { + if (null != logger) { + StringWriter writer = new StringWriter(); + PrintWriter printWriter = new PrintWriter(writer); + exception.printStackTrace(printWriter); + logger.log(Level.SEVERE, "Exception occured:" + exception); + logger.log(Level.FINE, writer.toString()); + } + + if (socketMappedBuffer != null) { + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + } + } +} diff --git a/test/javaTests/src/org/criu/java/tests/SocketsListenServer.java b/test/javaTests/src/org/criu/java/tests/SocketsListenServer.java new file mode 100644 index 000000000..46fef40ec --- /dev/null +++ b/test/javaTests/src/org/criu/java/tests/SocketsListenServer.java @@ -0,0 +1,160 @@ +package org.criu.java.tests; + +import java.io.*; +import java.lang.management.ManagementFactory; +import java.lang.management.RuntimeMXBean; +import java.net.ServerSocket; +import java.net.Socket; +import java.net.SocketException; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.channels.FileChannel.MapMode; +import java.nio.file.StandardOpenOption; +import java.util.logging.Level; +import java.util.logging.Logger; + +class SocketsListenServer { + static String TESTNAME = "SocketsListenServer"; + + public static void main(String[] args) { + MappedByteBuffer socketMappedBuffer = null; + FileChannel channel; + String parentTestName, portArg; + int port; + Logger logger = null; + Socket socket = null; + String readMssg, msg1 = 
"Ch@ckM@$$@Ge!1", msg2 = "cH@C!m$SG!!2", + msg3 = "@Ft@rCPM$$g3", msg4 = "Aft@rCPM$$g4"; + + try { + parentTestName = args[0]; + portArg = args[1]; + port = Integer.parseInt(portArg); + + /* + * Socket Mapped Buffer to communicate between server process, client process and the calling parent process. + */ + File socketfile = new File(Helper.OUTPUT_FOLDER_NAME + "/" + parentTestName + "/SocketsListenFile"); + channel = FileChannel.open(socketfile.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE); + socketMappedBuffer = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE); + channel.close(); + + logger = Logger.getLogger(Helper.PACKAGE_NAME + "." + TESTNAME); + SocketHelper.init(TESTNAME, parentTestName, logger); + + RuntimeMXBean bean = ManagementFactory.getRuntimeMXBean(); + String pid = bean.getName(); + SocketHelper.writePid(parentTestName, pid); + + logger.log(Level.INFO, "Begin"); + logger.log(Level.INFO, "Parent name: " + parentTestName); + logger.log(Level.INFO, "Server pid: " + pid); + logger.log(Level.INFO, "socket buffer connection opened"); + + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != Helper.STATE_INIT) { + logger.log(Level.SEVERE, "Socket-buffer not in expected Init state"); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + + logger.log(Level.INFO, "Server will be listening on Port " + port); + ServerSocket ser = new ServerSocket(port); + /* + * Server has bound to a port but is not listening yet! + */ + logger.log(Level.INFO, "Going to checkpoint"); + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_FAIL || socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_END) { + ser.close(); + System.exit(1); + } + /* + * Checkpoint and wait for Restore. 
+ */ + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_CHECKPOINT); + SocketHelper.socketWaitForRestore(socketMappedBuffer, logger); + + if (!ser.isBound()) { + logger.log(Level.SEVERE, "Server is not bound to a port"); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + + if (ser.getLocalPort() != port) { + logger.log(Level.SEVERE, "SServer is not listening on correct port"); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + /* + * Timeout after 5 sec if client does not connect + */ + try { + ser.setSoTimeout(5 * 1000); + + } catch (SocketException e) { + logger.log(Level.SEVERE, "cannot set timeout"); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + + try { + logger.log(Level.INFO, "Waiting for client to connect"); + /* + * Put Socket Mapped Buffer to SocketHelper.STATE_LISTEN state - server has bound to port and + * will begin listening for connections. + */ + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, SocketHelper.STATE_LISTEN); + socket = ser.accept(); + + } catch (Exception e) { + logger.log(Level.SEVERE, "Timed out while waiting for client to connect\n" + e); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + BufferedReader br = new BufferedReader(new InputStreamReader(socket.getInputStream())); + PrintStream outstream = new PrintStream(socket.getOutputStream()); + + readMssg = br.readLine(); + logger.log(Level.INFO, "Read message 1: " + readMssg); + if (!msg1.equals(readMssg)) { + logger.log(Level.SEVERE, "Message 1 received was wrong:rec " + readMssg + " expected: " + msg1); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + + logger.log(Level.INFO, "Sending message: " + msg2); + outstream.println(msg2); + + readMssg = br.readLine(); + logger.log(Level.INFO, "Read message 3: " + readMssg); + + if (!msg3.equals(readMssg)) { + 
logger.log(Level.SEVERE, "Message 3 received was wrong:rec " + readMssg + " expected: " + msg3); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + + outstream.println(msg4); + logger.log(Level.INFO, "Sending message: " + msg4); + + /* + * Put Socket-MappedBuffer to state SocketHelper.STATE_SUCCESS telling the server process has ended successfully. + */ + socket.close(); + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_FAIL || socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_END) { + System.exit(1); + } else { + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, SocketHelper.STATE_SUCCESS); + } + } catch (Exception exception) { + if (null != logger) { + StringWriter writer = new StringWriter(); + PrintWriter printWriter = new PrintWriter(writer); + exception.printStackTrace(printWriter); + logger.log(Level.SEVERE, "Exception occurred:" + exception); + logger.log(Level.FINE, writer.toString()); + } + if (socketMappedBuffer != null) { + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + } + } +} diff --git a/test/javaTests/src/org/criu/java/tests/SocketsMultiple.java b/test/javaTests/src/org/criu/java/tests/SocketsMultiple.java new file mode 100644 index 000000000..5e55c4274 --- /dev/null +++ b/test/javaTests/src/org/criu/java/tests/SocketsMultiple.java @@ -0,0 +1,152 @@ +package org.criu.java.tests; + +import java.io.File; +import java.io.PrintWriter; +import java.io.StringWriter; +import java.lang.management.ManagementFactory; +import java.lang.management.RuntimeMXBean; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.channels.FileChannel.MapMode; +import java.nio.file.StandardOpenOption; +import java.util.logging.Level; +import java.util.logging.Logger; + +class SocketsMultiple { + static String TESTNAME = "SocketsMultiple"; + + /** + * Runs the Client and Server Processes, Multiple clients connect to server Process, 
checkpoints the server process + * + * @param args Not used + */ + public static void main(String[] args) { + MappedByteBuffer b = null, socketMappedBuffer = null; + FileChannel channel; + String pid; + String port = "49200"; + Logger logger = null; + try { + /* + * Mapped buffer 'b' to communicate between CheckpointRestore.java and this process. + */ + File f = new File(Helper.MEMORY_MAPPED_FILE_NAME); + channel = FileChannel.open(f.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE); + b = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE); + channel.close(); + logger = Logger.getLogger(Helper.PACKAGE_NAME + "." + TESTNAME); + + RuntimeMXBean bean = ManagementFactory.getRuntimeMXBean(); + pid = bean.getName(); + Helper.init(TESTNAME, pid, logger); + logger.log(Level.INFO, "Test init done; pid written to pid file; beginning with test"); + + if (b.getChar(Helper.MAPPED_INDEX) != Helper.STATE_INIT) { + logger.log(Level.SEVERE, "Error: Error in memory mapping, test is not in init state"); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + + /* + * Socket Mapped Buffer to communicate between server process, client process and this process. + */ + logger.log(Level.INFO, "Creating socketBufferFile and setting the init value of buffer"); + File socketfile = new File(Helper.OUTPUT_FOLDER_NAME + "/" + TESTNAME + "/SocketsMultipleFile"); + channel = FileChannel.open(socketfile.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE); + socketMappedBuffer = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE); + channel.close(); + + /* + * Set socketMappedBuffer to init state. + */ + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_INIT); + + logger.log(Level.INFO, "Starting server and client process"); + ProcessBuilder builder = new ProcessBuilder("java", "-cp", "target/classes", Helper.PACKAGE_NAME + "." 
+ "SocketsMultipleServer", TESTNAME, port); + Process serverProcess = builder.start(); + builder = new ProcessBuilder("java", "-cp", "target/classes", Helper.PACKAGE_NAME + "." + "SocketsMultipleClient", TESTNAME, port); + Process clientProcess = builder.start(); + + while (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_INIT || socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == SocketHelper.STATE_LISTEN) { + ; + } + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_END) { + logger.log(Level.SEVERE, "Killing the server process and client process"); + logger.log(Level.SEVERE, "Some error took place in the client or server process: check their log for details"); + serverProcess.destroy(); + clientProcess.destroy(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_FAIL) { + logger.log(Level.SEVERE, "Killing the server process and client process"); + logger.log(Level.SEVERE, "Exception occured in the client or server process: check their log for details"); + serverProcess.destroy(); + clientProcess.destroy(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != Helper.STATE_CHECKPOINT) { + logger.log(Level.SEVERE, "Killing the server process and client process"); + logger.log(Level.SEVERE, "State is not the expected 'to be checkpointed' state"); + serverProcess.destroy(); + clientProcess.destroy(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_CHECKPOINT) { + logger.log(Level.INFO, "Going to checkpoint server process"); + SocketHelper.checkpointAndWait(b, logger, serverProcess, clientProcess); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_RESTORE); + logger.log(Level.INFO, "Process has been restored!"); + } + + while 
(socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_RESTORE) { + ; + } + char bufchar = socketMappedBuffer.getChar(Helper.MAPPED_INDEX); + if (bufchar != Helper.STATE_FAIL && bufchar != Helper.STATE_PASS && bufchar != SocketHelper.STATE_SUCCESS) { + logger.log(Level.SEVERE, "Received wrong message from the child process: not the expected finish message"); + logger.log(Level.SEVERE, "Check their log files for more details"); + clientProcess.destroy(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_FAIL || socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_END) { + logger.log(Level.SEVERE, "Error in the client or server process: check their log for details"); + clientProcess.destroy(); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + while (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == SocketHelper.STATE_SUCCESS) { + ; + } + + /* + * Client process puts socketMappedBuffer to STATE_PASS state if the test passed. 
+ */ + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_PASS) { + logger.log(Level.INFO, Helper.PASS_MESSAGE); + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_PASS); + } else { + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + + System.exit(0); + + } catch (Exception e) { + if (null != logger) { + StringWriter writer = new StringWriter(); + PrintWriter printWriter = new PrintWriter(writer); + e.printStackTrace(printWriter); + logger.log(Level.SEVERE, "Exception occured:" + e); + logger.log(Level.FINE, writer.toString()); + } + if (b != null) { + b.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + System.exit(5); + } + } +} diff --git a/test/javaTests/src/org/criu/java/tests/SocketsMultipleClient.java b/test/javaTests/src/org/criu/java/tests/SocketsMultipleClient.java new file mode 100644 index 000000000..d97a946fd --- /dev/null +++ b/test/javaTests/src/org/criu/java/tests/SocketsMultipleClient.java @@ -0,0 +1,174 @@ +package org.criu.java.tests; + +import java.io.*; +import java.net.Socket; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.channels.FileChannel.MapMode; +import java.nio.file.StandardOpenOption; +import java.util.logging.Level; +import java.util.logging.Logger; + +class SocketsMultipleClient { + static String TESTNAME = "SocketsMultipleClient"; + + public static void main(String[] args) { + MappedByteBuffer socketMappedBuffer = null; + FileChannel channel; + String msg1 = "Message1", msg2 = "Message2", readMssg; + Socket socket1 = null, socket2 = null, socket3 = null, socket4 = null; + String parentTestName, portArg; + int port; + Logger logger = null; + + try { + parentTestName = args[0]; + portArg = args[1]; + port = Integer.parseInt(portArg); + + /* + * Socket Mapped Buffer to communicate between server process, client process and the calling parent process. 
+ */ + File socketfile = new File(Helper.OUTPUT_FOLDER_NAME + "/" + parentTestName + "/SocketsMultipleFile"); + channel = FileChannel.open(socketfile.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE); + socketMappedBuffer = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE); + channel.close(); + + logger = Logger.getLogger(Helper.PACKAGE_NAME + "." + TESTNAME); + SocketHelper.init(TESTNAME, parentTestName, logger); + + logger.log(Level.INFO, "Begin"); + logger.log(Level.INFO, "Parent name: " + parentTestName); + + while (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_INIT) { + ; + } + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != SocketHelper.STATE_LISTEN) { + logger.log(Level.SEVERE, "Error: Socket-buffer not in expected state"); + + } + try { + logger.log(Level.INFO, "client 1 connecting..."); + socket1 = new Socket(SocketHelper.IP_ADDRESS, port); + } catch (Exception e) { + logger.log(Level.SEVERE, "Exception when client connects to server: " + e); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + } + logger.log(Level.INFO, "Client 1 connected to server successfully"); + PrintStream out1 = new PrintStream(socket1.getOutputStream()); + BufferedReader br1 = new BufferedReader(new InputStreamReader(socket1.getInputStream())); + logger.log(Level.INFO, "Got input and output streams for socket1"); + try { + logger.log(Level.INFO, "client 2 connecting..."); + socket2 = new Socket(SocketHelper.IP_ADDRESS, port); + } catch (Exception e) { + logger.log(Level.SEVERE, "Exception when client connects to server: " + e); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + } + logger.log(Level.INFO, "Client 2 connected to server successfully"); + PrintStream out2 = new PrintStream(socket2.getOutputStream()); + BufferedReader br2 = new BufferedReader(new InputStreamReader(socket2.getInputStream())); + logger.log(Level.INFO, "Got input and output streams for 
socket2"); + + try { + logger.log(Level.INFO, "client 3 connecting..."); + socket3 = new Socket(SocketHelper.IP_ADDRESS, port); + } catch (Exception e) { + logger.log(Level.SEVERE, "Exception when client connects to server: " + e); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + } + logger.log(Level.INFO, "Client 3 connected to server successfully"); + PrintStream out3 = new PrintStream(socket3.getOutputStream()); + BufferedReader br3 = new BufferedReader(new InputStreamReader(socket3.getInputStream())); + logger.log(Level.INFO, "Got input and output streams for socket3"); + + out1.println(msg1); + + readMssg = br1.readLine(); + if (!msg2.equals(readMssg)) { + logger.log(Level.SEVERE, "wrong message received; Received: " + readMssg); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + } + socket1.close(); + + out2.println(msg1); + + /* + * Wait for Checkpoint-Restore + */ + while (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_INIT || socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == SocketHelper.STATE_LISTEN || socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_CHECKPOINT) { + ; + } + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != Helper.STATE_RESTORE) { + logger.log(Level.SEVERE, "Socket-mapped-buffer is not in restored state"); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + logger.log(Level.INFO, "Server is Restored!!"); + + out3.println(msg1); + readMssg = br2.readLine(); + if (!msg2.equals(readMssg)) { + logger.log(Level.SEVERE, "wrong message received by client 2; Received: " + readMssg); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + readMssg = br3.readLine(); + if (!msg2.equals(readMssg)) { + logger.log(Level.SEVERE, "wrong message received by client 3; Received: " + readMssg); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + 
socket2.close(); + socket3.close(); + + try { + logger.log(Level.INFO, "client 4 connecting..."); + socket4 = new Socket(SocketHelper.IP_ADDRESS, port); + } catch (Exception e) { + logger.log(Level.SEVERE, "Exception when client connects to server: " + e); + } + logger.log(Level.INFO, "Client 4 connected to server successfully"); + PrintStream out4 = new PrintStream(socket4.getOutputStream()); + BufferedReader br4 = new BufferedReader(new InputStreamReader(socket4.getInputStream())); + logger.log(Level.INFO, "Got input and output streams for socket4"); + + out4.println(msg1); + readMssg = br4.readLine(); + if (!msg2.equals(readMssg)) { + logger.log(Level.SEVERE, "wrong message received by client 4; Received: " + readMssg); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + + socket4.close(); + /* + * Wait for server process to end. + */ + while (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_RESTORE) { + ; + } + /* + * Check the server process has ended successfully, if it was a success put Mapped Buffer to STATE_PASS, else to STATE_FAIL + */ + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == SocketHelper.STATE_SUCCESS) { + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_PASS); + } else { + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + + } catch (Exception exception) { + if (null != logger) { + StringWriter writer = new StringWriter(); + PrintWriter printWriter = new PrintWriter(writer); + exception.printStackTrace(printWriter); + logger.log(Level.SEVERE, "Exception occured:" + exception); + logger.log(Level.FINE, writer.toString()); + } + + if (socketMappedBuffer != null) { + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + } + } +} diff --git a/test/javaTests/src/org/criu/java/tests/SocketsMultipleServer.java b/test/javaTests/src/org/criu/java/tests/SocketsMultipleServer.java new file mode 100644 index 000000000..a7e4d3b9e --- 
/dev/null +++ b/test/javaTests/src/org/criu/java/tests/SocketsMultipleServer.java @@ -0,0 +1,215 @@ +package org.criu.java.tests; + +import java.io.*; +import java.lang.management.ManagementFactory; +import java.lang.management.RuntimeMXBean; +import java.net.ServerSocket; +import java.net.Socket; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.channels.FileChannel.MapMode; +import java.nio.file.StandardOpenOption; +import java.util.logging.Level; +import java.util.logging.Logger; + +class SocketsMultipleServer { + static String TESTNAME = "SocketsMultipleServer"; + + public static void main(String[] args) { + MappedByteBuffer socketMappedBuffer = null; + FileChannel channel; + String parentTestName, portArg; + int port; + Logger logger = null; + try { + parentTestName = args[0]; + portArg = args[1]; + port = Integer.parseInt(portArg); + + /* + * Socket Mapped Buffer to communicate between server process, client process and the calling parent process. + */ + File socketfile = new File(Helper.OUTPUT_FOLDER_NAME + "/" + parentTestName + "/SocketsMultipleFile"); + channel = FileChannel.open(socketfile.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE); + socketMappedBuffer = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE); + channel.close(); + + logger = Logger.getLogger(Helper.PACKAGE_NAME + "." 
+ TESTNAME); + SocketHelper.init(TESTNAME, parentTestName, logger); + + RuntimeMXBean bean = ManagementFactory.getRuntimeMXBean(); + String pid = bean.getName(); + SocketHelper.writePid(parentTestName, pid); + + logger.log(Level.INFO, "Begin"); + logger.log(Level.INFO, "Parent name: " + parentTestName); + logger.log(Level.INFO, "Server pid: " + pid); + logger.log(Level.INFO, "socket buffer connection opened"); + + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != Helper.STATE_INIT) { + logger.log(Level.SEVERE, "Socket-buffer not in expected Init state"); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + System.exit(1); + } + + /* + * The array indexes 3, 5, 7 and 9 will map the state of client 1, 2, 3 and 4. + * Set these array indexes to init state. + */ + + socketMappedBuffer.putChar(3, Helper.STATE_INIT); + socketMappedBuffer.putChar(5, Helper.STATE_INIT); + socketMappedBuffer.putChar(7, Helper.STATE_INIT); + socketMappedBuffer.putChar(9, Helper.STATE_INIT); + + ServerSocket ser = new ServerSocket(port); + logger.log(Level.INFO, "Server will be listening on Port " + port); + + Socket[] sockets = new Socket[4]; + + /* + * Set the SocketMappedBuffer to S state-server will be listening for connections + */ + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, SocketHelper.STATE_LISTEN); + + for (int i = 1; i <= 4; i++) { + sockets[i - 1] = ser.accept(); + ServerThread serverThread = new ServerThread(sockets[i - 1], "ser-socket " + i, 2 * i + 1, logger, socketMappedBuffer); + serverThread.start(); + if (i == 3) { + logger.log(Level.INFO, "Connected to client: 3"); + /* + * Client 3 has connected, wait for thread 1 to finish and then checkpoint. 
+ */ + while (socketMappedBuffer.getChar(3) != Helper.STATE_FAIL && socketMappedBuffer.getChar(3) != Helper.STATE_PASS) { + ; + } + logger.log(Level.INFO, "Going to checkpoint"); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_CHECKPOINT); + SocketHelper.socketWaitForRestore(socketMappedBuffer, logger); + } + } + + /* + * Loop while any of the 4 thread is running + */ + while (socketMappedBuffer.getChar(3) == Helper.STATE_INIT || socketMappedBuffer.getChar(5) == Helper.STATE_INIT + || socketMappedBuffer.getChar(7) == Helper.STATE_INIT || socketMappedBuffer.getChar(9) == Helper.STATE_INIT) { + ; + } + + /* + * Check Socket Mapped Buffer for a thread that failed + */ + for (int i = 1; i <= 4; i++) { + if (socketMappedBuffer.getChar(i * 2 + 1) == Helper.STATE_FAIL) { + logger.log(Level.SEVERE, "Error in thread connected to client " + i); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + } + + /* + * Check the 1st Socket is closed + */ + if (!sockets[0].isClosed()) { + logger.log(Level.SEVERE, "socket 1 is not closed"); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + logger.log(Level.INFO, "Socket 1 is in expected closed state: " + sockets[0].isClosed()); + + /* + * Check all threads are in expected pass state + */ + for (int i = 1; i <= 4; i++) { + if (socketMappedBuffer.getChar(i * 2 + 1) != Helper.STATE_PASS) { + logger.log(Level.SEVERE, "Unexpected State of buffer: " + socketMappedBuffer.getChar(i * 2 + 1) + ", client: " + i); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + } + logger.log(Level.INFO, "Done"); + + /* + * Put Socket-MappedBuffer to state SocketHelper.STATE_SUCCESS telling the server process has ended successfully. 
+ */ + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_FAIL || socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_END) { + System.exit(1); + } else { + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, SocketHelper.STATE_SUCCESS); + } + + } catch (Exception exception) { + if (null != logger) { + StringWriter writer = new StringWriter(); + PrintWriter printWriter = new PrintWriter(writer); + exception.printStackTrace(printWriter); + logger.log(Level.SEVERE, "Exception occurred:" + exception); + logger.log(Level.FINE, writer.toString()); + } + + if (socketMappedBuffer != null) { + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + } + } +} + +class ServerThread extends Thread { + Socket socket = null; + String name; + int num; + MappedByteBuffer socketMappedBuffer; + Logger logger; + + ServerThread(Socket socket, String name, int num, Logger logger, MappedByteBuffer socketMappedBuffer) { + this.socket = socket; + this.name = name; + this.logger = logger; + this.num = num; + this.socketMappedBuffer = socketMappedBuffer; + } + + public void run() { + try { + String readMssg, msg1 = "Message1", msg2 = "Message2"; + BufferedReader br = new BufferedReader(new InputStreamReader(socket.getInputStream())); + PrintStream out = new PrintStream(socket.getOutputStream()); + readMssg = br.readLine(); + if (!msg1.equals(readMssg)) { + logger.log(Level.SEVERE, "Message read by thread " + name + " was not 'Message1', received Message: " + readMssg); + socket.close(); + socketMappedBuffer.putChar(num, Helper.STATE_FAIL); + } else { + logger.log(Level.INFO, name + " received correct message"); + out.println(msg2); + logger.log(Level.INFO, name + " has sent message"); + socket.close(); + socketMappedBuffer.putChar(num, Helper.STATE_PASS); + } + + } catch (Exception exception) { + if (null != logger) { + StringWriter writer = new StringWriter(); + PrintWriter printWriter = new PrintWriter(writer); + 
exception.printStackTrace(printWriter); + logger.log(Level.SEVERE, "Exception occurred in thread :" + name + " " + exception); + logger.log(Level.FINE, writer.toString()); + } + + try { + if (socket != null) { + socket.close(); + } + } catch (IOException e) { + ; + } + + /* + * If exception occurs fail the thread + */ + socketMappedBuffer.putChar(num, Helper.STATE_FAIL); + } + } +} diff --git a/test/javaTests/src/org/criu/java/tests/SocketsServer.java b/test/javaTests/src/org/criu/java/tests/SocketsServer.java new file mode 100644 index 000000000..051233443 --- /dev/null +++ b/test/javaTests/src/org/criu/java/tests/SocketsServer.java @@ -0,0 +1,142 @@ +package org.criu.java.tests; + +import java.io.*; +import java.lang.management.ManagementFactory; +import java.lang.management.RuntimeMXBean; +import java.net.ServerSocket; +import java.net.Socket; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.channels.FileChannel.MapMode; +import java.nio.file.StandardOpenOption; +import java.util.logging.Level; +import java.util.logging.Logger; + +class SocketsServer { + static String TESTNAME = "SocketsServer"; + + public static void main(String[] args) { + MappedByteBuffer socketMappedBuffer = null; + String msg1 = "Ch@ckM@$$@Ge!1", msg2 = "cH@C!m$SG!!2", + msg3 = "@Ft@rCPM$$g3", msg4 = "Aft@rCPM$$g4", readMssg; + FileChannel channel; + String parentTestName, portArg; + int port; + Logger logger = null; + + try { + parentTestName = args[0]; + portArg = args[1]; + port = Integer.parseInt(portArg); + + /* + * Socket Mapped Buffer to communicate between server process, client process and the calling parent process. 
+ */ + File socketfile = new File(Helper.OUTPUT_FOLDER_NAME + "/" + parentTestName + "/SocketsFile"); + channel = FileChannel.open(socketfile.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE); + socketMappedBuffer = channel.map(MapMode.READ_WRITE, 0, Helper.MAPPED_REGION_SIZE); + channel.close(); + + logger = Logger.getLogger(Helper.PACKAGE_NAME + "." + TESTNAME); + + SocketHelper.init(TESTNAME, parentTestName, logger); + logger.log(Level.INFO, "Begin"); + logger.log(Level.INFO, "Parent name: " + parentTestName); + + RuntimeMXBean bean = ManagementFactory.getRuntimeMXBean(); + String pid = bean.getName(); + SocketHelper.writePid(parentTestName, pid); + + logger.log(Level.INFO, "Socket buffer mapped"); + + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) != Helper.STATE_INIT) { + logger.log(Level.SEVERE, "Socket-buffer not in expected Init state"); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + } + + ServerSocket ser = new ServerSocket(port); + logger.log(Level.INFO, "Server will be listening on Port " + port); + + /* + * Timeout after 5 second if client does not connect + */ + ser.setSoTimeout(5 * 1000); + logger.log(Level.INFO, "Waiting for client to connect"); + Socket socket = null; + try { + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, SocketHelper.STATE_LISTEN); + socket = ser.accept(); + } catch (Exception e) { + logger.log(Level.SEVERE, "Timed out while waiting for client to connect"); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_END); + } + BufferedReader br = new BufferedReader(new InputStreamReader(socket.getInputStream())); + PrintStream outstream = new PrintStream(socket.getOutputStream()); + + readMssg = br.readLine(); + logger.log(Level.INFO, "Read message 1: " + readMssg); + if (!msg1.equals(readMssg)) { + logger.log(Level.SEVERE, "Message 1 received was wrong:rec " + readMssg + " expected: " + msg1); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, 
Helper.STATE_END); + } + + logger.log(Level.INFO, "Sending message: " + msg2); + outstream.println(msg2); + + logger.log(Level.INFO, "Going to checkpoint"); + /* + * Put socket Mapped Buffer to 'to be checkpointed' state and wait for restore + */ + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_CHECKPOINT); + SocketHelper.socketWaitForRestore(socketMappedBuffer, logger); + + if (!ser.isBound()) { + logger.log(Level.SEVERE, "Server is not bound to a port"); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + + if (ser.getLocalPort() != port) { + logger.log(Level.SEVERE, "Server is not listening on correct port"); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + System.exit(1); + } + + readMssg = br.readLine(); + logger.log(Level.INFO, "Read message 3: " + readMssg); + + if (!msg3.equals(readMssg)) { + logger.log(Level.SEVERE, "Message 3 received was wrong:rec " + readMssg + " expected: " + msg3); + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + socket.close(); + System.exit(1); + } + + outstream.println(msg4); + logger.log(Level.INFO, "Sent message 4 " + msg4); + + /* + * Put Socket-MappedBuffer to state SocketHelper.STATE_SUCCESS telling the server process has ended successfully. 
+ */ + socket.close(); + if (socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_FAIL || socketMappedBuffer.getChar(Helper.MAPPED_INDEX) == Helper.STATE_END) { + System.exit(1); + } else { + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, SocketHelper.STATE_SUCCESS); + } + + } catch (Exception exception) { + if (null != logger) { + StringWriter writer = new StringWriter(); + PrintWriter printWriter = new PrintWriter(writer); + exception.printStackTrace(printWriter); + logger.log(Level.SEVERE, "Exception occurred:" + exception); + logger.log(Level.FINE, writer.toString()); + } + + if (socketMappedBuffer != null) { + socketMappedBuffer.putChar(Helper.MAPPED_INDEX, Helper.STATE_FAIL); + } + } + } +} diff --git a/test/javaTests/test.xml b/test/javaTests/test.xml index b73a31db2..4768bf193 100644 --- a/test/javaTests/test.xml +++ b/test/javaTests/test.xml @@ -40,4 +40,50 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + From 0c218746d587483346d5dcb7638b642392a4985f Mon Sep 17 00:00:00 2001 From: Nidhi Gupta Date: Thu, 9 Jan 2020 22:43:25 +0530 Subject: [PATCH 0293/2030] Switch open-j9 alpine tests to python3 Signed-off-by: Nidhi Gupta --- scripts/build/Dockerfile.openj9-alpine | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/build/Dockerfile.openj9-alpine b/scripts/build/Dockerfile.openj9-alpine index 43a993444..39ea4d08e 100644 --- a/scripts/build/Dockerfile.openj9-alpine +++ b/scripts/build/Dockerfile.openj9-alpine @@ -14,7 +14,7 @@ RUN apk update && apk add \ pkgconfig \ protobuf-c-dev \ protobuf-dev \ - python \ + python3 \ sudo \ maven \ ip6tables \ From f1abc9aa26421b0c0ea52a703590cd998e676b55 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Tue, 14 Jan 2020 14:47:18 +0100 Subject: [PATCH 0294/2030] ppc64le: remove register '1' from clobber list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Compiling 'criu-dev' on Fedora 31 gives two 
errors about wrong clobber lists: compel/include/uapi/compel/asm/sigframe.h:47:9: error: listing the stack pointer register ‘1’ in a clobber list is deprecated [-Werror=deprecated] criu/arch/ppc64/include/asm/restore.h:14:2: error: listing the stack pointer register ‘1’ in a clobber list is deprecated [-Werror=deprecated] There was also a bug report from Debian that CRIU does not build because of this. Each of these errors comes with the following note: note: the value of the stack pointer after an ‘asm’ statement must be the same as it was before the statement As far as I understand it, this should not be a problem in these cases as the code never returns anyway. Running zdtm very seldom fails during 'zdtm/static/cgroup_ifpriomap' with a double free or corruption. This does not happen very often, and I cannot verify whether it happens without this patch, as CRIU does not build without the patch. Signed-off-by: Adrian Reber --- compel/arch/ppc64/src/lib/include/uapi/asm/sigframe.h | 2 +- criu/arch/ppc64/include/asm/restore.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/compel/arch/ppc64/src/lib/include/uapi/asm/sigframe.h b/compel/arch/ppc64/src/lib/include/uapi/asm/sigframe.h index 9467a1b99..5c98b199d 100644 --- a/compel/arch/ppc64/src/lib/include/uapi/asm/sigframe.h +++ b/compel/arch/ppc64/src/lib/include/uapi/asm/sigframe.h @@ -50,7 +50,7 @@ struct rt_sigframe { "sc \n" \ : \ : "r"(new_sp) \ - : "1", "memory") + : "memory") #if _CALL_ELF != 2 # error Only supporting ABIv2.
diff --git a/criu/arch/ppc64/include/asm/restore.h b/criu/arch/ppc64/include/asm/restore.h index 8d4516090..f065ec3a0 100644 --- a/criu/arch/ppc64/include/asm/restore.h +++ b/criu/arch/ppc64/include/asm/restore.h @@ -21,7 +21,7 @@ : "r"(new_sp), \ "r"((unsigned long)restore_task_exec_start), \ "r"(task_args) \ - : "1", "3", "12") + : "3", "12") /* There is nothing to do since TLS is accessed through r13 */ #define core_get_tls(pcore, ptls) From 4232b270b8a0c866b9ee26e7b5f381cbf7cea56a Mon Sep 17 00:00:00 2001 From: Valeriy Vdovin Date: Fri, 10 Jan 2020 15:57:50 +0300 Subject: [PATCH 0295/2030] image: core -- Reserve start_time field To ensure consistency of the runtime environment, processes within a container need to see the same start time values over suspend/resume cycles. We introduce a new field in the core image structure to store the start time of a dumped process. Later the same value would be restored to a newly created task. In the future the feature is likely to be pulled here, so we reserve the field id in the protobuf descriptor. Signed-off-by: Valeriy Vdovin --- images/core.proto | 2 ++ 1 file changed, 2 insertions(+) diff --git a/images/core.proto b/images/core.proto index c3dba6f6d..e90522914 100644 --- a/images/core.proto +++ b/images/core.proto @@ -53,6 +53,8 @@ message task_core_entry { //optional int32 tty_pgrp = 17; optional bool child_subreaper = 18; + // Reserved for container relative start time + //optional uint64 start_time = 19; } message task_kobj_ids_entry { From 8fea2647b69fc0be0cac6a43639ed648cdd08db6 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Thu, 9 Jan 2020 18:54:50 +0000 Subject: [PATCH 0296/2030] travis: reduce the number of podman tests We are running each podman test loop 50 times. This takes more than 20 minutes in Travis. Reduce both test loops to only run 20 times.
Signed-off-by: Adrian Reber --- scripts/travis/podman-test.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/travis/podman-test.sh b/scripts/travis/podman-test.sh index 5189477cd..825bca746 100755 --- a/scripts/travis/podman-test.sh +++ b/scripts/travis/podman-test.sh @@ -35,7 +35,7 @@ criu --version podman run --name cr -d docker.io/library/alpine /bin/sh -c 'i=0; while true; do echo $i; i=$(expr $i + 1); sleep 1; done' sleep 1 -for i in `seq 50`; do +for i in `seq 20`; do echo "Test $i for podman container checkpoint" podman exec cr ps axf podman logs cr @@ -48,7 +48,7 @@ for i in `seq 50`; do podman logs cr done -for i in `seq 50`; do +for i in `seq 20`; do echo "Test $i for podman container checkpoint --export" podman ps -a podman exec cr ps axf From ca02c47075b69c3387d03ae2a09ab9499d5bd27f Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Sun, 15 Dec 2019 20:38:46 +0000 Subject: [PATCH 0297/2030] kerndat: detect if system support clone3() with set_tid Linux kernel 5.4 extends clone3() with set_tid to allow processes to specify the PID of a newly created process. This introduces detection of the clone3() syscall and if set_tid is supported. This first implementation is X86_64 only. 
Signed-off-by: Adrian Reber --- .../arch/arm/plugins/std/syscalls/syscall.def | 1 + .../plugins/std/syscalls/syscall-ppc64.tbl | 1 + .../plugins/std/syscalls/syscall-s390.tbl | 1 + .../x86/plugins/std/syscalls/syscall_32.tbl | 1 + .../x86/plugins/std/syscalls/syscall_64.tbl | 1 + .../plugins/include/uapi/std/syscall-types.h | 1 + criu/cr-check.c | 12 ++++++ criu/include/kerndat.h | 1 + criu/include/sched.h | 33 +++++++++++++++ criu/kerndat.c | 41 +++++++++++++++++++ 10 files changed, 93 insertions(+) create mode 100644 criu/include/sched.h diff --git a/compel/arch/arm/plugins/std/syscalls/syscall.def b/compel/arch/arm/plugins/std/syscalls/syscall.def index d5bdc677e..f7ebc8527 100644 --- a/compel/arch/arm/plugins/std/syscalls/syscall.def +++ b/compel/arch/arm/plugins/std/syscalls/syscall.def @@ -115,3 +115,4 @@ ppoll 73 336 (struct pollfd *fds, unsigned int nfds, const struct timespec *t fsopen 430 430 (char *fsname, unsigned int flags) fsconfig 431 431 (int fd, unsigned int cmd, const char *key, const char *value, int aux) fsmount 432 432 (int fd, unsigned int flags, unsigned int attr_flags) +clone3 435 435 (struct clone_args *uargs, size_t size) diff --git a/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl b/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl index 4e283d5e9..1afaf1e70 100644 --- a/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl +++ b/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl @@ -111,3 +111,4 @@ __NR_ppoll 281 sys_ppoll (struct pollfd *fds, unsigned int nfds, const struct __NR_fsopen 430 sys_fsopen (char *fsname, unsigned int flags) __NR_fsconfig 431 sys_fsconfig (int fd, unsigned int cmd, const char *key, const char *value, int aux) __NR_fsmount 432 sys_fsmount (int fd, unsigned int flags, unsigned int attr_flags) +__NR_clone3 435 sys_clone3 (struct clone_args *uargs, size_t size) diff --git a/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl b/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl index 
fd48e3950..ae6fdb5f8 100644 --- a/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl +++ b/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl @@ -111,3 +111,4 @@ __NR_ppoll 302 sys_ppoll (struct pollfd *fds, unsigned int nfds, const struct __NR_fsopen 430 sys_fsopen (char *fsname, unsigned int flags) __NR_fsconfig 431 sys_fsconfig (int fd, unsigned int cmd, const char *key, const char *value, int aux) __NR_fsmount 432 sys_fsmount (int fd, unsigned int flags, unsigned int attr_flags) +__NR_clone3 435 sys_clone3 (struct clone_args *uargs, size_t size) diff --git a/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl b/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl index 038aeb4f7..7a487110d 100644 --- a/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl +++ b/compel/arch/x86/plugins/std/syscalls/syscall_32.tbl @@ -99,3 +99,4 @@ __NR_ppoll 309 sys_ppoll (struct pollfd *fds, unsigned int nfds, const struct __NR_fsopen 430 sys_fsopen (char *fsname, unsigned int flags) __NR_fsconfig 431 sys_fsconfig (int fd, unsigned int cmd, const char *key, const char *value, int aux) __NR_fsmount 432 sys_fsmount (int fd, unsigned int flags, unsigned int attr_flags) +__NR_clone3 435 sys_clone3 (struct clone_args *uargs, size_t size) diff --git a/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl b/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl index 215f32026..6667c07db 100644 --- a/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl +++ b/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl @@ -110,3 +110,4 @@ __NR_ppoll 271 sys_ppoll (struct pollfd *fds, unsigned int nfds, const struc __NR_fsopen 430 sys_fsopen (char *fsname, unsigned int flags) __NR_fsconfig 431 sys_fsconfig (int fd, unsigned int cmd, const char *key, const char *value, int aux) __NR_fsmount 432 sys_fsmount (int fd, unsigned int flags, unsigned int attr_flags) +__NR_clone3 435 sys_clone3 (struct clone_args *uargs, size_t size) diff --git a/compel/plugins/include/uapi/std/syscall-types.h 
b/compel/plugins/include/uapi/std/syscall-types.h index 57865e741..031e773bb 100644 --- a/compel/plugins/include/uapi/std/syscall-types.h +++ b/compel/plugins/include/uapi/std/syscall-types.h @@ -39,6 +39,7 @@ struct msghdr; struct rusage; struct iocb; struct pollfd; +struct clone_args; typedef unsigned long aio_context_t; diff --git a/criu/cr-check.c b/criu/cr-check.c index 17dd29b42..80df3f7cd 100644 --- a/criu/cr-check.c +++ b/criu/cr-check.c @@ -1224,6 +1224,16 @@ static int check_uffd_noncoop(void) return 0; } +static int check_clone3_set_tid(void) +{ + if (!kdat.has_clone3_set_tid) { + pr_warn("clone3() with set_tid not supported\n"); + return -1; + } + + return 0; +} + static int check_can_map_vdso(void) { if (kdat_can_map_vdso() == 1) @@ -1373,6 +1383,7 @@ int cr_check(void) ret |= check_sk_netns(); ret |= check_kcmp_epoll(); ret |= check_net_diag_raw(); + ret |= check_clone3_set_tid(); } /* @@ -1476,6 +1487,7 @@ static struct feature_list feature_list[] = { { "link_nsid", check_link_nsid}, { "kcmp_epoll", check_kcmp_epoll}, { "external_net_ns", check_external_net_ns}, + { "clone3_set_tid", check_clone3_set_tid}, { NULL, NULL }, }; diff --git a/criu/include/kerndat.h b/criu/include/kerndat.h index 771195860..27c870bb8 100644 --- a/criu/include/kerndat.h +++ b/criu/include/kerndat.h @@ -66,6 +66,7 @@ struct kerndat_s { bool has_inotify_setnextwd; bool has_kcmp_epoll_tfd; bool has_fsopen; + bool has_clone3_set_tid; }; extern struct kerndat_s kdat; diff --git a/criu/include/sched.h b/criu/include/sched.h new file mode 100644 index 000000000..78f65e3b7 --- /dev/null +++ b/criu/include/sched.h @@ -0,0 +1,33 @@ +#ifndef __CR_SCHED_H__ +#define __CR_SCHED_H__ + +#include + +#ifndef ptr_to_u64 +#define ptr_to_u64(ptr) ((__u64)((uintptr_t)(ptr))) +#endif +#ifndef u64_to_ptr +#define u64_to_ptr(x) ((void *)(uintptr_t)x) +#endif + +/* + * This structure is needed by clone3(). The kernel + * calls it 'struct clone_args'. 
As CRIU will always + * need at least this part of the structure (VER1) + * to be able to test if clone3() with set_tid works, + * the structure is defined here as 'struct _clone_args'. + */ + +struct _clone_args { + __aligned_u64 flags; + __aligned_u64 pidfd; + __aligned_u64 child_tid; + __aligned_u64 parent_tid; + __aligned_u64 exit_signal; + __aligned_u64 stack; + __aligned_u64 stack_size; + __aligned_u64 tls; + __aligned_u64 set_tid; + __aligned_u64 set_tid_size; +}; +#endif /* __CR_SCHED_H__ */ diff --git a/criu/kerndat.c b/criu/kerndat.c index d1afde71d..0772828bc 100644 --- a/criu/kerndat.c +++ b/criu/kerndat.c @@ -41,6 +41,7 @@ #include "uffd.h" #include "vdso.h" #include "kcmp.h" +#include "sched.h" struct kerndat_s kdat = { }; @@ -986,6 +987,44 @@ static int kerndat_tun_netns(void) return check_tun_netns_cr(&kdat.tun_ns); } +static bool kerndat_has_clone3_set_tid(void) +{ + pid_t pid; + struct _clone_args args = {}; + +#ifndef CONFIG_X86_64 + /* + * Currently the CRIU PIE assembler clone3() wrapper is + * only implemented for X86_64. + */ + kdat.has_clone3_set_tid = false; + return 0; +#endif + + args.set_tid = -1; + /* + * On a system without clone3() this will return ENOSYS. + * On a system with clone3() but without set_tid this + * will return E2BIG. + * On a system with clone3() and set_tid it will return + * EINVAL. 
+ */ + pid = syscall(__NR_clone3, &args, sizeof(args)); + + if (pid == -1 && (errno == ENOSYS || errno == E2BIG)) { + kdat.has_clone3_set_tid = false; + return 0; + } + if (pid == -1 && errno == EINVAL) { + kdat.has_clone3_set_tid = true; + } else { + pr_perror("Unexpected error from clone3\n"); + return -1; + } + + return 0; +} + int kerndat_init(void) { int ret; @@ -1059,6 +1098,8 @@ int kerndat_init(void) ret = has_kcmp_epoll_tfd(); if (!ret) ret = kerndat_has_fsopen(); + if (!ret) + ret = kerndat_has_clone3_set_tid(); kerndat_lsm(); kerndat_mmap_min_addr(); From 97c03b97d00e27397ca2ea0f9b5569739e24ae27 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Mon, 16 Dec 2019 07:57:03 +0000 Subject: [PATCH 0298/2030] Add assembler wrapper for clone3() To create a new process/thread with a certain PID based on clone3() a new assembler wrapper is necessary as there is not glibc wrapper (yet). Signed-off-by: Adrian Reber --- criu/arch/aarch64/include/asm/restorer.h | 7 ++ criu/arch/arm/include/asm/restorer.h | 7 ++ criu/arch/ppc64/include/asm/restorer.h | 7 ++ criu/arch/s390/include/asm/restorer.h | 7 ++ criu/arch/x86/include/asm/restorer.h | 92 ++++++++++++++++++++++++ 5 files changed, 120 insertions(+) diff --git a/criu/arch/aarch64/include/asm/restorer.h b/criu/arch/aarch64/include/asm/restorer.h index f502cdcaf..2fe58915b 100644 --- a/criu/arch/aarch64/include/asm/restorer.h +++ b/criu/arch/aarch64/include/asm/restorer.h @@ -42,6 +42,13 @@ "r"(&thread_args[i]) \ : "x0", "x1", "x2", "x3", "x8", "memory") +#define RUN_CLONE3_RESTORE_FN(ret, clone_args, size, args, \ + clone_restore_fn) do { \ + pr_err("This architecture does not support clone3() with set_tid, yet!\n"); \ + pr_err("Not creating a process with PID: %d\n", ((pid_t *)u64_to_ptr(clone_args.set_tid))[0]); \ + ret = -1; \ +} while (0) + #define ARCH_FAIL_CORE_RESTORE \ asm volatile( \ "mov sp, %0 \n" \ diff --git a/criu/arch/arm/include/asm/restorer.h b/criu/arch/arm/include/asm/restorer.h index 
217d920e8..ad4b58f93 100644 --- a/criu/arch/arm/include/asm/restorer.h +++ b/criu/arch/arm/include/asm/restorer.h @@ -43,6 +43,13 @@ "r"(&thread_args[i]) \ : "r0", "r1", "r2", "r3", "r7", "memory") +#define RUN_CLONE3_RESTORE_FN(ret, clone_args, size, args, \ + clone_restore_fn) do { \ + pr_err("This architecture does not support clone3() with set_tid, yet!\n"); \ + pr_err("Not creating a process with PID: %d\n", ((pid_t *)u64_to_ptr(clone_args.set_tid))[0]); \ + ret = -1; \ +} while (0) + #define ARCH_FAIL_CORE_RESTORE \ asm volatile( \ "mov sp, %0 \n" \ diff --git a/criu/arch/ppc64/include/asm/restorer.h b/criu/arch/ppc64/include/asm/restorer.h index d48d833d6..19bc3ea36 100644 --- a/criu/arch/ppc64/include/asm/restorer.h +++ b/criu/arch/ppc64/include/asm/restorer.h @@ -48,6 +48,13 @@ "r"(&thread_args[i]) /* %6 */ \ : "memory","0","3","4","5","6","7","14","15") +#define RUN_CLONE3_RESTORE_FN(ret, clone_args, size, args, \ + clone_restore_fn) do { \ + pr_err("This architecture does not support clone3() with set_tid, yet!\n"); \ + pr_err("Not creating a process with PID: %d\n", ((pid_t *)u64_to_ptr(clone_args.set_tid))[0]); \ + ret = -1; \ +} while (0) + #define arch_map_vdso(map, compat) -1 int restore_gpregs(struct rt_sigframe *f, UserPpc64RegsEntry *r); diff --git a/criu/arch/s390/include/asm/restorer.h b/criu/arch/s390/include/asm/restorer.h index cfdefcab9..733f2de33 100644 --- a/criu/arch/s390/include/asm/restorer.h +++ b/criu/arch/s390/include/asm/restorer.h @@ -39,6 +39,13 @@ "d"(&thread_args[i]) \ : "0", "1", "2", "3", "4", "5", "6", "cc", "memory") +#define RUN_CLONE3_RESTORE_FN(ret, clone_args, size, args, \ + clone_restore_fn) do { \ + pr_err("This architecture does not support clone3() with set_tid, yet!\n"); \ + pr_err("Not creating a process with PID: %d\n", ((pid_t *)u64_to_ptr(clone_args.set_tid))[0]); \ + ret = -1; \ +} while (0) + #define arch_map_vdso(map, compat) -1 int restore_gpregs(struct rt_sigframe *f, UserS390RegsEntry *r); diff --git 
a/criu/arch/x86/include/asm/restorer.h b/criu/arch/x86/include/asm/restorer.h index 25559b57c..731477ec9 100644 --- a/criu/arch/x86/include/asm/restorer.h +++ b/criu/arch/x86/include/asm/restorer.h @@ -25,6 +25,21 @@ static inline int set_compat_robust_list(uint32_t head_ptr, uint32_t len) } #endif /* !CONFIG_COMPAT */ +/* + * Documentation copied from glibc sysdeps/unix/sysv/linux/x86_64/clone.S + * The kernel expects: + * rax: system call number + * rdi: flags + * rsi: child_stack + * rdx: TID field in parent + * r10: TID field in child + * r8: thread pointer + * + * int clone(unsigned long clone_flags, unsigned long newsp, + * int *parent_tidptr, int *child_tidptr, + * unsigned long tls); + */ + #define RUN_CLONE_RESTORE_FN(ret, clone_flags, new_sp, parent_tid, \ thread_args, clone_restore_fn) \ asm volatile( \ @@ -63,6 +78,83 @@ static inline int set_compat_robust_list(uint32_t head_ptr, uint32_t len) "g"(&thread_args[i]) \ : "rax", "rcx", "rdi", "rsi", "rdx", "r10", "r11", "memory") +/* int clone3(struct clone_args *args, size_t size) */ +#define RUN_CLONE3_RESTORE_FN(ret, clone_args, size, args, \ + clone_restore_fn) \ + asm volatile( \ + "clone3_emul: \n" \ + /* + * Prepare stack pointer for child process. The kernel does + * stack + stack_size before passing the stack pointer to the + * child process. As we have to put the function and the + * arguments for the new process on that stack we have handle + * the kernel's implicit stack + stack_size. + */ \ + "movq (%3), %%rsi /* new stack pointer */ \n" \ + /* Move the stack_size to %rax to use later as the offset */ \ + "movq %4, %%rax \n" \ + /* 16 bytes are needed on the stack for function and args */ \ + "subq $16, (%%rsi, %%rax) \n" \ + "movq %6, %%rdi /* thread args */ \n" \ + "movq %%rdi, 8(%%rsi, %%rax) \n" \ + "movq %5, %%rdi /* thread function */ \n" \ + "movq %%rdi, 0(%%rsi, %%rax) \n" \ + /* + * The stack address has been modified for the two + * elements above (child function, child arguments). 
+ * This modified stack needs to be stored back into the + * clone_args structure. + */ \ + "movq (%%rsi), %3 \n" \ + /* + * Do the actual clone3() syscall. First argument (%rdi) is + * the clone_args structure, second argument is the size + * of clone_args. + */ \ + "movq %1, %%rdi /* clone_args */ \n" \ + "movq %2, %%rsi /* size */ \n" \ + "movl $"__stringify(__NR_clone3)", %%eax \n" \ + "syscall \n" \ + /* + * If clone3() was successful and if we are in the child + * '0' is returned. Jump to the child function handler. + */ \ + "testq %%rax,%%rax \n" \ + "jz thread3_run \n" \ + /* Return the PID to the parent process. */ \ + "movq %%rax, %0 \n" \ + "jmp clone3_end \n" \ + \ + "thread3_run: /* Child process */ \n" \ + /* Clear the frame pointer */ \ + "xorq %%rbp, %%rbp \n" \ + /* Pop the child function from the stack */ \ + "popq %%rax \n" \ + /* Pop the child function arguments from the stack */ \ + "popq %%rdi \n" \ + /* Run the child function */ \ + "callq *%%rax \n" \ + /* + * If the child function is expected to return, this + * would be the place to handle the return code. In CRIU's + * case the child function is expected to not return + * and do exit() itself. + */ \ + \ + "clone3_end: \n" \ + : "=r"(ret) \ + /* + * This uses the "r" modifier for all parameters + * as clang complained if using "g". + */ \ + : "r"(&clone_args), \ + "r"(size), \ + "r"(&clone_args.stack), \ + "r"(clone_args.stack_size), \ + "r"(clone_restore_fn), \ + "r"(args) \ + : "rax", "rcx", "rdi", "rsi", "rdx", "r10", "r11", "memory") + #define ARCH_FAIL_CORE_RESTORE \ asm volatile( \ "movq %0, %%rsp \n" \ From a1ea8deb4c0824dc2178dbe116c9b10f81a608aa Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Mon, 16 Dec 2019 10:42:13 +0000 Subject: [PATCH 0299/2030] Use clone3() with set_tid to create processes With clone3() and its set_tid support, introduced in Linux kernel 5.4, it is no longer necessary to write to /proc/../ns_last_pid to influence the next PID number.
clone3() can directly select a PID for the newly created process/thread. After checking for the availability of clone3() with set_tid and adding the assembler wrapper for clone3() in previous patches, this extends criu/pie/restorer.c and criu/clone-noasan.c to use the newly added assembler clone3() wrapper to create processes with a certain PID. This is a RFC and WIP, but I wanted to share it and run it through CI for feedback. As the CI will probably not use a 5.4 based kernel it should just keep on working as before. Signed-off-by: Adrian Reber --- criu/clone-noasan.c | 32 +++++++++++++++++++ criu/cr-restore.c | 64 +++++++++++++++++++++++-------------- criu/include/clone-noasan.h | 2 ++ criu/include/restorer.h | 1 + criu/include/rst_info.h | 1 + criu/pie/restorer.c | 64 ++++++++++++++++++++++++------------- 6 files changed, 117 insertions(+), 47 deletions(-) diff --git a/criu/clone-noasan.c b/criu/clone-noasan.c index 5f1858d4d..bcbc3e4bd 100644 --- a/criu/clone-noasan.c +++ b/criu/clone-noasan.c @@ -1,4 +1,10 @@ +#include #include +#include + +#include + +#include "sched.h" #include "common/compiler.h" #include "log.h" #include "common/bug.h" @@ -31,6 +37,7 @@ int clone_noasan(int (*fn)(void *), int flags, void *arg) { void *stack_ptr = (void *)round_down((unsigned long)&stack_ptr - 1024, 16); + BUG_ON((flags & CLONE_VM) && !(flags & CLONE_VFORK)); /* * Reserve some bytes for clone() internal needs @@ -38,3 +45,28 @@ int clone_noasan(int (*fn)(void *), int flags, void *arg) */ return clone(fn, stack_ptr, flags, arg); } + +int clone3_with_pid_noasan(int (*fn)(void *), void *arg, int flags, + int exit_signal, pid_t pid) +{ + struct _clone_args c_args = {}; + + BUG_ON(flags & CLONE_VM); + + /* + * Make sure no child signals are requested. clone3() uses + * exit_signal for that. 
+ */ + BUG_ON(flags & 0xff); + + pr_debug("Creating process using clone3()\n"); + + c_args.exit_signal = exit_signal; + c_args.flags = flags; + c_args.set_tid = ptr_to_u64(&pid); + c_args.set_tid_size = 1; + pid = syscall(__NR_clone3, &c_args, sizeof(c_args)); + if (pid == 0) + exit(fn(arg)); + return pid; +} diff --git a/criu/cr-restore.c b/criu/cr-restore.c index 687cd6c68..b4f8d9e75 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -1374,40 +1374,55 @@ static inline int fork_with_pid(struct pstree_item *item) if (!(ca.clone_flags & CLONE_NEWPID)) { char buf[32]; int len; - int fd; + int fd = -1; - fd = open_proc_rw(PROC_GEN, LAST_PID_PATH); - if (fd < 0) - goto err; + if (!kdat.has_clone3_set_tid) { + fd = open_proc_rw(PROC_GEN, LAST_PID_PATH); + if (fd < 0) + goto err; + } lock_last_pid(); - len = snprintf(buf, sizeof(buf), "%d", pid - 1); - if (write(fd, buf, len) != len) { - pr_perror("%d: Write %s to %s", pid, buf, LAST_PID_PATH); + if (!kdat.has_clone3_set_tid) { + len = snprintf(buf, sizeof(buf), "%d", pid - 1); + if (write(fd, buf, len) != len) { + pr_perror("%d: Write %s to %s", pid, buf, + LAST_PID_PATH); + close(fd); + goto err_unlock; + } close(fd); - goto err_unlock; } - close(fd); } else { BUG_ON(pid != INIT_PID); } - /* - * Some kernel modules, such as network packet generator - * run kernel thread upon net-namespace creattion taking - * the @pid we've been requeting via LAST_PID_PATH interface - * so that we can't restore a take with pid needed. - * - * Here is an idea -- unhare net namespace in callee instead. - */ - /* - * The cgroup namespace is also unshared explicitly in the - * move_in_cgroup(), so drop this flag here as well. 
- */ - close_pid_proc(); - ret = clone_noasan(restore_task_with_children, - (ca.clone_flags & ~(CLONE_NEWNET | CLONE_NEWCGROUP)) | SIGCHLD, &ca); + if (kdat.has_clone3_set_tid) { + ret = clone3_with_pid_noasan(restore_task_with_children, + &ca, (ca.clone_flags & + ~(CLONE_NEWNET | CLONE_NEWCGROUP)), + SIGCHLD, pid); + } else { + /* + * Some kernel modules, such as network packet generator + * run kernel thread upon net-namespace creation taking + * the @pid we've been requesting via LAST_PID_PATH interface + * so that we can't restore a take with pid needed. + * + * Here is an idea -- unshare net namespace in callee instead. + */ + /* + * The cgroup namespace is also unshared explicitly in the + * move_in_cgroup(), so drop this flag here as well. + */ + close_pid_proc(); + ret = clone_noasan(restore_task_with_children, + (ca.clone_flags & + ~(CLONE_NEWNET | CLONE_NEWCGROUP)) | SIGCHLD, + &ca); + } + if (ret < 0) { pr_perror("Can't fork for %d", pid); goto err_unlock; @@ -3588,6 +3603,7 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns task_args->vdso_maps_rt = vdso_maps_rt; task_args->vdso_rt_size = vdso_rt_size; task_args->can_map_vdso = kdat.can_map_vdso; + task_args->has_clone3_set_tid = kdat.has_clone3_set_tid; new_sp = restorer_stack(task_args->t->mz); diff --git a/criu/include/clone-noasan.h b/criu/include/clone-noasan.h index 8ef75fa73..0cfdaa1d9 100644 --- a/criu/include/clone-noasan.h +++ b/criu/include/clone-noasan.h @@ -2,5 +2,7 @@ #define __CR_CLONE_NOASAN_H__ int clone_noasan(int (*fn)(void *), int flags, void *arg); +int clone3_with_pid_noasan(int (*fn)(void *), void *arg, int flags, + int exit_signal, pid_t pid); #endif /* __CR_CLONE_NOASAN_H__ */ diff --git a/criu/include/restorer.h b/criu/include/restorer.h index b93807f5f..dfb4e6b71 100644 --- a/criu/include/restorer.h +++ b/criu/include/restorer.h @@ -221,6 +221,7 @@ struct task_restore_args { #endif int lsm_type; int child_subreaper; + bool has_clone3_set_tid; } 
__aligned(64); /* diff --git a/criu/include/rst_info.h b/criu/include/rst_info.h index 07c634f4a..3283849e4 100644 --- a/criu/include/rst_info.h +++ b/criu/include/rst_info.h @@ -4,6 +4,7 @@ #include "common/lock.h" #include "common/list.h" #include "vma.h" +#include "kerndat.h" struct task_entries { int nr_threads, nr_tasks, nr_helpers; diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c index 888eb8e65..7012b88a1 100644 --- a/criu/pie/restorer.c +++ b/criu/pie/restorer.c @@ -35,6 +35,7 @@ #include "sk-inet.h" #include "vma.h" #include "uffd.h" +#include "sched.h" #include "common/lock.h" #include "common/page.h" @@ -1771,16 +1772,19 @@ long __export_restore_task(struct task_restore_args *args) long clone_flags = CLONE_VM | CLONE_FILES | CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM | CLONE_FS; long last_pid_len; + pid_t thread_pid; long parent_tid; int i, fd = -1; - /* One level pid ns hierarhy */ - fd = sys_openat(args->proc_fd, LAST_PID_PATH, O_RDWR, 0); - if (fd < 0) { - pr_err("can't open last pid fd %d\n", fd); - goto core_restore_end; - } + if (!args->has_clone3_set_tid) { + /* One level pid ns hierarhy */ + fd = sys_openat(args->proc_fd, LAST_PID_PATH, O_RDWR, 0); + if (fd < 0) { + pr_err("can't open last pid fd %d\n", fd); + goto core_restore_end; + } + } mutex_lock(&task_entries_local->last_pid_mutex); for (i = 0; i < args->nr_threads; i++) { @@ -1791,24 +1795,38 @@ long __export_restore_task(struct task_restore_args *args) continue; new_sp = restorer_stack(thread_args[i].mz); - last_pid_len = std_vprint_num(last_pid_buf, sizeof(last_pid_buf), thread_args[i].pid - 1, &s); - sys_lseek(fd, 0, SEEK_SET); - ret = sys_write(fd, s, last_pid_len); - if (ret < 0) { - pr_err("Can't set last_pid %ld/%s\n", ret, last_pid_buf); - sys_close(fd); - mutex_unlock(&task_entries_local->last_pid_mutex); - goto core_restore_end; + if (args->has_clone3_set_tid) { + struct _clone_args c_args = {}; + thread_pid = thread_args[i].pid; + c_args.set_tid = 
ptr_to_u64(&thread_pid); + c_args.flags = clone_flags; + c_args.set_tid_size = 1; + /* The kernel does stack + stack_size. */ + c_args.stack = new_sp - RESTORE_STACK_SIZE; + c_args.stack_size = RESTORE_STACK_SIZE; + c_args.child_tid = ptr_to_u64(&thread_args[i].pid); + c_args.parent_tid = ptr_to_u64(&parent_tid); + pr_debug("Using clone3 to restore the process\n"); + RUN_CLONE3_RESTORE_FN(ret, c_args, sizeof(c_args), &thread_args[i], args->clone_restore_fn); + } else { + last_pid_len = std_vprint_num(last_pid_buf, sizeof(last_pid_buf), thread_args[i].pid - 1, &s); + sys_lseek(fd, 0, SEEK_SET); + ret = sys_write(fd, s, last_pid_len); + if (ret < 0) { + pr_err("Can't set last_pid %ld/%s\n", ret, last_pid_buf); + sys_close(fd); + mutex_unlock(&task_entries_local->last_pid_mutex); + goto core_restore_end; + } + + /* + * To achieve functionality like libc's clone() + * we need a pure assembly here, because clone()'ed + * thread will run with own stack and we must not + * have any additional instructions... oh, dear... + */ + RUN_CLONE_RESTORE_FN(ret, clone_flags, new_sp, parent_tid, thread_args, args->clone_restore_fn); } - - /* - * To achieve functionality like libc's clone() - * we need a pure assembly here, because clone()'ed - * thread will run with own stack and we must not - * have any additional instructions... oh, dear... 
- */ - - RUN_CLONE_RESTORE_FN(ret, clone_flags, new_sp, parent_tid, thread_args, args->clone_restore_fn); if (ret != thread_args[i].pid) { pr_err("Unable to create a thread: %ld\n", ret); mutex_unlock(&task_entries_local->last_pid_mutex); From 4c4f67a56be60300e734d92411e16e26928d3776 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Wed, 15 Jan 2020 14:27:09 +0100 Subject: [PATCH 0300/2030] s390x: remove stack pointer from clobber list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Just like on all other supported architectures gcc complains about the stack pointer register being part of the clobber list: error: listing the stack pointer register ‘15’ in a clobber list is deprecated [-Werror=deprecated] This removes the stack pointer from the clobber list. 'zdtm.py run -a' still runs without any errors after this change. Signed-off-by: Adrian Reber --- compel/arch/s390/src/lib/include/uapi/asm/sigframe.h | 2 +- criu/arch/s390/include/asm/restore.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/compel/arch/s390/src/lib/include/uapi/asm/sigframe.h b/compel/arch/s390/src/lib/include/uapi/asm/sigframe.h index b6b894473..c599ef3ab 100644 --- a/compel/arch/s390/src/lib/include/uapi/asm/sigframe.h +++ b/compel/arch/s390/src/lib/include/uapi/asm/sigframe.h @@ -66,7 +66,7 @@ struct rt_sigframe { "svc 0\n" \ : \ : "d" (new_sp) \ - : "15", "memory") + : "memory") #define RT_SIGFRAME_UC(rt_sigframe) (&rt_sigframe->uc) #define RT_SIGFRAME_REGIP(rt_sigframe) (rt_sigframe)->uc.uc_mcontext.regs.psw.addr diff --git a/criu/arch/s390/include/asm/restore.h b/criu/arch/s390/include/asm/restore.h index 6463d8e62..b77e36c77 100644 --- a/criu/arch/s390/include/asm/restore.h +++ b/criu/arch/s390/include/asm/restore.h @@ -18,7 +18,7 @@ : "d" (new_sp), \ "d"((unsigned long)restore_task_exec_start), \ "d" (task_args) \ - : "2", "14", "15", "memory") + : "2", "14", "memory") /* There is nothing to do since TLS is accessed through 
%a01 */ #define core_get_tls(pcore, ptls) From cbadd201cbd20b7e44b9f8edea932a9420f67230 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Thu, 16 Jan 2020 16:41:40 +0100 Subject: [PATCH 0301/2030] s390x: use clone3() if possible This adds the parasite clone3() with set_tid wrapper for s390x. In contrast to the x86_64 implementation the thread start address and arguments are not put on the thread stack but passed via r4 and r5. As those registers are caller-saved they still contain the correct value (thread start address and arguments) after returning from the syscall. Tested on 5.5.0-rc6. Signed-off-by: Adrian Reber --- criu/arch/s390/include/asm/restorer.h | 40 +++++++++++++++++++++++---- criu/kerndat.c | 4 +-- 2 files changed, 37 insertions(+), 7 deletions(-) diff --git a/criu/arch/s390/include/asm/restorer.h b/criu/arch/s390/include/asm/restorer.h index 733f2de33..2fc266535 100644 --- a/criu/arch/s390/include/asm/restorer.h +++ b/criu/arch/s390/include/asm/restorer.h @@ -40,11 +40,41 @@ : "0", "1", "2", "3", "4", "5", "6", "cc", "memory") #define RUN_CLONE3_RESTORE_FN(ret, clone_args, size, args, \ - clone_restore_fn) do { \ - pr_err("This architecture does not support clone3() with set_tid, yet!\n"); \ - pr_err("Not creating a process with PID: %d\n", ((pid_t *)u64_to_ptr(clone_args.set_tid))[0]); \ - ret = -1; \ -} while (0) + clone_restore_fn) \ + asm volatile( \ + /* + * clone3 only needs two arguments (r2, r3), this means + * we can use r4 and r5 for args and thread function. + * r4 and r5 are callee-saved and are not overwritten. + * No need to put these values on the child stack. + */ \ + "lgr %%r4,%4\n" /* Save args in %r4 */ \ + "lgr %%r5,%3\n" /* Save clone_restore_fn in %r5 */ \ + "lgr %%r2,%1\n" /* Parameter 1: clone_args */ \ + "lgr %%r3,%2\n" /* Parameter 2: size */ \ + /* + * On s390x a syscall is done sc . + * That only works for syscalls < 255. clone3 is 435, + * therefore it is necessary to load the syscall number + * into r1 and do 'svc 0'. 
+ */ \ + "lghi %%r1,"__stringify(__NR_clone3)"\n" \ + "svc 0\n" \ + "ltgr %0,%%r2\n" /* Set and check "ret" */ \ + "jnz 0f\n" /* ret != 0: Continue caller */ \ + "lgr %%r2,%%r4\n" /* Thread arguments taken from r4. */ \ + "lgr %%r1,%%r5\n" /* Thread function taken from r5. */ \ + "aghi %%r15,-160\n" /* Prepare stack frame */ \ + "xc 0(8,%%r15),0(%%r15)\n" \ + "basr %%r14,%%r1\n" /* Jump to clone_restore_fn() */ \ + "j .+2\n" /* BUG(): Force PGM check */ \ +"0:\n" /* Continue caller */ \ + : "=d"(ret) \ + : "a"(&clone_args), \ + "d"(size), \ + "d"(clone_restore_fn), \ + "d"(args) \ + : "0", "1", "2", "3", "4", "5", "cc", "memory") #define arch_map_vdso(map, compat) -1 diff --git a/criu/kerndat.c b/criu/kerndat.c index 0772828bc..2261cca60 100644 --- a/criu/kerndat.c +++ b/criu/kerndat.c @@ -992,10 +992,10 @@ static bool kerndat_has_clone3_set_tid(void) pid_t pid; struct _clone_args args = {}; -#ifndef CONFIG_X86_64 +#if !defined(CONFIG_X86_64) && !defined(CONFIG_S390) /* * Currently the CRIU PIE assembler clone3() wrapper is - * only implemented for X86_64. + * only implemented for X86_64 and S390X. */ kdat.has_clone3_set_tid = false; return 0; From 55c8ec62a53e2728b09ee93d36fd6dd36eeb0c49 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Tue, 21 Jan 2020 14:20:05 +0100 Subject: [PATCH 0302/2030] arm: remove stack pointer from clobber list Just like on all other supported architectures gcc complains about the stack pointer register being part of the clobber list. This removes the stack pointer from the clobber list. 
Signed-off-by: Adrian Reber --- criu/arch/arm/include/asm/restore.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/criu/arch/arm/include/asm/restore.h b/criu/arch/arm/include/asm/restore.h index 4c64d58ef..c3b64c5b7 100644 --- a/criu/arch/arm/include/asm/restore.h +++ b/criu/arch/arm/include/asm/restore.h @@ -16,7 +16,7 @@ : "r"(new_sp), \ "r"(restore_task_exec_start), \ "r"(task_args) \ - : "sp", "r0", "r1", "memory") + : "r0", "r1", "memory") static inline void core_get_tls(CoreEntry *pcore, tls_t *ptls) { From f6469493dd739d4c16c8230524fa5d5a88731b80 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Fri, 17 Jan 2020 13:35:48 +0100 Subject: [PATCH 0303/2030] ppc64le: use clone3() if possible This adds the parasite clone3() with set_tid wrapper for ppc64le. Signed-off-by: Adrian Reber --- criu/arch/ppc64/include/asm/restorer.h | 46 ++++++++++++++++++++++---- criu/kerndat.c | 4 +-- 2 files changed, 42 insertions(+), 8 deletions(-) diff --git a/criu/arch/ppc64/include/asm/restorer.h b/criu/arch/ppc64/include/asm/restorer.h index 19bc3ea36..c447eefea 100644 --- a/criu/arch/ppc64/include/asm/restorer.h +++ b/criu/arch/ppc64/include/asm/restorer.h @@ -48,12 +48,46 @@ "r"(&thread_args[i]) /* %6 */ \ : "memory","0","3","4","5","6","7","14","15") -#define RUN_CLONE3_RESTORE_FN(ret, clone_args, size, args, \ - clone_restore_fn) do { \ - pr_err("This architecture does not support clone3() with set_tid, yet!\n"); \ - pr_err("Not creating a process with PID: %d\n", ((pid_t *)u64_to_ptr(clone_args.set_tid))[0]); \ - ret = -1; \ -} while (0) +#define RUN_CLONE3_RESTORE_FN(ret, clone_args, size, args, \ + clone_restore_fn) \ +/* + * The clone3() function accepts following parameters: + * int clone3(struct clone_args *args, size_t size) + * + * Always consult the CLONE3 wrappers for other architectures + * for additional details. + * + * For PPC64LE the first parameter (clone_args) is passed in r3 and + * the second parameter (size) is passed in r4. 
+ * + * This clone3() wrapper is based on the clone() wrapper from above. + */ \ + asm volatile( \ + "clone3_emul: \n" \ + "/* Save fn, args across syscall. */ \n" \ + "mr 14, %3 /* clone_restore_fn in r14 */ \n" \ + "mr 15, %4 /* &thread_args[i] in r15 */ \n" \ + "mr 3, %1 /* clone_args */ \n" \ + "mr 4, %2 /* size */ \n" \ + "li 0,"__stringify(__NR_clone3)" \n" \ + "sc \n" \ + "/* Check for child process. */ \n" \ + "cmpdi cr1,3,0 \n" \ + "crandc cr1*4+eq,cr1*4+eq,cr0*4+so \n" \ + "bne- cr1,clone3_end \n" \ + "/* child */ \n" \ + "addi 14, 14, 8 /* jump over r2 fixup */ \n" \ + "mtctr 14 \n" \ + "mr 3,15 \n" \ + "bctr \n" \ + "clone3_end: \n" \ + "mr %0,3 \n" \ + : "=r"(ret) /* %0 */ \ + : "r"(&clone_args), /* %1 */ \ + "r"(size), /* %2 */ \ + "r"(clone_restore_fn), /* %3 */ \ + "r"(args) /* %4 */ \ + : "memory","0","3","4","5","14","15") #define arch_map_vdso(map, compat) -1 diff --git a/criu/kerndat.c b/criu/kerndat.c index 2261cca60..c1fc9259b 100644 --- a/criu/kerndat.c +++ b/criu/kerndat.c @@ -992,10 +992,10 @@ static bool kerndat_has_clone3_set_tid(void) pid_t pid; struct _clone_args args = {}; -#if !defined(CONFIG_X86_64) && !defined(CONFIG_S390) +#if !defined(CONFIG_X86_64) && !defined(CONFIG_S390) && !defined(CONFIG_PPC64) /* * Currently the CRIU PIE assembler clone3() wrapper is - * only implemented for X86_64 and S390X. + * only implemented for X86_64, S390X and PPC64LE. */ kdat.has_clone3_set_tid = false; return 0; From 3dabd38a8292872dcf4c5710449a4c8017304ac2 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Sat, 25 Jan 2020 13:25:21 +0100 Subject: [PATCH 0304/2030] clone3: handle clone3() with CLONE_PARENT clone3() explicitly blocks setting an exit_signal if CLONE_PARENT is specified. With clone() it also did not work, but there was no error message. The exit signal from the thread group leader is taken. 
Signed-off-by: Adrian Reber --- criu/clone-noasan.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/criu/clone-noasan.c b/criu/clone-noasan.c index bcbc3e4bd..a2190ba0a 100644 --- a/criu/clone-noasan.c +++ b/criu/clone-noasan.c @@ -61,7 +61,19 @@ int clone3_with_pid_noasan(int (*fn)(void *), void *arg, int flags, pr_debug("Creating process using clone3()\n"); - c_args.exit_signal = exit_signal; + /* + * clone3() explicitly blocks setting an exit_signal + * if CLONE_PARENT is specified. With clone() it also + * did not work, but there was no error message. The + * exit signal from the thread group leader is taken. + */ + if (!(flags & CLONE_PARENT)) { + if (exit_signal != SIGCHLD) { + pr_err("Exit signal not SIGCHLD\n"); + return -1; + } + c_args.exit_signal = exit_signal; + } c_args.flags = flags; c_args.set_tid = ptr_to_u64(&pid); c_args.set_tid_size = 1; From f991f235064807a59ad8e4f98e82f34767b05e18 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Sun, 19 Jan 2020 21:42:58 +0100 Subject: [PATCH 0305/2030] aarch64: use clone3() if possible This adds the parasite clone3() with set_tid wrapper for aarch64. Tested on Fedora 31 with 5.5.0-rc6. 
Signed-off-by: Adrian Reber --- criu/arch/aarch64/include/asm/restorer.h | 67 +++++++++++++++++++++--- criu/kerndat.c | 4 +- 2 files changed, 63 insertions(+), 8 deletions(-) diff --git a/criu/arch/aarch64/include/asm/restorer.h b/criu/arch/aarch64/include/asm/restorer.h index 2fe58915b..120fa8fb2 100644 --- a/criu/arch/aarch64/include/asm/restorer.h +++ b/criu/arch/aarch64/include/asm/restorer.h @@ -42,12 +42,67 @@ "r"(&thread_args[i]) \ : "x0", "x1", "x2", "x3", "x8", "memory") -#define RUN_CLONE3_RESTORE_FN(ret, clone_args, size, args, \ - clone_restore_fn) do { \ - pr_err("This architecture does not support clone3() with set_tid, yet!\n"); \ - pr_err("Not creating a process with PID: %d\n", ((pid_t *)u64_to_ptr(clone_args.set_tid))[0]); \ - ret = -1; \ -} while (0) +/* + * Based on sysdeps/unix/sysv/linux/aarch64/clone.S + * + * int clone(int (*fn)(void *arg), x0 + * void *child_stack, x1 + * int flags, x2 + * void *arg, x3 + * pid_t *ptid, x4 + * struct user_desc *tls, x5 + * pid_t *ctid); x6 + * + * int clone3(struct clone_args *args, x0 + * size_t size); x1 + * + * Always consult the CLONE3 wrappers for other architectures + * for additional details. + * + */ + +#define RUN_CLONE3_RESTORE_FN(ret, clone_args, size, args, \ + clone_restore_fn) \ + asm volatile( \ + /* In contrast to the clone() wrapper above this does not put + * the thread function and its arguments on the child stack, + * but uses registers to pass these parameters to the child process. + * Based on the glibc clone() wrapper at + * sysdeps/unix/sysv/linux/aarch64/clone.S. + */ \ + "clone3_emul: \n" \ + /* + * Based on the glibc clone() wrapper, which uses x10 and x11 + * to save the arguments for the child process, this does the same. + * x10 for the thread function and x11 for the thread arguments. 
+ */ \ + "mov x10, %3 /* clone_restore_fn */ \n" \ + "mov x11, %4 /* args */ \n" \ + "mov x0, %1 /* &clone_args */ \n" \ + "mov x1, %2 /* size */ \n" \ + /* Load syscall number */ \ + "mov x8, #"__stringify(__NR_clone3)" \n" \ + /* Do the syscall */ \ + "svc #0 \n" \ + \ + "cbz x0, clone3_thread_run \n" \ + \ + "mov %0, x0 \n" \ + "b clone3_end \n" \ + \ + "clone3_thread_run: \n" \ + /* Move args to x0 */ \ + "mov x0, x11 \n" \ + /* Jump to clone_restore_fn */ \ + "br x10 \n" \ + \ + "clone3_end: \n" \ + : "=r"(ret) \ + : "r"(&clone_args), \ + "r"(size), \ + "r"(clone_restore_fn), \ + "r"(args) \ + : "x0", "x1", "x8", "x10", "x11", "memory") #define ARCH_FAIL_CORE_RESTORE \ asm volatile( \ diff --git a/criu/kerndat.c b/criu/kerndat.c index c1fc9259b..4070e01d2 100644 --- a/criu/kerndat.c +++ b/criu/kerndat.c @@ -992,10 +992,10 @@ static bool kerndat_has_clone3_set_tid(void) pid_t pid; struct _clone_args args = {}; -#if !defined(CONFIG_X86_64) && !defined(CONFIG_S390) && !defined(CONFIG_PPC64) +#if !defined(CONFIG_X86_64) && !defined(CONFIG_S390) && !defined(CONFIG_PPC64) && !defined(CONFIG_AARCH64) /* * Currently the CRIU PIE assembler clone3() wrapper is - * only implemented for X86_64, S390X and PPC64LE. + * only implemented for X86_64, S390X, AARCH64 and PPC64LE. */ kdat.has_clone3_set_tid = false; return 0; From 0e291d26c9e0258f60ae8921f0e03c89f332dc31 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Wed, 22 Jan 2020 19:41:41 +0100 Subject: [PATCH 0306/2030] arm: use clone3() if it exists This is the last architecture specific change to make CRIU use clone3() with set_tid if available. Just as on all other architectures this adds a clone3() based assembler wrapper to be used in the restorer code. Tested on Fedora 31 with the same 5.5.0-rc6 kernel as on the other architectures. 
Signed-off-by: Adrian Reber --- criu/arch/arm/include/asm/restorer.h | 62 +++++++++++++++++++++++++--- criu/kerndat.c | 9 ---- 2 files changed, 56 insertions(+), 15 deletions(-) diff --git a/criu/arch/arm/include/asm/restorer.h b/criu/arch/arm/include/asm/restorer.h index ad4b58f93..13ed15b26 100644 --- a/criu/arch/arm/include/asm/restorer.h +++ b/criu/arch/arm/include/asm/restorer.h @@ -43,12 +43,62 @@ "r"(&thread_args[i]) \ : "r0", "r1", "r2", "r3", "r7", "memory") -#define RUN_CLONE3_RESTORE_FN(ret, clone_args, size, args, \ - clone_restore_fn) do { \ - pr_err("This architecture does not support clone3() with set_tid, yet!\n"); \ - pr_err("Not creating a process with PID: %d\n", ((pid_t *)u64_to_ptr(clone_args.set_tid))[0]); \ - ret = -1; \ -} while (0) + +/* + * The clone3() assembler wrapper is based on the clone() wrapper above + * and on code from the glibc wrapper at + * sysdeps/unix/sysv/linux/arm/clone.S + * + * For arm it is necessary to change the child stack as on x86_64 as + * it seems there are not registers which stay the same over a syscall + * like on s390x, ppc64le and aarch64. + * + * Changing the child stack means that this code has to deal with the + * kernel doing stack + stack_size implicitly. 
+ * + * int clone3(struct clone_args *args, size_t size) + */ + +#define RUN_CLONE3_RESTORE_FN(ret, clone_args, size, args, \ + clone_restore_fn) \ + asm volatile( \ + "clone3_emul: \n" \ + /* Load thread stack pointer */ \ + "ldr r1, [%3] \n" \ + /* Load thread stack size */ \ + "mov r2, %4 \n" \ + /* Goto to the end of stack */ \ + "add r1, r1, r2 \n" \ + /* Load thread function and arguments and push on stack */ \ + "mov r2, %6 /* args */ \n" \ + "str r2, [r1, #4] /* args */ \n" \ + "mov r2, %5 /* function */ \n" \ + "str r2, [r1] /* function */ \n" \ + "mov r0, %1 /* clone_args */ \n" \ + "mov r1, %2 /* size */ \n" \ + "mov r7, #"__stringify(__NR_clone3)" \n" \ + "svc #0 \n" \ + \ + "cmp r0, #0 \n" \ + "beq thread3_run \n" \ + \ + "mov %0, r0 \n" \ + "b clone3_end \n" \ + \ + "thread3_run: \n" \ + "pop { r1 } \n" \ + "pop { r0 } \n" \ + "bx r1 \n" \ + \ + "clone3_end: \n" \ + : "=r"(ret) \ + : "r"(&clone_args), \ + "r"(size), \ + "r"(&clone_args.stack), \ + "r"(clone_args.stack_size), \ + "r"(clone_restore_fn), \ + "r"(args) \ + : "r0", "r1", "r2", "r7", "memory") #define ARCH_FAIL_CORE_RESTORE \ asm volatile( \ diff --git a/criu/kerndat.c b/criu/kerndat.c index 4070e01d2..e0b5731d5 100644 --- a/criu/kerndat.c +++ b/criu/kerndat.c @@ -992,15 +992,6 @@ static bool kerndat_has_clone3_set_tid(void) pid_t pid; struct _clone_args args = {}; -#if !defined(CONFIG_X86_64) && !defined(CONFIG_S390) && !defined(CONFIG_PPC64) && !defined(CONFIG_AARCH64) - /* - * Currently the CRIU PIE assembler clone3() wrapper is - * only implemented for X86_64, S390X, AARCH64 and PPC64LE. - */ - kdat.has_clone3_set_tid = false; - return 0; -#endif - args.set_tid = -1; /* * On a system without clone3() this will return ENOSYS. 
From f6de8d4ea9a2d0ce8f9d3373ab04a3c080348a86 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Tue, 4 Feb 2020 14:43:59 +0000 Subject: [PATCH 0307/2030] travis: fix warning and errors from validation This fixes the validation errors from Travis: Build config validation root: deprecated key sudo (The key `sudo` has no effect anymore.) root: missing os, using the default linux root: key matrix is an alias for jobs, using jobs Signed-off-by: Adrian Reber --- .travis.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 25dd6a29b..7c36af006 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,5 +1,5 @@ language: c -sudo: required +os: linux dist: bionic cache: ccache services: @@ -12,7 +12,7 @@ env: - TR_ARCH=x86_64 - TR_ARCH=x86_64 CLANG=1 - TR_ARCH=openj9-test -matrix: +jobs: include: - os: linux arch: ppc64le From c98af78c58e2168d2322cd0ee15837468fd4ffb0 Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Tue, 14 Jan 2020 12:04:40 +0300 Subject: [PATCH 0308/2030] compel: add -ffreestanding to force gcc not to use builtin memcpy, memset This patch fixes the problem with SSE (xmm) registers corruption on amd64 architecture. The problem was that gcc generates a parasite blob that uses xmm registers, but we don't preserve these registers in CRIU when injecting the parasite. Also, gcc, even with the -nostdlib option, uses builtin memcpy and memset functions that are optimized for amd64 and involve SSE registers. It seems that the optimal solution is to use the -ffreestanding gcc option to compile the parasite. This option implies -fno-builtin; it is also designed for compiling OS kernels and other code that is meant to run in non-hosted environments, and could prevent similar bugs in the future. To check whether your amd64 CRIU build is affected by this problem, you can simply run: objdump -dS criu/pie/parasite.o | grep xmm The output should be empty.
Reported-by: Diyu Zhou Signed-off-by: Alexander Mikhalitsyn Signed-off-by: Alexander Mikhalitsyn --- compel/src/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compel/src/main.c b/compel/src/main.c index 51bac099f..8b2c8bc8d 100644 --- a/compel/src/main.c +++ b/compel/src/main.c @@ -21,7 +21,7 @@ #define CFLAGS_DEFAULT_SET \ "-Wstrict-prototypes " \ - "-fno-stack-protector -nostdlib -fomit-frame-pointer " + "-fno-stack-protector -nostdlib -fomit-frame-pointer -ffreestanding " #define COMPEL_CFLAGS_PIE CFLAGS_DEFAULT_SET "-fpie" #define COMPEL_CFLAGS_NOPIC CFLAGS_DEFAULT_SET "-fno-pic" From 8477875dc29e82485318b1c0f8482735755d5265 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Thu, 30 Jan 2020 15:21:54 -0800 Subject: [PATCH 0309/2030] doc/Makefile: don't hide xmlto stderr In case asciidoc is installed and xmlto is not, make returns an error but there's no diagnostics shown, since "xmlto: command not found" goes to /dev/null. Remove the redirect. Signed-off-by: Kir Kolyshkin --- Documentation/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/Makefile b/Documentation/Makefile index cbc7ff2c8..5025e2b99 100644 --- a/Documentation/Makefile +++ b/Documentation/Makefile @@ -54,7 +54,7 @@ ifneq ($(USE_ASCIIDOCTOR),) $(Q) $(ASCIIDOC) -b manpage -d manpage -o $@ $< else $(Q) $(ASCIIDOC) -b docbook -d manpage -o $(patsubst %.1,%.xml,$@) $< - $(Q) $(XMLTO) man -m custom.xsl $(patsubst %.1,%.xml,$@) 2>/dev/null + $(Q) $(XMLTO) man -m custom.xsl $(patsubst %.1,%.xml,$@) endif %.8: %.txt $(FOOTER) custom.xsl @@ -63,7 +63,7 @@ ifneq ($(USE_ASCIIDOCTOR),) $(Q) $(ASCIIDOC) -b manpage -d manpage -o $@ $< else $(Q) $(ASCIIDOC) -b docbook -d manpage -o $(patsubst %.8,%.xml,$@) $< - $(Q) $(XMLTO) man -m custom.xsl $(patsubst %.8,%.xml,$@) 2>/dev/null + $(Q) $(XMLTO) man -m custom.xsl $(patsubst %.8,%.xml,$@) endif %.ps: %.1 From a15426a111eb50e2339607225b0ab0d1dc49e0ed Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin 
Date: Thu, 30 Jan 2020 15:27:07 -0800 Subject: [PATCH 0310/2030] criu(8): some minor rewording 1. Add a/the articles where I see them missing 2. s/Forbid/disable/ 3. s/crit/crit(1)/ as we're referring to a man page 4. Simplify some descriptions Signed-off-by: Kir Kolyshkin --- Documentation/criu.txt | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/Documentation/criu.txt b/Documentation/criu.txt index 133a094c0..64b33ce6d 100644 --- a/Documentation/criu.txt +++ b/Documentation/criu.txt @@ -24,8 +24,8 @@ on a different system, or both. OPTIONS ------- -Most of the true / false long options (the ones without arguments) can be -prefixed with *--no-* to negate the option (example: *--display-stats* +Most of the long flags can be +prefixed with *no-* to negate the option (example: *--display-stats* and *--no-display-stats*). Common options @@ -33,9 +33,8 @@ Common options Common options are applicable to any 'command'. *-v*[*v*...], *--verbosity*:: - Increase verbosity up from the default level. Multiple *v* can be used, - each increasing verbosity by one level. Using long option without argument - increases verbosity by one level. + Increase verbosity up from the default level. In case of short option, + multiple *v* can be used, each increasing verbosity by one. *-v*'num', *--verbosity*='num':: Set verbosity level to 'num'. The higher the level, the more output @@ -57,22 +56,22 @@ The following levels are available: Pass a specific configuration file to criu. *--no-default-config*:: - Forbid parsing of default configuration files. + Disable parsing of default configuration files. *--pidfile* 'file':: Write root task, service or page-server pid into a 'file'. *-o*, *--log-file* 'file':: - Write logging messages to 'file'. + Write logging messages to a 'file'. 
*--display-stats*:: - During dump as well as during restore *criu* collects information - like the time required to dump or restore the process or the + During dump, as well as during restore, *criu* collects some statistics, + like the time required to dump or restore the process, or the number of pages dumped or restored. This information is always - written to the files 'stats-dump' and 'stats-restore' and can - be easily displayed using *crit*. The option *--display-stats* - additionally prints out this information on the console at the end - of a dump or a restore. + saved to the *stats-dump* and *stats-restore* files, and can + be shown using *crit*(1). The option *--display-stats* + prints out this information on the console at the end + of a dump or restore operation. *-D*, *--images-dir* 'path':: Use 'path' as a base directory where to look for sets of image files. From 23374b779898470016b2a0e95af56b0766aa6b3e Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Thu, 30 Jan 2020 15:30:57 -0800 Subject: [PATCH 0311/2030] criu(8): fix for asciidoctor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 0493724c8eda3 added support for using asciidoctor (instead of asciidoc + xmlto) to generate man pages. For some reason, asciidoctor does not deal well with some complex formatting that we use for options such as --external, leading to literal ’ and ' appearing in the man page instead of italic formatting. For example: > --inherit-fd fd[’N']:’resource' (here both N and resource should be in italic). 
Asciidoctor documentation (asciidoctor --help syntax) tells: > == Text Formatting > > .Constrained (applied at word boundaries) > *strong importance* (aka bold) > _stress emphasis_ (aka italic) > `monospaced` (aka typewriter text) > "`double`" and '`single`' typographic quotes > +passthrough text+ (substitutions disabled) > `+literal text+` (monospaced with substitutions disabled) > > .Unconstrained (applied anywhere) > **C**reate+**R**ead+**U**pdate+**D**elete > fan__freakin__tastic > ``mono``culture so I had to carefully replace *bold* with **bold** and 'italic' with __italic__ to make it all work. Tested with both terminal and postscript output, with both asciidoctor and asciidoc+xmlto. TODO: figure out how to fix examples (literal multi-line text), since asciidoctor does not display it in monospaced font (this is only true for postscript/pdf output so low priority). Signed-off-by: Kir Kolyshkin --- Documentation/criu.txt | 51 +++++++++++++++++++++--------------------- 1 file changed, 26 insertions(+), 25 deletions(-) diff --git a/Documentation/criu.txt b/Documentation/criu.txt index 64b33ce6d..a6b9f7fae 100644 --- a/Documentation/criu.txt +++ b/Documentation/criu.txt @@ -36,8 +36,8 @@ Common options are applicable to any 'command'. Increase verbosity up from the default level. In case of short option, multiple *v* can be used, each increasing verbosity by one. -*-v*'num', *--verbosity*='num':: - Set verbosity level to 'num'. The higher the level, the more output +**-v**__num__, **--verbosity=**__num__:: + Set verbosity level to _num_. The higher the level, the more output is produced. + The following levels are available: @@ -184,7 +184,7 @@ In other words, do not use it unless really needed. *-s*, *--leave-stopped*:: Leave tasks in stopped state after checkpoint, instead of killing. -*--external* 'type'*[*'id'*]:*'value':: +*--external* __type__**[**__id__**]:**__value__:: Dump an instance of an external resource. 
The generic syntax is 'type' of resource, followed by resource 'id' (enclosed in literal square brackets), and optional 'value' (prepended by a literal colon). @@ -193,35 +193,36 @@ In other words, do not use it unless really needed. Note to restore external resources, either *--external* or *--inherit-fd* is used, depending on resource type. -*--external mnt[*'mountpoint'*]:*'name':: +*--external* **mnt[**__mountpoint__**]:**__name__:: Dump an external bind mount referenced by 'mountpoint', saving it to image under the identifier 'name'. -*--external mnt[]:*'flags':: +*--external* **mnt[]:**__flags__:: Dump all external bind mounts, autodetecting those. Optional 'flags' can contain *m* to also dump external master mounts, *s* to also dump external shared mounts (default behavior is to abort dumping if such mounts are found). If 'flags' are not provided, colon is optional. -*--external dev[*'major'*/*'minor'*]:*'name':: +*--external* **dev[**__major__**/**__minor__**]:**__name__:: Allow to dump a mount namespace having a real block device mounted. A block device is identified by its 'major' and 'minor' numbers, and *criu* saves its information to image under the identifier 'name'. -*--external file[*'mnt_id'*:*'inode'*]*:: +*--external* **file[**__mnt_id__**:**__inode__**]**:: Dump an external file, i.e. an opened file that is can not be resolved from the current mount namespace, which can not be dumped without using this option. The file is identified by 'mnt_id' (a field obtained from - */proc/*'pid'*/fdinfo/*'N') and 'inode' (as returned by *stat*(2)). + **/proc/**__pid__**/fdinfo/**__N__) and 'inode' (as returned by + *stat*(2)). -*--external tty[*'rdev'*:*'dev'*]*:: +*--external* **tty[**__rdev__**:**__dev__**]**:: Dump an external TTY, identified by *st_rdev* and *st_dev* fields returned by *stat*(2). 
-*--external unix[*'id'*]*:: +*--external* **unix[**__id__**]**:: Tell *criu* that one end of a pair of UNIX sockets (created by - *socketpair*(2)) with 'id' is OK to be disconnected. + *socketpair*(2)) with the given _id_ is OK to be disconnected. *--freeze-cgroup*:: Use cgroup freezer to collect processes. @@ -379,7 +380,7 @@ By default the option is set to *fpu* and *ins*. ~~~~~~~~~ Restores previously checkpointed processes. -*--inherit-fd* *fd[*'N'*]:*'resource':: +*--inherit-fd* **fd[**__N__**]:**__resource__:: Inherit a file descriptor. This option lets *criu* use an already opened file descriptor 'N' for restoring a file identified by 'resource'. This option can be used to restore an external resource dumped @@ -387,10 +388,10 @@ Restores previously checkpointed processes. + The 'resource' argument can be one of the following: + - - *tty[*'rdev'*:*'dev'*]* - - *pipe[*'inode'*]* - - *socket[*'inode'*]* - - *file[*'mnt_id'*:*'inode'*]* + - **tty[**__rdev__**:**__dev__**]** + - **pipe[**__inode__**]** + - **socket[**__inode__*]* + - **file[**__mnt_id__**:**__inode__**]** - 'path/to/file' + @@ -416,7 +417,7 @@ usually need to be escaped from shell. This option is required to restore a mount namespace. The directory 'path' must be a mount point and its parent must not be overmounted. -*--external* 'type'*[*'id'*]:*'value':: +*--external* __type__**[**__id__**]:**__value__:: Restore an instance of an external resource. The generic syntax is 'type' of resource, followed by resource 'id' (enclosed in literal square brackets), and optional 'value' (prepended by a literal colon). @@ -426,7 +427,7 @@ usually need to be escaped from shell. the help of *--external* *file*, *tty*, and *unix* options), option *--inherit-fd* should be used. -*--external mnt[*'name'*]:*'mountpoint':: +*--external* **mnt[**__name__**]:**__mountpoint__:: Restore an external bind mount referenced in the image by 'name', bind-mounting it from the host 'mountpoint' to a proper mount point. 
@@ -434,17 +435,17 @@ usually need to be escaped from shell. Restore all external bind mounts (dumped with the help of *--external mnt[]* auto-detection). -*--external dev[*'name'*]:*'/dev/path':: +*--external* **dev[**__name__**]:**__/dev/path__:: Restore an external mount device, identified in the image by 'name', using the existing block device '/dev/path'. -*--external veth[*'inner_dev'*]:*'outer_dev'*@*'bridge':: +*--external* **veth[**__inner_dev__**]:**__outer_dev__**@**__bridge__:: Set the outer VETH device name (corresponding to 'inner_dev' being - restored) to 'outer_dev'. If optional *@*'bridge' is specified, + restored) to 'outer_dev'. If optional **@**_bridge_ is specified, 'outer_dev' is added to that bridge. If the option is not used, 'outer_dev' will be autogenerated by the kernel. -*--external macvlan[*'inner_dev'*]:*'outer_dev':: +*--external* **macvlan[**__inner_dev__**]:**__outer_dev__:: When restoring an image that have a MacVLAN device in it, this option must be used to specify to which 'outer_dev' (an existing network device in CRIU namespace) the restored 'inner_dev' should be bound to. @@ -489,14 +490,14 @@ The 'mode' may be one of the following: *--tcp-close*:: Restore connected TCP sockets in closed state. -*--veth-pair* 'IN'*=*'OUT':: +*--veth-pair* __IN__**=**__OUT__:: Correspondence between outside and inside names of veth devices. *-l*, *--file-locks*:: Restore file locks from the image. -*--lsm-profile* 'type'*:*'name':: - Specify an LSM profile to be used during restore. The `type` can be +*--lsm-profile* __type__**:**__name__:: + Specify an LSM profile to be used during restore. The _type_ can be either *apparmor* or *selinux*. 
*--auto-dedup*:: From 56258da17619883631d0d3c96ad583bc697f953e Mon Sep 17 00:00:00 2001 From: Sergei Trofimovich Date: Sun, 2 Feb 2020 18:45:59 +0000 Subject: [PATCH 0312/2030] criu: fix build failure against gcc-10 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On gcc-10 (and gcc-9 -fno-common) build fails as: ``` ld: criu/arch/x86/crtools.o:criu/include/cr_options.h:159: multiple definition of `rpc_cfg_file'; criu/arch/x86/cpu.o:criu/include/cr_options.h:159: first defined here make[2]: *** [scripts/nmk/scripts/build.mk:164: criu/arch/x86/crtools.built-in.o] Error 1 ``` gcc-10 will change the default from -fcommon to fno-common: https://gcc.gnu.org/PR85678. The error also happens if CFLAGS=-fno-common passed explicitly. Reported-by: Toralf Förster Bug: https://bugs.gentoo.org/707942 Signed-off-by: Sergei Trofimovich --- criu/config.c | 1 + criu/include/cr_options.h | 2 +- criu/include/pstree.h | 2 +- criu/include/tun.h | 2 +- 4 files changed, 4 insertions(+), 3 deletions(-) diff --git a/criu/config.c b/criu/config.c index 73c62f5bb..b84b7da28 100644 --- a/criu/config.c +++ b/criu/config.c @@ -30,6 +30,7 @@ #include "common/xmalloc.h" struct cr_options opts; +char *rpc_cfg_file; static int count_elements(char **to_count) { diff --git a/criu/include/cr_options.h b/criu/include/cr_options.h index c5af33186..ba405182e 100644 --- a/criu/include/cr_options.h +++ b/criu/include/cr_options.h @@ -155,7 +155,7 @@ struct cr_options { }; extern struct cr_options opts; -char *rpc_cfg_file; +extern char *rpc_cfg_file; extern int parse_options(int argc, char **argv, bool *usage_error, bool *has_exec_cmd, int state); extern int check_options(void); diff --git a/criu/include/pstree.h b/criu/include/pstree.h index 7303c1fed..61ab0ce0e 100644 --- a/criu/include/pstree.h +++ b/criu/include/pstree.h @@ -42,7 +42,7 @@ enum { }; #define FDS_EVENT (1 << FDS_EVENT_BIT) -struct pstree_item *current; +extern struct pstree_item *current; struct 
rst_info; /* See alloc_pstree_item() for details */ diff --git a/criu/include/tun.h b/criu/include/tun.h index ce0b266a6..b82c445a7 100644 --- a/criu/include/tun.h +++ b/criu/include/tun.h @@ -5,7 +5,7 @@ #define TUN_MINOR 200 #endif -struct ns_id *ns; +extern struct ns_id *ns; #include From f1714ccce714093170a2616474cfc7b33298c75e Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Tue, 4 Feb 2020 23:12:22 -0800 Subject: [PATCH 0313/2030] test/vdso: check the code path when there is no API to map vDSO Signed-off-by: Andrei Vagin --- criu/crtools.c | 3 +++ criu/include/fault-injection.h | 1 + test/jenkins/criu-fault.sh | 5 +++++ 3 files changed, 9 insertions(+) diff --git a/criu/crtools.c b/criu/crtools.c index 9b6e94809..3cd40e87d 100644 --- a/criu/crtools.c +++ b/criu/crtools.c @@ -176,6 +176,9 @@ int main(int argc, char *argv[], char *envp[]) if (kerndat_init()) return 1; + if (fault_injected(FI_CANNOT_MAP_VDSO)) + kdat.can_map_vdso = 0; + if (opts.deprecated_ok) pr_debug("DEPRECATED ON\n"); diff --git a/criu/include/fault-injection.h b/criu/include/fault-injection.h index 852d27166..31fe16178 100644 --- a/criu/include/fault-injection.h +++ b/criu/include/fault-injection.h @@ -17,6 +17,7 @@ enum faults { FI_NO_BREAKPOINTS = 130, FI_PARTIAL_PAGES = 131, FI_HUGE_ANON_SHMEM_ID = 132, + FI_CANNOT_MAP_VDSO = 133, FI_MAX, }; diff --git a/test/jenkins/criu-fault.sh b/test/jenkins/criu-fault.sh index ec6d26f89..4e3790e59 100755 --- a/test/jenkins/criu-fault.sh +++ b/test/jenkins/criu-fault.sh @@ -12,6 +12,10 @@ prep ./test/zdtm.py run -t zdtm/static/vdso01 --fault 127 || fail ./test/zdtm.py run -t zdtm/static/vdso-proxy --fault 127 --iters 3 || fail +if [ "${COMPAT_TEST}" != "y" ] ; then + ./test/zdtm.py run -t zdtm/static/vdso01 --fault 133 -f h || fail +fi + ./test/zdtm.py run -t zdtm/static/mntns_ghost --fault 2 --keep-going --report report || fail ./test/zdtm.py run -t zdtm/static/mntns_ghost --fault 4 --keep-going --report report || fail @@ -23,3 +27,4 @@ prep
./test/zdtm.py run -t zdtm/static/maps04 --fault 131 --keep-going --report report --pre 2:1 || fail ./test/zdtm.py run -t zdtm/transition/maps008 --fault 131 --keep-going --report report --pre 2:1 || fail ./test/zdtm.py run -t zdtm/static/maps01 --fault 132 -f h || fail + From 9bc9366c94d2c81f706d56d0227bf32a2425eef1 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Tue, 4 Feb 2020 23:13:43 -0800 Subject: [PATCH 0314/2030] vdso: use correct offsets to remap vdso and vvar mappings In the current version, the offsets of remapping vvar and vdso regions are mixed up. If vdso is before vvar, vvar has to be mapped with the vdso_size offset. if vvar is before vdso, vdso has to be mapped with the vvar_size offset. Signed-off-by: Andrei Vagin --- criu/pie/parasite-vdso.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/criu/pie/parasite-vdso.c b/criu/pie/parasite-vdso.c index 38da76680..3a1684d35 100644 --- a/criu/pie/parasite-vdso.c +++ b/criu/pie/parasite-vdso.c @@ -119,9 +119,9 @@ int vdso_do_park(struct vdso_maps *rt, unsigned long addr, unsigned long space) BUG_ON((vdso_size + vvar_size) < space); if (rt->sym.vdso_before_vvar) - return park_at(rt, addr, addr + vvar_size); + return park_at(rt, addr, addr + vdso_size); else - return park_at(rt, addr + vdso_size, addr); + return park_at(rt, addr + vvar_size, addr); } #ifndef CONFIG_COMPAT From 0f438ceeed27f3473a9ebda6c9e15d593ceeebde Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Wed, 5 Feb 2020 22:33:02 +0000 Subject: [PATCH 0315/2030] typo: fix missing space in error message Signed-off-by: Nicolas Viennot --- criu/sk-unix.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/criu/sk-unix.c b/criu/sk-unix.c index f43aa2124..048ff44ae 100644 --- a/criu/sk-unix.c +++ b/criu/sk-unix.c @@ -1877,7 +1877,7 @@ static int open_unixsk_standalone(struct unix_sk_info *ui, int *new_fd) !(opts.ext_unix_sk)) { pr_err("External socket found in image. 
" "Consider using the --" USK_EXT_PARAM - "option to allow restoring it.\n"); + " option to allow restoring it.\n"); return -1; } From 72ff29070816e57b408f0bd6b8f71ff50c2e9cd4 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 6 Feb 2020 18:01:00 +0000 Subject: [PATCH 0316/2030] criu: Make use strlcpy() to copy into allocated strings strncpy() with n == strlen(src) won't put NULL-terminator in dst. Signed-off-by: Dmitry Safonov --- criu/cr-restore.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/criu/cr-restore.c b/criu/cr-restore.c index b4f8d9e75..c1dfc44f1 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -75,6 +75,7 @@ #include "sk-queue.h" #include "sigframe.h" #include "fdstore.h" +#include "string.h" #include "parasite-syscall.h" #include "files-reg.h" @@ -3142,7 +3143,7 @@ rst_prep_creds_args(CredsEntry *ce, unsigned long *prev_pos) args = rst_mem_remap_ptr(this_pos, RM_PRIVATE); args->lsm_profile = lsm_profile; - strncpy(args->lsm_profile, rendered, lsm_profile_len); + strlcpy(args->lsm_profile, rendered, lsm_profile_len + 1); xfree(rendered); } } else { @@ -3176,7 +3177,7 @@ rst_prep_creds_args(CredsEntry *ce, unsigned long *prev_pos) args = rst_mem_remap_ptr(this_pos, RM_PRIVATE); args->lsm_sockcreate = lsm_sockcreate; - strncpy(args->lsm_sockcreate, rendered, lsm_sockcreate_len); + strlcpy(args->lsm_sockcreate, rendered, lsm_sockcreate_len + 1); xfree(rendered); } } else { From 99346a28247a3abeae094008fefa2edbc78fbb4d Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Fri, 24 Jan 2020 11:55:00 +0000 Subject: [PATCH 0317/2030] zdtm: Make test_{doc,author} weak variables Allows to override them in every test, optionally. 
Signed-off-by: Dmitry Safonov --- test/zdtm/lib/parseargs.c | 4 ++-- test/zdtm/lib/zdtmtst.h | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/test/zdtm/lib/parseargs.c b/test/zdtm/lib/parseargs.c index 7e411f6b6..d8aa4ed63 100644 --- a/test/zdtm/lib/parseargs.c +++ b/test/zdtm/lib/parseargs.c @@ -113,8 +113,8 @@ static void helpexit(void) exit(1); } -const char *test_doc; -const char *test_author; +const char __attribute__((weak)) *test_doc; +const char __attribute__((weak)) *test_author; static void prdoc(void) { diff --git a/test/zdtm/lib/zdtmtst.h b/test/zdtm/lib/zdtmtst.h index 1fbf795bf..2cd4bdd1d 100644 --- a/test/zdtm/lib/zdtmtst.h +++ b/test/zdtm/lib/zdtmtst.h @@ -155,6 +155,9 @@ struct zdtm_tcp_opts { int flags; }; +extern const char *test_author; +extern const char *test_doc; + extern int tcp_init_server_with_opts(int family, int *port, struct zdtm_tcp_opts *opts); extern pid_t sys_clone_unified(unsigned long flags, void *child_stack, void *parent_tid, void *child_tid, unsigned long newtls); From 0022c28468714a5329fd41ec12d744340b250cd4 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Tue, 21 Jan 2020 12:31:00 +0000 Subject: [PATCH 0318/2030] vdso: Add vdso_is_present() helper Use it in kerndat to check if the kernel provides vDSO. 
Signed-off-by: Dmitry Safonov --- criu/include/util-vdso.h | 5 +++++ criu/vdso.c | 10 ++++++++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/criu/include/util-vdso.h b/criu/include/util-vdso.h index 33b7411de..046cd96d7 100644 --- a/criu/include/util-vdso.h +++ b/criu/include/util-vdso.h @@ -41,6 +41,11 @@ struct vdso_maps { bool compatible; }; +static inline bool vdso_is_present(struct vdso_maps *m) +{ + return m->vdso_start != VDSO_BAD_ADDR; +} + #define VDSO_SYMBOL_INIT { .offset = VDSO_BAD_ADDR, } #define VDSO_SYMTABLE_INIT \ diff --git a/criu/vdso.c b/criu/vdso.c index 50b8b8dba..b8df2d7a6 100644 --- a/criu/vdso.c +++ b/criu/vdso.c @@ -611,6 +611,12 @@ int kerndat_vdso_fill_symtable(void) return -1; } + if (!vdso_is_present(&vdso_maps)) { + pr_debug("Kernel doesn't premap vDSO - probably CONFIG_VDSO is not set\n"); + kdat.vdso_sym = vdso_maps.sym; + return 0; + } + if (vdso_fill_self_symtable(&vdso_maps)) { pr_err("Failed to fill self vdso symtable\n"); return -1; @@ -643,7 +649,7 @@ int kerndat_vdso_preserves_hint(void) kdat.vdso_hint_reliable = 0; - if (vdso_maps.vdso_start == VDSO_BAD_ADDR) + if (!vdso_is_present(&vdso_maps)) return 0; child = fork(); @@ -693,7 +699,7 @@ int kerndat_vdso_preserves_hint(void) goto out_kill; } - if (vdso_maps_after.vdso_start != VDSO_BAD_ADDR) + if (vdso_is_present(&vdso_maps_after)) kdat.vdso_hint_reliable = 1; ret = 0; From a96a7ed87fece0eeb397b9dd4901f680fbf2b4f6 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Wed, 22 Jan 2020 14:00:27 +0000 Subject: [PATCH 0319/2030] vdso: Repair !CONFIG_VDSO Apparently, C/R is broken when CONFIG_VDSO is not set. Probably, I've broken it while adding arm vdso support. Or maybe some commits after. Repair it by adding checks into vdso_init_dump(), vdso_init_restore(). Also, don't try handling vDSO in restorer if it wasn't present in parent. And prevent summing VDSO_BAD_SIZE to {vdso,vvar}_rt_size. 
Reported-by: Adrian Reber Signed-off-by: Dmitry Safonov --- criu/cr-restore.c | 9 +++++--- criu/pie/parasite-vdso.c | 12 ++++++++++ criu/pie/restorer.c | 2 +- criu/vdso.c | 48 ++++++++++++++++++++++++++-------------- 4 files changed, 50 insertions(+), 21 deletions(-) diff --git a/criu/cr-restore.c b/criu/cr-restore.c index c1dfc44f1..03dbc850f 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -3373,10 +3373,13 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns vdso_maps_rt = vdso_maps; /* * Figure out how much memory runtime vdso and vvar will need. + * Check if vDSO or VVAR is not provided by kernel. */ - vdso_rt_size = vdso_maps_rt.sym.vdso_size; - if (vdso_rt_size && vdso_maps_rt.sym.vvar_size) - vdso_rt_size += ALIGN(vdso_maps_rt.sym.vvar_size, PAGE_SIZE); + if (vdso_maps_rt.sym.vdso_size != VDSO_BAD_SIZE) { + vdso_rt_size = vdso_maps_rt.sym.vdso_size; + if (vdso_maps_rt.sym.vvar_size != VVAR_BAD_SIZE) + vdso_rt_size += ALIGN(vdso_maps_rt.sym.vvar_size, PAGE_SIZE); + } task_args->bootstrap_len += vdso_rt_size; /* diff --git a/criu/pie/parasite-vdso.c b/criu/pie/parasite-vdso.c index 3a1684d35..3f5cb1431 100644 --- a/criu/pie/parasite-vdso.c +++ b/criu/pie/parasite-vdso.c @@ -292,6 +292,18 @@ int vdso_proxify(struct vdso_maps *rt, bool *added_proxy, return -1; } + /* + * We could still do something about it here.. + * 1. Hope that vDSO from images still works (might not be the case). + * 2. Try to map vDSO. + * But, hopefully no one intends to migrate application that uses + * vDSO to a dut where kernel doesn't provide it. + */ + if (!vdso_is_present(rt)) { + pr_err("vDSO isn't provided by kernel, but exists in images\n"); + return -1; + } + /* * vDSO mark overwrites Elf program header of proxy vDSO thus * it must never ever be greater in size. 
diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c index 7012b88a1..afe185f04 100644 --- a/criu/pie/restorer.c +++ b/criu/pie/restorer.c @@ -1454,7 +1454,7 @@ long __export_restore_task(struct task_restore_args *args) * it's presence in original task: vdso will be used for fast * getttimeofday() in restorer's log timings. */ - if (!args->can_map_vdso) { + if (!args->can_map_vdso && vdso_is_present(&args->vdso_maps_rt)) { /* It's already checked in kdat, but let's check again */ if (args->compatible_mode) { pr_err("Compatible mode without vdso map support\n"); diff --git a/criu/vdso.c b/criu/vdso.c index b8df2d7a6..19ba4765d 100644 --- a/criu/vdso.c +++ b/criu/vdso.c @@ -275,6 +275,10 @@ int parasite_fixup_vdso(struct parasite_ctl *ctl, pid_t pid, struct vma_area *vma; int fd = -1; + /* vDSO is not provided by kernel */ + if (kdat.vdso_sym.vdso_size == VDSO_BAD_SIZE) + return 0; + vcheck = get_vdso_check_type(ctl); if (vcheck == VDSO_CHECK_PFN) { BUG_ON(vdso_pfn == VDSO_BAD_PFN); @@ -534,21 +538,6 @@ out_unmap: } #endif /* CONFIG_COMPAT */ -int vdso_init_dump(void) -{ - if (vdso_parse_maps(PROC_SELF, &vdso_maps)) { - pr_err("Failed reading self/maps for filling vdso/vvar bounds\n"); - return -1; - } - - if (kdat.pmap != PM_FULL) - pr_info("VDSO detection turned off\n"); - else if (vaddr_to_pfn(-1, vdso_maps.vdso_start, &vdso_pfn)) - return -1; - - return 0; -} - /* * Check vdso/vvar sized read from maps to kdat values. 
* We do not read /proc/self/maps for compatible vdso as it's @@ -566,11 +555,36 @@ static int is_kdat_vdso_sym_valid(void) return true; } +int vdso_init_dump(void) +{ + if (vdso_parse_maps(PROC_SELF, &vdso_maps)) { + pr_err("Failed reading self/maps for filling vdso/vvar bounds\n"); + return -1; + } + + if (!is_kdat_vdso_sym_valid()) { + pr_err("Kdat sizes of vdso/vvar differ to maps file \n"); + return -1; + } + + if (kdat.vdso_sym.vdso_size == VDSO_BAD_SIZE) { + pr_debug("Kdat has empty vdso symtable - probably CONFIG_VDSO is not set\n"); + return 0; + } + + if (kdat.pmap != PM_FULL) + pr_info("VDSO detection turned off\n"); + else if (vaddr_to_pfn(-1, vdso_maps.vdso_start, &vdso_pfn)) + return -1; + + return 0; +} + int vdso_init_restore(void) { if (kdat.vdso_sym.vdso_size == VDSO_BAD_SIZE) { - pr_err("Kdat has empty vdso symtable\n"); - return -1; + pr_debug("Kdat has empty vdso symtable - probably CONFIG_VDSO is not set\n"); + return 0; } /* Already filled vdso_maps during kdat test */ From 9cb4067e132ba48eb36b0b8075043b13e96e8974 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Wed, 22 Jan 2020 14:05:47 +0000 Subject: [PATCH 0320/2030] vdso: Don't page-align vvar It's always page-aligned (as any VMA). 
Signed-off-by: Dmitry Safonov --- criu/cr-restore.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/criu/cr-restore.c b/criu/cr-restore.c index 03dbc850f..e5e8fc9c5 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -3378,7 +3378,7 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns if (vdso_maps_rt.sym.vdso_size != VDSO_BAD_SIZE) { vdso_rt_size = vdso_maps_rt.sym.vdso_size; if (vdso_maps_rt.sym.vvar_size != VVAR_BAD_SIZE) - vdso_rt_size += ALIGN(vdso_maps_rt.sym.vvar_size, PAGE_SIZE); + vdso_rt_size += vdso_maps_rt.sym.vvar_size; } task_args->bootstrap_len += vdso_rt_size; From 3a4c33c502b8ed685d1ffe81b15d11159d43848a Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Sat, 18 Jan 2020 14:28:03 +0000 Subject: [PATCH 0321/2030] zdtm: mntns_rw_ro_rw update error msg Signed-off-by: Radostin Stoyanov --- test/zdtm/static/mntns_rw_ro_rw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/zdtm/static/mntns_rw_ro_rw.c b/test/zdtm/static/mntns_rw_ro_rw.c index 7aed254b6..6179c4788 100644 --- a/test/zdtm/static/mntns_rw_ro_rw.c +++ b/test/zdtm/static/mntns_rw_ro_rw.c @@ -31,12 +31,12 @@ int main(int argc, char **argv) test_waitsig(); if (access("/proc/sys/net/ipv4/ip_forward", W_OK)) { - fail("Unable to access /proc/sys/net/core/wmem_max"); + fail("Unable to access /proc/sys/net/ipv4/ip_forward"); return 1; } if (access("/proc/sys/kernel/ns_last_pid", W_OK) != -1 || errno != EROFS) { - fail("Unable to access /proc/sys/kernel/pid_max"); + fail("Unable to access /proc/sys/kernel/ns_last_pid"); return 1; } From f5181b2767d03f17e72dd6f70c83ce394b750e68 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Sat, 8 Feb 2020 19:43:39 +0100 Subject: [PATCH 0322/2030] Travis: fix podman test case Podman changed the output of 'podman ps'. For the test only running containers are interesting. Adding the filter '-f status=running' only returns running containers as previously. 
Signed-off-by: Adrian Reber --- scripts/travis/podman-test.sh | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/scripts/travis/podman-test.sh b/scripts/travis/podman-test.sh index 825bca746..7490d5fe9 100755 --- a/scripts/travis/podman-test.sh +++ b/scripts/travis/podman-test.sh @@ -39,12 +39,12 @@ for i in `seq 20`; do echo "Test $i for podman container checkpoint" podman exec cr ps axf podman logs cr - [ `podman ps -f name=cr -q | wc -l` -eq "1" ] + [ `podman ps -f name=cr -q -f status=running | wc -l` -eq "1" ] podman container checkpoint cr - [ `podman ps -f name=cr -q | wc -l` -eq "0" ] + [ `podman ps -f name=cr -q -f status=running | wc -l` -eq "0" ] podman ps -a podman container restore cr - [ `podman ps -f name=cr -q | wc -l` -eq "1" ] + [ `podman ps -f name=cr -q -f status=running | wc -l` -eq "1" ] podman logs cr done @@ -53,16 +53,16 @@ for i in `seq 20`; do podman ps -a podman exec cr ps axf podman logs cr - [ `podman ps -f name=cr -q | wc -l` -eq "1" ] + [ `podman ps -f name=cr -q -f status=running | wc -l` -eq "1" ] podman container checkpoint -l --export /tmp/chkpt.tar.gz - [ `podman ps -f name=cr -q | wc -l` -eq "0" ] + [ `podman ps -f name=cr -q -f status=running | wc -l` -eq "0" ] podman ps -a podman rm -fa podman ps -a podman container restore --import /tmp/chkpt.tar.gz - [ `podman ps -f name=cr -q | wc -l` -eq "1" ] + [ `podman ps -f name=cr -q -f status=running | wc -l` -eq "1" ] podman container restore --name cr2 --import /tmp/chkpt.tar.gz - [ `podman ps -f name=cr2 -q | wc -l` -eq "1" ] + [ `podman ps -f name=cr2 -q -f status=running | wc -l` -eq "1" ] podman ps -a podman logs cr podman logs cr2 @@ -70,7 +70,7 @@ for i in `seq 20`; do podman rm -fa podman ps -a podman container restore --import /tmp/chkpt.tar.gz - [ `podman ps -f name=cr -q | wc -l` -eq "1" ] + [ `podman ps -f name=cr -q -f status=running | wc -l` -eq "1" ] podman ps -a rm -f /tmp/chkpt.tar.gz done From 
d68a68b8f478e12bd457eccfbf8e49b50ca95e86 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Thu, 6 Feb 2020 20:46:17 -0800 Subject: [PATCH 0323/2030] test/zdtm/inhfd: update dump options one each iteration This allows to run inhfd tests with many iterations of C/R. Signed-off-by: Andrei Vagin --- test/zdtm.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/test/zdtm.py b/test/zdtm.py index 16ff0b379..47c89a162 100755 --- a/test/zdtm.py +++ b/test/zdtm.py @@ -757,6 +757,11 @@ class inhfd_test: fcntl.fcntl(fd, fcntl.F_SETFD, fdflags) peer_file_name = self.__peer_file_names[i] ropts.extend(["--inherit-fd", "fd[%d]:%s" % (fd, peer_file_name)]) + self.__peer_file_names = [] + self.__dump_opts = [] + for _, peer_file in self.__files: + self.__peer_file_names.append(self.__fdtyp.filename(peer_file)) + self.__dump_opts += self.__fdtyp.dump_opts(peer_file) return ropts def print_output(self): From ff756cbb28c4ee10651ed80f38b8ef37ee74fc39 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Thu, 6 Feb 2020 21:20:20 -0800 Subject: [PATCH 0324/2030] python: sort imports 202 Additional newline in a group of imports. I100 Import statements are in the wrong order. 
Signed-off-by: Andrei Vagin --- lib/py/images/pb2dict.py | 13 ++++++------ test/inhfd/socket.py | 2 +- test/others/rpc/config_file.py | 7 ++++--- test/zdtm.py | 38 ++++++++++++++++++---------------- 4 files changed, 32 insertions(+), 28 deletions(-) diff --git a/lib/py/images/pb2dict.py b/lib/py/images/pb2dict.py index 6fce4be22..a89850a1d 100644 --- a/lib/py/images/pb2dict.py +++ b/lib/py/images/pb2dict.py @@ -1,12 +1,13 @@ -from google.protobuf.descriptor import FieldDescriptor as FD -import opts_pb2 -from ipaddress import IPv4Address, ip_address -from ipaddress import IPv6Address -import socket +import base64 import collections import os -import base64 import quopri +import socket +from ipaddress import IPv4Address, IPv6Address, ip_address + +from google.protobuf.descriptor import FieldDescriptor as FD + +import opts_pb2 if "encodebytes" not in dir(base64): base64.encodebytes = base64.encodestring diff --git a/test/inhfd/socket.py b/test/inhfd/socket.py index 9cea16ffb..7efe7faab 100755 --- a/test/inhfd/socket.py +++ b/test/inhfd/socket.py @@ -1,5 +1,5 @@ -import socket import os +import socket def create_fds(): diff --git a/test/others/rpc/config_file.py b/test/others/rpc/config_file.py index 7b07bc145..90c80fcae 100755 --- a/test/others/rpc/config_file.py +++ b/test/others/rpc/config_file.py @@ -1,11 +1,12 @@ #!/usr/bin/python +import argparse import os import sys -import rpc_pb2 as rpc -import argparse -from tempfile import mkstemp import time +from tempfile import mkstemp + +import rpc_pb2 as rpc from setup_swrk import setup_swrk diff --git a/test/zdtm.py b/test/zdtm.py index 47c89a162..3fc57ba55 100755 --- a/test/zdtm.py +++ b/test/zdtm.py @@ -1,31 +1,33 @@ #!/usr/bin/env python # vim: noet ts=8 sw=8 sts=8 from __future__ import absolute_import, division, print_function, unicode_literals -from builtins import (str, open, range, zip, int, input) import argparse -import glob -import os -import subprocess -import time -import tempfile -import shutil -import re 
-import stat -import signal import atexit -import sys -import linecache -import random -import string -import fcntl -import errno import datetime -import yaml -import struct +import errno +import fcntl +import glob +import linecache import mmap +import os +import random +import re +import shutil +import signal +import stat +import string +import struct +import subprocess +import sys +import tempfile +import time +from builtins import (input, int, open, range, str, zip) + import pycriu as crpc +import yaml + os.chdir(os.path.dirname(os.path.abspath(__file__))) prev_line = None From 872b795a5678d82a415419e139d680cbf81391ff Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Fri, 21 Feb 2020 18:48:41 +0300 Subject: [PATCH 0325/2030] Maintainers: Suggest the maintainers codex (#932) The guide is based on the one from the RunC project, but has some criu-related specifics. Signed-off-by: Pavel Emelyanov --- MAINTAINERS | 2 + MAINTAINERS_GUIDE.md | 136 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 138 insertions(+) create mode 100644 MAINTAINERS create mode 100644 MAINTAINERS_GUIDE.md diff --git a/MAINTAINERS b/MAINTAINERS new file mode 100644 index 000000000..fbd5d03e8 --- /dev/null +++ b/MAINTAINERS @@ -0,0 +1,2 @@ +Pavel Emelyanov (chief) +Andrey Vagin diff --git a/MAINTAINERS_GUIDE.md b/MAINTAINERS_GUIDE.md new file mode 100644 index 000000000..2830a3caa --- /dev/null +++ b/MAINTAINERS_GUIDE.md @@ -0,0 +1,136 @@ +## Introduction + +Dear maintainer. Thank you for investing the time and energy to help +make CRIU as useful as possible. Maintaining a project is difficult, +sometimes unrewarding work. Sure, you will contribute cool features +to the project, but most of your time will be spent reviewing patches, +cleaning things up, documenting, answering questions, justifying design +decisions - while everyone else will just have fun! But remember -- the +quality of the maintainers work is what distinguishes the good projects +from the great. 
So please be proud of your work, even the unglamorous +parts, and encourage a culture of appreciation and respect for *every* +aspect of improving the project -- not just the hot new features. + +Being a maintainer is a time consuming commitment and should not be +taken lightly. This document is a manual for maintainers old and new. +It explains what is expected of maintainers, how they should work, and +what tools are available to them. + +This is a living document - if you see something out of date or missing, +speak up! + +## What are a maintainer's responsibilities? + +Part of a healthy project is to have active maintainers to support the +community in contributions and perform tasks to keep the project running. +It is every maintainer's responsibility to: + + * Keep the community a friendly place + * Deliver prompt feedback and decisions on pull requests and mailing + list threads + * Encourage other members to help each other, especially in cases the + maintainer is overloaded or feels the lack of needed expertise + * Make sure the changes made respect the philosophy, design and + roadmap of the project + +## How are decisions made? + +CRIU is an open-source project with an open design philosophy. This +means that the repository is the source of truth for EVERY aspect of the +project. *If it's part of the project, it's in the repo. It's in the +repo, it's part of the project.* + +All decisions affecting CRIU, big and small, follow the same 3 steps: + + * Submit a change. Anyone can do this + + * Discuss it. Anyone can and is encouraged to do this + + * Accept or decline it. Only maintainers do this + +*I'm a maintainer, should I make pull requests / send patches too?* + +Yes. Nobody should ever push to the repository directly. All changes +should be made through submitting (and accepting) the change.
+ +### Two-steps decision making ### + +Since CRIU is an extremely complex piece of software we try double hard +not to make mistakes, that would be hard to fix in the future. In order +to facilitate this, the "final" decision is made in two stages: + + * We definitely want to try something out + + * We think that the attempt was successful + +Respectively, new features get accepted first into the *criu-dev* branch and +after they have been validated they are merged into the *master* branch. Yet, +urgent bug fixes may land directly in the master branch. If a change in +the criu-dev branch is considered to be bad (whatever it means), then it +can be reverted without propagation to the master branch. Reverting from +the master branch is expected not to happen at all, but if such an +extraordinary case occurs, the impact of this step, especially the question +of backward compatibility, should be considered in the most careful manner. + +## Who decides what? + +All decisions can be expressed as changes to the repository (either in the +form of pull requests, or patches sent to the mailing list), and maintainers +make decisions by merging or rejecting them. Review and approval or +disagreement can be done by anyone and is denoted by adding a respective +comment in the pull request. However, merging the change into either branch +only happens after approvals from maintainers. + +In order for a patch to be merged into the criu-dev branch at least two +maintainers should accept it. In order for a patch to be merged into the +master branch the majority of maintainers should decide that (then prepare +a pull request, submit it, etc.). + +Overall the maintainer system works because of mutual respect across the +maintainers of the project. The maintainers trust one another to make +decisions in the best interests of the project. Sometimes maintainers +can disagree and this is part of a healthy project to represent the point +of views of various people.
In the case where maintainers cannot find +agreement on a specific change the role of a Chief Maintainer comes into +play. + +### Chief maintainer + +The chief maintainer for the project is responsible for overall architecture +of the project to maintain conceptual integrity. Large decisions and +architecture changes should be reviewed by the chief maintainer. + +Also the chief maintainer has the veto power on any change submitted +to any branch. Naturally, a change in the criu-dev branch can be reverted +after a chief maintainer veto, a change in the master branch must be +carefully reviewed by the chief maintainer and vetoed in advance. + +### How are maintainers added (and removed)? + +The best maintainers have a vested interest in the project. Maintainers +are first and foremost contributors that have shown they are committed to +the long term success of the project. Contributors wanting to become +maintainers are expected to be deeply involved in contributing code, +patches review, and paying needed attention to the issues in the project. +Just contributing does not make you a maintainer, it is about building trust +with the current maintainers of the project and being a person that they can +rely on and trust to make decisions in the best interest of the project. + +When a contributor wants to become a maintainer or nominate someone as a +maintainer, one can submit a "nomination", which technically is the +respective modification to the `MAINTAINERS` file. When a maintainer feels +they are unable to perform the required duties, or someone else wants to draw +the community attention to this fact, one can submit a "(self-)removing" +change. + +The final vote to add or to remove a maintainer is to be approved by the +majority of current maintainers (with the chief maintainer having veto power +on that too). + +One might have noticed, that the chief maintainer (re-)assignment is not +regulated by this document. That's true :) However, this can be done.
If +the community decides that the chief maintainer needs to be changed the +respective "decision making rules" are to be prepared, submitted and +accepted into this file first. + +Good luck! From 42db2c1563544fc4307c95b8cc2cba8ddfc51262 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Wed, 26 Feb 2020 20:35:12 +0200 Subject: [PATCH 0326/2030] MAINTAINERS: add Mike Signed-off-by: Mike Rapoport --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index fbd5d03e8..ed5bf25c1 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1,2 +1,3 @@ Pavel Emelyanov (chief) Andrey Vagin +Mike Rapoport From e19f4cf3b120b91384cdd87fb138a319857f8d8b Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Tue, 3 Mar 2020 11:46:08 -0800 Subject: [PATCH 0327/2030] MAINTAINERS: Add Dima and Adrian to maintainers Signed-off-by: Andrei Vagin --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index ed5bf25c1..5c28463a7 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1,3 +1,5 @@ Pavel Emelyanov (chief) Andrey Vagin Mike Rapoport +Dmitry Safonov <0x7f454c46@gmail.com> +Adrian Reber From 5dbc24b206cd365db7498dddcd03798c5d8ed4e4 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Mon, 25 Nov 2019 09:50:08 +0300 Subject: [PATCH 0328/2030] util: introduce the mount_detached_fs helper Signed-off-by: Andrei Vagin --- criu/util.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/criu/util.c b/criu/util.c index 1646ce1c4..a0a49c5a3 100644 --- a/criu/util.c +++ b/criu/util.c @@ -30,6 +30,8 @@ #include "linux/mount.h" +#include "linux/mount.h" + #include "kerndat.h" #include "page.h" #include "util.h" From c1e72aa936bd86fb4cd819e84791e8a9a5c4d572 Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Wed, 18 Dec 2019 23:32:32 +0000 Subject: [PATCH 0329/2030] memfd: add file support See "man memfd_create" for more information of what memfd is. This adds support for memfd open files, that are not memory mapped.
* We add a new kind of file: MEMFD. * We add two image types MEMFD_FILE, and MEMFD_INODE. MEMFD_FILE contains usual file information (e.g., position). MEMFD_INODE contains the memfd name, and a shmid identifier referring to the content. * We reuse the shmem facilities for dumping memfd content as it would be easier to support incremental checkpoints in the future. Signed-off-by: Nicolas Viennot --- Makefile.config | 2 +- criu/Makefile.crtools | 1 + criu/cr-restore.c | 2 + criu/files.c | 18 +- criu/image-desc.c | 1 + criu/include/image-desc.h | 2 + criu/include/magic.h | 1 + criu/include/memfd.h | 24 +++ criu/include/protobuf-desc.h | 2 + criu/include/shmem.h | 3 + criu/kerndat.c | 3 +- criu/memfd.c | 350 +++++++++++++++++++++++++++++++++++ criu/shmem.c | 66 ++++++- images/Makefile | 1 + images/fdinfo.proto | 3 + images/memfd.proto | 20 ++ lib/py/images/images.py | 2 + scripts/feature-tests.mak | 11 ++ 18 files changed, 503 insertions(+), 9 deletions(-) create mode 100644 criu/include/memfd.h create mode 100644 criu/memfd.c create mode 100644 images/memfd.proto diff --git a/Makefile.config b/Makefile.config index 161365960..98ba5d892 100644 --- a/Makefile.config +++ b/Makefile.config @@ -64,7 +64,7 @@ export DEFINES += $(FEATURE_DEFINES) export CFLAGS += $(FEATURE_DEFINES) FEATURES_LIST := TCP_REPAIR STRLCPY STRLCAT PTRACE_PEEKSIGINFO \ - SETPROCTITLE_INIT MEMFD TCP_REPAIR_WINDOW FSCONFIG + SETPROCTITLE_INIT MEMFD TCP_REPAIR_WINDOW FSCONFIG MEMFD_CREATE # $1 - config name define gen-feature-test diff --git a/criu/Makefile.crtools b/criu/Makefile.crtools index 4588ea5b8..1a6e0b5b5 100644 --- a/criu/Makefile.crtools +++ b/criu/Makefile.crtools @@ -37,6 +37,7 @@ obj-y += libnetlink.o obj-y += log.o obj-y += lsm.o obj-y += mem.o +obj-y += memfd.o obj-y += mount.o obj-y += filesystems.o obj-y += namespaces.o diff --git a/criu/cr-restore.c b/criu/cr-restore.c index e5e8fc9c5..13d1001c9 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -76,6 +76,7 @@ #include
"sigframe.h" #include "fdstore.h" #include "string.h" +#include "memfd.h" #include "parasite-syscall.h" #include "files-reg.h" @@ -289,6 +290,7 @@ static struct collect_image_info *cinfos_files[] = { &fanotify_cinfo, &fanotify_mark_cinfo, &ext_file_cinfo, + &memfd_cinfo, }; /* These images are required to restore namespaces */ diff --git a/criu/files.c b/criu/files.c index e26897870..ea86deaa3 100644 --- a/criu/files.c +++ b/criu/files.c @@ -34,6 +34,7 @@ #include "sk-packet.h" #include "mount.h" #include "signalfd.h" +#include "memfd.h" #include "namespaces.h" #include "tun.h" #include "timerfd.h" @@ -546,13 +547,17 @@ static int dump_one_file(struct pid *pid, int fd, int lfd, struct fd_opts *opts, return -1; p.link = &link; - if (link.name[1] == '/') - return do_dump_gen_file(&p, lfd, &regfile_dump_ops, e); - if (check_ns_proc(&link)) - return do_dump_gen_file(&p, lfd, &nsfile_dump_ops, e); + if (is_memfd(p.stat.st_dev, &link.name[1])) + ops = &memfd_dump_ops; + else if (link.name[1] == '/') + ops = &regfile_dump_ops; + else if (check_ns_proc(&link)) + ops = &nsfile_dump_ops; + else + return dump_unsupp_fd(&p, lfd, "reg", link.name + 1, e); - return dump_unsupp_fd(&p, lfd, "reg", link.name + 1, e); + return do_dump_gen_file(&p, lfd, ops, e); } if (S_ISFIFO(p.stat.st_mode)) { @@ -1721,6 +1726,9 @@ static int collect_one_file(void *o, ProtobufCMessage *base, struct cr_img *i) case FD_TYPES__TTY: ret = collect_one_file_entry(fe, fe->tty->id, &fe->tty->base, &tty_cinfo); break; + case FD_TYPES__MEMFD: + ret = collect_one_file_entry(fe, fe->memfd->id, &fe->memfd->base, &memfd_cinfo); + break; } return ret; diff --git a/criu/image-desc.c b/criu/image-desc.c index ae5d817fe..b538a76ea 100644 --- a/criu/image-desc.c +++ b/criu/image-desc.c @@ -66,6 +66,7 @@ struct cr_fd_desc_tmpl imgset_template[CR_FD_MAX] = { FD_ENTRY(FS, "fs-%u"), FD_ENTRY(REMAP_FPATH, "remap-fpath"), FD_ENTRY_F(GHOST_FILE, "ghost-file-%x", O_NOBUF), + FD_ENTRY_F(MEMFD_INODE, "memfd-%u", O_NOBUF),
FD_ENTRY(TCP_STREAM, "tcp-stream-%x"), FD_ENTRY(MNTS, "mountpoints-%u"), FD_ENTRY(NETDEV, "netdev-%u"), diff --git a/criu/include/image-desc.h b/criu/include/image-desc.h index 6db8bf94f..9ca9643a1 100644 --- a/criu/include/image-desc.h +++ b/criu/include/image-desc.h @@ -106,6 +106,8 @@ enum { CR_FD_FIFO, CR_FD_PIPES, CR_FD_TTY_FILES, + CR_FD_MEMFD_FILE, + CR_FD_MEMFD_INODE, CR_FD_AUTOFS, diff --git a/criu/include/magic.h b/criu/include/magic.h index 1a583f4ed..bdaca968d 100644 --- a/criu/include/magic.h +++ b/criu/include/magic.h @@ -94,6 +94,7 @@ #define BINFMT_MISC_MAGIC 0x67343323 /* Apatity */ #define AUTOFS_MAGIC 0x49353943 /* Sochi */ #define FILES_MAGIC 0x56303138 /* Toropets */ +#define MEMFD_INODE_MAGIC 0x48453499 /* Dnipro */ #define IFADDR_MAGIC RAW_IMAGE_MAGIC #define ROUTE_MAGIC RAW_IMAGE_MAGIC diff --git a/criu/include/memfd.h b/criu/include/memfd.h new file mode 100644 index 000000000..c1d7949cb --- /dev/null +++ b/criu/include/memfd.h @@ -0,0 +1,24 @@ +#ifndef __CR_MEMFD_H__ +#define __CR_MEMFD_H__ + +#include +#include "int.h" +#include "common/config.h" + +extern int is_memfd(dev_t dev, const char *path); +extern const struct fdtype_ops memfd_dump_ops; + +extern struct collect_image_info memfd_cinfo; + +#ifdef CONFIG_HAS_MEMFD_CREATE +# include +#else +# include +# include +static inline int memfd_create(const char *name, unsigned int flags) +{ + return syscall(SYS_memfd_create, name, flags); +} +#endif /* CONFIG_HAS_MEMFD_CREATE */ + +#endif /* __CR_MEMFD_H__ */ diff --git a/criu/include/protobuf-desc.h b/criu/include/protobuf-desc.h index 31f5b9a79..7e0385ef4 100644 --- a/criu/include/protobuf-desc.h +++ b/criu/include/protobuf-desc.h @@ -61,6 +61,8 @@ enum { PB_AUTOFS, PB_GHOST_CHUNK, PB_FILE, + PB_MEMFD_FILE, + PB_MEMFD_INODE, /* 60 */ /* PB_AUTOGEN_STOP */ diff --git a/criu/include/shmem.h b/criu/include/shmem.h index 04ab8d076..9afdb799a 100644 --- a/criu/include/shmem.h +++ b/criu/include/shmem.h @@ -13,8 +13,11 @@ extern int 
collect_sysv_shmem(unsigned long shmid, unsigned long size); extern int cr_dump_shmem(void); extern int add_shmem_area(pid_t pid, VmaEntry *vma, u64 *map); extern int fixup_sysv_shmems(void); +extern int dump_one_memfd_shmem(int fd, unsigned long shmid, unsigned long size); extern int dump_one_sysv_shmem(void *addr, unsigned long size, unsigned long shmid); extern int restore_sysv_shmem_content(void *addr, unsigned long size, unsigned long shmid); +extern int restore_memfd_shmem_content(int fd, unsigned long shmid, unsigned long size); + #define SYSV_SHMEM_SKIP_FD (0x7fffffff) diff --git a/criu/kerndat.c b/criu/kerndat.c index e0b5731d5..8ac83820b 100644 --- a/criu/kerndat.c +++ b/criu/kerndat.c @@ -42,6 +42,7 @@ #include "vdso.h" #include "kcmp.h" #include "sched.h" +#include "memfd.h" struct kerndat_s kdat = { }; @@ -409,7 +410,7 @@ static bool kerndat_has_memfd_create(void) { int ret; - ret = syscall(SYS_memfd_create, NULL, 0); + ret = memfd_create(NULL, 0); if (ret == -1 && errno == ENOSYS) kdat.has_memfd = false; diff --git a/criu/memfd.c b/criu/memfd.c new file mode 100644 index 000000000..bcef35e75 --- /dev/null +++ b/criu/memfd.c @@ -0,0 +1,350 @@ +#include + +#include "common/compiler.h" +#include "common/lock.h" +#include "memfd.h" +#include "fdinfo.h" +#include "imgset.h" +#include "image.h" +#include "util.h" +#include "log.h" +#include "files.h" +#include "fs-magic.h" +#include "kerndat.h" +#include "files-reg.h" +#include "rst-malloc.h" +#include "fdstore.h" +#include "file-ids.h" +#include "namespaces.h" +#include "shmem.h" + +#include "protobuf.h" +#include "images/memfd.pb-c.h" + +#define MEMFD_PREFIX "/memfd:" +#define MEMFD_PREFIX_LEN (sizeof(MEMFD_PREFIX)-1) + +struct memfd_inode { + struct list_head list; + u32 id; + union { + /* Only for dump */ + struct { + u32 dev; + u32 ino; + }; + /* Only for restore */ + struct { + mutex_t lock; + int fdstore_id; + }; + }; +}; + +static LIST_HEAD(memfd_inodes); + +/* + * Dump only + */ + +static u32 
memfd_inode_ids = 1; + +int is_memfd(dev_t dev, const char *path) +{ + /* + * TODO When MAP_HUGETLB is used, the file device is not shmem_dev, + * Note that other parts of CRIU have similar issues, see + * is_anon_shmem_map(). + */ + return dev == kdat.shmem_dev && + !strncmp(path, MEMFD_PREFIX, MEMFD_PREFIX_LEN); +} + +static int dump_memfd_inode(int fd, struct memfd_inode *inode, + const char *name, const struct stat *st) +{ + int ret = -1; + struct cr_img *img = NULL; + MemfdInodeEntry mie = MEMFD_INODE_ENTRY__INIT; + u32 shmid; + + /* + * shmids are chosen as the inode number of the corresponding mmaped + * file. See handle_vma() in proc_parse.c. + * It works for memfd too, because we share the same device as the + * shmem device. + */ + shmid = inode->ino; + + pr_info("Dumping memfd:%s contents (id %#x, shmid: %#x, size: %"PRIu64")\n", + name, inode->id, shmid, st->st_size); + + if (dump_one_memfd_shmem(fd, shmid, st->st_size) < 0) + goto out; + + img = open_image(CR_FD_MEMFD_INODE, O_DUMP, inode->id); + if (!img) + goto out; + + mie.uid = userns_uid(st->st_uid); + mie.gid = userns_gid(st->st_gid); + mie.name = (char *)name; + mie.size = st->st_size; + mie.shmid = shmid; + + if (pb_write_one(img, &mie, PB_MEMFD_INODE)) + goto out; + + ret = 0; + +out: + if (img) + close_image(img); + return ret; +} + +static struct memfd_inode *dump_unique_memfd_inode(int lfd, const char *name, const struct stat *st) +{ + struct memfd_inode *inode; + + list_for_each_entry(inode, &memfd_inodes, list) + if ((inode->dev == st->st_dev) && (inode->ino == st->st_ino)) + return inode; + + inode = xmalloc(sizeof(*inode)); + if (inode == NULL) + return NULL; + + inode->dev = st->st_dev; + inode->ino = st->st_ino; + inode->id = memfd_inode_ids++; + + if (dump_memfd_inode(lfd, inode, name, st)) { + xfree(inode); + return NULL; + } + + list_add_tail(&inode->list, &memfd_inodes); + + return inode; +} + +static int dump_one_memfd(int lfd, u32 id, const struct fd_parms *p) +{ + 
MemfdFileEntry mfe = MEMFD_FILE_ENTRY__INIT; + FileEntry fe = FILE_ENTRY__INIT; + struct memfd_inode *inode; + struct fd_link _link, *link; + const char *name; + + if (!p->link) { + if (fill_fdlink(lfd, p, &_link)) + return -1; + link = &_link; + } else + link = p->link; + + strip_deleted(link); + name = &link->name[1+MEMFD_PREFIX_LEN]; + + inode = dump_unique_memfd_inode(lfd, name, &p->stat); + if (!inode) + return -1; + + mfe.id = id; + mfe.flags = p->flags; + mfe.pos = p->pos; + mfe.fown = (FownEntry *)&p->fown; + mfe.inode_id = inode->id; + + fe.type = FD_TYPES__MEMFD; + fe.id = mfe.id; + fe.memfd = &mfe; + + return pb_write_one(img_from_set(glob_imgset, CR_FD_FILES), &fe, PB_FILE); +} + +const struct fdtype_ops memfd_dump_ops = { + .type = FD_TYPES__MEMFD, + .dump = dump_one_memfd, +}; + + +/* + * Restore only + */ + +struct memfd_info { + MemfdFileEntry *mfe; + struct file_desc d; + struct memfd_inode *inode; +}; + +static struct memfd_inode *memfd_alloc_inode(int id) +{ + struct memfd_inode *inode; + + list_for_each_entry(inode, &memfd_inodes, list) + if (inode->id == id) + return inode; + + inode = shmalloc(sizeof(*inode)); + if (!inode) + return NULL; + + inode->id = id; + mutex_init(&inode->lock); + inode->fdstore_id = -1; + + list_add_tail(&inode->list, &memfd_inodes); + return inode; +} + +extern int restore_memfd_shm(int fd, u64 id, u64 size); +static int memfd_open_inode_nocache(struct memfd_inode *inode) +{ + MemfdInodeEntry *mie = NULL; + struct cr_img *img = NULL; + int fd = -1; + int ret = -1; + int flags; + + img = open_image(CR_FD_MEMFD_INODE, O_RSTR, inode->id); + if (!img) + goto out; + + if (pb_read_one(img, &mie, PB_MEMFD_INODE) < 0) + goto out; + + fd = memfd_create(mie->name, 0); + if (fd < 0) { + pr_perror("Can't create memfd:%s", mie->name); + goto out; + } + + if (restore_memfd_shmem_content(fd, mie->shmid, mie->size)) + goto out; + + if (fchown(fd, mie->uid, mie->gid)) { + pr_perror("Can't change uid %d gid %d of memfd:%s", + 
(int)mie->uid, (int)mie->gid, mie->name); + goto out; + } + + inode->fdstore_id = fdstore_add(fd); + if (inode->fdstore_id < 0) + goto out; + + ret = fd; + fd = -1; + +out: + if (fd != -1) + close(fd); + if (img) + close_image(img); + if (mie) + memfd_inode_entry__free_unpacked(mie, NULL); + return ret; +} + +static int memfd_open_inode(struct memfd_inode *inode) +{ + int fd; + + if (inode->fdstore_id != -1) + return fdstore_get(inode->fdstore_id); + + mutex_lock(&inode->lock); + if (inode->fdstore_id != -1) + fd = fdstore_get(inode->fdstore_id); + else + fd = memfd_open_inode_nocache(inode); + mutex_unlock(&inode->lock); + + return fd; +} + +static int memfd_open(struct file_desc *d, u32 *fdflags) +{ + char lpath[PSFDS]; + struct memfd_info *mfi; + MemfdFileEntry *mfe; + int fd, _fd; + u32 flags; + + mfi = container_of(d, struct memfd_info, d); + mfe = mfi->mfe; + + pr_info("Restoring memfd id=%d\n", mfe->id); + + fd = memfd_open_inode(mfi->inode); + if (fd < 0) + goto err; + + /* Reopen the fd with original permissions */ + sprintf(lpath, "/proc/self/fd/%d", fd); + flags = fdflags ? *fdflags : mfe->flags; + /* + * Ideally we should call compat version open() to not force the + * O_LARGEFILE file flag with regular open(). It doesn't seem that + * important though. 
+ */ + _fd = open(lpath, flags); + if (_fd < 0) { + pr_perror("Can't reopen memfd id=%d", mfe->id); + goto err; + } + close(fd); + fd = _fd; + + if (restore_fown(fd, mfe->fown) < 0) + goto err; + + if (lseek(fd, mfe->pos, SEEK_SET) < 0) { + pr_perror("Can't restore file position of memfd id=%d", mfe->id); + goto err; + } + + return fd; + +err: + if (fd >= 0) + close(fd); + return -1; +} + +static int memfd_open_fe_fd(struct file_desc *fd, int *new_fd) +{ + int tmp; + + tmp = memfd_open(fd, NULL); + if (tmp < 0) + return -1; + *new_fd = tmp; + return 0; +} + +static struct file_desc_ops memfd_desc_ops = { + .type = FD_TYPES__MEMFD, + .open = memfd_open_fe_fd, +}; + +static int collect_one_memfd(void *o, ProtobufCMessage *msg, struct cr_img *i) +{ + struct memfd_info *info = o; + + info->mfe = pb_msg(msg, MemfdFileEntry); + info->inode = memfd_alloc_inode(info->mfe->inode_id); + if (!info->inode) + return -1; + + return file_desc_add(&info->d, info->mfe->id, &memfd_desc_ops); +} + +struct collect_image_info memfd_cinfo = { + .fd_type = CR_FD_MEMFD_FILE, + .pb_type = PB_MEMFD_FILE, + .priv_size = sizeof(struct memfd_info), + .collect = collect_one_memfd, +}; diff --git a/criu/shmem.c b/criu/shmem.c index cee47dba7..29383e79a 100644 --- a/criu/shmem.c +++ b/criu/shmem.c @@ -23,6 +23,7 @@ #include "types.h" #include "page.h" #include "util.h" +#include "memfd.h" #include "protobuf.h" #include "images/pagemap.pb-c.h" @@ -490,7 +491,7 @@ static int do_restore_shmem_content(void *addr, unsigned long size, unsigned lon return ret; } -static int restore_shmem_content(void *addr, struct shmem_info *si) +int restore_shmem_content(void *addr, struct shmem_info *si) { return do_restore_shmem_content(addr, si->size, si->shmid); } @@ -500,6 +501,41 @@ int restore_sysv_shmem_content(void *addr, unsigned long size, unsigned long shm return do_restore_shmem_content(addr, round_up(size, PAGE_SIZE), shmid); } +int restore_memfd_shmem_content(int fd, unsigned long shmid, unsigned long 
size) +{ + void *addr = NULL; + int ret = 1; + + if (size == 0) + return 0; + + if (ftruncate(fd, size) < 0) { + pr_perror("Can't resize shmem 0x%lx size=%ld", shmid, size); + goto out; + } + + addr = mmap(NULL, size, PROT_WRITE | PROT_READ, MAP_SHARED, fd, 0); + if (addr == MAP_FAILED) { + pr_perror("Can't mmap shmem 0x%lx size=%ld", shmid, size); + goto out; + } + + /* + * do_restore_shmem_content needs size to be page aligned. + */ + if (do_restore_shmem_content(addr, round_up(size, PAGE_SIZE), shmid) < 0) { + pr_err("Can't restore shmem content\n"); + goto out; + } + + ret = 0; + +out: + if (addr) + munmap(addr, size); + return ret; +} + static int open_shmem(int pid, struct vma_area *vma) { VmaEntry *vi = vma->e; @@ -532,7 +568,7 @@ static int open_shmem(int pid, struct vma_area *vma) flags = MAP_SHARED; if (kdat.has_memfd) { - f = syscall(SYS_memfd_create, "", 0); + f = memfd_create("", 0); if (f < 0) { pr_perror("Unable to create memfd"); goto err; @@ -779,6 +815,32 @@ err: return ret; } +int dump_one_memfd_shmem(int fd, unsigned long shmid, unsigned long size) +{ + int ret = -1; + void *addr; + struct shmem_info si; + + if (size == 0) + return 0; + + memset(&si, 0, sizeof(si)); + si.shmid = shmid; + si.size = size; + + addr = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0); + if (addr == MAP_FAILED) { + pr_perror("Can't mmap shmem 0x%lx", shmid); + goto err; + } + + ret = do_dump_one_shmem(fd, addr, &si); + + munmap(addr, size); +err: + return ret; +} + int dump_one_sysv_shmem(void *addr, unsigned long size, unsigned long shmid) { int fd, ret; diff --git a/images/Makefile b/images/Makefile index edaab0633..e7f0580cf 100644 --- a/images/Makefile +++ b/images/Makefile @@ -63,6 +63,7 @@ proto-obj-y += sysctl.o proto-obj-y += autofs.o proto-obj-y += macvlan.o proto-obj-y += sit.o +proto-obj-y += memfd.o CFLAGS += -iquote $(obj)/ diff --git a/images/fdinfo.proto b/images/fdinfo.proto index 77e375aa9..d966d5bc5 100644 --- a/images/fdinfo.proto +++ 
b/images/fdinfo.proto @@ -16,6 +16,7 @@ import "sk-unix.proto"; import "fifo.proto"; import "pipe.proto"; import "tty.proto"; +import "memfd.proto"; enum fd_types { UND = 0; @@ -36,6 +37,7 @@ enum fd_types { TUNF = 15; EXT = 16; TIMERFD = 17; + MEMFD = 18; /* Any number above the real used. Not stored to image */ CTL_TTY = 65534; @@ -70,4 +72,5 @@ message file_entry { optional fifo_entry fifo = 17; optional pipe_entry pipe = 18; optional tty_file_entry tty = 19; + optional memfd_file_entry memfd = 20; } diff --git a/images/memfd.proto b/images/memfd.proto new file mode 100644 index 000000000..8eccd6f4f --- /dev/null +++ b/images/memfd.proto @@ -0,0 +1,20 @@ +syntax = "proto2"; + +import "opts.proto"; +import "fown.proto"; + +message memfd_file_entry { + required uint32 id = 1; + required uint32 flags = 2 [(criu).flags = "rfile.flags"]; + required uint64 pos = 3; + required fown_entry fown = 4; + required uint32 inode_id = 5; +}; + +message memfd_inode_entry { + required string name = 1; + required uint32 uid = 2; + required uint32 gid = 3; + required uint64 size = 4; + required uint32 shmid = 5; +}; diff --git a/lib/py/images/images.py b/lib/py/images/images.py index 3eedfca69..dca080657 100644 --- a/lib/py/images/images.py +++ b/lib/py/images/images.py @@ -522,6 +522,8 @@ handlers = { 'AUTOFS': entry_handler(pb.autofs_entry), 'FILES': entry_handler(pb.file_entry), 'CPUINFO': entry_handler(pb.cpuinfo_entry), + 'MEMFD_FILE': entry_handler(pb.memfd_file_entry), + 'MEMFD_INODE': entry_handler(pb.memfd_inode_entry), } diff --git a/scripts/feature-tests.mak b/scripts/feature-tests.mak index 6f67c6035..21b390092 100644 --- a/scripts/feature-tests.mak +++ b/scripts/feature-tests.mak @@ -171,3 +171,14 @@ int main(int argc, char **argv) } endef + +define FEATURE_TEST_MEMFD_CREATE + +#include +#include + +int main(void) +{ + return memfd_create(NULL, 0); +} +endef From 875ac4d03f9034adb88eec5875d63d0561c48107 Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Wed, 18 Dec 
2019 23:32:32 +0000 Subject: [PATCH 0330/2030] files: increase path buffer size in inherited_fd() Prepare memfd to use inherited_fd(), needing long path names support. Signed-off-by: Nicolas Viennot --- criu/files.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/criu/files.c b/criu/files.c index ea86deaa3..789b08a4c 100644 --- a/criu/files.c +++ b/criu/files.c @@ -1608,7 +1608,7 @@ int inherit_fd_lookup_id(char *id) bool inherited_fd(struct file_desc *d, int *fd_p) { - char buf[32], *id_str; + char buf[PATH_MAX], *id_str; int i_fd; if (!d->ops->name) From b25684e24ae7643f2a8da73617c22a44dc9023ca Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Wed, 18 Dec 2019 23:32:32 +0000 Subject: [PATCH 0331/2030] memfd: add --inherit-fd support Upon file restore, inherited_fd() is called to check for a user-defined inherit-fd override. Note that the MEMFD_INODE image is read at each invocation (memfd name is not cached). Signed-off-by: Nicolas Viennot --- criu/crtools.c | 1 + criu/memfd.c | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/criu/crtools.c b/criu/crtools.c index 3cd40e87d..7f72dde27 100644 --- a/criu/crtools.c +++ b/criu/crtools.c @@ -392,6 +392,7 @@ usage: " pipe[inode]\n" " socket[inode]\n" " file[mnt_id:inode]\n" +" /memfd:name\n" " path/to/file\n" " --empty-ns net Create a namespace, but don't restore its properties\n" " (assuming it will be restored by action scripts)\n" diff --git a/criu/memfd.c b/criu/memfd.c index bcef35e75..36b3be8df 100644 --- a/criu/memfd.c +++ b/criu/memfd.c @@ -276,6 +276,9 @@ static int memfd_open(struct file_desc *d, u32 *fdflags) mfi = container_of(d, struct memfd_info, d); mfe = mfi->mfe; + if (inherited_fd(d, &fd)) + return fd; + pr_info("Restoring memfd id=%d\n", mfe->id); fd = memfd_open_inode(mfi->inode); @@ -325,9 +328,42 @@ static int memfd_open_fe_fd(struct file_desc *fd, int *new_fd) return 0; } +static char *memfd_d_name(struct file_desc *d, char *buf, size_t s) 
+{ + MemfdInodeEntry *mie = NULL; + struct cr_img *img = NULL; + struct memfd_info *mfi; + char *ret = NULL; + + mfi = container_of(d, struct memfd_info, d); + + img = open_image(CR_FD_MEMFD_INODE, O_RSTR, mfi->inode->id); + if (!img) + goto out; + + if (pb_read_one(img, &mie, PB_MEMFD_INODE) < 0) + goto out; + + if (snprintf(buf, s, "%s%s", MEMFD_PREFIX, mie->name) >= s) { + pr_err("Buffer too small for memfd name %s\n", mie->name); + goto out; + } + + ret = buf; + +out: + if (img) + close_image(img); + if (mie) + memfd_inode_entry__free_unpacked(mie, NULL); + + return ret; +} + static struct file_desc_ops memfd_desc_ops = { .type = FD_TYPES__MEMFD, .open = memfd_open_fe_fd, + .name = memfd_d_name, }; static int collect_one_memfd(void *o, ProtobufCMessage *msg, struct cr_img *i) From 29a1a88bcebaf9d83591077d2bec424da82c0e71 Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Wed, 18 Dec 2019 23:32:32 +0000 Subject: [PATCH 0332/2030] memfd: add memory mapping support * During checkpoint, we add a vma flag, VMA_AREA_MEMFD, to denote memfd regions. * Even though memfd is backed by the shmem device, we use the file semantics of memfd (via /proc/map_files/) which we already have support for. 
Signed-off-by: Nicolas Viennot --- criu/cr-dump.c | 6 +++++- criu/files-reg.c | 11 +++++++++-- criu/include/image.h | 1 + criu/include/memfd.h | 6 ++++++ criu/memfd.c | 19 ++++++++++++++++++- criu/proc_parse.c | 35 +++++++++++++++++++++++++++++++++++ 6 files changed, 74 insertions(+), 4 deletions(-) diff --git a/criu/cr-dump.c b/criu/cr-dump.c index 88323af92..6aa114c2d 100644 --- a/criu/cr-dump.c +++ b/criu/cr-dump.c @@ -80,6 +80,7 @@ #include "fault-injection.h" #include "dump.h" #include "eventpoll.h" +#include "memfd.h" /* * Architectures can overwrite this function to restore register sets that @@ -414,7 +415,10 @@ static int dump_filemap(struct vma_area *vma_area, int fd) /* Flags will be set during restore in open_filmap() */ - ret = dump_one_reg_file_cond(fd, &id, &p); + if (vma->status & VMA_AREA_MEMFD) + ret = dump_one_memfd_cond(fd, &id, &p); + else + ret = dump_one_reg_file_cond(fd, &id, &p); vma->shmid = id; return ret; diff --git a/criu/files-reg.c b/criu/files-reg.c index 90fb7dd7f..b0dad78e6 100644 --- a/criu/files-reg.c +++ b/criu/files-reg.c @@ -35,6 +35,7 @@ #include "pstree.h" #include "fault-injection.h" #include "external.h" +#include "memfd.h" #include "protobuf.h" #include "util.h" @@ -1879,7 +1880,10 @@ static int open_filemap(int pid, struct vma_area *vma) flags = vma->e->fdflags; if (ctx.flags != flags || ctx.desc != vma->vmfd) { - ret = open_path(vma->vmfd, do_open_reg_noseek_flags, &flags); + if (vma->e->status & VMA_AREA_MEMFD) + ret = memfd_open(vma->vmfd, &flags); + else + ret = open_path(vma->vmfd, do_open_reg_noseek_flags, &flags); if (ret < 0) return ret; @@ -1909,7 +1913,10 @@ int collect_filemap(struct vma_area *vma) vma->e->fdflags = O_RDONLY; } - fd = collect_special_file(vma->e->shmid); + if (vma->e->status & VMA_AREA_MEMFD) + fd = collect_memfd(vma->e->shmid); + else + fd = collect_special_file(vma->e->shmid); if (!fd) return -1; diff --git a/criu/include/image.h b/criu/include/image.h index 2baa39496..1c7cc5471 100644 --- 
a/criu/include/image.h +++ b/criu/include/image.h @@ -83,6 +83,7 @@ #define VMA_AREA_SOCKET (1 << 11) #define VMA_AREA_VVAR (1 << 12) #define VMA_AREA_AIORING (1 << 13) +#define VMA_AREA_MEMFD (1 << 14) #define VMA_CLOSE (1 << 28) #define VMA_NO_PROT_WRITE (1 << 29) diff --git a/criu/include/memfd.h b/criu/include/memfd.h index c1d7949cb..0a9aeff2f 100644 --- a/criu/include/memfd.h +++ b/criu/include/memfd.h @@ -5,10 +5,16 @@ #include "int.h" #include "common/config.h" +struct fd_parms; +struct file_desc; + extern int is_memfd(dev_t dev, const char *path); +extern int dump_one_memfd_cond(int lfd, u32 *id, struct fd_parms *parms); extern const struct fdtype_ops memfd_dump_ops; +extern int memfd_open(struct file_desc *d, u32 *fdflags); extern struct collect_image_info memfd_cinfo; +extern struct file_desc *collect_memfd(u32 id); #ifdef CONFIG_HAS_MEMFD_CREATE # include diff --git a/criu/memfd.c b/criu/memfd.c index 36b3be8df..1cca96a32 100644 --- a/criu/memfd.c +++ b/criu/memfd.c @@ -164,6 +164,13 @@ static int dump_one_memfd(int lfd, u32 id, const struct fd_parms *p) return pb_write_one(img_from_set(glob_imgset, CR_FD_FILES), &fe, PB_FILE); } +int dump_one_memfd_cond(int lfd, u32 *id, struct fd_parms *parms) +{ + if (fd_id_generate_special(parms, id)) + return dump_one_memfd(lfd, *id, parms); + return 0; +} + const struct fdtype_ops memfd_dump_ops = { .type = FD_TYPES__MEMFD, .dump = dump_one_memfd, @@ -265,7 +272,7 @@ static int memfd_open_inode(struct memfd_inode *inode) return fd; } -static int memfd_open(struct file_desc *d, u32 *fdflags) +int memfd_open(struct file_desc *d, u32 *fdflags) { char lpath[PSFDS]; struct memfd_info *mfi; @@ -384,3 +391,13 @@ struct collect_image_info memfd_cinfo = { .priv_size = sizeof(struct memfd_info), .collect = collect_one_memfd, }; + +struct file_desc *collect_memfd(u32 id) { + struct file_desc *fdesc; + + fdesc = find_file_desc_raw(FD_TYPES__MEMFD, id); + if (fdesc == NULL) + pr_err("No entry for memfd %#x\n", id); + + return 
fdesc; +} diff --git a/criu/proc_parse.c b/criu/proc_parse.c index fa7644992..468afcdf3 100644 --- a/criu/proc_parse.c +++ b/criu/proc_parse.c @@ -41,6 +41,7 @@ #include "timerfd.h" #include "path.h" #include "fault-injection.h" +#include "memfd.h" #include "protobuf.h" #include "images/fdinfo.pb-c.h" @@ -303,6 +304,26 @@ static int vma_get_mapfile_user(const char *fname, struct vma_area *vma, } vfi_dev = makedev(vfi->dev_maj, vfi->dev_min); + + if (is_memfd(vfi_dev, fname)) { + struct fd_link link; + link.len = strlen(fname); + strlcpy(link.name, fname, sizeof(link.name)); + strip_deleted(&link); + + /* + * The error EPERM will be shown in the following pr_perror(). + * It comes from the previous open() call. + */ + pr_perror("Can't open mapped [%s]", link.name); + + /* + * TODO Perhaps we could do better than failing and dump the + * memory like what is being done in shmem.c + */ + return -1; + } + if (is_anon_shmem_map(vfi_dev)) { if (!(vma->e->flags & MAP_SHARED)) return -1; @@ -578,7 +599,20 @@ static int handle_vma(pid_t pid, struct vma_area *vma_area, /* * /dev/zero stands for anon-shared mapping * otherwise it's some file mapping. + * + * We treat memfd mappings as regular file mappings because + * their backing can be seen as files, which is easy to + * support. So even though memfd is an anonymous shmem, we + * treat it differently. + * Note: maybe we should revisit this as /proc/map_files/ + * may not always be accessible. 
*/ + + if (is_memfd(st_buf->st_dev, file_path)) { + vma_area->e->status |= VMA_AREA_MEMFD; + goto normal_file; + } + if (is_anon_shmem_map(st_buf->st_dev)) { if (!(vma_area->e->flags & MAP_SHARED)) goto err_bogus_mapping; @@ -594,6 +628,7 @@ static int handle_vma(pid_t pid, struct vma_area *vma_area, vma_area->e->shmid += FI_HUGE_ANON_SHMEM_ID_BASE; } } else { +normal_file: if (vma_area->e->flags & MAP_PRIVATE) vma_area->e->status |= VMA_FILE_PRIVATE; else From 56d8e2455fb86b885775db6c236cbb04ba403f4d Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Wed, 18 Dec 2019 23:32:32 +0000 Subject: [PATCH 0333/2030] memfd: add seals support See "man fcntl" for more information about seals. memfd are the only files that can be sealed, currently. For this reason, we dump the seal values in the MEMFD_INODE image. Restoring seals must be done carefully as the seal F_SEAL_FUTURE_WRITE prevents future write access. This means that any memory mapping with write access must be restored before restoring the seals. Signed-off-by: Nicolas Viennot --- criu/cr-restore.c | 4 +++ criu/include/fcntl.h | 8 ++++++ criu/include/memfd.h | 1 + criu/memfd.c | 59 +++++++++++++++++++++++++++++++++++++++- images/memfd.proto | 1 + lib/py/images/pb2dict.py | 9 ++++++ 6 files changed, 81 insertions(+), 1 deletion(-) diff --git a/criu/cr-restore.c b/criu/cr-restore.c index 13d1001c9..f50448cd2 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -2232,6 +2232,10 @@ skip_ns_bouncing: if (ret < 0) goto out_kill; + ret = apply_memfd_seals(); + if (ret < 0) + goto out_kill; + /* * Zombies die after CR_STATE_RESTORE which is switched * by root task, not by us. 
See comment before CR_STATE_FORKING diff --git a/criu/include/fcntl.h b/criu/include/fcntl.h index d9c5c5e7b..ea9d48c72 100644 --- a/criu/include/fcntl.h +++ b/criu/include/fcntl.h @@ -34,6 +34,14 @@ struct f_owner_ex { # define F_GETPIPE_SZ (F_LINUX_SPECIFIC_BASE + 8) #endif +#ifndef F_ADD_SEALS +# define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9) +#endif + +#ifndef F_GET_SEALS +# define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10) +#endif + #ifndef O_PATH # define O_PATH 010000000 #endif diff --git a/criu/include/memfd.h b/criu/include/memfd.h index 0a9aeff2f..2d8eda545 100644 --- a/criu/include/memfd.h +++ b/criu/include/memfd.h @@ -15,6 +15,7 @@ extern const struct fdtype_ops memfd_dump_ops; extern int memfd_open(struct file_desc *d, u32 *fdflags); extern struct collect_image_info memfd_cinfo; extern struct file_desc *collect_memfd(u32 id); +extern int apply_memfd_seals(void); #ifdef CONFIG_HAS_MEMFD_CREATE # include diff --git a/criu/memfd.c b/criu/memfd.c index 1cca96a32..d17c10fb7 100644 --- a/criu/memfd.c +++ b/criu/memfd.c @@ -1,4 +1,5 @@ #include +#include #include "common/compiler.h" #include "common/lock.h" @@ -24,6 +25,13 @@ #define MEMFD_PREFIX "/memfd:" #define MEMFD_PREFIX_LEN (sizeof(MEMFD_PREFIX)-1) +#define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */ +#define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */ +#define F_SEAL_GROW 0x0004 /* prevent file from growing */ +#define F_SEAL_WRITE 0x0008 /* prevent writes */ +/* Linux 5.1+ */ +#define F_SEAL_FUTURE_WRITE 0x0010 /* prevent future writes while mapped */ + struct memfd_inode { struct list_head list; u32 id; @@ -37,6 +45,7 @@ struct memfd_inode { struct { mutex_t lock; int fdstore_id; + unsigned int pending_seals; }; }; }; @@ -92,6 +101,10 @@ static int dump_memfd_inode(int fd, struct memfd_inode *inode, mie.size = st->st_size; mie.shmid = shmid; + mie.seals = fcntl(fd, F_GET_SEALS); + if (mie.seals == -1) + goto out; + if (pb_write_one(img, &mie, PB_MEMFD_INODE)) goto out; @@ 
-187,6 +200,8 @@ struct memfd_info { struct memfd_inode *inode; }; +static int memfd_open_inode(struct memfd_inode *inode); + static struct memfd_inode *memfd_alloc_inode(int id) { struct memfd_inode *inode; @@ -202,6 +217,7 @@ static struct memfd_inode *memfd_alloc_inode(int id) inode->id = id; mutex_init(&inode->lock); inode->fdstore_id = -1; + inode->pending_seals = 0; list_add_tail(&inode->list, &memfd_inodes); return inode; @@ -223,7 +239,16 @@ static int memfd_open_inode_nocache(struct memfd_inode *inode) if (pb_read_one(img, &mie, PB_MEMFD_INODE) < 0) goto out; - fd = memfd_create(mie->name, 0); + if (mie->seals == F_SEAL_SEAL) { + inode->pending_seals = 0; + flags = 0; + } else { + /* Seals are applied later due to F_SEAL_FUTURE_WRITE */ + inode->pending_seals = mie->seals; + flags = MFD_ALLOW_SEALING; + } + + fd = memfd_create(mie->name, flags); if (fd < 0) { pr_perror("Can't create memfd:%s", mie->name); goto out; @@ -401,3 +426,35 @@ struct file_desc *collect_memfd(u32 id) { return fdesc; } + +int apply_memfd_seals(void) +{ + /* + * We apply the seals after all the mappings are done because the seal + * F_SEAL_FUTURE_WRITE prevents future write access (added in + * Linux 5.1). Thus we must make sure all writable mappings are opened + * before applying this seal. 
+ */ + + int ret, fd; + struct memfd_inode *inode; + + list_for_each_entry(inode, &memfd_inodes, list) { + if (!inode->pending_seals) + continue; + + fd = memfd_open_inode(inode); + if (fd < 0) + return -1; + + ret = fcntl(fd, F_ADD_SEALS, inode->pending_seals); + close(fd); + + if (ret < 0) { + pr_perror("Cannot apply seals on memfd"); + return -1; + } + } + + return 0; +} diff --git a/images/memfd.proto b/images/memfd.proto index 8eccd6f4f..546ffc2ab 100644 --- a/images/memfd.proto +++ b/images/memfd.proto @@ -17,4 +17,5 @@ message memfd_inode_entry { required uint32 gid = 3; required uint64 size = 4; required uint32 shmid = 5; + required uint32 seals = 6 [(criu).flags = "seals.flags"]; }; diff --git a/lib/py/images/pb2dict.py b/lib/py/images/pb2dict.py index a89850a1d..40a6036cf 100644 --- a/lib/py/images/pb2dict.py +++ b/lib/py/images/pb2dict.py @@ -124,6 +124,14 @@ rfile_flags_map = [ ('O_CLOEXEC', 0o02000000), ] +seals_flags_map = [ + ('F_SEAL_SEAL', 0x0001), + ('F_SEAL_SHRINK', 0x0002), + ('F_SEAL_GROW', 0x0004), + ('F_SEAL_WRITE', 0x0008), + ('F_SEAL_FUTURE_WRITE', 0x0010), +] + pmap_flags_map = [ ('PE_PARENT', 1 << 0), ('PE_LAZY', 1 << 1), @@ -136,6 +144,7 @@ flags_maps = { 'mmap.status': mmap_status_map, 'rfile.flags': rfile_flags_map, 'pmap.flags': pmap_flags_map, + 'seals.flags': seals_flags_map, } gen_maps = { From b133c375ad2d21cf6a1a9e96e7dab3741c966fbe Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Tue, 4 Feb 2020 16:39:53 +0000 Subject: [PATCH 0334/2030] inhfd_test: add support for non-pair files File pairs naturally block on read() until the write() happen (or the writer is closed). This is not the case for regular files, so we take extra precaution for these. 
Also cleaned-up an extra my_file.close() Signed-off-by: Nicolas Viennot --- test/zdtm.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/test/zdtm.py b/test/zdtm.py index 3fc57ba55..4110b5142 100755 --- a/test/zdtm.py +++ b/test/zdtm.py @@ -691,9 +691,14 @@ class inhfd_test: i = 0 for _, peer_file in self.__files: msg = self.__get_message(i) - my_file.close() try: - data = peer_file.read(16) + # File pairs naturally block on read() until the write() + # happen (or the writer is closed). This is not the case for + # regular files, so we loop. + data = b'' + while not data: + data = peer_file.read(16) + time.sleep(0.1) except Exception as e: print("Unable to read a peer file: %s" % e) sys.exit(1) From 2dd105b8dfb23399e18ab4e3f7d13b00c19ad910 Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Fri, 20 Dec 2019 21:56:38 -0500 Subject: [PATCH 0335/2030] memfd: add tests Testing for all the memfd features, namely support for CR of: * the same fd shared by multiple processes * the same file shared by multiple processes * the memfd content * file flags and fd flags * mmaps, MAP_SHARED and MAP_PRIVATE * seals, excluding F_SEAL_FUTURE_WRITE because this feature only exists in recent kernels (5.1 and up) * inherited fd Signed-off-by: Nicolas Viennot --- test/inhfd/memfd.py | 28 +++++++++ test/inhfd/memfd.py.checkskip | 7 +++ test/inhfd/memfd.py.desc | 1 + test/zdtm/static/Makefile | 4 ++ test/zdtm/static/memfd00.c | 103 ++++++++++++++++++++++++++++++ test/zdtm/static/memfd01.c | 114 ++++++++++++++++++++++++++++++++++ test/zdtm/static/memfd02.c | 87 ++++++++++++++++++++++++++ test/zdtm/static/memfd03.c | 97 +++++++++++++++++++++++++++++ 8 files changed, 441 insertions(+) create mode 100755 test/inhfd/memfd.py create mode 100755 test/inhfd/memfd.py.checkskip create mode 100644 test/inhfd/memfd.py.desc create mode 100644 test/zdtm/static/memfd00.c create mode 100644 test/zdtm/static/memfd01.c create mode 100644 test/zdtm/static/memfd02.c create mode 
100644 test/zdtm/static/memfd03.c diff --git a/test/inhfd/memfd.py b/test/inhfd/memfd.py new file mode 100755 index 000000000..d9ce01e41 --- /dev/null +++ b/test/inhfd/memfd.py @@ -0,0 +1,28 @@ +import os +import ctypes +libc = ctypes.CDLL(None) + + +def memfd_create(name, flags): + return libc.memfd_create(name.encode('utf8'), flags) + + +def create_fds(): + def create_memfd_pair(name): + fd = memfd_create(name, 0) + fw = open('/proc/self/fd/{}'.format(fd), 'wb') + fr = open('/proc/self/fd/{}'.format(fd), 'rb') + os.close(fd) + return (fw, fr) + + return [create_memfd_pair("name{}".format(i)) for i in range(10)] + + +def filename(f): + name = os.readlink('/proc/self/fd/{}'.format(f.fileno())) + name = name.replace(' (deleted)', '') + return name + + +def dump_opts(sockf): + return [] diff --git a/test/inhfd/memfd.py.checkskip b/test/inhfd/memfd.py.checkskip new file mode 100755 index 000000000..252778969 --- /dev/null +++ b/test/inhfd/memfd.py.checkskip @@ -0,0 +1,7 @@ +#!/usr/bin/env python + +import ctypes +libc = ctypes.CDLL(None) + +# libc may not have memfd_create (e.g., centos on travis) +libc.memfd_create("test".encode('utf8'), 0) diff --git a/test/inhfd/memfd.py.desc b/test/inhfd/memfd.py.desc new file mode 100644 index 000000000..10666c823 --- /dev/null +++ b/test/inhfd/memfd.py.desc @@ -0,0 +1 @@ +{ 'flavor': 'h' } diff --git a/test/zdtm/static/Makefile b/test/zdtm/static/Makefile index 5ca05ee9e..5afd18cd6 100644 --- a/test/zdtm/static/Makefile +++ b/test/zdtm/static/Makefile @@ -220,6 +220,10 @@ TST_NOFILE := \ child_subreaper \ child_subreaper_existing_child \ child_subreaper_and_reparent \ + memfd00 \ + memfd01 \ + memfd02 \ + memfd03 \ # jobctl00 \ ifneq ($(ARCH),arm) diff --git a/test/zdtm/static/memfd00.c b/test/zdtm/static/memfd00.c new file mode 100644 index 000000000..6b56eca01 --- /dev/null +++ b/test/zdtm/static/memfd00.c @@ -0,0 +1,103 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include 
+#include + +#include "zdtmtst.h" + +const char *test_doc = "memfd file descriptor"; +const char *test_author = "Nicolas Viennot "; + +#define err(exitcode, msg, ...) ({ pr_perror(msg, ##__VA_ARGS__); exit(exitcode); }) + +static int _memfd_create(const char *name, unsigned int flags) +{ + return syscall(SYS_memfd_create, name, flags); +} + +int main(int argc, char *argv[]) +{ + int fd, fl_flags1, fl_flags2, fd_flags1, fd_flags2; + struct statfs statfs1, statfs2; + off_t pos1, pos2; + char buf[5]; + + test_init(argc, argv); + + fd = _memfd_create("somename", MFD_CLOEXEC); + if (fd < 0) + err(1, "Can't call memfd_create"); + + if (fcntl(fd, F_SETFL, O_APPEND) < 0) + err(1, "Can't get fl flags"); + + if ((fl_flags1 = fcntl(fd, F_GETFL)) == -1) + err(1, "Can't get fl flags"); + + if ((fd_flags1 = fcntl(fd, F_GETFD)) == -1) + err(1, "Can't get fd flags"); + + if (fstatfs(fd, &statfs1) < 0) + err(1, "statfs issue"); + + if (write(fd, "hello", 5) != 5) + err(1, "write error"); + + pos1 = 3; + if (lseek(fd, pos1, SEEK_SET) < 0) + err(1, "seek error"); + + test_daemon(); + test_waitsig(); + + if ((fl_flags2 = fcntl(fd, F_GETFL)) == -1) + err(1, "Can't get fl flags"); + + if (fl_flags1 != fl_flags2) { + fail("fl flags differs"); + return 1; + } + + if ((fd_flags2 = fcntl(fd, F_GETFD)) == -1) + err(1, "Can't get fd flags"); + + if (fd_flags1 != fd_flags2) { + fail("fd flags differs"); + return 1; + } + + if (fstatfs(fd, &statfs2) < 0) + err(1, "statfs issue"); + + if (statfs1.f_type != statfs2.f_type) { + fail("statfs.f_type differs"); + return 1; + } + + pos2 = lseek(fd, 0, SEEK_CUR); + if (pos1 != pos2) { + fail("position differs"); + return 1; + } + + if (pread(fd, buf, sizeof(buf), 0) != sizeof(buf)) { + fail("read problem"); + return 1; + } + + if (memcmp(buf, "hello", sizeof(buf))) { + fail("content mismatch"); + return 1; + } + + pass(); + + return 0; +} diff --git a/test/zdtm/static/memfd01.c b/test/zdtm/static/memfd01.c new file mode 100644 index 
000000000..7a7853642 --- /dev/null +++ b/test/zdtm/static/memfd01.c @@ -0,0 +1,114 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "zdtmtst.h" + +const char *test_doc = "memfd with different file pointer"; +const char *test_author = "Nicolas Viennot "; + +#define err(exitcode, msg, ...) ({ pr_perror(msg, ##__VA_ARGS__); exit(exitcode); }) + +static int _memfd_create(const char *name, unsigned int flags) +{ + return syscall(SYS_memfd_create, name, flags); +} + +int main(int argc, char *argv[]) +{ + pid_t pid, pid_child; + int fd, ret, status; + task_waiter_t t; + + test_init(argc, argv); + + task_waiter_init(&t); + + fd = _memfd_create("somename", MFD_CLOEXEC); + if (fd < 0) + err(1, "Can't call memfd_create"); + + pid = getpid(); + + pid_child = fork(); + if (pid_child < 0) + err(1, "Can't fork"); + + if (!pid_child) { + char fdpath[100]; + char buf[1]; + int fl_flags1, fl_flags2, fd_flags1, fd_flags2; + + snprintf(fdpath, sizeof(fdpath), "/proc/%d/fd/%d", pid, fd); + /* + * We pass O_LARGEFILE because in compat mode, our file + * descriptor does not get O_LARGEFILE automatically, but the + * restorer using non-compat open() is forced O_LARGEFILE. + * This creates a flag difference, which we don't want to deal + * with this at the moment. 
+ */ + fd = open(fdpath, O_RDONLY | O_LARGEFILE); + if (fd < 0) + err(1, "Can't open memfd via proc"); + + if ((fl_flags1 = fcntl(fd, F_GETFL)) == -1) + err(1, "Can't get fl flags"); + + if ((fd_flags1 = fcntl(fd, F_GETFD)) == -1) + err(1, "Can't get fd flags"); + + task_waiter_complete(&t, 1); + // checkpoint-restore happens here + task_waiter_wait4(&t, 2); + + if (read(fd, buf, 1) != 1) + err(1, "Can't read"); + + if ((fl_flags2 = fcntl(fd, F_GETFL)) == -1) + err(1, "Can't get fl flags"); + + if (fl_flags1 != fl_flags2) + err(1, "fl flags differs"); + + if ((fd_flags2 = fcntl(fd, F_GETFD)) == -1) + err(1, "Can't get fd flags"); + + if (fd_flags1 != fd_flags2) + err(1, "fd flags differs"); + + if (buf[0] != 'x') + err(1, "Read incorrect"); + + return 0; + } + + task_waiter_wait4(&t, 1); + + test_daemon(); + test_waitsig(); + + if (write(fd, "x", 1) != 1) + err(1, "Can't write"); + + task_waiter_complete(&t, 2); + + ret = wait(&status); + if (ret == -1 || !WIFEXITED(status) || WEXITSTATUS(status)) { + kill(pid, SIGKILL); + fail("child had issue"); + return 1; + } + + pass(); + + return 0; +} diff --git a/test/zdtm/static/memfd02.c b/test/zdtm/static/memfd02.c new file mode 100644 index 000000000..1843e9c9a --- /dev/null +++ b/test/zdtm/static/memfd02.c @@ -0,0 +1,87 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "zdtmtst.h" + +const char *test_doc = "memfd mmap"; +const char *test_author = "Nicolas Viennot "; + +#define err(exitcode, msg, ...) 
({ pr_perror(msg, ##__VA_ARGS__); exit(exitcode); }) + +static int _memfd_create(const char *name, unsigned int flags) +{ + return syscall(SYS_memfd_create, name, flags); +} + +int main(int argc, char *argv[]) +{ +#define LEN 6 + int fd; + void *addr_shared, *addr_private; + char buf[LEN]; + + test_init(argc, argv); + + fd = _memfd_create("somename", MFD_CLOEXEC); + if (fd < 0) + err(1, "Can't call memfd_create"); + + if (ftruncate(fd, LEN) < 0) + err(1, "Can't truncate"); + + addr_shared = mmap(NULL, LEN, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + if (addr_shared == MAP_FAILED) + err(1, "Can't mmap"); + + write(fd, "write1", LEN); + + addr_private = mmap(NULL, LEN, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); + if (addr_private == MAP_FAILED) + err(1, "Can't mmap"); + + test_daemon(); + test_waitsig(); + + if (memcmp(addr_shared, "write1", LEN)) { + fail("content mismatch (shared)"); + return 1; + } + + strcpy(addr_shared, "write2"); + + if (pread(fd, buf, LEN, 0) != LEN) { + fail("read problem"); + return 1; + } + + if (memcmp(buf, "write2", LEN)) { + fail("content mismatch (shared)"); + return 1; + } + + if (memcmp(addr_private, "write2", LEN)) { + fail("content mismatch (private)"); + return 1; + } + + strcpy(addr_private, "write3"); + + if (memcmp(addr_shared, "write2", LEN)) { + fail("content mismatch (shared)"); + return 1; + } + + pass(); + + return 0; +} diff --git a/test/zdtm/static/memfd03.c b/test/zdtm/static/memfd03.c new file mode 100644 index 000000000..faedf9383 --- /dev/null +++ b/test/zdtm/static/memfd03.c @@ -0,0 +1,97 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "zdtmtst.h" + +const char *test_doc = "memfd seals"; +const char *test_author = "Nicolas Viennot "; + +#define err(exitcode, msg, ...) 
({ pr_perror(msg, ##__VA_ARGS__); exit(exitcode); }) + +static int _memfd_create(const char *name, unsigned int flags) +{ + return syscall(SYS_memfd_create, name, flags); +} + + +#ifndef F_LINUX_SPECIFIC_BASE +# define F_LINUX_SPECIFIC_BASE 1024 +#endif + +#ifndef F_ADD_SEALS + #define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9) +#endif + +#ifndef F_GET_SEALS + #define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10) +#endif + + +#ifndef F_SEAL_SEAL +#define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */ +#define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */ +#define F_SEAL_GROW 0x0004 /* prevent file from growing */ +#define F_SEAL_WRITE 0x0008 /* prevent writes */ +#endif + +int main(int argc, char *argv[]) +{ +#define LEN 5 + int fd, fd2; + void *addr_write, *addr_read; + char fdpath[100]; + + test_init(argc, argv); + + fd = _memfd_create("somename", MFD_ALLOW_SEALING | MFD_CLOEXEC); + if (fd < 0) + err(1, "Can't call memfd_create"); + + if (write(fd, "hello", LEN) != LEN) + err(1, "Can't write"); + + if (fcntl(fd, F_ADD_SEALS, F_SEAL_WRITE) < 0) + err(1, "Can't add seals"); + + test_daemon(); + test_waitsig(); + + snprintf(fdpath, sizeof(fdpath), "/proc/self/fd/%d", fd); + fd2 = open(fdpath, O_RDWR); + if (fd2 < 0) + err(1, "Can't open memfd via proc"); + + if (fcntl(fd, F_GET_SEALS) != F_SEAL_WRITE) { + fail("Seals are different"); + return 1; + } + + addr_write = mmap(NULL, LEN, PROT_WRITE, MAP_SHARED, fd2, 0); + if (addr_write != MAP_FAILED) { + fail("Should not be able to get write access"); + return 1; + } + + addr_read = mmap(NULL, 1, PROT_READ, MAP_PRIVATE, fd2, 0); + if (addr_read == MAP_FAILED) + err(1, "Can't mmap"); + + if (memcmp(addr_read, "hello", LEN)) { + fail("Mapping has bad data"); + return 1; + } + + pass(); + + return 0; +} From ec116449544cd2f062b7523c2eadc6d791baa0ac Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 23 Jan 2020 16:39:28 +0000 Subject: [PATCH 0336/2030] criu: Use strlcpy() instead of strncpy() gcc8 in 
Fedora Rawhide has a new useful warning: > criu/img-remote.c: In function 'push_snapshot_id': > criu/img-remote.c:1099:2: error: 'strncpy' specified bound 4096 equals destination size [-Werror=stringop-truncation] > 1099 | strncpy(rn.snapshot_id, snapshot_id, PATH_MAX); > | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ From man 3 strncpy: > Warning: If there is no null byte among the first n bytes of src, > the string placed in dest will not be null-terminated. Signed-off-by: Dmitry Safonov --- criu/files-reg.c | 3 ++- criu/files.c | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/criu/files-reg.c b/criu/files-reg.c index b0dad78e6..c2a55aeb3 100644 --- a/criu/files-reg.c +++ b/criu/files-reg.c @@ -33,6 +33,7 @@ #include "namespaces.h" #include "proc_parse.h" #include "pstree.h" +#include "string.h" #include "fault-injection.h" #include "external.h" #include "memfd.h" @@ -457,7 +458,7 @@ static int open_remap_ghost(struct reg_file_info *rfi, gf->remap.rmnt_id = rfi->rfe->mnt_id; if (S_ISDIR(gfe->mode)) - strncpy(gf->remap.rpath, rfi->path, PATH_MAX); + strlcpy(gf->remap.rpath, rfi->path, PATH_MAX); else ghost_path(gf->remap.rpath, PATH_MAX, rfi, rpe); diff --git a/criu/files.c b/criu/files.c index 789b08a4c..f7963bf54 100644 --- a/criu/files.c +++ b/criu/files.c @@ -45,6 +45,7 @@ #include "autofs.h" #include "parasite.h" #include "parasite-syscall.h" +#include "string.h" #include "kerndat.h" #include "fdstore.h" @@ -291,8 +292,7 @@ static int fixup_overlayfs(struct fd_parms *p, struct fd_link *link) char buf[PATH_MAX]; int n; - strncpy(buf, link->name, PATH_MAX); - buf[PATH_MAX - 1] = 0; + strlcpy(buf, link->name, PATH_MAX); n = snprintf(link->name, PATH_MAX, "%s/%s", m->mountpoint, buf + 2); if (n >= PATH_MAX) { pr_err("Not enough space to replace %s\n", buf); From bc49927bbc28b41e4b2759d42dc24f1d66e22df3 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 6 Feb 2020 18:01:00 +0000 Subject: [PATCH 0337/2030] criu: Make use strlcpy() to 
copy into allocated strings strncpy() with n == strlen(src) won't put NULL-terminator in dst. Signed-off-by: Dmitry Safonov --- criu/cr-restore.c | 1 + 1 file changed, 1 insertion(+) diff --git a/criu/cr-restore.c b/criu/cr-restore.c index f50448cd2..85105a18e 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -77,6 +77,7 @@ #include "fdstore.h" #include "string.h" #include "memfd.h" +#include "string.h" #include "parasite-syscall.h" #include "files-reg.h" From 3eab205bae1d31f00922d1e717a1cd56c1cb7177 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Thu, 6 Feb 2020 21:20:20 -0800 Subject: [PATCH 0338/2030] python: sort imports 202 Additional newline in a group of imports. I100 Import statements are in the wrong order. Signed-off-by: Andrei Vagin --- test/inhfd/memfd.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/inhfd/memfd.py b/test/inhfd/memfd.py index d9ce01e41..b06e35068 100755 --- a/test/inhfd/memfd.py +++ b/test/inhfd/memfd.py @@ -1,5 +1,5 @@ -import os import ctypes +import os libc = ctypes.CDLL(None) From 38793699e7f0e97d14b6b17e83e8bc071c3ce283 Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Fri, 21 Feb 2020 12:14:38 +0300 Subject: [PATCH 0339/2030] test/jenkins: remove empty line at the end of file Signed-off-by: Pavel Tikhomirov --- test/jenkins/criu-fault.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/test/jenkins/criu-fault.sh b/test/jenkins/criu-fault.sh index 4e3790e59..c27dd3738 100755 --- a/test/jenkins/criu-fault.sh +++ b/test/jenkins/criu-fault.sh @@ -27,4 +27,3 @@ fi ./test/zdtm.py run -t zdtm/static/maps04 --fault 131 --keep-going --report report --pre 2:1 || fail ./test/zdtm.py run -t zdtm/transition/maps008 --fault 131 --keep-going --report report --pre 2:1 || fail ./test/zdtm.py run -t zdtm/static/maps01 --fault 132 -f h || fail - From cdd08cdff8692aad9d05dd83e2ab24379cd83393 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Wed, 26 Feb 2020 12:25:37 +0200 Subject: [PATCH 0340/2030] uffd: use 
userns_call() to execute ioctl(UFFDIO_API) In the recent kernels the userfaultfd support for FORK events is limited to CAP_SYS_PTRACE. That causes the following error when the ioctl(UFFDIO_API) is executed from non-privileged userns: Error (criu/uffd.c:273): uffd: Failed to get uffd API: Operation not permitted Wrapping the call to ioctl(UFFDIO_API) in userns_call() resolves the issue. Fixes: #964 Signed-off-by: Mike Rapoport --- criu/uffd.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/criu/uffd.c b/criu/uffd.c index c47b35b1f..99373c04d 100644 --- a/criu/uffd.c +++ b/criu/uffd.c @@ -40,6 +40,7 @@ #include "tls.h" #include "fdstore.h" #include "util.h" +#include "namespaces.h" #undef LOG_PREFIX #define LOG_PREFIX "uffd: " @@ -254,6 +255,13 @@ bool uffd_noncooperative(void) return (kdat.uffd_features & features) == features; } +static int uffd_api_ioctl(void *arg, int fd, pid_t pid) +{ + struct uffdio_api *uffdio_api = arg; + + return ioctl(fd, UFFDIO_API, uffdio_api); +} + int uffd_open(int flags, unsigned long *features) { struct uffdio_api uffdio_api = { 0 }; @@ -269,7 +277,8 @@ int uffd_open(int flags, unsigned long *features) if (features) uffdio_api.features = *features; - if (ioctl(uffd, UFFDIO_API, &uffdio_api)) { + if (userns_call(uffd_api_ioctl, 0, &uffdio_api, sizeof(uffdio_api), + uffd)) { pr_perror("Failed to get uffd API"); goto err; } From 48f3b6516b384f9c4f240aff76f671697198884b Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Tue, 18 Feb 2020 19:45:08 +0000 Subject: [PATCH 0341/2030] criu(8): Add documentation for --enable-fs This option was introduced with: https://github.com/checkpoint-restore/criu/commit/e2c38245c613df5e36dcf0253c7652f928e46abf v2: (comment from Pavel Tikhomirov) --enable-fs does not fit with --external dev[]:, see try_resolve_ext_mount, external dev mounts only determined for FSTYPE__UNSUPPORTED.
Signed-off-by: Radostin Stoyanov --- Documentation/criu.txt | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/Documentation/criu.txt b/Documentation/criu.txt index a6b9f7fae..0ac29103a 100644 --- a/Documentation/criu.txt +++ b/Documentation/criu.txt @@ -90,6 +90,19 @@ The following levels are available: *-L*, *--libdir* 'path':: Path to plugins directory. +*--enable-fs* ['fs'[,'fs'...]]:: + Specify a comma-separated list of filesystem names that should + be auto-detected. The value 'all' enables auto-detection for + all filesystems. ++ +Note: This option is not safe, use at your own risk. +Auto-detecting a filesystem mount assumes that the mountpoint can +be restored with *mount(src, mountpoint, flags, options)*. When used, +*dump* is expected to always succeed if a mountpoint is to be +auto-detected, however *restore* may fail (or do something wrong) +if the assumption for restore logic is incorrect. This option is +not compatable with *--external* *dev*. + *--action-script* 'script':: Add an external action script to be executed at certain stages. The environment variable *CRTOOLS_SCRIPT_ACTION* is available From bb032cc3e218c4aee5394642caa59be3909b259a Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Tue, 18 Feb 2020 19:53:36 +0000 Subject: [PATCH 0342/2030] criu(8): Convert tabs to spaces Signed-off-by: Radostin Stoyanov --- Documentation/criu.txt | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Documentation/criu.txt b/Documentation/criu.txt index 0ac29103a..ab63e461c 100644 --- a/Documentation/criu.txt +++ b/Documentation/criu.txt @@ -477,7 +477,7 @@ The 'mode' may be one of the following: *soft*::: Restore cgroup properties if only cgroup has been created by *criu*, otherwise do not restore properties. This is the - default if mode is unspecified. + default if mode is unspecified. *full*::: Always restore all cgroups and their properties. 
@@ -575,17 +575,17 @@ check* always checks Category 1 features unless *--feature* is specified which only checks a specified feature. *Category 1*::: Absolutely required. These are features like support for - */proc/PID/map_files*, *NETLINK_SOCK_DIAG* socket - monitoring, */proc/sys/kernel/ns_last_pid* etc. + */proc/PID/map_files*, *NETLINK_SOCK_DIAG* socket + monitoring, */proc/sys/kernel/ns_last_pid* etc. *Category 2*::: Required only for specific cases. These are features - like AIO remap, */dev/net/tun* and others that are only - required if a process being dumped or restored - is using those. + like AIO remap, */dev/net/tun* and others that are only + required if a process being dumped or restored + is using those. *Category 3*::: Experimental. These are features like *task-diag* that - are used for experimental purposes (mostly - during development). + are used for experimental purposes (mostly + during development). If there are no errors or warnings, *criu* prints "Looks good." and its exit code is 0. From 563c5e5e763949de7b1c48bd04c777db17d768e1 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Tue, 4 Feb 2020 09:10:49 +0000 Subject: [PATCH 0343/2030] seize: prepare for cgroupv2 freezer The cgroupv2 freezer does not return the same strings as v1. Instead of THAWED and FROZEN v2 returns 0 and 1 (strings). This prepares the seize code to use 0 and 1 everywhere and THAWED and FROZEN only for v1 specific code paths. 
Signed-off-by: Adrian Reber --- criu/seize.c | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/criu/seize.c b/criu/seize.c index fd314666f..14cd82417 100644 --- a/criu/seize.c +++ b/criu/seize.c @@ -30,7 +30,14 @@ static const char frozen[] = "FROZEN"; static const char freezing[] = "FREEZING"; static const char thawed[] = "THAWED"; -static const char *get_freezer_state(int fd) +enum freezer_state { + FREEZER_ERROR = -1, + THAWED, + FROZEN, + FREEZING +}; + +static enum freezer_state get_freezer_state(int fd) { char state[32]; int ret; @@ -52,15 +59,15 @@ static const char *get_freezer_state(int fd) pr_debug("freezer.state=%s\n", state); if (strcmp(state, frozen) == 0) - return frozen; + return FROZEN; else if (strcmp(state, freezing) == 0) - return freezing; + return FREEZING; else if (strcmp(state, thawed) == 0) - return thawed; + return THAWED; pr_err("Unknown freezer state: %s\n", state); err: - return NULL; + return FREEZER_ERROR; } static bool freezer_thawed; @@ -98,7 +105,7 @@ static int freezer_restore_state(void) static int processes_to_wait; static pid_t *processes_to_wait_pids; -static int seize_cgroup_tree(char *root_path, const char *state) +static int seize_cgroup_tree(char *root_path, enum freezer_state state) { DIR *dir; struct dirent *de; @@ -134,7 +141,7 @@ static int seize_cgroup_tree(char *root_path, const char *state) if (!compel_interrupt_task(pid)) { pr_debug("SEIZE %d: success\n", pid); processes_to_wait++; - } else if (state == frozen) { + } else if (state == FROZEN) { char buf[] = "/proc/XXXXXXXXXX/exe"; struct stat st; @@ -332,7 +339,7 @@ static int freeze_processes(void) { int fd, exit_code = -1; char path[PATH_MAX]; - const char *state = thawed; + enum freezer_state state = THAWED; static const unsigned long step_ms = 100; unsigned long nr_attempts = (opts.timeout * 1000000) / step_ms; @@ -361,11 +368,11 @@ static int freeze_processes(void) return -1; } state = get_freezer_state(fd); - 
if (!state) { + if (state == FREEZER_ERROR) { close(fd); return -1; } - if (state == thawed) { + if (state == THAWED) { freezer_thawed = true; lseek(fd, 0, SEEK_SET); @@ -384,12 +391,12 @@ static int freeze_processes(void) */ for (; i <= nr_attempts; i++) { state = get_freezer_state(fd); - if (!state) { + if (state == FREEZER_ERROR) { close(fd); return -1; } - if (state == frozen) + if (state == FROZEN) break; if (alarm_timeouted()) goto err; From 9f902e0c6b74ce2d7abf6a632d3abdf6b1370751 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Tue, 4 Feb 2020 09:53:51 +0000 Subject: [PATCH 0344/2030] seize: factor out opening and writing the freezer state More preparations for cgroupv2 freezer. Factor out the freezer state opening and writing to have one location where to handle v1 and v2 differences. Signed-off-by: Adrian Reber --- criu/seize.c | 81 +++++++++++++++++++++++++++++++++++----------------- 1 file changed, 55 insertions(+), 26 deletions(-) diff --git a/criu/seize.c b/criu/seize.c index 14cd82417..b53707e44 100644 --- a/criu/seize.c +++ b/criu/seize.c @@ -20,6 +20,7 @@ #include "seccomp.h" #include "seize.h" #include "stats.h" +#include "string.h" #include "xmalloc.h" #include "util.h" #include @@ -77,13 +78,39 @@ const char *get_real_freezer_state(void) return freezer_thawed ?
thawed : frozen; } -static int freezer_restore_state(void) +static int freezer_write_state(int fd, enum freezer_state new_state) { - int fd; - char path[PATH_MAX]; + char state[32]; + int ret; - if (!opts.freeze_cgroup || freezer_thawed) - return 0; + if (new_state == THAWED) { + if (strlcpy(state, thawed, sizeof(state)) >= sizeof(state)) + return -1; + } else if (new_state == FROZEN) { + if (strlcpy(state, frozen, sizeof(state)) >= sizeof(state)) + return -1; + } else { + return -1; + } + + ret = lseek(fd, 0, SEEK_SET); + if (ret < 0) { + pr_perror("Unable to seek freezer FD"); + return -1; + } + if (write(fd, state, sizeof(state)) != sizeof(state)) { + pr_perror("Unable to %s tasks", + (new_state == THAWED) ? "thaw" : "freeze"); + return -1; + } + + return 0; +} + +static int freezer_open(void) +{ + char path[PATH_MAX]; + int fd; snprintf(path, sizeof(path), "%s/freezer.state", opts.freeze_cgroup); fd = open(path, O_RDWR); @@ -92,13 +119,24 @@ static int freezer_restore_state(void) return -1; } - if (write(fd, frozen, sizeof(frozen)) != sizeof(frozen)) { - pr_perror("Unable to freeze tasks"); - close(fd); + return fd; +} + +static int freezer_restore_state(void) +{ + int fd; + int ret; + + if (!opts.freeze_cgroup || freezer_thawed) + return 0; + + fd = freezer_open(); + if (fd < 0) return -1; - } + + ret = freezer_write_state(fd, FROZEN); close(fd); - return 0; + return ret; } /* A number of tasks in a freezer cgroup which are not going to be dumped */ @@ -338,7 +376,6 @@ static int log_unfrozen_stacks(char *root) static int freeze_processes(void) { int fd, exit_code = -1; - char path[PATH_MAX]; enum freezer_state state = THAWED; static const unsigned long step_ms = 100; @@ -361,12 +398,10 @@ static int freeze_processes(void) pr_debug("freezing processes: %lu attempts with %lu ms steps\n", nr_attempts, step_ms); - snprintf(path, sizeof(path), "%s/freezer.state", opts.freeze_cgroup); - fd = open(path, O_RDWR); - if (fd < 0) { - pr_perror("Unable to open %s", 
path); + fd = freezer_open(); + if (fd < 0) return -1; - } + state = get_freezer_state(fd); if (state == FREEZER_ERROR) { close(fd); @@ -375,9 +410,7 @@ static int freeze_processes(void) if (state == THAWED) { freezer_thawed = true; - lseek(fd, 0, SEEK_SET); - if (write(fd, frozen, sizeof(frozen)) != sizeof(frozen)) { - pr_perror("Unable to freeze tasks"); + if (freezer_write_state(fd, FROZEN)) { close(fd); return -1; } @@ -427,13 +460,9 @@ static int freeze_processes(void) } err: - if (exit_code == 0 || freezer_thawed) { - lseek(fd, 0, SEEK_SET); - if (write(fd, thawed, sizeof(thawed)) != sizeof(thawed)) { - pr_perror("Unable to thaw tasks"); - exit_code = -1; - } - } + if (exit_code == 0 || freezer_thawed) + exit_code = freezer_write_state(fd, THAWED); + if (close(fd)) { pr_perror("Unable to thaw tasks"); return -1; From 10416bcbcb3c2ab4732971083f99d9390f8d168b Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Tue, 4 Feb 2020 13:38:42 +0000 Subject: [PATCH 0345/2030] seize: support cgroup v2 freezer This adds support to checkpoint processes using the cgroup v2 freezer. Signed-off-by: Adrian Reber --- criu/seize.c | 150 +++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 133 insertions(+), 17 deletions(-) diff --git a/criu/seize.c b/criu/seize.c index b53707e44..0ba2d9b1d 100644 --- a/criu/seize.c +++ b/criu/seize.c @@ -38,7 +38,10 @@ enum freezer_state { FREEZING }; -static enum freezer_state get_freezer_state(int fd) +/* Track if we are running on cgroup v2 system. */ +static bool cgroup_v2 = false; + +static enum freezer_state get_freezer_v1_state(int fd) { char state[32]; int ret; @@ -71,6 +74,70 @@ err: return FREEZER_ERROR; } +static enum freezer_state get_freezer_v2_state(int fd) +{ + int exit_code = FREEZER_ERROR; + char path[PATH_MAX]; + FILE *event; + char state; + int ret; + + /* + * cgroupv2 freezer uses cgroup.freeze to control the state. The file + * can return 0 or 1. 1 means the cgroup is frozen; 0 means it is not + * frozen. 
Writing 1 to an unfrozen cgroup can freeze it. Freezing can + * take some time and if the cgroup has finished freezing can be + * seen in cgroup.events: frozen 0|1. + */ + + ret = lseek(fd, 0, SEEK_SET); + if (ret < 0) { + pr_perror("Unable to seek freezer FD"); + goto out; + } + ret = read(fd, &state, 1); + if (ret <= 0) { + pr_perror("Unable to read from freezer FD"); + goto out; + } + pr_debug("cgroup.freeze=%c\n", state); + if (state == '0') { + exit_code = THAWED; + goto out; + } + + snprintf(path, sizeof(path), "%s/cgroup.events", opts.freeze_cgroup); + event = fopen(path, "r"); + if (event == NULL) { + pr_perror("Unable to open %s", path); + goto out; + } + while (fgets(path, sizeof(path), event)) { + if (strncmp(path, "frozen", 6) != 0) { + continue; + } else if (strncmp(path, "frozen 0", 8) == 0) { + exit_code = FREEZING; + goto close; + } else if (strncmp(path, "frozen 1", 8) == 0) { + exit_code = FROZEN; + goto close; + } + } + + pr_err("Unknown freezer state: %c\n", state); +close: + fclose(event); +out: + return exit_code; +} + +static enum freezer_state get_freezer_state(int fd) +{ + if (cgroup_v2) + return get_freezer_v2_state(fd); + return get_freezer_v1_state(fd); +} + static bool freezer_thawed; const char *get_real_freezer_state(void) @@ -80,15 +147,23 @@ const char *get_real_freezer_state(void) static int freezer_write_state(int fd, enum freezer_state new_state) { - char state[32]; + char state[32] = {0}; int ret; if (new_state == THAWED) { - if (strlcpy(state, thawed, sizeof(state)) >= sizeof(state)) - return -1; + if (cgroup_v2) + state[0] = '0'; + else + if (strlcpy(state, thawed, sizeof(state)) >= + sizeof(state)) + return -1; } else if (new_state == FROZEN) { - if (strlcpy(state, frozen, sizeof(state)) >= sizeof(state)) - return -1; + if (cgroup_v2) + state[0] = '1'; + else + if (strlcpy(state, frozen, sizeof(state)) >= + sizeof(state)) + return -1; } else { return -1; } @@ -109,10 +184,13 @@ static int freezer_write_state(int fd, enum 
freezer_state new_state) static int freezer_open(void) { + const char freezer_v1[] = "freezer.state"; + const char freezer_v2[] = "cgroup.freeze"; char path[PATH_MAX]; int fd; - snprintf(path, sizeof(path), "%s/freezer.state", opts.freeze_cgroup); + snprintf(path, sizeof(path), "%s/%s", opts.freeze_cgroup, + cgroup_v2 ? freezer_v2 : freezer_v1); fd = open(path, O_RDWR); if (fd < 0) { pr_perror("Unable to open %s", path); @@ -139,6 +217,22 @@ static int freezer_restore_state(void) return ret; } +static FILE *freezer_open_thread_list(char *root_path) +{ + char path[PATH_MAX]; + FILE *f; + + snprintf(path, sizeof(path), "%s/%s", root_path, + cgroup_v2 ? "cgroup.threads" : "tasks"); + f = fopen(path, "r"); + if (f == NULL) { + pr_perror("Unable to open %s", path); + return NULL; + } + + return f; +} + /* A number of tasks in a freezer cgroup which are not going to be dumped */ static int processes_to_wait; static pid_t *processes_to_wait_pids; @@ -154,12 +248,10 @@ static int seize_cgroup_tree(char *root_path, enum freezer_state state) * New tasks can appear while a freezer state isn't * frozen, so we need to catch all new tasks. 
*/ - snprintf(path, sizeof(path), "%s/tasks", root_path); - f = fopen(path, "r"); - if (f == NULL) { - pr_perror("Unable to open %s", path); + f = freezer_open_thread_list(root_path); + if (f == NULL) return -1; - } + while (fgets(path, sizeof(path), f)) { pid_t pid; int ret; @@ -306,12 +398,10 @@ static int log_unfrozen_stacks(char *root) char path[PATH_MAX]; FILE *f; - snprintf(path, sizeof(path), "%s/tasks", root); - f = fopen(path, "r"); - if (f == NULL) { - pr_perror("Unable to open %s", path); + f = freezer_open_thread_list(root); + if (f == NULL) return -1; - } + while (fgets(path, sizeof(path), f)) { pid_t pid; int ret, stack; @@ -820,6 +910,27 @@ err_close: return -1; } +static int cgroup_version(void) +{ + char path[PATH_MAX]; + + snprintf(path, sizeof(path), "%s/freezer.state", opts.freeze_cgroup); + if (access(path, F_OK) == 0) { + cgroup_v2 = false; + return 0; + } + + snprintf(path, sizeof(path), "%s/cgroup.freeze", opts.freeze_cgroup); + if (access(path, F_OK) == 0) { + cgroup_v2 = true; + return 0; + } + + pr_err("Neither a cgroupv1 (freezer.state) or cgroupv2 (cgroup.freeze) control file found.\n"); + + return -1; +} + int collect_pstree(void) { pid_t pid = root_item->pid->real; @@ -835,6 +946,11 @@ int collect_pstree(void) */ alarm(opts.timeout); + if (opts.freeze_cgroup && cgroup_version()) + goto err; + + pr_debug("Detected cgroup V%d freezer\n", cgroup_v2 ? 2 : 1); + if (opts.freeze_cgroup && freeze_processes()) goto err; From 4129d3262ad2d2ac6875c2c86d565528969d8e72 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Wed, 5 Feb 2020 09:39:32 +0000 Subject: [PATCH 0346/2030] cgroup2: add minimal cgroup2 support The runc test cases are (sometimes) mounting a cgroup inside of the container. For these tests to succeed, let CRIU know that cgroup2 exists and how to restore such a mount. This does not fix any specific cgroup2 settings, it just enables CRIU to mount cgroup2 in the restored container. 
Signed-off-by: Adrian Reber --- criu/filesystems.c | 5 +++++ images/mnt.proto | 2 ++ 2 files changed, 7 insertions(+) diff --git a/criu/filesystems.c b/criu/filesystems.c index 1e4550b37..d76b18291 100644 --- a/criu/filesystems.c +++ b/criu/filesystems.c @@ -747,6 +747,11 @@ static struct fstype fstypes[] = { .code = FSTYPE__CGROUP, .parse = cgroup_parse, .sb_equal = cgroup_sb_equal, + }, { + .name = "cgroup2", + .code = FSTYPE__CGROUP2, + .parse = cgroup_parse, + .sb_equal = cgroup_sb_equal, }, { .name = "aufs", .code = FSTYPE__AUFS, diff --git a/images/mnt.proto b/images/mnt.proto index 4160acbf6..8983395ae 100644 --- a/images/mnt.proto +++ b/images/mnt.proto @@ -28,6 +28,8 @@ enum fstype { // RPC_PIPEFS = 20; // NFS = 21; // NFS4 = 22; + + CGROUP2 = 23; }; message mnt_entry { From ffe0896ed01790e62cd617cf01bc6a4076fa4e87 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Sun, 1 Mar 2020 04:26:12 +0300 Subject: [PATCH 0347/2030] fs: use __open_proc instead of open("/proc/...", ... ) Processes can run in a mount namespace without /proc. 
Reported-by: Mr Jenkins Signed-off-by: Andrei Vagin --- criu/files-reg.c | 4 +--- criu/memfd.c | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/criu/files-reg.c b/criu/files-reg.c index c2a55aeb3..0d0076666 100644 --- a/criu/files-reg.c +++ b/criu/files-reg.c @@ -778,14 +778,12 @@ static int dump_ghost_file(int _fd, u32 id, const struct stat *st, dev_t phys_de if (S_ISREG(st->st_mode)) { int fd, ret; - char lpath[PSFDS]; /* * Reopen file locally since it may have no read * permissions when drained */ - sprintf(lpath, "/proc/self/fd/%d", _fd); - fd = open(lpath, O_RDONLY); + fd = open_proc(PROC_SELF, "fd/%d", _fd); if (fd < 0) { pr_perror("Can't open ghost original file"); goto err_out; diff --git a/criu/memfd.c b/criu/memfd.c index d17c10fb7..30ccdf22c 100644 --- a/criu/memfd.c +++ b/criu/memfd.c @@ -299,7 +299,6 @@ static int memfd_open_inode(struct memfd_inode *inode) int memfd_open(struct file_desc *d, u32 *fdflags) { - char lpath[PSFDS]; struct memfd_info *mfi; MemfdFileEntry *mfe; int fd, _fd; @@ -318,14 +317,13 @@ int memfd_open(struct file_desc *d, u32 *fdflags) goto err; /* Reopen the fd with original permissions */ - sprintf(lpath, "/proc/self/fd/%d", fd); flags = fdflags ? *fdflags : mfe->flags; /* * Ideally we should call compat version open() to not force the * O_LARGEFILE file flag with regular open(). It doesn't seem that * important though. */ - _fd = open(lpath, flags); + _fd = __open_proc(getpid(), 0, flags, "fd/%d", fd); if (_fd < 0) { pr_perror("Can't reopen memfd id=%d", mfe->id); goto err; From fce196d88df8363666922ac2fa2d6e23bb774289 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Sat, 29 Feb 2020 09:51:33 +0300 Subject: [PATCH 0348/2030] memfd: don't corrupt a state of the dumped fd Right now, criu uses a dumped fd to dump content of a memfd "file". Here are two reasons why we should not do this: * a state of a dumped fd doesn't have to be changed, but now criu calls lseek on it. 
This can be workarounded by using pread. * a dumped descriptor can be write-only. Reported-by: Mr Jenkins Cc: Nicolas Viennot Signed-off-by: Andrei Vagin --- criu/memfd.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/criu/memfd.c b/criu/memfd.c index 30ccdf22c..983e01b38 100644 --- a/criu/memfd.c +++ b/criu/memfd.c @@ -119,6 +119,7 @@ out: static struct memfd_inode *dump_unique_memfd_inode(int lfd, const char *name, const struct stat *st) { struct memfd_inode *inode; + int fd; list_for_each_entry(inode, &memfd_inodes, list) if ((inode->dev == st->st_dev) && (inode->ino == st->st_ino)) @@ -132,11 +133,19 @@ static struct memfd_inode *dump_unique_memfd_inode(int lfd, const char *name, co inode->ino = st->st_ino; inode->id = memfd_inode_ids++; - if (dump_memfd_inode(lfd, inode, name, st)) { + fd = open_proc(PROC_SELF, "fd/%d", lfd); + if (fd < 0) { xfree(inode); return NULL; } + if (dump_memfd_inode(fd, inode, name, st)) { + close(fd); + xfree(inode); + return NULL; + } + close(fd); + list_add_tail(&inode->list, &memfd_inodes); return inode; From 58fd63042c925c6422f5453b9a13147ebd2a6769 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Sun, 1 Mar 2020 01:04:20 +0300 Subject: [PATCH 0349/2030] zdtm/inhfd: force python to read new data from a file python 2.7 doesn't call the read system call if it's read file to the end once. The next seek allows to workaround this problem. inhfd/memfd.py hangs due to this issue. Reported-by: Mr Jenkins Signed-off-by: Andrei Vagin --- test/zdtm.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/test/zdtm.py b/test/zdtm.py index 4110b5142..0bd7b84cc 100755 --- a/test/zdtm.py +++ b/test/zdtm.py @@ -697,7 +697,10 @@ class inhfd_test: # regular files, so we loop. data = b'' while not data: - data = peer_file.read(16) + # In python 2.7, peer_file.read() doesn't call the read + # system call if it's read file to the end once. The + # next seek allows to workaround this problem. 
+ data = os.read(peer_file.fileno(), 16) time.sleep(0.1) except Exception as e: print("Unable to read a peer file: %s" % e) From f167d1f4e9eb24dfbda077746f25cfdf8a2b59b9 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 7 Feb 2020 15:59:45 +0300 Subject: [PATCH 0350/2030] fown: Don't fail on dumping files opened with O_PATH O_PATH opened files are special: they have empty file operations in kernel space, so there is not that much we can do with them; even setting the position is not allowed. The same applies to the signal number for owner settings. Signed-off-by: Cyrill Gorcunov Co-developed-by: Alexander Mikhalitsyn Signed-off-by: Alexander Mikhalitsyn (Virtuozzo) --- criu/files-reg.c | 16 +++++++--- criu/files.c | 5 ++- criu/pie/parasite.c | 75 +++++++++++++++++++++++++++++---------------- 3 files changed, 64 insertions(+), 32 deletions(-) diff --git a/criu/files-reg.c b/criu/files-reg.c index 0d0076666..d1d1ee5af 100644 --- a/criu/files-reg.c +++ b/criu/files-reg.c @@ -1776,11 +1776,17 @@ static int do_open_reg(int ns_root_fd, struct reg_file_info *rfi, void *arg) if (fd < 0) return fd; - if ((rfi->rfe->pos != -1ULL) && - lseek(fd, rfi->rfe->pos, SEEK_SET) < 0) { - pr_perror("Can't restore file pos"); - close(fd); - return -1; + /* + * O_PATH opened files carry empty fops in kernel, + * just ignore positioning at all.
+ */ + if (!(rfi->rfe->flags & O_PATH)) { + if (rfi->rfe->pos != -1ULL && + lseek(fd, rfi->rfe->pos, SEEK_SET) < 0) { + pr_perror("Can't restore file pos"); + close(fd); + return -1; + } } return fd; diff --git a/criu/files.c b/criu/files.c index f7963bf54..3f1d77931 100644 --- a/criu/files.c +++ b/criu/files.c @@ -399,7 +399,10 @@ static int fill_fd_params(struct pid *owner_pid, int fd, int lfd, pr_info("%d fdinfo %d: pos: %#16"PRIx64" flags: %16o/%#x\n", owner_pid->real, fd, p->pos, p->flags, (int)p->fd_flags); - ret = fcntl(lfd, F_GETSIG, 0); + if (p->flags & O_PATH) + ret = 0; + else + ret = fcntl(lfd, F_GETSIG, 0); if (ret < 0) { pr_perror("Can't get owner signum on %d", lfd); return -1; diff --git a/criu/pie/parasite.c b/criu/pie/parasite.c index 387a976da..64b5bbb3e 100644 --- a/criu/pie/parasite.c +++ b/criu/pie/parasite.c @@ -317,15 +317,60 @@ grps_err: return -1; } +static int fill_fds_fown(int fd, struct fd_opts *p) +{ + int flags, ret; + struct f_owner_ex owner_ex; + uint32_t v[2]; + + /* + * For O_PATH opened files there is no owner at all. + */ + flags = sys_fcntl(fd, F_GETFL, 0); + if (flags < 0) { + pr_err("fcntl(%d, F_GETFL) -> %d\n", fd, flags); + return -1; + } + if (flags & O_PATH) { + p->fown.pid = 0; + return 0; + } + + ret = sys_fcntl(fd, F_GETOWN_EX, (long)&owner_ex); + if (ret) { + pr_err("fcntl(%d, F_GETOWN_EX) -> %d\n", fd, ret); + return -1; + } + + /* + * Simple case -- nothing is changed. 
+ */ + if (owner_ex.pid == 0) { + p->fown.pid = 0; + return 0; + } + + ret = sys_fcntl(fd, F_GETOWNER_UIDS, (long)&v); + if (ret) { + pr_err("fcntl(%d, F_GETOWNER_UIDS) -> %d\n", fd, ret); + return -1; + } + + p->fown.uid = v[0]; + p->fown.euid = v[1]; + p->fown.pid_type = owner_ex.type; + p->fown.pid = owner_ex.pid; + + return 0; +} + static int fill_fds_opts(struct parasite_drain_fd *fds, struct fd_opts *opts) { int i; for (i = 0; i < fds->nr_fds; i++) { - int flags, fd = fds->fds[i], ret; + int flags, fd = fds->fds[i]; struct fd_opts *p = opts + i; - struct f_owner_ex owner_ex; - uint32_t v[2]; flags = sys_fcntl(fd, F_GETFD, 0); if (flags < 0) { @@ -335,30 +380,8 @@ static int fill_fds_opts(struct parasite_drain_fd *fds, struct fd_opts *opts) p->flags = (char)flags; - ret = sys_fcntl(fd, F_GETOWN_EX, (long)&owner_ex); - if (ret) { - pr_err("fcntl(%d, F_GETOWN_EX) -> %d\n", fd, ret); + if (fill_fds_fown(fd, p)) return -1; - } - - /* - * Simple case -- nothing is changed. - */ - if (owner_ex.pid == 0) { - p->fown.pid = 0; - continue; - } - - ret = sys_fcntl(fd, F_GETOWNER_UIDS, (long)&v); - if (ret) { - pr_err("fcntl(%d, F_GETOWNER_UIDS) -> %d\n", fd, ret); - return -1; - } - - p->fown.uid = v[0]; - p->fown.euid = v[1]; - p->fown.pid_type = owner_ex.type; - p->fown.pid = owner_ex.pid; } return 0; From 8b9c1f4c5bebd501f544cfe81534e4386f85246f Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Fri, 7 Feb 2020 15:59:55 +0300 Subject: [PATCH 0351/2030] zdtm: add a test for files opened with O_PATH In this test, without the patch ("fown: Don't fail on dumping files opened with O_PATH"), we trigger these errors: Error (criu/pie/parasite.c:340): fcntl(4, F_GETOWN_EX) -> -9 Error (criu/files.c:403): Can't get owner signum on 18: Bad file descriptor Error (criu/files-reg.c:1887): Can't restore file pos: Bad file descriptor Signed-off-by: Pavel Tikhomirov Signed-off-by: Alexander Mikhalitsyn (Virtuozzo) --- test/zdtm/static/Makefile | 1 + test/zdtm/static/opath_file.c | 95
+++++++++++++++++++++++++++++++++++ 2 files changed, 96 insertions(+) create mode 100644 test/zdtm/static/opath_file.c diff --git a/test/zdtm/static/Makefile b/test/zdtm/static/Makefile index 5afd18cd6..035b8fa9c 100644 --- a/test/zdtm/static/Makefile +++ b/test/zdtm/static/Makefile @@ -363,6 +363,7 @@ TST_DIR = \ private_bind_propagation \ ghost_on_rofs \ overmounted_file \ + opath_file \ TST_DIR_FILE = \ chroot \ diff --git a/test/zdtm/static/opath_file.c b/test/zdtm/static/opath_file.c new file mode 100644 index 000000000..602a5af27 --- /dev/null +++ b/test/zdtm/static/opath_file.c @@ -0,0 +1,95 @@ +#include +#include +#include +#include + +#include "zdtmtst.h" + +#define TEST_FILE "test_file" +#define BUF_SIZE 4096 +#define fdinfo_field(str, field) !strncmp(str, field":", sizeof(field)) +#define pr_debug(format, arg...) test_msg("DBG: %s:%d: " format, __FILE__, __LINE__, ## arg) + +const char *test_doc = "Check open file with O_PATH preserved"; +const char *test_author = "Pavel Tikhomirov "; + +char *dirname; +TEST_OPTION(dirname, string, "directory name", 1); + +struct fdinfo { + int flags; +}; + +static int parse_self_fdinfo(int fd, struct fdinfo *fi) +{ + char path[PATH_MAX], line[BUF_SIZE]; + FILE *file; + int ret = -1; + unsigned long long val; + + snprintf(path, sizeof(path), "/proc/self/fdinfo/%d", fd); + file = fopen(path, "r"); + if (!file) { + pr_perror("fopen"); + return -1; + } + + while (fgets(line, sizeof(line), file)) { + if (fdinfo_field(line, "flags")) { + if (sscanf(line, "%*s %llu", &val) != 1) { + pr_err("failed to read flags: %s", line); + goto fail; + } + pr_debug("Open flags = %llu\n", val); + fi->flags = val; + ret = 0; + break; + } + } +fail: + fclose(file); + return ret; +} + +int main(int argc, char **argv) +{ + char test_file[PATH_MAX]; + struct fdinfo fi; + int fd; + + test_init(argc, argv); + + if (mkdir(dirname, 0700)) { + pr_perror("can't make directory %s", dirname); + exit(1); + } + + snprintf(test_file, sizeof(test_file), 
"%s/%s", dirname, TEST_FILE); + fd = creat(test_file, 0644); + if (fd == -1) { + pr_perror("cat't create %s", test_file); + return 1; + } + close(fd); + + fd = open(test_file, O_PATH); + if (fd == -1) { + pr_perror("cat't open file %s with O_PATH", test_file); + return 1; + } + + test_daemon(); + test_waitsig(); + + if (parse_self_fdinfo(fd, &fi)) + return 1; + + if (!(fi.flags & O_PATH)) { + fail("File lost O_PATH open flag"); + return 1; + } + + close(fd); + pass(); + return 0; +} From 1936608ce42283b6c5aa007c883092bb2776af4b Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Fri, 7 Feb 2020 16:00:01 +0300 Subject: [PATCH 0352/2030] files: allow dumping opened symlinks To really open symlink file and not the regular file below it, one needs to do open with O_PATH|O_NOFOLLOW flags. Looks like systemd started to open /etc/localtime symlink this way sometimes, and before that nobody actually used this and thus we never supported this in CRIU. Error (criu/files-ext.c:96): Can't dump file 11 of that type [120777] (unknown /etc/localtime) Looks like it is quiet easy to support, as c/r of symlink file is almost the same as c/r of regular one. We need to only make fstatat not following links in check_path_remap. Also we need to take into account support of ghost symlinks. Signed-off-by: Alexander Mikhalitsyn (Virtuozzo) Co-developed-by: Pavel Tikhomirov Signed-off-by: Pavel Tikhomirov --- criu/files-reg.c | 88 +++++++++++++++++++++++++++++++++++++---- criu/files.c | 3 +- images/ghost-file.proto | 2 + 3 files changed, 84 insertions(+), 9 deletions(-) diff --git a/criu/files-reg.c b/criu/files-reg.c index d1d1ee5af..b53e9b080 100644 --- a/criu/files-reg.c +++ b/criu/files-reg.c @@ -282,19 +282,53 @@ static int mkreg_ghost(char *path, GhostFileEntry *gfe, struct cr_img *img) return ret; } +static int mklnk_ghost(char *path, GhostFileEntry *gfe) +{ + if (!gfe->symlnk_target) { + pr_err("Ghost symlink target is NULL for %s. 
Image from old CRIU?\n", path); + return -1; + } + + if (symlink(gfe->symlnk_target, path) < 0) { + /* + * ENOENT case is OK + * Take a look closer on create_ghost() function + */ + if (errno != ENOENT) + pr_perror("symlink(%s, %s) failed", gfe->symlnk_target, path); + return -1; + } + + return 0; +} + static int ghost_apply_metadata(const char *path, GhostFileEntry *gfe) { struct timeval tv[2]; int ret = -1; - if (chown(path, gfe->uid, gfe->gid) < 0) { - pr_perror("Can't reset user/group on ghost %s", path); - goto err; - } + if (S_ISLNK(gfe->mode)) { + if (lchown(path, gfe->uid, gfe->gid) < 0) { + pr_perror("Can't reset user/group on ghost %s", path); + goto err; + } - if (chmod(path, gfe->mode)) { - pr_perror("Can't set perms %o on ghost %s", gfe->mode, path); - goto err; + /* + * We have no lchmod() function, and fchmod() will fail on + * O_PATH | O_NOFOLLOW fd. Yes, we have fchmodat() + * function and flag AT_SYMLINK_NOFOLLOW described in + * man 2 fchmodat, but it is not currently implemented. 
%) + */ + } else { + if (chown(path, gfe->uid, gfe->gid) < 0) { + pr_perror("Can't reset user/group on ghost %s", path); + goto err; + } + + if (chmod(path, gfe->mode)) { + pr_perror("Can't set perms %o on ghost %s", gfe->mode, path); + goto err; + } } if (gfe->atim) { @@ -353,6 +387,9 @@ again: } else if (S_ISDIR(gfe->mode)) { if ((ret = mkdirpat(AT_FDCWD, path, gfe->mode)) < 0) msg = "Can't make ghost dir"; + } else if (S_ISLNK(gfe->mode)) { + if ((ret = mklnk_ghost(path, gfe)) < 0) + msg = "Can't create ghost symlink"; } else { if ((ret = mkreg_ghost(path, gfe, img)) < 0) msg = "Can't create ghost regfile"; @@ -740,6 +777,7 @@ static int dump_ghost_file(int _fd, u32 id, const struct stat *st, dev_t phys_de int exit_code = -1; GhostFileEntry gfe = GHOST_FILE_ENTRY__INIT; Timeval atim = TIMEVAL__INIT, mtim = TIMEVAL__INIT; + char pathbuf[PATH_MAX]; pr_info("Dumping ghost file contents (id %#x)\n", id); @@ -773,6 +811,36 @@ static int dump_ghost_file(int _fd, u32 id, const struct stat *st, dev_t phys_de gfe.size = st->st_size; } + /* + * We set gfe.symlnk_target only if we need to dump + * symlink content, otherwise we leave it NULL. + * It will be taken into account on restore in mklnk_ghost function. + */ + if (S_ISLNK(st->st_mode)) { + ssize_t ret; + + /* + * We assume that _fd opened with O_PATH | O_NOFOLLOW + * flags because S_ISLNK(st->st_mode). With current kernel version, + * it's looks like correct assumption in any case. 
+ */ + ret = readlinkat(_fd, "", pathbuf, sizeof(pathbuf) - 1); + if (ret < 0) { + pr_perror("Can't readlinkat"); + goto err_out; + } + + pathbuf[ret] = 0; + + if (ret != st->st_size) { + pr_err("Buffer for readlinkat is too small: ret %zd, st_size %"PRId64", buf %u %s\n", + ret, st->st_size, PATH_MAX, pathbuf); + goto err_out; + } + + gfe.symlnk_target = pathbuf; + } + if (pb_write_one(img, &gfe, PB_GHOST_FILE)) goto err_out; @@ -1116,6 +1184,7 @@ static int check_path_remap(struct fd_link *link, const struct fd_parms *parms, int ret, mntns_root; struct stat pst; const struct stat *ost = &parms->stat; + int flags = 0; if (parms->fs_type == PROC_SUPER_MAGIC) { /* The file points to /proc/pid/ where pid is a dead @@ -1212,7 +1281,10 @@ static int check_path_remap(struct fd_link *link, const struct fd_parms *parms, if (mntns_root < 0) return -1; - ret = fstatat(mntns_root, rpath, &pst, 0); + if (S_ISLNK(parms->stat.st_mode)) + flags = AT_SYMLINK_NOFOLLOW; + + ret = fstatat(mntns_root, rpath, &pst, flags); if (ret < 0) { /* * Linked file, but path is not accessible (unless any diff --git a/criu/files.c b/criu/files.c index 3f1d77931..f6ba39a30 100644 --- a/criu/files.c +++ b/criu/files.c @@ -545,7 +545,8 @@ static int dump_one_file(struct pid *pid, int fd, int lfd, struct fd_opts *opts, return do_dump_gen_file(&p, lfd, ops, e); } - if (S_ISREG(p.stat.st_mode) || S_ISDIR(p.stat.st_mode)) { + if (S_ISREG(p.stat.st_mode) || S_ISDIR(p.stat.st_mode) || + S_ISLNK(p.stat.st_mode)) { if (fill_fdlink(lfd, &p, &link)) return -1; diff --git a/images/ghost-file.proto b/images/ghost-file.proto index eda466451..0576089fd 100644 --- a/images/ghost-file.proto +++ b/images/ghost-file.proto @@ -15,6 +15,8 @@ message ghost_file_entry { optional timeval mtim = 8; optional bool chunks = 9; optional uint64 size = 10; + /* this field makes sense only when S_ISLNK(mode) */ + optional string symlnk_target = 11; } message ghost_chunk_entry { From 73e0ed3b8ae2a87d4232dd4a39ce3dab4edb1f24 Mon 
Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Fri, 7 Feb 2020 16:00:05 +0300 Subject: [PATCH 0353/2030] zdtm: add a test on open symlink migration Signed-off-by: Pavel Tikhomirov Co-Developed-by: Vitaly Ostrosablin Signed-off-by: Vitaly Ostrosablin Signed-off-by: Alexander Mikhalitsyn (Virtuozzo) --- test/zdtm/static/Makefile | 3 + test/zdtm/static/opath_file.c | 2 +- test/zdtm/static/symlink.c | 102 ++++++++++++++++++++++++++++++++++ test/zdtm/static/symlink01.c | 1 + 4 files changed, 107 insertions(+), 1 deletion(-) create mode 100644 test/zdtm/static/symlink.c create mode 120000 test/zdtm/static/symlink01.c diff --git a/test/zdtm/static/Makefile b/test/zdtm/static/Makefile index 035b8fa9c..ee69612c7 100644 --- a/test/zdtm/static/Makefile +++ b/test/zdtm/static/Makefile @@ -364,6 +364,8 @@ TST_DIR = \ ghost_on_rofs \ overmounted_file \ opath_file \ + symlink \ + symlink01 \ TST_DIR_FILE = \ chroot \ @@ -539,6 +541,7 @@ clone_fs: LDLIBS += -pthread # we have to explicitly specify both .o and .d for this case: netns_sub_veth.o netns_sub_veth.d: CPPFLAGS += $(call pkg-cflags, libnl-3.0) netns_sub_veth: LDLIBS += $(call pkg-libs, libnl-route-3.0 libnl-3.0) +symlink01: CFLAGS += -DZDTM_UNLINK_SYMLINK socket-tcp-fin-wait1: CFLAGS += -D ZDTM_TCP_FIN_WAIT1 socket-tcp-fin-wait2: CFLAGS += -D ZDTM_TCP_FIN_WAIT2 diff --git a/test/zdtm/static/opath_file.c b/test/zdtm/static/opath_file.c index 602a5af27..943f4eddb 100644 --- a/test/zdtm/static/opath_file.c +++ b/test/zdtm/static/opath_file.c @@ -36,7 +36,7 @@ static int parse_self_fdinfo(int fd, struct fdinfo *fi) while (fgets(line, sizeof(line), file)) { if (fdinfo_field(line, "flags")) { - if (sscanf(line, "%*s %llu", &val) != 1) { + if (sscanf(line, "%*s %llo", &val) != 1) { pr_err("failed to read flags: %s", line); goto fail; } diff --git a/test/zdtm/static/symlink.c b/test/zdtm/static/symlink.c new file mode 100644 index 000000000..074c80052 --- /dev/null +++ b/test/zdtm/static/symlink.c @@ -0,0 +1,102 @@ +#include 
+#include +#include +#include +#include +#include + +#include "zdtmtst.h" + +#define TEST_FILE "test_file" +#define TEST_SYMLINK "test_symlink" + +const char *test_doc = "Check open symlink preserved"; +const char *test_author = "Pavel Tikhomirov "; + +char *dirname; +TEST_OPTION(dirname, string, "directory name", 1); + +int main(int argc, char **argv) +{ + char test_symlink[PATH_MAX]; + char test_file[PATH_MAX]; + char pathbuf[PATH_MAX]; + struct stat stb, sta; + int ret, fd; + + test_init(argc, argv); + + if (mkdir(dirname, 0700)) { + pr_perror("can't make directory %s", dirname); + exit(1); + } + + snprintf(test_file, sizeof(test_file), "%s/%s", dirname, TEST_FILE); + ret = creat(test_file, 0644); + if (ret == -1) { + pr_perror("cat't create %s", test_file); + return 1; + } + close(ret); + + snprintf(test_symlink, sizeof(test_symlink), "%s/%s", dirname, TEST_SYMLINK); + ret = symlink(test_file, test_symlink); + if (ret == -1) { + pr_perror("cat't symlink to %s", test_symlink); + return 1; + } + + fd = open(test_symlink, O_PATH | O_NOFOLLOW); + if (fd == -1) { + pr_perror("cat't open symlink %s", test_symlink); + return 1; + } + + ret = fstat(fd, &sta); + if (ret == -1) { + pr_perror("cat't fstat %s", test_symlink); + return 1; + } + + if (!S_ISLNK(sta.st_mode)) { + pr_perror("file is not symlink %s", test_symlink); + return 1; + } + +#ifdef ZDTM_UNLINK_SYMLINK + if (unlink(test_symlink)) { + pr_perror("can't unlink symlink %s", test_symlink); + return 1; + } +#endif + + test_daemon(); + test_waitsig(); + + ret = fstat(fd, &stb); + if (ret == -1) { + fail("cat't fstat %s", test_symlink); + return 1; + } + + if (!S_ISLNK(stb.st_mode)) { + fail("file is not symlink %s", test_symlink); + return 1; + } + + ret = readlinkat(fd, "", pathbuf, sizeof(pathbuf) - 1); + if (ret < 0) { + fail("Can't readlinkat"); + return 1; + } + pathbuf[ret] = 0; + + if (strcmp(test_file, pathbuf)) { + fail("symlink points to %s but %s expected", pathbuf, test_file); + return 1; + } + + 
close(fd); + pass(); + return 0; +} diff --git a/test/zdtm/static/symlink01.c b/test/zdtm/static/symlink01.c new file mode 120000 index 000000000..e2d071ea4 --- /dev/null +++ b/test/zdtm/static/symlink01.c @@ -0,0 +1 @@ +symlink.c \ No newline at end of file From 065ff6f4151805fe50cc881ef506b6ae6407ec57 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Thu, 5 Mar 2020 08:30:23 +0300 Subject: [PATCH 0354/2030] zdtm/fifo_loop: don't try to write more than pipe size ... otherwise write() can block. Reported-by: Mr Jenkins Signed-off-by: Andrei Vagin --- test/zdtm/transition/fifo_loop.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/test/zdtm/transition/fifo_loop.c b/test/zdtm/transition/fifo_loop.c index 2e28320ba..b028c2fd5 100644 --- a/test/zdtm/transition/fifo_loop.c +++ b/test/zdtm/transition/fifo_loop.c @@ -39,6 +39,7 @@ int main(int argc, char **argv) int i; uint8_t buf[0x100000]; char *file_path; + int pipe_size; test_init(argc, argv); @@ -104,6 +105,13 @@ int main(int argc, char **argv) exit(1); } + pipe_size = fcntl(writefd, F_SETPIPE_SZ, sizeof(buf)); + if (pipe_size != sizeof(buf)) { + pr_perror("fcntl(writefd, F_GETPIPE_SZ) -> %d", pipe_size); + kill(0, SIGKILL); + exit(1); + } + file_path = path[i - 1]; readfd = open(file_path, O_RDONLY); if (readfd < 0) { @@ -138,13 +146,14 @@ int main(int argc, char **argv) for (p = rbuf, len = wlen; len > 0; p += rlen, len -= rlen) { rlen = read(readfd, p, len); + if (rlen < 0 && errno == EINTR) { + continue; + } + if (rlen <= 0) break; } - if (rlen < 0 && errno == EINTR) - continue; - if (len > 0) { fail("read failed: %m\n"); ret = 1; From 62ad2f6095b466bfade1af6fea60bcb0fa1505ec Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 5 Mar 2020 14:45:34 +0000 Subject: [PATCH 0355/2030] criu: Remove compel.h includes The plan is to remove "compel.h". That file only includes other headers (which may be not needed). 
If we aim for one-include-for-compel, we could instead paste all subheaders into "compel.h". Rather, I think it's worth to migrate to more fine-grained compel headers than follow the strategy 'one header to rule them all'. Further, the header creates problems for cross-compilation: it's included in files, those are used by host-compel. Which rightfully confuses compiler/linker as host's definitions for fpu regs/other platform details get drained into host's compel. As a first step - stop including "compel.h" in criu. Signed-off-by: Dmitry Safonov --- criu/aio.c | 2 +- criu/arch/aarch64/crtools.c | 2 +- criu/arch/arm/crtools.c | 3 +-- criu/arch/ppc64/crtools.c | 2 +- criu/arch/s390/crtools.c | 2 +- criu/arch/x86/crtools.c | 2 +- criu/arch/x86/sys-exec-tbl.c | 1 - criu/cr-restore.c | 1 - criu/include/proc_parse.h | 2 +- criu/kerndat.c | 1 - criu/mem.c | 2 +- criu/parasite-syscall.c | 2 -- criu/pie/pie-relocs.h | 2 -- criu/seize.c | 1 - criu/vdso.c | 1 - 15 files changed, 8 insertions(+), 18 deletions(-) diff --git a/criu/aio.c b/criu/aio.c index 45651f2d3..6ee65d5f4 100644 --- a/criu/aio.c +++ b/criu/aio.c @@ -11,7 +11,7 @@ #include "parasite.h" #include "parasite-syscall.h" #include "images/mm.pb-c.h" -#include +#include "compel/infect.h" #define NR_IOEVENTS_IN_NPAGES(npages) ((PAGE_SIZE * (npages) - sizeof(struct aio_ring)) / sizeof(struct io_event)) diff --git a/criu/arch/aarch64/crtools.c b/criu/arch/aarch64/crtools.c index f98743a23..76bd1fea7 100644 --- a/criu/arch/aarch64/crtools.c +++ b/criu/arch/aarch64/crtools.c @@ -19,7 +19,7 @@ #include "util.h" #include "cpu.h" #include "restorer.h" -#include +#include "compel/infect.h" #define assign_reg(dst, src, e) dst->e = (__typeof__(dst->e))(src)->e diff --git a/criu/arch/arm/crtools.c b/criu/arch/arm/crtools.c index c216cdc5c..840d489a6 100644 --- a/criu/arch/arm/crtools.c +++ b/criu/arch/arm/crtools.c @@ -18,8 +18,7 @@ #include "elf.h" #include "parasite-syscall.h" #include "restorer.h" - -#include +#include 
"compel/infect.h" #define assign_reg(dst, src, e) dst->e = (__typeof__(dst->e))((src)->ARM_##e) diff --git a/criu/arch/ppc64/crtools.c b/criu/arch/ppc64/crtools.c index 5a5966ad4..0d9f49c3f 100644 --- a/criu/arch/ppc64/crtools.c +++ b/criu/arch/ppc64/crtools.c @@ -17,7 +17,7 @@ #include "log.h" #include "util.h" #include "cpu.h" -#include +#include "compel/infect.h" #include "protobuf.h" #include "images/core.pb-c.h" diff --git a/criu/arch/s390/crtools.c b/criu/arch/s390/crtools.c index 238035b76..000b7779f 100644 --- a/criu/arch/s390/crtools.c +++ b/criu/arch/s390/crtools.c @@ -17,7 +17,7 @@ #include "log.h" #include "util.h" #include "cpu.h" -#include +#include "compel/infect.h" #include "protobuf.h" #include "images/core.pb-c.h" diff --git a/criu/arch/x86/crtools.c b/criu/arch/x86/crtools.c index e4073c27b..9c8beeedd 100644 --- a/criu/arch/x86/crtools.c +++ b/criu/arch/x86/crtools.c @@ -1,5 +1,5 @@ #include "compel/asm/fpu.h" -#include "compel/compel.h" +#include "compel/infect.h" #include "compel/plugins/std/syscall-codes.h" #include "cpu.h" #include "cr_options.h" diff --git a/criu/arch/x86/sys-exec-tbl.c b/criu/arch/x86/sys-exec-tbl.c index 608dc2510..225b8a153 100644 --- a/criu/arch/x86/sys-exec-tbl.c +++ b/criu/arch/x86/sys-exec-tbl.c @@ -1,4 +1,3 @@ -#include static struct syscall_exec_desc sc_exec_table_64[] = { #include "sys-exec-tbl-64.c" diff --git a/criu/cr-restore.c b/criu/cr-restore.c index 85105a18e..41146d4ad 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -67,7 +67,6 @@ #include "timerfd.h" #include "action-scripts.h" #include "shmem.h" -#include #include "aio.h" #include "lsm.h" #include "seccomp.h" diff --git a/criu/include/proc_parse.h b/criu/include/proc_parse.h index 96a097b3d..fd50ff47e 100644 --- a/criu/include/proc_parse.h +++ b/criu/include/proc_parse.h @@ -3,7 +3,7 @@ #include -#include +#include "compel/infect.h" #define PROC_TASK_COMM_LEN 32 #define PROC_TASK_COMM_LEN_FMT "(%31s" diff --git a/criu/kerndat.c b/criu/kerndat.c 
index 8ac83820b..2ad72c350 100644 --- a/criu/kerndat.c +++ b/criu/kerndat.c @@ -33,7 +33,6 @@ #include "net.h" #include "tun.h" #include -#include #include "netfilter.h" #include "fsnotify.h" #include "linux/userfaultfd.h" diff --git a/criu/mem.c b/criu/mem.c index 4e110c9e9..55022d94a 100644 --- a/criu/mem.c +++ b/criu/mem.c @@ -29,7 +29,7 @@ #include "pagemap-cache.h" #include "fault-injection.h" #include "prctl.h" -#include +#include "compel/infect-util.h" #include "protobuf.h" #include "images/pagemap.pb-c.h" diff --git a/criu/parasite-syscall.c b/criu/parasite-syscall.c index e5a8194e5..b649d1b51 100644 --- a/criu/parasite-syscall.c +++ b/criu/parasite-syscall.c @@ -45,8 +45,6 @@ #include "infect-rpc.h" #include "pie/parasite-blob.h" -#include - unsigned long get_exec_start(struct vm_area_list *vmas) { struct vma_area *vma_area; diff --git a/criu/pie/pie-relocs.h b/criu/pie/pie-relocs.h index 6797486c2..e36126be6 100644 --- a/criu/pie/pie-relocs.h +++ b/criu/pie/pie-relocs.h @@ -1,8 +1,6 @@ #ifndef __PIE_RELOCS_H__ #define __PIE_RELOCS_H__ -#include - #include "common/config.h" #include "common/compiler.h" diff --git a/criu/seize.c b/criu/seize.c index 0ba2d9b1d..f973806d9 100644 --- a/criu/seize.c +++ b/criu/seize.c @@ -23,7 +23,6 @@ #include "string.h" #include "xmalloc.h" #include "util.h" -#include #define NR_ATTEMPTS 5 diff --git a/criu/vdso.c b/criu/vdso.c index 19ba4765d..433a54728 100644 --- a/criu/vdso.c +++ b/criu/vdso.c @@ -20,7 +20,6 @@ #include "criu-log.h" #include "mem.h" #include "vma.h" -#include #include #ifdef LOG_PREFIX From 327554ee646ac8c7728981d0607dc42420c3a85a Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 5 Mar 2020 15:00:08 +0000 Subject: [PATCH 0356/2030] compel: Remove compel.h The file only includes other headers (which may be not needed). If we aim for one-include-for-compel, we could instead paste all subheaders into "compel.h". 
Rather, I think it's worth to migrate to more fine-grained compel headers than follow the strategy 'one header to rule them all'. Further, the header creates problems for cross-compilation: it's included in files, those are used by host-compel. Which rightfully confuses compiler/linker as host's definitions for fpu regs/other platform details get drained into host's compel. Signed-off-by: Dmitry Safonov --- Documentation/compel.txt | 2 +- compel/arch/aarch64/src/lib/handle-elf.c | 3 +-- compel/arch/arm/src/lib/handle-elf.c | 3 +-- compel/arch/arm/src/lib/infect.c | 1 + compel/arch/ppc64/src/lib/cpu.c | 1 + compel/arch/ppc64/src/lib/handle-elf.c | 3 +-- compel/arch/s390/src/lib/handle-elf.c | 3 +-- compel/arch/s390/src/lib/infect.c | 1 + compel/arch/x86/src/lib/handle-elf.c | 3 +-- compel/arch/x86/src/lib/infect.c | 1 + compel/include/log.h | 3 +-- compel/include/uapi/compel.h | 14 -------------- compel/src/lib/handle-elf.c | 6 ++---- compel/src/lib/log.c | 3 --- compel/src/main.c | 2 -- compel/test/fdspy/spy.c | 1 - compel/test/infect/spy.c | 1 - compel/test/rsys/spy.c | 2 -- include/common/scm.h | 2 ++ 19 files changed, 15 insertions(+), 40 deletions(-) delete mode 100644 compel/include/uapi/compel.h diff --git a/Documentation/compel.txt b/Documentation/compel.txt index 744a3b35d..6ccd20861 100644 --- a/Documentation/compel.txt +++ b/Documentation/compel.txt @@ -86,7 +86,7 @@ Infecting code ~~~~~~~~~~~~~~ The parasitic code is compiled and converted to a header using *compel*, and included here. 
-*#include * +*#include * *#include "parasite.h"* diff --git a/compel/arch/aarch64/src/lib/handle-elf.c b/compel/arch/aarch64/src/lib/handle-elf.c index 1c3686c48..1ee65ee2c 100644 --- a/compel/arch/aarch64/src/lib/handle-elf.c +++ b/compel/arch/aarch64/src/lib/handle-elf.c @@ -1,6 +1,5 @@ #include - -#include "uapi/compel.h" +#include #include "handle-elf.h" #include "piegen.h" diff --git a/compel/arch/arm/src/lib/handle-elf.c b/compel/arch/arm/src/lib/handle-elf.c index 8abf8dad1..5b8d00a6f 100644 --- a/compel/arch/arm/src/lib/handle-elf.c +++ b/compel/arch/arm/src/lib/handle-elf.c @@ -1,6 +1,5 @@ #include - -#include "uapi/compel.h" +#include #include "handle-elf.h" #include "piegen.h" diff --git a/compel/arch/arm/src/lib/infect.c b/compel/arch/arm/src/lib/infect.c index c17cb9c9b..0053bef58 100644 --- a/compel/arch/arm/src/lib/infect.c +++ b/compel/arch/arm/src/lib/infect.c @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include "common/page.h" diff --git a/compel/arch/ppc64/src/lib/cpu.c b/compel/arch/ppc64/src/lib/cpu.c index 338ab4891..7a3972790 100644 --- a/compel/arch/ppc64/src/lib/cpu.c +++ b/compel/arch/ppc64/src/lib/cpu.c @@ -2,6 +2,7 @@ #include #include #include +#include #include "compel-cpu.h" diff --git a/compel/arch/ppc64/src/lib/handle-elf.c b/compel/arch/ppc64/src/lib/handle-elf.c index 3d4020f59..f29fdc8a3 100644 --- a/compel/arch/ppc64/src/lib/handle-elf.c +++ b/compel/arch/ppc64/src/lib/handle-elf.c @@ -1,6 +1,5 @@ #include - -#include "uapi/compel.h" +#include #include "handle-elf.h" #include "piegen.h" diff --git a/compel/arch/s390/src/lib/handle-elf.c b/compel/arch/s390/src/lib/handle-elf.c index 01a8bf4c8..6ed382c92 100644 --- a/compel/arch/s390/src/lib/handle-elf.c +++ b/compel/arch/s390/src/lib/handle-elf.c @@ -1,6 +1,5 @@ #include - -#include "uapi/compel.h" +#include #include "handle-elf.h" #include "piegen.h" diff --git a/compel/arch/s390/src/lib/infect.c b/compel/arch/s390/src/lib/infect.c index 
7e7d24ce2..5a4675449 100644 --- a/compel/arch/s390/src/lib/infect.c +++ b/compel/arch/s390/src/lib/infect.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include diff --git a/compel/arch/x86/src/lib/handle-elf.c b/compel/arch/x86/src/lib/handle-elf.c index 62fb28f49..938999b2e 100644 --- a/compel/arch/x86/src/lib/handle-elf.c +++ b/compel/arch/x86/src/lib/handle-elf.c @@ -1,6 +1,5 @@ #include - -#include "uapi/compel.h" +#include #include "handle-elf.h" #include "piegen.h" diff --git a/compel/arch/x86/src/lib/infect.c b/compel/arch/x86/src/lib/infect.c index 11e7f4c91..9c4abb60c 100644 --- a/compel/arch/x86/src/lib/infect.c +++ b/compel/arch/x86/src/lib/infect.c @@ -3,6 +3,7 @@ #include #include #include +#include #include diff --git a/compel/include/log.h b/compel/include/log.h index 559f909ce..49e65bb50 100644 --- a/compel/include/log.h +++ b/compel/include/log.h @@ -1,8 +1,7 @@ #ifndef COMPEL_LOG_H__ #define COMPEL_LOG_H__ -#include "uapi/compel/compel.h" -#include "uapi/compel/loglevels.h" +#include "uapi/compel/log.h" #ifndef LOG_PREFIX # define LOG_PREFIX diff --git a/compel/include/uapi/compel.h b/compel/include/uapi/compel.h deleted file mode 100644 index 318a472da..000000000 --- a/compel/include/uapi/compel.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef UAPI_COMPEL_H__ -#define UAPI_COMPEL_H__ - -#include -#include - -#include -#include -#include -#include -#include -#include - -#endif /* UAPI_COMPEL_H__ */ diff --git a/compel/src/lib/handle-elf.c b/compel/src/lib/handle-elf.c index ca7c53b71..69d5104b6 100644 --- a/compel/src/lib/handle-elf.c +++ b/compel/src/lib/handle-elf.c @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include @@ -12,8 +12,6 @@ #include #include -#include "uapi/compel.h" - #include "handle-elf.h" #include "piegen.h" #include "log.h" @@ -228,7 +226,7 @@ int __handle_elf(void *mem, size_t size) } pr_out("/* Autogenerated from %s */\n", opts.input_filename); - pr_out("#include \n"); + pr_out("#include 
\n"); for (i = 0; i < symtab_hdr->sh_size / symtab_hdr->sh_entsize; i++) { Elf_Sym *sym = &symbols[i]; diff --git a/compel/src/lib/log.c b/compel/src/lib/log.c index d195343e4..c86be02c5 100644 --- a/compel/src/lib/log.c +++ b/compel/src/lib/log.c @@ -4,11 +4,8 @@ #include #include #include - #include -#include - #include "log.h" static unsigned int current_loglevel = COMPEL_DEFAULT_LOGLEVEL; diff --git a/compel/src/main.c b/compel/src/main.c index 8b2c8bc8d..36127c357 100644 --- a/compel/src/main.c +++ b/compel/src/main.c @@ -13,8 +13,6 @@ #include #include -#include "uapi/compel/compel.h" - #include "version.h" #include "piegen.h" #include "log.h" diff --git a/compel/test/fdspy/spy.c b/compel/test/fdspy/spy.c index 258e3ab75..1a373b6bb 100644 --- a/compel/test/fdspy/spy.c +++ b/compel/test/fdspy/spy.c @@ -5,7 +5,6 @@ #include #include -#include #include "parasite.h" #define PARASITE_CMD_GETFD PARASITE_USER_CMDS diff --git a/compel/test/infect/spy.c b/compel/test/infect/spy.c index a5aba7308..b5f8b2559 100644 --- a/compel/test/infect/spy.c +++ b/compel/test/infect/spy.c @@ -3,7 +3,6 @@ #include #include -#include #include "parasite.h" #define PARASITE_CMD_INC PARASITE_USER_CMDS diff --git a/compel/test/rsys/spy.c b/compel/test/rsys/spy.c index f5c999d5a..98654efcf 100644 --- a/compel/test/rsys/spy.c +++ b/compel/test/rsys/spy.c @@ -4,8 +4,6 @@ #include #include -#include - static void print_vmsg(unsigned int lvl, const char *fmt, va_list parms) { printf("\tLC%u: ", lvl); diff --git a/include/common/scm.h b/include/common/scm.h index ab27137b8..a8eb9ec4c 100644 --- a/include/common/scm.h +++ b/include/common/scm.h @@ -3,7 +3,9 @@ #include #include +#include #include +#include /* * Because of kernel doing kmalloc for user data passed From 18ac1540c4b64108b53fcd8fa3b3df256075e3f6 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 5 Mar 2020 15:04:15 +0000 Subject: [PATCH 0357/2030] travis: Add aarch64-cross test on amd64 Fixes: #924 Signed-off-by: Dmitry 
Safonov --- .travis.yml | 4 +++ scripts/build/Dockerfile.aarch64-cross | 45 ++++++++++++++++++++++++++ scripts/build/Makefile | 2 +- 3 files changed, 50 insertions(+), 1 deletion(-) create mode 100644 scripts/build/Dockerfile.aarch64-cross diff --git a/.travis.yml b/.travis.yml index 7c36af006..ffa82f15f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -86,6 +86,10 @@ jobs: arch: amd64 env: TR_ARCH=armv7-cross dist: bionic + - os: linux + arch: amd64 + env: TR_ARCH=aarch64-cross + dist: bionic allow_failures: - env: TR_ARCH=docker-test - env: TR_ARCH=docker-test DIST=xenial diff --git a/scripts/build/Dockerfile.aarch64-cross b/scripts/build/Dockerfile.aarch64-cross new file mode 100644 index 000000000..38229497a --- /dev/null +++ b/scripts/build/Dockerfile.aarch64-cross @@ -0,0 +1,45 @@ +FROM dockcross/base:latest + +# Add the cross compiler sources +RUN echo "deb http://ftp.us.debian.org/debian/ jessie main" >> /etc/apt/sources.list && \ + dpkg --add-architecture arm64 && \ + apt-get install emdebian-archive-keyring + +RUN apt-get update && apt-get install -y \ + crossbuild-essential-arm64 \ + libc6-dev-arm64-cross \ + libc6-arm64-cross \ + libbz2-dev:arm64 \ + libexpat1-dev:arm64 \ + ncurses-dev:arm64 \ + libssl-dev:arm64 \ + protobuf-c-compiler \ + protobuf-compiler \ + python-protobuf \ + libnl-3-dev:arm64 \ + libprotobuf-dev:arm64 \ + libnet-dev:arm64 \ + libprotobuf-c-dev:arm64 \ + libcap-dev:arm64 \ + libaio-dev:arm64 \ + libnl-route-3-dev:arm64 + +ENV CROSS_TRIPLE=aarch64-linux-gnu +ENV CROSS_COMPILE=${CROSS_TRIPLE}- \ + CROSS_ROOT=/usr/${CROSS_TRIPLE} \ + AS=/usr/bin/${CROSS_TRIPLE}-as \ + AR=/usr/bin/${CROSS_TRIPLE}-ar \ + CC=/usr/bin/${CROSS_TRIPLE}-gcc \ + CPP=/usr/bin/${CROSS_TRIPLE}-cpp \ + CXX=/usr/bin/${CROSS_TRIPLE}-g++ \ + LD=/usr/bin/${CROSS_TRIPLE}-ld \ + FC=/usr/bin/${CROSS_TRIPLE}-gfortran + +ENV PATH="${PATH}:${CROSS_ROOT}/bin" \ + PKG_CONFIG_PATH=/usr/lib/${CROSS_TRIPLE}/pkgconfig \ + ARCH=aarch64 + +COPY . 
/criu +WORKDIR /criu + +RUN make mrproper && date && make -j $(nproc) zdtm && date diff --git a/scripts/build/Makefile b/scripts/build/Makefile index d093ce76c..913a86d6c 100644 --- a/scripts/build/Makefile +++ b/scripts/build/Makefile @@ -2,7 +2,7 @@ ARCHES := x86_64 fedora-asan fedora-rawhide centos armv7hf TARGETS := $(ARCHES) alpine TARGETS_CLANG := $(addsuffix $(TARGETS),-clang) CONTAINER_RUNTIME := docker -TARGETS += armv7-cross +TARGETS += armv7-cross aarch64-cross all: $(TARGETS) $(TARGETS_CLANG) .PHONY: all From 1f74f8d77087ce06624cb54f2fa70afaf1380103 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Thu, 5 Mar 2020 16:43:48 +0000 Subject: [PATCH 0358/2030] travis: Use debian/buster as base for cross build tests Jessie is called 'oldoldstable', migrate to Buster. Suggested-by: Adrian Reber Signed-off-by: Dmitry Safonov --- scripts/build/Dockerfile.aarch64-cross | 2 +- scripts/build/Dockerfile.armv7-cross | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/build/Dockerfile.aarch64-cross b/scripts/build/Dockerfile.aarch64-cross index 38229497a..252e0f875 100644 --- a/scripts/build/Dockerfile.aarch64-cross +++ b/scripts/build/Dockerfile.aarch64-cross @@ -1,7 +1,7 @@ FROM dockcross/base:latest # Add the cross compiler sources -RUN echo "deb http://ftp.us.debian.org/debian/ jessie main" >> /etc/apt/sources.list && \ +RUN echo "deb http://ftp.us.debian.org/debian/ buster main" >> /etc/apt/sources.list && \ dpkg --add-architecture arm64 && \ apt-get install emdebian-archive-keyring diff --git a/scripts/build/Dockerfile.armv7-cross b/scripts/build/Dockerfile.armv7-cross index 434934aad..17a55561e 100644 --- a/scripts/build/Dockerfile.armv7-cross +++ b/scripts/build/Dockerfile.armv7-cross @@ -1,7 +1,7 @@ FROM dockcross/base:latest # Add the cross compiler sources -RUN echo "deb http://ftp.us.debian.org/debian/ jessie main" >> /etc/apt/sources.list && \ +RUN echo "deb http://ftp.us.debian.org/debian/ buster main" >> /etc/apt/sources.list 
&& \ dpkg --add-architecture armhf && \ apt-get install emdebian-archive-keyring From b9c8e957d8f198fb47ed9e73a5d5c3727ba4d4cc Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Tue, 10 Mar 2020 17:40:57 +0300 Subject: [PATCH 0359/2030] crit-recode: skip (not try to parse) nftables raw image We should ignore (not parse) images that has non-crtool format, that images has no magic number (RAW_IMAGE_MAGIC equals 0). nftables images has format compatible with `nft -f /proc/self/fd/0` input format. Reported-by: Mr Jenkins Signed-off-by: Alexander Mikhalitsyn (Virtuozzo) --- test/crit-recode.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/crit-recode.py b/test/crit-recode.py index a7dcc7272..adaf33733 100755 --- a/test/crit-recode.py +++ b/test/crit-recode.py @@ -47,6 +47,8 @@ for imgf in find.stdout.readlines(): continue if imgf_b.startswith(b'ip6tables-'): continue + if imgf_b.startswith(b'nftables-'): + continue if imgf_b.startswith(b'route-'): continue if imgf_b.startswith(b'route6-'): From c3ad4942d43524c617bb77345ce8912461b6f9aa Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Thu, 5 Mar 2020 20:46:16 +0200 Subject: [PATCH 0360/2030] travis: add ppc64-cross test on amd64 Signed-off-by: Mike Rapoport --- .travis.yml | 4 +++ scripts/build/Dockerfile.ppc64-cross | 45 ++++++++++++++++++++++++++++ scripts/build/Makefile | 2 +- 3 files changed, 50 insertions(+), 1 deletion(-) create mode 100644 scripts/build/Dockerfile.ppc64-cross diff --git a/.travis.yml b/.travis.yml index ffa82f15f..9928f16c2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -90,6 +90,10 @@ jobs: arch: amd64 env: TR_ARCH=aarch64-cross dist: bionic + - os: linux + arch: amd64 + env: TR_ARCH=ppc64-cross + dist: bionic allow_failures: - env: TR_ARCH=docker-test - env: TR_ARCH=docker-test DIST=xenial diff --git a/scripts/build/Dockerfile.ppc64-cross b/scripts/build/Dockerfile.ppc64-cross new file mode 100644 index 000000000..44061c558 --- /dev/null +++ 
b/scripts/build/Dockerfile.ppc64-cross @@ -0,0 +1,45 @@ +FROM dockcross/base:latest + +# Add the cross compiler sources +RUN echo "deb http://ftp.us.debian.org/debian/ buster main" >> /etc/apt/sources.list && \ + dpkg --add-architecture ppc64el && \ + apt-get install emdebian-archive-keyring + +RUN apt-get update && apt-get install -y \ + crossbuild-essential-ppc64el \ + libc6-dev-ppc64el-cross \ + libc6-ppc64el-cross \ + libbz2-dev:ppc64el \ + libexpat1-dev:ppc64el \ + ncurses-dev:ppc64el \ + libssl-dev:ppc64el \ + protobuf-c-compiler \ + protobuf-compiler \ + python-protobuf \ + libnl-3-dev:ppc64el \ + libprotobuf-dev:ppc64el \ + libnet-dev:ppc64el \ + libprotobuf-c-dev:ppc64el \ + libcap-dev:ppc64el \ + libaio-dev:ppc64el \ + libnl-route-3-dev:ppc64el + +ENV CROSS_TRIPLE=powerpc64le-linux-gnu +ENV CROSS_COMPILE=${CROSS_TRIPLE}- \ + CROSS_ROOT=/usr/${CROSS_TRIPLE} \ + AS=/usr/bin/${CROSS_TRIPLE}-as \ + AR=/usr/bin/${CROSS_TRIPLE}-ar \ + CC=/usr/bin/${CROSS_TRIPLE}-gcc \ + CPP=/usr/bin/${CROSS_TRIPLE}-cpp \ + CXX=/usr/bin/${CROSS_TRIPLE}-g++ \ + LD=/usr/bin/${CROSS_TRIPLE}-ld \ + FC=/usr/bin/${CROSS_TRIPLE}-gfortran + +ENV PATH="${PATH}:${CROSS_ROOT}/bin" \ + PKG_CONFIG_PATH=/usr/lib/${CROSS_TRIPLE}/pkgconfig \ + ARCH=ppc64 + +COPY . 
/criu +WORKDIR /criu + +RUN make mrproper && date && make -j $(nproc) zdtm && date diff --git a/scripts/build/Makefile b/scripts/build/Makefile index 913a86d6c..855539152 100644 --- a/scripts/build/Makefile +++ b/scripts/build/Makefile @@ -2,7 +2,7 @@ ARCHES := x86_64 fedora-asan fedora-rawhide centos armv7hf TARGETS := $(ARCHES) alpine TARGETS_CLANG := $(addsuffix $(TARGETS),-clang) CONTAINER_RUNTIME := docker -TARGETS += armv7-cross aarch64-cross +TARGETS += armv7-cross aarch64-cross ppc64-cross all: $(TARGETS) $(TARGETS_CLANG) .PHONY: all From d0d6f1ad108a6bd7eb0e2019aaca7689bff45275 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Sat, 21 Mar 2020 10:35:42 -0700 Subject: [PATCH 0361/2030] mailmap: update my email Signed-off-by: Andrei Vagin --- .mailmap | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/.mailmap b/.mailmap index d8c3f594d..6f046b972 100644 --- a/.mailmap +++ b/.mailmap @@ -1,6 +1,8 @@ Stanislav Kinsbursky Pavel Emelyanov -Andrey Vagin -Andrey Vagin -Andrey Vagin Andrew Vagin +Andrei Vagin +Andrei Vagin +Andrei Vagin +Andrei Vagin +Andrei Vagin Cyrill Gorcunov From f42ae70c75802787e980715a7ca895eb2b390d06 Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Fri, 20 Mar 2020 23:12:59 +0000 Subject: [PATCH 0362/2030] make: use cflags/ldflags for config.h detection mechanism The config.h detection scripts should use the provided CFLAGS/LDFLAGS as it tries to link libnl, libnet, and others. 
Signed-off-by: Nicolas Viennot --- scripts/nmk/scripts/utils.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/nmk/scripts/utils.mk b/scripts/nmk/scripts/utils.mk index 0cf216bc0..b9790615c 100644 --- a/scripts/nmk/scripts/utils.mk +++ b/scripts/nmk/scripts/utils.mk @@ -3,7 +3,7 @@ ifndef ____nmk_defined__utils # # Usage: option := $(call try-compile,language,source-to-build,cc-options,cc-defines) try-compile = $(shell sh -c 'echo "$(2)" | \ - $(CC) $(4) -x $(1) - $(3) -o /dev/null > /dev/null 2>&1 && \ + $(CC) $(CFLAGS) $(LDFLAGS) $(4) -x $(1) - $(3) -o /dev/null > /dev/null 2>&1 && \ echo true || echo false') # From fb65ab2b1a47558c2fe92a635630d53b971e5876 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Sun, 15 Mar 2020 10:44:14 +0300 Subject: [PATCH 0363/2030] mem: dump shared memory file descriptors Any shared memory mapping can be opened via /proc/self/map_files/. Such file descriptors look like memfd file descriptors, so they can be dumped in the same way. 
Signed-off-by: Andrei Vagin --- criu/files.c | 2 +- criu/include/memfd.h | 2 +- criu/memfd.c | 11 +++++++---- criu/proc_parse.c | 38 ++++++++++---------------------------- 4 files changed, 19 insertions(+), 34 deletions(-) diff --git a/criu/files.c b/criu/files.c index f6ba39a30..a1fd26764 100644 --- a/criu/files.c +++ b/criu/files.c @@ -552,7 +552,7 @@ static int dump_one_file(struct pid *pid, int fd, int lfd, struct fd_opts *opts, p.link = &link; - if (is_memfd(p.stat.st_dev, &link.name[1])) + if (is_memfd(p.stat.st_dev)) ops = &memfd_dump_ops; else if (link.name[1] == '/') ops = ®file_dump_ops; diff --git a/criu/include/memfd.h b/criu/include/memfd.h index 2d8eda545..4189766fd 100644 --- a/criu/include/memfd.h +++ b/criu/include/memfd.h @@ -8,7 +8,7 @@ struct fd_parms; struct file_desc; -extern int is_memfd(dev_t dev, const char *path); +extern int is_memfd(dev_t dev); extern int dump_one_memfd_cond(int lfd, u32 *id, struct fd_parms *parms); extern const struct fdtype_ops memfd_dump_ops; diff --git a/criu/memfd.c b/criu/memfd.c index 983e01b38..bca6900cb 100644 --- a/criu/memfd.c +++ b/criu/memfd.c @@ -58,15 +58,14 @@ static LIST_HEAD(memfd_inodes); static u32 memfd_inode_ids = 1; -int is_memfd(dev_t dev, const char *path) +int is_memfd(dev_t dev) { /* * TODO When MAP_HUGETLB is used, the file device is not shmem_dev, * Note that other parts of CRIU have similar issues, see * is_anon_shmem_map(). */ - return dev == kdat.shmem_dev && - !strncmp(path, MEMFD_PREFIX, MEMFD_PREFIX_LEN); + return dev == kdat.shmem_dev; } static int dump_memfd_inode(int fd, struct memfd_inode *inode, @@ -167,7 +166,11 @@ static int dump_one_memfd(int lfd, u32 id, const struct fd_parms *p) link = p->link; strip_deleted(link); - name = &link->name[1+MEMFD_PREFIX_LEN]; + /* link->name is always started with "." which has to be skipped. 
*/ + if (strncmp(link->name + 1, MEMFD_PREFIX, MEMFD_PREFIX_LEN) == 0) + name = &link->name[1 + MEMFD_PREFIX_LEN]; + else + name = link->name + 1; inode = dump_unique_memfd_inode(lfd, name, &p->stat); if (!inode) diff --git a/criu/proc_parse.c b/criu/proc_parse.c index 468afcdf3..980342870 100644 --- a/criu/proc_parse.c +++ b/criu/proc_parse.c @@ -305,7 +305,7 @@ static int vma_get_mapfile_user(const char *fname, struct vma_area *vma, vfi_dev = makedev(vfi->dev_maj, vfi->dev_min); - if (is_memfd(vfi_dev, fname)) { + if (is_memfd(vfi_dev)) { struct fd_link link; link.len = strlen(fname); strlcpy(link.name, fname, sizeof(link.name)); @@ -596,39 +596,21 @@ static int handle_vma(pid_t pid, struct vma_area *vma_area, goto err; } - /* - * /dev/zero stands for anon-shared mapping - * otherwise it's some file mapping. - * - * We treat memfd mappings as regular file mappings because - * their backing can be seen as files, which is easy to - * support. So even though memfd is an anonymous shmem, we - * treat it differently. - * Note: maybe we should revisit this as /proc/map_files/ - * may not always be accessible. 
- */ - - if (is_memfd(st_buf->st_dev, file_path)) { - vma_area->e->status |= VMA_AREA_MEMFD; - goto normal_file; - } - - if (is_anon_shmem_map(st_buf->st_dev)) { - if (!(vma_area->e->flags & MAP_SHARED)) - goto err_bogus_mapping; + if (is_anon_shmem_map(st_buf->st_dev) && !strncmp(file_path, "/SYSV", 5)) { vma_area->e->flags |= MAP_ANONYMOUS; vma_area->e->status |= VMA_ANON_SHARED; vma_area->e->shmid = st_buf->st_ino; - - if (!strncmp(file_path, "/SYSV", 5)) { - pr_info("path: %s\n", file_path); - vma_area->e->status |= VMA_AREA_SYSVIPC; - } else { + if (!(vma_area->e->flags & MAP_SHARED)) + goto err_bogus_mapping; + pr_info("path: %s\n", file_path); + vma_area->e->status |= VMA_AREA_SYSVIPC; + } else { + if (is_anon_shmem_map(st_buf->st_dev)) { + vma_area->e->status |= VMA_AREA_MEMFD; if (fault_injected(FI_HUGE_ANON_SHMEM_ID)) vma_area->e->shmid += FI_HUGE_ANON_SHMEM_ID_BASE; } - } else { -normal_file: + if (vma_area->e->flags & MAP_PRIVATE) vma_area->e->status |= VMA_FILE_PRIVATE; else From 10b1d46f674ec458cd1a006eb1b0546bf5a7135c Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Thu, 19 Mar 2020 09:37:18 +0300 Subject: [PATCH 0364/2030] mem/vma: set VMA_FILE_{PRIVATE,SHARED} if a vma file is borrowed Here is a fast path when two consequent vma-s share the same file. But one of these vma-s can map a file with MAP_SHARED, but another one can map it with MAP_PRIVATE and we need to take this into account. 
--- criu/proc_parse.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/criu/proc_parse.c b/criu/proc_parse.c index 980342870..60aba8788 100644 --- a/criu/proc_parse.c +++ b/criu/proc_parse.c @@ -584,6 +584,14 @@ static int handle_vma(pid_t pid, struct vma_area *vma_area, vma_area->e->shmid = prev->e->shmid; vma_area->vmst = prev->vmst; vma_area->mnt_id = prev->mnt_id; + + if (!(vma_area->e->status & VMA_AREA_SYSVIPC)) { + vma_area->e->status &= ~(VMA_FILE_PRIVATE | VMA_FILE_SHARED); + if (vma_area->e->flags & MAP_PRIVATE) + vma_area->e->status |= VMA_FILE_PRIVATE; + else + vma_area->e->status |= VMA_FILE_SHARED; + } } else if (*vm_file_fd >= 0) { struct stat *st_buf = vma_area->vmst; From c40c09cbbf03afc058a761314fcdb14a3f69cb53 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Sun, 15 Mar 2020 10:53:51 +0300 Subject: [PATCH 0365/2030] test/zdtmp: add a test to C/R shared memory file descriptors Any shared memory region can be openned via /proc/self/map_files. Signed-off-by: Andrei Vagin --- test/zdtm/static/Makefile | 2 + test/zdtm/static/shmemfd-priv.c | 84 ++++++++++++++++++++++ test/zdtm/static/shmemfd-priv.desc | 1 + test/zdtm/static/shmemfd.c | 107 +++++++++++++++++++++++++++++ test/zdtm/static/shmemfd.desc | 1 + 5 files changed, 195 insertions(+) create mode 100644 test/zdtm/static/shmemfd-priv.c create mode 100644 test/zdtm/static/shmemfd-priv.desc create mode 100644 test/zdtm/static/shmemfd.c create mode 100644 test/zdtm/static/shmemfd.desc diff --git a/test/zdtm/static/Makefile b/test/zdtm/static/Makefile index ee69612c7..a8e4107d3 100644 --- a/test/zdtm/static/Makefile +++ b/test/zdtm/static/Makefile @@ -224,6 +224,8 @@ TST_NOFILE := \ memfd01 \ memfd02 \ memfd03 \ + shmemfd \ + shmemfd-priv \ # jobctl00 \ ifneq ($(ARCH),arm) diff --git a/test/zdtm/static/shmemfd-priv.c b/test/zdtm/static/shmemfd-priv.c new file mode 100644 index 000000000..bbdb46905 --- /dev/null +++ b/test/zdtm/static/shmemfd-priv.c @@ -0,0 +1,84 @@ +#include +#include + 
+#include +#include +#include +#include +#include + +#include "zdtmtst.h" + +const char *test_doc = "Test C/R of shared memory file descriptors"; +const char *test_author = "Andrei Vagin "; + +#define err(exitcode, msg, ...) ({ pr_perror(msg, ##__VA_ARGS__); exit(exitcode); }) + +int main(int argc, char *argv[]) +{ + void *addr, *priv_addr, *addr2; + char path[4096]; + int fd; + + test_init(argc, argv); + + addr = mmap(NULL, 5 * PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_SHARED, -1, 0); + if (addr == MAP_FAILED) { + pr_perror("mmap"); + return 1; + } + + *(int *) addr = 1; + *(int *) (addr + PAGE_SIZE) = 11; + *(int *) (addr + 2 * PAGE_SIZE) = 111; + + snprintf(path, sizeof(path), "/proc/self/map_files/%lx-%lx", + (long)addr, (long)addr + 5 * PAGE_SIZE); + fd = open(path, O_RDWR | O_LARGEFILE); + if (fd < 0) + err(1, "Can't open %s", path); + + priv_addr = mmap(NULL, 5 * PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_FILE | MAP_PRIVATE, fd, PAGE_SIZE); + if (priv_addr == MAP_FAILED) { + pr_perror("mmap"); + return 1; + } + + addr2 = mmap(NULL, 5 * PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_FILE | MAP_SHARED, fd, 2 * PAGE_SIZE); + if (addr2 == MAP_FAILED) { + pr_perror("mmap"); + return 1; + } + + *(int *) (priv_addr + PAGE_SIZE) = 22; + + test_daemon(); + test_waitsig(); + + if (*(int *) (priv_addr + PAGE_SIZE) != 22) { + fail("the second page of the private mapping is corrupted"); + return 1; + } + if (*(int *) (priv_addr) != 11) { + fail("the first page of the private mapping is corrupted"); + return 1; + } + if (*(int *) (addr2) != 111) { + fail("the first page of the second shared mapping is corrupted"); + return 1; + } + *(int *) (addr2) = 333; + if (*(int *) (addr + 2 * PAGE_SIZE) != 333) { + fail("the first page of the second shared mapping isn't shared"); + return 1; + } + *(int *) (addr + 3 * PAGE_SIZE) = 444; + if (*(int *) (priv_addr + 2 * PAGE_SIZE) != 444) { + fail("the third page of the private mapping is corrupted"); + return 1; + } + + pass(); + + 
return 0; +} diff --git a/test/zdtm/static/shmemfd-priv.desc b/test/zdtm/static/shmemfd-priv.desc new file mode 100644 index 000000000..d969725f6 --- /dev/null +++ b/test/zdtm/static/shmemfd-priv.desc @@ -0,0 +1 @@ +{'flavor': 'h ns', 'flags': 'suid'} diff --git a/test/zdtm/static/shmemfd.c b/test/zdtm/static/shmemfd.c new file mode 100644 index 000000000..b65faa2e1 --- /dev/null +++ b/test/zdtm/static/shmemfd.c @@ -0,0 +1,107 @@ +#include +#include + +#include +#include +#include +#include +#include + +#include "zdtmtst.h" + +const char *test_doc = "Test C/R of shared memory file descriptors"; +const char *test_author = "Andrei Vagin "; + +#define err(exitcode, msg, ...) ({ pr_perror(msg, ##__VA_ARGS__); exit(exitcode); }) + +int main(int argc, char *argv[]) +{ + int fd, fl_flags1, fl_flags2, fd_flags1, fd_flags2; + struct statfs statfs1, statfs2; + off_t pos1, pos2; + char path[4096]; + char buf[5]; + void *addr; + + test_init(argc, argv); + + addr = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_SHARED, -1, 0); + if (addr == MAP_FAILED) { + pr_perror("mmap"); + return 1; + } + + snprintf(path, sizeof(path), "/proc/self/map_files/%lx-%lx", + (long)addr, (long)addr + PAGE_SIZE); + fd = open(path, O_RDWR | O_LARGEFILE); + if (fd < 0) + err(1, "Can't open %s", path); + ftruncate(fd, 0); + munmap(addr, PAGE_SIZE); + + if (fcntl(fd, F_SETFL, O_APPEND) < 0) + err(1, "Can't get fl flags"); + + if ((fl_flags1 = fcntl(fd, F_GETFL)) == -1) + err(1, "Can't get fl flags"); + + if ((fd_flags1 = fcntl(fd, F_GETFD)) == -1) + err(1, "Can't get fd flags"); + + if (fstatfs(fd, &statfs1) < 0) + err(1, "statfs issue"); + + if (write(fd, "hello", 5) != 5) + err(1, "write error"); + + pos1 = 3; + if (lseek(fd, pos1, SEEK_SET) < 0) + err(1, "seek error"); + + test_daemon(); + test_waitsig(); + + if ((fl_flags2 = fcntl(fd, F_GETFL)) == -1) + err(1, "Can't get fl flags"); + + if (fl_flags1 != fl_flags2) { + fail("fl flags differs %x %x", fl_flags1, fl_flags2); + return 
1; + } + + if ((fd_flags2 = fcntl(fd, F_GETFD)) == -1) + err(1, "Can't get fd flags"); + + if (fd_flags1 != fd_flags2) { + fail("fd flags differs"); + return 1; + } + + if (fstatfs(fd, &statfs2) < 0) + err(1, "statfs issue"); + + if (statfs1.f_type != statfs2.f_type) { + fail("statfs.f_type differs"); + return 1; + } + + pos2 = lseek(fd, 0, SEEK_CUR); + if (pos1 != pos2) { + fail("position differs"); + return 1; + } + + if (pread(fd, buf, sizeof(buf), 0) != sizeof(buf)) { + fail("read problem"); + return 1; + } + + if (memcmp(buf, "hello", sizeof(buf))) { + fail("content mismatch"); + return 1; + } + + pass(); + + return 0; +} diff --git a/test/zdtm/static/shmemfd.desc b/test/zdtm/static/shmemfd.desc new file mode 100644 index 000000000..d969725f6 --- /dev/null +++ b/test/zdtm/static/shmemfd.desc @@ -0,0 +1 @@ +{'flavor': 'h ns', 'flags': 'suid'} From 691b4a4e7ee980778d8f13eaebddf9b04063942a Mon Sep 17 00:00:00 2001 From: Valeriy Vdovin Date: Mon, 3 Feb 2020 15:08:26 +0300 Subject: [PATCH 0366/2030] zdtm: Implemented get_current_dir_name wrapper that checks for 'x' permissions Any filesystem syscall, that needs to navigate to inode by it's absolute path performs successive lookup operations for each part of the path. Lookup operation includes access rights check. Usually but not always zdtm tests processes fall under 'other' access category. Also, usually directories don't have 'x' bit set for other. In case when bit 'x' is not set and user-ID and group-ID of a process relate it to 'other', test's will not succeed in performing these syscalls which are most of filesystem api, that has const char *path as part of it arguments (open, openat, mkdir, bind, etc). The observable behavior of that is that zdtm tests fail at file creation ops on one system and pass on the other. The above is not immediately clear to the developer by just looking at failed test's logs. 
Investigation of that is also not quick for a developer due to the complex structure of zdtm runtime where nested clones with NAMESPACE flags take place alongside with bind-mounts. As an additional note: 'get_current_dir_name' is documented as returning EACCESS in case when some part of the path lacks read/list permissions. But in fact it's not always so. Practice shows, that test processes can get false success on this operation only to fail on later call to something like mkdir/mknod/bind with a given path in arguments. 'get_cwd_check_perm' is a wrapper around 'get_current_dir_name'. It also checks for permissions on the given filepath and logs the error. This directs the developer towards the right investigation path or even eliminates the need for investigation completely. Signed-off-by: Valeriy Vdovin --- test/zdtm/lib/fs.c | 24 ++++++++++++++++++++++++ test/zdtm/lib/fs.h | 24 ++++++++++++++++++++++++ 2 files changed, 48 insertions(+) diff --git a/test/zdtm/lib/fs.c b/test/zdtm/lib/fs.c index 0decfc37b..e82011ec8 100644 --- a/test/zdtm/lib/fs.c +++ b/test/zdtm/lib/fs.c @@ -94,3 +94,27 @@ err: mnt_info_free(&m); goto out; } + +int get_cwd_check_perm(char **result) +{ + char *cwd; + *result = 0; + cwd = get_current_dir_name(); + if (!cwd) { + pr_perror("failed to get current directory"); + return -1; + } + + if (access(cwd, X_OK)) { + pr_err("access check for bit X for current dir path '%s' " + "failed for uid:%d,gid:%d, error: %d(%s). 
" + "Bit 'x' should be set in all path components of " + "this directory\n", + cwd, getuid(), getgid(), errno, strerror(errno) + ); + return -1; + } + + *result = cwd; + return 0; +} diff --git a/test/zdtm/lib/fs.h b/test/zdtm/lib/fs.h index 972b15aba..af7a665fb 100644 --- a/test/zdtm/lib/fs.h +++ b/test/zdtm/lib/fs.h @@ -50,4 +50,28 @@ extern mnt_info_t *mnt_info_alloc(void); extern void mnt_info_free(mnt_info_t **m); extern mnt_info_t *get_cwd_mnt_info(void); +/* + * get_cwd_check_perm is called to check that cwd is actually usable for a calling + * process. + * + * Example output of a stat command on a '/root' path shows file access bits: + * > stat /root + * File: ‘/root’ + * ... + * Access: (0550/dr-xr-x---) Uid: ( 0/root) Gid: ( 0/root) + * ^- no 'x' bit for other + * + * Here we can see that '/root' dir (that often can be part of cwd path) does not + * allow non-root user and non-root group to list contents of this directory. + * Calling process matching 'other' access category may succeed getting cwd path, but will + * fail performing further filesystem operations based on this path with confusing errors. + * + * This function calls get_current_dir_name and explicitly checks that bit 'x' is enabled for + * a calling process and logs the error. + * + * If check passes, stores get_current_dir's result in *result and returns 0 + * If check fails, stores 0 in *result and returns -1 + */ +extern int get_cwd_check_perm(char **result); + #endif /* ZDTM_FS_H_ */ From fa705e418b4e7c2bce0925ad9f8689cd40b0c00d Mon Sep 17 00:00:00 2001 From: Valeriy Vdovin Date: Mon, 3 Feb 2020 15:27:40 +0300 Subject: [PATCH 0367/2030] zdtm: Use safe helper function to initialize unix socket sockaddr structure The helper function removes code duplication from tests that want to initialize unix socket address to an absolute file path, derived from current working directory of the test + relative filename of a resulting socket. 
Because the former code used cwd = get_current_dir_name() as part of absolute filename generation, the resulting filepath could later cause failure of the bind syscall due to unchecked permissions and introduce confusing permission errors. Signed-off-by: Valeriy Vdovin --- test/zdtm/lib/Makefile | 2 +- test/zdtm/lib/unix.c | 19 ++++++++++++++++ test/zdtm/lib/zdtmtst.h | 3 +++ test/zdtm/static/del_standalone_un.c | 17 +------------- test/zdtm/static/deleted_unix_sock.c | 19 ++-------------- test/zdtm/static/sk-unix01.c | 33 ++++++---------------------- 6 files changed, 33 insertions(+), 60 deletions(-) create mode 100644 test/zdtm/lib/unix.c diff --git a/test/zdtm/lib/Makefile b/test/zdtm/lib/Makefile index b87f36e8f..89ca90933 100644 --- a/test/zdtm/lib/Makefile +++ b/test/zdtm/lib/Makefile @@ -4,7 +4,7 @@ CFLAGS += $(USERCFLAGS) LIB := libzdtmtst.a -LIBSRC := datagen.c msg.c parseargs.c test.c streamutil.c lock.c ns.c tcp.c fs.c sysctl.c +LIBSRC := datagen.c msg.c parseargs.c test.c streamutil.c lock.c ns.c tcp.c unix.c fs.c sysctl.c LIBOBJ := $(LIBSRC:%.c=%.o) BIN := groups diff --git a/test/zdtm/lib/unix.c b/test/zdtm/lib/unix.c new file mode 100644 index 000000000..c36846cad --- /dev/null +++ b/test/zdtm/lib/unix.c @@ -0,0 +1,19 @@ +#include +#include +#include "zdtmtst.h" +#include "fs.h" + +int unix_fill_sock_name(struct sockaddr_un *name, char *relFilename) +{ + char *cwd; + + if (get_cwd_check_perm(&cwd)) { + pr_err("failed to get current working directory with valid permissions.\n"); + return -1; + } + + name->sun_family = AF_LOCAL; + ssprintf(name->sun_path, "%s/%s", cwd, relFilename); + return 0; +} + diff --git a/test/zdtm/lib/zdtmtst.h b/test/zdtm/lib/zdtmtst.h index 2cd4bdd1d..6eec26647 100644 --- a/test/zdtm/lib/zdtmtst.h +++ b/test/zdtm/lib/zdtmtst.h @@ -149,6 +149,9 @@ extern int tcp_init_server(int family, int *port); extern int tcp_accept_server(int sock); extern int tcp_init_client(int family, char *servIP, unsigned short servPort); +struct 
sockaddr_un; +extern int unix_fill_sock_name(struct sockaddr_un *name, char *relFilename); + struct zdtm_tcp_opts { bool reuseaddr; bool reuseport; diff --git a/test/zdtm/static/del_standalone_un.c b/test/zdtm/static/del_standalone_un.c index d8200068b..5426fc786 100644 --- a/test/zdtm/static/del_standalone_un.c +++ b/test/zdtm/static/del_standalone_un.c @@ -16,19 +16,6 @@ const char *test_author = "Tycho Andersen "; char *dirname; TEST_OPTION(dirname, string, "directory name", 1); -static int fill_sock_name(struct sockaddr_un *name, const char *filename) -{ - char *cwd; - - cwd = get_current_dir_name(); - if (strlen(filename) + strlen(cwd) + 1 >= sizeof(name->sun_path)) - return -1; - - name->sun_family = AF_LOCAL; - ssprintf(name->sun_path, "%s/%s", cwd, filename); - return 0; -} - static int bind_and_listen(struct sockaddr_un *addr) { int sk; @@ -71,10 +58,8 @@ int main(int argc, char **argv) goto out; } - if (fill_sock_name(&addr, filename) < 0) { - pr_err("filename \"%s\" is too long\n", filename); + if (unix_fill_sock_name(&addr, filename)) goto out; - } sk1 = bind_and_listen(&addr); if (sk1 < 0) diff --git a/test/zdtm/static/deleted_unix_sock.c b/test/zdtm/static/deleted_unix_sock.c index bcc33f3de..4d328e996 100644 --- a/test/zdtm/static/deleted_unix_sock.c +++ b/test/zdtm/static/deleted_unix_sock.c @@ -17,28 +17,13 @@ const char *test_author = "Roman Kagan "; char *filename; TEST_OPTION(filename, string, "file name", 1); -static int fill_sock_name(struct sockaddr_un *name, const char *filename) -{ - char *cwd; - - cwd = get_current_dir_name(); - if (strlen(filename) + strlen(cwd) + 1 >= sizeof(name->sun_path)) - return -1; - - name->sun_family = AF_LOCAL; - sprintf(name->sun_path, "%s/%s", cwd, filename); - return 0; -} - static int setup_srv_sock(void) { struct sockaddr_un name; int sock; - if (fill_sock_name(&name, filename) < 0) { - pr_perror("filename \"%s\" is too long", filename); + if (unix_fill_sock_name(&name, filename)) return -1; - } sock = 
socket(PF_LOCAL, SOCK_STREAM, 0); if (sock < 0) { @@ -67,7 +52,7 @@ static int setup_clnt_sock(void) struct sockaddr_un name; int sock; - if (fill_sock_name(&name, filename) < 0) + if (unix_fill_sock_name(&name, filename)) return -1; sock = socket(PF_LOCAL, SOCK_STREAM, 0); diff --git a/test/zdtm/static/sk-unix01.c b/test/zdtm/static/sk-unix01.c index 2bceef79a..0e9006a15 100644 --- a/test/zdtm/static/sk-unix01.c +++ b/test/zdtm/static/sk-unix01.c @@ -24,22 +24,6 @@ const char *test_author = "Cyrill Gorcunov "; char *dirname; TEST_OPTION(dirname, string, "directory name", 1); -static int fill_sock_name(struct sockaddr_un *name, const char *filename) -{ - char *cwd; - - cwd = get_current_dir_name(); - if (strlen(filename) + strlen(cwd) + 1 >= sizeof(name->sun_path)) { - pr_err("Name %s/%s is too long for socket\n", - cwd, filename); - return -1; - } - - name->sun_family = AF_LOCAL; - ssprintf(name->sun_path, "%s/%s", cwd, filename); - return 0; -} - static int sk_alloc_bind(int type, struct sockaddr_un *addr) { int sk; @@ -155,10 +139,9 @@ int main(int argc, char **argv) */ ssprintf(filename, "%s/%s", subdir_dg, "sk-dt"); - if (fill_sock_name(&addr, filename) < 0) { - pr_err("%s is too long for socket\n", filename); + if (unix_fill_sock_name(&addr, filename)) return 1; - } + unlink(addr.sun_path); sk_dgram[0] = sk_alloc_bind(SOCK_DGRAM, &addr); @@ -184,10 +167,9 @@ int main(int argc, char **argv) test_msg("sk-dt: alloc/connect/unlink %d %s\n", sk_dgram[3], addr.sun_path); ssprintf(filename, "%s/%s", dirname, "sole"); - if (fill_sock_name(&addr, filename) < 0) { - pr_err("%s is too long for socket\n", filename); + if (unix_fill_sock_name(&addr, filename)) return 1; - } + unlink(addr.sun_path); sk_dgram[4] = sk_alloc_bind(SOCK_DGRAM, &addr); @@ -237,7 +219,7 @@ int main(int argc, char **argv) sk_dgram_pair[0], sk_dgram_pair[1]); ssprintf(filename, "%s/%s", subdir_dg, "sk-dtp"); - if (fill_sock_name(&addr, filename) < 0) { + if (unix_fill_sock_name(&addr, filename)) { 
pr_err("%s is too long for socket\n", filename); return 1; } @@ -270,10 +252,9 @@ int main(int argc, char **argv) * - delete socket on fs */ ssprintf(filename, "%s/%s", subdir_st, "sk-st"); - if (fill_sock_name(&addr, filename) < 0) { - pr_err("%s is too long for socket\n", filename); + if (unix_fill_sock_name(&addr, filename)) return 1; - } + unlink(addr.sun_path); sk_st[0] = sk_alloc_bind(SOCK_STREAM, &addr); From 2b376168efd751856ebef192e764ebd0037e7174 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Sat, 21 Mar 2020 21:58:55 +0300 Subject: [PATCH 0368/2030] pipe: restore pipe size even if a pipe is empty Without this patch, pipe size is restored only if a pipe has queued data. Reported-by: Mr Jenkins Signed-off-by: Andrei Vagin --- criu/pipes.c | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/criu/pipes.c b/criu/pipes.c index cb5da71de..d74329161 100644 --- a/criu/pipes.c +++ b/criu/pipes.c @@ -160,24 +160,24 @@ int restore_pipe_data(int img_type, int pfd, u32 id, struct pipe_data_rst **hash return 0; } - if (!pd->pde->bytes) - goto out; - - if (!pd->data) { - pr_err("Double data restore occurred on %#x\n", id); - return -1; - } - if (pd->pde->has_size) { pr_info("Restoring size %#x for %#x\n", pd->pde->size, pd->pde->pipe_id); ret = fcntl(pfd, F_SETPIPE_SZ, pd->pde->size); if (ret < 0) { pr_perror("Can't restore pipe size"); - goto err; + return -1; } } + if (!pd->pde->bytes) + return 0; + + if (!pd->data) { + pr_err("Double data restore occurred on %#x\n", id); + return -1; + } + iov.iov_base = pd->data; iov.iov_len = pd->pde->bytes; @@ -185,14 +185,13 @@ int restore_pipe_data(int img_type, int pfd, u32 id, struct pipe_data_rst **hash ret = vmsplice(pfd, &iov, 1, SPLICE_F_GIFT | SPLICE_F_NONBLOCK); if (ret < 0) { pr_perror("%#x: Error splicing data", id); - goto err; + return -1; } if (ret == 0 || ret > iov.iov_len /* sanity */) { pr_err("%#x: Wanted to restore %zu bytes, but got %d\n", id, iov.iov_len, ret); 
- ret = -1; - goto err; + return -1; } iov.iov_base += ret; @@ -211,10 +210,7 @@ int restore_pipe_data(int img_type, int pfd, u32 id, struct pipe_data_rst **hash munmap(pd->data, pd->pde->bytes); pd->data = NULL; -out: - ret = 0; -err: - return ret; + return 0; } static int userns_reopen(void *_arg, int fd, pid_t pid) From 1ad209b9c2b780fe2d5b043c3ffe29634629252c Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Sat, 21 Mar 2020 22:08:38 +0300 Subject: [PATCH 0369/2030] test/pipe03: check that pipe size is restored Create two pipes with and without queued data. Signed-off-by: Andrei Vagin --- test/zdtm/static/pipe03.c | 37 ++++++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/test/zdtm/static/pipe03.c b/test/zdtm/static/pipe03.c index a8721e934..d649007b7 100644 --- a/test/zdtm/static/pipe03.c +++ b/test/zdtm/static/pipe03.c @@ -13,27 +13,28 @@ const char *test_author = "Andrei Vagin "; int main(int argc, char **argv) { - int p[2], i; + int p[2][2], i; uint8_t buf[BUF_SIZE]; uint32_t crc; test_init(argc, argv); - if (pipe2(p, O_NONBLOCK)) { - pr_perror("pipe"); - return 1; - } - - if (fcntl(p[1], F_SETPIPE_SZ, DATA_SIZE) == -1) { - pr_perror("Unable to change a pipe size"); - return 1; + for (i = 0; i < 2; i++) { + if (pipe2(p[i], O_NONBLOCK)) { + pr_perror("pipe"); + return 1; + } + if (fcntl(p[i][1], F_SETPIPE_SZ, DATA_SIZE) == -1) { + pr_perror("Unable to change a pipe size"); + return 1; + } } crc = ~0; datagen(buf, BUF_SIZE, &crc); for (i = 0; i < DATA_SIZE / BUF_SIZE; i++) { - if (write(p[1], buf, BUF_SIZE) != BUF_SIZE) { + if (write(p[0][1], buf, BUF_SIZE) != BUF_SIZE) { pr_perror("write"); return 1; } @@ -43,12 +44,26 @@ int main(int argc, char **argv) test_waitsig(); for (i = 0; i < DATA_SIZE / BUF_SIZE; i++) { - if (read(p[0], buf, BUF_SIZE) != BUF_SIZE) { + if (read(p[0][0], buf, BUF_SIZE) != BUF_SIZE) { pr_perror("read"); return 1; } } + for (i = 0; i < 2; i++) { + int size; + + size = fcntl(p[i][1], 
F_GETPIPE_SZ); + if (size < 0) { + pr_perror("Unable to get a pipe size"); + return 1; + } + if (size != DATA_SIZE) { + fail("%d: size %d expected %d", i, size, DATA_SIZE); + return 1; + } + } + pass(); return 0; } From 5f28b692a0c972ddefb1ca4d1d0ef003dec4f617 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Sat, 21 Mar 2020 22:11:28 +0300 Subject: [PATCH 0370/2030] test/fifo_loop: change sizes of all fifo-s to fit a test buffer This test doesn't expect that the write operation will block. Signed-off-by: Andrei Vagin --- test/zdtm/transition/fifo_loop.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/test/zdtm/transition/fifo_loop.c b/test/zdtm/transition/fifo_loop.c index b028c2fd5..b06592586 100644 --- a/test/zdtm/transition/fifo_loop.c +++ b/test/zdtm/transition/fifo_loop.c @@ -84,6 +84,14 @@ int main(int argc, char **argv) ret = errno; return ret; } + + pipe_size = fcntl(writefd, F_SETPIPE_SZ, sizeof(buf)); + if (pipe_size != sizeof(buf)) { + pr_perror("fcntl(writefd, F_SETPIPE_SZ) -> %d", pipe_size); + kill(0, SIGKILL); + exit(1); + } + signal(SIGPIPE, SIG_IGN); if (pipe_in2out(readfd, writefd, buf, sizeof(buf)) < 0) /* pass errno as exit code to the parent */ @@ -107,7 +115,7 @@ int main(int argc, char **argv) pipe_size = fcntl(writefd, F_SETPIPE_SZ, sizeof(buf)); if (pipe_size != sizeof(buf)) { - pr_perror("fcntl(writefd, F_GETPIPE_SZ) -> %d", pipe_size); + pr_perror("fcntl(writefd, F_SETPIPE_SZ) -> %d", pipe_size); kill(0, SIGKILL); exit(1); } From 1ad8657ddb3d383874cc07cd3cf456cac7977db6 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Tue, 24 Mar 2020 19:31:42 +0300 Subject: [PATCH 0371/2030] config/nftables: include string.h for strlen Fixes: 9433b7b9db3e ("make: use cflags/ldflags for config.h detection mechanism") Signed-off-by: Andrei Vagin --- scripts/feature-tests.mak | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/feature-tests.mak b/scripts/feature-tests.mak index 21b390092..8df20afb7 100644 --- 
a/scripts/feature-tests.mak +++ b/scripts/feature-tests.mak @@ -152,6 +152,8 @@ endef define FEATURE_TEST_NFTABLES_LIB_API_0 +#include + #include int main(int argc, char **argv) From cc362b432e2d2e3ec68628fb33b117fe3e89f9c2 Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Fri, 20 Mar 2020 17:34:57 +0300 Subject: [PATCH 0372/2030] namespaces: fix error handling in dump_user_ns Fix n_xid_map leaks on error path and remove useless exit_code. Fixes: 6e1726f8 ("userns: set uid and gid before entering into userns") Signed-off-by: Pavel Tikhomirov --- criu/namespaces.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/criu/namespaces.c b/criu/namespaces.c index 21266df7c..2db805b2f 100644 --- a/criu/namespaces.c +++ b/criu/namespaces.c @@ -938,9 +938,9 @@ static int check_user_ns(int pid) int dump_user_ns(pid_t pid, int ns_id) { - int ret, exit_code = -1; UsernsEntry *e = &userns_entry; struct cr_img *img; + int ret; ret = parse_id_map(pid, "uid_map", &e->uid_map); if (ret < 0) @@ -953,7 +953,7 @@ int dump_user_ns(pid_t pid, int ns_id) e->n_gid_map = ret; if (check_user_ns(pid)) - return -1; + goto err; img = open_image(CR_FD_USERNS, O_DUMP, ns_id); if (!img) @@ -973,7 +973,7 @@ err: xfree(e->gid_map[0]); xfree(e->gid_map); } - return exit_code; + return -1; } void free_userns_maps(void) From 967797a8676c8b3b7cd8954892b113c6765af25a Mon Sep 17 00:00:00 2001 From: Byeonggon Lee Date: Sun, 15 Mar 2020 16:32:15 +0900 Subject: [PATCH 0373/2030] Add build directory to gitignore After running make install, build directory is generated but not ignored in gitignore. So this commit add build directory to gitignore. 
Signed-off-by: Byeonggon Lee --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index c231104af..23cd703be 100644 --- a/.gitignore +++ b/.gitignore @@ -42,3 +42,4 @@ lib/.crit-setup.files compel/include/asm include/common/asm include/common/config.h +build/ From e3a5d0975240f9e9b6b6d7a096af6b4bbad36737 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Mon, 23 Mar 2020 07:37:00 +0300 Subject: [PATCH 0374/2030] memfd: save all memfd inodes in one image Per-object image is acceptable if we expect to have 1-3 objects per-container. If we expect to have more objects, it is better to save them all into one image. There are a number of reasons for this: * We need fewer system calls to read all objects from one image. * It is faster to save or move one image. Signed-off-by: Andrei Vagin --- criu/cr-restore.c | 3 ++ criu/image-desc.c | 2 +- criu/include/image-desc.h | 2 +- criu/include/memfd.h | 2 + criu/memfd.c | 90 +++++++++++++++++---------------------- images/memfd.proto | 1 + 6 files changed, 46 insertions(+), 54 deletions(-) diff --git a/criu/cr-restore.c b/criu/cr-restore.c index 41146d4ad..1d3092f2f 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -233,6 +233,9 @@ static int restore_finish_ns_stage(int from, int to) static int crtools_prepare_shared(void) { + if (prepare_memfd_inodes()) + return -1; + if (prepare_files()) return -1; diff --git a/criu/image-desc.c b/criu/image-desc.c index b538a76ea..ac627a829 100644 --- a/criu/image-desc.c +++ b/criu/image-desc.c @@ -66,7 +66,7 @@ struct cr_fd_desc_tmpl imgset_template[CR_FD_MAX] = { FD_ENTRY(FS, "fs-%u"), FD_ENTRY(REMAP_FPATH, "remap-fpath"), FD_ENTRY_F(GHOST_FILE, "ghost-file-%x", O_NOBUF), - FD_ENTRY_F(MEMFD_INODE, "memfd-%u", O_NOBUF), + FD_ENTRY_F(MEMFD_INODE, "memfd", O_NOBUF), FD_ENTRY(TCP_STREAM, "tcp-stream-%x"), FD_ENTRY(MNTS, "mountpoints-%u"), FD_ENTRY(NETDEV, "netdev-%u"), diff --git a/criu/include/image-desc.h b/criu/include/image-desc.h index 
9ca9643a1..ce6ef1529 100644 --- a/criu/include/image-desc.h +++ b/criu/include/image-desc.h @@ -65,6 +65,7 @@ enum { CR_FD_CGROUP, CR_FD_FILE_LOCKS, CR_FD_SECCOMP, + CR_FD_MEMFD_INODE, _CR_FD_GLOB_TO, CR_FD_TMPFS_IMG, @@ -107,7 +108,6 @@ enum { CR_FD_PIPES, CR_FD_TTY_FILES, CR_FD_MEMFD_FILE, - CR_FD_MEMFD_INODE, CR_FD_AUTOFS, diff --git a/criu/include/memfd.h b/criu/include/memfd.h index 4189766fd..3074a5c0f 100644 --- a/criu/include/memfd.h +++ b/criu/include/memfd.h @@ -17,6 +17,8 @@ extern struct collect_image_info memfd_cinfo; extern struct file_desc *collect_memfd(u32 id); extern int apply_memfd_seals(void); +extern int prepare_memfd_inodes(void); + #ifdef CONFIG_HAS_MEMFD_CREATE # include #else diff --git a/criu/memfd.c b/criu/memfd.c index bca6900cb..2158e925b 100644 --- a/criu/memfd.c +++ b/criu/memfd.c @@ -43,9 +43,10 @@ struct memfd_inode { }; /* Only for restore */ struct { - mutex_t lock; - int fdstore_id; - unsigned int pending_seals; + mutex_t lock; + int fdstore_id; + unsigned int pending_seals; + MemfdInodeEntry *mie; }; }; }; @@ -71,9 +72,8 @@ int is_memfd(dev_t dev) static int dump_memfd_inode(int fd, struct memfd_inode *inode, const char *name, const struct stat *st) { - int ret = -1; - struct cr_img *img = NULL; MemfdInodeEntry mie = MEMFD_INODE_ENTRY__INIT; + int ret = -1; u32 shmid; /* @@ -90,10 +90,7 @@ static int dump_memfd_inode(int fd, struct memfd_inode *inode, if (dump_one_memfd_shmem(fd, shmid, st->st_size) < 0) goto out; - img = open_image(CR_FD_MEMFD_INODE, O_DUMP, inode->id); - if (!img) - goto out; - + mie.inode_id = inode->id; mie.uid = userns_uid(st->st_uid); mie.gid = userns_gid(st->st_gid); mie.name = (char *)name; @@ -104,14 +101,12 @@ static int dump_memfd_inode(int fd, struct memfd_inode *inode, if (mie.seals == -1) goto out; - if (pb_write_one(img, &mie, PB_MEMFD_INODE)) + if (pb_write_one(img_from_set(glob_imgset, CR_FD_MEMFD_INODE), &mie, PB_MEMFD_INODE)) goto out; ret = 0; out: - if (img) - close_image(img); return ret; } 
@@ -212,8 +207,6 @@ struct memfd_info { struct memfd_inode *inode; }; -static int memfd_open_inode(struct memfd_inode *inode); - static struct memfd_inode *memfd_alloc_inode(int id) { struct memfd_inode *inode; @@ -222,35 +215,47 @@ static struct memfd_inode *memfd_alloc_inode(int id) if (inode->id == id) return inode; - inode = shmalloc(sizeof(*inode)); - if (!inode) - return NULL; + pr_err("Unable to find the %d memfd inode\n", id); + return NULL; +} - inode->id = id; +static int collect_one_memfd_inode(void *o, ProtobufCMessage *base, struct cr_img *i) +{ + MemfdInodeEntry *mie = pb_msg(base, MemfdInodeEntry); + struct memfd_inode *inode = o; + + inode->mie = mie; + inode->id = mie->inode_id; mutex_init(&inode->lock); inode->fdstore_id = -1; inode->pending_seals = 0; list_add_tail(&inode->list, &memfd_inodes); - return inode; + + return 0; +} + +static struct collect_image_info memfd_inode_cinfo = { + .fd_type = CR_FD_MEMFD_INODE, + .pb_type = PB_MEMFD_INODE, + .priv_size = sizeof(struct memfd_inode), + .collect = collect_one_memfd_inode, + .flags = COLLECT_SHARED | COLLECT_NOFREE, +}; + +int prepare_memfd_inodes(void) +{ + return collect_image(&memfd_inode_cinfo); } -extern int restore_memfd_shm(int fd, u64 id, u64 size); static int memfd_open_inode_nocache(struct memfd_inode *inode) { MemfdInodeEntry *mie = NULL; - struct cr_img *img = NULL; int fd = -1; int ret = -1; int flags; - img = open_image(CR_FD_MEMFD_INODE, O_RSTR, inode->id); - if (!img) - goto out; - - if (pb_read_one(img, &mie, PB_MEMFD_INODE) < 0) - goto out; - + mie = inode->mie; if (mie->seals == F_SEAL_SEAL) { inode->pending_seals = 0; flags = 0; @@ -285,10 +290,6 @@ static int memfd_open_inode_nocache(struct memfd_inode *inode) out: if (fd != -1) close(fd); - if (img) - close_image(img); - if (mie) - memfd_inode_entry__free_unpacked(mie, NULL); return ret; } @@ -373,33 +374,17 @@ static int memfd_open_fe_fd(struct file_desc *fd, int *new_fd) static char *memfd_d_name(struct file_desc *d, char 
*buf, size_t s) { MemfdInodeEntry *mie = NULL; - struct cr_img *img = NULL; struct memfd_info *mfi; - char *ret = NULL; mfi = container_of(d, struct memfd_info, d); - img = open_image(CR_FD_MEMFD_INODE, O_RSTR, mfi->inode->id); - if (!img) - goto out; - - if (pb_read_one(img, &mie, PB_MEMFD_INODE) < 0) - goto out; - + mie = mfi->inode->mie; if (snprintf(buf, s, "%s%s", MEMFD_PREFIX, mie->name) >= s) { pr_err("Buffer too small for memfd name %s\n", mie->name); - goto out; + return NULL; } - ret = buf; - -out: - if (img) - close_image(img); - if (mie) - memfd_inode_entry__free_unpacked(mie, NULL); - - return ret; + return buf; } static struct file_desc_ops memfd_desc_ops = { @@ -427,7 +412,8 @@ struct collect_image_info memfd_cinfo = { .collect = collect_one_memfd, }; -struct file_desc *collect_memfd(u32 id) { +struct file_desc *collect_memfd(u32 id) +{ struct file_desc *fdesc; fdesc = find_file_desc_raw(FD_TYPES__MEMFD, id); diff --git a/images/memfd.proto b/images/memfd.proto index 546ffc2ab..ad5373d10 100644 --- a/images/memfd.proto +++ b/images/memfd.proto @@ -18,4 +18,5 @@ message memfd_inode_entry { required uint64 size = 4; required uint32 shmid = 5; required uint32 seals = 6 [(criu).flags = "seals.flags"]; + required uint64 inode_id = 7; }; From 8c36865c84666c73424b7a0fdb9f460557465ff2 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Wed, 25 Mar 2020 20:14:24 +0300 Subject: [PATCH 0375/2030] memfd: split the struct memfd_inode The struct memfd_inode has a union for dump and restore parts. The only common parts are the list_head node, and the inode id. 
Suggested-by: Nicolas Viennot Signed-off-by: Andrei Vagin --- criu/memfd.c | 58 ++++++++++++++++++++++++---------------------------- 1 file changed, 27 insertions(+), 31 deletions(-) diff --git a/criu/memfd.c b/criu/memfd.c index 2158e925b..4419b4bf5 100644 --- a/criu/memfd.c +++ b/criu/memfd.c @@ -32,23 +32,19 @@ /* Linux 5.1+ */ #define F_SEAL_FUTURE_WRITE 0x0010 /* prevent future writes while mapped */ -struct memfd_inode { +struct memfd_dump_inode { struct list_head list; u32 id; - union { - /* Only for dump */ - struct { - u32 dev; - u32 ino; - }; - /* Only for restore */ - struct { - mutex_t lock; - int fdstore_id; - unsigned int pending_seals; - MemfdInodeEntry *mie; - }; - }; + u32 dev; + u32 ino; +}; + +struct memfd_restore_inode { + struct list_head list; + mutex_t lock; + int fdstore_id; + unsigned int pending_seals; + MemfdInodeEntry *mie; }; static LIST_HEAD(memfd_inodes); @@ -69,7 +65,7 @@ int is_memfd(dev_t dev) return dev == kdat.shmem_dev; } -static int dump_memfd_inode(int fd, struct memfd_inode *inode, +static int dump_memfd_inode(int fd, struct memfd_dump_inode *inode, const char *name, const struct stat *st) { MemfdInodeEntry mie = MEMFD_INODE_ENTRY__INIT; @@ -110,9 +106,10 @@ out: return ret; } -static struct memfd_inode *dump_unique_memfd_inode(int lfd, const char *name, const struct stat *st) +static struct memfd_dump_inode * +dump_unique_memfd_inode(int lfd, const char *name, const struct stat *st) { - struct memfd_inode *inode; + struct memfd_dump_inode *inode; int fd; list_for_each_entry(inode, &memfd_inodes, list) @@ -149,7 +146,7 @@ static int dump_one_memfd(int lfd, u32 id, const struct fd_parms *p) { MemfdFileEntry mfe = MEMFD_FILE_ENTRY__INIT; FileEntry fe = FILE_ENTRY__INIT; - struct memfd_inode *inode; + struct memfd_dump_inode *inode; struct fd_link _link, *link; const char *name; @@ -202,17 +199,17 @@ const struct fdtype_ops memfd_dump_ops = { */ struct memfd_info { - MemfdFileEntry *mfe; - struct file_desc d; - struct 
memfd_inode *inode; + MemfdFileEntry *mfe; + struct file_desc d; + struct memfd_restore_inode *inode; }; -static struct memfd_inode *memfd_alloc_inode(int id) +static struct memfd_restore_inode *memfd_alloc_inode(int id) { - struct memfd_inode *inode; + struct memfd_restore_inode *inode; list_for_each_entry(inode, &memfd_inodes, list) - if (inode->id == id) + if (inode->mie->inode_id == id) return inode; pr_err("Unable to find the %d memfd inode\n", id); @@ -222,10 +219,9 @@ static struct memfd_inode *memfd_alloc_inode(int id) static int collect_one_memfd_inode(void *o, ProtobufCMessage *base, struct cr_img *i) { MemfdInodeEntry *mie = pb_msg(base, MemfdInodeEntry); - struct memfd_inode *inode = o; + struct memfd_restore_inode *inode = o; inode->mie = mie; - inode->id = mie->inode_id; mutex_init(&inode->lock); inode->fdstore_id = -1; inode->pending_seals = 0; @@ -238,7 +234,7 @@ static int collect_one_memfd_inode(void *o, ProtobufCMessage *base, struct cr_im static struct collect_image_info memfd_inode_cinfo = { .fd_type = CR_FD_MEMFD_INODE, .pb_type = PB_MEMFD_INODE, - .priv_size = sizeof(struct memfd_inode), + .priv_size = sizeof(struct memfd_restore_inode), .collect = collect_one_memfd_inode, .flags = COLLECT_SHARED | COLLECT_NOFREE, }; @@ -248,7 +244,7 @@ int prepare_memfd_inodes(void) return collect_image(&memfd_inode_cinfo); } -static int memfd_open_inode_nocache(struct memfd_inode *inode) +static int memfd_open_inode_nocache(struct memfd_restore_inode *inode) { MemfdInodeEntry *mie = NULL; int fd = -1; @@ -293,7 +289,7 @@ out: return ret; } -static int memfd_open_inode(struct memfd_inode *inode) +static int memfd_open_inode(struct memfd_restore_inode *inode) { int fd; @@ -433,7 +429,7 @@ int apply_memfd_seals(void) */ int ret, fd; - struct memfd_inode *inode; + struct memfd_restore_inode *inode; list_for_each_entry(inode, &memfd_inodes, list) { if (!inode->pending_seals) From e3fb52e375d2fdd7160395220cf52eb25dfc8c09 Mon Sep 17 00:00:00 2001 From: Pavel 
Tikhomirov Date: Mon, 30 Mar 2020 14:16:30 +0300 Subject: [PATCH 0376/2030] remove header include statements duplicates Revert "util: introduce the mount_detached_fs helper" This reverts commit 5dbc24b206cd365db7498dddcd03798c5d8ed4e4. Revert "criu: Make use strlcpy() to copy into allocated strings" This reverts commit bc49927bbc28b41e4b2759d42dc24f1d66e22df3. Fixes for https://github.com/checkpoint-restore/criu/pull/1003 Signed-off-by: Pavel Tikhomirov --- criu/cr-restore.c | 1 - criu/util.c | 2 -- 2 files changed, 3 deletions(-) diff --git a/criu/cr-restore.c b/criu/cr-restore.c index 1d3092f2f..74be1a5ca 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -76,7 +76,6 @@ #include "fdstore.h" #include "string.h" #include "memfd.h" -#include "string.h" #include "parasite-syscall.h" #include "files-reg.h" diff --git a/criu/util.c b/criu/util.c index a0a49c5a3..1646ce1c4 100644 --- a/criu/util.c +++ b/criu/util.c @@ -30,8 +30,6 @@ #include "linux/mount.h" -#include "linux/mount.h" - #include "kerndat.h" #include "page.h" #include "util.h" From 0e9b42acf96d2c5fc3a6174ae6e4f2ad8a64c272 Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Tue, 25 Feb 2020 11:31:07 +0300 Subject: [PATCH 0377/2030] MAINTAINERS: Add Pavel (myself) to maintainers Hope I have enough experience in the project to be nominated. I want to help with review and will try to do my best in it. Signed-off-by: Pavel Tikhomirov --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 5c28463a7..bb153f1ab 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3,3 +3,4 @@ Andrey Vagin Mike Rapoport Dmitry Safonov <0x7f454c46@gmail.com> Adrian Reber +Pavel Tikhomirov From 4127ef4ab769dc4417c22d0ce0a4ddaaca4193b4 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Wed, 14 Aug 2019 07:40:40 +0300 Subject: [PATCH 0378/2030] criu: Add support for time namespaces The time namespace allows for per-namespace offsets to the system monotonic and boot-time clocks. 
C/R of time namespaces are very straightforward. On dump, criu enters a target time namespace and dumps currents clocks values, then on restore, criu creates a new namespace and restores clocks values. Signed-off-by: Andrei Vagin --- criu/Makefile.crtools | 1 + criu/cr-check.c | 12 ++++ criu/cr-restore.c | 10 ++- criu/image-desc.c | 1 + criu/include/image-desc.h | 1 + criu/include/kerndat.h | 1 + criu/include/magic.h | 1 + criu/include/namespaces.h | 9 ++- criu/include/proc_parse.h | 2 + criu/include/protobuf-desc.h | 1 + criu/include/timens.h | 9 +++ criu/kerndat.c | 17 +++++ criu/namespaces.c | 24 +++++++ criu/proc_parse.c | 38 ++++++++++ criu/protobuf-desc.c | 1 + criu/pstree.c | 2 + criu/timens.c | 130 +++++++++++++++++++++++++++++++++++ criu/util.c | 2 + images/Makefile | 1 + images/core.proto | 1 + images/timens.proto | 10 +++ lib/py/images/images.py | 1 + 22 files changed, 272 insertions(+), 3 deletions(-) create mode 100644 criu/include/timens.h create mode 100644 criu/timens.c create mode 100644 images/timens.proto diff --git a/criu/Makefile.crtools b/criu/Makefile.crtools index 1a6e0b5b5..5c25b8928 100644 --- a/criu/Makefile.crtools +++ b/criu/Makefile.crtools @@ -87,6 +87,7 @@ obj-y += config.o obj-y += servicefd.o obj-y += pie-util-vdso.o obj-y += vdso.o +obj-y += timens.o obj-$(CONFIG_COMPAT) += pie-util-vdso-elf32.o CFLAGS_pie-util-vdso-elf32.o += -DCONFIG_VDSO_32 obj-$(CONFIG_COMPAT) += vdso-compat.o diff --git a/criu/cr-check.c b/criu/cr-check.c index 80df3f7cd..b790c2ffb 100644 --- a/criu/cr-check.c +++ b/criu/cr-check.c @@ -1266,6 +1266,16 @@ static int check_kcmp_epoll(void) return 0; } +static int check_time_namespace(void) +{ + if (!kdat.has_timens) { + pr_err("Time namespaces are not supported\n"); + return -1; + } + + return 0; +} + static int check_net_diag_raw(void) { check_sock_diag(); @@ -1384,6 +1394,7 @@ int cr_check(void) ret |= check_kcmp_epoll(); ret |= check_net_diag_raw(); ret |= check_clone3_set_tid(); + ret |= 
check_time_namespace(); } /* @@ -1486,6 +1497,7 @@ static struct feature_list feature_list[] = { { "nsid", check_nsid }, { "link_nsid", check_link_nsid}, { "kcmp_epoll", check_kcmp_epoll}, + { "timens", check_time_namespace}, { "external_net_ns", check_external_net_ns}, { "clone3_set_tid", check_clone3_set_tid}, { NULL, NULL }, diff --git a/criu/cr-restore.c b/criu/cr-restore.c index 74be1a5ca..ce6e667d7 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -76,6 +76,7 @@ #include "fdstore.h" #include "string.h" #include "memfd.h" +#include "timens.h" #include "parasite-syscall.h" #include "files-reg.h" @@ -1406,7 +1407,7 @@ static inline int fork_with_pid(struct pstree_item *item) if (kdat.has_clone3_set_tid) { ret = clone3_with_pid_noasan(restore_task_with_children, &ca, (ca.clone_flags & - ~(CLONE_NEWNET | CLONE_NEWCGROUP)), + ~(CLONE_NEWNET | CLONE_NEWCGROUP | CLONE_NEWTIME)), SIGCHLD, pid); } else { /* @@ -1424,7 +1425,7 @@ static inline int fork_with_pid(struct pstree_item *item) close_pid_proc(); ret = clone_noasan(restore_task_with_children, (ca.clone_flags & - ~(CLONE_NEWNET | CLONE_NEWCGROUP)) | SIGCHLD, + ~(CLONE_NEWNET | CLONE_NEWCGROUP | CLONE_NEWTIME)) | SIGCHLD, &ca); } @@ -1745,6 +1746,11 @@ static int restore_task_with_children(void *_arg) } } + if (root_ns_mask & CLONE_NEWTIME) { + if (prepare_timens(current->ids->time_ns_id)) + goto err; + } + /* Wait prepare_userns */ if (restore_finish_ns_stage(CR_STATE_ROOT_TASK, CR_STATE_PREPARE_NAMESPACES) < 0) goto err; diff --git a/criu/image-desc.c b/criu/image-desc.c index ac627a829..617b95355 100644 --- a/criu/image-desc.c +++ b/criu/image-desc.c @@ -102,6 +102,7 @@ struct cr_fd_desc_tmpl imgset_template[CR_FD_MAX] = { FD_ENTRY(NETNF_CT, "netns-ct-%u"), FD_ENTRY(NETNF_EXP, "netns-exp-%u"), FD_ENTRY(FILES, "files"), + FD_ENTRY(TIMENS, "timens-%u"), [CR_FD_STATS] = { .fmt = "stats-%s", diff --git a/criu/include/image-desc.h b/criu/include/image-desc.h index ce6ef1529..6283a576d 100644 --- 
a/criu/include/image-desc.h +++ b/criu/include/image-desc.h @@ -26,6 +26,7 @@ enum { CR_FD_UTSNS, CR_FD_MNTS, CR_FD_USERNS, + CR_FD_TIMENS, _CR_FD_IPCNS_FROM, CR_FD_IPC_VAR, diff --git a/criu/include/kerndat.h b/criu/include/kerndat.h index 27c870bb8..ad5f7d324 100644 --- a/criu/include/kerndat.h +++ b/criu/include/kerndat.h @@ -67,6 +67,7 @@ struct kerndat_s { bool has_kcmp_epoll_tfd; bool has_fsopen; bool has_clone3_set_tid; + bool has_timens; }; extern struct kerndat_s kdat; diff --git a/criu/include/magic.h b/criu/include/magic.h index bdaca968d..d078ec422 100644 --- a/criu/include/magic.h +++ b/criu/include/magic.h @@ -95,6 +95,7 @@ #define AUTOFS_MAGIC 0x49353943 /* Sochi */ #define FILES_MAGIC 0x56303138 /* Toropets */ #define MEMFD_INODE_MAGIC 0x48453499 /* Dnipro */ +#define TIMENS_MAGIC 0x43114433 /* Beslan */ #define IFADDR_MAGIC RAW_IMAGE_MAGIC #define ROUTE_MAGIC RAW_IMAGE_MAGIC diff --git a/criu/include/namespaces.h b/criu/include/namespaces.h index a9a970a9b..e570aa0ab 100644 --- a/criu/include/namespaces.h +++ b/criu/include/namespaces.h @@ -34,7 +34,13 @@ #define CLONE_NEWCGROUP 0x02000000 #endif -#define CLONE_ALLNS (CLONE_NEWPID | CLONE_NEWNET | CLONE_NEWIPC | CLONE_NEWUTS | CLONE_NEWNS | CLONE_NEWUSER | CLONE_NEWCGROUP) +#ifndef CLONE_NEWTIME +#define CLONE_NEWTIME 0x00000080 +#endif + +#define CLONE_ALLNS (CLONE_NEWPID | CLONE_NEWNET | CLONE_NEWIPC | \ + CLONE_NEWUTS | CLONE_NEWNS | CLONE_NEWUSER | \ + CLONE_NEWCGROUP | CLONE_NEWTIME) /* Nested namespaces are supported only for these types */ #define CLONE_SUBNS (CLONE_NEWNS | CLONE_NEWNET) @@ -146,6 +152,7 @@ extern bool check_ns_proc(struct fd_link *link); extern struct ns_desc pid_ns_desc; extern struct ns_desc user_ns_desc; +extern struct ns_desc time_ns_desc; extern unsigned long root_ns_mask; extern const struct fdtype_ops nsfile_dump_ops; diff --git a/criu/include/proc_parse.h b/criu/include/proc_parse.h index fd50ff47e..25a57df6c 100644 --- a/criu/include/proc_parse.h +++ 
b/criu/include/proc_parse.h @@ -102,4 +102,6 @@ extern bool is_vma_range_fmt(char *line); extern void parse_vmflags(char *buf, u32 *flags, u64 *madv, int *io_pf); extern int parse_uptime(uint64_t *upt); +extern int parse_timens_offsets(struct timespec *boff, struct timespec *moff); + #endif /* __CR_PROC_PARSE_H__ */ diff --git a/criu/include/protobuf-desc.h b/criu/include/protobuf-desc.h index 7e0385ef4..ee4135d65 100644 --- a/criu/include/protobuf-desc.h +++ b/criu/include/protobuf-desc.h @@ -63,6 +63,7 @@ enum { PB_FILE, PB_MEMFD_FILE, PB_MEMFD_INODE, /* 60 */ + PB_TIMENS, /* PB_AUTOGEN_STOP */ diff --git a/criu/include/timens.h b/criu/include/timens.h new file mode 100644 index 000000000..22a4a5220 --- /dev/null +++ b/criu/include/timens.h @@ -0,0 +1,9 @@ +#ifndef __CR_TIME_NS_H__ +#define __CR_TIME_NS_H__ + +extern int dump_time_ns(int ns_id); +extern int prepare_timens(int pid); + +extern struct ns_desc time_ns_desc; + +#endif /* __CR_TIME_NS_H__ */ diff --git a/criu/kerndat.c b/criu/kerndat.c index 2ad72c350..0c6910da9 100644 --- a/criu/kerndat.c +++ b/criu/kerndat.c @@ -783,6 +783,21 @@ out: return ret; } +static int has_time_namespace(void) +{ + if (access("/proc/self/timens_offsets", F_OK) < 0) { + if (errno == ENOENT) { + pr_debug("Time namespaces are not supported.\n"); + kdat.has_timens = false; + return 0; + } + pr_perror("Unable to access /proc/self/timens_offsets"); + return -1; + } + kdat.has_timens = true; + return 0; +} + int __attribute__((weak)) kdat_x86_has_ptrace_fpu_xsave_bug(void) { return 0; @@ -1091,6 +1106,8 @@ int kerndat_init(void) ret = kerndat_has_fsopen(); if (!ret) ret = kerndat_has_clone3_set_tid(); + if (!ret) + ret = has_time_namespace(); kerndat_lsm(); kerndat_mmap_min_addr(); diff --git a/criu/namespaces.c b/criu/namespaces.c index 2db805b2f..e376feaca 100644 --- a/criu/namespaces.c +++ b/criu/namespaces.c @@ -20,6 +20,7 @@ #include "imgset.h" #include "uts_ns.h" #include "ipc_ns.h" +#include "timens.h" #include "mount.h" 
#include "pstree.h" #include "namespaces.h" @@ -39,6 +40,7 @@ static struct ns_desc *ns_desc_array[] = { &pid_ns_desc, &user_ns_desc, &mnt_ns_desc, + &time_ns_desc, &cgroup_ns_desc, }; @@ -157,6 +159,9 @@ int join_ns_add(const char *type, char *ns_file, char *extra_opts) } else if (!strncmp(type, "uts", 4)) { jn->nd = &uts_ns_desc; join_ns_flags |= CLONE_NEWUTS; + } else if (!strncmp(type, "time", 5)) { + jn->nd = &time_ns_desc; + join_ns_flags |= CLONE_NEWTIME; } else if (!strncmp(type, "ipc", 4)) { jn->nd = &ipc_ns_desc; join_ns_flags |= CLONE_NEWIPC; @@ -568,6 +573,10 @@ static int open_ns_fd(struct file_desc *d, int *new_fd) item = t; nd = &cgroup_ns_desc; break; + } else if (ids->time_ns_id == nfi->nfe->ns_id) { + item = t; + nd = &time_ns_desc; + break; } } @@ -671,6 +680,13 @@ int dump_task_ns_ids(struct pstree_item *item) return -1; } + ids->has_time_ns_id = true; + ids->time_ns_id = get_ns_id(pid, &time_ns_desc, NULL); + if (!ids->time_ns_id) { + pr_err("Can't make timens id\n"); + return -1; + } + ids->has_mnt_ns_id = true; ids->mnt_ns_id = get_ns_id(pid, &mnt_ns_desc, NULL); if (!ids->mnt_ns_id) { @@ -914,6 +930,9 @@ static int check_user_ns(int pid) if ((root_ns_mask & CLONE_NEWUTS) && switch_ns(pid, &uts_ns_desc, NULL)) exit(1); + if ((root_ns_mask & CLONE_NEWTIME) && + switch_ns(pid, &time_ns_desc, NULL)) + exit(1); if ((root_ns_mask & CLONE_NEWIPC) && switch_ns(pid, &ipc_ns_desc, NULL)) exit(1); @@ -1002,6 +1021,11 @@ static int do_dump_namespaces(struct ns_id *ns) ns->id, ns->ns_pid); ret = dump_uts_ns(ns->id); break; + case CLONE_NEWTIME: + pr_info("Dump TIME namespace %d via %d\n", + ns->id, ns->ns_pid); + ret = dump_time_ns(ns->id); + break; case CLONE_NEWIPC: pr_info("Dump IPC namespace %d via %d\n", ns->id, ns->ns_pid); diff --git a/criu/proc_parse.c b/criu/proc_parse.c index 60aba8788..c73fa9776 100644 --- a/criu/proc_parse.c +++ b/criu/proc_parse.c @@ -1472,6 +1472,44 @@ static bool should_skip_mount(const char *mountpoint) return false; } 
+int parse_timens_offsets(struct timespec *boff, struct timespec *moff) +{ + int exit_code = -1; + FILE *f; + + f = fopen_proc(PROC_SELF, "timens_offsets"); + if (!f) { + pr_perror("Unable to open /proc/self/timens_offsets"); + goto out; + } + while (fgets(buf, BUF_SIZE, f)) { + int64_t sec, nsec; + int clockid; + + if (sscanf(buf, "%d %"PRId64" %"PRId64"\n", &clockid, &sec, &nsec) != 3) { + pr_err("Unable to parse: %s\n", buf); + goto out; + } + switch (clockid) { + case CLOCK_MONOTONIC: + moff->tv_sec = sec; + moff->tv_nsec = nsec; + break; + case CLOCK_BOOTTIME: + boff->tv_sec = sec; + boff->tv_nsec = nsec; + break; + default: + pr_err("Unknown clockid: %d\n", clockid); + goto out; + } + } + exit_code = 0; +out: + fclose(f); + return exit_code; +} + struct mount_info *parse_mountinfo(pid_t pid, struct ns_id *nsid, bool for_dump) { struct mount_info *list = NULL; diff --git a/criu/protobuf-desc.c b/criu/protobuf-desc.c index 41c208037..2ee81e5db 100644 --- a/criu/protobuf-desc.c +++ b/criu/protobuf-desc.c @@ -37,6 +37,7 @@ #include "images/creds.pb-c.h" #include "images/timer.pb-c.h" #include "images/utsns.pb-c.h" +#include "images/timens.pb-c.h" #include "images/ipc-var.pb-c.h" #include "images/ipc-shm.pb-c.h" #include "images/ipc-msg.pb-c.h" diff --git a/criu/pstree.c b/criu/pstree.c index 19cf5ad38..d0e81bfad 100644 --- a/criu/pstree.c +++ b/criu/pstree.c @@ -814,6 +814,8 @@ static unsigned long get_clone_mask(TaskKobjIdsEntry *i, mask |= CLONE_NEWIPC; if (i->uts_ns_id != p->uts_ns_id) mask |= CLONE_NEWUTS; + if (i->time_ns_id != p->time_ns_id) + mask |= CLONE_NEWTIME; if (i->mnt_ns_id != p->mnt_ns_id) mask |= CLONE_NEWNS; if (i->user_ns_id != p->user_ns_id) diff --git a/criu/timens.c b/criu/timens.c new file mode 100644 index 000000000..79ba6a2ce --- /dev/null +++ b/criu/timens.c @@ -0,0 +1,130 @@ +#include +#include + +#include "types.h" +#include "proc_parse.h" +#include "namespaces.h" +#include "timens.h" + +#include "protobuf.h" +#include 
"images/timens.pb-c.h" + +int dump_time_ns(int ns_id) +{ + struct cr_img *img; + TimensEntry te = TIMENS_ENTRY__INIT; + Timespec b = TIMESPEC__INIT, m = TIMESPEC__INIT; + struct timespec ts; + int ret; + + img = open_image(CR_FD_TIMENS, O_DUMP, ns_id); + if (!img) + return -1; + + clock_gettime(CLOCK_MONOTONIC, &ts); + te.monotonic = &m; + te.monotonic->tv_sec = ts.tv_sec; + te.monotonic->tv_nsec = ts.tv_nsec; + clock_gettime(CLOCK_BOOTTIME, &ts); + te.boottime = &b; + te.boottime->tv_sec = ts.tv_sec; + te.boottime->tv_nsec = ts.tv_nsec; + + ret = pb_write_one(img, &te, PB_TIMENS); + close_image(img); + + return ret < 0 ? -1 : 0; +} + +static void normalize_timespec(struct timespec *ts) +{ + while (ts->tv_nsec >= NSEC_PER_SEC) { + ts->tv_nsec -= NSEC_PER_SEC; + ++ts->tv_sec; + } + while (ts->tv_nsec < 0) { + ts->tv_nsec += NSEC_PER_SEC; + --ts->tv_sec; + } +} + + +int prepare_timens(int id) +{ + int exit_code = -1; + int ret, fd = -1; + struct cr_img *img; + TimensEntry *te; + struct timespec ts; + struct timespec prev_moff = {}, prev_boff = {}; + + img = open_image(CR_FD_TIMENS, O_RSTR, id); + if (!img) + return -1; + + ret = pb_read_one(img, &te, PB_TIMENS); + close_image(img); + if (ret < 0) + goto err; + + if (unshare(CLONE_NEWTIME)) { + pr_perror("Unable to create a new time namespace"); + return -1; + } + + if (parse_timens_offsets(&prev_boff, &prev_moff)) + goto err; + + fd = open_proc_rw(PROC_SELF, "timens_offsets"); + if (fd < 0) + goto err; + + clock_gettime(CLOCK_MONOTONIC, &ts); + ts.tv_sec = ts.tv_sec - prev_moff.tv_sec; + ts.tv_nsec = ts.tv_nsec - prev_moff.tv_nsec; + + ts.tv_sec = te->monotonic->tv_sec - ts.tv_sec; + ts.tv_nsec = te->monotonic->tv_nsec - ts.tv_nsec; + normalize_timespec(&ts); + + pr_debug("timens: %d %ld %ld\n", + CLOCK_MONOTONIC, ts.tv_sec, ts.tv_nsec); + if (dprintf(fd, "%d %ld %ld\n", + CLOCK_MONOTONIC, ts.tv_sec, ts.tv_nsec) < 0) { + pr_perror("Unable to set a monotonic clock offset"); + goto err; + } + + 
clock_gettime(CLOCK_BOOTTIME, &ts); + + ts.tv_sec = ts.tv_sec - prev_boff.tv_sec; + ts.tv_nsec = ts.tv_nsec - prev_boff.tv_nsec; + + ts.tv_sec = te->boottime->tv_sec - ts.tv_sec; + ts.tv_nsec = te->boottime->tv_nsec - ts.tv_nsec; + normalize_timespec(&ts); + + pr_debug("timens: %d %ld %ld\n", + CLOCK_BOOTTIME, ts.tv_sec, ts.tv_nsec); + if (dprintf(fd, "%d %ld %ld\n", + CLOCK_BOOTTIME, ts.tv_sec, ts.tv_nsec) < 0) { + pr_perror("Unable to set a boottime clock offset"); + goto err; + } + + timens_entry__free_unpacked(te, NULL); + close_safe(&fd); + + fd = open_proc(PROC_SELF, "ns/time_for_children"); + if (fd < 0) { + pr_perror("Unable to open ns/time_for_children"); + goto err; + } + if (switch_ns_by_fd(fd, &time_ns_desc, NULL)) + goto err; + exit_code = 0; +err: + close_safe(&fd); + return exit_code; +} +struct ns_desc time_ns_desc = NS_DESC_ENTRY(CLONE_NEWTIME, "time"); diff --git a/criu/util.c b/criu/util.c index 1646ce1c4..6f6a6dde7 100644 --- a/criu/util.c +++ b/criu/util.c @@ -967,6 +967,8 @@ const char *ns_to_string(unsigned int ns) return "user"; case CLONE_NEWUTS: return "uts"; + case CLONE_NEWTIME: + return "time"; default: return NULL; } diff --git a/images/Makefile b/images/Makefile index e7f0580cf..5ddd37664 100644 --- a/images/Makefile +++ b/images/Makefile @@ -64,6 +64,7 @@ proto-obj-y += autofs.o proto-obj-y += macvlan.o proto-obj-y += sit.o proto-obj-y += memfd.o +proto-obj-y += timens.o CFLAGS += -iquote $(obj)/ diff --git a/images/core.proto b/images/core.proto index e90522914..22c2a9f1f 100644 --- a/images/core.proto +++ b/images/core.proto @@ -70,6 +70,7 @@ message task_kobj_ids_entry { optional uint32 mnt_ns_id = 9; optional uint32 user_ns_id = 10; optional uint32 cgroup_ns_id = 11; + optional uint32 time_ns_id = 12; } message thread_sas_entry { diff --git a/images/timens.proto b/images/timens.proto new file mode 100644 index 000000000..a8272609b --- /dev/null +++ b/images/timens.proto @@ -0,0 +1,10 @@ +syntax = "proto2"; + +message timespec { + 
required uint64 tv_sec = 1; + required uint64 tv_nsec = 2; +} +message timens_entry { + required timespec monotonic = 1; + required timespec boottime = 2; +} diff --git a/lib/py/images/images.py b/lib/py/images/images.py index dca080657..ca6f207bb 100644 --- a/lib/py/images/images.py +++ b/lib/py/images/images.py @@ -466,6 +466,7 @@ handlers = { 'IDS': entry_handler(pb.task_kobj_ids_entry), 'CREDS': entry_handler(pb.creds_entry), 'UTSNS': entry_handler(pb.utsns_entry), + 'TIMENS': entry_handler(pb.timens_entry), 'IPC_VAR': entry_handler(pb.ipc_var_entry), 'FS': entry_handler(pb.fs_entry), 'GHOST_FILE': ghost_file_handler(), From ddba4af608d546a968e9558758718bead9c638c5 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Sat, 28 Mar 2020 22:14:40 +0300 Subject: [PATCH 0379/2030] namespace: fail if ns/time_for_children isn't equal to ns/time This case isn't supported right now. Signed-off-by: Andrei Vagin --- criu/include/timens.h | 1 + criu/namespaces.c | 20 ++++++++++++++++---- criu/timens.c | 2 ++ 3 files changed, 19 insertions(+), 4 deletions(-) diff --git a/criu/include/timens.h b/criu/include/timens.h index 22a4a5220..0567c5828 100644 --- a/criu/include/timens.h +++ b/criu/include/timens.h @@ -5,5 +5,6 @@ extern int dump_time_ns(int ns_id); extern int prepare_timens(int pid); extern struct ns_desc time_ns_desc; +extern struct ns_desc time_for_children_ns_desc; #endif /* __CR_TIME_NS_H__ */ diff --git a/criu/namespaces.c b/criu/namespaces.c index e376feaca..89d97c7bc 100644 --- a/criu/namespaces.c +++ b/criu/namespaces.c @@ -341,7 +341,7 @@ struct ns_id *lookup_ns_by_kid(unsigned int kid, struct ns_desc *nd) struct ns_id *nsid; for (nsid = ns_ids; nsid != NULL; nsid = nsid->next) - if (nsid->kid == kid && nsid->nd == nd) + if (nsid->kid == kid && nsid->nd->cflag == nd->cflag) return nsid; return NULL; @@ -447,7 +447,7 @@ static unsigned int __get_ns_id(int pid, struct ns_desc *nd, protobuf_c_boolean { int proc_dir; unsigned int kid; - char ns_path[10]; + char 
ns_path[32]; struct stat st; proc_dir = open_pid_proc(pid); @@ -680,12 +680,24 @@ int dump_task_ns_ids(struct pstree_item *item) return -1; } - ids->has_time_ns_id = true; - ids->time_ns_id = get_ns_id(pid, &time_ns_desc, NULL); + ids->time_ns_id = get_ns_id(pid, &time_ns_desc, &ids->has_time_ns_id); if (!ids->time_ns_id) { pr_err("Can't make timens id\n"); return -1; } + if (ids->has_time_ns_id) { + unsigned int id; + protobuf_c_boolean supported; + id = get_ns_id(pid, &time_for_children_ns_desc, &supported); + if (!supported || !id) { + pr_err("Can't make timens id\n"); + return -1; + } + if (id != ids->time_ns_id) { + pr_err("Can't dump nested time namespace for %d\n", pid); + return -1; + } + } ids->has_mnt_ns_id = true; ids->mnt_ns_id = get_ns_id(pid, &mnt_ns_desc, NULL); diff --git a/criu/timens.c b/criu/timens.c index 79ba6a2ce..764f8c9e0 100644 --- a/criu/timens.c +++ b/criu/timens.c @@ -128,3 +128,5 @@ err: return exit_code; } struct ns_desc time_ns_desc = NS_DESC_ENTRY(CLONE_NEWTIME, "time"); +struct ns_desc time_for_children_ns_desc = + NS_DESC_ENTRY(CLONE_NEWTIME, "time_for_children"); From 3fd0fa4bdc7d325bb244ef01873255e4ebcbb403 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Wed, 14 Aug 2019 07:44:02 +0300 Subject: [PATCH 0380/2030] zdtm: add support for time namespaces For ns and uns flavors, tests run in separate time namespaces. 
Signed-off-by: Andrei Vagin --- test/zdtm/lib/ns.c | 68 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/test/zdtm/lib/ns.c b/test/zdtm/lib/ns.c index 3099f7495..0054a3040 100644 --- a/test/zdtm/lib/ns.c +++ b/test/zdtm/lib/ns.c @@ -15,6 +15,8 @@ #include #include #include +#include +#include #include "zdtmtst.h" #include "ns.h" @@ -207,6 +209,39 @@ write_out: write(STDERR_FILENO, buf, MIN(len, sizeof(buf))); } +#ifndef CLONE_NEWTIME +#define CLONE_NEWTIME 0x00000080 /* New time namespace */ +#endif + +static inline int _settime(clockid_t clk_id, time_t offset) +{ + int fd, len; + char buf[4096]; + + if (clk_id == CLOCK_MONOTONIC_COARSE || clk_id == CLOCK_MONOTONIC_RAW) + clk_id = CLOCK_MONOTONIC; + + len = snprintf(buf, sizeof(buf), "%d %ld 0", clk_id, offset); + + fd = open("/proc/self/timens_offsets", O_WRONLY); + if (fd < 0) { + fprintf(stderr, "open(/proc/self/timens_offsets): %m"); + return -1; + } + + if (write(fd, buf, len) != len) { + fprintf(stderr, "write(/proc/self/timens_offsets): %m"); + return -1; + } + + if (close(fd)) { + fprintf(stderr, "close(/proc/self/timens_offsets): %m"); + return -1; + } + + return 0; +} + #define STATUS_FD 255 static int ns_exec(void *_arg) { @@ -218,6 +253,7 @@ static int ns_exec(void *_arg) setsid(); + prctl(PR_SET_DUMPABLE, 1, 0, 0, 0); ret = dup2(args->status_pipe[1], STATUS_FD); if (ret < 0) { fprintf(stderr, "dup2() failed: %m\n"); @@ -236,6 +272,35 @@ static int ns_exec(void *_arg) return -1; } +static int create_timens(void) +{ + int fd; + + if (unshare(CLONE_NEWTIME)) { + if (errno == EINVAL) { + fprintf(stderr, "timens isn't supported\n"); + return 0; + } else { + fprintf(stderr, "unshare(CLONE_NEWTIME) failed: %m"); + exit(1); + } + } + + if (_settime(CLOCK_MONOTONIC, 10 * 24 * 60 * 60)) + exit(1); + if (_settime(CLOCK_BOOTTIME, 20 * 24 * 60 * 60)) + exit(1); + + fd = open("/proc/self/ns/time_for_children", O_RDONLY); + if (fd < 0) + exit(1); + if (setns(fd, 0)) + 
exit(1); + close(fd); + + return 0; +} + int ns_init(int argc, char **argv) { struct sigaction sa = { @@ -253,6 +318,9 @@ int ns_init(int argc, char **argv) exit(1); } + if (create_timens()) + exit(1); + if (init_notify()) { fprintf(stderr, "Can't init pre-dump notification: %m"); exit(1); From f1655fd5402f7827415cddbd796e5e44ed33cbc9 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Wed, 14 Aug 2019 07:45:34 +0300 Subject: [PATCH 0381/2030] zdtm: add a new test to check c/r of time namespaces This test checks that monotonic and boottime don't jump after C/R. In ns and uns flavors, the test is started in a separate time namespace with big offsets, so if criu will restore a time namespace incorrectly the test will detect the big delta of clocks values before and after C/R. Signed-off-by: Andrei Vagin --- test/zdtm/static/Makefile | 1 + test/zdtm/static/time.c | 47 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+) create mode 100644 test/zdtm/static/time.c diff --git a/test/zdtm/static/Makefile b/test/zdtm/static/Makefile index a8e4107d3..1b7542574 100644 --- a/test/zdtm/static/Makefile +++ b/test/zdtm/static/Makefile @@ -226,6 +226,7 @@ TST_NOFILE := \ memfd03 \ shmemfd \ shmemfd-priv \ + time \ # jobctl00 \ ifneq ($(ARCH),arm) diff --git a/test/zdtm/static/time.c b/test/zdtm/static/time.c new file mode 100644 index 000000000..d37e2a8f8 --- /dev/null +++ b/test/zdtm/static/time.c @@ -0,0 +1,47 @@ +#include +#include +#include +#include + +#include "zdtmtst.h" + +const char *test_doc = "Check monotonic and boot clocks"; +const char *test_author = "Andrei Vagin b + 60 * 60 * NSEC_PER_SEC) { + fail("%d: %lld %lld", clocks[i], a, b); + return 1; + } + } + + pass(); + + return 0; +} From 0d8c0562f9d8e67bae04f1b1aea08e485edf7340 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Thu, 15 Aug 2019 06:51:15 +0300 Subject: [PATCH 0382/2030] zdtm_ct: run each test in a new time namespace Signed-off-by: Andrei Vagin --- test/zdtm_ct.c | 65 
++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/test/zdtm_ct.c b/test/zdtm_ct.c index bc88dadde..5495d61eb 100644 --- a/test/zdtm_ct.c +++ b/test/zdtm_ct.c @@ -5,6 +5,69 @@ #include #include #include +#include +#include +#include +#include + +#ifndef CLONE_NEWTIME +#define CLONE_NEWTIME 0x00000080 /* New time namespace */ +#endif + +static inline int _settime(clockid_t clk_id, time_t offset) +{ + int fd, len; + char buf[4096]; + + if (clk_id == CLOCK_MONOTONIC_COARSE || clk_id == CLOCK_MONOTONIC_RAW) + clk_id = CLOCK_MONOTONIC; + + len = snprintf(buf, sizeof(buf), "%d %ld 0", clk_id, offset); + + fd = open("/proc/self/timens_offsets", O_WRONLY); + if (fd < 0) { + fprintf(stderr, "/proc/self/timens_offsets: %m"); + return -1; + } + + if (write(fd, buf, len) != len) { + fprintf(stderr, "/proc/self/timens_offsets: %m"); + return -1; + } + + close(fd); + + return 0; +} + +static int create_timens() +{ + int fd; + + if (unshare(CLONE_NEWTIME)) { + if (errno == EINVAL) { + fprintf(stderr, "timens isn't supported\n"); + return 0; + } else { + fprintf(stderr, "unshare(CLONE_NEWTIME) failed: %m"); + exit(1); + } + } + + if (_settime(CLOCK_MONOTONIC, 110 * 24 * 60 * 60)) + exit(1); + if (_settime(CLOCK_BOOTTIME, 40 * 24 * 60 * 60)) + exit(1); + + fd = open("/proc/self/ns/time_for_children", O_RDONLY); + if (fd < 0) + exit(1); + if (setns(fd, 0)) + exit(1); + close(fd); + + return 0; +} int main(int argc, char **argv) { @@ -20,6 +83,8 @@ int main(int argc, char **argv) return 1; pid = fork(); if (pid == 0) { + if (create_timens()) + exit(1); if (mount(NULL, "/", NULL, MS_REC | MS_SLAVE, NULL)) { fprintf(stderr, "mount(/, S_REC | MS_SLAVE)): %m"); return 1; From 73438d34bb5bfba5f0ac063c699f66454c722c51 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Thu, 26 Mar 2020 10:55:13 +0300 Subject: [PATCH 0383/2030] test: check that C/R of nested time namespaces fails Signed-off-by: Andrei Vagin --- test/zdtm/static/Makefile | 2 + 
test/zdtm/static/timens_for_kids.c | 36 ++++++++++++++ test/zdtm/static/timens_for_kids.desc | 1 + test/zdtm/static/timens_nested.c | 67 +++++++++++++++++++++++++++ test/zdtm/static/timens_nested.desc | 1 + 5 files changed, 107 insertions(+) create mode 100644 test/zdtm/static/timens_for_kids.c create mode 100644 test/zdtm/static/timens_for_kids.desc create mode 100644 test/zdtm/static/timens_nested.c create mode 100644 test/zdtm/static/timens_nested.desc diff --git a/test/zdtm/static/Makefile b/test/zdtm/static/Makefile index 1b7542574..7d72673c3 100644 --- a/test/zdtm/static/Makefile +++ b/test/zdtm/static/Makefile @@ -227,6 +227,8 @@ TST_NOFILE := \ shmemfd \ shmemfd-priv \ time \ + timens_nested \ + timens_for_kids \ # jobctl00 \ ifneq ($(ARCH),arm) diff --git a/test/zdtm/static/timens_for_kids.c b/test/zdtm/static/timens_for_kids.c new file mode 100644 index 000000000..72543486b --- /dev/null +++ b/test/zdtm/static/timens_for_kids.c @@ -0,0 +1,36 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "zdtmtst.h" + +const char *test_doc = "Check nested time namespaces"; +const char *test_author = "Andrei Vagin +#include +#include +#include +#include +#include +#include +#include +#include + +#include "zdtmtst.h" + +const char *test_doc = "Check nested time namespaces"; +const char *test_author = "Andrei Vagin Date: Mon, 23 Mar 2020 03:26:00 +0300 Subject: [PATCH 0384/2030] timens: restore processes in a new timens to restore clocks After restoring processes, we have to be sure that monotonic and boottime clocks will not go backward. For this, we can restore processes in a new time namespace and set proper offsets for the clocks. In this patch, criu dumps clock values even when processes are running in the host time namespace, and on restore, criu creates a new time namespace, sets dumped clock values and restores processes.
Signed-off-by: Andrei Vagin --- criu/cr-dump.c | 7 +++++++ criu/cr-restore.c | 3 +++ criu/timens.c | 5 +++++ 3 files changed, 15 insertions(+) diff --git a/criu/cr-dump.c b/criu/cr-dump.c index 6aa114c2d..a38e47d12 100644 --- a/criu/cr-dump.c +++ b/criu/cr-dump.c @@ -81,6 +81,7 @@ #include "dump.h" #include "eventpoll.h" #include "memfd.h" +#include "timens.h" /* * Architectures can overwrite this function to restore register sets that @@ -1921,6 +1922,12 @@ int cr_dump_tasks(pid_t pid) goto err; } + if ((root_ns_mask & CLONE_NEWTIME) == 0) { + ret = dump_time_ns(0); + if (ret) + goto err; + } + ret = dump_cgroups(); if (ret) goto err; diff --git a/criu/cr-restore.c b/criu/cr-restore.c index ce6e667d7..ed4b95b91 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -1749,6 +1749,9 @@ static int restore_task_with_children(void *_arg) if (root_ns_mask & CLONE_NEWTIME) { if (prepare_timens(current->ids->time_ns_id)) goto err; + } else if (kdat.has_timens) { + if (prepare_timens(0)) + goto err; } /* Wait prepare_userns */ diff --git a/criu/timens.c b/criu/timens.c index 764f8c9e0..f3b50fdff 100644 --- a/criu/timens.c +++ b/criu/timens.c @@ -62,6 +62,11 @@ int prepare_timens(int id) if (!img) return -1; + if (id == 0 && empty_image(img)) { + pr_warn("Clocks values have not been dumped\n"); + return 0; + } + ret = pb_read_one(img, &te, PB_TIMENS); close_image(img); if (ret < 0) From 698f3a4dbd8754a5c8bf1bf8f682d176f3bd24fb Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Thu, 26 Mar 2020 20:03:34 +0300 Subject: [PATCH 0385/2030] zdtm: limit the line length for ps by 160 symbols By default, this limit is 80 symbols and this isn't enough: 4730 pts/0 S+ 0:00 \_ ./zdtm_ct zdtm.py 7535 4731 pts/0 S+ 0:00 | \_ python zdtm.py 7536 4839 pts/0 S+ 0:00 | \_ python zdtm.p 7537 4861 pts/0 S+ 0:00 | \_ make --no 7538 4882 pts/0 S+ 0:00 | \_ ./mnt 7539 4883 ? Ss 0:00 | \_ . 
Signed-off-by: Andrei Vagin --- test/zdtm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/zdtm.py b/test/zdtm.py index 0bd7b84cc..ac8d7bee0 100755 --- a/test/zdtm.py +++ b/test/zdtm.py @@ -1885,7 +1885,7 @@ class Launcher: pid, status = os.waitpid(0, flags) except OSError as e: if e.errno == errno.EINTR: - subprocess.Popen(["ps", "axf"]).wait() + subprocess.Popen(["ps", "axf", "--width", "160"]).wait() continue signal.alarm(0) raise e From 067a20c815c5a632eee63469bcc9d99af73a9c79 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Sat, 28 Mar 2020 22:13:17 +0300 Subject: [PATCH 0386/2030] zdtm: fail if test with the crfail tag passes Signed-off-by: Andrei Vagin --- test/zdtm.py | 2 ++ test/zdtm/static/unhashed_proc.desc | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/test/zdtm.py b/test/zdtm.py index ac8d7bee0..5e42c769e 100755 --- a/test/zdtm.py +++ b/test/zdtm.py @@ -1735,6 +1735,8 @@ def do_run_test(tname, tdesc, flavs, opts): t.stop() cr_api.fini() try_run_hook(t, ["--clean"]) + if t.blocking(): + raise test_fail_exc("unexpected success") except test_fail_exc as e: print_sep("Test %s FAIL at %s" % (tname, e.step), '#') t.print_output() diff --git a/test/zdtm/static/unhashed_proc.desc b/test/zdtm/static/unhashed_proc.desc index 847e3b27c..de1915b23 100644 --- a/test/zdtm/static/unhashed_proc.desc +++ b/test/zdtm/static/unhashed_proc.desc @@ -1 +1 @@ -{'flags': 'crfail', 'opts' : '--link-remap'} +{'opts' : '--link-remap'} From bb0b4219efbda66995887b09fb3d6d81dc314031 Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Tue, 31 Mar 2020 14:16:30 +0000 Subject: [PATCH 0387/2030] img: fix image_name() when image is empty When an image is opened but errored with a ENOENT error, the image is still valid. Later on, do_pb_read_one() can fail and will invoke image_name(). The image fd is EMPTY_IMG_FD (-404). read_fd_link fails. 
Signed-off-by: Nicolas Viennot --- criu/protobuf.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/criu/protobuf.c b/criu/protobuf.c index e68d42b5c..4accc5ee0 100644 --- a/criu/protobuf.c +++ b/criu/protobuf.c @@ -25,8 +25,13 @@ static char *image_name(struct cr_img *img) int fd = img->_x.fd; static char image_path[PATH_MAX]; - if (read_fd_link(fd, image_path, sizeof(image_path)) > 0) + if (lazy_image(img)) + return img->path; + else if (empty_image(img)) + return "(empty-image)"; + else if (fd >= 0 && read_fd_link(fd, image_path, sizeof(image_path)) > 0) return image_path; + return NULL; } From 4d34f84bb6957f00d3440a428ecd80dd869212be Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Fri, 10 Apr 2020 21:10:27 +0000 Subject: [PATCH 0388/2030] img: relocate a PATH_MAX buffer from the bss section to the stack Reducing our memory footprint by 4K. Improved-by: Andrei Vagin Signed-off-by: Nicolas Viennot --- criu/protobuf.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/criu/protobuf.c b/criu/protobuf.c index 4accc5ee0..206223ca5 100644 --- a/criu/protobuf.c +++ b/criu/protobuf.c @@ -20,16 +20,16 @@ #include "protobuf.h" #include "util.h" -static char *image_name(struct cr_img *img) +#define image_name(img, buf) __image_name(img, buf, sizeof(buf)) +static char *__image_name(struct cr_img *img, char *image_path, size_t image_path_size) { int fd = img->_x.fd; - static char image_path[PATH_MAX]; if (lazy_image(img)) return img->path; else if (empty_image(img)) return "(empty-image)"; - else if (fd >= 0 && read_fd_link(fd, image_path, sizeof(image_path)) > 0) + else if (fd >= 0 && read_fd_link(fd, image_path, image_path_size) > 0) return image_path; return NULL; @@ -48,6 +48,7 @@ static char *image_name(struct cr_img *img) int do_pb_read_one(struct cr_img *img, void **pobj, int type, bool eof) { + char img_name_buf[PATH_MAX]; u8 local[PB_PKOBJ_LOCAL_SIZE]; void *buf = (void *)&local; u32 size; @@ -55,7
+56,7 @@ int do_pb_read_one(struct cr_img *img, void **pobj, int type, bool eof) if (!cr_pb_descs[type].pb_desc) { pr_err("Wrong object requested %d on %s\n", - type, image_name(img)); + type, image_name(img, img_name_buf)); return -1; } @@ -70,13 +71,13 @@ int do_pb_read_one(struct cr_img *img, void **pobj, int type, bool eof) return 0; } else { pr_err("Unexpected EOF on %s\n", - image_name(img)); + image_name(img, img_name_buf)); return -1; } } else if (ret < sizeof(size)) { pr_perror("Read %d bytes while %d expected on %s", ret, (int)sizeof(size), - image_name(img)); + image_name(img, img_name_buf)); return -1; } @@ -90,11 +91,11 @@ int do_pb_read_one(struct cr_img *img, void **pobj, int type, bool eof) ret = bread(&img->_x, buf, size); if (ret < 0) { pr_perror("Can't read %d bytes from file %s", - size, image_name(img)); + size, image_name(img, img_name_buf)); goto err; } else if (ret != size) { pr_perror("Read %d bytes while %d expected from %s", - ret, size, image_name(img)); + ret, size, image_name(img, img_name_buf)); ret = -1; goto err; } @@ -103,7 +104,7 @@ int do_pb_read_one(struct cr_img *img, void **pobj, int type, bool eof) if (!*pobj) { ret = -1; pr_err("Failed unpacking object %p from %s\n", - pobj, image_name(img)); + pobj, image_name(img, img_name_buf)); goto err; } From 6b9faabf39e14bbc23fe3174f308a5f9f870113f Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Sat, 28 Mar 2020 00:18:48 +0000 Subject: [PATCH 0389/2030] mem: avoid re-opening CR_FD_PAGES when not needed This commit introduces an optimization when rsti(t)->vma_io is empty. This optimization allows streaming a non-seekable image as CR_FD_PAGES is not reopened. 
Signed-off-by: Nicolas Viennot --- criu/mem.c | 14 ++++++++++++++ criu/pie/restorer.c | 3 ++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/criu/mem.c b/criu/mem.c index 55022d94a..15aa0cbdb 100644 --- a/criu/mem.c +++ b/criu/mem.c @@ -1403,6 +1403,20 @@ static int prepare_vma_ios(struct pstree_item *t, struct task_restore_args *ta) { struct cr_img *pages; + /* + * We optimize the case when rsti(t)->vma_io is empty. + * + * This is useful for for remote images, where all VMAs are premapped + * (pr->pieok is false). This avoids re-opening the CR_FD_PAGES file, + * which could be no longer be available. + */ + if (list_empty(&rsti(t)->vma_io)) { + ta->vma_ios = NULL; + ta->vma_ios_n = 0; + ta->vma_ios_fd = -1; + return 0; + } + /* * If auto-dedup is on we need RDWR mode to be able to punch holes in * the input files (in restorer.c) diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c index afe185f04..b3d7e2b5c 100644 --- a/criu/pie/restorer.c +++ b/criu/pie/restorer.c @@ -1602,7 +1602,8 @@ long __export_restore_task(struct task_restore_args *args) rio = ((void *)rio) + RIO_SIZE(rio->nr_iovs); } - sys_close(args->vma_ios_fd); + if (args->vma_ios_fd != -1) + sys_close(args->vma_ios_fd); /* * Proxify vDSO. From d1fa1734ee53404f8a06d82a5732dc1daff8d756 Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Wed, 4 Mar 2020 18:26:37 +0300 Subject: [PATCH 0390/2030] autofs: fix integer overflow in mount options parsing In real-life cases the pipe_ino param could be larger than INT_MAX, but in the autofs_parse() function we are using the atoi function, which uses 4-byte integers. It's a bug.
Example of mount info from a real case: (00.508286) type autofs source /etc/auto.misc mnt_id 2824 s_dev 0x4b9 / @ ./misc flags 0x300000 options fd=5,pipe_ino=3480845226,pgrp=95929,timeout=300, minproto=5,maxproto=5,indirect 3480845226 > 2147483647 (32-bit wide signed int max value) => we have a problem It causes an error: (03.195915) Error (criu/pipes.c:529): The packetized mode for pipes is not supported yet Signed-off-by: Alexander Mikhalitsyn (Virtuozzo) --- criu/autofs.c | 42 +++++++++++++++++++++++++++++++++++------- 1 file changed, 35 insertions(+), 7 deletions(-) diff --git a/criu/autofs.c b/criu/autofs.c index a2dc60ffc..175b8900e 100644 --- a/criu/autofs.c +++ b/criu/autofs.c @@ -62,25 +62,53 @@ int autofs_parse(struct mount_info *pm) { long pipe_ino = AUTOFS_OPT_UNKNOWN; char **opts; - int nr_opts, i; + int nr_opts, i, ret; split(pm->options, ',', &opts, &nr_opts); if (!opts) return -1; + for (i = 0; i < nr_opts; i++) { if (!strncmp(opts[i], "pipe_ino=", strlen("pipe_ino="))) - pipe_ino = atoi(opts[i] + strlen("pipe_ino=")); + if (xatol(opts[i] + strlen("pipe_ino="), &pipe_ino)) { + pr_err("pipe_ino (%s) mount option parse failed\n", opts[i] + strlen("pipe_ino=")); + ret = -1; + goto free; + } + } + + /* + * We must inform user about bug if pipe_ino is greater than UINT32_MAX, + * because it means that something changed in Linux Kernel virtual fs + * inode numbers generation mechanism. What we have at the moment: + * 1. struct inode i_ino field (include/linux/fs.h in Linux kernel) + * has unsigned long type. + * 2. get_next_ino() function (fs/inode.c), that used for generating inode + * numbers on virtual filesystems (pipefs, debugfs for instance) + * has unsigned int as return type. + * So, it means that ATM it is safe to keep uint32 type for pipe_id field + * in pipe-data.proto.
+ */ + if (pipe_ino > UINT32_MAX) { + pr_err("overflow: pipe_ino > UINT32_MAX\n"); + ret = -1; + goto free; } - for (i = 0; i < nr_opts; i++) - xfree(opts[i]); - free(opts); if (pipe_ino == AUTOFS_OPT_UNKNOWN) { pr_warn("Failed to find pipe_ino option (old kernel?)\n"); - return 0; + ret = 0; + goto free; } - return autofs_gather_pipe(pipe_ino); + ret = autofs_gather_pipe(pipe_ino); + +free: + for (i = 0; i < nr_opts; i++) + xfree(opts[i]); + xfree(opts); + + return ret; } static int autofs_check_fd_stat(struct stat *stat, int prgp, int fd, From 62088c721f08aaec8b63de3904304a3a31dcefea Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Tue, 14 Apr 2020 10:04:28 +0300 Subject: [PATCH 0391/2030] criu: put statement continuation on the same line as the closing bracket We should follow Linux Kernel Coding Style: ... the closing brace is empty on a line of its own, except in the cases where it is followed by a continuation of the same statement, ie ... an else in an if-statement ... https://www.kernel.org/doc/html/v4.10/process/coding-style.html#placing-braces-and-spaces Automatically fixed with: :!git grep --files-with-matches "^\s*else[^{]*{" | xargs :argadd :argdo :%s/}\s*\n\s*\(else[^{]*{\)/} \1/g | update Signed-off-by: Pavel Tikhomirov --- compel/arch/ppc64/src/lib/infect.c | 6 ++---- compel/src/main.c | 9 +++------ criu/arch/ppc64/crtools.c | 3 +-- criu/cr-dump.c | 3 +-- test/zdtm/static/fifo_wronly.c | 3 +-- test/zdtm/static/inotify_system.c | 3 +-- test/zdtm/static/ptrace_sig.c | 3 +-- test/zdtm/static/vsx.c | 3 +-- test/zdtm/transition/epoll.c | 3 +-- 9 files changed, 12 insertions(+), 24 deletions(-) diff --git a/compel/arch/ppc64/src/lib/infect.c b/compel/arch/ppc64/src/lib/infect.c index defed3d85..637acd46d 100644 --- a/compel/arch/ppc64/src/lib/infect.c +++ b/compel/arch/ppc64/src/lib/infect.c @@ -222,8 +222,7 @@ static int get_altivec_regs(pid_t pid, user_fpregs_struct_t *fp) return -1; } pr_debug("Altivec not supported\n"); - } - else {
pr_debug("Dumping Altivec registers\n"); fp->flags |= USER_FPREGS_FL_ALTIVEC; } @@ -251,8 +250,7 @@ static int get_vsx_regs(pid_t pid, user_fpregs_struct_t *fp) return -1; } pr_debug("VSX register's dump not supported.\n"); - } - else { + } else { pr_debug("Dumping VSX registers\n"); fp->flags |= USER_FPREGS_FL_VSX; } diff --git a/compel/src/main.c b/compel/src/main.c index 36127c357..9fc3a924c 100644 --- a/compel/src/main.c +++ b/compel/src/main.c @@ -181,8 +181,7 @@ static void print_ldflags(bool compat) if (uninst_root) { printf("%s/arch/%s/scripts/compel-pack%s.lds.S\n", uninst_root, flags.arch, compat_str); - } - else { + } else { printf("%s/compel/scripts/compel-pack%s.lds.S\n", LIBEXECDIR, compat_str); @@ -222,8 +221,7 @@ static int print_libs(bool is_static) return 1; } printf("%s/%s\n", uninst_root, STATIC_LIB); - } - else { + } else { printf("%s/%s\n", LIBDIR, (is_static) ? STATIC_LIB : DYN_LIB); } @@ -255,8 +253,7 @@ static char *gen_prefix(const char *path) for (i = len - 1; i >= 0; i--) { if (!p1 && path[i] == '.') { p2 = path + i - 1; - } - else if (!p1 && path[i] == '/') { + } else if (!p1 && path[i] == '/') { p1 = path + i + 1; break; } diff --git a/criu/arch/ppc64/crtools.c b/criu/arch/ppc64/crtools.c index 0d9f49c3f..631150c3e 100644 --- a/criu/arch/ppc64/crtools.c +++ b/criu/arch/ppc64/crtools.c @@ -374,8 +374,7 @@ static int __copy_task_regs(user_regs_struct_t *regs, fpstate = &(core->ti_ppc64->tmstate->fpstate); vrstate = &(core->ti_ppc64->tmstate->vrstate); vsxstate = &(core->ti_ppc64->tmstate->vsxstate); - } - else { + } else { gpregs = core->ti_ppc64->gpregs; fpstate = &(core->ti_ppc64->fpstate); vrstate = &(core->ti_ppc64->vrstate); diff --git a/criu/cr-dump.c b/criu/cr-dump.c index a38e47d12..745998afc 100644 --- a/criu/cr-dump.c +++ b/criu/cr-dump.c @@ -1529,8 +1529,7 @@ static int cr_pre_dump_finish(int status) timing_stop(TIME_MEMWRITE); ret = page_xfer_predump_pages(item->pid->real, &xfer, mem_pp); - } - else { + } else { ret = 
page_xfer_dump_pages(&xfer, mem_pp); } diff --git a/test/zdtm/static/fifo_wronly.c b/test/zdtm/static/fifo_wronly.c index 2fbd69e6b..78fc7c8f7 100644 --- a/test/zdtm/static/fifo_wronly.c +++ b/test/zdtm/static/fifo_wronly.c @@ -55,8 +55,7 @@ int main(int argc, char **argv) pr_perror("read error %s", filename); chret = errno; return chret; - } - else if (res == 0) { + } else if (res == 0) { pr_perror("read(%d, rbuf, 7) return 0", fd1); return 1; } diff --git a/test/zdtm/static/inotify_system.c b/test/zdtm/static/inotify_system.c index 3e6b2ad48..f8af3dca2 100644 --- a/test/zdtm/static/inotify_system.c +++ b/test/zdtm/static/inotify_system.c @@ -280,8 +280,7 @@ int errors(int exp_len, int len, char *etalon_buf, char *buf) { fail("Incorrect length of field name."); error++; break; - } - else if (event->len && strncmp(event->name, exp_event->name, event->len)) { + } else if (event->len && strncmp(event->name, exp_event->name, event->len)) { fail("Handled file name %s, expected %s", event->name, exp_event->name); diff --git a/test/zdtm/static/ptrace_sig.c b/test/zdtm/static/ptrace_sig.c index f71517717..b70f8f1b0 100644 --- a/test/zdtm/static/ptrace_sig.c +++ b/test/zdtm/static/ptrace_sig.c @@ -74,8 +74,7 @@ int main(int argc, char ** argv) if (cpid < 0) { pr_perror("fork failed"); return 1; - } - else if (cpid == 0) { + } else if (cpid == 0) { close(child_pipe[0]); return child(child_pipe[1]); } diff --git a/test/zdtm/static/vsx.c b/test/zdtm/static/vsx.c index be02cfe10..e7d81b12c 100644 --- a/test/zdtm/static/vsx.c +++ b/test/zdtm/static/vsx.c @@ -388,8 +388,7 @@ int main(int argc, char *argv[]) test_msg("Data mismatch\n"); fail(); } - } - else { + } else { test_msg("The CPU is missing some features.\n"); fail(); } diff --git a/test/zdtm/transition/epoll.c b/test/zdtm/transition/epoll.c index 4eac5214c..6ab436889 100644 --- a/test/zdtm/transition/epoll.c +++ b/test/zdtm/transition/epoll.c @@ -181,8 +181,7 @@ int main(int argc, char **argv) fail("waitpid error: 
%m\n"); counter++; continue; - } - else { + } else { rv = WEXITSTATUS(rv); if (rv < MAX_EXIT_CODE && rv > SUCCESS) { fail("Child failed: %s (%d)\n", From ef7ef9cfa0c0ae4a2777c72b9facf994068b817e Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Tue, 14 Apr 2020 18:03:38 +0000 Subject: [PATCH 0392/2030] kerndat: remove duplicate call to kerndat_socket_netns() kerndat_socket_netns() is called twice. We keep the latter to avoid changing the behavior. Signed-off-by: Nicolas Viennot --- criu/kerndat.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/criu/kerndat.c b/criu/kerndat.c index 0c6910da9..0421997af 100644 --- a/criu/kerndat.c +++ b/criu/kerndat.c @@ -1068,8 +1068,6 @@ int kerndat_init(void) ret = kerndat_tcp_repair(); if (!ret) ret = kerndat_compat_restore(); - if (!ret) - ret = kerndat_socket_netns(); if (!ret) ret = kerndat_tun_netns(); if (!ret) From 2c2fdd3334078f5eefea4f82f0df0fccfc8a9238 Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Mon, 13 Apr 2020 17:20:34 +0000 Subject: [PATCH 0393/2030] parasite-msg: %u is not implemented for parasite code Changed all the %u into %d. Ideally, we should implement the %u format for parasite code. 
Signed-off-by: Nicolas Viennot --- criu/pie/parasite.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/criu/pie/parasite.c b/criu/pie/parasite.c index 64b5bbb3e..d83978317 100644 --- a/criu/pie/parasite.c +++ b/criu/pie/parasite.c @@ -53,7 +53,7 @@ static int mprotect_vmas(struct parasite_dump_pages_args *args) vma = vmas + i; ret = sys_mprotect((void *)vma->start, vma->len, vma->prot | args->add_prot); if (ret) { - pr_err("mprotect(%08lx, %lu) failed with code %d\n", + pr_err("mprotect(%08lx, %ld) failed with code %d\n", vma->start, vma->len, ret); break; } @@ -102,7 +102,7 @@ static int dump_pages(struct parasite_dump_pages_args *args) } if (spliced_bytes != args->nr_pages * PAGE_SIZE) { sys_close(p); - pr_err("Can't splice all pages to pipe (%lu/%d)\n", spliced_bytes, args->nr_pages); + pr_err("Can't splice all pages to pipe (%ld/%d)\n", spliced_bytes, args->nr_pages); return -1; } From 42b5700b72c0bebbef113554c064827a3ab40b18 Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Wed, 15 Apr 2020 11:34:47 +0300 Subject: [PATCH 0394/2030] kerndat remove duplicate call to kerndat_nsid() Func kerndat_nsid() is called twice. v2: leave kerndat_nsid call near kerndat_link_nsid Signed-off-by: Pavel Tikhomirov --- criu/kerndat.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/criu/kerndat.c b/criu/kerndat.c index 0421997af..0b6d53bc7 100644 --- a/criu/kerndat.c +++ b/criu/kerndat.c @@ -1092,8 +1092,6 @@ int kerndat_init(void) ret = kerndat_vdso_preserves_hint(); if (!ret) ret = kerndat_socket_netns(); - if (!ret) - ret = kerndat_nsid(); if (!ret) ret = kerndat_x86_has_ptrace_fpu_xsave_bug(); if (!ret) From 7dc89376b85ddea408d43527ff42f4e86ea77a41 Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Wed, 1 Apr 2020 10:43:25 +0300 Subject: [PATCH 0395/2030] pstree: improve error handling in read_pstree_image First don't free pstree_item as they are allocated with shmalloc on restore. Second always pstree_entry__free_unpacked PstreeEntry. 
Third remove all breaks replacing them with implicit goto err, so that it would be easier to understand that we are on the error path. Fourth split out code for reading one pstree item into a separate function. Sadly there is not much use in xfree-ing pi->threads because in case of an error we still have ->threads unfreed from previous entries anyway. But at least some cleanup can be done here. Signed-off-by: Pavel Tikhomirov --- criu/pstree.c | 197 ++++++++++++++++++++++++++------------------------ 1 file changed, 103 insertions(+), 94 deletions(-) diff --git a/criu/pstree.c b/criu/pstree.c index d0e81bfad..5de367688 100644 --- a/criu/pstree.c +++ b/criu/pstree.c @@ -498,11 +498,109 @@ static int read_pstree_ids(struct pstree_item *pi) return 0; } +/* + * Returns <0 on error, 0 on eof and >0 on successful read + */ +static int read_one_pstree_item(struct cr_img *img, pid_t *pid_max) +{ + struct pstree_item *pi; + PstreeEntry *e; + int ret, i; + + ret = pb_read_one_eof(img, &e, PB_PSTREE); + if (ret <= 0) + return ret; + + ret = -1; + pi = lookup_create_item(e->pid); + if (pi == NULL) + goto err; + BUG_ON(pi->pid->state != TASK_UNDEF); + + /* + * All pids should be added in the tree to be able to find + * free pid-s for helpers. pstree_item for these pid-s will + * be initialized when we meet PstreeEntry with this pid or + * we will create helpers for them. 
+ */ + if (lookup_create_item(e->pgid) == NULL) + goto err; + if (lookup_create_item(e->sid) == NULL) + goto err; + + pi->pid->ns[0].virt = e->pid; + if (e->pid > *pid_max) + *pid_max = e->pid; + pi->pgid = e->pgid; + if (e->pgid > *pid_max) + *pid_max = e->pgid; + pi->sid = e->sid; + if (e->sid > *pid_max) + *pid_max = e->sid; + pi->pid->state = TASK_ALIVE; + + if (e->ppid == 0) { + if (root_item) { + pr_err("Parent missed on non-root task " + "with pid %d, image corruption!\n", e->pid); + goto err; + } + root_item = pi; + pi->parent = NULL; + } else { + struct pid *pid; + struct pstree_item *parent; + + pid = pstree_pid_by_virt(e->ppid); + if (!pid || pid->state == TASK_UNDEF || pid->state == TASK_THREAD) { + pr_err("Can't find a parent for %d\n", vpid(pi)); + goto err; + } + + parent = pid->item; + pi->parent = parent; + list_add(&pi->sibling, &parent->children); + } + + pi->nr_threads = e->n_threads; + pi->threads = xmalloc(e->n_threads * sizeof(struct pid)); + if (!pi->threads) + goto err; + + for (i = 0; i < e->n_threads; i++) { + struct pid *node; + pi->threads[i].real = -1; + pi->threads[i].ns[0].virt = e->threads[i]; + pi->threads[i].state = TASK_THREAD; + pi->threads[i].item = NULL; + if (i == 0) + continue; /* A thread leader is in a tree already */ + node = lookup_create_pid(pi->threads[i].ns[0].virt, &pi->threads[i]); + + BUG_ON(node == NULL); + if (node != &pi->threads[i]) { + pr_err("Unexpected task %d in a tree %d\n", e->threads[i], i); + goto err; + } + } + + task_entries->nr_threads += e->n_threads; + task_entries->nr_tasks++; + + /* note: we don't fail if we have empty ids */ + if (read_pstree_ids(pi) < 0) + goto err; + + ret = 1; +err: + pstree_entry__free_unpacked(e, NULL); + return ret; +} + static int read_pstree_image(pid_t *pid_max) { - int ret = 0, i; struct cr_img *img; - struct pstree_item *pi; + int ret; pr_info("Reading image tree\n"); @@ -510,99 +608,10 @@ static int read_pstree_image(pid_t *pid_max) if (!img) return -1; - while (1) { 
- PstreeEntry *e; + do { + ret = read_one_pstree_item(img, pid_max); + } while (ret > 0); - ret = pb_read_one_eof(img, &e, PB_PSTREE); - if (ret <= 0) - break; - - ret = -1; - pi = lookup_create_item(e->pid); - if (pi == NULL) - break; - BUG_ON(pi->pid->state != TASK_UNDEF); - - /* - * All pids should be added in the tree to be able to find - * free pid-s for helpers. pstree_item for these pid-s will - * be initialized when we meet PstreeEntry with this pid or - * we will create helpers for them. - */ - if (lookup_create_item(e->pgid) == NULL) - break; - if (lookup_create_item(e->sid) == NULL) - break; - - pi->pid->ns[0].virt = e->pid; - if (e->pid > *pid_max) - *pid_max = e->pid; - pi->pgid = e->pgid; - if (e->pgid > *pid_max) - *pid_max = e->pgid; - pi->sid = e->sid; - if (e->sid > *pid_max) - *pid_max = e->sid; - pi->pid->state = TASK_ALIVE; - - if (e->ppid == 0) { - if (root_item) { - pr_err("Parent missed on non-root task " - "with pid %d, image corruption!\n", e->pid); - goto err; - } - root_item = pi; - pi->parent = NULL; - } else { - struct pid *pid; - struct pstree_item *parent; - - pid = pstree_pid_by_virt(e->ppid); - if (!pid || pid->state == TASK_UNDEF || pid->state == TASK_THREAD) { - pr_err("Can't find a parent for %d\n", vpid(pi)); - pstree_entry__free_unpacked(e, NULL); - xfree(pi); - goto err; - } - - parent = pid->item; - pi->parent = parent; - list_add(&pi->sibling, &parent->children); - } - - pi->nr_threads = e->n_threads; - pi->threads = xmalloc(e->n_threads * sizeof(struct pid)); - if (!pi->threads) - break; - - for (i = 0; i < e->n_threads; i++) { - struct pid *node; - pi->threads[i].real = -1; - pi->threads[i].ns[0].virt = e->threads[i]; - pi->threads[i].state = TASK_THREAD; - pi->threads[i].item = NULL; - if (i == 0) - continue; /* A thread leader is in a tree already */ - node = lookup_create_pid(pi->threads[i].ns[0].virt, &pi->threads[i]); - - BUG_ON(node == NULL); - if (node != &pi->threads[i]) { - pr_err("Unexpected task %d in a tree 
%d\n", e->threads[i], i); - return -1; - } - } - - task_entries->nr_threads += e->n_threads; - task_entries->nr_tasks++; - - pstree_entry__free_unpacked(e, NULL); - - ret = read_pstree_ids(pi); - if (ret < 0) - goto err; - } - -err: close_image(img); return ret; } From c83a0aae2c71b66abf613c8ef5fe97b7311e1987 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Sat, 11 Apr 2020 06:37:38 +0300 Subject: [PATCH 0396/2030] proc: parse clock symbolic names in /proc/pid/timens_offsets Clock IDs in this file has been replaced by clock symbolic names. Now it looks like this: $ cat /proc/774/timens_offsets monotonic 864000 0 boottime 1728000 0 Signed-off-by: Andrei Vagin --- criu/proc_parse.c | 22 ++++++++++++---------- criu/timens.c | 6 ++---- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/criu/proc_parse.c b/criu/proc_parse.c index c73fa9776..4a22700aa 100644 --- a/criu/proc_parse.c +++ b/criu/proc_parse.c @@ -1484,25 +1484,27 @@ int parse_timens_offsets(struct timespec *boff, struct timespec *moff) } while (fgets(buf, BUF_SIZE, f)) { int64_t sec, nsec; - int clockid; + char clockid[10]; - if (sscanf(buf, "%d %"PRId64" %"PRId64"\n", &clockid, &sec, &nsec) != 3) { + if (sscanf(buf, "%9s %"PRId64" %"PRId64"\n", clockid, &sec, &nsec) != 3) { pr_err("Unable to parse: %s\n", buf); goto out; } - switch (clockid) { - case CLOCK_MONOTONIC: + clockid[sizeof(clockid) - 1] = 0; + if (strcmp(clockid, "monotonic") == 0 || + strcmp(clockid, __stringify(CLOCK_MONOTONIC)) == 0) { moff->tv_sec = sec; moff->tv_nsec = nsec; - break; - case CLOCK_BOOTTIME: + continue; + } + if (strcmp(clockid, "boottime") == 0 || + strcmp(clockid, __stringify(CLOCK_BOOTTIME)) == 0) { boff->tv_sec = sec; boff->tv_nsec = nsec; - break; - default: - pr_err("Unknown clockid: %d\n", clockid); - goto out; + continue; } + pr_err("Unknown clockid: %s\n", clockid); + goto out; } exit_code = 0; out: diff --git a/criu/timens.c b/criu/timens.c index f3b50fdff..2a7e95284 100644 --- a/criu/timens.c +++ 
b/criu/timens.c @@ -92,8 +92,7 @@ int prepare_timens(int id) ts.tv_nsec = te->monotonic->tv_nsec - ts.tv_nsec; normalize_timespec(&ts); - pr_debug("timens: %d %ld %ld\n", - CLOCK_MONOTONIC, ts.tv_sec, ts.tv_nsec); + pr_debug("timens: monotonic %ld %ld\n", ts.tv_sec, ts.tv_nsec); if (dprintf(fd, "%d %ld %ld\n", CLOCK_MONOTONIC, ts.tv_sec, ts.tv_nsec) < 0) { pr_perror("Unable to set a monotonic clock offset"); @@ -109,8 +108,7 @@ int prepare_timens(int id) ts.tv_nsec = te->boottime->tv_nsec - ts.tv_nsec; normalize_timespec(&ts); - pr_debug("timens: %d %ld %ld\n", - CLOCK_BOOTTIME, ts.tv_sec, ts.tv_nsec); + pr_debug("timens: boottime %ld %ld\n", ts.tv_sec, ts.tv_nsec); if (dprintf(fd, "%d %ld %ld\n", CLOCK_BOOTTIME, ts.tv_sec, ts.tv_nsec) < 0) { pr_perror("Unable to set a boottime clock offset"); From 5c5e7695a51318b17e3d982df8231ac83971641c Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Mon, 20 Apr 2020 15:45:51 -0700 Subject: [PATCH 0397/2030] get_clean_mount: demote an error to a warning When testing runc checkpointing, I frequently see the following error: > Error (criu/mount.c:1107): mnt: Can't create a temporary directory: Read-only file system This happens because container root is read-only mount. The error here is not actually fatal since it is handled later in ns_open_mountpoint() (at least since [1] is fixed), but it is shown as error in runc integration tests. Since it is not fatal, let's demote it to a warning to avoid confusion. 
[1] https://github.com/checkpoint-restore/criu/issues/520 Signed-off-by: Kir Kolyshkin --- criu/mount.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/criu/mount.c b/criu/mount.c index 180f2a62d..89b8cff59 100644 --- a/criu/mount.c +++ b/criu/mount.c @@ -1104,7 +1104,7 @@ static char *get_clean_mnt(struct mount_info *mi, char *mnt_path_tmp, char *mnt_ if (mnt_path == NULL && errno == ENOENT) mnt_path = mkdtemp(mnt_path_root); if (mnt_path == NULL) { - pr_perror("Can't create a temporary directory"); + pr_warn("Can't create a temporary directory: %s\n", strerror(errno)); return NULL; } From 95ead14874244f3c12e5970a74d1f4dd2433d652 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 29 Apr 2020 16:31:49 +0300 Subject: [PATCH 0398/2030] =?UTF-8?q?criu:=20Version=20=CF=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The long-tempting release with lots of new features on board. We finally have time namespace support, a great improvement in pre-dump memory consumption, new clone3 support and much more. Signed-off-by: Pavel Emelyanov --- Makefile.versions | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile.versions b/Makefile.versions index f3adcb0a6..3ccc48185 100644 --- a/Makefile.versions +++ b/Makefile.versions @@ -1,10 +1,10 @@ # # CRIU version. 
CRIU_VERSION_MAJOR := 3 -CRIU_VERSION_MINOR := 13 +CRIU_VERSION_MINOR := 14 CRIU_VERSION_SUBLEVEL := CRIU_VERSION_EXTRA := -CRIU_VERSION_NAME := Silicon Willet +CRIU_VERSION_NAME := Platinum Peacock CRIU_VERSION := $(CRIU_VERSION_MAJOR)$(if $(CRIU_VERSION_MINOR),.$(CRIU_VERSION_MINOR))$(if $(CRIU_VERSION_SUBLEVEL),.$(CRIU_VERSION_SUBLEVEL))$(if $(CRIU_VERSION_EXTRA),.$(CRIU_VERSION_EXTRA)) export CRIU_VERSION_MAJOR CRIU_VERSION_MINOR CRIU_VERSION_SUBLEVEL From f2edc1e1999e7c495af404fc6c38b82c391854ec Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Wed, 6 May 2020 13:13:38 +0000 Subject: [PATCH 0399/2030] Update certificates for failing tls based tests When using zdtm.py with --tls it started to fail as the certificates seem to have expired. Following commands have been used to re-generate the certificate: # Generate CA key and certificate echo -ne "ca\ncert_signing_key" > temp certtool --generate-privkey > cakey.pem certtool --generate-self-signed \ --template temp \ --load-privkey cakey.pem \ --outfile cacert.pem # Generate server key and certificate echo -ne "cn=$HOSTNAME\nencryption_key\nsigning_key" > temp certtool --generate-privkey > key.pem certtool --generate-certificate \ --template temp \ --load-privkey key.pem \ --load-ca-certificate cacert.pem \ --load-ca-privkey cakey.pem \ --outfile cert.pem rm temp cakey.pem Without this tests will fail in Travis. 
Signed-off-by: Adrian Reber --- test/pki/cacert.pem | 42 +++--- test/pki/cert.pem | 44 +++---- test/pki/key.pem | 310 ++++++++++++++++++++++---------------------- 3 files changed, 198 insertions(+), 198 deletions(-) diff --git a/test/pki/cacert.pem b/test/pki/cacert.pem index 2f8706616..65afd2aa7 100644 --- a/test/pki/cacert.pem +++ b/test/pki/cacert.pem @@ -1,23 +1,23 @@ -----BEGIN CERTIFICATE----- -MIID0TCCAjmgAwIBAgIUWzgmx9p7y7mkrNptGX9+0acjpa4wDQYJKoZIhvcNAQEL -BQAwADAeFw0xOTA1MDYxMjAzMDJaFw0yMDA1MDUxMjAzMDJaMAAwggGiMA0GCSqG -SIb3DQEBAQUAA4IBjwAwggGKAoIBgQD0p0lJUlq917GmJuCBeP2eLNd1/MUg1ojy -s7rrpinPYtLZqqquUhp32lfQtt3uJLjkhTrseZd86zWi3SMZlGs8zGGmKfqg0vaG -BXIgpEIr5C0wU9995kL9A6LS+eFZR6vJQETO5T22tjponoqEPOXeU8VaiC9jNipC -uFJT0wyC0bKIo+TUn573kxsGMt8jMOv0tc/okUlH16UAsYrmN7kWzgkWTJPddB7S -v5a9ibpPkbh+wrIGK5A6V5hTZ8U1wz2bE6/Xp+qjsD2R3jeU6f1tDvc8FZilabQy -Rmbxggucl1G3Ulo6Nvor1lhog72eZlHZujzf/5e/aMiZ7Br6plZ1/WTwtNgoCw6A -rgpLDraasQohiK6opYs2rr7uuiQxPLLVWE/RryXwUEoPXzxaf782XtXxkB0UhGvz -y2JBxCVPn7uUGuyEYywjTjI2UFvsMcXnMiQ4WaAfMbAmrBWM7EQ4b7VpD2c+OZkQ -J/AJeg85/ovTAtHPjhPP+0a9hnirktkCAwEAAaNDMEEwDwYDVR0TAQH/BAUwAwEB -/zAPBgNVHQ8BAf8EBQMDBwQAMB0GA1UdDgQWBBQOg6AA8Qu/m/O/II5spzYsTnsn -pjANBgkqhkiG9w0BAQsFAAOCAYEA1KKtw+ILKOg1AhGwgPsJXAWZoSIt7hdLaJ3P -WGyTWHLKKoJiGlLj3bSsJcMmMO+UwHBH9lmXrOWe/xcOvob2H+7dXbddQ0pX1wzK -KJKzSG35FZ2BfuSn5oEDtRsHnN2Ikc5MYz1a+F4w2tVL/Hcxld+oSAodDlCbGoe+ -0MkI5f1KhdAw00l/5IO7xPOcThjHw+nB5/cZTQ+l4zLWCWaXkor4IAEq/plPcdX1 -uoLSj3JruLz7/ts/EgG+ARAzXQrJ+LM2hdPB1NiaVxFq7MSWM6FybUdmMYgbP5s4 -RMNqI/M+bU9K5LRySDaiPhDXUoVULuqG1a23GQwXLOjF0JbrUQewfAaTO7TaPFh1 -lr25j9Fc9/gcXZjvLl+CEIv6P/haGOwySCTCks0F5bDehbLjZStPmugcnJflXdBn -lzoejlw2rePojQMlffQsaRGmmhj0beU4WQBfGACcZQB8GFNxQB8aynf0CK7Dvvb0 -9c9y4k0gHL7RxeLoQfq+smzKm+Eo +MIID0TCCAjmgAwIBAgIUF0WMpaJUi0+wXbOKQ6P41ZeRwdQwDQYJKoZIhvcNAQEL +BQAwADAeFw0yMDA1MDYxMzEwMzNaFw0yMTA1MDYxMzEwMzNaMAAwggGiMA0GCSqG +SIb3DQEBAQUAA4IBjwAwggGKAoIBgQDMPLu3o8RMlWlblK03GynmvNZDQKW0ZTYg 
+dE/Dlr/rVKo8KMAm6KbmpUJq8HUtaP/9Epf2eY9+LNNpBFKJuURxD23ObNdbU6o6 +hI4LRQVYsX6FB+6DrIXfD61zxebdpPWlCyoEd43firfwMoeGyarqyrZE+UXkR4fF +LhVm/4kQ8qRZte+GAIVp0SVMlNyhQM5AmTZPAO3iYRhZObkVFUTWnwjdxKOx1VhU +vRdhwI4N5x9EGiq6Lzc0iABxyIZ1mHOYhxDQv5gL4CECUZgzVxJp6DVP/v2w4254 +JeKXOPUm6YHxXxzJyT73mdz0/7VpqOZa5yiZKRBtHoWyovuTzu8jS6TgvjNrmAoO +kyu13jTlBDFUfVKBDoRQMhjt6wpMkUItYvVRq/RYLvXGU7VCzUHoVdBxNNO+MrZ2 +4ebN5l5/CSrOeaa0EylLLlkW7Q0JqmZJpZfK8/AnMyp71AkHTps5LZv/sLbPWwh1 +3XTZA1e+k9lS1Z480HjtNKurL526k8ECAwEAAaNDMEEwDwYDVR0TAQH/BAUwAwEB +/zAPBgNVHQ8BAf8EBQMDBwQAMB0GA1UdDgQWBBR5rZcu6Unhc0ZuuflmHEOa9bBu +1zANBgkqhkiG9w0BAQsFAAOCAYEAehfgQ1y3GTJ3LPQQOgn3AB/Sf+fGSNecZsTO +UxGFxbNGvl7UeFCS8Z1/h57AeIXSvE+BGfMvtq9OSl6t+3w9RfbIdzWzYYILoAVM +t0FkwrjLtVIlUSWD+Aia01ESjw+3ENceGcuo9jVyAI3MMkGftFc7U1UyNB9HY//x +79uUavmWioc3odooC0FOosIzV2KHCUPAnpN7TtNlefe4JisMa6WGXAk9CNU9t0wG +JLHK3E0LrtEreIMBmK6zyxH3OwMi1x/3sFSoon09/fsOLBbtUY+401a+nT+vMTKK +KpVvAC4tsEyFH0vyk68c6Z4VKO5aqoAtaQJZk2A2z1cBAz4aArgx1glkaNjd7sZn +lSEhnpP7uVUyYU9fyQ46zpIZm38gx6+95XL2gCQnEIUEeOCXMonXaavMqRx5zL8z +hihMSoLhE6Wx/tzxKlqz0GN8E0CTlpB1MmvCvOkaHzSq7Yc19Bez8jr6PNfZVsFv +s9a4qqhGKaLz7HKpG0863oCO4dU1 -----END CERTIFICATE----- diff --git a/test/pki/cert.pem b/test/pki/cert.pem index a0946ee41..f5a0452b5 100644 --- a/test/pki/cert.pem +++ b/test/pki/cert.pem @@ -1,24 +1,24 @@ -----BEGIN CERTIFICATE----- -MIIEAzCCAmugAwIBAgIUKV6zLC//OJDnmOYBuIG1Gvmv+V4wDQYJKoZIhvcNAQEL -BQAwADAeFw0xOTA1MDYxMjAzMDJaFw0yMDA1MDUxMjAzMDJaMBQxEjAQBgNVBAMT -CWxvY2FsaG9zdDCCAaIwDQYJKoZIhvcNAQEBBQADggGPADCCAYoCggGBANX1nv4J -U8+TEb2bWej5O2nOowpw2zSYTDAQ1oyAvV3P99Y6GZCuVZ1uT/7DWat0uRpcdmNi -HvownkO4VmDZdVqgiK1eHzY5YBJ7hBVDs3tpWNuN7eJPjnskNmJqKQ6l9rxYl/au -781T+tdtHp1ATtToMgVJxWaUx5lrpEJdmYc8Y6GpAA42D+rI3o4Sll3mI5rPCk16 -QY5dT2lnL2HuCKzM2bjWat6b3lMpfNz3A/blU9E/462Zxr/yKK/0yy3SBZhYzrrQ -1/erjIpm4I0sakHIOexM1AQliFiowFzVvr/paiXApWGOcuBJVIbmPI/bEGuTh0nr -3pmiF0YrkDCRhargElYcz64KQ9IxPFCKcKjkMnFPjTStZ7rcMyqKvGczqFaM5a6c 
-9gIn2ieUrVZ38yvtI5Lo/uxZ5IjXqB1Fdg4xi2tyf9WMHKy2tydBr9bTjfQRXfNT -/Zm3woDXOYsHzj+Sf6ntLVCkO1fnczw03fPRV03/uVRa5mPGyyj9xdPBqwIDAQAB -o2EwXzAMBgNVHRMBAf8EAjAAMA8GA1UdDwEB/wQFAwMHoAAwHQYDVR0OBBYEFEtF -ELehnIjLzoh/W51TGm2B00QAMB8GA1UdIwQYMBaAFA6DoADxC7+b878gjmynNixO -eyemMA0GCSqGSIb3DQEBCwUAA4IBgQA17NZCaiCgD9P4GPWgdVNkWUrmc8itJuIB -z3c9RdJGduxb9W/D7IW//D3hIblOPorNi3+08kO/IRMGah874MDCprMNppk94WGj -Kgqi/rsxq+rT6bcZXxMrcOIg0j2EvTPIgPh7trd8nHVWxNT/hvFClDtBJ2ssL2Tz -76EA7smDCUsfdzFJ2Xvk95fSTL49nfT2j9N/YoLaBQtCIxWAVZHKiCF2K+yXufHz -B/9UlXwsPJfqxM75dYWXFEqvhNf08YRHT1e1GRrybNGrNKF864KbLsnASdK4N5wu -sK9vZJ7VkLDQz+YpZkbm+UgOYK/BY3M8IX+F+WngV+43fr6Wh89TSgD7acEBvQTm -q1y9FipRvz0my7fwBh6UlYDja6/3yw6/YfN7uMFGsOOSgpNDCrMLqesf8l1HdQUF -VaVJyDjgFswV9KykAeJK2KU8QI7TGHv9soW60sr97DgUtCh4a6OPXLt79Ji3RSNw -MbU54JnpnfmMAj/0suDymdrJWv8EJKc= +MIIEAjCCAmqgAwIBAgIURFKv2lJVvIEfr7yjE7pK0BdK9W8wDQYJKoZIhvcNAQEL +BQAwADAeFw0yMDA1MDYxMzExMDVaFw0yMTA1MDYxMzExMDVaMBMxETAPBgNVBAMT +CGZlZG9yYTAxMIIBojANBgkqhkiG9w0BAQEFAAOCAY8AMIIBigKCAYEAtS2SivLa +AfsZ1X9xun/6i/1UNSxG5kPvVkfTPWOwSgCYyqn7u2vZsIsvcurqHQBEg1SWF4XA +dcFeCxdDN8em6ISfSOUFL9TSQrZ5eFQYcVTX9IFXDWXbFSC8AgJpYhrPf2O0vxQN +QFD4MpNXrLpEkc3vLrg/JhySC1HAYfO/nuJV3ZnAxJZCIv4upT5j0VjzfPw6WR/O +UUlcmLnh0+qMLv7G2dRLjdAsV1U8k7eykk+z9qXeKAjgFBEEgLTVbHR1zbrUpw42 +DYR7SQpEJ8wrJ9qHdI4H/9MnPu8O75kfgauQV4s/oIgWmVeHMDxpGaLEKgcYVE2F +/HlMfFvCrrZKKXi1/k35rG8sg6cP/bRuRZlgO0xFTrW0QKmWoBLqyqVMvifdkT0E +BWZ4eZWTDn2Rr4HMe7ov54QmKXaXp2VdOGzO1Efv9KvDildb5aEqBrVz+tU+C/Cd +g5p1d2g2m1Uk9wjcYbXOWx9fd9Ur/fxJuHtauxOW4gzFLcTOgvUgVmSXAgMBAAGj +YTBfMAwGA1UdEwEB/wQCMAAwDwYDVR0PAQH/BAUDAwegADAdBgNVHQ4EFgQUh+RA +Kg+4YVs/x9A0hxJlkDxG7IQwHwYDVR0jBBgwFoAUea2XLulJ4XNGbrn5ZhxDmvWw +btcwDQYJKoZIhvcNAQELBQADggGBAMLv1btjFDwxds0XlPH792ECXgYOXPC8cJAx +MJ+qdop/nNW8iUUoOjwy1f9jXBjz1bHKJ+XnyTz4rhLWIHVnhsiLMQ+ljHgTtTjY +3K6Lzo9+KMG/WDRajID8Sk2/B3jnCjgdzb6C4TZQ1mxnhSD7Cj/VitkVLP3D2nYx +55bXqAHAtSkW965LAxILSzt04a8MM9ZxNzyruGVI+jPF0OSgDNcJpcwUw8w8V1lj 
+c5TC9qqRlokFGphR45vblTw3GaXuZ5TTLOdix0QOhl52H2BtCrvTWJLREC5VJ87C +2FjXIz5zj1WZ3CDZVbpFhTiZy/chHJNaiiDWe6QhD40VvKlo/netp+rxwrpGsFkC +mtyRKrutdKi0vDvI6sKu13gLxetL+Bd6XWZ+XRsA/687QE+epxoh8sTvIt9j0z5r +0yIv/0eTKVZLQ6cAXSoR1g1GwTsWf0eBRgePdl4MCXVAs8+X1hTqnrJomiUdm93C +d2+QakSAhreCtlqMz/AULryv3KgaEg== -----END CERTIFICATE----- diff --git a/test/pki/key.pem b/test/pki/key.pem index eda1aa761..7acde5181 100644 --- a/test/pki/key.pem +++ b/test/pki/key.pem @@ -3,180 +3,180 @@ Public Key Info: Key Security Level: High (3072 bits) modulus: - 00:d5:f5:9e:fe:09:53:cf:93:11:bd:9b:59:e8:f9:3b - 69:ce:a3:0a:70:db:34:98:4c:30:10:d6:8c:80:bd:5d - cf:f7:d6:3a:19:90:ae:55:9d:6e:4f:fe:c3:59:ab:74 - b9:1a:5c:76:63:62:1e:fa:30:9e:43:b8:56:60:d9:75 - 5a:a0:88:ad:5e:1f:36:39:60:12:7b:84:15:43:b3:7b - 69:58:db:8d:ed:e2:4f:8e:7b:24:36:62:6a:29:0e:a5 - f6:bc:58:97:f6:ae:ef:cd:53:fa:d7:6d:1e:9d:40:4e - d4:e8:32:05:49:c5:66:94:c7:99:6b:a4:42:5d:99:87 - 3c:63:a1:a9:00:0e:36:0f:ea:c8:de:8e:12:96:5d:e6 - 23:9a:cf:0a:4d:7a:41:8e:5d:4f:69:67:2f:61:ee:08 - ac:cc:d9:b8:d6:6a:de:9b:de:53:29:7c:dc:f7:03:f6 - e5:53:d1:3f:e3:ad:99:c6:bf:f2:28:af:f4:cb:2d:d2 - 05:98:58:ce:ba:d0:d7:f7:ab:8c:8a:66:e0:8d:2c:6a - 41:c8:39:ec:4c:d4:04:25:88:58:a8:c0:5c:d5:be:bf - e9:6a:25:c0:a5:61:8e:72:e0:49:54:86:e6:3c:8f:db - 10:6b:93:87:49:eb:de:99:a2:17:46:2b:90:30:91:85 - aa:e0:12:56:1c:cf:ae:0a:43:d2:31:3c:50:8a:70:a8 - e4:32:71:4f:8d:34:ad:67:ba:dc:33:2a:8a:bc:67:33 - a8:56:8c:e5:ae:9c:f6:02:27:da:27:94:ad:56:77:f3 - 2b:ed:23:92:e8:fe:ec:59:e4:88:d7:a8:1d:45:76:0e - 31:8b:6b:72:7f:d5:8c:1c:ac:b6:b7:27:41:af:d6:d3 - 8d:f4:11:5d:f3:53:fd:99:b7:c2:80:d7:39:8b:07:ce - 3f:92:7f:a9:ed:2d:50:a4:3b:57:e7:73:3c:34:dd:f3 - d1:57:4d:ff:b9:54:5a:e6:63:c6:cb:28:fd:c5:d3:c1 - ab: + 00:b5:2d:92:8a:f2:da:01:fb:19:d5:7f:71:ba:7f:fa + 8b:fd:54:35:2c:46:e6:43:ef:56:47:d3:3d:63:b0:4a + 00:98:ca:a9:fb:bb:6b:d9:b0:8b:2f:72:ea:ea:1d:00 + 44:83:54:96:17:85:c0:75:c1:5e:0b:17:43:37:c7:a6 + e8:84:9f:48:e5:05:2f:d4:d2:42:b6:79:78:54:18:71 + 
54:d7:f4:81:57:0d:65:db:15:20:bc:02:02:69:62:1a + cf:7f:63:b4:bf:14:0d:40:50:f8:32:93:57:ac:ba:44 + 91:cd:ef:2e:b8:3f:26:1c:92:0b:51:c0:61:f3:bf:9e + e2:55:dd:99:c0:c4:96:42:22:fe:2e:a5:3e:63:d1:58 + f3:7c:fc:3a:59:1f:ce:51:49:5c:98:b9:e1:d3:ea:8c + 2e:fe:c6:d9:d4:4b:8d:d0:2c:57:55:3c:93:b7:b2:92 + 4f:b3:f6:a5:de:28:08:e0:14:11:04:80:b4:d5:6c:74 + 75:cd:ba:d4:a7:0e:36:0d:84:7b:49:0a:44:27:cc:2b + 27:da:87:74:8e:07:ff:d3:27:3e:ef:0e:ef:99:1f:81 + ab:90:57:8b:3f:a0:88:16:99:57:87:30:3c:69:19:a2 + c4:2a:07:18:54:4d:85:fc:79:4c:7c:5b:c2:ae:b6:4a + 29:78:b5:fe:4d:f9:ac:6f:2c:83:a7:0f:fd:b4:6e:45 + 99:60:3b:4c:45:4e:b5:b4:40:a9:96:a0:12:ea:ca:a5 + 4c:be:27:dd:91:3d:04:05:66:78:79:95:93:0e:7d:91 + af:81:cc:7b:ba:2f:e7:84:26:29:76:97:a7:65:5d:38 + 6c:ce:d4:47:ef:f4:ab:c3:8a:57:5b:e5:a1:2a:06:b5 + 73:fa:d5:3e:0b:f0:9d:83:9a:75:77:68:36:9b:55:24 + f7:08:dc:61:b5:ce:5b:1f:5f:77:d5:2b:fd:fc:49:b8 + 7b:5a:bb:13:96:e2:0c:c5:2d:c4:ce:82:f5:20:56:64 + 97: public exponent: 01:00:01: private exponent: - 1e:38:b0:79:7f:85:c8:17:24:f5:5c:41:29:e8:32:5d - 32:a3:d2:f0:b7:f5:c8:e1:52:14:be:c9:5f:d1:df:b3 - 65:75:6c:05:7a:6b:35:8a:a4:2f:46:73:ff:71:79:6e - 3f:eb:f9:88:f6:2e:1b:f6:cc:14:12:b0:98:c3:7e:91 - 0b:85:e2:bf:1d:b7:82:09:30:f3:23:68:01:85:13:94 - 80:c9:9a:55:94:96:da:30:48:a0:29:ec:86:da:1b:d5 - 2b:2b:74:63:92:b8:2a:8f:87:29:f0:ae:d7:55:63:0d - 2d:b3:0b:0e:2d:84:dc:d5:08:b5:ac:a0:f7:29:9d:71 - 89:3d:27:6a:eb:96:f5:4e:9b:8a:dc:14:82:0a:c7:5c - 16:1c:d2:7e:b9:1b:13:69:d8:b2:b1:b1:7e:aa:a9:ad - 06:ce:66:0e:5b:50:10:42:2a:0a:fd:29:14:f7:09:63 - c1:20:18:5f:27:81:46:12:8c:b8:f4:89:a6:3d:55:a1 - d4:64:fc:f2:db:d7:9c:f5:be:f7:9d:88:5c:6d:36:a4 - 4b:ea:c5:e3:ea:32:81:6b:f3:47:b5:35:d5:c4:1a:b2 - ae:12:9d:19:a3:ec:a4:af:41:7e:5e:34:9d:f5:bc:b9 - 1f:a3:c2:32:b4:fc:95:a7:7a:54:04:e2:d6:4e:10:2f - 66:68:8b:3b:20:ea:05:db:2e:72:01:11:e7:7c:f8:72 - 0f:60:be:f1:27:19:ad:3a:6f:e9:70:56:3a:86:6e:46 - 0d:e3:55:31:66:77:09:84:48:b9:25:4b:c3:26:70:12 - ca:a4:5f:c6:3d:6a:e5:db:4d:63:04:b8:09:07:c9:30 
- 85:08:9d:77:40:26:60:da:10:c2:53:d2:00:0d:9e:d9 - d5:71:06:30:eb:fb:f7:3f:82:1f:b3:9a:f3:4d:24:86 - 2e:94:fd:06:9e:dc:26:68:fa:64:c3:f9:fa:08:c4:b2 - ec:7a:f5:55:c5:10:b5:e2:2d:de:ba:04:30:10:5b:99 - + 66:7a:51:72:30:6c:e9:43:eb:57:dc:4f:2a:ab:2a:bf + 35:da:04:7d:77:d8:d4:c2:32:6e:4b:e8:64:53:99:77 + fe:f4:5a:f3:5d:0e:62:c2:3d:e7:e7:bb:42:12:87:4d + 39:6f:85:b9:e6:58:77:02:99:bb:f2:3b:7d:f8:7a:ca + f8:00:d4:8d:5d:c2:b1:41:00:a5:0f:cd:e4:db:32:77 + 47:f3:2f:99:5a:a5:ab:26:a7:2e:50:80:a6:b4:eb:ef + 43:e1:d4:a1:63:a9:1f:20:ed:52:46:0f:08:4f:0f:6b + dd:2c:95:a3:77:6c:8a:2a:7d:26:8f:87:98:04:61:cd + 29:be:3f:32:4e:bf:a2:c7:02:38:c6:14:a5:07:0b:a6 + 25:a2:d1:2a:0a:18:7b:d7:df:fd:2e:de:b2:2b:ef:80 + c1:71:1e:52:bb:2d:8a:7b:1a:26:c7:2d:d2:70:b9:db + 45:32:94:9d:34:cd:d8:58:e1:4b:47:b5:81:26:68:34 + 2a:32:02:0b:e6:95:3a:d2:d4:d6:e4:c7:bf:8e:04:4e + fc:75:e9:6d:f4:a4:a8:6b:50:76:7c:10:07:81:96:56 + 54:16:d7:39:8a:5f:51:79:1f:96:49:cb:d7:8b:0d:51 + 5d:4a:e8:22:fb:d1:92:a4:a2:02:65:24:0a:62:33:84 + a9:df:a9:4f:40:1a:95:1d:98:ea:0c:23:05:c2:db:1e + 4d:c5:f2:e9:63:6b:de:70:67:3a:a2:f4:72:c1:97:ae + d0:6f:78:82:2b:a1:6f:2c:d7:90:a3:4f:d6:f9:d6:80 + 11:d4:93:8e:e0:06:19:f6:d9:33:72:5d:d8:16:a2:ef + e1:a9:00:de:bc:7e:98:aa:97:45:b8:81:20:01:5d:0b + 10:3d:69:a9:ba:7d:2c:1f:e0:17:0e:bc:ee:97:95:61 + a7:6b:10:94:27:ad:ca:c1:93:3b:fa:dc:8d:3d:58:cc + fb:04:3f:7f:40:d6:6c:e8:83:a1:be:50:cd:46:73:11 + prime1: - 00:fb:d1:47:9d:9e:73:f8:1e:09:21:fd:89:16:05:56 - af:a5:cf:52:d5:cd:f7:26:18:d1:84:3a:36:65:0b:a2 - cd:f9:b8:99:c0:c7:ef:00:c9:2f:c9:92:1a:1d:3d:86 - 58:3b:b1:be:d4:8c:c6:1b:df:ba:ee:87:aa:d1:22:47 - 18:bd:de:01:0f:0d:cb:ac:d0:48:a4:f4:93:e2:a6:cb - b5:b7:f5:f5:72:dd:ec:ac:13:e8:3d:62:23:54:ac:52 - ff:ee:9a:e1:7f:b0:ae:3b:41:38:d8:39:2b:40:ef:25 - 81:50:b0:98:db:f8:40:16:6e:1a:41:79:22:90:58:99 - 80:c2:0d:ba:b5:d3:54:ec:28:33:e4:b0:58:ea:de:61 - a1:b7:30:0b:9d:dc:73:62:c2:07:d3:75:91:48:49:dd - be:cf:b2:90:95:8f:29:6c:6f:f6:68:cb:cf:d5:24:a3 - 
d7:37:81:1b:34:3b:af:9a:48:52:af:53:7c:f7:32:a2 - 3f: + 00:da:9b:62:22:f8:48:3e:9a:4f:ef:e5:b0:f1:e3:5e + ec:21:3d:3b:8f:ec:f1:d8:f6:fd:a7:2f:69:2a:cd:79 + a1:4b:6b:39:36:e9:c3:a9:5b:f9:59:50:71:6b:72:b0 + 8d:13:ca:93:b1:4d:4b:55:3a:69:b2:84:8d:4e:18:77 + 68:ae:f6:d8:ec:43:ef:c8:21:51:b9:cb:86:b7:62:ab + 5e:90:77:ac:e6:85:a9:e9:f7:19:f7:26:24:4c:18:81 + 53:85:42:cd:35:5c:1d:ae:70:0a:59:b0:44:a2:50:bc + 68:3a:bd:c2:53:7f:2e:ab:04:2d:85:ee:7d:8a:0c:db + cc:85:5a:b5:f8:6f:f8:92:53:0c:93:00:f7:ff:84:61 + d0:67:f4:10:b4:bf:9a:ba:35:df:05:79:d7:78:42:fb + 48:c4:db:35:27:4f:18:cf:8d:da:26:13:36:84:42:5d + 50:c4:9d:38:15:b5:6a:ca:3a:ab:f1:f9:b0:26:1a:54 + 99: prime2: - 00:d9:83:5e:be:0a:ea:0b:d9:66:63:56:3b:9e:44:aa - 46:6d:8d:6c:10:81:4b:de:19:5d:2c:16:7e:30:7c:ad - 23:9a:89:53:cc:18:e8:e8:51:2b:79:35:d0:67:7d:9e - 8f:be:ea:63:5e:14:c0:6b:ba:02:6c:4a:da:07:70:9d - 14:fa:be:1e:40:47:50:6f:f2:5a:87:9e:b6:b1:b8:55 - 2c:b6:a2:e3:b0:24:ba:ea:9b:55:87:8b:4b:cf:40:4a - 25:b4:89:cf:9e:76:ca:79:4a:f4:74:b7:ee:cf:6c:8f - cb:e3:3d:9e:86:3b:44:b7:70:ec:05:0c:68:ce:d6:c3 - a2:ec:e6:11:d6:2f:f7:80:26:a9:5c:aa:b9:a6:33:84 - a9:00:43:cf:72:07:8a:91:59:a2:b1:de:79:07:6b:81 - 67:a5:c2:4b:fd:29:8a:1a:96:66:57:66:d4:37:9a:98 - 69:d1:19:24:53:b1:a4:54:68:1e:8c:2b:b4:93:19:ed - 95: + 00:d4:2b:34:e6:a1:68:c9:c8:7a:22:5c:21:34:0f:67 + 4b:6a:78:d5:0e:63:be:4b:83:a3:ac:28:b6:37:80:c0 + 79:30:ec:0c:87:6d:c1:f2:d1:f8:bd:8d:3c:bb:20:81 + d9:dd:6b:25:0d:0c:e4:15:39:11:06:31:06:84:2c:8d + 12:73:04:b3:cf:fd:57:03:ca:65:3b:f7:e3:e0:6f:37 + 4c:b2:ef:c4:a3:cb:8b:54:f7:35:73:54:e4:f9:62:bd + 1e:7a:c1:76:b1:f0:cc:d9:d0:fc:c7:83:59:07:3b:55 + 1f:a9:88:7e:e9:27:bb:e8:5a:a0:57:de:1d:f4:56:8c + a9:34:9b:0a:43:d5:a1:2a:97:80:27:07:ee:57:20:29 + f7:08:02:78:c7:fc:9b:c9:28:64:a5:63:a4:a6:a2:65 + 48:fd:6d:42:b0:60:59:13:f7:f1:cd:78:09:94:66:42 + 36:6a:ee:7c:40:c5:f1:2f:f9:7e:ca:f7:b5:02:95:10 + af: coefficient: - 00:90:9a:7f:6f:14:a8:bc:79:3f:25:e5:62:f9:5d:29 - 78:a4:78:8e:7a:e4:8a:62:8a:7f:9c:ae:75:95:fe:ee - 
1a:99:53:40:01:76:29:7d:48:85:28:a2:2a:9f:0f:10 - 8c:19:6a:36:6b:e1:ac:a2:07:b9:72:5c:b9:a6:20:bb - 8f:cb:f5:ea:dd:3f:0e:ab:9d:c1:57:7e:7b:96:f9:da - b0:52:3c:3f:62:94:e7:5c:04:9e:ac:60:cd:4d:ec:7e - 68:d3:fb:2a:b4:02:f0:0e:be:37:bc:2a:f8:6e:8d:31 - b5:38:67:00:9e:67:9f:71:d0:88:36:32:69:4b:20:73 - eb:a1:d9:bc:72:c2:7e:39:1a:36:cc:c1:45:a2:14:37 - e6:ca:db:4d:0b:5b:68:a4:ff:b7:7b:b1:db:2f:70:27 - a1:6c:31:3f:c0:c3:23:04:b0:7a:e2:0d:21:ba:5a:80 - 52:c1:a1:2b:57:72:20:b6:ed:b1:e8:3b:95:88:81:90 - 5d: + 00:c2:99:5b:b5:1c:59:73:c7:70:78:75:aa:67:4d:92 + d1:27:b6:47:be:e2:71:39:31:f7:5d:be:79:bd:22:b2 + 34:80:b0:a5:39:ab:b2:53:2a:28:f9:4a:34:20:b6:ea + 25:d5:df:34:ad:d0:b3:26:ed:ba:f3:0c:07:95:34:50 + ae:48:40:a7:5b:f7:8c:e4:c6:d6:a4:1f:18:07:2a:ea + 01:38:90:d5:ca:89:19:3d:8e:c2:40:05:e5:09:a8:30 + 78:6c:e9:e6:1d:6c:5b:22:a9:24:d0:07:41:95:0e:82 + f6:19:e4:6c:c1:96:ae:c3:5d:84:a9:02:e5:7c:d6:b9 + f7:94:0f:b9:5a:41:87:db:03:17:9b:39:b2:e9:bb:f5 + 3f:0d:91:6d:d2:32:fe:ef:60:19:3c:15:48:c1:a8:e7 + 4c:b8:bd:dc:31:43:49:df:05:be:c5:3b:6a:5d:68:91 + 6d:e6:47:0b:a4:27:74:44:42:12:31:02:ad:aa:7e:e8 + 0c: exp1: - 00:ef:ce:66:20:01:44:b9:35:89:46:f8:56:33:45:54 - 3f:23:6d:23:9a:7e:71:6d:b3:56:db:50:40:7a:cb:b0 - f7:ec:67:52:ec:96:b9:d1:8a:c6:5a:74:2b:30:4b:66 - 03:e2:9d:2b:78:e8:b2:c4:da:b3:fe:f1:ed:c7:09:98 - a1:44:37:05:d5:1b:33:2a:58:93:c5:9b:30:b6:38:57 - 68:af:4e:a8:b7:02:06:9f:fc:b9:3e:b3:95:a7:ce:0f - a0:b0:ce:88:0e:7c:e7:ff:7f:e6:2d:6b:8b:f8:63:85 - d8:f7:49:a5:d8:5d:3a:52:e1:f9:58:fe:8d:de:de:b1 - 18:40:34:a8:e8:fc:df:33:a2:39:81:00:3b:3d:38:17 - cb:d4:53:09:cd:04:a2:51:9b:2b:ae:c1:98:60:3a:0f - d4:e5:a0:4c:36:51:46:86:80:bd:2d:21:62:c3:bd:07 - d6:2d:82:62:b0:c4:62:3f:4f:be:86:3e:c0:93:fc:81 - 2b: + 00:a6:15:8d:5e:a2:21:49:26:b5:fa:be:18:4b:fe:01 + bd:06:97:dd:eb:c0:0a:12:5b:bc:64:cb:79:6c:22:85 + f9:0c:32:9f:5a:60:09:de:5e:d9:37:89:0d:52:a3:e5 + 0c:99:ef:bb:7b:e6:0c:88:e9:03:2a:b4:d8:22:70:26 + 30:6b:55:71:83:37:2e:32:6f:56:07:01:61:0d:6c:b2 + 
63:fb:00:61:65:16:41:fc:56:56:ec:d5:96:98:15:ba + 13:b9:58:02:4e:3a:f8:f5:f1:7b:7b:9e:96:d6:76:de + 5d:95:db:5b:8b:52:42:23:7d:de:14:36:18:3c:cc:fe + 25:09:5f:dc:86:ae:93:3c:a0:4a:1a:59:8f:11:1b:03 + b1:71:79:15:44:2d:9a:21:45:12:76:b7:96:03:71:68 + 36:66:11:60:f5:c7:7b:43:1d:33:92:df:df:f8:65:ef + 3d:90:d3:1d:ac:28:93:c4:a7:04:c1:a7:80:b3:c2:27 + 69: exp2: - 11:e4:73:93:b0:74:26:3b:60:e7:c4:fd:2c:7c:bb:81 - 05:9b:ff:8a:b0:08:1c:a1:fb:7f:17:ee:93:70:7e:11 - 92:b1:bf:39:e7:c6:a8:ed:9c:64:e1:1f:5e:93:ff:ca - 15:4b:54:97:35:9f:ca:7c:c7:9c:3e:e0:06:82:a5:f9 - 46:d3:02:cc:08:d1:be:13:b2:8c:bb:6a:8d:dd:fa:eb - ad:ae:62:8a:67:cb:14:67:68:b6:b8:a7:a8:c9:c2:0f - ad:f5:34:25:f5:e1:9b:ee:a5:83:40:6a:1d:97:f1:90 - 35:06:29:97:23:22:f8:f0:0a:0a:34:46:1e:d5:9d:cc - 36:2e:8a:c3:12:b9:0a:4a:a3:dd:e2:91:58:f1:9d:f5 - 04:f7:8f:05:f3:46:db:c4:02:d5:1c:d6:d9:dc:67:0d - ae:9d:f8:00:40:3d:83:08:62:2c:c8:61:a6:9d:49:f2 - 52:67:fe:0c:00:6d:e3:1f:99:7b:b0:50:af:55:0f:ad - + 3a:11:ad:43:e6:2f:78:f2:be:c2:c2:b6:6f:ba:3b:8a + 3f:94:dc:b3:38:87:6f:c1:92:bd:5e:d3:28:73:bb:ba + 2f:b2:9b:67:41:9a:10:ac:79:48:df:ec:1c:47:34:62 + fd:a0:02:9f:04:c5:34:2d:cf:44:03:8d:06:05:ef:82 + 7a:2a:72:50:5d:c5:40:0b:58:13:c6:af:fe:d0:51:b1 + 53:54:1e:5c:ba:2b:e3:50:59:b7:bc:27:83:3e:0c:06 + d8:90:34:bd:54:eb:ac:3d:ef:c1:67:68:a9:7b:0a:bc + 44:b9:50:0a:de:4e:26:b5:0f:27:9d:6a:53:62:90:d3 + 0a:cb:d8:4e:62:9c:de:bc:62:5d:cc:64:e6:41:ae:5a + a7:3d:8d:f3:26:67:38:f2:2e:41:b9:f8:e9:86:8e:f4 + 20:30:94:4d:13:c1:ad:9e:71:f6:cb:80:cb:b6:05:2f + 41:ab:0e:24:e8:48:2f:da:38:62:b2:d3:3e:e6:af:8f + Public Key PIN: - pin-sha256:EiqPFBPoLKkCzVlK8KoKYGQT/LSo7/0iLg/I7nKt1/0= + pin-sha256:6j7MphUbNRjXh9x/BogWeu4m7+ON7aYmCyFxQsSMsec= Public Key ID: - sha256:122a8f1413e82ca902cd594af0aa0a606413fcb4a8effd222e0fc8ee72add7fd - sha1:4b4510b7a19c88cbce887f5b9d531a6d81d34400 + sha256:ea3ecca6151b3518d787dc7f0688167aee26efe38deda6260b217142c48cb1e7 + sha1:87e4402a0fb8615b3fc7d034871265903c46ec84 -----BEGIN RSA PRIVATE KEY----- 
-MIIG5AIBAAKCAYEA1fWe/glTz5MRvZtZ6Pk7ac6jCnDbNJhMMBDWjIC9Xc/31joZ -kK5VnW5P/sNZq3S5Glx2Y2Ie+jCeQ7hWYNl1WqCIrV4fNjlgEnuEFUOze2lY243t -4k+OeyQ2YmopDqX2vFiX9q7vzVP6120enUBO1OgyBUnFZpTHmWukQl2ZhzxjoakA -DjYP6sjejhKWXeYjms8KTXpBjl1PaWcvYe4IrMzZuNZq3pveUyl83PcD9uVT0T/j -rZnGv/Ior/TLLdIFmFjOutDX96uMimbgjSxqQcg57EzUBCWIWKjAXNW+v+lqJcCl -YY5y4ElUhuY8j9sQa5OHSevemaIXRiuQMJGFquASVhzPrgpD0jE8UIpwqOQycU+N -NK1nutwzKoq8ZzOoVozlrpz2AifaJ5StVnfzK+0jkuj+7FnkiNeoHUV2DjGLa3J/ -1YwcrLa3J0Gv1tON9BFd81P9mbfCgNc5iwfOP5J/qe0tUKQ7V+dzPDTd89FXTf+5 -VFrmY8bLKP3F08GrAgMBAAECggGAHjiweX+FyBck9VxBKegyXTKj0vC39cjhUhS+ -yV/R37NldWwFems1iqQvRnP/cXluP+v5iPYuG/bMFBKwmMN+kQuF4r8dt4IJMPMj -aAGFE5SAyZpVlJbaMEigKeyG2hvVKyt0Y5K4Ko+HKfCu11VjDS2zCw4thNzVCLWs -oPcpnXGJPSdq65b1TpuK3BSCCsdcFhzSfrkbE2nYsrGxfqqprQbOZg5bUBBCKgr9 -KRT3CWPBIBhfJ4FGEoy49ImmPVWh1GT88tvXnPW+952IXG02pEvqxePqMoFr80e1 -NdXEGrKuEp0Zo+ykr0F+XjSd9by5H6PCMrT8lad6VATi1k4QL2Zoizsg6gXbLnIB -Eed8+HIPYL7xJxmtOm/pcFY6hm5GDeNVMWZ3CYRIuSVLwyZwEsqkX8Y9auXbTWME -uAkHyTCFCJ13QCZg2hDCU9IADZ7Z1XEGMOv79z+CH7Oa800khi6U/Qae3CZo+mTD -+foIxLLsevVVxRC14i3eugQwEFuZAoHBAPvRR52ec/geCSH9iRYFVq+lz1LVzfcm -GNGEOjZlC6LN+biZwMfvAMkvyZIaHT2GWDuxvtSMxhvfuu6HqtEiRxi93gEPDcus -0Eik9JPipsu1t/X1ct3srBPoPWIjVKxS/+6a4X+wrjtBONg5K0DvJYFQsJjb+EAW -bhpBeSKQWJmAwg26tdNU7Cgz5LBY6t5hobcwC53cc2LCB9N1kUhJ3b7PspCVjyls -b/Zoy8/VJKPXN4EbNDuvmkhSr1N89zKiPwKBwQDZg16+CuoL2WZjVjueRKpGbY1s -EIFL3hldLBZ+MHytI5qJU8wY6OhRK3k10Gd9no++6mNeFMBrugJsStoHcJ0U+r4e -QEdQb/Jah562sbhVLLai47AkuuqbVYeLS89ASiW0ic+edsp5SvR0t+7PbI/L4z2e -hjtEt3DsBQxoztbDouzmEdYv94AmqVyquaYzhKkAQ89yB4qRWaKx3nkHa4FnpcJL -/SmKGpZmV2bUN5qYadEZJFOxpFRoHowrtJMZ7ZUCgcEA785mIAFEuTWJRvhWM0VU -PyNtI5p+cW2zVttQQHrLsPfsZ1LslrnRisZadCswS2YD4p0reOiyxNqz/vHtxwmY -oUQ3BdUbMypYk8WbMLY4V2ivTqi3Agaf/Lk+s5Wnzg+gsM6IDnzn/3/mLWuL+GOF -2PdJpdhdOlLh+Vj+jd7esRhANKjo/N8zojmBADs9OBfL1FMJzQSiUZsrrsGYYDoP -1OWgTDZRRoaAvS0hYsO9B9YtgmKwxGI/T76GPsCT/IErAoHAEeRzk7B0Jjtg58T9 -LHy7gQWb/4qwCByh+38X7pNwfhGSsb8558ao7Zxk4R9ek//KFUtUlzWfynzHnD7g 
-BoKl+UbTAswI0b4Tsoy7ao3d+uutrmKKZ8sUZ2i2uKeoycIPrfU0JfXhm+6lg0Bq -HZfxkDUGKZcjIvjwCgo0Rh7Vncw2LorDErkKSqPd4pFY8Z31BPePBfNG28QC1RzW -2dxnDa6d+ABAPYMIYizIYaadSfJSZ/4MAG3jH5l7sFCvVQ+tAoHBAJCaf28UqLx5 -PyXlYvldKXikeI565Ipiin+crnWV/u4amVNAAXYpfUiFKKIqnw8QjBlqNmvhrKIH -uXJcuaYgu4/L9erdPw6rncFXfnuW+dqwUjw/YpTnXASerGDNTex+aNP7KrQC8A6+ -N7wq+G6NMbU4ZwCeZ59x0Ig2MmlLIHProdm8csJ+ORo2zMFFohQ35srbTQtbaKT/ -t3ux2y9wJ6FsMT/AwyMEsHriDSG6WoBSwaErV3Igtu2x6DuViIGQXQ== +MIIG5AIBAAKCAYEAtS2SivLaAfsZ1X9xun/6i/1UNSxG5kPvVkfTPWOwSgCYyqn7 +u2vZsIsvcurqHQBEg1SWF4XAdcFeCxdDN8em6ISfSOUFL9TSQrZ5eFQYcVTX9IFX +DWXbFSC8AgJpYhrPf2O0vxQNQFD4MpNXrLpEkc3vLrg/JhySC1HAYfO/nuJV3ZnA +xJZCIv4upT5j0VjzfPw6WR/OUUlcmLnh0+qMLv7G2dRLjdAsV1U8k7eykk+z9qXe +KAjgFBEEgLTVbHR1zbrUpw42DYR7SQpEJ8wrJ9qHdI4H/9MnPu8O75kfgauQV4s/ +oIgWmVeHMDxpGaLEKgcYVE2F/HlMfFvCrrZKKXi1/k35rG8sg6cP/bRuRZlgO0xF +TrW0QKmWoBLqyqVMvifdkT0EBWZ4eZWTDn2Rr4HMe7ov54QmKXaXp2VdOGzO1Efv +9KvDildb5aEqBrVz+tU+C/Cdg5p1d2g2m1Uk9wjcYbXOWx9fd9Ur/fxJuHtauxOW +4gzFLcTOgvUgVmSXAgMBAAECggGAZnpRcjBs6UPrV9xPKqsqvzXaBH132NTCMm5L +6GRTmXf+9FrzXQ5iwj3n57tCEodNOW+FueZYdwKZu/I7ffh6yvgA1I1dwrFBAKUP +zeTbMndH8y+ZWqWrJqcuUICmtOvvQ+HUoWOpHyDtUkYPCE8Pa90slaN3bIoqfSaP +h5gEYc0pvj8yTr+ixwI4xhSlBwumJaLRKgoYe9ff/S7esivvgMFxHlK7LYp7GibH +LdJwudtFMpSdNM3YWOFLR7WBJmg0KjICC+aVOtLU1uTHv44ETvx16W30pKhrUHZ8 +EAeBllZUFtc5il9ReR+WScvXiw1RXUroIvvRkqSiAmUkCmIzhKnfqU9AGpUdmOoM +IwXC2x5NxfLpY2vecGc6ovRywZeu0G94giuhbyzXkKNP1vnWgBHUk47gBhn22TNy +XdgWou/hqQDevH6YqpdFuIEgAV0LED1pqbp9LB/gFw687peVYadrEJQnrcrBkzv6 +3I09WMz7BD9/QNZs6IOhvlDNRnMRAoHBANqbYiL4SD6aT+/lsPHjXuwhPTuP7PHY +9v2nL2kqzXmhS2s5NunDqVv5WVBxa3KwjRPKk7FNS1U6abKEjU4Yd2iu9tjsQ+/I +IVG5y4a3YqtekHes5oWp6fcZ9yYkTBiBU4VCzTVcHa5wClmwRKJQvGg6vcJTfy6r +BC2F7n2KDNvMhVq1+G/4klMMkwD3/4Rh0Gf0ELS/mro13wV513hC+0jE2zUnTxjP +jdomEzaEQl1QxJ04FbVqyjqr8fmwJhpUmQKBwQDUKzTmoWjJyHoiXCE0D2dLanjV +DmO+S4OjrCi2N4DAeTDsDIdtwfLR+L2NPLsggdndayUNDOQVOREGMQaELI0ScwSz +z/1XA8plO/fj4G83TLLvxKPLi1T3NXNU5PlivR56wXax8MzZ0PzHg1kHO1UfqYh+ 
+6Se76FqgV94d9FaMqTSbCkPVoSqXgCcH7lcgKfcIAnjH/JvJKGSlY6SmomVI/W1C +sGBZE/fxzXgJlGZCNmrufEDF8S/5fsr3tQKVEK8CgcEAphWNXqIhSSa1+r4YS/4B +vQaX3evAChJbvGTLeWwihfkMMp9aYAneXtk3iQ1So+UMme+7e+YMiOkDKrTYInAm +MGtVcYM3LjJvVgcBYQ1ssmP7AGFlFkH8Vlbs1ZaYFboTuVgCTjr49fF7e56W1nbe +XZXbW4tSQiN93hQ2GDzM/iUJX9yGrpM8oEoaWY8RGwOxcXkVRC2aIUUSdreWA3Fo +NmYRYPXHe0MdM5Lf3/hl7z2Q0x2sKJPEpwTBp4CzwidpAoHAOhGtQ+YvePK+wsK2 +b7o7ij+U3LM4h2/Bkr1e0yhzu7ovsptnQZoQrHlI3+wcRzRi/aACnwTFNC3PRAON +BgXvgnoqclBdxUALWBPGr/7QUbFTVB5cuivjUFm3vCeDPgwG2JA0vVTrrD3vwWdo +qXsKvES5UAreTia1DyedalNikNMKy9hOYpzevGJdzGTmQa5apz2N8yZnOPIuQbn4 +6YaO9CAwlE0Twa2ecfbLgMu2BS9Bqw4k6Egv2jhistM+5q+PAoHBAMKZW7UcWXPH +cHh1qmdNktEntke+4nE5Mfddvnm9IrI0gLClOauyUyoo+Uo0ILbqJdXfNK3Qsybt +uvMMB5U0UK5IQKdb94zkxtakHxgHKuoBOJDVyokZPY7CQAXlCagweGzp5h1sWyKp +JNAHQZUOgvYZ5GzBlq7DXYSpAuV81rn3lA+5WkGH2wMXmzmy6bv1Pw2RbdIy/u9g +GTwVSMGo50y4vdwxQ0nfBb7FO2pdaJFt5kcLpCd0REISMQKtqn7oDA== -----END RSA PRIVATE KEY----- From d23d1fc0f9483e5b99abd31d92369fa8cbb05150 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Fri, 5 Jun 2020 07:56:39 +0200 Subject: [PATCH 0400/2030] travis: fix alpine builds With the latest version of the alpine container image it seems that alpine changed a few package names. This adapts the alpine container to solve the travis failures. 
Signed-off-by: Adrian Reber --- scripts/build/Dockerfile.alpine | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/scripts/build/Dockerfile.alpine b/scripts/build/Dockerfile.alpine index 5785102da..dbf3c2bf1 100644 --- a/scripts/build/Dockerfile.alpine +++ b/scripts/build/Dockerfile.alpine @@ -15,9 +15,12 @@ RUN apk update && apk add \ libnet-dev \ libnl3-dev \ nftables \ + nftables-dev \ pkgconfig \ protobuf-c-dev \ protobuf-dev \ + py3-pip \ + py3-protobuf \ python3 \ sudo @@ -43,7 +46,7 @@ RUN apk add \ # The rpc test cases are running as user #1000, let's add the user RUN adduser -u 1000 -D test -RUN pip3 install protobuf junit_xml +RUN pip3 install junit_xml # For zdtm we need an unversioned python binary RUN ln -s /usr/bin/python3 /usr/bin/python From be2ded15ee1cff6c0bd5733dda57bb17ea85bcd9 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Fri, 5 Jun 2020 07:55:18 +0200 Subject: [PATCH 0401/2030] test: fix flake8 errors The newest version of flake reports errors that variable names like 'l' should not be used, because they are hard to read. This changes 'l' to 'line' to make flake8 happy. 
Signed-off-by: Adrian Reber --- test/zdtm.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/zdtm.py b/test/zdtm.py index 5e42c769e..b111fa383 100755 --- a/test/zdtm.py +++ b/test/zdtm.py @@ -2047,11 +2047,11 @@ def grep_errors(fname): print_next = False before = [] with open(fname, errors='replace') as fd: - for l in fd: - before.append(l) + for line in fd: + before.append(line) if len(before) > 5: before.pop(0) - if "Error" in l or "Warn" in l: + if "Error" in line or "Warn" in line: if first: print_fname(fname, 'log') print_sep("grep Error", "-", 60) @@ -2061,7 +2061,7 @@ def grep_errors(fname): before = [] else: if print_next: - print_next = print_error(l) + print_next = print_error(line) before = [] if not first: print_sep("ERROR OVER", "-", 60) From 01cab14dfad656eb1d5f7e6a611a49a87c83b34e Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Fri, 22 Feb 2019 18:04:32 +0000 Subject: [PATCH 0402/2030] util: Fix addr casting for IPv4/IPv6 in autobind When saddr.ss_family is AF_INET6 we should cast &saddr to (struct sockaddr_in6 *). 
Signed-off-by: Radostin Stoyanov Signed-off-by: Andrei Vagin --- criu/util.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/criu/util.c b/criu/util.c index 6f6a6dde7..06c594ca9 100644 --- a/criu/util.c +++ b/criu/util.c @@ -1131,9 +1131,9 @@ int setup_tcp_server(char *type, char *addr, unsigned short *port) } if (saddr.ss_family == AF_INET6) { - (*port) = ntohs(((struct sockaddr_in *)&saddr)->sin_port); - } else if (saddr.ss_family == AF_INET) { (*port) = ntohs(((struct sockaddr_in6 *)&saddr)->sin6_port); + } else if (saddr.ss_family == AF_INET) { + (*port) = ntohs(((struct sockaddr_in *)&saddr)->sin_port); } pr_info("Using %u port\n", (*port)); From 0708cbd8839267c30e8dcd8bec63f5bc2d2d36d0 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Thu, 13 Sep 2018 18:28:10 +0100 Subject: [PATCH 0403/2030] remote: Use tmp file buffer when restore ip dump When CRIU calls the ip tool on restore, it passes the fd of remote socket by replacing the STDIN before execvp. The stdin is used by the ip tool to receive input. However, the ip tool calls ftell(stdin) which fails with "Illegal seek" since UNIX sockets do not support file positioning operations. To resolve this issue, read the received content from the UNIX socket and store it into temporary file, then replace STDIN with the fd of this tmp file. 
# python test/zdtm.py run -t zdtm/static/env00 --remote -f ns === Run 1/1 ================ zdtm/static/env00 ========================= Run zdtm/static/env00 in ns ========================== Start test ./env00 --pidfile=env00.pid --outfile=env00.out --envname=ENV_00_TEST Adding image cache Adding image proxy Run criu dump Run criu restore =[log]=> dump/zdtm/static/env00/31/1/restore.log ------------------------ grep Error ------------------------ RTNETLINK answers: File exists (00.229895) 1: do_open_remote_image RDONLY path=route-9.img snapshot_id=dump/zdtm/static/env00/31/1 (00.230316) 1: Running ip route restore Failed to restore: ftell: Illegal seek (00.232757) 1: Error (criu/util.c:712): exited, status=255 (00.232777) 1: Error (criu/net.c:1479): IP tool failed on route restore (00.232803) 1: Error (criu/net.c:2153): Can't create net_ns (00.255091) Error (criu/cr-restore.c:1177): 105 killed by signal 9: Killed (00.255307) Error (criu/mount.c:2960): mnt: Can't remove the directory /tmp/.criu.mntns.dTd7ak: No such file or directory (00.255339) Error (criu/cr-restore.c:2119): Restoring FAILED. 
------------------------ ERROR OVER ------------------------ ################# Test zdtm/static/env00 FAIL at CRIU restore ################## ##################################### FAIL ##################################### Fixes #311 Signed-off-by: Radostin Stoyanov Signed-off-by: Andrei Vagin --- criu/net.c | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/criu/net.c b/criu/net.c index 762f9b547..27e7c7e33 100644 --- a/criu/net.c +++ b/criu/net.c @@ -2068,19 +2068,46 @@ out: static int restore_ip_dump(int type, int pid, char *cmd) { - int ret = -1; + int ret = -1, sockfd, n, written; + FILE *tmp_file; struct cr_img *img; + char buf[1024]; img = open_image(type, O_RSTR, pid); if (empty_image(img)) { close_image(img); return 0; } + sockfd = img_raw_fd(img); + tmp_file = tmpfile(); + if (!tmp_file) { + pr_perror("Failed to open tmpfile"); + return -1; + } + + while ((n = read(sockfd, buf, 1024)) > 0) { + written = fwrite(buf, sizeof(char), n, tmp_file); + if (written < n) { + pr_perror("Failed to write to tmpfile " + "[written: %d; total: %d]", written, n); + return -1; + } + } + + if (fseek(tmp_file, 0, SEEK_SET)) { + pr_perror("Failed to set file position to beginning of tmpfile"); + return -1; + } + if (img) { - ret = run_ip_tool(cmd, "restore", NULL, NULL, img_raw_fd(img), -1, 0); + ret = run_ip_tool(cmd, "restore", NULL, NULL, fileno(tmp_file), -1, 0); close_image(img); } + if(fclose(tmp_file)) { + pr_perror("Failed to close tmpfile"); + } + return ret; } From 51c3f8a908b1738d7d268afc5bb0dd9a59f3d8a5 Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Thu, 11 Jun 2020 21:01:24 +0000 Subject: [PATCH 0404/2030] pipes: loop over splice() when dumping a pipe's data Instead of erroring, we should loop until we get the desired number of bytes written, like regular I/O loops. 
Signed-off-by: Nicolas Viennot --- criu/pipes.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/criu/pipes.c b/criu/pipes.c index d74329161..5787fdc53 100644 --- a/criu/pipes.c +++ b/criu/pipes.c @@ -463,18 +463,15 @@ int dump_one_pipe_data(struct pipe_data_dump *pd, int lfd, const struct fd_parms if (pb_write_one(img, &pde, PB_PIPE_DATA)) goto err_close; - if (bytes) { + while (bytes > 0) { int wrote; - wrote = splice(steal_pipe[0], NULL, img_raw_fd(img), NULL, bytes, 0); if (wrote < 0) { pr_perror("Can't push pipe data"); goto err_close; - } else if (wrote != bytes) { - pr_err("%#x: Wanted to write %d bytes, but wrote %d\n", - pipe_id(p), bytes, wrote); - goto err_close; - } + } else if (wrote == 0) + break; + bytes -= wrote; } ret = 0; From 7d79a58f4daa9ddda015fc0aaca2a60d156d12cc Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Thu, 21 May 2020 17:40:17 +0000 Subject: [PATCH 0405/2030] img-streamer: introduction of criu-image-streamer This adds the ability to stream images with criu-image-streamer The workflow is the following: 1) criu-image-streamer is started, and starts listening on a UNIX socket. 2) CRIU is started. img_streamer_init() is invoked, which connects to the socket. During dump/restore operations, instead of using local disk to open an image file, img_streamer_open() is called to provide a UNIX pipe that is sent over the UNIX socket. 3) Once the operation is done, img_streamer_finish() is called, and the UNIX socket is disconnected. 
criu-image-streamer can be found at: https://github.com/checkpoint-restore/criu-image-streamer Signed-off-by: Nicolas Viennot --- Documentation/criu.txt | 5 + criu/Makefile.crtools | 1 + criu/config.c | 1 + criu/cr-dump.c | 2 + criu/cr-restore.c | 4 + criu/cr-service.c | 9 +- criu/crtools.c | 34 ++++- criu/files-reg.c | 10 +- criu/image.c | 32 +++-- criu/img-streamer.c | 232 +++++++++++++++++++++++++++++++++++ criu/include/cr_options.h | 1 + criu/include/image.h | 2 +- criu/include/img-streamer.h | 8 ++ criu/include/protobuf-desc.h | 4 +- criu/include/servicefd.h | 1 + criu/mem.c | 6 +- criu/page-xfer.c | 8 ++ criu/pagemap.c | 63 +++++++++- criu/protobuf-desc.c | 1 + criu/util.c | 14 ++- images/Makefile | 1 + images/img-streamer.proto | 16 +++ 22 files changed, 429 insertions(+), 26 deletions(-) create mode 100644 criu/img-streamer.c create mode 100644 criu/include/img-streamer.h create mode 100644 images/img-streamer.proto diff --git a/Documentation/criu.txt b/Documentation/criu.txt index ab63e461c..4e9b4132e 100644 --- a/Documentation/criu.txt +++ b/Documentation/criu.txt @@ -76,6 +76,11 @@ The following levels are available: *-D*, *--images-dir* 'path':: Use 'path' as a base directory where to look for sets of image files. +*--stream*:: + dump/restore images using criu-image-streamer. + See https://github.com/checkpoint-restore/criu-image-streamer for detailed + usage. + *--prev-images-dir* 'path':: Use 'path' as a parent directory where to look for sets of image files. This option makes sense in case of incremental dumps. 
diff --git a/criu/Makefile.crtools b/criu/Makefile.crtools index 5c25b8928..dc92c2ea2 100644 --- a/criu/Makefile.crtools +++ b/criu/Makefile.crtools @@ -29,6 +29,7 @@ obj-y += files-reg.o obj-y += fsnotify.o obj-y += image-desc.o obj-y += image.o +obj-y += img-streamer.o obj-y += ipc_ns.o obj-y += irmap.o obj-y += kcmp-ids.o diff --git a/criu/config.c b/criu/config.c index b84b7da28..e78b534a9 100644 --- a/criu/config.c +++ b/criu/config.c @@ -510,6 +510,7 @@ int parse_options(int argc, char **argv, bool *usage_error, BOOL_OPT(SK_CLOSE_PARAM, &opts.tcp_close), { "verbosity", optional_argument, 0, 'v' }, { "ps-socket", required_argument, 0, 1091}, + BOOL_OPT("stream", &opts.stream), { "config", required_argument, 0, 1089}, { "no-default-config", no_argument, 0, 1090}, { "tls-cacert", required_argument, 0, 1092}, diff --git a/criu/cr-dump.c b/criu/cr-dump.c index 745998afc..2b4c9ae82 100644 --- a/criu/cr-dump.c +++ b/criu/cr-dump.c @@ -82,6 +82,7 @@ #include "eventpoll.h" #include "memfd.h" #include "timens.h" +#include "img-streamer.h" /* * Architectures can overwrite this function to restore register sets that @@ -1759,6 +1760,7 @@ static int cr_dump_finish(int ret) free_userns_maps(); close_service_fd(CR_PROC_FD_OFF); + close_image_dir(); if (ret) { pr_err("Dumping FAILED.\n"); diff --git a/criu/cr-restore.c b/criu/cr-restore.c index ed4b95b91..f572f79a0 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -29,6 +29,7 @@ #include "cr_options.h" #include "servicefd.h" #include "image.h" +#include "img-streamer.h" #include "util.h" #include "util-pie.h" #include "criu-log.h" @@ -2355,6 +2356,9 @@ skip_ns_bouncing: pr_info("Restore finished successfully. 
Tasks resumed.\n"); write_stats(RESTORE_STATS); + /* This has the effect of dismissing the image streamer */ + close_image_dir(); + ret = run_scripts(ACT_POST_RESUME); if (ret != 0) pr_err("Post-resume script ret code %d\n", ret); diff --git a/criu/cr-service.c b/criu/cr-service.c index 279016bcd..53eadb1bc 100644 --- a/criu/cr-service.c +++ b/criu/cr-service.c @@ -343,7 +343,14 @@ static int setup_opts_from_req(int sk, CriuOpts *req) if (req->parent_img) SET_CHAR_OPTS(img_parent, req->parent_img); - if (open_image_dir(images_dir_path) < 0) { + /* + * Image streaming is not supported with CRIU's service feature as + * the streamer must be started for each dump/restore operation. + * It is unclear how to do that with RPC, so we punt for now. + * This explains why we provide the argument mode=-1 instead of + * O_RSTR or O_DUMP. + */ + if (open_image_dir(images_dir_path, -1) < 0) { pr_perror("Can't open images directory"); goto err; } diff --git a/criu/crtools.c b/criu/crtools.c index 7f72dde27..ad61fa9bb 100644 --- a/criu/crtools.c +++ b/criu/crtools.c @@ -54,6 +54,20 @@ void flush_early_log_to_stderr(void) flush_early_log_buffer(STDERR_FILENO); } +static int image_dir_mode(char *argv[], int optind) +{ + if (!strcmp(argv[optind], "dump") || + !strcmp(argv[optind], "pre-dump") || + (!strcmp(argv[optind], "cpuinfo") && !strcmp(argv[optind + 1], "dump"))) + return O_DUMP; + + if (!strcmp(argv[optind], "restore") || + (!strcmp(argv[optind], "cpuinfo") && !strcmp(argv[optind + 1], "restore"))) + return O_RSTR; + + return -1; +} + int main(int argc, char *argv[], char *envp[]) { int ret = -1; @@ -148,13 +162,30 @@ int main(int argc, char *argv[], char *envp[]) } } + if (opts.stream && image_dir_mode(argv, optind) == -1) { + pr_err("--stream cannot be used with the %s command\n", argv[optind]); + goto usage; + } + /* We must not open imgs dir, if service is called */ if (strcmp(argv[optind], "service")) { - ret = open_image_dir(opts.imgs_dir); + ret = 
open_image_dir(opts.imgs_dir, image_dir_mode(argv, optind)); if (ret < 0) return 1; } + /* + * The kernel might send us lethal signals when writing to a pipe + * which reader has disappeared. We deal with write() failures on our + * own, and prefer not to get killed. So we ignore SIGPIPEs. + * + * Pipes are used in various places: + * 1) Receiving application page data + * 2) Transmitting data to the image streamer + * 3) Emitting logs (potentially to a pipe). + */ + signal(SIGPIPE, SIG_IGN); + /* * When a process group becomes an orphan, * its processes are sent a SIGHUP signal @@ -322,6 +353,7 @@ usage: " this requires running a second instance of criu\n" " in lazy-pages mode: 'criu lazy-pages -D DIR'\n" " --lazy-pages and lazy-pages mode require userfaultfd\n" +" --stream dump/restore images using criu-image-streamer\n" "\n" "* External resources support:\n" " --external RES dump objects from this list as external resources:\n" diff --git a/criu/files-reg.c b/criu/files-reg.c index b53e9b080..7e84addf2 100644 --- a/criu/files-reg.c +++ b/criu/files-reg.c @@ -154,7 +154,6 @@ static int trim_last_parent(char *path) static int copy_chunk_from_file(int fd, int img, off_t off, size_t len) { - char *buf = NULL; int ret; while (len > 0) { @@ -167,7 +166,6 @@ static int copy_chunk_from_file(int fd, int img, off_t off, size_t len) len -= ret; } - xfree(buf); return 0; } @@ -213,7 +211,6 @@ static int copy_file_to_chunks(int fd, struct cr_img *img, size_t file_size) static int copy_chunk_to_file(int img, int fd, off_t off, size_t len) { - char *buf = NULL; int ret; while (len > 0) { @@ -221,7 +218,11 @@ static int copy_chunk_to_file(int img, int fd, off_t off, size_t len) pr_perror("Can't seek file"); return -1; } - ret = sendfile(fd, img, NULL, len); + + if (opts.stream) + ret = splice(img, NULL, fd, NULL, len, SPLICE_F_MOVE); + else + ret = sendfile(fd, img, NULL, len); if (ret < 0) { pr_perror("Can't send data"); return -1; @@ -231,7 +232,6 @@ static int 
copy_chunk_to_file(int img, int fd, off_t off, size_t len) len -= ret; } - xfree(buf); return 0; } diff --git a/criu/image.c b/criu/image.c index 0225788b0..2bbb4dd02 100644 --- a/criu/image.c +++ b/criu/image.c @@ -17,6 +17,7 @@ #include "images/inventory.pb-c.h" #include "images/pagemap.pb-c.h" #include "proc_parse.h" +#include "img-streamer.h" #include "namespaces.h" bool ns_per_id = false; @@ -415,13 +416,16 @@ static int do_open_image(struct cr_img *img, int dfd, int type, unsigned long of flags = oflags & ~(O_NOBUF | O_SERVICE | O_FORCE_LOCAL); - /* - * For pages images dedup we need to open images read-write on - * restore, that may require proper capabilities, so we ask - * usernsd to do it for us - */ - if (root_ns_mask & CLONE_NEWUSER && - type == CR_FD_PAGES && oflags & O_RDWR) { + if (opts.stream && !(oflags & O_FORCE_LOCAL)) { + ret = img_streamer_open(path, flags); + errno = EIO; /* errno value is meaningless, only the ret value is meaningful */ + } else if (root_ns_mask & CLONE_NEWUSER && + type == CR_FD_PAGES && oflags & O_RDWR) { + /* + * For pages images dedup we need to open images read-write on + * restore, that may require proper capabilities, so we ask + * usernsd to do it for us + */ struct openat_args pa = { .flags = flags, .err = 0, @@ -520,7 +524,12 @@ struct cr_img *img_from_fd(int fd) return img; } -int open_image_dir(char *dir) +/* + * `mode` should be O_RSTR or O_DUMP depending on the intent. + * This is used when opts.stream is enabled for picking the right streamer + * socket name. `mode` is ignored when opts.stream is not enabled. 
+ */ +int open_image_dir(char *dir, int mode) { int fd, ret; @@ -535,7 +544,10 @@ int open_image_dir(char *dir) return -1; fd = ret; - if (opts.img_parent) { + if (opts.stream) { + if (img_streamer_init(dir, mode) < 0) + goto err; + } else if (opts.img_parent) { ret = symlinkat(opts.img_parent, fd, CR_PARENT_LINK); if (ret < 0 && errno != EEXIST) { pr_perror("Can't link parent snapshot"); @@ -556,6 +568,8 @@ err: void close_image_dir(void) { + if (opts.stream) + img_streamer_finish(); close_service_fd(IMG_FD_OFF); } diff --git a/criu/img-streamer.c b/criu/img-streamer.c new file mode 100644 index 000000000..e31b17dd9 --- /dev/null +++ b/criu/img-streamer.c @@ -0,0 +1,232 @@ +#include +#include +#include +#include + +#include "cr_options.h" +#include "img-streamer.h" +#include "image.h" +#include "images/img-streamer.pb-c.h" +#include "protobuf.h" +#include "servicefd.h" +#include "rst-malloc.h" +#include "common/scm.h" +#include "common/lock.h" + +/* + * We use different path names for the dump and restore sockets because: + * 1) The user may want to perform both at the same time (akin to live + * migration). Specifying the same images-dir is convenient. + * 2) It fails quickly when the user mixes up the streamer and CRIU operations. + * (e.g., streamer is in capture mode, while CRIU is in restore mode). + */ +#define IMG_STREAMER_CAPTURE_SOCKET_NAME "streamer-capture.sock" +#define IMG_STREAMER_SERVE_SOCKET_NAME "streamer-serve.sock" + +/* All requests go through the same socket connection. We must synchronize */ +static mutex_t *img_streamer_fd_lock; + +/* Either O_DUMP or O_RSTR */ +static int img_streamer_mode; + +static const char *socket_name_for_mode(int mode) +{ + switch (mode) { + case O_DUMP: return IMG_STREAMER_CAPTURE_SOCKET_NAME; + case O_RSTR: return IMG_STREAMER_SERVE_SOCKET_NAME; + default: BUG(); return NULL; + } +} + +/* + * img_streamer_init() connects to the image streamer socket. + * mode should be either O_DUMP or O_RSTR.
+ */ +int img_streamer_init(const char *image_dir, int mode) +{ + struct sockaddr_un addr; + int sockfd; + + img_streamer_mode = mode; + + sockfd = socket(AF_UNIX, SOCK_STREAM, 0); + if (sockfd < 0) { + pr_perror("Unable to instantiate UNIX socket"); + return -1; + } + + memset(&addr, 0, sizeof(addr)); + addr.sun_family = AF_UNIX; + snprintf(addr.sun_path, sizeof(addr.sun_path), "%s/%s", + image_dir, socket_name_for_mode(mode)); + + if (connect(sockfd, (struct sockaddr *)&addr, sizeof(addr)) < 0) { + pr_perror("Unable to connect to image streamer socket: %s", addr.sun_path); + goto err; + } + + img_streamer_fd_lock = shmalloc(sizeof(*img_streamer_fd_lock)); + if (!img_streamer_fd_lock) { + pr_err("Failed to allocate memory\n"); + goto err; + } + mutex_init(img_streamer_fd_lock); + + if (install_service_fd(IMG_STREAMER_FD_OFF, sockfd) < 0) + goto err; + + return 0; + +err: + close(sockfd); + return -1; +} + +/* + * img_streamer_finish() indicates that no more files will be opened. + * In other words, img_streamer_open() will no longer be called. + */ +void img_streamer_finish(void) +{ + if (get_service_fd(IMG_STREAMER_FD_OFF) >= 0) { + pr_info("Dismissing the image streamer\n"); + close_service_fd(IMG_STREAMER_FD_OFF); + } +} + +/* + * The regular protobuf APIs pb_write_one() and pb_read_one() operate over a + * `struct cr_img` object. Sadly, we don't have such object. We just have a + * file descriptor. The following pb_write_one_fd() and pb_read_one_fd() + * provide a protobuf API over a file descriptor. The implementation is a bit + * of a hack, but should be fine. At some point we can revisit to have a + * proper protobuf API over fds. 
+ */ +static int pb_write_one_fd(int fd, void *obj, int type) +{ + int ret; + struct cr_img img; + memset(&img, 0, sizeof(img)); + + img._x.fd = fd; + ret = pb_write_one(&img, obj, type); + if (ret < 0) + pr_perror("Failed to communicate with the image streamer"); + return ret; +} + +static int pb_read_one_fd(int fd, void **pobj, int type) +{ + int ret; + struct cr_img img; + memset(&img, 0, sizeof(img)); + + img._x.fd = fd; + ret = pb_read_one(&img, pobj, type); + if (ret < 0) + pr_perror("Failed to communicate with the image streamer"); + return ret; +} + +static int send_file_request(char *filename) +{ + ImgStreamerRequestEntry req = IMG_STREAMER_REQUEST_ENTRY__INIT; + req.filename = filename; + return pb_write_one_fd(get_service_fd(IMG_STREAMER_FD_OFF), + &req, PB_IMG_STREAMER_REQUEST); +} + +static int recv_file_reply(bool *exists) +{ + ImgStreamerReplyEntry *reply; + int ret = pb_read_one_fd(get_service_fd(IMG_STREAMER_FD_OFF), + (void **)&reply, PB_IMG_STREAMER_REPLY); + if (ret < 0) + return ret; + + *exists = reply->exists; + free(reply); + + return 0; +} + +/* + * Using a pipe for image file transfers allows the data to be spliced by the + * image streamer, greatly improving performance. + * Transfer rates of up to 15GB/s can be seen with this technique. + */ +#define READ_PIPE 0 /* index of the read pipe returned by pipe() */ +#define WRITE_PIPE 1 +static int establish_streamer_file_pipe(void) +{ + /* + * If the other end of the pipe closes, the kernel will want to kill + * us with a SIGPIPE. This signal must be ignored, which we do in + * crtools.c:main() with signal(SIGPIPE, SIG_IGN). + */ + int ret = -1; + int criu_pipe_direction = img_streamer_mode == O_DUMP ? 
WRITE_PIPE : READ_PIPE; + int streamer_pipe_direction = 1 - criu_pipe_direction; + int fds[2]; + + if (pipe(fds) < 0) { + pr_perror("Unable to create pipe"); + return -1; + } + + if (send_fd(get_service_fd(IMG_STREAMER_FD_OFF), + NULL, 0, fds[streamer_pipe_direction]) < 0) + close(fds[criu_pipe_direction]); + else + ret = fds[criu_pipe_direction]; + + close(fds[streamer_pipe_direction]); + + return ret; +} + +static int _img_streamer_open(char *filename) +{ + if (send_file_request(filename) < 0) + return -1; + + if (img_streamer_mode == O_RSTR) { + /* The streamer replies whether the file exists */ + bool exists; + if (recv_file_reply(&exists) < 0) + return -1; + + if (!exists) + return -ENOENT; + } + + /* + * When the image streamer encounters a fatal error, it won't report + * errors via protobufs. Instead, CRIU will get a broken pipe error + * when trying to access a streaming pipe. This behavior is similar to + * what would happen if we were connecting criu and criu-image-streamer + * via a shell pipe. + */ + + return establish_streamer_file_pipe(); +} + +/* + * Opens an image file via a UNIX pipe with the image streamer. + * + * Return: + * A file descriptor on success + * -ENOENT when the file was not found. + * -1 on any other error.
+ */ +int img_streamer_open(char *filename, int flags) +{ + int ret; + + BUG_ON(flags != img_streamer_mode); + + mutex_lock(img_streamer_fd_lock); + ret = _img_streamer_open(filename); + mutex_unlock(img_streamer_fd_lock); + return ret; +} diff --git a/criu/include/cr_options.h b/criu/include/cr_options.h index ba405182e..d5655212d 100644 --- a/criu/include/cr_options.h +++ b/criu/include/cr_options.h @@ -143,6 +143,7 @@ struct cr_options { int weak_sysctls; int status_fd; bool orphan_pts_master; + int stream; pid_t tree_id; int log_level; char *imgs_dir; diff --git a/criu/include/image.h b/criu/include/image.h index 1c7cc5471..62c8d7ba0 100644 --- a/criu/include/image.h +++ b/criu/include/image.h @@ -145,7 +145,7 @@ static inline int img_raw_fd(struct cr_img *img) extern off_t img_raw_size(struct cr_img *img); -extern int open_image_dir(char *dir); +extern int open_image_dir(char *dir, int mode); extern void close_image_dir(void); extern struct cr_img *open_image_at(int dfd, int type, unsigned long flags, ...); diff --git a/criu/include/img-streamer.h b/criu/include/img-streamer.h new file mode 100644 index 000000000..0c380c915 --- /dev/null +++ b/criu/include/img-streamer.h @@ -0,0 +1,8 @@ +#ifndef IMAGE_STREAMER_H +#define IMAGE_STREAMER_H + +extern int img_streamer_init(const char *image_dir, int mode); +extern void img_streamer_finish(void); +extern int img_streamer_open(char *filename, int flags); + +#endif /* IMAGE_STREAMER_H */ diff --git a/criu/include/protobuf-desc.h b/criu/include/protobuf-desc.h index ee4135d65..43d961731 100644 --- a/criu/include/protobuf-desc.h +++ b/criu/include/protobuf-desc.h @@ -62,8 +62,10 @@ enum { PB_GHOST_CHUNK, PB_FILE, PB_MEMFD_FILE, - PB_MEMFD_INODE, /* 60 */ + PB_MEMFD_INODE, PB_TIMENS, + PB_IMG_STREAMER_REQUEST, + PB_IMG_STREAMER_REPLY, /* PB_AUTOGEN_STOP */ diff --git a/criu/include/servicefd.h b/criu/include/servicefd.h index 986c46af5..c11f89d37 100644 --- a/criu/include/servicefd.h +++ b/criu/include/servicefd.h @@ 
-14,6 +14,7 @@ enum sfd_type { LOG_FD_OFF, IMG_FD_OFF, + IMG_STREAMER_FD_OFF, PROC_FD_OFF, /* fd with /proc for all proc_ calls */ PROC_PID_FD_OFF, CR_PROC_FD_OFF, /* some other's proc fd: diff --git a/criu/mem.c b/criu/mem.c index 15aa0cbdb..167838b98 100644 --- a/criu/mem.c +++ b/criu/mem.c @@ -1406,9 +1406,9 @@ static int prepare_vma_ios(struct pstree_item *t, struct task_restore_args *ta) /* * We optimize the case when rsti(t)->vma_io is empty. * - * This is useful for for remote images, where all VMAs are premapped - * (pr->pieok is false). This avoids re-opening the CR_FD_PAGES file, - * which could be no longer be available. + * This is useful when using the image streamer, where all VMAs are + * premapped (pr->pieok is false). This avoids re-opening the + * CR_FD_PAGES file, which may be readable only once. */ if (list_empty(&rsti(t)->vma_io)) { ta->vma_ios = NULL; diff --git a/criu/page-xfer.c b/criu/page-xfer.c index 9affc2706..db8e5bec2 100644 --- a/criu/page-xfer.c +++ b/criu/page-xfer.c @@ -382,6 +382,10 @@ static int open_page_local_xfer(struct page_xfer *xfer, int fd_type, unsigned lo int pfd; int pr_flags = (fd_type == CR_FD_PAGEMAP) ? 
PR_TASK : PR_SHMEM; + /* Image streaming lacks support for incremental images */ + if (opts.stream) + goto out; + pfd = openat(get_service_fd(IMG_FD_OFF), CR_PARENT_LINK, O_RDONLY); if (pfd < 0 && errno == ENOENT) goto out; @@ -928,6 +932,10 @@ int check_parent_local_xfer(int fd_type, unsigned long img_id) struct stat st; int ret, pfd; + /* Image streaming lacks support for incremental images */ + if (opts.stream) + return 0; + pfd = openat(get_service_fd(IMG_FD_OFF), CR_PARENT_LINK, O_RDONLY); if (pfd < 0 && errno == ENOENT) return 0; diff --git a/criu/pagemap.c b/criu/pagemap.c index 05f6b82b8..f1e1be91f 100644 --- a/criu/pagemap.c +++ b/criu/pagemap.c @@ -406,6 +406,49 @@ static int maybe_read_page_local(struct page_read *pr, unsigned long vaddr, return ret; } +/* + * We cannot use maybe_read_page_local() for streaming images as it uses + * pread(), seeking in the file. Instead, we use this custom page reader. + */ +static int maybe_read_page_img_streamer(struct page_read *pr, unsigned long vaddr, + int nr, void *buf, unsigned flags) +{ + unsigned long len = nr * PAGE_SIZE; + int fd = img_raw_fd(pr->pi); + int ret; + size_t curr = 0; + + pr_debug("\tpr%lu-%u Read page from self %lx/%"PRIx64"\n", + pr->img_id, pr->id, pr->cvaddr, pr->pi_off); + + /* We can't seek. 
The requested address better match */ + BUG_ON(pr->cvaddr != vaddr); + + while (1) { + ret = read(fd, buf + curr, len - curr); + if (ret == 0) { + pr_err("Reached EOF unexpectedly while reading page from image\n"); + return -1; + } else if (ret < 0) { + pr_perror("Can't read mapping page %d", ret); + return -1; + } + curr += ret; + if (curr == len) + break; + } + + if (opts.auto_dedup) + pr_warn_once("Can't dedup when streaming images\n"); + + if (ret == 0 && pr->io_complete) + ret = pr->io_complete(pr, vaddr, nr); + + pr->pi_off += len; + + return ret; +} + static int read_page_complete(unsigned long img_id, unsigned long vaddr, int nr_pages, void *priv) { int ret = 0; @@ -601,6 +644,10 @@ static int try_open_parent(int dfd, unsigned long id, struct page_read *pr, int int pfd, ret; struct page_read *parent = NULL; + /* Image streaming lacks support for incremental images */ + if (opts.stream) + goto out; + pfd = openat(dfd, CR_PARENT_LINK, O_RDONLY); if (pfd < 0 && errno == ENOENT) goto out; @@ -657,7 +704,19 @@ static int init_pagemaps(struct page_read *pr) off_t fsize; int nr_pmes, nr_realloc; - fsize = img_raw_size(pr->pmi); + if (opts.stream) { + /* + * TODO - There is no easy way to estimate the size of the + * pagemap that is still to be read from the pipe. Possible + * solution is to ask the image streamer for the size of the + * image. 1024 is a wild guess (more space is allocated if + * needed). 
+ */ + fsize = 1024; + } else { + fsize = img_raw_size(pr->pmi); + } + if (fsize < 0) return -1; @@ -781,6 +840,8 @@ int open_page_read_at(int dfd, unsigned long img_id, struct page_read *pr, int p if (remote) pr->maybe_read_page = maybe_read_page_remote; + else if (opts.stream) + pr->maybe_read_page = maybe_read_page_img_streamer; else { pr->maybe_read_page = maybe_read_page_local; if (!pr->parent && !opts.lazy_pages) diff --git a/criu/protobuf-desc.c b/criu/protobuf-desc.c index 2ee81e5db..13655264a 100644 --- a/criu/protobuf-desc.c +++ b/criu/protobuf-desc.c @@ -63,6 +63,7 @@ #include "images/seccomp.pb-c.h" #include "images/binfmt-misc.pb-c.h" #include "images/autofs.pb-c.h" +#include "images/img-streamer.pb-c.h" struct cr_pb_message_desc cr_pb_descs[PB_MAX]; diff --git a/criu/util.c b/criu/util.c index 06c594ca9..b30dbc86c 100644 --- a/criu/util.c +++ b/criu/util.c @@ -423,13 +423,19 @@ int copy_file(int fd_in, int fd_out, size_t bytes) { ssize_t written = 0; size_t chunk = bytes ? bytes : 4096; + ssize_t ret; while (1) { - ssize_t ret; - - ret = sendfile(fd_out, fd_in, NULL, chunk); + /* + * When fd_out is a pipe, sendfile() returns -EINVAL, so we + * fall back to splice(). Not sure why. 
+ */ + if (opts.stream) + ret = splice(fd_in, NULL, fd_out, NULL, chunk, SPLICE_F_MOVE); + else + ret = sendfile(fd_out, fd_in, NULL, chunk); if (ret < 0) { - pr_perror("Can't send data to ghost file"); + pr_perror("Can't transfer data to ghost file from image"); return -1; } diff --git a/images/Makefile b/images/Makefile index 5ddd37664..bc67278e6 100644 --- a/images/Makefile +++ b/images/Makefile @@ -65,6 +65,7 @@ proto-obj-y += macvlan.o proto-obj-y += sit.o proto-obj-y += memfd.o proto-obj-y += timens.o +proto-obj-y += img-streamer.o CFLAGS += -iquote $(obj)/ diff --git a/images/img-streamer.proto b/images/img-streamer.proto new file mode 100644 index 000000000..d1bd4cc19 --- /dev/null +++ b/images/img-streamer.proto @@ -0,0 +1,16 @@ +syntax = "proto2"; + +// This message is sent from CRIU to the streamer. +// * During dump, it communicates the name of the file that is about to be sent +// to the streamer. +// * During restore, CRIU requests image files from the streamer. The message is +// used to communicate the name of the desired file. +message img_streamer_request_entry { + required string filename = 1; +} + +// This message is sent from the streamer to CRIU. It is only used during +// restore to report whether the requested file exists. +message img_streamer_reply_entry { + required bool exists = 1; +} From 27ab533cbec281f60726c2ec3a26f74ca661e814 Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Wed, 10 Jun 2020 20:25:36 +0000 Subject: [PATCH 0406/2030] tests: run tests with criu-image-streamer with --stream One can pass --stream to zdtm.py for testing criu with image streaming. criu-image-streamer should be installed in ../criu-image-streamer relative to the criu project directory. But any path will do providing that criu-image-streamer can be found in the PATH env. Added a few tests to run on travis-ci to make sure streaming works. We run test that are likely to fail. However, it would be good to once in a while run all tests with `--stream -a`. 
Signed-off-by: Nicolas Viennot --- .travis.yml | 1 + scripts/install-criu-image-streamer.sh | 14 ++++ scripts/travis/travis-tests | 15 ++++ test/zdtm.py | 106 ++++++++++++++++++++++++- 4 files changed, 133 insertions(+), 3 deletions(-) create mode 100755 scripts/install-criu-image-streamer.sh diff --git a/.travis.yml b/.travis.yml index 9928f16c2..69a505193 100644 --- a/.travis.yml +++ b/.travis.yml @@ -94,6 +94,7 @@ jobs: arch: amd64 env: TR_ARCH=ppc64-cross dist: bionic + - env: TR_ARCH=local STREAM_TEST=1 allow_failures: - env: TR_ARCH=docker-test - env: TR_ARCH=docker-test DIST=xenial diff --git a/scripts/install-criu-image-streamer.sh b/scripts/install-criu-image-streamer.sh new file mode 100755 index 000000000..e4f368602 --- /dev/null +++ b/scripts/install-criu-image-streamer.sh @@ -0,0 +1,14 @@ +#!/bin/bash +set -eux + +# Install Rust toolchain +curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y + +# Clone criu-image-streamer in a sibling directory of the criu project directory +cd $(dirname "$0")/../../ +# TODO change dev branch to master once PR is merged +git clone --depth=1 https://github.com/checkpoint-restore/criu-image-streamer.git -b dev + +# Compile +cd criu-image-streamer +make BUILD=debug # debug build compiles faster than release mode (2x faster) diff --git a/scripts/travis/travis-tests b/scripts/travis/travis-tests index 4cb842c97..311236d7d 100755 --- a/scripts/travis/travis-tests +++ b/scripts/travis/travis-tests @@ -61,6 +61,13 @@ travis_prep () { chmod a+x $HOME } +test_stream() { + # We must test CRIU features that dump content into an image file to ensure + # streaming compatibility. 
+ STREAM_TEST_PATTERN='.*(ghost|fifo|unlink|memfd|shmem|socket_queue).*' + ./test/zdtm.py run --stream -p 2 --keep-going -T $STREAM_TEST_PATTERN $ZDTM_OPTS +} + travis_prep export GCOV @@ -132,6 +139,14 @@ chmod 0777 test/ chmod 0777 test/zdtm/static chmod 0777 test/zdtm/transition +# We run streaming tests separately to improve test completion times, +# hence the exit 0. +if [ "${STREAM_TEST}" = "1" ]; then + ./scripts/install-criu-image-streamer.sh + test_stream + exit 0 +fi + ./test/zdtm.py run -a -p 2 --keep-going $ZDTM_OPTS KERN_MAJ=`uname -r | cut -d. -f1` diff --git a/test/zdtm.py b/test/zdtm.py index b111fa383..49387e43e 100755 --- a/test/zdtm.py +++ b/test/zdtm.py @@ -30,6 +30,9 @@ import yaml os.chdir(os.path.dirname(os.path.abspath(__file__))) +# File to store content of streamed images +STREAMED_IMG_FILE_NAME = "img.criu" + prev_line = None @@ -1024,6 +1027,7 @@ class criu: self.__mdedup = bool(opts['noauto_dedup']) self.__user = bool(opts['user']) self.__leave_stopped = bool(opts['stop']) + self.__stream = bool(opts['stream']) self.__criu = (opts['rpc'] and criu_rpc or criu_cli) self.__show_stats = bool(opts['show_stats']) self.__lazy_pages_p = None @@ -1208,11 +1212,19 @@ class criu: stats_written = int(stent['shpages_written']) + int( stent['pages_written']) + if self.__stream: + p = self.spawn_criu_image_streamer("extract") + p.wait() + real_written = 0 for f in os.listdir(self.__ddir()): if f.startswith('pages-'): real_written += os.path.getsize(os.path.join(self.__ddir(), f)) + if self.__stream: + # make sure the extracted image is not usable. 
+ os.unlink(os.path.join(self.__ddir(), "inventory.img")) + r_pages = real_written / mmap.PAGESIZE r_off = real_written % mmap.PAGESIZE if (stats_written != r_pages) or (r_off != 0): @@ -1220,6 +1232,57 @@ class criu: (stats_written, r_pages, r_off)) raise test_fail_exc("page counts mismatch") + # action can be "capture", "extract", or "serve" + def spawn_criu_image_streamer(self, action): + print("Run criu-image-streamer in {} mode".format(action)) + + progress_r, progress_w = os.pipe() + # We fcntl() on both file descriptors due to some potential differences + # with python2 and python3. + fcntl.fcntl(progress_r, fcntl.F_SETFD, fcntl.FD_CLOEXEC) + fcntl.fcntl(progress_w, fcntl.F_SETFD, 0) + + # We use cat because the streamer requires to work with pipes. + if action == 'capture': + cmd = ["criu-image-streamer", + "--images-dir '{images_dir}'", + "--progress-fd {progress_fd}", + action, + "| cat > {img_file}"] + else: + cmd = ["cat {img_file} |", + "criu-image-streamer", + "--images-dir '{images_dir}'", + "--progress-fd {progress_fd}", + action] + + # * As we are using a shell pipe command, we want to use pipefail. + # Otherwise, failures stay unnoticed. For this, we use bash as sh + # doesn't support that feature. + # * We use close_fds=False because we want the child to inherit the progress pipe + p = subprocess.Popen(["bash", "-c", "set -o pipefail; " + " ".join(cmd).format( + progress_fd=progress_w, + images_dir=self.__ddir(), + img_file=os.path.join(self.__ddir(), STREAMED_IMG_FILE_NAME) + )], close_fds=False) + + os.close(progress_w) + progress = os.fdopen(progress_r, "r") + + if action == 'serve' or action == 'extract': + # Consume image statistics + progress.readline() + + if action == 'capture' or action == 'serve': + # The streamer socket is ready for consumption once we receive the + # socket-init message. 
+ if progress.readline().strip() != "socket-init": + p.kill() + raise test_fail_exc( + "criu-image-streamer is not starting (exit_code=%d)" % p.wait()) + + return p + def dump(self, action, opts=[]): self.__iter += 1 os.mkdir(self.__ddir()) @@ -1249,6 +1312,10 @@ class criu: a_opts += self.__test.getdopts() + if self.__stream: + streamer_p = self.spawn_criu_image_streamer("capture") + a_opts += ["--stream"] + if self.__dedup: a_opts += ["--auto-dedup"] @@ -1273,6 +1340,11 @@ class criu: self.__dump_process = self.__criu_act(action, opts=a_opts + opts, nowait=nowait) + if self.__stream: + ret = streamer_p.wait() + if ret: + raise test_fail_exc("criu-image-streamer exited with %d" % ret) + if self.__mdedup and self.__iter > 1: self.__criu_act("dedup", opts=[]) @@ -1303,6 +1375,10 @@ class criu: r_opts += ['--empty-ns', 'net'] r_opts += ['--action-script', os.getcwd() + '/empty-netns-prep.sh'] + if self.__stream: + streamer_p = self.spawn_criu_image_streamer("serve") + r_opts += ["--stream"] + if self.__dedup: r_opts += ["--auto-dedup"] @@ -1336,6 +1412,11 @@ class criu: r_opts += ['--leave-stopped'] self.__criu_act("restore", opts=r_opts + ["--restore-detached"]) + if self.__stream: + ret = streamer_p.wait() + if ret: + raise test_fail_exc("criu-image-streamer exited with %d" % ret) + self.show_stats("restore") if self.__leave_stopped: @@ -1344,6 +1425,13 @@ class criu: @staticmethod def check(feature): + if feature == 'stream': + try: + p = subprocess.Popen(["criu-image-streamer", "--version"]) + return p.wait() == 0 + except Exception: + return False + return criu_cli.run( "check", ["--no-default-config", "-v0", "--feature", feature], opts['criu_bin']) == 0 @@ -1852,7 +1940,7 @@ class Launcher: 'stop', 'empty_ns', 'fault', 'keep_img', 'report', 'snaps', 'sat', 'script', 'rpc', 'lazy_pages', 'join_ns', 'dedup', 'sbs', 'freezecg', 'user', 'dry_run', 'noauto_dedup', - 'remote_lazy_pages', 'show_stats', 'lazy_migrate', + 'remote_lazy_pages', 'show_stats', 
'lazy_migrate', 'stream', 'tls', 'criu_bin', 'crit_bin', 'pre_dump_mode') arg = repr((name, desc, flavor, {d: self.__opts[d] for d in nd})) @@ -2138,6 +2226,15 @@ def run_tests(opts): "[WARNING] Non-cooperative UFFD is missing, some tests might spuriously fail" ) + if opts['stream']: + streamer_dir = os.path.realpath(opts['criu_image_streamer_dir']) + os.environ['PATH'] = "{}:{}".format(streamer_dir, os.environ['PATH']) + if not criu.check('stream'): + raise RuntimeError(( + "Streaming tests need the criu-image-streamer binary to be accessible in the {} directory. " + + "Specify --criu-image-streamer-dir or modify PATH to provide an alternate location") + .format(streamer_dir)) + launcher = Launcher(opts, len(torun)) try: for t in torun: @@ -2460,8 +2557,8 @@ rp.add_argument("--rpc", rp.add_argument("--page-server", help="Use page server dump", action='store_true') -rp.add_argument("--remote", - help="Use remote option for diskless C/R", +rp.add_argument("--stream", + help="Use criu-image-streamer", action='store_true') rp.add_argument("-p", "--parallel", help="Run test in parallel") rp.add_argument("--dry-run", @@ -2500,6 +2597,9 @@ rp.add_argument("--criu-bin", rp.add_argument("--crit-bin", help="Path to crit binary", default='../crit/crit') +rp.add_argument("--criu-image-streamer-dir", + help="Directory where the criu-image-streamer binary is located", + default="../../criu-image-streamer") rp.add_argument("--pre-dump-mode", help="Use splice or read mode of pre-dumping", choices=['splice', 'read'], From 8c538ca10dc4385baf911b9b1747ff2287aff5fb Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 6 Jul 2017 12:38:48 +0300 Subject: [PATCH 0407/2030] page-read: Warn about async read w/o completion cb Acked-by: Mike Rapoport Signed-off-by: Pavel Emelyanov Signed-off-by: Andrei Vagin --- criu/pagemap.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/criu/pagemap.c b/criu/pagemap.c index f1e1be91f..58f2850ff 100644 --- a/criu/pagemap.c +++ b/criu/pagemap.c @@ 
-462,6 +462,8 @@ static int read_page_complete(unsigned long img_id, unsigned long vaddr, int nr_ if (pr->io_complete) ret = pr->io_complete(pr, vaddr, nr_pages); + else + pr_warn_once("Remote page read w/o io_complete!\n"); return ret; } From eb732bcf0d7621e3b7a74797e8b4f04b40f2b183 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Sun, 24 Mar 2019 14:26:10 +0000 Subject: [PATCH 0408/2030] util: Remove deprecated print_data() routine The print_data() function was part of the deprecated (and removed) 'show' action, and it was moved in util.c with the following commit: a501b4804b3c95e1d83d64dd10ed95c37f0378bb The 'show' action has been deprecated since 1.6, let's finally drop it. The print_data() routine is kept for yet another (to be deprecated too) feature called 'criu exec'. The criu exec feature was removed with: 909590a3558560655c1ce5b72215efbb325999ca Remove criu exec code It's now obsoleted by compel library. Maybe-TODO: Add compel tool exec action? Therefore, now we can drop print_data() as well. Signed-off-by: Radostin Stoyanov --- criu/include/util.h | 2 -- criu/util.c | 75 +-------------------------------------------- 2 files changed, 1 insertion(+), 76 deletions(-) diff --git a/criu/include/util.h b/criu/include/util.h index 45bebf673..d67f6d39d 100644 --- a/criu/include/util.h +++ b/criu/include/util.h @@ -290,8 +290,6 @@ char *xstrcat(char *str, const char *fmt, ...) char *xsprintf(const char *fmt, ...) 
__attribute__ ((__format__ (__printf__, 1, 2))); -void print_data(unsigned long addr, unsigned char *data, size_t size); - int setup_tcp_server(char *type, char *addr, unsigned short *port); int run_tcp_server(bool daemon_mode, int *ask, int cfd, int sk); int setup_tcp_client(char *hostname); diff --git a/criu/util.c b/criu/util.c index b30dbc86c..517f0fc25 100644 --- a/criu/util.c +++ b/criu/util.c @@ -26,7 +26,6 @@ #include #include #include -#include #include "linux/mount.h" @@ -994,85 +993,13 @@ void tcp_nodelay(int sk, bool on) pr_perror("Unable to restore TCP_NODELAY (%d)", val); } -static inline void pr_xsym(unsigned char *data, size_t len, int pos) -{ - char sym; - - if (pos < len) - sym = data[pos]; - else - sym = ' '; - - pr_msg("%c", isprint(sym) ? sym : '.'); -} - -static inline void pr_xdigi(unsigned char *data, size_t len, int pos) -{ - if (pos < len) - pr_msg("%02x ", data[pos]); - else - pr_msg(" "); -} - -static int nice_width_for(unsigned long addr) -{ - int ret = 3; - - while (addr) { - addr >>= 4; - ret++; - } - - return ret; -} - -void print_data(unsigned long addr, unsigned char *data, size_t size) -{ - int i, j, addr_len; - unsigned zero_line = 0; - - addr_len = nice_width_for(addr + size); - - for (i = 0; i < size; i += 16) { - if (*(u64 *)(data + i) == 0 && *(u64 *)(data + i + 8) == 0) { - if (zero_line == 0) - zero_line = 1; - else { - if (zero_line == 1) { - pr_msg("*\n"); - zero_line = 2; - } - - continue; - } - } else - zero_line = 0; - - pr_msg("%#0*lx: ", addr_len, addr + i); - for (j = 0; j < 8; j++) - pr_xdigi(data, size, i + j); - pr_msg(" "); - for (j = 8; j < 16; j++) - pr_xdigi(data, size, i + j); - - pr_msg(" |"); - for (j = 0; j < 8; j++) - pr_xsym(data, size, i + j); - pr_msg(" "); - for (j = 8; j < 16; j++) - pr_xsym(data, size, i + j); - - pr_msg("|\n"); - } -} - static int get_sockaddr_in(struct sockaddr_storage *addr, char *host, unsigned short port) { memset(addr, 0, sizeof(*addr)); if (!host) { - ((struct sockaddr_in 
*)addr)->sin_addr.s_addr = INADDR_ANY; + ((struct sockaddr_in *)addr)->sin_addr.s_addr = INADDR_ANY; addr->ss_family = AF_INET; } else if (inet_pton(AF_INET, host, &((struct sockaddr_in *)addr)->sin_addr)) { addr->ss_family = AF_INET; From 8be1d457d71c93ea0a592bdc2dc8a982cd3e6fa9 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Wed, 15 May 2019 09:47:17 +0000 Subject: [PATCH 0409/2030] net: fix coverity RESOURCE_LEAK criu-3.12/criu/net.c:2043: overwrite_var: Overwriting "img" in "img = open_image_at(-1, CR_FD_IP6TABLES, 0UL, pid)" leaks the storage that "img" points to. Signed-off-by: Adrian Reber --- criu/net.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/criu/net.c b/criu/net.c index 27e7c7e33..86fba2ddc 100644 --- a/criu/net.c +++ b/criu/net.c @@ -2090,13 +2090,13 @@ static int restore_ip_dump(int type, int pid, char *cmd) if (written < n) { pr_perror("Failed to write to tmpfile " "[written: %d; total: %d]", written, n); - return -1; + goto close; } } if (fseek(tmp_file, 0, SEEK_SET)) { pr_perror("Failed to set file position to beginning of tmpfile"); - return -1; + goto close; } if (img) { @@ -2104,6 +2104,7 @@ static int restore_ip_dump(int type, int pid, char *cmd) close_image(img); } +close: if(fclose(tmp_file)) { pr_perror("Failed to close tmpfile"); } @@ -2208,6 +2209,7 @@ static inline int restore_iptables(int pid) return -1; if (empty_image(img)) { ret = 0; + close_image(img); goto ipt6; } From ba0d6dbac1bd237319817ac0f34653a78686be1f Mon Sep 17 00:00:00 2001 From: Guoyun Sun Date: Wed, 8 Apr 2020 10:13:22 +0800 Subject: [PATCH 0410/2030] mips:compel/arch/mips: Add architecture support to compel tool and libraries This patch only adds the support but does not enable it for building. 
Signed-off-by: Guoyun Sun --- .../arch/mips/plugins/include/asm/prologue.h | 36 ++ .../mips/plugins/include/asm/syscall-types.h | 36 ++ compel/arch/mips/plugins/include/features.h | 6 + compel/arch/mips/plugins/std/memcpy.S | 23 ++ compel/arch/mips/plugins/std/parasite-head.S | 20 ++ .../plugins/std/syscalls/Makefile.syscalls | 117 +++++++ .../std/syscalls/syscall-common-mips-64.S | 12 + .../mips/plugins/std/syscalls/syscall_64.tbl | 115 +++++++ .../mips/scripts/compel-pack-compat.lds.S | 4 + compel/arch/mips/scripts/compel-pack.lds.S | 37 +++ compel/arch/mips/src/lib/cpu.c | 36 ++ compel/arch/mips/src/lib/handle-elf-host.c | 1 + compel/arch/mips/src/lib/handle-elf.c | 23 ++ compel/arch/mips/src/lib/include/handle-elf.h | 8 + compel/arch/mips/src/lib/include/ldsodefs.h | 147 +++++++++ compel/arch/mips/src/lib/include/syscall.h | 7 + .../mips/src/lib/include/uapi/asm/.gitignore | 0 .../src/lib/include/uapi/asm/breakpoints.h | 6 + .../arch/mips/src/lib/include/uapi/asm/cpu.h | 5 + .../arch/mips/src/lib/include/uapi/asm/fpu.h | 4 + .../src/lib/include/uapi/asm/infect-types.h | 66 ++++ .../mips/src/lib/include/uapi/asm/sigframe.h | 63 ++++ .../mips/src/lib/include/uapi/asm/siginfo.h | 124 +++++++ compel/arch/mips/src/lib/infect.c | 310 ++++++++++++++++++ include/common/arch/mips/asm/atomic.h | 148 +++++++++ include/common/arch/mips/asm/bitops.h | 41 +++ include/common/arch/mips/asm/bitsperlong.h | 6 + include/common/arch/mips/asm/cmpxchg.h | 67 ++++ include/common/arch/mips/asm/fls64.h | 38 +++ include/common/arch/mips/asm/linkage.h | 58 ++++ include/common/arch/mips/asm/page.h | 39 +++ include/common/arch/mips/asm/utils.h | 24 ++ 32 files changed, 1627 insertions(+) create mode 100755 compel/arch/mips/plugins/include/asm/prologue.h create mode 100755 compel/arch/mips/plugins/include/asm/syscall-types.h create mode 100755 compel/arch/mips/plugins/include/features.h create mode 100755 compel/arch/mips/plugins/std/memcpy.S create mode 100755 
compel/arch/mips/plugins/std/parasite-head.S create mode 100755 compel/arch/mips/plugins/std/syscalls/Makefile.syscalls create mode 100755 compel/arch/mips/plugins/std/syscalls/syscall-common-mips-64.S create mode 100755 compel/arch/mips/plugins/std/syscalls/syscall_64.tbl create mode 100755 compel/arch/mips/scripts/compel-pack-compat.lds.S create mode 100755 compel/arch/mips/scripts/compel-pack.lds.S create mode 100755 compel/arch/mips/src/lib/cpu.c create mode 120000 compel/arch/mips/src/lib/handle-elf-host.c create mode 100755 compel/arch/mips/src/lib/handle-elf.c create mode 100755 compel/arch/mips/src/lib/include/handle-elf.h create mode 100755 compel/arch/mips/src/lib/include/ldsodefs.h create mode 100755 compel/arch/mips/src/lib/include/syscall.h create mode 100755 compel/arch/mips/src/lib/include/uapi/asm/.gitignore create mode 100755 compel/arch/mips/src/lib/include/uapi/asm/breakpoints.h create mode 100755 compel/arch/mips/src/lib/include/uapi/asm/cpu.h create mode 100644 compel/arch/mips/src/lib/include/uapi/asm/fpu.h create mode 100755 compel/arch/mips/src/lib/include/uapi/asm/infect-types.h create mode 100755 compel/arch/mips/src/lib/include/uapi/asm/sigframe.h create mode 100755 compel/arch/mips/src/lib/include/uapi/asm/siginfo.h create mode 100755 compel/arch/mips/src/lib/infect.c create mode 100755 include/common/arch/mips/asm/atomic.h create mode 100644 include/common/arch/mips/asm/bitops.h create mode 100755 include/common/arch/mips/asm/bitsperlong.h create mode 100755 include/common/arch/mips/asm/cmpxchg.h create mode 100644 include/common/arch/mips/asm/fls64.h create mode 100644 include/common/arch/mips/asm/linkage.h create mode 100755 include/common/arch/mips/asm/page.h create mode 100644 include/common/arch/mips/asm/utils.h diff --git a/compel/arch/mips/plugins/include/asm/prologue.h b/compel/arch/mips/plugins/include/asm/prologue.h new file mode 100755 index 000000000..9d812eec9 --- /dev/null +++ 
b/compel/arch/mips/plugins/include/asm/prologue.h @@ -0,0 +1,36 @@ +#ifndef __ASM_PROLOGUE_H__ +#define __ASM_PROLOGUE_H__ + +#ifndef __ASSEMBLY__ + +#include +#include +#include + +#include + +#define sys_recv(sockfd, ubuf, size, flags) \ + sys_recvfrom(sockfd, ubuf, size, flags, NULL, NULL) + +typedef struct prologue_init_args { + struct sockaddr_un ctl_sock_addr; + unsigned int ctl_sock_addr_len; + + unsigned int arg_s; + void *arg_p; + + void *sigframe; +} prologue_init_args_t; + +#endif /* __ASSEMBLY__ */ + +/* + * Reserve enough space for sigframe. + * + * FIXME It is rather should be taken from sigframe header. + */ +#define PROLOGUE_SGFRAME_SIZE 4096 + +#define PROLOGUE_INIT_ARGS_SIZE 1024 + +#endif /* __ASM_PROLOGUE_H__ */ diff --git a/compel/arch/mips/plugins/include/asm/syscall-types.h b/compel/arch/mips/plugins/include/asm/syscall-types.h new file mode 100755 index 000000000..64daf2c7a --- /dev/null +++ b/compel/arch/mips/plugins/include/asm/syscall-types.h @@ -0,0 +1,36 @@ +#ifndef COMPEL_ARCH_SYSCALL_TYPES_H__ +#define COMPEL_ARCH_SYSCALL_TYPES_H__ + +/* Types for sigaction, sigprocmask syscalls */ +typedef void rt_signalfn_t(int, siginfo_t *, void *); +typedef rt_signalfn_t *rt_sighandler_t; + +typedef void rt_restorefn_t(void); +typedef rt_restorefn_t *rt_sigrestore_t; + +#define SA_RESTORER 0x04000000 + +/** refer to linux-3.10/arch/mips/include/uapi/asm/signal.h*/ +#define _KNSIG 128 +#define _NSIG_BPW 64 + +#define _KNSIG_WORDS (_KNSIG / _NSIG_BPW) + +/* + * Note: as k_rtsigset_t is the same size for 32-bit and 64-bit, + * sig defined as uint64_t rather than (unsigned long) - for the + * purpose if we ever going to support native 32-bit compilation. 
+ */ + +typedef struct { + uint64_t sig[_KNSIG_WORDS]; +} k_rtsigset_t; + +typedef struct { + rt_sighandler_t rt_sa_handler; + unsigned long rt_sa_flags; + rt_sigrestore_t rt_sa_restorer; + k_rtsigset_t rt_sa_mask; +} rt_sigaction_t; + +#endif /* COMPEL_ARCH_SYSCALL_TYPES_H__ */ diff --git a/compel/arch/mips/plugins/include/features.h b/compel/arch/mips/plugins/include/features.h new file mode 100755 index 000000000..0f35725fa --- /dev/null +++ b/compel/arch/mips/plugins/include/features.h @@ -0,0 +1,6 @@ +#ifndef __COMPEL_ARCH_FEATURES_H +#define __COMPEL_ARCH_FEATURES_H + +#define ARCH_HAS_MEMCPY + +#endif /* __COMPEL_ARCH_FEATURES_H */ diff --git a/compel/arch/mips/plugins/std/memcpy.S b/compel/arch/mips/plugins/std/memcpy.S new file mode 100755 index 000000000..262267f7f --- /dev/null +++ b/compel/arch/mips/plugins/std/memcpy.S @@ -0,0 +1,23 @@ + +#include "common/asm/linkage.h" + + .section .head.text, "ax" +ENTRY(memcpy) + .set noreorder + dadd v0,zero,a0 + daddiu t1,zero,0 +loop: + beq t1,a2,exit + nop + lb t2,0(a1) + sb t2,0(a0) + daddiu t1,t1,1 + daddiu a0,a0,1 + daddiu a1,a1,1 + j loop + nop +exit: + jr ra + nop +END(memcpy) + diff --git a/compel/arch/mips/plugins/std/parasite-head.S b/compel/arch/mips/plugins/std/parasite-head.S new file mode 100755 index 000000000..38e87f823 --- /dev/null +++ b/compel/arch/mips/plugins/std/parasite-head.S @@ -0,0 +1,20 @@ + +#include "common/asm/linkage.h" + + + .section .head.text, "ax" +ENTRY(__export_parasite_head_start) + .set push + .set noreorder + lw a0, __export_parasite_cmd + dla a1, __export_parasite_args + jal parasite_service + nop + .byte 0x0d, 0x00, 0x00, 0x00 //break + .set pop +// .byte 0x40,0x01,0x00,0x00 //pause + +__export_parasite_cmd: + .long 0 +END(__export_parasite_head_start) + diff --git a/compel/arch/mips/plugins/std/syscalls/Makefile.syscalls b/compel/arch/mips/plugins/std/syscalls/Makefile.syscalls new file mode 100755 index 000000000..ef75f9e95 --- /dev/null +++ 
b/compel/arch/mips/plugins/std/syscalls/Makefile.syscalls @@ -0,0 +1,117 @@ +std-lib-y += ./$(PLUGIN_ARCH_DIR)/std/syscalls-64.o +sys-proto-types := $(obj)/include/uapi/std/syscall-types.h +sys-proto-generic := $(obj)/include/uapi/std/syscall.h +sys-codes-generic := $(obj)/include/uapi/std/syscall-codes.h +sys-codes = $(obj)/include/uapi/std/syscall-codes-$(1).h +sys-proto = $(obj)/include/uapi/std/syscall-$(1).h +sys-def = $(PLUGIN_ARCH_DIR)/std/syscalls/syscall_$(1).tbl +sys-asm = $(PLUGIN_ARCH_DIR)/std/syscalls-$(1).S +sys-asm-common-name = std/syscalls/syscall-common-mips-$(1).S +sys-asm-common = $(PLUGIN_ARCH_DIR)/$(sys-asm-common-name) +sys-asm-types := $(obj)/include/uapi/std/asm/syscall-types.h +sys-exec-tbl = $(PLUGIN_ARCH_DIR)/std/sys-exec-tbl-$(1).c + +sys-bits := 64 + +AV := $$$$ + +define gen-rule-sys-codes +$(sys-codes): $(sys-def) $(sys-proto-types) + $(call msg-gen, $$@) + $(Q) echo "/* Autogenerated, don't edit */" > $$@ + $(Q) echo "#ifndef ASM_SYSCALL_CODES_H_$(1)__" >> $$@ + $(Q) echo "#define ASM_SYSCALL_CODES_H_$(1)__" >> $$@ + $(Q) cat $$< | awk '/^__NR/{SYSN=$(AV)1; \ + sub("^__NR", "SYS", SYSN); \ + print "\n#ifndef ", $(AV)1; \ + print "#define", $(AV)1, $(AV)2; \ + print "#endif"; \ + print "\n#ifndef ", SYSN; \ + print "#define ", SYSN, $(AV)1; \ + print "#endif";}' >> $$@ + $(Q) echo "#endif /* ASM_SYSCALL_CODES_H_$(1)__ */" >> $$@ +endef + +define gen-rule-sys-proto +$(sys-proto): $(sys-def) $(sys-proto-types) + $(call msg-gen, $$@) + $(Q) echo "/* Autogenerated, don't edit */" > $$@ + $(Q) echo "#ifndef ASM_SYSCALL_PROTO_H_$(1)__" >> $$@ + $(Q) echo "#define ASM_SYSCALL_PROTO_H_$(1)__" >> $$@ + $(Q) echo '#include ' >> $$@ + $(Q) echo '#include ' >> $$@ +ifeq ($(1),32) + $(Q) echo '#include "asm/syscall32.h"' >> $$@ +endif + $(Q) cat $$< | awk '/^__NR/{print "extern long", $(AV)3, \ + substr($(AV)0, index($(AV)0,$(AV)4)), ";"}' >> $$@ + $(Q) echo "#endif /* ASM_SYSCALL_PROTO_H_$(1)__ */" >> $$@ +endef + +define gen-rule-sys-asm 
+$(sys-asm): $(sys-def) $(sys-asm-common) $(sys-codes) $(sys-proto) $(sys-proto-types) + $(call msg-gen, $$@) + $(Q) echo "/* Autogenerated, don't edit */" > $$@ + $(Q) echo '#include ' >> $$@ + $(Q) echo '#include "$(sys-asm-common-name)"' >> $$@ + $(Q) cat $$< | awk '/^__NR/{print "SYSCALL(", $(AV)3, ",", $(AV)2, ")"}' >> $$@ +endef + +define gen-rule-sys-exec-tbl +$(sys-exec-tbl): $(sys-def) $(sys-codes) $(sys-proto) $(sys-proto-generic) $(sys-proto-types) + $(call msg-gen, $$@) + $(Q) echo "/* Autogenerated, don't edit */" > $$@ + $(Q) cat $$< | awk '/^__NR/{print \ + "SYSCALL(", substr($(AV)3, 5), ",", $(AV)2, ")"}' >> $$@ +endef + +$(sys-codes-generic): $(sys-proto-types) + $(call msg-gen, $@) + $(Q) echo "/* Autogenerated, don't edit */" > $@ + $(Q) echo "#ifndef __ASM_CR_SYSCALL_CODES_H__" >> $@ + $(Q) echo "#define __ASM_CR_SYSCALL_CODES_H__" >> $@ + $(Q) echo '#include ' >> $@ + $(Q) cat $< | awk '/^__NR/{NR32=$$1; \ + sub("^__NR", "__NR32", NR32); \ + print "\n#ifndef ", NR32; \ + print "#define ", NR32, $$2; \ + print "#endif";}' >> $@ + $(Q) echo "#endif /* __ASM_CR_SYSCALL_CODES_H__ */" >> $@ +mrproper-y += $(sys-codes-generic) + +$(sys-proto-generic): $(strip $(call map,sys-proto,$(sys-bits))) $(sys-proto-types) + $(call msg-gen, $@) + $(Q) echo "/* Autogenerated, don't edit */" > $@ + $(Q) echo "#ifndef __ASM_CR_SYSCALL_PROTO_H__" >> $@ + $(Q) echo "#define __ASM_CR_SYSCALL_PROTO_H__" >> $@ + $(Q) echo "" >> $@ + $(Q) echo '#include ' >> $@ + $(Q) echo "" >> $@ + $(Q) echo "#endif /* __ASM_CR_SYSCALL_PROTO_H__ */" >> $@ +mrproper-y += $(sys-proto-generic) + +define gen-rule-sys-exec-tbl +$(sys-exec-tbl): $(sys-def) $(sys-codes) $(sys-proto) $(sys-proto-generic) + $(call msg-gen, $$@) + $(Q) echo "/* Autogenerated, don't edit */" > $$@ + $(Q) cat $$< | awk '/^__NR/{print \ + "SYSCALL(", substr($(AV)3, 5), ",", $(AV)2, ")"}' >> $$@ +endef + +$(eval $(call map,gen-rule-sys-codes,$(sys-bits))) +$(eval $(call map,gen-rule-sys-proto,$(sys-bits))) +$(eval 
$(call map,gen-rule-sys-asm,$(sys-bits))) +$(eval $(call map,gen-rule-sys-exec-tbl,$(sys-bits))) + +$(sys-asm-types): $(PLUGIN_ARCH_DIR)/include/asm/syscall-types.h + $(call msg-gen, $@) + $(Q) ln -s ../../../../../../$(PLUGIN_ARCH_DIR)/include/asm/syscall-types.h $(sys-asm-types) + +std-headers-deps += $(call sys-codes,$(sys-bits)) +std-headers-deps += $(call sys-proto,$(sys-bits)) +std-headers-deps += $(call sys-asm,$(sys-bits)) +std-headers-deps += $(call sys-exec-tbl,$(sys-bits)) +std-headers-deps += $(sys-codes-generic) +std-headers-deps += $(sys-proto-generic) +std-headers-deps += $(sys-asm-types) +mrproper-y += $(std-headers-deps) diff --git a/compel/arch/mips/plugins/std/syscalls/syscall-common-mips-64.S b/compel/arch/mips/plugins/std/syscalls/syscall-common-mips-64.S new file mode 100755 index 000000000..3478488da --- /dev/null +++ b/compel/arch/mips/plugins/std/syscalls/syscall-common-mips-64.S @@ -0,0 +1,12 @@ +#include "common/asm/linkage.h" + +#define SYSCALL(name, opcode) \ + ENTRY(name); \ + li v0, opcode; \ + syscall; \ + jr ra; \ + nop; \ + END(name) + +ENTRY(__cr_restore_rt) +END(__cr_restore_rt) diff --git a/compel/arch/mips/plugins/std/syscalls/syscall_64.tbl b/compel/arch/mips/plugins/std/syscalls/syscall_64.tbl new file mode 100755 index 000000000..47c08fc4c --- /dev/null +++ b/compel/arch/mips/plugins/std/syscalls/syscall_64.tbl @@ -0,0 +1,115 @@ +# +# System calls table; please make sure the table consists only of the syscalls +# really used somewhere in the project. +# From kernel/linux-3.10.84/arch/mips/include/uapi/asm/unistd.h: Linux 64-bit syscalls are in the range from 5000 to 5999.
+# +# __NR_name code name arguments +# ------------------------------------------------------------------------------------------------------------------------------------------------------------- +__NR_read 5000 sys_read (int fd, void *buf, unsigned long count) +__NR_write 5001 sys_write (int fd, const void *buf, unsigned long count) +__NR_open 5002 sys_open (const char *filename, unsigned long flags, unsigned long mode) +__NR_close 5003 sys_close (int fd) +__NR_lseek 5008 sys_lseek (int fd, unsigned long offset, unsigned long origin) +__NR_mmap 5009 sys_mmap (void *addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long offset) +__NR_mprotect 5010 sys_mprotect (const void *addr, unsigned long len, unsigned long prot) +__NR_munmap 5011 sys_munmap (void *addr, unsigned long len) +__NR_brk 5012 sys_brk (void *addr) +__NR_rt_sigaction 5013 sys_sigaction (int signum, const rt_sigaction_t *act, rt_sigaction_t *oldact, size_t sigsetsize) +__NR_rt_sigprocmask 5014 sys_sigprocmask (int how, k_rtsigset_t *set, k_rtsigset_t *old, size_t sigsetsize) +__NR_rt_sigreturn 5211 sys_rt_sigreturn (void) +__NR_ioctl 5015 sys_ioctl (unsigned int fd, unsigned int cmd, unsigned long arg) +__NR_pread64 5016 sys_pread (unsigned int fd, char *buf, size_t count, loff_t pos) +__NR_mremap 5024 sys_mremap (unsigned long addr, unsigned long old_len, unsigned long new_len, unsigned long flags, unsigned long new_addr) +__NR_mincore 5026 sys_mincore (void *addr, unsigned long size, unsigned char *vec) +__NR_madvise 5027 sys_madvise (unsigned long start, size_t len, int behavior) +__NR_shmat 5029 sys_shmat (int shmid, void *shmaddr, int shmflag) +__NR_dup2 5032 sys_dup2 (int oldfd, int newfd) +__NR_nanosleep 5034 sys_nanosleep (struct timespec *req, struct timespec *rem) +__NR_getitimer 5035 sys_getitimer (int which, const struct itimerval *val) +__NR_setitimer 5036 sys_setitimer (int which, const struct itimerval *val, struct itimerval *old) +__NR_getpid 
5038 sys_getpid (void) +__NR_socket 5040 sys_socket (int domain, int type, int protocol) +__NR_connect 5041 sys_connect (int sockfd, struct sockaddr *addr, int addrlen) +__NR_sendto 5043 sys_sendto (int sockfd, void *buff, size_t len, unsigned int flags, struct sockaddr *addr, int addr_len) +__NR_recvfrom 5044 sys_recvfrom (int sockfd, void *ubuf, size_t size, unsigned int flags, struct sockaddr *addr, int *addr_len) +__NR_sendmsg 5045 sys_sendmsg (int sockfd, const struct msghdr *msg, int flags) +__NR_recvmsg 5046 sys_recvmsg (int sockfd, struct msghdr *msg, int flags) +__NR_shutdown 5047 sys_shutdown (int sockfd, int how) +__NR_bind 5048 sys_bind (int sockfd, const struct sockaddr *addr, int addrlen) +__NR_setsockopt 5053 sys_setsockopt (int sockfd, int level, int optname, const void *optval, socklen_t optlen) +__NR_getsockopt 5054 sys_getsockopt (int sockfd, int level, int optname, const void *optval, socklen_t *optlen) +__NR_clone 5055 sys_clone (unsigned long flags, void *child_stack, void *parent_tid, unsigned long newtls, void *child_tid) +__NR_exit 5058 sys_exit (unsigned long error_code) +__NR_wait4 5059 sys_wait4 (int pid, int *status, int options, struct rusage *ru) +__NR_kill 5060 sys_kill (long pid, int sig) +__NR_fcntl 5070 sys_fcntl (int fd, int type, long arg) +__NR_flock 5071 sys_flock (int fd, unsigned long cmd) +__NR_mkdir 5081 sys_mkdir (const char *name, int mode) +__NR_rmdir 5082 sys_rmdir (const char *name) +__NR_unlink 5085 sys_unlink (char *pathname) +__NR_umask 5093 sys_umask (int mask) +__NR_gettimeofday 5094 sys_gettimeofday (struct timeval *tv, struct timezone *tz) +__NR_ptrace 5099 sys_ptrace (long request, pid_t pid, void *addr, void *data) +__NR_getgroups 5113 sys_getgroups (int gsize, unsigned int *groups) +__NR_setgroups 5114 sys_setgroups (int gsize, unsigned int *groups) +__NR_setresuid 5115 sys_setresuid (int uid, int euid, int suid) +__NR_getresuid 5116 sys_getresuid (int *uid, int *euid, int *suid) +__NR_setresgid 5117 
sys_setresgid (int gid, int egid, int sgid) +__NR_getresgid 5118 sys_getresgid (int *gid, int *egid, int *sgid) +__NR_getpgid 5119 sys_getpgid (pid_t pid) +__NR_setfsuid 5120 sys_setfsuid (int fsuid) +__NR_setfsgid 5121 sys_setfsgid (int fsgid) +__NR_getsid 5122 sys_getsid (void) +__NR_capget 5123 sys_capget (struct cap_header *h, struct cap_data *d) +__NR_capset 5124 sys_capset (struct cap_header *h, struct cap_data *d) +__NR_rt_sigqueueinfo 5127 sys_rt_sigqueueinfo (pid_t pid, int sig, siginfo_t *info) +__NR_sigaltstack 5129 sys_sigaltstack (const void *uss, void *uoss) +__NR_personality 5132 sys_personality (unsigned int personality) +__NR_setpriority 5138 sys_setpriority (int which, int who, int nice) +__NR_sched_setscheduler 5141 sys_sched_setscheduler (int pid, int policy, struct sched_param *p) +__NR_prctl 5153 sys_prctl (int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5) +__NR_setrlimit 5155 sys_setrlimit (int resource, struct krlimit *rlim) +__NR_mount 5160 sys_mount (char *dev_nmae, char *dir_name, char *type, unsigned long flags, void *data) +__NR_umount2 5161 sys_umount2 (char *name, int flags) +__NR_gettid 5178 sys_gettid (void) +__NR_futex 5194 sys_futex (uint32_t *uaddr, int op, uint32_t val, struct timespec *utime, uint32_t *uaddr2, uint32_t val3) +__NR_cacheflush 5197 sys_cacheflush (char *addr, int nbytes, int cache) +__NR_io_setup 5200 sys_io_setup (unsigned nr_events, aio_context_t *ctx) +__NR_io_getevents 5202 sys_io_getevents (aio_context_t ctx, long min_nr, long nr, struct io_event *evs, struct timespec *tmo) +__NR_io_submit 5203 sys_io_submit (aio_context_t ctx, long nr, struct iocb **iocbpp) +__NR_set_tid_address 5212 sys_set_tid_address (int *tid_addr) +__NR_restart_syscall 5213 sys_restart_syscall (void) +__NR_sys_timer_create 5216 sys_timer_create (clockid_t which_clock, struct sigevent *timer_event_spec, kernel_timer_t *created_timer_id) +__NR_sys_timer_settime 5217 sys_timer_settime 
(kernel_timer_t timer_id, int flags, const struct itimerspec *new_setting, struct itimerspec *old_setting) +__NR_sys_timer_gettime 5218 sys_timer_gettime (int timer_id, const struct itimerspec *setting) +__NR_sys_timer_getoverrun 5219 sys_timer_getoverrun (int timer_id) +__NR_sys_timer_delete 5220 sys_timer_delete (kernel_timer_t timer_id) +__NR_clock_gettime 5222 sys_clock_gettime (const clockid_t which_clock, const struct timespec *tp) +__NR_exit_group 5205 sys_exit_group (int error_code) +__NR_set_thread_area 5242 sys_set_thread_area (unsigned long *addr) +__NR_openat 5247 sys_openat (int dfd, const char *filename, int flags, int mode) +__NR_waitid 5237 sys_waitid (int which, pid_t pid, struct siginfo *infop, int options, struct rusage *ru) +__NR_readlinkat 5257 sys_readlinkat (int fd, const char *path, char *buf, int bufsize) +__NR_ppoll 5261 sys_ppoll (struct pollfd *fds, unsigned int nfds, const struct timespec *tmo, const sigset_t *sigmask, size_t sigsetsize) +__NR_set_robust_list 5268 sys_set_robust_list (struct robust_list_head *head, size_t len) +__NR_get_robust_list 5269 sys_get_robust_list (int pid, struct robust_list_head **head_ptr, size_t *len_ptr) +__NR_fallocate 5279 sys_fallocate (int fd, int mode, loff_t offset, loff_t len) +__NR_seccomp 5312 sys_seccomp (unsigned int op, unsigned int flags, const char *uargs) +__NR_vmsplice 5266 sys_vmsplice (int fd, const struct iovec *iov, unsigned long nr_segs, unsigned int flags) +__NR_timerfd_settime 5282 sys_timerfd_settime (int ufd, int flags, const struct itimerspec *utmr, struct itimerspec *otmr) +__NR_signalfd4 5283 sys_signalfd (int fd, k_rtsigset_t *mask, size_t sizemask, int flags) +__NR_preadv 5289 sys_preadv_raw (int fd, struct iovec *iov, unsigned long nr, unsigned long pos_l, unsigned long pos_h) +__NR_rt_tgsigqueueinfo 5291 sys_rt_tgsigqueueinfo (pid_t tgid, pid_t pid, int sig, siginfo_t *info) +__NR_fanotify_init 5295 sys_fanotify_init (unsigned int flags, unsigned int event_f_flags) 
+__NR_fanotify_mark 5296 sys_fanotify_mark (int fanotify_fd, unsigned int flags, uint64_t mask, int dfd, const char *pathname) +__NR_open_by_handle_at 5299 sys_open_by_handle_at (int mountdirfd, struct file_handle *handle, int flags) +__NR_setns 5303 sys_setns (int fd, int nstype) +__NR_kcmp 5306 sys_kcmp (pid_t pid1, pid_t pid2, int type, unsigned long idx1, unsigned long idx2) +__NR_memfd_create 5314 sys_memfd_create (const char *name, unsigned int flags) +__NR_userfaultfd 5317 sys_userfaultfd (int flags) + +##TODO for kernel +__NR_fsopen 5430 sys_fsopen (char *fsname, unsigned int flags) +__NR_fsconfig 5431 sys_fsconfig (int fd, unsigned int cmd, const char *key, const char *value, int aux) +__NR_fsmount 5432 sys_fsmount (int fd, unsigned int flags, unsigned int attr_flags) +__NR_clone3 5435 sys_clone3 (struct clone_args *uargs, size_t size) \ No newline at end of file diff --git a/compel/arch/mips/scripts/compel-pack-compat.lds.S b/compel/arch/mips/scripts/compel-pack-compat.lds.S new file mode 100755 index 000000000..0ae585e0d --- /dev/null +++ b/compel/arch/mips/scripts/compel-pack-compat.lds.S @@ -0,0 +1,4 @@ +OUTPUT_ARCH(mips) +EXTERN(__export_parasite_head_start) +ASSERT(0,"Compatible PIEs are unsupported on mips") + diff --git a/compel/arch/mips/scripts/compel-pack.lds.S b/compel/arch/mips/scripts/compel-pack.lds.S new file mode 100755 index 000000000..cadb19aef --- /dev/null +++ b/compel/arch/mips/scripts/compel-pack.lds.S @@ -0,0 +1,37 @@ +OUTPUT_ARCH(mips) +EXTERN(__export_parasite_head_start) + +SECTIONS +{ + .text : { + *(.head.text) + ASSERT(DEFINED(__export_parasite_head_start), + "Symbol __export_parasite_head_start is missing"); + *(.text*) + *(.compel.exit) + *(.compel.init) + /* .rodata section*/ + *(.rodata*) + *(.got*) + /* .data section */ + *(.data*) + *(.bss*) + *(.sbss*) + *(.toc*) + } + + /DISCARD/ : { /*segments need to discard */ + *(.debug*) + *(.pdr) + *(.comment*) + *(.note*) + *(.group*) + *(.eh_frame*) + *(.MIPS.options) + 
*(.gnu.attributes) + } + +/* Parasite args need at least 4-byte alignment because they contain a futex; align to 32 bytes. */ +. = ALIGN(32); +__export_parasite_args = .; +} diff --git a/compel/arch/mips/src/lib/cpu.c b/compel/arch/mips/src/lib/cpu.c new file mode 100755 index 000000000..4b071cd16 --- /dev/null +++ b/compel/arch/mips/src/lib/cpu.c @@ -0,0 +1,36 @@ +#include +#include + +#include "compel-cpu.h" +#include "common/bitops.h" +#include "common/compiler.h" +#include "log.h" + +#undef LOG_PREFIX +#define LOG_PREFIX "cpu: " + +static compel_cpuinfo_t rt_info; +static bool rt_info_done = false; + +void compel_set_cpu_cap(compel_cpuinfo_t *c, unsigned int feature){ } + +void compel_clear_cpu_cap(compel_cpuinfo_t *c, unsigned int feature){ } + +int compel_test_cpu_cap(compel_cpuinfo_t *c, unsigned int feature) +{ + return 0; +} + +int compel_cpuid(compel_cpuinfo_t *c){ + return 0; +} + +bool compel_cpu_has_feature(unsigned int feature) +{ + if (!rt_info_done) { + compel_cpuid(&rt_info); + rt_info_done = true; + } + + return compel_test_cpu_cap(&rt_info, feature); +} diff --git a/compel/arch/mips/src/lib/handle-elf-host.c b/compel/arch/mips/src/lib/handle-elf-host.c new file mode 120000 index 000000000..fe4611886 --- /dev/null +++ b/compel/arch/mips/src/lib/handle-elf-host.c @@ -0,0 +1 @@ +handle-elf.c \ No newline at end of file diff --git a/compel/arch/mips/src/lib/handle-elf.c b/compel/arch/mips/src/lib/handle-elf.c new file mode 100755 index 000000000..199bb1581 --- /dev/null +++ b/compel/arch/mips/src/lib/handle-elf.c @@ -0,0 +1,23 @@ +#include +#include + +#include "handle-elf.h" +#include "piegen.h" +#include "log.h" + +static const unsigned char __maybe_unused +elf_ident_64_le[EI_NIDENT] = { + 0x7f, 0x45, 0x4c, 0x46, 0x02, 0x01, 0x01, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +}; + +extern int __handle_elf(void *mem, size_t size); + +int handle_binary(void *mem, size_t size) +{ + if (memcmp(mem, elf_ident_64_le, sizeof(elf_ident_64_le)) == 0) + return
__handle_elf(mem, size); + + pr_err("Unsupported Elf format detected\n"); + return -EINVAL; +} diff --git a/compel/arch/mips/src/lib/include/handle-elf.h b/compel/arch/mips/src/lib/include/handle-elf.h new file mode 100755 index 000000000..f28188136 --- /dev/null +++ b/compel/arch/mips/src/lib/include/handle-elf.h @@ -0,0 +1,8 @@ +#ifndef COMPEL_HANDLE_ELF_H__ +#define COMPEL_HANDLE_ELF_H__ + +#include "elf64-types.h" + +#define arch_is_machine_supported(e_machine) (e_machine == EM_MIPS) + +#endif /* COMPEL_HANDLE_ELF_H__ */ diff --git a/compel/arch/mips/src/lib/include/ldsodefs.h b/compel/arch/mips/src/lib/include/ldsodefs.h new file mode 100755 index 000000000..6e2d4f549 --- /dev/null +++ b/compel/arch/mips/src/lib/include/ldsodefs.h @@ -0,0 +1,147 @@ +/* Run-time dynamic linker data structures for loaded ELF shared objects. + Copyright (C) 2000-2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library. If not, see + . 
*/ + +#ifndef _MIPS_LDSODEFS_H +#define _MIPS_LDSODEFS_H 1 + +#include + +struct La_mips_32_regs; +struct La_mips_32_retval; +struct La_mips_64_regs; +struct La_mips_64_retval; + +#define ARCH_PLTENTER_MEMBERS \ + Elf32_Addr (*mips_o32_gnu_pltenter) (Elf32_Sym *, unsigned int, \ + uintptr_t *, uintptr_t *, \ + struct La_mips_32_regs *, \ + unsigned int *, const char *name, \ + long int *framesizep); \ + Elf32_Addr (*mips_n32_gnu_pltenter) (Elf32_Sym *, unsigned int, \ + uintptr_t *, uintptr_t *, \ + struct La_mips_64_regs *, \ + unsigned int *, const char *name, \ + long int *framesizep); \ + Elf64_Addr (*mips_n64_gnu_pltenter) (Elf64_Sym *, unsigned int, \ + uintptr_t *, uintptr_t *, \ + struct La_mips_64_regs *, \ + unsigned int *, const char *name, \ + long int *framesizep); + +#define ARCH_PLTEXIT_MEMBERS \ + unsigned int (*mips_o32_gnu_pltexit) (Elf32_Sym *, unsigned int, \ + uintptr_t *, uintptr_t *, \ + const struct La_mips_32_regs *, \ + struct La_mips_32_retval *, \ + const char *); \ + unsigned int (*mips_n32_gnu_pltexit) (Elf32_Sym *, unsigned int, \ + uintptr_t *, uintptr_t *, \ + const struct La_mips_64_regs *, \ + struct La_mips_64_retval *, \ + const char *); \ + unsigned int (*mips_n64_gnu_pltexit) (Elf64_Sym *, unsigned int, \ + uintptr_t *, uintptr_t *, \ + const struct La_mips_64_regs *, \ + struct La_mips_64_retval *, \ + const char *); + +/* The MIPS ABI specifies that the dynamic section has to be read-only. */ + +/* The 64-bit MIPS ELF ABI uses an unusual reloc format. Each + relocation entry specifies up to three actual relocations, all at + the same address. The first relocation which required a symbol + uses the symbol in the r_sym field. The second relocation which + requires a symbol uses the symbol in the r_ssym field. If all + three relocations require a symbol, the third one uses a zero + value. + + We define these structures in internal headers because we're not + sure we want to make them part of the ABI yet. 
Eventually, some of + this may move into elf/elf.h. */ + +/* An entry in a 64 bit SHT_REL section. */ + +typedef struct +{ + Elf32_Word r_sym; /* Symbol index */ + unsigned char r_ssym; /* Special symbol for 2nd relocation */ + unsigned char r_type3; /* 3rd relocation type */ + unsigned char r_type2; /* 2nd relocation type */ + unsigned char r_type1; /* 1st relocation type */ +} _Elf64_Mips_R_Info; + +typedef union +{ + Elf64_Xword r_info_number; + _Elf64_Mips_R_Info r_info_fields; +} _Elf64_Mips_R_Info_union; + +typedef struct +{ + Elf64_Addr r_offset; /* Address */ + _Elf64_Mips_R_Info_union r_info; /* Relocation type and symbol index */ +} Elf64_Mips_Rel; + +typedef struct +{ + Elf64_Addr r_offset; /* Address */ + _Elf64_Mips_R_Info_union r_info; /* Relocation type and symbol index */ + Elf64_Sxword r_addend; /* Addend */ +} Elf64_Mips_Rela; + +#define ELF64_MIPS_R_SYM(i) \ + ((__extension__ (_Elf64_Mips_R_Info_union)(i)).r_info_fields.r_sym) +#define ELF64_MIPS_R_TYPE(i) \ + (((_Elf64_Mips_R_Info_union)(i)).r_info_fields.r_type1 \ + | ((Elf32_Word)(__extension__ (_Elf64_Mips_R_Info_union)(i) \ + ).r_info_fields.r_type2 << 8) \ + | ((Elf32_Word)(__extension__ (_Elf64_Mips_R_Info_union)(i) \ + ).r_info_fields.r_type3 << 16) \ + | ((Elf32_Word)(__extension__ (_Elf64_Mips_R_Info_union)(i) \ + ).r_info_fields.r_ssym << 24)) +#define ELF64_MIPS_R_INFO(sym, type) \ + (__extension__ (_Elf64_Mips_R_Info_union) \ + (__extension__ (_Elf64_Mips_R_Info) \ + { (sym), ELF64_MIPS_R_SSYM (type), \ + ELF64_MIPS_R_TYPE3 (type), \ + ELF64_MIPS_R_TYPE2 (type), \ + ELF64_MIPS_R_TYPE1 (type) \ + }).r_info_number) +/* These macros decompose the value returned by ELF64_MIPS_R_TYPE, and + compose it back into a value that it can be used as an argument to + ELF64_MIPS_R_INFO. 
*/ +#define ELF64_MIPS_R_SSYM(i) (((i) >> 24) & 0xff) +#define ELF64_MIPS_R_TYPE3(i) (((i) >> 16) & 0xff) +#define ELF64_MIPS_R_TYPE2(i) (((i) >> 8) & 0xff) +#define ELF64_MIPS_R_TYPE1(i) ((i) & 0xff) +#define ELF64_MIPS_R_TYPEENC(type1, type2, type3, ssym) \ + ((type1) \ + | ((Elf32_Word)(type2) << 8) \ + | ((Elf32_Word)(type3) << 16) \ + | ((Elf32_Word)(ssym) << 24)) + +#undef ELF64_R_SYM +#define ELF64_R_SYM(i) ELF64_MIPS_R_SYM (i) +#undef ELF64_R_TYPE + +/*fixme*/ +#define ELF64_R_TYPE(i) (ELF64_MIPS_R_TYPE (i) & 0x00ff) +#undef ELF64_R_INFO +#define ELF64_R_INFO(sym, type) ELF64_MIPS_R_INFO ((sym), (type)) + +#endif diff --git a/compel/arch/mips/src/lib/include/syscall.h b/compel/arch/mips/src/lib/include/syscall.h new file mode 100755 index 000000000..704080172 --- /dev/null +++ b/compel/arch/mips/src/lib/include/syscall.h @@ -0,0 +1,7 @@ +#ifndef __COMPEL_SYSCALL_H__ +#define __COMPEL_SYSCALL_H__ + +#ifndef SIGSTKFLT +#define SIGSTKFLT 16 +#endif +#endif diff --git a/compel/arch/mips/src/lib/include/uapi/asm/.gitignore b/compel/arch/mips/src/lib/include/uapi/asm/.gitignore new file mode 100755 index 000000000..e69de29bb diff --git a/compel/arch/mips/src/lib/include/uapi/asm/breakpoints.h b/compel/arch/mips/src/lib/include/uapi/asm/breakpoints.h new file mode 100755 index 000000000..21eb1309f --- /dev/null +++ b/compel/arch/mips/src/lib/include/uapi/asm/breakpoints.h @@ -0,0 +1,6 @@ +#ifndef __COMPEL_BREAKPOINTS_H__ +#define __COMPEL_BREAKPOINTS_H__ +#define ARCH_SI_TRAP TRAP_BRKPT +extern int ptrace_set_breakpoint(pid_t pid, void *addr); +extern int ptrace_flush_breakpoints(pid_t pid); +#endif diff --git a/compel/arch/mips/src/lib/include/uapi/asm/cpu.h b/compel/arch/mips/src/lib/include/uapi/asm/cpu.h new file mode 100755 index 000000000..329b9529b --- /dev/null +++ b/compel/arch/mips/src/lib/include/uapi/asm/cpu.h @@ -0,0 +1,5 @@ +#ifndef __CR_ASM_CPU_H__ +#define __CR_ASM_CPU_H__ + +typedef struct { } compel_cpuinfo_t; +#endif /* __CR_ASM_CPU_H__ */ diff 
--git a/compel/arch/mips/src/lib/include/uapi/asm/fpu.h b/compel/arch/mips/src/lib/include/uapi/asm/fpu.h new file mode 100644 index 000000000..7f476d541 --- /dev/null +++ b/compel/arch/mips/src/lib/include/uapi/asm/fpu.h @@ -0,0 +1,4 @@ +#ifndef __CR_ASM_FPU_H__ +#define __CR_ASM_FPU_H__ + +#endif /* __CR_ASM_FPU_H__ */ diff --git a/compel/arch/mips/src/lib/include/uapi/asm/infect-types.h b/compel/arch/mips/src/lib/include/uapi/asm/infect-types.h new file mode 100755 index 000000000..423880821 --- /dev/null +++ b/compel/arch/mips/src/lib/include/uapi/asm/infect-types.h @@ -0,0 +1,66 @@ +#ifndef UAPI_COMPEL_ASM_TYPES_H__ +#define UAPI_COMPEL_ASM_TYPES_H__ + +#include +#include +#include +#include +#include +#define SIGMAX 64 +#define SIGMAX_OLD 31 + +/* + + * Copied from the Linux kernel header arch/mips/include/asm/ptrace.h + * + * A thread MIPS CPU context + **/ +typedef struct { + /* Saved main processor registers. */ + __u64 regs[32]; + + /* Saved special registers. */ + __u64 lo; + __u64 hi; + __u64 cp0_epc; + __u64 cp0_badvaddr; + __u64 cp0_status; + __u64 cp0_cause; +} user_regs_struct_t; + +/* from linux-3.10/arch/mips/kernel/ptrace.c */ +typedef struct { + /* Saved fpu registers. */ + __u64 regs[32]; + + __u32 fpu_fcr31; + __u32 fpu_id; + + +} user_fpregs_struct_t; + +#define MIPS_a0 regs[4] //arguments a0-a3 +#define MIPS_t0 regs[8] //temporaries t0-t7 +#define MIPS_v0 regs[2] +#define MIPS_v1 regs[3] +#define MIPS_sp regs[29] +#define MIPS_ra regs[31] + + +#define NATIVE_MAGIC 0x0A +#define COMPAT_MAGIC 0x0C +static inline bool user_regs_native(user_regs_struct_t *pregs) +{ + return true; +} + + +#define REG_RES(regs) ((regs).MIPS_v0) +#define REG_IP(regs) ((regs).cp0_epc) +#define REG_SP(regs) ((regs).MIPS_sp) +#define REG_SYSCALL_NR(regs) ((regs).MIPS_v0) + +//#define __NR(syscall, compat) ((compat) ? 
__NR32_##syscall : __NR_##syscall) +#define __NR(syscall, compat) __NR_##syscall + +#endif /* UAPI_COMPEL_ASM_TYPES_H__ */ diff --git a/compel/arch/mips/src/lib/include/uapi/asm/sigframe.h b/compel/arch/mips/src/lib/include/uapi/asm/sigframe.h new file mode 100755 index 000000000..ed6a959fc --- /dev/null +++ b/compel/arch/mips/src/lib/include/uapi/asm/sigframe.h @@ -0,0 +1,63 @@ +#ifndef UAPI_COMPEL_ASM_SIGFRAME_H__ +#define UAPI_COMPEL_ASM_SIGFRAME_H__ + +#include +#include + +#include +#include + +#include +#define u32 __u32 + +/* sigcontext defined in /usr/include/asm/sigcontext.h*/ +#define rt_sigcontext sigcontext + + +#include + +/* refer to linux-3.10/include/uapi/asm-generic/ucontext.h */ +struct k_ucontext{ + unsigned long uc_flags; + struct k_ucontext *uc_link; + stack_t uc_stack; + struct sigcontext uc_mcontext; + k_rtsigset_t uc_sigmask; +}; + +/* Copy from the kernel source arch/mips/kernel/signal.c */ +struct rt_sigframe { + u32 rs_ass[4]; /* argument save space for o32 */ + u32 rs_pad[2]; /* Was: signal trampoline */ + siginfo_t rs_info; + struct k_ucontext rs_uc; +}; + + +#define RT_SIGFRAME_UC(rt_sigframe) (&rt_sigframe->rs_uc) +#define RT_SIGFRAME_UC_SIGMASK(rt_sigframe) ((k_rtsigset_t *)(void *)&rt_sigframe->rs_uc.uc_sigmask) +#define RT_SIGFRAME_REGIP(rt_sigframe) ((long unsigned int)0x00) +#define RT_SIGFRAME_FPU(rt_sigframe) +#define RT_SIGFRAME_HAS_FPU(rt_sigframe) 1 + + +#define RT_SIGFRAME_OFFSET(rt_sigframe) 0 + + +#define ARCH_RT_SIGRETURN(new_sp, rt_sigframe) \ + asm volatile( \ + "move $29, %0 \n" \ + "li $2, "__stringify(__NR_rt_sigreturn)" \n" \ + "syscall \n" \ + : \ + : "r"(new_sp) \ + : "$29","$2","memory") + +int sigreturn_prep_fpu_frame(struct rt_sigframe *sigframe, + struct rt_sigframe *rsigframe); + +#define rt_sigframe_erase_sigset(sigframe) \ + memset(&sigframe->rs_uc.uc_sigmask, 0, sizeof(k_rtsigset_t)) +#define rt_sigframe_copy_sigset(sigframe, from) \ + memcpy(&sigframe->rs_uc.uc_sigmask, from, sizeof(k_rtsigset_t)) 
+#endif /* UAPI_COMPEL_ASM_SIGFRAME_H__ */ diff --git a/compel/arch/mips/src/lib/include/uapi/asm/siginfo.h b/compel/arch/mips/src/lib/include/uapi/asm/siginfo.h new file mode 100755 index 000000000..519aea1a6 --- /dev/null +++ b/compel/arch/mips/src/lib/include/uapi/asm/siginfo.h @@ -0,0 +1,124 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1998, 1999, 2001, 2003 Ralf Baechle + * Copyright (C) 2000, 2001 Silicon Graphics, Inc. + */ +#ifndef _UAPI_ASM_SIGINFO_H +#define _UAPI_ASM_SIGINFO_H + + +#define __ARCH_SIGEV_PREAMBLE_SIZE (sizeof(long) + 2*sizeof(int)) +#undef __ARCH_SI_TRAPNO /* exception code needs to fill this ... */ + +#define HAVE_ARCH_SIGINFO_T + +/* + * Careful to keep union _sifields from shifting ... + */ + +#define __ARCH_SI_PREAMBLE_SIZE (4 * sizeof(int)) + +#define __ARCH_SIGSYS + +#define SI_MAX_SIZE 128 +#define SI_PAD_SIZE ((SI_MAX_SIZE - __ARCH_SI_PREAMBLE_SIZE) / sizeof(int)) +#define __ARCH_SI_UID_T __kernel_uid32_t + +#ifndef __ARCH_SI_UID_T +#define __ARCH_SI_UID_T __kernel_uid32_t +#endif + +#ifndef __ARCH_SI_BAND_T +#define __ARCH_SI_BAND_T long +#endif + +#ifndef __ARCH_SI_CLOCK_T +#define __ARCH_SI_CLOCK_T __kernel_clock_t +#endif + +#ifndef __ARCH_SI_ATTRIBUTES +#define __ARCH_SI_ATTRIBUTES +#endif + +typedef struct siginfo { + int si_signo; + int si_errno; + int si_code; + + union { + int _pad[SI_PAD_SIZE]; + + /* kill() */ + struct { + __kernel_pid_t _pid; /* sender's pid */ + __ARCH_SI_UID_T _uid; /* sender's uid */ + } _kill; + + /* POSIX.1b timers */ + struct { + __kernel_timer_t _tid; /* timer id */ + int _overrun; /* overrun count */ + char _pad[sizeof( __ARCH_SI_UID_T) - sizeof(int)]; + sigval_t _sigval; /* same as below */ + int _sys_private; /* not to be passed to user */ + } _timer; + + /* POSIX.1b signals */ + struct { + __kernel_pid_t _pid; /* sender's pid */ + 
__ARCH_SI_UID_T _uid; /* sender's uid */ + sigval_t _sigval; + } _rt; + + /* SIGCHLD */ + struct { + __kernel_pid_t _pid; /* which child */ + __ARCH_SI_UID_T _uid; /* sender's uid */ + int _status; /* exit code */ + __ARCH_SI_CLOCK_T _utime; + __ARCH_SI_CLOCK_T _stime; + } _sigchld; + + /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */ + struct { + void *_addr; /* faulting insn/memory ref. */ +#ifdef __ARCH_SI_TRAPNO + int _trapno; /* TRAP # which caused the signal */ +#endif + short _addr_lsb; /* LSB of the reported address */ +#ifndef __GENKSYMS__ + struct { + void *_lower; + void *_upper; + } _addr_bnd; +#endif + } _sigfault; + + /* SIGPOLL */ + struct { + __ARCH_SI_BAND_T _band; /* POLL_IN, POLL_OUT, POLL_MSG */ + int _fd; + } _sigpoll; + + /* SIGSYS */ + struct { + void *_call_addr; /* calling user insn */ + int _syscall; /* triggering system call number */ + unsigned int _arch; /* AUDIT_ARCH_* of syscall */ + } _sigsys; + } _sifields; +} __ARCH_SI_ATTRIBUTES siginfo_t; + +/* + * si_code values + * Again these have been chosen to be IRIX compatible. 
+ */ +#undef SI_ASYNCIO +#undef SI_TIMER +#undef SI_MESGQ +#define SI_ASYNCIO -2 /* sent by AIO completion */ + +#endif /* _UAPI_ASM_SIGINFO_H */ diff --git a/compel/arch/mips/src/lib/infect.c b/compel/arch/mips/src/lib/infect.c new file mode 100755 index 000000000..a7dcea55a --- /dev/null +++ b/compel/arch/mips/src/lib/infect.c @@ -0,0 +1,310 @@ +#include +#include +#include +#include +#include +#include +#include "errno.h" +#include +#include +#include "common/err.h" +#include "common/page.h" +#include "asm/infect-types.h" +#include "ptrace.h" +#include "infect.h" +#include "infect-priv.h" +#include "log.h" +#include "common/bug.h" +/* + * Injected syscall instruction + * mips64el is Little Endian + */ +const char code_syscall[] = { + 0x0c, 0x00, 0x00, 0x00, /* syscall */ + 0x0d, 0x00, 0x00, 0x00 /* break */ +}; + +/* 10-byte legacy floating point register */ +struct fpreg { + uint16_t significand[4]; + uint16_t exponent; +}; + +/* 16-byte floating point register */ +struct fpxreg { + uint16_t significand[4]; + uint16_t exponent; + uint16_t padding[3]; +}; + + +int sigreturn_prep_regs_plain(struct rt_sigframe *sigframe, + user_regs_struct_t *regs, + user_fpregs_struct_t *fpregs) +{ + sigframe->rs_uc.uc_mcontext.sc_regs[0] = regs->regs[0]; + sigframe->rs_uc.uc_mcontext.sc_regs[1] = regs->regs[1]; + sigframe->rs_uc.uc_mcontext.sc_regs[2] = regs->regs[2]; + sigframe->rs_uc.uc_mcontext.sc_regs[3] = regs->regs[3]; + sigframe->rs_uc.uc_mcontext.sc_regs[4] = regs->regs[4]; + sigframe->rs_uc.uc_mcontext.sc_regs[5] = regs->regs[5]; + sigframe->rs_uc.uc_mcontext.sc_regs[6] = regs->regs[6]; + sigframe->rs_uc.uc_mcontext.sc_regs[7] = regs->regs[7]; + sigframe->rs_uc.uc_mcontext.sc_regs[8] = regs->regs[8]; + sigframe->rs_uc.uc_mcontext.sc_regs[9] = regs->regs[9]; + sigframe->rs_uc.uc_mcontext.sc_regs[10] = regs->regs[10]; + sigframe->rs_uc.uc_mcontext.sc_regs[11] = regs->regs[11]; + sigframe->rs_uc.uc_mcontext.sc_regs[12] = regs->regs[12]; + 
sigframe->rs_uc.uc_mcontext.sc_regs[13] = regs->regs[13]; + sigframe->rs_uc.uc_mcontext.sc_regs[14] = regs->regs[14]; + sigframe->rs_uc.uc_mcontext.sc_regs[15] = regs->regs[15]; + sigframe->rs_uc.uc_mcontext.sc_regs[16] = regs->regs[16]; + sigframe->rs_uc.uc_mcontext.sc_regs[17] = regs->regs[17]; + sigframe->rs_uc.uc_mcontext.sc_regs[18] = regs->regs[18]; + sigframe->rs_uc.uc_mcontext.sc_regs[19] = regs->regs[19]; + sigframe->rs_uc.uc_mcontext.sc_regs[20] = regs->regs[20]; + sigframe->rs_uc.uc_mcontext.sc_regs[21] = regs->regs[21]; + sigframe->rs_uc.uc_mcontext.sc_regs[22] = regs->regs[22]; + sigframe->rs_uc.uc_mcontext.sc_regs[23] = regs->regs[23]; + sigframe->rs_uc.uc_mcontext.sc_regs[24] = regs->regs[24]; + sigframe->rs_uc.uc_mcontext.sc_regs[25] = regs->regs[25]; + sigframe->rs_uc.uc_mcontext.sc_regs[26] = regs->regs[26]; + sigframe->rs_uc.uc_mcontext.sc_regs[27] = regs->regs[27]; + sigframe->rs_uc.uc_mcontext.sc_regs[28] = regs->regs[28]; + sigframe->rs_uc.uc_mcontext.sc_regs[29] = regs->regs[29]; + sigframe->rs_uc.uc_mcontext.sc_regs[30] = regs->regs[30]; + sigframe->rs_uc.uc_mcontext.sc_regs[31] = regs->regs[31]; + sigframe->rs_uc.uc_mcontext.sc_mdlo = regs->lo; + sigframe->rs_uc.uc_mcontext.sc_mdhi = regs->hi; + sigframe->rs_uc.uc_mcontext.sc_pc = regs->cp0_epc; + + sigframe->rs_uc.uc_mcontext.sc_fpregs[0] = fpregs->regs[0]; + sigframe->rs_uc.uc_mcontext.sc_fpregs[1] = fpregs->regs[1]; + sigframe->rs_uc.uc_mcontext.sc_fpregs[2] = fpregs->regs[2]; + sigframe->rs_uc.uc_mcontext.sc_fpregs[3] = fpregs->regs[3]; + sigframe->rs_uc.uc_mcontext.sc_fpregs[4] = fpregs->regs[4]; + sigframe->rs_uc.uc_mcontext.sc_fpregs[5] = fpregs->regs[5]; + sigframe->rs_uc.uc_mcontext.sc_fpregs[6] = fpregs->regs[6]; + sigframe->rs_uc.uc_mcontext.sc_fpregs[7] = fpregs->regs[7]; + sigframe->rs_uc.uc_mcontext.sc_fpregs[8] = fpregs->regs[8]; + sigframe->rs_uc.uc_mcontext.sc_fpregs[9] = fpregs->regs[9]; + sigframe->rs_uc.uc_mcontext.sc_fpregs[10] = fpregs->regs[10]; + 
sigframe->rs_uc.uc_mcontext.sc_fpregs[11] = fpregs->regs[11]; + sigframe->rs_uc.uc_mcontext.sc_fpregs[12] = fpregs->regs[12]; + sigframe->rs_uc.uc_mcontext.sc_fpregs[13] = fpregs->regs[13]; + sigframe->rs_uc.uc_mcontext.sc_fpregs[14] = fpregs->regs[14]; + sigframe->rs_uc.uc_mcontext.sc_fpregs[15] = fpregs->regs[15]; + sigframe->rs_uc.uc_mcontext.sc_fpregs[16] = fpregs->regs[16]; + sigframe->rs_uc.uc_mcontext.sc_fpregs[17] = fpregs->regs[17]; + sigframe->rs_uc.uc_mcontext.sc_fpregs[18] = fpregs->regs[18]; + sigframe->rs_uc.uc_mcontext.sc_fpregs[19] = fpregs->regs[19]; + sigframe->rs_uc.uc_mcontext.sc_fpregs[20] = fpregs->regs[20]; + sigframe->rs_uc.uc_mcontext.sc_fpregs[21] = fpregs->regs[21]; + sigframe->rs_uc.uc_mcontext.sc_fpregs[22] = fpregs->regs[22]; + sigframe->rs_uc.uc_mcontext.sc_fpregs[23] = fpregs->regs[23]; + sigframe->rs_uc.uc_mcontext.sc_fpregs[24] = fpregs->regs[24]; + sigframe->rs_uc.uc_mcontext.sc_fpregs[25] = fpregs->regs[25]; + sigframe->rs_uc.uc_mcontext.sc_fpregs[26] = fpregs->regs[26]; + sigframe->rs_uc.uc_mcontext.sc_fpregs[27] = fpregs->regs[27]; + sigframe->rs_uc.uc_mcontext.sc_fpregs[28] = fpregs->regs[28]; + sigframe->rs_uc.uc_mcontext.sc_fpregs[29] = fpregs->regs[29]; + sigframe->rs_uc.uc_mcontext.sc_fpregs[30] = fpregs->regs[30]; + sigframe->rs_uc.uc_mcontext.sc_fpregs[31] = fpregs->regs[31]; + + return 0; +} + +int sigreturn_prep_fpu_frame_plain(struct rt_sigframe *sigframe, + struct rt_sigframe *rsigframe) +{ + return 0; +} + +int get_task_regs(pid_t pid, user_regs_struct_t *regs, save_regs_t save, + void *arg, __maybe_unused unsigned long flags) +{ + user_fpregs_struct_t xsave = { }, *xs = NULL; + int ret = -1; + + if (ptrace(PTRACE_GETFPREGS, pid, NULL, &xsave)) { + pr_perror("Can't obtain FPU registers for %d", pid); + return ret; + } + + /*Restart the system call*/ + if (regs->regs[0]){ + switch ((long)(int)regs->regs[2]) { + case ERESTARTNOHAND: + case ERESTARTSYS: + case ERESTARTNOINTR: + regs->regs[2] = regs->regs[0]; + 
regs->regs[7] = regs->regs[26]; + regs->cp0_epc -= 4; + break; + case ERESTART_RESTARTBLOCK: + regs->regs[2] = __NR_restart_syscall; + regs->regs[7] = regs->regs[26]; + regs->cp0_epc -= 4; + break; + } + regs->regs[0] = 0; + } + + xs = &xsave; + ret = save(arg, regs, xs); + return ret; +} + +int compel_syscall(struct parasite_ctl *ctl, int nr, long *ret, + unsigned long arg1, + unsigned long arg2, + unsigned long arg3, + unsigned long arg4, + unsigned long arg5, + unsigned long arg6) +{ + /*refer to glibc-2.20/sysdeps/unix/sysv/linux/mips/mips64/syscall.S*/ + user_regs_struct_t regs = ctl->orig.regs; + int err; + + regs.regs[2] = (unsigned long)nr; //syscall_number will be in v0 + regs.regs[4] = arg1; + regs.regs[5] = arg2; + regs.regs[6] = arg3; + regs.regs[7] = arg4; + regs.regs[8] = arg5; + regs.regs[9] = arg6; + + err = compel_execute_syscall(ctl, &regs, code_syscall); + *ret = regs.regs[2]; + + return err; +} + +void *remote_mmap(struct parasite_ctl *ctl, + void *addr, size_t length, int prot, + int flags, int fd, off_t offset) +{ + long map; + int err; + + err = compel_syscall(ctl, __NR_mmap, &map, + (unsigned long)addr, length, prot, flags, fd, offset >> PAGE_SHIFT); + + if (err < 0 || IS_ERR_VALUE(map)) { + pr_err("remote mmap() failed: %s\n", strerror(-map)); + return NULL; + } + + return (void *)map; +} + +/* + * regs must be inited when calling this function from original context + */ +void parasite_setup_regs(unsigned long new_ip, void *stack, user_regs_struct_t *regs) +{ + regs->cp0_epc = new_ip; + if (stack){ + /* regs[29] is sp */ + regs->regs[29] = (unsigned long)stack; + } +} + +bool arch_can_dump_task(struct parasite_ctl *ctl) +{ + return true; +} + +int arch_fetch_sas(struct parasite_ctl *ctl, struct rt_sigframe *s) +{ + long ret; + int err; + + err = compel_syscall(ctl, __NR_sigaltstack, + &ret, 0, (unsigned long)&s->rs_uc.uc_stack, + 0, 0, 0, 0); + return err ?
err : ret; +} + + +int ptrace_set_breakpoint(pid_t pid, void *addr) +{ + return 0; +} + +int ptrace_flush_breakpoints(pid_t pid) +{ + return 0; +} + +/*refer to kernel linux-3.10/arch/mips/include/asm/processor.h*/ +#define TASK_SIZE32 0x7fff8000UL +#define TASK_SIZE64 0x10000000000UL +#define TASK_SIZE TASK_SIZE64 + +unsigned long compel_task_size(void) { return TASK_SIZE; } + +/* + * Get task registers (overwrites weak function) + * + */ +int ptrace_get_regs(int pid, user_regs_struct_t *regs) +{ + return ptrace(PTRACE_GETREGS, pid, NULL, regs); +} + +/* + * Set task registers (overwrites weak function) + */ +int ptrace_set_regs(int pid, user_regs_struct_t *regs) +{ + return ptrace(PTRACE_SETREGS, pid, NULL, regs); +} + +void compel_relocs_apply_mips(void *mem, void *vbase, compel_reloc_t *elf_relocs, size_t nr_relocs) +{ + size_t i, j; + + /* + * mips rebasing :load time relocation + * parasite.built-in.o and restorer.built-in.o is ELF 64-bit LSB relocatable for mips. + * so we have to relocate some type for R_MIPS_26 R_MIPS_HIGHEST R_MIPS_HIGHER R_MIPS_HI16 and R_MIPS_LO16 in there. 
+ * for mips64el .if toload/store data or jump instruct ,need to relocation R_TYPE + */ + for (i = 0, j = 0; i < nr_relocs; i++) { + if (elf_relocs[i].type & COMPEL_TYPE_MIPS_26) { + int *where = (mem + elf_relocs[i].offset); + *where = *where | ((elf_relocs[i].addend + ((unsigned long)vbase & 0x00fffffff) /*low 28 bit*/)>>2); + + } else if (elf_relocs[i].type & COMPEL_TYPE_MIPS_64) { + unsigned long *where = (mem + elf_relocs[i].offset); + *where = elf_relocs[i].addend + (unsigned long)vbase; + + } else if (elf_relocs[i].type & COMPEL_TYPE_MIPS_HI16) { + /* refer to binutils mips.cc */ + int *where = (mem + elf_relocs[i].offset); + int v_lo16 = (unsigned long)vbase &0x00ffff; + + if ((v_lo16+elf_relocs[i].value+elf_relocs[i].addend) >= 0x8000){ + *where = *where | ((((unsigned long)vbase>>16) &0xffff)+0x1); + } else { + *where = *where | ((((unsigned long)vbase>>16) &0xffff)); + } + + } else if (elf_relocs[i].type & COMPEL_TYPE_MIPS_LO16) { + int *where = (mem + elf_relocs[i].offset); + int v_lo16 = (unsigned long)vbase &0x00ffff; + *where = *where | ((v_lo16 + elf_relocs[i].addend) & 0xffff); + + } else if (elf_relocs[i].type & COMPEL_TYPE_MIPS_HIGHER) { + int *where = (mem + elf_relocs[i].offset); + *where = *where | ((( (unsigned long)vbase + (uint64_t) 0x80008000) >> 32) & 0xffff); + + } else if (elf_relocs[i].type & COMPEL_TYPE_MIPS_HIGHEST) { + int *where = (mem + elf_relocs[i].offset); + *where = *where | ((( (unsigned long)vbase + (uint64_t) 0x800080008000llu) >> 48) & 0xffff); + } else + BUG(); + } +} diff --git a/include/common/arch/mips/asm/atomic.h b/include/common/arch/mips/asm/atomic.h new file mode 100755 index 000000000..600e3a70f --- /dev/null +++ b/include/common/arch/mips/asm/atomic.h @@ -0,0 +1,148 @@ +#ifndef __CR_ATOMIC_H__ +#define __CR_ATOMIC_H__ + +#include +#include "common/compiler.h" +#include "common/arch/mips/asm/utils.h" +#include "common/arch/mips/asm/cmpxchg.h" + +/* + * atomic_read - read atomic variable + * @v: pointer of type 
atomic_t + * + * Atomically reads the value of @v. + */ +#define atomic_read(v) (*(volatile int *)&(v)->counter) + +/* + * atomic_set - set atomic variable + * @v: pointer of type atomic_t + * @i: required value + * + * Atomically sets the value of @v to @i. + */ +#define atomic_set(v, i) ((v)->counter = (i)) +/* + * atomic_add - add integer to atomic variable + * @i: integer value to add + * @v: pointer of type atomic_t + * + * Atomically adds @i to @v. + */ + +static __inline__ void atomic_add(int i, atomic_t * v) +{ + int temp; + + do { + __asm__ __volatile__( + " .set mips3 \n" + " ll %0, %1 # atomic_add \n" + " addu %0, %2 \n" + " sc %0, %1 \n" + " .set mips0 \n" + : "=&r" (temp), "+m" (v->counter) + : "Ir" (i)); + } while (unlikely(!temp)); +} + +/* + * atomic_sub - subtract the atomic variable + * @i: integer value to subtract + * @v: pointer of type atomic_t + * + * Atomically subtracts @i from @v. + */ +static __inline__ void atomic_sub(int i, atomic_t * v) +{ + int temp; + + do { + __asm__ __volatile__( + " .set mips3 \n" + " ll %0, %1 # atomic_sub \n" + " subu %0, %2 \n" + " sc %0, %1 \n" + " .set mips0 \n" + : "=&r" (temp), "+m" (v->counter) + : "Ir" (i)); + } while (unlikely(!temp)); +} + +/* + * Same as above, but return the result value + */ +static __inline__ int atomic_add_return(int i, atomic_t * v) +{ + int result; + + smp_mb__before_llsc(); + + + int temp; + + do { + __asm__ __volatile__( + " .set mips3 \n" + " ll %1, %2 # atomic_add_return \n" + " addu %0, %1, %3 \n" + " sc %0, %2 \n" + " .set mips0 \n" + : "=&r" (result), "=&r" (temp), "+m" (v->counter) + : "Ir" (i)); + } while (unlikely(!result)); + + result = temp + i; + + smp_llsc_mb(); + + return result; +} + +static __inline__ int atomic_sub_return(int i, atomic_t * v) +{ + int result; + + smp_mb__before_llsc(); + + int temp; + + do { + __asm__ __volatile__( + " .set mips3 \n" + " ll %1, %2 # atomic_sub_return \n" + " subu %0, %1, %3 \n" + " sc %0, %2 \n" + " .set mips0 \n" + : "=&r" 
(result), "=&r" (temp), "+m" (v->counter) + : "Ir" (i)); + } while (unlikely(!result)); + + result = temp - i; + + smp_llsc_mb(); + + return result; +} + +#define atomic_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n))) +#define atomic_dec_return(v) atomic_sub_return(1, (v)) +#define atomic_inc_return(v) atomic_add_return(1, (v)) + +/* + * atomic_inc - increment atomic variable + * @v: pointer of type atomic_t + * + * Atomically increments @v by 1. + */ +#define atomic_inc( v) atomic_add(1, (v)) + +/* + * atomic_dec - decrement atomic variable + * @v: pointer of type atomic_t + * + * Atomically decrements @v by 1. + */ +#define atomic_dec(v) atomic_sub(1, (v)) + +#endif /* __CR_ATOMIC_H__ */ diff --git a/include/common/arch/mips/asm/bitops.h b/include/common/arch/mips/asm/bitops.h new file mode 100644 index 000000000..874845e45 --- /dev/null +++ b/include/common/arch/mips/asm/bitops.h @@ -0,0 +1,41 @@ +#ifndef _LINUX_BITOPS_H +#define _LINUX_BITOPS_H +#include +#include "common/compiler.h" +#include "common/asm-generic/bitops.h" + +/** + * test_and_set_bit - Set a bit and return its old value + * @nr: Bit to set + * @addr: Address to count from + * + * This operation is atomic and cannot be reordered. + * It also implies a memory barrier.
+ */ + +static inline int test_and_set_bit(unsigned long nr, + volatile unsigned long *addr) +{ + unsigned long *m = ((unsigned long *) addr) + (nr >> 6); + unsigned long temp = 0; + unsigned long res; + int bit = nr & 63UL; + + do { + __asm__ __volatile__( + " .set mips3 \n" + " lld %0, %1 # test_and_set_bit \n" + " or %2, %0, %3 \n" + " scd %2, %1 \n" + " .set mips0 \n" + : "=&r" (temp), "+m" (*m), "=&r" (res) + : "r" (1UL << bit) + : "memory"); + } while (unlikely(!res)); + + res = temp & (1UL << bit); + + return res != 0; +} + +#endif diff --git a/include/common/arch/mips/asm/bitsperlong.h b/include/common/arch/mips/asm/bitsperlong.h new file mode 100755 index 000000000..31aece3b6 --- /dev/null +++ b/include/common/arch/mips/asm/bitsperlong.h @@ -0,0 +1,6 @@ +#ifndef __CR_BITSPERLONG_H__ +#define __CR_BITSPERLONG_H__ + +# define BITS_PER_LONG 64 + +#endif /* __CR_BITSPERLONG_H__ */ diff --git a/include/common/arch/mips/asm/cmpxchg.h b/include/common/arch/mips/asm/cmpxchg.h new file mode 100755 index 000000000..bdc41390b --- /dev/null +++ b/include/common/arch/mips/asm/cmpxchg.h @@ -0,0 +1,67 @@ +#ifndef __CR_CMPXCHG_H__ +#define __CR_CMPXCHG_H__ + +#define __cmpxchg_asm(ld, st, m, old, new) \ +({ \ + __typeof(*(m)) __ret; \ + \ + if (kernel_uses_llsc) { \ + __asm__ __volatile__( \ + " .set push \n" \ + " .set noat \n" \ + " .set mips3 \n" \ + "1: " ld " %0, %2 # __cmpxchg_asm \n" \ + " bne %0, %z3, 2f \n" \ + " .set mips0 \n" \ + " move $1, %z4 \n" \ + " .set mips3 \n" \ + " " st " $1, %1 \n" \ + " beqz $1, 1b \n" \ + " .set pop \n" \ + "2: \n" \ + : "=&r" (__ret), "=R" (*m) \ + : "R" (*m), "Jr" (old), "Jr" (new) \ + : "memory"); \ + } else { \ + } \ + \ + __ret; \ +}) +/* + * This function doesn't exist, so you'll get a linker error + * if something tries to do an invalid cmpxchg(). 
+ */ +extern void __cmpxchg_called_with_bad_pointer(void); + +#define __cmpxchg(ptr, old, new, pre_barrier, post_barrier) \ +({ \ + __typeof__(ptr) __ptr = (ptr); \ + __typeof__(*(ptr)) __old = (old); \ + __typeof__(*(ptr)) __new = (new); \ + __typeof__(*(ptr)) __res = 0; \ + \ + pre_barrier; \ + \ + switch (sizeof(*(__ptr))) { \ + case 4: \ + __res = __cmpxchg_asm("ll", "sc", __ptr, __old, __new); \ + break; \ + case 8: \ + if (sizeof(long) == 8) { \ + __res = __cmpxchg_asm("lld", "scd", __ptr, \ + __old, __new); \ + break; \ + } \ + default: \ + __cmpxchg_called_with_bad_pointer(); \ + break; \ + } \ + \ + post_barrier; \ + \ + __res; \ +}) + +#define cmpxchg(ptr, old, new) __cmpxchg(ptr, old, new, smp_mb__before_llsc(), smp_llsc_mb()) + +#endif /* __CR_CMPXCHG_H__ */ diff --git a/include/common/arch/mips/asm/fls64.h b/include/common/arch/mips/asm/fls64.h new file mode 100644 index 000000000..fdae28513 --- /dev/null +++ b/include/common/arch/mips/asm/fls64.h @@ -0,0 +1,38 @@ +#ifndef _ASM_GENERIC_BITOPS_FLS64_H_ +#define _ASM_GENERIC_BITOPS_FLS64_H_ + +#include + +/** + * fls64 - find last set bit in a 64-bit word + * @x: the word to search + * + * This is defined in a similar way as the libc and compiler builtin + * ffsll, but returns the position of the most significant set bit. + * + * fls64(value) returns 0 if value is 0 or the position of the last + * set bit if value is nonzero. The last (most significant) bit is + * at position 64. 
+ */ +#include "common/arch/mips/asm/bitops.h" +#if BITS_PER_LONG == 32 +static __always_inline int fls64(__u64 x) +{ + __u32 h = x >> 32; + if (h) + return fls(h) + 32; + return fls(x); +} +#elif BITS_PER_LONG == 64 +extern unsigned long __fls(unsigned long word); +static __always_inline int fls64(__u64 x) +{ + if (x == 0) + return 0; + return __fls(x) + 1; +} +#else +#error BITS_PER_LONG not 32 or 64 +#endif + +#endif /* _ASM_GENERIC_BITOPS_FLS64_H_ */ diff --git a/include/common/arch/mips/asm/linkage.h b/include/common/arch/mips/asm/linkage.h new file mode 100644 index 000000000..8f2426889 --- /dev/null +++ b/include/common/arch/mips/asm/linkage.h @@ -0,0 +1,58 @@ +#ifndef __CR_LINKAGE_H__ +#define __CR_LINKAGE_H__ + +#define zero $0 /* wired zero */ +#define AT $1 /* assembler temp - uppercase because of ".set at" */ +#define v0 $2 +#define v1 $3 + +#define a0 $4 +#define a1 $5 +#define a2 $6 +#define a3 $7 +#define a4 $8 +#define a5 $9 +#define a6 $10 +#define a7 $11 +#define t0 $12 +#define t1 $13 +#define t2 $14 +#define t3 $15 + +#define s0 $16 /* callee saved */ +#define s1 $17 +#define s2 $18 +#define s3 $19 +#define s4 $20 +#define s5 $21 +#define s6 $22 +#define s7 $23 +#define t8 $24 /* caller saved */ +#define t9 $25 +#define jp $25 /* PIC jump register */ +#define k0 $26 /* kernel scratch */ +#define k1 $27 +#define gp $28 /* global pointer */ +#define sp $29 /* stack pointer */ +#define fp $30 /* frame pointer */ +#define s8 $30 /* same like fp! */ +#define ra $31 /* return address */ + +#define __ALIGN .align 8 +#define __ALIGN_STR ".align 8" + +#define GLOBAL(name) \ + .globl name; \ + name: + +#define ENTRY(name) \ + .globl name; \ + __ALIGN; \ + .type name, @function; \ + name: + +#define END(sym) \ + .size sym, . 
- sym + + +#endif /* __CR_LINKAGE_H__ */ diff --git a/include/common/arch/mips/asm/page.h b/include/common/arch/mips/asm/page.h new file mode 100755 index 000000000..bf27420f7 --- /dev/null +++ b/include/common/arch/mips/asm/page.h @@ -0,0 +1,39 @@ +#ifndef __CR_ASM_PAGE_H__ +#define __CR_ASM_PAGE_H__ + +#define ARCH_HAS_LONG_PAGES + +#ifndef CR_NOGLIBC +#include /* ffsl() */ +#include /* _SC_PAGESIZE */ + +static unsigned __page_size; +static unsigned __page_shift; + +static inline unsigned page_size(void) +{ + if (!__page_size) + __page_size = sysconf(_SC_PAGESIZE); + return __page_size; +} + +static inline unsigned page_shift(void) +{ + if (!__page_shift) + __page_shift = (ffsl(page_size()) - 1); + return __page_shift; +} + +#define PAGE_SIZE page_size() +#define PAGE_SHIFT page_shift() +#define PAGE_MASK (~(PAGE_SIZE - 1)) + +#define PAGE_PFN(addr) ((addr) / PAGE_SIZE) +#else /* CR_NOGLIBC */ + +extern unsigned page_size(void); +#define PAGE_SIZE page_size() + +#endif /* CR_NOGLIBC */ + +#endif /* __CR_ASM_PAGE_H__ */ diff --git a/include/common/arch/mips/asm/utils.h b/include/common/arch/mips/asm/utils.h new file mode 100644 index 000000000..6415bf485 --- /dev/null +++ b/include/common/arch/mips/asm/utils.h @@ -0,0 +1,24 @@ +#ifndef __UTILS_H__ +#define __UTILS_H__ + + +# define kernel_uses_llsc 1 + +typedef struct { + int counter; +}atomic_t; + + +/* + * FIXME: detect with compel_cpu_has_feature() if LL/SC implicitly + * provide a memory barrier. 
+*/ +#define __WEAK_LLSC_MB " sync \n" + +#define smp_llsc_mb() __asm__ __volatile__(__WEAK_LLSC_MB : : :"memory") + +#define smp_mb__before_llsc() smp_llsc_mb() +#define smp_mb__before_atomic() smp_mb__before_llsc() +#define smp_mb__after_atomic() smp_llsc_mb() + +#endif /* __UTILS_H__ */ From e7d13b368db95c57d0a20ea016d6b0517e19d3b8 Mon Sep 17 00:00:00 2001 From: Guoyun Sun Date: Wed, 8 Apr 2020 10:21:11 +0800 Subject: [PATCH 0411/2030] mips:compel: Enable mips in compel/ Signed-off-by: Guoyun Sun --- compel/include/uapi/handle-elf.h | 9 ++++- compel/include/uapi/infect.h | 1 + compel/plugins/Makefile | 14 +++++++ compel/src/lib/handle-elf.c | 65 +++++++++++++++++++++++++++++++- compel/src/lib/infect.c | 5 ++- compel/src/main.c | 3 ++ 6 files changed, 93 insertions(+), 4 deletions(-) diff --git a/compel/include/uapi/handle-elf.h b/compel/include/uapi/handle-elf.h index ddeecb0d5..ba40be57f 100644 --- a/compel/include/uapi/handle-elf.h +++ b/compel/include/uapi/handle-elf.h @@ -4,7 +4,14 @@ #define COMPEL_TYPE_INT (1u << 0) #define COMPEL_TYPE_LONG (1u << 1) #define COMPEL_TYPE_GOTPCREL (1u << 2) - +#ifdef CONFIG_MIPS +#define COMPEL_TYPE_MIPS_26 (1u << 3) +#define COMPEL_TYPE_MIPS_HI16 (1u << 4) +#define COMPEL_TYPE_MIPS_LO16 (1u << 5) +#define COMPEL_TYPE_MIPS_HIGHER (1u << 6) +#define COMPEL_TYPE_MIPS_HIGHEST (1u << 7) +#define COMPEL_TYPE_MIPS_64 (1u << 8) +#endif typedef struct { unsigned int offset; unsigned int type; diff --git a/compel/include/uapi/infect.h b/compel/include/uapi/infect.h index dd672bc1c..4f14c7270 100644 --- a/compel/include/uapi/infect.h +++ b/compel/include/uapi/infect.h @@ -165,6 +165,7 @@ extern struct parasite_blob_desc *compel_parasite_blob_desc(struct parasite_ctl extern int __must_check compel_get_thread_regs(struct parasite_thread_ctl *, save_regs_t, void *); extern void compel_relocs_apply(void *mem, void *vbase, size_t size, compel_reloc_t *elf_relocs, size_t nr_relocs); +extern void compel_relocs_apply_mips(void *mem, void 
*vbase, compel_reloc_t *elf_relocs, size_t nr_relocs); extern unsigned long compel_task_size(void); diff --git a/compel/plugins/Makefile b/compel/plugins/Makefile index 197ff1b24..e5fa781ac 100644 --- a/compel/plugins/Makefile +++ b/compel/plugins/Makefile @@ -16,7 +16,12 @@ asflags-y += -I compel/include/uapi # General compel includes ccflags-y += -iquote compel/include + +ifeq ($(ARCH),mips) +ccflags-y += -mno-abicalls -fno-pic -fno-stack-protector +else ccflags-y += -fpie -fno-stack-protector +endif # General compel/plugins includes ccflags-y += -iquote $(obj)/include @@ -28,7 +33,12 @@ asflags-y += -iquote $(PLUGIN_ARCH_DIR)/include asflags-y += -iquote $(PLUGIN_ARCH_DIR) # General flags for assembly +ifeq ($(ARCH),mips) +asflags-y += -mno-abicalls -fno-pic -Wstrict-prototypes +else asflags-y += -fpie -Wstrict-prototypes +endif + asflags-y += -nostdlib -fomit-frame-pointer asflags-y += -fno-stack-protector ldflags-y += -z noexecstack @@ -57,6 +67,10 @@ ifeq ($(ARCH),x86) std-lib-y += ./$(PLUGIN_ARCH_DIR)/std/memcpy.o endif +ifeq ($(ARCH),mips) + std-lib-y += ./$(PLUGIN_ARCH_DIR)/std/memcpy.o +endif + ifeq ($(ARCH),ppc64) std-lib-y += ./$(PLUGIN_ARCH_DIR)/std/memcpy.o std-lib-y += ./$(PLUGIN_ARCH_DIR)/std/memcmp.o diff --git a/compel/src/lib/handle-elf.c b/compel/src/lib/handle-elf.c index 69d5104b6..e97d75026 100644 --- a/compel/src/lib/handle-elf.c +++ b/compel/src/lib/handle-elf.c @@ -16,8 +16,9 @@ #include "piegen.h" #include "log.h" -piegen_opt_t opts = {}; - +#ifdef CONFIG_MIPS +#include "ldsodefs.h" +#endif /* Check if pointer is out-of-bound */ static bool __ptr_oob(const uintptr_t ptr, const uintptr_t start, const size_t size) { @@ -403,6 +404,66 @@ int __handle_elf(void *mem, size_t size) #endif switch (ELF_R_TYPE(r->rel.r_info)) { +#ifdef CONFIG_MIPS + case R_MIPS_PC16: + /* s+a-p relative */ + *((int32_t *)where) = *((int32_t *)where) | ((value32 + addend32 - place)>>2); + break; + + case R_MIPS_26: + /* local : (((A << 2) | (P & 0xf0000000) + S) >> 
2 + * external : (sign-extend(A << 2) + S) >> 2 + */ + + if (((unsigned)ELF_ST_BIND(sym->st_info) == 0x1) + || ((unsigned)ELF_ST_BIND(sym->st_info) == 0x2)){ + /* bind type local is 0x0 ,global is 0x1,WEAK is 0x2 */ + addend32 = value32; + } + pr_out(" { .offset = 0x%-8x, .type = COMPEL_TYPE_MIPS_26, " + ".addend = %-8d, .value = 0x%-16x, }, /* R_MIPS_26 */\n", + (unsigned int)place, addend32, value32); + break; + + case R_MIPS_32: + /* S+A */ + break; + + case R_MIPS_64: + pr_out(" { .offset = 0x%-8x, .type = COMPEL_TYPE_MIPS_64, " + ".addend = %-8ld, .value = 0x%-16lx, }, /* R_MIPS_64 */\n", + (unsigned int)place, (long)addend64, (long)value64); + break; + + case R_MIPS_HIGHEST: + pr_out(" { .offset = 0x%-8x, .type = COMPEL_TYPE_MIPS_HIGHEST, " + ".addend = %-8d, .value = 0x%-16x, }, /* R_MIPS_HIGHEST */\n", + (unsigned int)place, addend32, value32); + break; + + case R_MIPS_HIGHER: + pr_out(" { .offset = 0x%-8x, .type = COMPEL_TYPE_MIPS_HIGHER, " + ".addend = %-8d, .value = 0x%-16x, }, /* R_MIPS_HIGHER */\n", + (unsigned int)place, addend32, value32); + break; + + case R_MIPS_HI16: + pr_out(" { .offset = 0x%-8x, .type = COMPEL_TYPE_MIPS_HI16, " + ".addend = %-8d, .value = 0x%-16x, }, /* R_MIPS_HI16 */\n", + (unsigned int)place, addend32, value32); + break; + + case R_MIPS_LO16: + if((unsigned)ELF_ST_BIND(sym->st_info) == 0x1){ + /* bind type local is 0x0 ,global is 0x1 */ + addend32 = value32; + } + pr_out(" { .offset = 0x%-8x, .type = COMPEL_TYPE_MIPS_LO16, " + ".addend = %-8d, .value = 0x%-16x, }, /* R_MIPS_LO16 */\n", + (unsigned int)place, addend32, value32); + break; + +#endif #ifdef ELF_PPC64 case R_PPC64_REL24: /* Update PC relative offset, linker has not done this yet */ diff --git a/compel/src/lib/infect.c b/compel/src/lib/infect.c index 3fad85ed3..296ffcbd0 100644 --- a/compel/src/lib/infect.c +++ b/compel/src/lib/infect.c @@ -817,7 +817,9 @@ err_cure: void compel_relocs_apply(void *mem, void *vbase, size_t size, compel_reloc_t *elf_relocs, size_t
nr_relocs) { size_t i, j; - +#ifdef CONFIG_MIPS + compel_relocs_apply_mips(mem, vbase, elf_relocs, nr_relocs); +#else for (i = 0, j = 0; i < nr_relocs; i++) { if (elf_relocs[i].type & COMPEL_TYPE_LONG) { long *where = mem + elf_relocs[i].offset; @@ -840,6 +842,7 @@ void compel_relocs_apply(void *mem, void *vbase, size_t size, compel_reloc_t *el } else BUG(); } +#endif } static int compel_map_exchange(struct parasite_ctl *ctl, unsigned long size) diff --git a/compel/src/main.c b/compel/src/main.c index 9fc3a924c..c5f6e57ed 100644 --- a/compel/src/main.c +++ b/compel/src/main.c @@ -53,11 +53,14 @@ static const flags_t flags = { #elif defined CONFIG_S390 .arch = "s390", .cflags = COMPEL_CFLAGS_PIE, +#elif defined CONFIG_MIPS + .arch = "mips", #else #error "CONFIG_ not defined, or unsupported ARCH" #endif }; +piegen_opt_t opts = {}; const char *uninst_root; static int piegen(void) From 158e8f8fe637117f00bb6ecbad84d6491970d84d Mon Sep 17 00:00:00 2001 From: Guoyun Sun Date: Wed, 8 Apr 2020 10:23:16 +0800 Subject: [PATCH 0412/2030] mips:proto: Add mips to protocol buffer files Signed-off-by: Guoyun Sun --- images/Makefile | 1 + images/core-mips.proto | 90 ++++++++++++++++++++++++++++++++++++++++++ images/core.proto | 5 +++ images/sa.proto | 1 + 4 files changed, 97 insertions(+) create mode 100755 images/core-mips.proto diff --git a/images/Makefile b/images/Makefile index bc67278e6..5458e4679 100644 --- a/images/Makefile +++ b/images/Makefile @@ -1,6 +1,7 @@ proto-obj-y += stats.o proto-obj-y += core.o proto-obj-y += core-x86.o +proto-obj-y += core-mips.o proto-obj-y += core-arm.o proto-obj-y += core-aarch64.o proto-obj-y += core-ppc64.o diff --git a/images/core-mips.proto b/images/core-mips.proto new file mode 100755 index 000000000..6391b1e86 --- /dev/null +++ b/images/core-mips.proto @@ -0,0 +1,90 @@ +syntax = "proto2"; + +import "opts.proto"; + +message user_mips_regs_entry { + required uint64 r0 = 1; + required uint64 r1 = 2; + required uint64 r2 = 3; + required 
uint64 r3 = 4; + required uint64 r4 = 5; + required uint64 r5 = 6; + required uint64 r6 = 7; + required uint64 r7 = 8; + required uint64 r8 = 9; + required uint64 r9 = 10; + required uint64 r10 = 11; + required uint64 r11 = 12; + required uint64 r12 = 13; + required uint64 r13 = 14; + required uint64 r14 = 15; + required uint64 r15 = 16; + required uint64 r16 = 17; + required uint64 r17 = 18; + required uint64 r18 = 19; + required uint64 r19 = 20; + required uint64 r20 = 21; + required uint64 r21 = 22; + required uint64 r22 = 23; + required uint64 r23 = 24; + required uint64 r24 = 25; + required uint64 r25 = 26; + required uint64 r26 = 27; + required uint64 r27 = 28; + required uint64 r28 = 29; + required uint64 r29 = 30; + required uint64 r30 = 31; + required uint64 r31 = 32; + required uint64 lo = 33; + required uint64 hi = 34; + required uint64 cp0_epc = 35; + required uint64 cp0_badvaddr = 36; + required uint64 cp0_status = 37; + required uint64 cp0_cause = 38; +} + +message user_mips_fpregs_entry { + required uint64 r0 = 1; + required uint64 r1 = 2; + required uint64 r2 = 3; + required uint64 r3 = 4; + required uint64 r4 = 5; + required uint64 r5 = 6; + required uint64 r6 = 7; + required uint64 r7 = 8; + required uint64 r8 = 9; + required uint64 r9 = 10; + required uint64 r10 = 11; + required uint64 r11 = 12; + required uint64 r12 = 13; + required uint64 r13 = 14; + required uint64 r14 = 15; + required uint64 r15 = 16; + required uint64 r16 = 17; + required uint64 r17 = 18; + required uint64 r18 = 19; + required uint64 r19 = 20; + required uint64 r20 = 21; + required uint64 r21 = 22; + required uint64 r22 = 23; + required uint64 r23 = 24; + required uint64 r24 = 25; + required uint64 r25 = 26; + required uint64 r26 = 27; + required uint64 r27 = 28; + required uint64 r28 = 29; + required uint64 r29 = 30; + required uint64 r30 = 31; + required uint64 r31 = 32; + required uint64 lo = 33; + required uint64 hi = 34; + required uint32 fpu_fcr31 = 35; + required 
uint32 fpu_id = 36; +} + +message thread_info_mips { + required uint64 clear_tid_addr = 1[(criu).hex = true]; + required uint64 tls = 2; + required user_mips_regs_entry gpregs = 3[(criu).hex = true]; + required user_mips_fpregs_entry fpregs = 4[(criu).hex = true]; +} diff --git a/images/core.proto b/images/core.proto index 22c2a9f1f..9e9e39388 100644 --- a/images/core.proto +++ b/images/core.proto @@ -5,6 +5,7 @@ import "core-arm.proto"; import "core-aarch64.proto"; import "core-ppc64.proto"; import "core-s390.proto"; +import "core-mips.proto"; import "rlimit.proto"; import "timer.proto"; @@ -55,6 +56,7 @@ message task_core_entry { optional bool child_subreaper = 18; // Reserved for container relative start time //optional uint64 start_time = 19; + optional uint64 blk_sigset_extended = 20[(criu).hex = true]; } message task_kobj_ids_entry { @@ -96,6 +98,7 @@ message thread_core_entry { optional uint32 seccomp_filter = 12; optional string comm = 13; + optional uint64 blk_sigset_extended = 14; } message task_rlimits_entry { @@ -110,6 +113,7 @@ message core_entry { AARCH64 = 3; PPC64 = 4; S390 = 5; + MIPS = 6; } required march mtype = 1; @@ -118,6 +122,7 @@ message core_entry { optional thread_info_aarch64 ti_aarch64 = 8; optional thread_info_ppc64 ti_ppc64 = 9; optional thread_info_s390 ti_s390 = 10; + optional thread_info_mips ti_mips = 11; optional task_core_entry tc = 3; optional task_kobj_ids_entry ids = 4; diff --git a/images/sa.proto b/images/sa.proto index 3bce0c4ff..07fd4ffd3 100644 --- a/images/sa.proto +++ b/images/sa.proto @@ -8,4 +8,5 @@ message sa_entry { required uint64 restorer = 3 [(criu).hex = true]; required uint64 mask = 4 [(criu).hex = true]; optional bool compat_sigaction = 5; + optional uint64 mask_extended = 6 [(criu).hex = true]; } From d325b7b775cd2e51651c00137be0df5bd3d2d74f Mon Sep 17 00:00:00 2001 From: Guoyun Sun Date: Wed, 8 Apr 2020 10:24:45 +0800 Subject: [PATCH 0413/2030] mips:criu/arch/mips: Add mips parts to criu Signed-off-by: 
Guoyun Sun --- criu/arch/mips/Makefile | 14 + criu/arch/mips/cpu.c | 53 ++++ criu/arch/mips/crtools.c | 252 ++++++++++++++++++ criu/arch/mips/include/asm/dump.h | 14 + criu/arch/mips/include/asm/int.h | 6 + criu/arch/mips/include/asm/kerndat.h | 7 + criu/arch/mips/include/asm/parasite-syscall.h | 8 + criu/arch/mips/include/asm/parasite.h | 9 + criu/arch/mips/include/asm/restore.h | 29 ++ criu/arch/mips/include/asm/restorer.h | 79 ++++++ criu/arch/mips/include/asm/syscall32.h | 17 ++ criu/arch/mips/include/asm/types.h | 31 +++ criu/arch/mips/include/asm/vdso.h | 23 ++ criu/arch/mips/restorer.c | 17 ++ criu/arch/mips/sigaction_compat.c | 19 ++ criu/arch/mips/sigframe.c | 13 + criu/arch/mips/vdso-pie.c | 21 ++ 17 files changed, 612 insertions(+) create mode 100755 criu/arch/mips/Makefile create mode 100755 criu/arch/mips/cpu.c create mode 100755 criu/arch/mips/crtools.c create mode 100755 criu/arch/mips/include/asm/dump.h create mode 100755 criu/arch/mips/include/asm/int.h create mode 100644 criu/arch/mips/include/asm/kerndat.h create mode 100755 criu/arch/mips/include/asm/parasite-syscall.h create mode 100755 criu/arch/mips/include/asm/parasite.h create mode 100755 criu/arch/mips/include/asm/restore.h create mode 100755 criu/arch/mips/include/asm/restorer.h create mode 100755 criu/arch/mips/include/asm/syscall32.h create mode 100755 criu/arch/mips/include/asm/types.h create mode 100755 criu/arch/mips/include/asm/vdso.h create mode 100755 criu/arch/mips/restorer.c create mode 100755 criu/arch/mips/sigaction_compat.c create mode 100755 criu/arch/mips/sigframe.c create mode 100755 criu/arch/mips/vdso-pie.c diff --git a/criu/arch/mips/Makefile b/criu/arch/mips/Makefile new file mode 100755 index 000000000..4bd99eb7e --- /dev/null +++ b/criu/arch/mips/Makefile @@ -0,0 +1,14 @@ +builtin-name := crtools.built-in.o + +ccflags-y += -iquote $(obj)/include +ccflags-y += -iquote criu/include -iquote include +ccflags-y += $(COMPEL_UAPI_INCLUDES) + +asflags-y += 
-Wstrict-prototypes +asflags-y += -D__ASSEMBLY__ -nostdlib -fomit-frame-pointer +asflags-y += -iquote $(obj)/include +ldflags-y += -r -z noexecstack + +obj-y += cpu.o +obj-y += crtools.o +obj-y += sigframe.o diff --git a/criu/arch/mips/cpu.c b/criu/arch/mips/cpu.c new file mode 100755 index 000000000..484698e4f --- /dev/null +++ b/criu/arch/mips/cpu.c @@ -0,0 +1,53 @@ +#include +#include +#include +#include +#include +#include + +#include "bitops.h" +#include "asm/types.h" +#include "asm/cpu.h" +#include +#include + +#include "common/compiler.h" +#include "cr_options.h" +#include "image.h" +#include "util.h" +#include "log.h" +#include "cpu.h" +#include "protobuf.h" +#include "images/cpuinfo.pb-c.h" + +#undef LOG_PREFIX +#define LOG_PREFIX "cpu: " + +int cpu_init(void) +{ + return 0; +} + +int cpu_dump_cpuinfo(void) +{ + return 0; +} + +int cpu_validate_cpuinfo(void) +{ + return 0; +} + +int cpuinfo_dump(void) +{ + if (cpu_init()) + return -1; + if (cpu_dump_cpuinfo()) + return -1; + return 0; +} + +int cpuinfo_check(void) +{ + return 0; +} diff --git a/criu/arch/mips/crtools.c b/criu/arch/mips/crtools.c new file mode 100755 index 000000000..329ae8edb --- /dev/null +++ b/criu/arch/mips/crtools.c @@ -0,0 +1,252 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "types.h" +#include "log.h" +#include "asm/parasite-syscall.h" +#include "asm/restorer.h" +#include +#include "asm/dump.h" +#include "cr_options.h" +#include "common/compiler.h" +#include "restorer.h" +#include "parasite-syscall.h" +#include "util.h" +#include "cpu.h" +#include +#include "kerndat.h" + +#include "protobuf.h" +#include "images/core.pb-c.h" +#include "images/creds.pb-c.h" + + +int save_task_regs(void *x, user_regs_struct_t *regs, user_fpregs_struct_t *fpregs) +{ + CoreEntry *core = x; + + /* Save the MIPS CPU state */ + core->ti_mips->gpregs->r0 = regs->regs[0]; + core->ti_mips->gpregs->r1 = regs->regs[1]; + core->ti_mips->gpregs->r2 = 
regs->regs[2]; + core->ti_mips->gpregs->r3 = regs->regs[3]; + core->ti_mips->gpregs->r4 = regs->regs[4]; + core->ti_mips->gpregs->r5 = regs->regs[5]; + core->ti_mips->gpregs->r6 = regs->regs[6]; + core->ti_mips->gpregs->r7 = regs->regs[7]; + core->ti_mips->gpregs->r8 = regs->regs[8]; + core->ti_mips->gpregs->r9 = regs->regs[9]; + core->ti_mips->gpregs->r10 = regs->regs[10]; + core->ti_mips->gpregs->r11 = regs->regs[11]; + core->ti_mips->gpregs->r12 = regs->regs[12]; + core->ti_mips->gpregs->r13 = regs->regs[13]; + core->ti_mips->gpregs->r14 = regs->regs[14]; + core->ti_mips->gpregs->r15 = regs->regs[15]; + core->ti_mips->gpregs->r16 = regs->regs[16]; + core->ti_mips->gpregs->r17 = regs->regs[17]; + core->ti_mips->gpregs->r18 = regs->regs[18]; + core->ti_mips->gpregs->r19 = regs->regs[19]; + core->ti_mips->gpregs->r20 = regs->regs[20]; + core->ti_mips->gpregs->r21 = regs->regs[21]; + core->ti_mips->gpregs->r22 = regs->regs[22]; + core->ti_mips->gpregs->r23 = regs->regs[23]; + core->ti_mips->gpregs->r24 = regs->regs[24]; + core->ti_mips->gpregs->r25 = regs->regs[25]; + core->ti_mips->gpregs->r26 = regs->regs[26]; + core->ti_mips->gpregs->r27 = regs->regs[27]; + core->ti_mips->gpregs->r28 = regs->regs[28]; + core->ti_mips->gpregs->r29 = regs->regs[29]; + core->ti_mips->gpregs->r30 = regs->regs[30]; + core->ti_mips->gpregs->r31 = regs->regs[31]; + + core->ti_mips->gpregs->lo = regs->lo; + core->ti_mips->gpregs->hi = regs->hi; + core->ti_mips->gpregs->cp0_epc = regs->cp0_epc; + core->ti_mips->gpregs->cp0_badvaddr = regs->cp0_badvaddr; + core->ti_mips->gpregs->cp0_status = regs->cp0_status; + core->ti_mips->gpregs->cp0_cause = regs->cp0_cause; + + core->ti_mips->fpregs->r0 = fpregs->regs[0]; + core->ti_mips->fpregs->r1 = fpregs->regs[1]; + core->ti_mips->fpregs->r2 = fpregs->regs[2]; + core->ti_mips->fpregs->r3 = fpregs->regs[3]; + core->ti_mips->fpregs->r4 = fpregs->regs[4]; + core->ti_mips->fpregs->r5 = fpregs->regs[5]; + core->ti_mips->fpregs->r6 = fpregs->regs[6]; + 
core->ti_mips->fpregs->r7 = fpregs->regs[7]; + core->ti_mips->fpregs->r8 = fpregs->regs[8]; + core->ti_mips->fpregs->r9 = fpregs->regs[9]; + core->ti_mips->fpregs->r10 = fpregs->regs[10]; + core->ti_mips->fpregs->r11 = fpregs->regs[11]; + core->ti_mips->fpregs->r12 = fpregs->regs[12]; + core->ti_mips->fpregs->r13 = fpregs->regs[13]; + core->ti_mips->fpregs->r14 = fpregs->regs[14]; + core->ti_mips->fpregs->r15 = fpregs->regs[15]; + core->ti_mips->fpregs->r16 = fpregs->regs[16]; + core->ti_mips->fpregs->r17 = fpregs->regs[17]; + core->ti_mips->fpregs->r18 = fpregs->regs[18]; + core->ti_mips->fpregs->r19 = fpregs->regs[19]; + core->ti_mips->fpregs->r20 = fpregs->regs[20]; + core->ti_mips->fpregs->r21 = fpregs->regs[21]; + core->ti_mips->fpregs->r22 = fpregs->regs[22]; + core->ti_mips->fpregs->r23 = fpregs->regs[23]; + core->ti_mips->fpregs->r24 = fpregs->regs[24]; + core->ti_mips->fpregs->r25 = fpregs->regs[25]; + core->ti_mips->fpregs->r26 = fpregs->regs[26]; + core->ti_mips->fpregs->r27 = fpregs->regs[27]; + core->ti_mips->fpregs->r28 = fpregs->regs[28]; + core->ti_mips->fpregs->r29 = fpregs->regs[29]; + core->ti_mips->fpregs->r30 = fpregs->regs[30]; + core->ti_mips->fpregs->r31 = fpregs->regs[31]; + core->ti_mips->fpregs->fpu_fcr31 = fpregs->fpu_fcr31; + core->ti_mips->fpregs->fpu_id = fpregs->fpu_id; + + return 0; +} + +int arch_alloc_thread_info(CoreEntry *core) +{ + ThreadInfoMips *ti_mips; + UserMipsRegsEntry *gpregs; + UserMipsFpregsEntry *fpregs; + + ti_mips = xmalloc(sizeof(*ti_mips)); + if (!ti_mips) + goto err; + + thread_info_mips__init(ti_mips); + + gpregs = xmalloc(sizeof(*gpregs)); + if (!gpregs){ + xfree(ti_mips); + goto err; + } + + user_mips_regs_entry__init(gpregs); + ti_mips->gpregs = gpregs; + + fpregs = xmalloc(sizeof(*fpregs)); + if (!fpregs){ + xfree(ti_mips); + xfree(gpregs); + goto err; + } + + user_mips_fpregs_entry__init(fpregs); + ti_mips->fpregs = fpregs; + + core->ti_mips = ti_mips; /* publish only once fully built, so core never holds a freed pointer */ + return 0; +err: + return -1; +} + +void
arch_free_thread_info(CoreEntry *core) +{ + if (!core->ti_mips) + return; + + if (core->ti_mips->gpregs) + xfree(core->ti_mips->gpregs); + + if (core->ti_mips->fpregs) + xfree(core->ti_mips->fpregs); + + xfree(core->ti_mips); +} + +int restore_fpu(struct rt_sigframe *sigframe, CoreEntry *core) +{ + struct rt_sigframe *f = sigframe; + UserMipsFpregsEntry *r = core->ti_mips->fpregs; + + f->rs_uc.uc_mcontext.sc_fpregs[0] = r->r0; + f->rs_uc.uc_mcontext.sc_fpregs[1] = r->r1; + f->rs_uc.uc_mcontext.sc_fpregs[2] = r->r2; + f->rs_uc.uc_mcontext.sc_fpregs[3] = r->r3; + f->rs_uc.uc_mcontext.sc_fpregs[4] = r->r4; + f->rs_uc.uc_mcontext.sc_fpregs[5] = r->r5; + f->rs_uc.uc_mcontext.sc_fpregs[6] = r->r6; + f->rs_uc.uc_mcontext.sc_fpregs[7] = r->r7; + f->rs_uc.uc_mcontext.sc_fpregs[8] = r->r8; + f->rs_uc.uc_mcontext.sc_fpregs[9] = r->r9; + f->rs_uc.uc_mcontext.sc_fpregs[10] = r->r10; + f->rs_uc.uc_mcontext.sc_fpregs[11] = r->r11; + f->rs_uc.uc_mcontext.sc_fpregs[12] = r->r12; + f->rs_uc.uc_mcontext.sc_fpregs[13] = r->r13; + f->rs_uc.uc_mcontext.sc_fpregs[14] = r->r14; + f->rs_uc.uc_mcontext.sc_fpregs[15] = r->r15; + f->rs_uc.uc_mcontext.sc_fpregs[16] = r->r16; + f->rs_uc.uc_mcontext.sc_fpregs[17] = r->r17; + f->rs_uc.uc_mcontext.sc_fpregs[18] = r->r18; + f->rs_uc.uc_mcontext.sc_fpregs[19] = r->r19; + f->rs_uc.uc_mcontext.sc_fpregs[20] = r->r20; + f->rs_uc.uc_mcontext.sc_fpregs[21] = r->r21; + f->rs_uc.uc_mcontext.sc_fpregs[22] = r->r22; + f->rs_uc.uc_mcontext.sc_fpregs[23] = r->r23; + f->rs_uc.uc_mcontext.sc_fpregs[24] = r->r24; + f->rs_uc.uc_mcontext.sc_fpregs[25] = r->r25; + f->rs_uc.uc_mcontext.sc_fpregs[26] = r->r26; + f->rs_uc.uc_mcontext.sc_fpregs[27] = r->r27; + f->rs_uc.uc_mcontext.sc_fpregs[28] = r->r28; + f->rs_uc.uc_mcontext.sc_fpregs[29] = r->r29; + f->rs_uc.uc_mcontext.sc_fpregs[30] = r->r30; + f->rs_uc.uc_mcontext.sc_fpregs[31] = r->r31; + + return 0; +} + + +int restore_gpregs(struct rt_sigframe *f, UserMipsRegsEntry *r) +{ + f->rs_uc.uc_mcontext.sc_regs[0] = 
r->r0; + f->rs_uc.uc_mcontext.sc_regs[1] = r->r1; + f->rs_uc.uc_mcontext.sc_regs[2] = r->r2; + f->rs_uc.uc_mcontext.sc_regs[3] = r->r3; + f->rs_uc.uc_mcontext.sc_regs[4] = r->r4; + f->rs_uc.uc_mcontext.sc_regs[5] = r->r5; + f->rs_uc.uc_mcontext.sc_regs[6] = r->r6; + f->rs_uc.uc_mcontext.sc_regs[7] = r->r7; + f->rs_uc.uc_mcontext.sc_regs[8] = r->r8; + f->rs_uc.uc_mcontext.sc_regs[9] = r->r9; + f->rs_uc.uc_mcontext.sc_regs[10] = r->r10; + f->rs_uc.uc_mcontext.sc_regs[11] = r->r11; + f->rs_uc.uc_mcontext.sc_regs[12] = r->r12; + f->rs_uc.uc_mcontext.sc_regs[13] = r->r13; + f->rs_uc.uc_mcontext.sc_regs[14] = r->r14; + f->rs_uc.uc_mcontext.sc_regs[15] = r->r15; + f->rs_uc.uc_mcontext.sc_regs[16] = r->r16; + f->rs_uc.uc_mcontext.sc_regs[17] = r->r17; + f->rs_uc.uc_mcontext.sc_regs[18] = r->r18; + f->rs_uc.uc_mcontext.sc_regs[19] = r->r19; + f->rs_uc.uc_mcontext.sc_regs[20] = r->r20; + f->rs_uc.uc_mcontext.sc_regs[21] = r->r21; + f->rs_uc.uc_mcontext.sc_regs[22] = r->r22; + f->rs_uc.uc_mcontext.sc_regs[23] = r->r23; + f->rs_uc.uc_mcontext.sc_regs[24] = r->r24; + f->rs_uc.uc_mcontext.sc_regs[25] = r->r25; + f->rs_uc.uc_mcontext.sc_regs[26] = r->r26; + f->rs_uc.uc_mcontext.sc_regs[27] = r->r27; + f->rs_uc.uc_mcontext.sc_regs[28] = r->r28; + f->rs_uc.uc_mcontext.sc_regs[29] = r->r29; + f->rs_uc.uc_mcontext.sc_regs[30] = r->r30; + f->rs_uc.uc_mcontext.sc_regs[31] = r->r31; + + f->rs_uc.uc_mcontext.sc_mdlo = r->lo; + f->rs_uc.uc_mcontext.sc_mdhi = r->hi; + f->rs_uc.uc_mcontext.sc_pc = r->cp0_epc; + + return 0; +} + +int get_task_futex_robust_list_compat(pid_t pid, ThreadCoreEntry *info) +{ + return 0; +} diff --git a/criu/arch/mips/include/asm/dump.h b/criu/arch/mips/include/asm/dump.h new file mode 100755 index 000000000..58015833d --- /dev/null +++ b/criu/arch/mips/include/asm/dump.h @@ -0,0 +1,14 @@ +#ifndef __CR_ASM_DUMP_H__ +#define __CR_ASM_DUMP_H__ + +extern int save_task_regs(void *, user_regs_struct_t *, user_fpregs_struct_t *); +extern int 
arch_alloc_thread_info(CoreEntry *core); +extern void arch_free_thread_info(CoreEntry *core); +extern int get_task_futex_robust_list_compat(pid_t pid, ThreadCoreEntry *info); + +static inline void core_put_tls(CoreEntry *core, tls_t tls) +{ + core->ti_mips->tls = tls; +} + +#endif diff --git a/criu/arch/mips/include/asm/int.h b/criu/arch/mips/include/asm/int.h new file mode 100755 index 000000000..642804e9b --- /dev/null +++ b/criu/arch/mips/include/asm/int.h @@ -0,0 +1,6 @@ +#ifndef __CR_ASM_INT_H__ +#define __CR_ASM_INT_H__ + +#include "asm-generic/int.h" + +#endif /* __CR_ASM_INT_H__ */ diff --git a/criu/arch/mips/include/asm/kerndat.h b/criu/arch/mips/include/asm/kerndat.h new file mode 100644 index 000000000..60956b573 --- /dev/null +++ b/criu/arch/mips/include/asm/kerndat.h @@ -0,0 +1,7 @@ +#ifndef __CR_ASM_KERNDAT_H__ +#define __CR_ASM_KERNDAT_H__ + +#define kdat_compatible_cr() 0 +#define kdat_can_map_vdso() 0 + +#endif /* __CR_ASM_KERNDAT_H__ */ diff --git a/criu/arch/mips/include/asm/parasite-syscall.h b/criu/arch/mips/include/asm/parasite-syscall.h new file mode 100755 index 000000000..a2b5e75ff --- /dev/null +++ b/criu/arch/mips/include/asm/parasite-syscall.h @@ -0,0 +1,8 @@ +#ifndef __CR_ASM_PARASITE_SYSCALL_H__ +#define __CR_ASM_PARASITE_SYSCALL_H__ + +#include "asm/types.h" + +struct parasite_ctl; + +#endif diff --git a/criu/arch/mips/include/asm/parasite.h b/criu/arch/mips/include/asm/parasite.h new file mode 100755 index 000000000..39882dd21 --- /dev/null +++ b/criu/arch/mips/include/asm/parasite.h @@ -0,0 +1,9 @@ +#ifndef __ASM_PARASITE_H__ +#define __ASM_PARASITE_H__ + +static inline void arch_get_tls(tls_t *ptls) +{ + asm("rdhwr %0, $29" : "=r"(*ptls)); +} + +#endif diff --git a/criu/arch/mips/include/asm/restore.h b/criu/arch/mips/include/asm/restore.h new file mode 100755 index 000000000..0cb9aa8ed --- /dev/null +++ b/criu/arch/mips/include/asm/restore.h @@ -0,0 +1,29 @@ +#ifndef __CR_ASM_RESTORE_H__ +#define __CR_ASM_RESTORE_H__ + +#include 
"asm/restorer.h" +#include "images/core.pb-c.h" + +#define JUMP_TO_RESTORER_BLOB(new_sp, restore_task_exec_start, \ + task_args) \ + asm volatile( \ + "move $4, %0 \n" \ + "move $25, %1 \n" \ + "move $5, %2 \n" \ + "move $29, $5 \n" \ + "jalr $25 \n" \ + "nop \n" \ + : \ + :"r"(task_args),"r"(restore_task_exec_start), \ + "g"(new_sp) \ + : "$29", "$25", "$4","$5") + +static inline void core_get_tls(CoreEntry *pcore, tls_t *ptls) +{ + *ptls = pcore->ti_mips->tls; +} + + +int restore_fpu(struct rt_sigframe *sigframe, CoreEntry *core); + +#endif diff --git a/criu/arch/mips/include/asm/restorer.h b/criu/arch/mips/include/asm/restorer.h new file mode 100755 index 000000000..1a33cd884 --- /dev/null +++ b/criu/arch/mips/include/asm/restorer.h @@ -0,0 +1,79 @@ +#ifndef __CR_ASM_RESTORER_H__ +#define __CR_ASM_RESTORER_H__ + +#include "asm/types.h" +#include +#include "images/core.pb-c.h" +#include +#include + +static inline void restore_tls(tls_t *ptls) { + asm volatile( + "move $4, %0 \n" + "li $2, "__stringify(__NR_set_thread_area)" \n" + "syscall \n" + : + : "r"(*ptls) + : "$4","$2","memory"); +} +static inline int arch_compat_rt_sigaction(void *stack, int sig, void *act) +{ + return -1; +} +static inline int set_compat_robust_list(uint32_t head_ptr, uint32_t len) +{ + return -1; +} + +#define RUN_CLONE_RESTORE_FN(ret, clone_flags, new_sp, parent_tid, \ + thread_args, clone_restore_fn) \ + asm volatile( \ + "ld $5,%2 \n" /* a1 = new_sp */ \ + "dsubu $5,32 \n" \ + "sd %5,0($5) \n" \ + "sd %6,8($5) \n" \ + "sd %1,16($5) \n" \ + "move $4,%1 \n" /* a0=flags */ \ + "move $6,%3 \n" /* a2=parent_tid */ \ + "li $7,0 \n" /* a3 = tls is 0 */ \ + "move $8,%4 \n" /* a4 = child_tid */ \ + "li $2, "__stringify(__NR_clone)" \n" \ + "syscall \n" /* syscall */ \ + "sync \n" \ + "bnez $7,err \n" \ + "nop \n" \ + "beqz $2,thread_start \n" \ + "nop \n" \ + "move %0,$2 \n" \ + "b end \n" \ + "err:break \n" \ + "thread_start: \n" \ + "ld $25,0($29) \n" \ + "ld $4,8($29) \n" \ + "jal $25 \n" \ 
+ "nop \n" \ + "end: \n" \ + : "=r"(ret) \ + : "r"(clone_flags), \ + "m"(new_sp), \ + "r"(&parent_tid), \ + "r"(&thread_args[i].pid), \ + "r"(clone_restore_fn), \ + "r"(&thread_args[i]) \ + :"$2","$4","$5","$6","$7","$8","$25","$29","memory") + +#define RUN_CLONE3_RESTORE_FN(ret, clone_args, size, args, \ + clone_restore_fn) do { \ + pr_err("This architecture does not support clone3() with set_tid, yet!\n"); \ + ret = -1; \ +} while (0) + +#define kdat_compatible_cr() 0 +#define arch_map_vdso(map, compat) -1 + +static inline void *alloc_compat_syscall_stack(void) { return NULL; } +static inline void free_compat_syscall_stack(void *stack32) { } +int restore_gpregs(struct rt_sigframe *f, UserMipsRegsEntry *r); +int restore_nonsigframe_gpregs(UserMipsRegsEntry *r); + +#endif diff --git a/criu/arch/mips/include/asm/syscall32.h b/criu/arch/mips/include/asm/syscall32.h new file mode 100755 index 000000000..a6e298217 --- /dev/null +++ b/criu/arch/mips/include/asm/syscall32.h @@ -0,0 +1,17 @@ +#ifndef __CR_SYSCALL32_H__ +#define __CR_SYSCALL32_H__ + +extern long sys_socket(int domain, int type, int protocol); +extern long sys_connect(int sockfd, struct sockaddr *addr, int addrlen); +extern long sys_sendto(int sockfd, void *buff, size_t len, unsigned int flags, struct sockaddr *addr, int addr_len); +extern long sys_recvfrom(int sockfd, void *ubuf, size_t size, unsigned int flags, struct sockaddr *addr, int *addr_len); +extern long sys_sendmsg(int sockfd, const struct msghdr *msg, int flags); +extern long sys_recvmsg(int sockfd, struct msghdr *msg, int flags); +extern long sys_shutdown(int sockfd, int how); +extern long sys_bind(int sockfd, const struct sockaddr *addr, int addrlen); +extern long sys_setsockopt(int sockfd, int level, int optname, const void *optval, unsigned int optlen); +extern long sys_getsockopt(int sockfd, int level, int optname, const void *optval, unsigned int *optlen); +extern long sys_shmat(int shmid, void *shmaddr, int shmflag); +extern long 
sys_pread(unsigned int fd, char *ubuf, u32 count, u64 pos); + +#endif /* __CR_SYSCALL32_H__ */ diff --git a/criu/arch/mips/include/asm/types.h b/criu/arch/mips/include/asm/types.h new file mode 100755 index 000000000..8366e0540 --- /dev/null +++ b/criu/arch/mips/include/asm/types.h @@ -0,0 +1,31 @@ +#ifndef __CR_ASM_TYPES_H__ +#define __CR_ASM_TYPES_H__ + +#include +#include + +#include "page.h" +#include "bitops.h" +#include "asm/int.h" + +#include + +#include "images/core.pb-c.h" + +#define core_is_compat(core) false + +#define CORE_ENTRY__MARCH CORE_ENTRY__MARCH__MIPS + +#define CORE_THREAD_ARCH_INFO(core) core->ti_mips + +typedef UserMipsRegsEntry UserRegsEntry; + +static inline u64 encode_pointer(void *p) { return (u64)p; } +static inline void *decode_pointer(u64 v) { return (void*)v; } + + +#define AT_VECTOR_SIZE 44 +typedef uint64_t auxv_t; +typedef unsigned long tls_t; + +#endif /* __CR_ASM_TYPES_H__ */ diff --git a/criu/arch/mips/include/asm/vdso.h b/criu/arch/mips/include/asm/vdso.h new file mode 100755 index 000000000..0e5da159e --- /dev/null +++ b/criu/arch/mips/include/asm/vdso.h @@ -0,0 +1,23 @@ +#ifndef __CR_ASM_VDSO_H__ +#define __CR_ASM_VDSO_H__ + +#include "asm/int.h" +#include "asm-generic/vdso.h" + +/* This definition is used in pie/util-vdso.c to initialize the vdso symbol + * name string table 'vdso_symbols' + */ + +/* + * This is a minimal amount of symbols + * we should support at the moment. 
+ */ +#define VDSO_SYMBOL_MAX 3 +#define VDSO_SYMBOL_GTOD 0 +#define ARCH_VDSO_SYMBOLS \ + "__vdso_clock_gettime", \ + "__vdso_gettimeofday", \ + "__vdso_clock_getres" + + +#endif /* __CR_ASM_VDSO_H__ */ diff --git a/criu/arch/mips/restorer.c b/criu/arch/mips/restorer.c new file mode 100755 index 000000000..2e196b60c --- /dev/null +++ b/criu/arch/mips/restorer.c @@ -0,0 +1,17 @@ +#include + +#include "types.h" +#include "restorer.h" +#include "asm/restorer.h" +#include + +#include +#include +#include +#include "log.h" +#include "cpu.h" + +int restore_nonsigframe_gpregs(UserMipsRegsEntry *r) +{ + return 0; +} diff --git a/criu/arch/mips/sigaction_compat.c b/criu/arch/mips/sigaction_compat.c new file mode 100755 index 000000000..d3e45f082 --- /dev/null +++ b/criu/arch/mips/sigaction_compat.c @@ -0,0 +1,19 @@ +#include "log.h" +#include "asm/restorer.h" +#include +#include "asm/compat.h" +#include + +#ifdef CR_NOGLIBC +# include +#endif + +#include "cpu.h" + +extern char restore_rt_sigaction; + +int arch_compat_rt_sigaction(void *stack32, int sig, rt_sigaction_t_compat *act) +{ + return 0; +} + diff --git a/criu/arch/mips/sigframe.c b/criu/arch/mips/sigframe.c new file mode 100755 index 000000000..1e39102f0 --- /dev/null +++ b/criu/arch/mips/sigframe.c @@ -0,0 +1,13 @@ +#include +#include + +#include "asm/sigframe.h" +#include "asm/types.h" + +#include "log.h" +#include +int sigreturn_prep_fpu_frame(struct rt_sigframe *sigframe, + struct rt_sigframe *rsigframe) +{ + return 0; +} diff --git a/criu/arch/mips/vdso-pie.c b/criu/arch/mips/vdso-pie.c new file mode 100755 index 000000000..737e5538b --- /dev/null +++ b/criu/arch/mips/vdso-pie.c @@ -0,0 +1,21 @@ +#include +#include "asm/types.h" + +#include +#include +#include "parasite-vdso.h" +#include "log.h" +#include "common/bug.h" + +#ifdef LOG_PREFIX +# undef LOG_PREFIX +#endif +#define LOG_PREFIX "vdso: " + +int vdso_redirect_calls(unsigned long base_to, unsigned long base_from, + struct vdso_symtable *sto, struct 
vdso_symtable *sfrom, + bool compat_vdso) +{ + pr_err("Vdso proxification isn't implemented on mips\n"); + return -1; +} From afe90627e276e9e68cdf4872cae8fa3c5c637914 Mon Sep 17 00:00:00 2001 From: Guoyun Sun Date: Wed, 8 Apr 2020 10:29:21 +0800 Subject: [PATCH 0414/2030] mips:criu: Enable mips in criu Signed-off-by: Guoyun Sun --- Makefile | 10 +++++++++- criu/cr-restore.c | 9 ++++++++- criu/kerndat.c | 9 +++++++++ criu/parasite-syscall.c | 17 +++++++++++++++++ criu/pie/Makefile | 4 ++++ criu/pie/Makefile.library | 4 ++++ 6 files changed, 51 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 00e563c11..e72dd1428 100644 --- a/Makefile +++ b/Makefile @@ -19,7 +19,7 @@ endif # # Supported Architectures -ifneq ($(filter-out x86 arm aarch64 ppc64 s390,$(ARCH)),) +ifneq ($(filter-out x86 arm aarch64 ppc64 s390 mips,$(ARCH)),) $(error "The architecture $(ARCH) isn't supported") endif @@ -76,6 +76,10 @@ ifeq ($(ARCH),x86) DEFINES := -DCONFIG_X86_64 endif +ifeq ($(ARCH),mips) + DEFINES := -DCONFIG_MIPS +endif + # # CFLAGS_PIE: # @@ -105,6 +109,10 @@ WARNINGS := -Wall -Wformat-security -Wdeclaration-after-statement -Wstrict-prot CFLAGS-GCOV := --coverage -fno-exceptions -fno-inline -fprofile-update=atomic export CFLAGS-GCOV +ifeq ($(ARCH),mips) +WARNINGS := -rdynamic +endif + ifneq ($(GCOV),) LDFLAGS += -lgcov CFLAGS += $(CFLAGS-GCOV) diff --git a/criu/cr-restore.c b/criu/cr-restore.c index f572f79a0..99b36e0d4 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -461,9 +461,16 @@ static int restore_native_sigaction(int sig, SaEntry *e) ASSIGN_TYPED(act.rt_sa_handler, decode_pointer(e->sigaction)); ASSIGN_TYPED(act.rt_sa_flags, e->flags); ASSIGN_TYPED(act.rt_sa_restorer, decode_pointer(e->restorer)); +#ifdef CONFIG_MIPS + e->has_mask_extended = 1; + BUILD_BUG_ON(sizeof(e->mask)* 2 != sizeof(act.rt_sa_mask.sig)); + + memcpy(&(act.rt_sa_mask.sig[0]), &e->mask, sizeof(act.rt_sa_mask.sig[0])); + memcpy(&(act.rt_sa_mask.sig[1]), &e->mask_extended, 
sizeof(act.rt_sa_mask.sig[1])); +#else BUILD_BUG_ON(sizeof(e->mask) != sizeof(act.rt_sa_mask.sig)); memcpy(act.rt_sa_mask.sig, &e->mask, sizeof(act.rt_sa_mask.sig)); - +#endif if (sig == SIGCHLD) { sigchld_act = act; return 0; diff --git a/criu/kerndat.c b/criu/kerndat.c index 0b6d53bc7..831f9f72a 100644 --- a/criu/kerndat.c +++ b/criu/kerndat.c @@ -1007,6 +1007,15 @@ static bool kerndat_has_clone3_set_tid(void) pid_t pid; struct _clone_args args = {}; +#if defined(CONFIG_MIPS) + /* + * Currently the CRIU PIE assembler clone3() wrapper is + * not implemented for MIPS. + */ + kdat.has_clone3_set_tid = false; + return 0; +#endif + args.set_tid = -1; /* * On a system without clone3() this will return ENOSYS. diff --git a/criu/parasite-syscall.c b/criu/parasite-syscall.c index b649d1b51..5f9de152a 100644 --- a/criu/parasite-syscall.c +++ b/criu/parasite-syscall.c @@ -179,7 +179,12 @@ int parasite_dump_thread_seized(struct parasite_thread_ctl *tctl, pc->cap_last_cap = kdat.last_cap; tc->has_blk_sigset = true; +#ifdef CONFIG_MIPS + memcpy(&tc->blk_sigset, (unsigned long *)compel_thread_sigmask(tctl), sizeof(tc->blk_sigset)); + memcpy(&tc->blk_sigset_extended, (unsigned long *)compel_thread_sigmask(tctl)+1, sizeof(tc->blk_sigset)); +#else memcpy(&tc->blk_sigset, compel_thread_sigmask(tctl), sizeof(k_rtsigset_t)); +#endif ret = compel_get_thread_regs(tctl, save_task_regs, core); if (ret) { pr_err("Can't obtain regs for thread %d\n", pid); @@ -240,8 +245,15 @@ int parasite_dump_sigacts_seized(struct parasite_ctl *ctl, struct pstree_item *i ASSIGN_TYPED(sa->sigaction, encode_pointer(args->sas[i].rt_sa_handler)); ASSIGN_TYPED(sa->flags, args->sas[i].rt_sa_flags); ASSIGN_TYPED(sa->restorer, encode_pointer(args->sas[i].rt_sa_restorer)); +#ifdef CONFIG_MIPS + sa->has_mask_extended = 1; + BUILD_BUG_ON(sizeof(sa->mask) * 2 != sizeof(args->sas[0].rt_sa_mask.sig)); + memcpy(&sa->mask, &(args->sas[i].rt_sa_mask.sig[0]), sizeof(sa->mask)); + memcpy(&sa->mask_extended, 
&(args->sas[i].rt_sa_mask.sig[1]), sizeof(sa->mask)); +#else BUILD_BUG_ON(sizeof(sa->mask) != sizeof(args->sas[0].rt_sa_mask.sig)); memcpy(&sa->mask, args->sas[i].rt_sa_mask.sig, sizeof(sa->mask)); +#endif sa->has_compat_sigaction = true; sa->compat_sigaction = !compel_mode_native(ctl); @@ -569,7 +581,12 @@ struct parasite_ctl *parasite_infect_seized(pid_t pid, struct pstree_item *item, } parasite_args_size = PARASITE_ARG_SIZE_MIN; /* reset for next task */ +#ifdef CONFIG_MIPS + memcpy(&item->core[0]->tc->blk_sigset, (unsigned long *)compel_task_sigmask(ctl), sizeof(item->core[0]->tc->blk_sigset)); + memcpy(&item->core[0]->tc->blk_sigset_extended, (unsigned long *)compel_task_sigmask(ctl)+1, sizeof(item->core[0]->tc->blk_sigset)); +#else memcpy(&item->core[0]->tc->blk_sigset, compel_task_sigmask(ctl), sizeof(k_rtsigset_t)); +#endif dmpi(item)->parasite_ctl = ctl; return ctl; diff --git a/criu/pie/Makefile b/criu/pie/Makefile index a30747ac3..265dcf82b 100644 --- a/criu/pie/Makefile +++ b/criu/pie/Makefile @@ -14,6 +14,10 @@ ifneq ($(filter-out clean mrproper,$(MAKECMDGOALS)),) compel_plugins := $(shell $(COMPEL_BIN) plugins) endif +ifeq ($(ARCH),mips) + ccflags-y += -mno-abicalls -fno-pic +endif + LDS := compel/arch/$(ARCH)/scripts/compel-pack.lds.S restorer-obj-y += parasite-vdso.o ./$(ARCH_DIR)/vdso-pie.o diff --git a/criu/pie/Makefile.library b/criu/pie/Makefile.library index de75b11d4..da2a2fab3 100644 --- a/criu/pie/Makefile.library +++ b/criu/pie/Makefile.library @@ -23,3 +23,7 @@ endif CFLAGS := $(filter-out -pg $(CFLAGS-GCOV) $(CFLAGS-ASAN),$(CFLAGS)) CFLAGS += $(CFLAGS_PIE) + +ifeq ($(ARCH),mips) +CFLAGS += -fno-stack-protector -DCR_NOGLIBC -mno-abicalls -fno-pic +endif From b5c34c74c5055301821c2acbe4cf8aad646da558 Mon Sep 17 00:00:00 2001 From: Guoyun Sun Date: Wed, 8 Apr 2020 10:49:47 +0800 Subject: [PATCH 0415/2030] mips:support docker-cross compile Signed-off-by: Guoyun Sun --- .travis.yml | 4 + scripts/build/Dockerfile.mips64el-cross | 44 ++++++ 
scripts/build/Makefile | 4 +- test/zdtm/lib/arch/mips/include/asm/atomic.h | 136 +++++++++++++++++++ test/zdtm/lib/test.c | 2 +- test/zdtm/static/pthread01.c | 5 + 6 files changed, 193 insertions(+), 2 deletions(-) create mode 100644 scripts/build/Dockerfile.mips64el-cross create mode 100644 test/zdtm/lib/arch/mips/include/asm/atomic.h diff --git a/.travis.yml b/.travis.yml index 69a505193..8c126b47f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -95,6 +95,10 @@ jobs: env: TR_ARCH=ppc64-cross dist: bionic - env: TR_ARCH=local STREAM_TEST=1 + - os: linux + arch: amd64 + env: TR_ARCH=mips64el-cross + dist: bionic allow_failures: - env: TR_ARCH=docker-test - env: TR_ARCH=docker-test DIST=xenial diff --git a/scripts/build/Dockerfile.mips64el-cross b/scripts/build/Dockerfile.mips64el-cross new file mode 100644 index 000000000..1ba936105 --- /dev/null +++ b/scripts/build/Dockerfile.mips64el-cross @@ -0,0 +1,44 @@ +FROM dockcross/base:latest + +# Add the cross compiler sources +RUN echo "deb http://ftp.us.debian.org/debian/ buster main" >> /etc/apt/sources.list && \ + dpkg --add-architecture mips64el && \ + apt-get install emdebian-archive-keyring + +RUN apt-get update && apt-get install -y \ + crossbuild-essential-mips64el \ + libbz2-dev:mips64el \ + libexpat1-dev:mips64el \ + ncurses-dev:mips64el \ + libssl-dev:mips64el \ + protobuf-c-compiler \ + protobuf-compiler \ + python-protobuf \ + libnl-3-dev:mips64el \ + libprotobuf-dev:mips64el \ + libnet-dev:mips64el \ + libprotobuf-c-dev:mips64el \ + libcap-dev:mips64el \ + libaio-dev:mips64el \ + libnl-route-3-dev:mips64el + +ENV CROSS_TRIPLE=mips64el-linux-gnuabi64 +ENV CROSS_COMPILE=${CROSS_TRIPLE}- \ + CROSS_ROOT=/usr/${CROSS_TRIPLE} \ + AS=/usr/bin/${CROSS_TRIPLE}-as \ + AR=/usr/bin/${CROSS_TRIPLE}-ar \ + CC=/usr/bin/${CROSS_TRIPLE}-gcc \ + CPP=/usr/bin/${CROSS_TRIPLE}-cpp \ + CXX=/usr/bin/${CROSS_TRIPLE}-g++ \ + LD=/usr/bin/${CROSS_TRIPLE}-ld \ + FC=/usr/bin/${CROSS_TRIPLE}-gfortran + +ENV 
PATH="${PATH}:${CROSS_ROOT}/bin" \ + PKG_CONFIG_PATH=/usr/lib/${CROSS_TRIPLE}/pkgconfig \ + ARCH=mips \ + SUBARCH=mips + +COPY . /criu +WORKDIR /criu + +RUN make mrproper && date && make -j $(nproc) zdtm && date diff --git a/scripts/build/Makefile b/scripts/build/Makefile index 855539152..974d1455f 100644 --- a/scripts/build/Makefile +++ b/scripts/build/Makefile @@ -2,7 +2,9 @@ ARCHES := x86_64 fedora-asan fedora-rawhide centos armv7hf TARGETS := $(ARCHES) alpine TARGETS_CLANG := $(addsuffix $(TARGETS),-clang) CONTAINER_RUNTIME := docker -TARGETS += armv7-cross aarch64-cross ppc64-cross + +TARGETS += armv7-cross aarch64-cross ppc64-cross mips64el-cross + all: $(TARGETS) $(TARGETS_CLANG) .PHONY: all diff --git a/test/zdtm/lib/arch/mips/include/asm/atomic.h b/test/zdtm/lib/arch/mips/include/asm/atomic.h new file mode 100644 index 000000000..acf4c03cd --- /dev/null +++ b/test/zdtm/lib/arch/mips/include/asm/atomic.h @@ -0,0 +1,136 @@ +#ifndef __CR_ATOMIC_H__ +#define __CR_ATOMIC_H__ + +//#include +//#include "common/compiler.h" +//#include "common/arch/mips/asm/utils.h" +//#include "common/arch/mips/asm/cmpxchg.h" + +typedef uint32_t atomic_t; +/* typedef struct { */ +/* int counter; */ +/* }atomic_t; */ + +#define __WEAK_LLSC_MB " sync \n" + +#define smp_llsc_mb() __asm__ __volatile__(__WEAK_LLSC_MB : : :"memory") + +#define smp_mb__before_llsc() smp_llsc_mb() +#define smp_mb__before_atomic() smp_mb__before_llsc() +#define smp_mb__after_atomic() smp_llsc_mb() + +#define likely(x) __builtin_expect(!!(x), 1) +#define unlikely(x) __builtin_expect(!!(x), 0) + +#define atomic_get(v) (*(volatile int *)v) +#define atomic_set(v, i) ((*v) = (i)) + +//#define atomic_get atomic_read + +/* + * atomic_add - add integer to atomic variable + * @i: integer value to add + * @v: pointer of type atomic_t + * + * Atomically adds @i to @v. 
+ */ + +static __inline__ void atomic_add(int i, atomic_t * v) +{ + int temp; + + do { + __asm__ __volatile__( + " .set mips3 \n" + " ll %0, %1 # atomic_add \n" + " addu %0, %2 \n" + " sc %0, %1 \n" + " .set mips0 \n" + : "=&r" (temp), "+m" (*v) + : "Ir" (i)); + } while (unlikely(!temp)); +} + +/* + * atomic_sub - subtract the atomic variable + * @i: integer value to subtract + * @v: pointer of type atomic_t + * + * Atomically subtracts @i from @v. + */ +static __inline__ void atomic_sub(int i, atomic_t * v) +{ + int temp; + + do { + __asm__ __volatile__( + " .set mips3 \n" + " ll %0, %1 # atomic_sub \n" + " subu %0, %2 \n" + " sc %0, %1 \n" + " .set mips0 \n" + : "=&r" (temp), "+m" (*v) + : "Ir" (i)); + } while (unlikely(!temp)); +} + +/* + * Same as above, but return the result value + */ +static __inline__ int atomic_add_return(int i, atomic_t * v) +{ + int result; + int temp; + + smp_mb__before_llsc(); + + do { + __asm__ __volatile__( + " .set mips3 \n" + " ll %1, %2 # atomic_add_return \n" + " addu %0, %1, %3 \n" + " sc %0, %2 \n" + " .set mips0 \n" + : "=&r" (result), "=&r" (temp), "+m" (*v) + : "Ir" (i)); + } while (unlikely(!result)); + + result = temp + i; + + smp_llsc_mb(); + + return result; +} + +static __inline__ int atomic_sub_return(int i, atomic_t * v) +{ + int result; + int temp; + + smp_mb__before_llsc(); + + do { + __asm__ __volatile__( + " .set mips3 \n" + " ll %1, %2 # atomic_sub_return \n" + " subu %0, %1, %3 \n" + " sc %0, %2 \n" + " .set mips0 \n" + : "=&r" (result), "=&r" (temp), "+m" (*v) + : "Ir" (i)); + } while (unlikely(!result)); + + result = temp - i; + + smp_llsc_mb(); + + return result; +} + +#define atomic_cmpxchg(v, o, n) (__sync_val_compare_and_swap((v), (o), (n))) /* atomic_t is a plain uint32_t in this header: no ->counter member and no cmpxchg() helper; yields the old value */ +#define atomic_dec_return(v) atomic_sub_return(1, (v)) +#define atomic_inc_return(v) atomic_add_return(1, (v)) + +static inline unsigned int atomic_inc(atomic_t *v) { return atomic_add_return(1, v) - 1; } +static inline unsigned int atomic_dec(atomic_t *v) { return
atomic_sub_return(1, v) + 1; } +#endif /* __CR_ATOMIC_H__ */ diff --git a/test/zdtm/lib/test.c b/test/zdtm/lib/test.c index 630476de0..e031357ac 100644 --- a/test/zdtm/lib/test.c +++ b/test/zdtm/lib/test.c @@ -403,7 +403,7 @@ pid_t sys_clone_unified(unsigned long flags, void *child_stack, void *parent_tid { #ifdef __x86_64__ return (pid_t)syscall(__NR_clone, flags, child_stack, parent_tid, child_tid, newtls); -#elif (__i386__ || __arm__ || __aarch64__ ||__powerpc64__) +#elif (__i386__ || __arm__ || __aarch64__ ||__powerpc64__ || __mips__) return (pid_t)syscall(__NR_clone, flags, child_stack, parent_tid, newtls, child_tid); #elif __s390x__ return (pid_t)syscall(__NR_clone, child_stack, flags, parent_tid, child_tid, newtls); diff --git a/test/zdtm/static/pthread01.c b/test/zdtm/static/pthread01.c index 1e84463ee..bdd7c59d8 100644 --- a/test/zdtm/static/pthread01.c +++ b/test/zdtm/static/pthread01.c @@ -27,6 +27,11 @@ static __thread struct tls_data_s { static task_waiter_t t1; static task_waiter_t t2; +#ifdef CONFIG_MIPS +#ifndef SIGSTKFLT +#define SIGSTKFLT 16 +#endif +#endif static char *decode_signal(const sigset_t *s, char *buf) { buf[0] = '\0'; From 40169b950eff23347975acc0de0b9316f08d175b Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Thu, 26 Mar 2020 06:26:48 +0000 Subject: [PATCH 0416/2030] style: fix typos Oddly, one of the tests had a typo that should have been fatal.
Signed-off-by: Nicolas Viennot --- criu/config.c | 2 +- test/others/ext-tty/run.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/criu/config.c b/criu/config.c index e78b534a9..eb303fd77 100644 --- a/criu/config.c +++ b/criu/config.c @@ -876,7 +876,7 @@ int check_options(void) } if (!opts.restore_detach && opts.restore_sibling) { - pr_err("--restore-sibling only makes sense with --restore-detach\n"); + pr_err("--restore-sibling only makes sense with --restore-detached\n"); return 1; } diff --git a/test/others/ext-tty/run.py b/test/others/ext-tty/run.py index 2c0bacc84..8109033cb 100755 --- a/test/others/ext-tty/run.py +++ b/test/others/ext-tty/run.py @@ -29,7 +29,7 @@ ttyid = "fd[%d]:tty[%x:%x]" % (slave, st.st_rdev, st.st_dev) ret = subprocess.Popen([ "../../../criu/criu", "restore", "-v4", "--inherit-fd", ttyid, - "--restore-sibling", "--restore-detach" + "--restore-sibling", "--restore-detached" ]).wait() if ret: sys.exit(ret) From d38851c9bd003d9c5b2b1c804291f518db681938 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Sat, 18 Apr 2020 22:28:24 +0300 Subject: [PATCH 0417/2030] test/jenkins: use bash to run shell scripts We constantly run into issues like this: ./test/jenkins/criu-iter.sh: 3: source: not found It looks like a good idea to use a single shell to run our Jenkins scripts.
Signed-off-by: Andrei Vagin --- test/jenkins/criu-btrfs.sh | 2 ++ test/jenkins/criu-by-id.sh | 2 ++ test/jenkins/criu-dedup.sh | 2 ++ test/jenkins/criu-dump.sh | 2 ++ test/jenkins/criu-fault.sh | 1 + test/jenkins/criu-fcg.sh | 2 ++ test/jenkins/criu-groups.sh | 2 ++ test/jenkins/criu-inhfd.sh | 2 ++ test/jenkins/criu-iter.sh | 2 ++ test/jenkins/criu-join-ns.sh | 2 ++ test/jenkins/criu-lazy-common.sh | 2 ++ test/jenkins/criu-lazy-migration.sh | 2 ++ test/jenkins/criu-lazy-pages.sh | 2 ++ test/jenkins/criu-other.sh | 2 ++ test/jenkins/criu-overlay.sh | 2 ++ test/jenkins/criu-pre-dump.sh | 2 ++ test/jenkins/criu-remote-lazy-pages.sh | 2 ++ test/jenkins/criu-sibling.sh | 2 ++ test/jenkins/criu-snap.sh | 2 ++ test/jenkins/criu-stop.sh | 2 ++ test/jenkins/criu-user.sh | 2 ++ test/jenkins/criu.sh | 2 ++ 22 files changed, 43 insertions(+) diff --git a/test/jenkins/criu-btrfs.sh b/test/jenkins/criu-btrfs.sh index e749ad906..e456f1c34 100644 --- a/test/jenkins/criu-btrfs.sh +++ b/test/jenkins/criu-btrfs.sh @@ -1,3 +1,5 @@ +#!/bin/bash + # This is a job which is executed on btrfs source `dirname $0`/criu-lib.sh && diff --git a/test/jenkins/criu-by-id.sh b/test/jenkins/criu-by-id.sh index 2381e73f7..c041ed358 100644 --- a/test/jenkins/criu-by-id.sh +++ b/test/jenkins/criu-by-id.sh @@ -1,3 +1,5 @@ +#!/bin/bash + echo 950000 > /sys/fs/cgroup/cpu,cpuacct/system/cpu.rt_runtime_us echo 950000 > /sys/fs/cgroup/cpu,cpuacct/system/jenkins.service/cpu.rt_runtime_us git checkout -f ${TEST_COMMIT} diff --git a/test/jenkins/criu-dedup.sh b/test/jenkins/criu-dedup.sh index e75ef5f82..0041496d8 100755 --- a/test/jenkins/criu-dedup.sh +++ b/test/jenkins/criu-dedup.sh @@ -1,3 +1,5 @@ +#!/bin/bash + # Check auto-deduplication of pagemaps set -e source `dirname $0`/criu-lib.sh diff --git a/test/jenkins/criu-dump.sh b/test/jenkins/criu-dump.sh index 381cf7a98..4c49532b2 100755 --- a/test/jenkins/criu-dump.sh +++ b/test/jenkins/criu-dump.sh @@ -1,3 +1,5 @@ +#!/bin/bash + # Check that dump is not 
destructive set -e source `dirname $0`/criu-lib.sh diff --git a/test/jenkins/criu-fault.sh b/test/jenkins/criu-fault.sh index c27dd3738..f871a140b 100755 --- a/test/jenkins/criu-fault.sh +++ b/test/jenkins/criu-fault.sh @@ -1,4 +1,5 @@ #!/bin/bash + # Check known fault injections set -e source `dirname $0`/criu-lib.sh diff --git a/test/jenkins/criu-fcg.sh b/test/jenkins/criu-fcg.sh index 938a72f26..ca5054f5e 100755 --- a/test/jenkins/criu-fcg.sh +++ b/test/jenkins/criu-fcg.sh @@ -1,3 +1,5 @@ +#!/bin/bash + # Test how freeze cgroup works set -e source `dirname $0`/criu-lib.sh diff --git a/test/jenkins/criu-groups.sh b/test/jenkins/criu-groups.sh index 508d20aa6..b5bea4eab 100755 --- a/test/jenkins/criu-groups.sh +++ b/test/jenkins/criu-groups.sh @@ -1,3 +1,5 @@ +#!/bin/bash + # Make one regular C/R cycle over randomly-generated groups set -e source `dirname $0`/criu-lib.sh diff --git a/test/jenkins/criu-inhfd.sh b/test/jenkins/criu-inhfd.sh index a59dcda6e..8f44ba13a 100755 --- a/test/jenkins/criu-inhfd.sh +++ b/test/jenkins/criu-inhfd.sh @@ -1,3 +1,5 @@ +#!/bin/bash + # Check known fault injections set -e source `dirname $0`/criu-lib.sh diff --git a/test/jenkins/criu-iter.sh b/test/jenkins/criu-iter.sh index d414b0575..304aa43db 100755 --- a/test/jenkins/criu-iter.sh +++ b/test/jenkins/criu-iter.sh @@ -1,3 +1,5 @@ +#!/bin/bash + # Make 3 iteration of dump/restore for each test set -e source `dirname $0`/criu-lib.sh diff --git a/test/jenkins/criu-join-ns.sh b/test/jenkins/criu-join-ns.sh index 39ef182f0..241c29034 100755 --- a/test/jenkins/criu-join-ns.sh +++ b/test/jenkins/criu-join-ns.sh @@ -1,3 +1,5 @@ +#!/bin/bash + # Make one regular C/R cycle set -e source `dirname $0`/criu-lib.sh diff --git a/test/jenkins/criu-lazy-common.sh b/test/jenkins/criu-lazy-common.sh index 7fdab40dd..a8ff9e51b 100644 --- a/test/jenkins/criu-lazy-common.sh +++ b/test/jenkins/criu-lazy-common.sh @@ -1,3 +1,5 @@ +#!/bin/bash + KERN_MAJ=`uname -r | cut -d. 
-f1` KERN_MIN=`uname -r | cut -d. -f2` if [ $KERN_MAJ -ge "4" ] && [ $KERN_MIN -ge "11" ]; then diff --git a/test/jenkins/criu-lazy-migration.sh b/test/jenkins/criu-lazy-migration.sh index 30e3c0375..02a212e0d 100755 --- a/test/jenkins/criu-lazy-migration.sh +++ b/test/jenkins/criu-lazy-migration.sh @@ -1,3 +1,5 @@ +#!/bin/bash + # Check lazy-pages set -e source `dirname $0`/criu-lib.sh diff --git a/test/jenkins/criu-lazy-pages.sh b/test/jenkins/criu-lazy-pages.sh index a3ee9a4ec..9ef721739 100755 --- a/test/jenkins/criu-lazy-pages.sh +++ b/test/jenkins/criu-lazy-pages.sh @@ -1,3 +1,5 @@ +#!/bin/bash + # Check lazy-pages set -e source `dirname $0`/criu-lib.sh diff --git a/test/jenkins/criu-other.sh b/test/jenkins/criu-other.sh index c6c231c86..bb68f912a 100755 --- a/test/jenkins/criu-other.sh +++ b/test/jenkins/criu-other.sh @@ -1,3 +1,5 @@ +#!/bin/bash + source `dirname $0`/criu-lib.sh && prep && make -C test other && diff --git a/test/jenkins/criu-overlay.sh b/test/jenkins/criu-overlay.sh index 5ef7682ac..de80007a3 100755 --- a/test/jenkins/criu-overlay.sh +++ b/test/jenkins/criu-overlay.sh @@ -1,3 +1,5 @@ +#!/bin/bash + # Make one regular C/R cycle set -e source `dirname $0`/criu-lib.sh diff --git a/test/jenkins/criu-pre-dump.sh b/test/jenkins/criu-pre-dump.sh index 95f4d8549..137f7c23f 100755 --- a/test/jenkins/criu-pre-dump.sh +++ b/test/jenkins/criu-pre-dump.sh @@ -1,3 +1,5 @@ +#!/bin/bash + # Check 3 pre-dump-s before dump (with and w/o page server) set -e source `dirname $0`/criu-lib.sh diff --git a/test/jenkins/criu-remote-lazy-pages.sh b/test/jenkins/criu-remote-lazy-pages.sh index ea0d17f0e..1c677e333 100755 --- a/test/jenkins/criu-remote-lazy-pages.sh +++ b/test/jenkins/criu-remote-lazy-pages.sh @@ -1,3 +1,5 @@ +#!/bin/bash + # Check remote-lazy-pages set -e source `dirname $0`/criu-lib.sh diff --git a/test/jenkins/criu-sibling.sh b/test/jenkins/criu-sibling.sh index 93f070330..d59b38970 100755 --- a/test/jenkins/criu-sibling.sh +++ 
b/test/jenkins/criu-sibling.sh @@ -1,3 +1,5 @@ +#!/bin/bash + # Make 3 iteration of dump/restore for each test set -e source `dirname $0`/criu-lib.sh diff --git a/test/jenkins/criu-snap.sh b/test/jenkins/criu-snap.sh index d28ba45d9..b08c57f52 100755 --- a/test/jenkins/criu-snap.sh +++ b/test/jenkins/criu-snap.sh @@ -1,3 +1,5 @@ +#!/bin/bash + # Check snapshots set -e source `dirname $0`/criu-lib.sh diff --git a/test/jenkins/criu-stop.sh b/test/jenkins/criu-stop.sh index d92519d68..64da2ee8a 100644 --- a/test/jenkins/criu-stop.sh +++ b/test/jenkins/criu-stop.sh @@ -1,3 +1,5 @@ +#!/bin/bash + # Check --leave-stopped option set -e source `dirname $0`/criu-lib.sh diff --git a/test/jenkins/criu-user.sh b/test/jenkins/criu-user.sh index d89ede203..f4ec52fc6 100755 --- a/test/jenkins/criu-user.sh +++ b/test/jenkins/criu-user.sh @@ -1,3 +1,5 @@ +#!/bin/bash + # Make 3 iteration of dump/restore for each test set -e source `dirname $0`/criu-lib.sh diff --git a/test/jenkins/criu.sh b/test/jenkins/criu.sh index 19d545c3c..0ee750b08 100755 --- a/test/jenkins/criu.sh +++ b/test/jenkins/criu.sh @@ -1,3 +1,5 @@ +#!/bin/bash + # Make one regular C/R cycle set -e source `dirname $0`/criu-lib.sh From be1394122112381eca55faf5b1a7b2e2b51bd383 Mon Sep 17 00:00:00 2001 From: Guoyun Sun Date: Fri, 17 Apr 2020 11:54:31 +0000 Subject: [PATCH 0418/2030] mips: implement arch_shmat() MIPS CPUs with VIPT caches also have aliasing issues, just like ARMv6. To overcome this issue, page coloring with 0x40000 alignment for shared mappings (SHMLBA) was introduced in the kernel. https://github.com/torvalds/linux/blob/master/arch/mips/include/asm/shmparam.h With this change, the zdtm tests ipc.c, shm.c, shm-unaligned.c and shm-mp.c pass.
Signed-off-by: Guoyun Sun --- criu/arch/mips/include/asm/restorer.h | 4 ++++ criu/arch/mips/restorer.c | 32 +++++++++++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/criu/arch/mips/include/asm/restorer.h b/criu/arch/mips/include/asm/restorer.h index 1a33cd884..d916377f4 100755 --- a/criu/arch/mips/include/asm/restorer.h +++ b/criu/arch/mips/include/asm/restorer.h @@ -76,4 +76,8 @@ static inline void free_compat_syscall_stack(void *stack32) { } int restore_gpregs(struct rt_sigframe *f, UserMipsRegsEntry *r); int restore_nonsigframe_gpregs(UserMipsRegsEntry *r); +#define ARCH_HAS_SHMAT_HOOK +unsigned long arch_shmat(int shmid, void *shmaddr, + int shmflg, unsigned long size); + #endif diff --git a/criu/arch/mips/restorer.c b/criu/arch/mips/restorer.c index 2e196b60c..e3a1e4a44 100755 --- a/criu/arch/mips/restorer.c +++ b/criu/arch/mips/restorer.c @@ -15,3 +15,35 @@ int restore_nonsigframe_gpregs(UserMipsRegsEntry *r) { return 0; } + +#define SHMLBA 0x40000 +unsigned long arch_shmat(int shmid, void *shmaddr, + int shmflg, unsigned long size) +{ + unsigned long smap; + + /* SHMLBA-aligned, direct call shmat() */ + if (!((unsigned long)shmaddr & (SHMLBA - 1))) + return sys_shmat(shmid, shmaddr, shmflg); + + smap = sys_shmat(shmid, NULL, shmflg); + if (IS_ERR_VALUE(smap)) { + pr_err("shmat() with NULL shmaddr failed: %d\n", (int)smap); + return smap; + } + + /* We're lucky! 
*/ + if (smap == (unsigned long)shmaddr) + return smap; + + /* Warn ALOUD */ + pr_warn("Restoring shmem %p unaligned to SHMLBA.\n", shmaddr); + pr_warn("Make sure that you don't migrate shmem from non-VIPT cached CPU to VIPT cached \n"); + pr_warn("Otherwise YOU HAVE A CHANCE OF DATA CORRUPTIONS in writeable shmem\n"); + + smap = sys_mremap(smap, size, size, + MREMAP_FIXED | MREMAP_MAYMOVE, (unsigned long)shmaddr); + if (IS_ERR_VALUE(smap)) + pr_err("mremap() for shmem failed: %d\n", (int)smap); + return smap; +} From 277b0b69fac7afa6cbde51e7c99e2756029d0d6c Mon Sep 17 00:00:00 2001 From: Guoyun Sun Date: Wed, 22 Apr 2020 15:43:04 +0800 Subject: [PATCH 0419/2030] mips: fix failure when running zdtm test pthread01.c k_rtsigset_t is 16 bytes on the MIPS architecture, not 8 bytes, so blk_sigset_extended is added to TaskCoreEntry and ThreadCoreEntry to dump the extra 8 bytes in parasite-syscall.c and to restore them in cr-restore.c Signed-off-by: Guoyun Sun --- criu/cr-restore.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/criu/cr-restore.c b/criu/cr-restore.c index 99b36e0d4..ec00bf71b 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -3551,8 +3551,12 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns for (i = 0; i < current->nr_threads; i++) { CoreEntry *tcore; struct rt_sigframe *sigframe; +#ifdef CONFIG_MIPS + k_rtsigset_t mips_blkset; +#else k_rtsigset_t *blkset = NULL; +#endif thread_args[i].pid = current->threads[i].ns[0].virt; thread_args[i].siginfo_n = siginfo_priv_nr[i]; thread_args[i].siginfo = task_args->siginfo; @@ -3563,11 +3567,22 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns if (thread_args[i].pid == pid) { task_args->t = thread_args + i; tcore = core; +#ifdef CONFIG_MIPS + mips_blkset.sig[0] = tcore->tc->blk_sigset; + mips_blkset.sig[1] = tcore->tc->blk_sigset_extended; +#else blkset = (void *)&tcore->tc->blk_sigset; +#endif }
else { tcore = current->core[i]; - if (tcore->thread_core->has_blk_sigset) + if (tcore->thread_core->has_blk_sigset) { +#ifdef CONFIG_MIPS + mips_blkset.sig[0] = tcore->thread_core->blk_sigset; + mips_blkset.sig[1] = tcore->thread_core->blk_sigset_extended; +#else blkset = (void *)&tcore->thread_core->blk_sigset; +#endif + } } if ((tcore->tc || tcore->ids) && thread_args[i].pid != pid) { @@ -3607,7 +3622,11 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns thread_args[i].mz = mz + i; sigframe = (struct rt_sigframe *)&mz[i].rt_sigframe; +#ifdef CONFIG_MIPS + if (construct_sigframe(sigframe, sigframe, &mips_blkset, tcore)) +#else if (construct_sigframe(sigframe, sigframe, blkset, tcore)) +#endif goto err; if (tcore->thread_core->comm) From 8364b09407a969c1af68cd3e477449fa60d2518e Mon Sep 17 00:00:00 2001 From: Josh Abraham Date: Mon, 27 Apr 2020 13:40:46 -0400 Subject: [PATCH 0420/2030] soccr/test: Fix error logging in libsoccr tcp-test Signed-off-by: Joshua Abraham --- soccr/test/tcp-conn.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/soccr/test/tcp-conn.c b/soccr/test/tcp-conn.c index e31f58e7e..cdd75129a 100644 --- a/soccr/test/tcp-conn.c +++ b/soccr/test/tcp-conn.c @@ -101,12 +101,12 @@ int main(void) /* Start testing */ dst_let = sizeof(addr); if (getsockname(sock, (struct sockaddr *) &addr, &dst_let)) { - pr_perror("connect"); + pr_perror("getsockname"); return 1; } dst_let = sizeof(addr); if (getpeername(sock, (struct sockaddr *) &dst, &dst_let)) { - pr_perror("connect"); + pr_perror("getpeername"); return 1; } From 5bd776da382fb0838830d80f4bec6c0aaec8bfbb Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Mon, 11 May 2020 02:38:14 -0700 Subject: [PATCH 0421/2030] Remove dupe of "deprecated stuff on" msg A similar one is already printed in check_options(). Before this patch: > $ ./criu/criu -vvvvvv --deprecated --log-file=/dev/stdout xxx > (00.000000) Turn deprecated stuff ON > ... 
> (00.029680) DEPRECATED ON > (00.029687) Error (criu/crtools.c:284): unknown command: xxx Signed-off-by: Kir Kolyshkin --- criu/crtools.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/criu/crtools.c b/criu/crtools.c index ad61fa9bb..76172f350 100644 --- a/criu/crtools.c +++ b/criu/crtools.c @@ -210,9 +210,6 @@ int main(int argc, char *argv[], char *envp[]) if (fault_injected(FI_CANNOT_MAP_VDSO)) kdat.can_map_vdso = 0; - if (opts.deprecated_ok) - pr_debug("DEPRECATED ON\n"); - if (!list_empty(&opts.inherit_fds)) { if (strcmp(argv[optind], "restore")) { pr_err("--inherit-fd is restore-only option\n"); From 8452be93cf24b0dffe257ddab892a9c95d44c91b Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Wed, 13 May 2020 15:48:28 +0000 Subject: [PATCH 0422/2030] travis: use bionic almost everywhere A few tests were still running on xenial because at some point they were hanging. This switches now all tests to bionic except one docker test which still uses xenial to test with overlayfs. Signed-off-by: Adrian Reber --- .travis.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.travis.yml b/.travis.yml index 8c126b47f..b28bd64f9 100644 --- a/.travis.yml +++ b/.travis.yml @@ -51,7 +51,7 @@ jobs: - os: linux arch: amd64 env: TR_ARCH=fedora-rawhide - dist: xenial # test hangs on bionic + dist: bionic - os: linux arch: amd64 env: TR_ARCH=podman-test @@ -69,19 +69,19 @@ jobs: - os: linux arch: amd64 env: TR_ARCH=alpine CLANG=1 - dist: xenial # test hangs on bionic + dist: bionic - os: linux arch: amd64 env: TR_ARCH=alpine - dist: xenial # test hangs on bionic + dist: bionic - os: linux arch: amd64 env: TR_ARCH=centos - dist: xenial # test hangs on bionic + dist: bionic - os: linux arch: amd64 env: TR_ARCH=fedora-asan - dist: xenial # test hangs on bionic + dist: bionic - os: linux arch: amd64 env: TR_ARCH=armv7-cross From 00b8257d9f31876e35e08e6d556f6f67d76f3908 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Wed, 13 May 2020 17:57:03 +0200 
Subject: [PATCH 0423/2030] tests: move cross compilation to github actions This moves the cross compilation tests to github actions, to slightly reduce the number of Travis tests and run them in parallel on github actions. Signed-off-by: Adrian Reber --- .github/workflows/cross-compile.yml | 23 +++++++++++++++++++++++ .travis.yml | 16 ---------------- 2 files changed, 23 insertions(+), 16 deletions(-) create mode 100644 .github/workflows/cross-compile.yml diff --git a/.github/workflows/cross-compile.yml b/.github/workflows/cross-compile.yml new file mode 100644 index 000000000..9545d3df6 --- /dev/null +++ b/.github/workflows/cross-compile.yml @@ -0,0 +1,23 @@ +name: Cross Compile Tests + +on: + push: + branches: [ criu-dev ] + pull_request: + branches: [ criu-dev ] + schedule: + - cron: '55 5 * * *' + +jobs: + build: + + runs-on: ubuntu-latest + strategy: + matrix: + target: [armv7-cross, aarch64-cross, ppc64-cross, mips64el-cross] + + steps: + - uses: actions/checkout@v2 + - name: Run Cross Compilation Targets + run: > + sudo make -C scripts/travis ${{ matrix.target }} diff --git a/.travis.yml b/.travis.yml index b28bd64f9..e71afa0a3 100644 --- a/.travis.yml +++ b/.travis.yml @@ -82,23 +82,7 @@ jobs: arch: amd64 env: TR_ARCH=fedora-asan dist: bionic - - os: linux - arch: amd64 - env: TR_ARCH=armv7-cross - dist: bionic - - os: linux - arch: amd64 - env: TR_ARCH=aarch64-cross - dist: bionic - - os: linux - arch: amd64 - env: TR_ARCH=ppc64-cross - dist: bionic - env: TR_ARCH=local STREAM_TEST=1 - - os: linux - arch: amd64 - env: TR_ARCH=mips64el-cross - dist: bionic allow_failures: - env: TR_ARCH=docker-test - env: TR_ARCH=docker-test DIST=xenial From 00a44031e220919c9df6a1e25db8f32fb141d741 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Fri, 15 May 2020 17:10:58 -0700 Subject: [PATCH 0424/2030] cr-service: fix wording in debug messages The message "Overwriting RPC settings with values from " is misleading, giving the impression that file is being read and 
consumed. It really puzzled me, since the file didn't exist. What it needs to say is "Would overwrite", i.e. if a file with such name is present, it would be used. Also, add actual "Parsing file ..." so it will be clear which files are being used. Signed-off-by: Kir Kolyshkin --- criu/config.c | 2 ++ criu/cr-service.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/criu/config.c b/criu/config.c index eb303fd77..904addf3a 100644 --- a/criu/config.c +++ b/criu/config.c @@ -126,6 +126,8 @@ static char ** parse_config(char *filepath) if (!configfile) return NULL; + pr_debug("Parsing config file %s\n", filepath); + configuration = xmalloc(config_size * sizeof(char *)); if (configuration == NULL) { fclose(configfile); diff --git a/criu/cr-service.c b/criu/cr-service.c index 53eadb1bc..6dc2379d6 100644 --- a/criu/cr-service.c +++ b/criu/cr-service.c @@ -405,7 +405,7 @@ static int setup_opts_from_req(int sk, CriuOpts *req) } if (req->config_file) { - pr_debug("Overwriting RPC settings with values from %s\n", req->config_file); + pr_debug("Would overwrite RPC settings with values from %s\n", req->config_file); } if (kerndat_init()) From f6d1b498dc22a66865e1c8899ac5dc2b81363ce1 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Fri, 15 May 2020 17:13:33 -0700 Subject: [PATCH 0425/2030] cr-service: spell out an error While working on runc checkpointing, I incorrectly closed status_fd prematurely, and received an error from CRIU, but it was non-descriptive. Do print the error from open().
Signed-off-by: Kir Kolyshkin --- criu/cr-service.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/criu/cr-service.c b/criu/cr-service.c index 6dc2379d6..7c2ff9835 100644 --- a/criu/cr-service.c +++ b/criu/cr-service.c @@ -680,8 +680,10 @@ static int setup_opts_from_req(int sk, CriuOpts *req) if (req->has_status_fd) { sprintf(status_fd, "/proc/%d/fd/%d", ids.pid, req->status_fd); opts.status_fd = open(status_fd, O_WRONLY); - if (opts.status_fd < 0) + if (opts.status_fd < 0) { + pr_perror("Can't reopen status fd %s", status_fd); goto err; + } } if (req->orphan_pts_master) From ae4fd07ca5c09482fa5a78f3ede0d31a4b5ff63e Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Thu, 20 Feb 2020 21:10:23 +0000 Subject: [PATCH 0426/2030] libcriu: Add orphan pts master The orphan pts master option was introduced with commit [1] to enable checkpoint/restore of containers with a pty pair used as a console. [1] https://github.com/checkpoint-restore/criu/commit/6afe523d97d59e6bf29621b8aa0e6a4332f710fc Signed-off-by: Radostin Stoyanov --- lib/c/criu.c | 11 +++++++++++ lib/c/criu.h | 2 ++ 2 files changed, 13 insertions(+) diff --git a/lib/c/criu.c b/lib/c/criu.c index 1d0a235f4..2ac18ade9 100644 --- a/lib/c/criu.c +++ b/lib/c/criu.c @@ -541,6 +541,17 @@ void criu_set_shell_job(bool shell_job) criu_local_set_shell_job(global_opts, shell_job); } +void criu_local_set_orphan_pts_master(criu_opts *opts, bool orphan_pts_master) +{ + opts->rpc->has_orphan_pts_master = true; + opts->rpc->orphan_pts_master = orphan_pts_master; +} + +void criu_set_orphan_pts_master(bool orphan_pts_master) +{ + criu_local_set_orphan_pts_master(global_opts, orphan_pts_master); +} + void criu_local_set_file_locks(criu_opts *opts, bool file_locks) { opts->rpc->has_file_locks = true; diff --git a/lib/c/criu.h b/lib/c/criu.h index 22db0fdcf..3a9204f5b 100644 --- a/lib/c/criu.h +++ b/lib/c/criu.h @@ -72,6 +72,7 @@ void criu_set_tcp_close(bool tcp_close); void criu_set_weak_sysctls(bool val); 
void criu_set_evasive_devices(bool evasive_devices); void criu_set_shell_job(bool shell_job); +void criu_set_orphan_pts_master(bool orphan_pts_master); void criu_set_file_locks(bool file_locks); void criu_set_track_mem(bool track_mem); void criu_set_auto_dedup(bool auto_dedup); @@ -185,6 +186,7 @@ void criu_local_set_tcp_close(criu_opts *opts, bool tcp_close); void criu_local_set_weak_sysctls(criu_opts *opts, bool val); void criu_local_set_evasive_devices(criu_opts *opts, bool evasive_devices); void criu_local_set_shell_job(criu_opts *opts, bool shell_job); +void criu_local_set_orphan_pts_master(criu_opts *opts, bool orphan_pts_master); void criu_local_set_file_locks(criu_opts *opts, bool file_locks); void criu_local_set_track_mem(criu_opts *opts, bool track_mem); void criu_local_set_auto_dedup(criu_opts *opts, bool auto_dedup); From 4ac9a3c904ace0414521afc05e1ba3287a95f248 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Fri, 21 Feb 2020 12:23:01 +0000 Subject: [PATCH 0427/2030] libcriu: Use spaces around '=' Signed-off-by: Radostin Stoyanov --- lib/c/criu.c | 94 ++++++++++++++++++++++++++-------------------------- 1 file changed, 47 insertions(+), 47 deletions(-) diff --git a/lib/c/criu.c b/lib/c/criu.c index 2ac18ade9..7daac7dbf 100644 --- a/lib/c/criu.c +++ b/lib/c/criu.c @@ -256,11 +256,11 @@ int criu_local_init_opts(criu_opts **o) return -1; } - opts->rpc = rpc; - opts->notify = NULL; + opts->rpc = rpc; + opts->notify = NULL; - opts->service_comm = CRIU_COMM_BIN; - opts->service_binary = strdup(CR_DEFAULT_SERVICE_BIN); + opts->service_comm = CRIU_COMM_BIN; + opts->service_binary = strdup(CR_DEFAULT_SERVICE_BIN); if(opts->service_binary == NULL) { perror("Can't allocate memory for criu service setting"); @@ -303,8 +303,8 @@ int criu_notify_pid(criu_notify_arg_t na) void criu_local_set_pid(criu_opts *opts, int pid) { - opts->rpc->has_pid = true; - opts->rpc->pid = pid; + opts->rpc->has_pid = true; + opts->rpc->pid = pid; } void criu_set_pid(int pid) @@ 
-408,8 +408,8 @@ void criu_set_work_dir_fd(int fd) void criu_local_set_leave_running(criu_opts *opts, bool leave_running) { - opts->rpc->has_leave_running = true; - opts->rpc->leave_running = leave_running; + opts->rpc->has_leave_running = true; + opts->rpc->leave_running = leave_running; } void criu_set_leave_running(bool leave_running) @@ -419,8 +419,8 @@ void criu_set_leave_running(bool leave_running) void criu_local_set_ext_unix_sk(criu_opts *opts, bool ext_unix_sk) { - opts->rpc->has_ext_unix_sk = true; - opts->rpc->ext_unix_sk = ext_unix_sk; + opts->rpc->has_ext_unix_sk = true; + opts->rpc->ext_unix_sk = ext_unix_sk; } void criu_set_ext_unix_sk(bool ext_unix_sk) @@ -477,8 +477,8 @@ int criu_add_unix_sk(unsigned int inode) void criu_local_set_tcp_established(criu_opts *opts, bool tcp_established) { - opts->rpc->has_tcp_established = true; - opts->rpc->tcp_established = tcp_established; + opts->rpc->has_tcp_established = true; + opts->rpc->tcp_established = tcp_established; } void criu_set_tcp_established(bool tcp_established) @@ -488,8 +488,8 @@ void criu_set_tcp_established(bool tcp_established) void criu_local_set_tcp_skip_in_flight(criu_opts *opts, bool tcp_skip_in_flight) { - opts->rpc->has_tcp_skip_in_flight = true; - opts->rpc->tcp_skip_in_flight = tcp_skip_in_flight; + opts->rpc->has_tcp_skip_in_flight = true; + opts->rpc->tcp_skip_in_flight = tcp_skip_in_flight; } void criu_set_tcp_skip_in_flight(bool tcp_skip_in_flight) @@ -499,8 +499,8 @@ void criu_set_tcp_skip_in_flight(bool tcp_skip_in_flight) void criu_local_set_tcp_close(criu_opts *opts, bool tcp_close) { - opts->rpc->has_tcp_close = true; - opts->rpc->tcp_close = tcp_close; + opts->rpc->has_tcp_close = true; + opts->rpc->tcp_close = tcp_close; } void criu_set_tcp_close(bool tcp_close) @@ -511,7 +511,7 @@ void criu_set_tcp_close(bool tcp_close) void criu_local_set_weak_sysctls(criu_opts *opts, bool val) { opts->rpc->has_weak_sysctls = true; - opts->rpc->weak_sysctls = val; + 
opts->rpc->weak_sysctls = val; } void criu_set_weak_sysctls(bool val) @@ -521,8 +521,8 @@ void criu_set_weak_sysctls(bool val) void criu_local_set_evasive_devices(criu_opts *opts, bool evasive_devices) { - opts->rpc->has_evasive_devices = true; - opts->rpc->evasive_devices = evasive_devices; + opts->rpc->has_evasive_devices = true; + opts->rpc->evasive_devices = evasive_devices; } void criu_set_evasive_devices(bool evasive_devices) @@ -532,8 +532,8 @@ void criu_set_evasive_devices(bool evasive_devices) void criu_local_set_shell_job(criu_opts *opts, bool shell_job) { - opts->rpc->has_shell_job = true; - opts->rpc->shell_job = shell_job; + opts->rpc->has_shell_job = true; + opts->rpc->shell_job = shell_job; } void criu_set_shell_job(bool shell_job) @@ -554,8 +554,8 @@ void criu_set_orphan_pts_master(bool orphan_pts_master) void criu_local_set_file_locks(criu_opts *opts, bool file_locks) { - opts->rpc->has_file_locks = true; - opts->rpc->file_locks = file_locks; + opts->rpc->has_file_locks = true; + opts->rpc->file_locks = file_locks; } void criu_set_file_locks(bool file_locks) @@ -565,8 +565,8 @@ void criu_set_file_locks(bool file_locks) void criu_local_set_log_level(criu_opts *opts, int log_level) { - opts->rpc->has_log_level = true; - opts->rpc->log_level = log_level; + opts->rpc->has_log_level = true; + opts->rpc->log_level = log_level; } void criu_set_log_level(int log_level) @@ -697,8 +697,8 @@ int criu_set_log_file(const char *log_file) void criu_local_set_cpu_cap(criu_opts *opts, unsigned int cap) { - opts->rpc->has_cpu_cap = true; - opts->rpc->cpu_cap = cap; + opts->rpc->has_cpu_cap = true; + opts->rpc->cpu_cap = cap; } void criu_set_cpu_cap(unsigned int cap) @@ -1410,7 +1410,7 @@ exit: static int send_req_and_recv_resp(criu_opts *opts, CriuReq *req, CriuResp **resp) { int fd; - int ret = 0; + int ret = 0; bool d = false; if (req->type == CRIU_REQ_TYPE__DUMP && req->opts->has_pid == false) @@ -1431,12 +1431,12 @@ static int send_req_and_recv_resp(criu_opts 
*opts, CriuReq *req, CriuResp **resp int criu_local_check(criu_opts *opts) { int ret = -1; - CriuReq req = CRIU_REQ__INIT; - CriuResp *resp = NULL; + CriuReq req = CRIU_REQ__INIT; + CriuResp *resp = NULL; saved_errno = 0; - req.type = CRIU_REQ_TYPE__CHECK; + req.type = CRIU_REQ_TYPE__CHECK; ret = send_req_and_recv_resp(opts, &req, &resp); if (ret) @@ -1463,13 +1463,13 @@ int criu_check(void) int criu_local_dump(criu_opts *opts) { int ret = -1; - CriuReq req = CRIU_REQ__INIT; - CriuResp *resp = NULL; + CriuReq req = CRIU_REQ__INIT; + CriuResp *resp = NULL; saved_errno = 0; - req.type = CRIU_REQ_TYPE__DUMP; - req.opts = opts->rpc; + req.type = CRIU_REQ_TYPE__DUMP; + req.opts = opts->rpc; ret = send_req_and_recv_resp(opts, &req, &resp); if (ret) @@ -1502,13 +1502,13 @@ int criu_dump(void) int criu_local_dump_iters(criu_opts *opts, int (*more)(criu_predump_info pi)) { int ret = -1, fd = -1, uret; - CriuReq req = CRIU_REQ__INIT; - CriuResp *resp = NULL; + CriuReq req = CRIU_REQ__INIT; + CriuResp *resp = NULL; saved_errno = 0; - req.type = CRIU_REQ_TYPE__PRE_DUMP; - req.opts = opts->rpc; + req.type = CRIU_REQ_TYPE__PRE_DUMP; + req.opts = opts->rpc; ret = -EINVAL; /* @@ -1573,13 +1573,13 @@ int criu_dump_iters(int (*more)(criu_predump_info pi)) int criu_local_restore(criu_opts *opts) { int ret = -1; - CriuReq req = CRIU_REQ__INIT; - CriuResp *resp = NULL; + CriuReq req = CRIU_REQ__INIT; + CriuResp *resp = NULL; saved_errno = 0; - req.type = CRIU_REQ_TYPE__RESTORE; - req.opts = opts->rpc; + req.type = CRIU_REQ_TYPE__RESTORE; + req.opts = opts->rpc; ret = send_req_and_recv_resp(opts, &req, &resp); if (ret) @@ -1612,8 +1612,8 @@ int criu_local_restore_child(criu_opts *opts) enum criu_service_comm saved_comm; const char *saved_comm_data; bool save_comm; - CriuReq req = CRIU_REQ__INIT; - CriuResp *resp = NULL; + CriuReq req = CRIU_REQ__INIT; + CriuResp *resp = NULL; /* * restore_child is not possible with criu running as a system @@ -1644,8 +1644,8 @@ int 
criu_local_restore_child(criu_opts *opts) saved_errno = 0; - req.type = CRIU_REQ_TYPE__RESTORE; - req.opts = opts->rpc; + req.type = CRIU_REQ_TYPE__RESTORE; + req.opts = opts->rpc; req.opts->has_rst_sibling = true; req.opts->rst_sibling = true; From f3341025207ba960e36140bb24d63098cdd69a57 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Mon, 24 Feb 2020 18:30:59 +0000 Subject: [PATCH 0428/2030] libcriu: Add space between 'if' and parenthesis Signed-off-by: Radostin Stoyanov --- lib/c/criu.c | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/lib/c/criu.c b/lib/c/criu.c index 7daac7dbf..de57a65dc 100644 --- a/lib/c/criu.c +++ b/lib/c/criu.c @@ -58,7 +58,7 @@ int criu_local_set_service_address(criu_opts *opts, const char *path) } else { opts->service_address = strdup(CR_DEFAULT_SERVICE_ADDRESS); } - if(opts->service_address == NULL) { + if (opts->service_address == NULL) { return -ENOMEM; } return 0; @@ -90,7 +90,7 @@ int criu_local_set_service_binary(criu_opts *opts, const char *path) } else { opts->service_binary = strdup(CR_DEFAULT_SERVICE_BIN); } - if(opts->service_binary == NULL) { + if (opts->service_binary == NULL) { return -ENOMEM; } return 0; @@ -118,7 +118,7 @@ void criu_local_free_opts(criu_opts *opts) } opts->rpc->n_exec_cmd = 0; - if(opts->rpc->unix_sk_ino) { + if (opts->rpc->unix_sk_ino) { for (i = 0; i < opts->rpc->n_unix_sk_ino; i++) { free(opts->rpc->unix_sk_ino[i]); } @@ -126,7 +126,7 @@ void criu_local_free_opts(criu_opts *opts) } opts->rpc->n_unix_sk_ino = 0; - if(opts->rpc->ext_mnt) { + if (opts->rpc->ext_mnt) { for (i = 0; i < opts->rpc->n_ext_mnt; i++) { if (opts->rpc->ext_mnt[i]) { free(opts->rpc->ext_mnt[i]->val); @@ -138,7 +138,7 @@ void criu_local_free_opts(criu_opts *opts) } opts->rpc->n_ext_mnt = 0; - if(opts->rpc->cg_root) { + if (opts->rpc->cg_root) { for (i = 0; i < opts->rpc->n_cg_root; i++) { if (opts->rpc->cg_root[i]) { free(opts->rpc->cg_root[i]->ctrl); @@ -150,7 +150,7 
@@ void criu_local_free_opts(criu_opts *opts) } opts->rpc->n_cg_root = 0; - if(opts->rpc->veths) { + if (opts->rpc->veths) { for (i = 0; i < opts->rpc->n_veths; i++) { if (opts->rpc->veths[i]) { free(opts->rpc->veths[i]->if_in); @@ -162,7 +162,7 @@ void criu_local_free_opts(criu_opts *opts) } opts->rpc->n_veths = 0; - if(opts->rpc->enable_fs) { + if (opts->rpc->enable_fs) { for (i = 0; i < opts->rpc->n_enable_fs; i++) { free(opts->rpc->enable_fs[i]); } @@ -170,7 +170,7 @@ void criu_local_free_opts(criu_opts *opts) } opts->rpc->n_enable_fs = 0; - if(opts->rpc->skip_mnt) { + if (opts->rpc->skip_mnt) { for (i = 0; i < opts->rpc->n_skip_mnt; i++) { free(opts->rpc->skip_mnt[i]); } @@ -178,7 +178,7 @@ void criu_local_free_opts(criu_opts *opts) } opts->rpc->n_skip_mnt = 0; - if(opts->rpc->irmap_scan_paths) { + if (opts->rpc->irmap_scan_paths) { for (i = 0; i < opts->rpc->n_irmap_scan_paths; i++) { free(opts->rpc->irmap_scan_paths[i]); } @@ -186,7 +186,7 @@ void criu_local_free_opts(criu_opts *opts) } opts->rpc->n_irmap_scan_paths = 0; - if(opts->rpc->cgroup_dump_controller) { + if (opts->rpc->cgroup_dump_controller) { for (i = 0; i < opts->rpc->n_cgroup_dump_controller; i++) { free(opts->rpc->cgroup_dump_controller[i]); } @@ -194,7 +194,7 @@ void criu_local_free_opts(criu_opts *opts) } opts->rpc->n_cgroup_dump_controller = 0; - if(opts->rpc->inherit_fd) { + if (opts->rpc->inherit_fd) { for (i = 0; i < opts->rpc->n_inherit_fd; i++) { if (opts->rpc->inherit_fd[i]) { free(opts->rpc->inherit_fd[i]->key); @@ -205,7 +205,7 @@ void criu_local_free_opts(criu_opts *opts) } opts->rpc->n_inherit_fd = 0; - if(opts->rpc->external) { + if (opts->rpc->external) { for (i = 0; i < opts->rpc->n_external; i++) { free(opts->rpc->external[i]); } @@ -213,7 +213,7 @@ void criu_local_free_opts(criu_opts *opts) } opts->rpc->n_external = 0; - if(opts->rpc->ps) { + if (opts->rpc->ps) { free(opts->rpc->ps->address); free(opts->rpc->ps); } @@ -262,7 +262,7 @@ int criu_local_init_opts(criu_opts **o) 
opts->service_comm = CRIU_COMM_BIN; opts->service_binary = strdup(CR_DEFAULT_SERVICE_BIN); - if(opts->service_binary == NULL) { + if (opts->service_binary == NULL) { perror("Can't allocate memory for criu service setting"); criu_local_free_opts(opts); return -1; @@ -325,7 +325,7 @@ void criu_set_images_dir_fd(int fd) int criu_local_set_parent_images(criu_opts *opts, const char *path) { opts->rpc->parent_img = strdup(path); - if(opts->rpc->parent_img == NULL) { + if (opts->rpc->parent_img == NULL) { return -ENOMEM; } return 0; @@ -577,7 +577,7 @@ void criu_set_log_level(int log_level) int criu_local_set_root(criu_opts *opts, const char *root) { opts->rpc->root = strdup(root); - if(opts->rpc->root == NULL) { + if (opts->rpc->root == NULL) { return -ENOMEM; } return 0; @@ -613,7 +613,7 @@ void criu_set_manage_cgroups_mode(enum criu_cg_mode mode) int criu_local_set_freeze_cgroup(criu_opts *opts, const char *name) { opts->rpc->freeze_cgroup = strdup(name); - if(opts->rpc->freeze_cgroup == NULL) { + if (opts->rpc->freeze_cgroup == NULL) { return -ENOMEM; } return 0; @@ -627,7 +627,7 @@ int criu_set_freeze_cgroup(const char *name) int criu_local_set_lsm_profile(criu_opts *opts, const char *name) { opts->rpc->lsm_profile = strdup(name); - if(opts->rpc->lsm_profile == NULL) { + if (opts->rpc->lsm_profile == NULL) { return -ENOMEM; } return 0; @@ -684,7 +684,7 @@ void criu_set_ext_masters(bool val) int criu_local_set_log_file(criu_opts *opts, const char *log_file) { opts->rpc->log_file = strdup(log_file); - if(opts->rpc->log_file == NULL) { + if (opts->rpc->log_file == NULL) { return -ENOMEM; } return 0; From 64347398c10b5911d0f1acd2db5c856b4b1fe464 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Wed, 20 May 2020 11:57:22 +0000 Subject: [PATCH 0429/2030] coverity: fix RESOURCE_LEAK criu/timens.c: 67 7. criu-3.14/criu/timens.c:67: leaked_storage: Variable "img" going out of scope leaks the storage it points to. 
65| if (id == 0 && empty_image(img)) { 66| pr_warn("Clocks values have not been dumped\n"); 67|-> return 0; 68| } Signed-off-by: Adrian Reber --- criu/timens.c | 1 + 1 file changed, 1 insertion(+) diff --git a/criu/timens.c b/criu/timens.c index 2a7e95284..f81808abf 100644 --- a/criu/timens.c +++ b/criu/timens.c @@ -64,6 +64,7 @@ int prepare_timens(int id) if (id == 0 && empty_image(img)) { pr_warn("Clocks values have not been dumped\n"); + close_image(img); return 0; } From b4c51ea492c98011472dd28a7cb47bffcfb4ad20 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Wed, 20 May 2020 12:19:36 +0000 Subject: [PATCH 0430/2030] coverity: fix FORWARD_NULL in criu/proc_parse.c: 1481 8. criu-3.14/criu/proc_parse.c:1511: var_deref_model: Passing null pointer "f" to "fclose", which dereferences it. 1509| exit_code = 0; 1510| out: 1511|-> fclose(f); 1512| return exit_code; 1513| } Signed-off-by: Adrian Reber --- criu/proc_parse.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/criu/proc_parse.c b/criu/proc_parse.c index 4a22700aa..d1ccd9281 100644 --- a/criu/proc_parse.c +++ b/criu/proc_parse.c @@ -1480,7 +1480,7 @@ int parse_timens_offsets(struct timespec *boff, struct timespec *moff) f = fopen_proc(PROC_SELF, "timens_offsets"); if (!f) { pr_perror("Unable to open /proc/self/timens_offsets"); - goto out; + return exit_code; } while (fgets(buf, BUF_SIZE, f)) { int64_t sec, nsec; From e34f5dd3a351dc2e475fa235c25ed115ac996644 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Wed, 20 May 2020 12:38:55 +0000 Subject: [PATCH 0431/2030] clang: Branch condition evaluates to a garbage value criu-3.14/criu/namespaces.c:692:7: warning: Branch condition evaluates to a garbage value criu-3.14/criu/namespaces.c:690:3: note: 'supported' declared without an initial value protobuf_c_boolean supported; ^~~~~~~~~~~~~~~~~~~~~~~~~~~~ criu-3.14/criu/namespaces.c:691:8: note: Calling 'get_ns_id' id = get_ns_id(pid, &time_for_children_ns_desc, &supported); 
^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ criu-3.14/criu/namespaces.c:479:9: note: Calling '__get_ns_id' return __get_ns_id(pid, nd, supported, NULL); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ criu-3.14/criu/namespaces.c:454:6: note: Assuming 'proc_dir' is < 0 if (proc_dir < 0) ^~~~~~~~~~~~ criu-3.14/criu/namespaces.c:454:2: note: Taking true branch if (proc_dir < 0) ^ criu-3.14/criu/namespaces.c:455:3: note: Returning without writing to '*supported' return 0; ^ criu-3.14/criu/namespaces.c:479:9: note: Returning from '__get_ns_id' return __get_ns_id(pid, nd, supported, NULL); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ criu-3.14/criu/namespaces.c:479:2: note: Returning without writing to '*supported' return __get_ns_id(pid, nd, supported, NULL); ^ criu-3.14/criu/namespaces.c:691:8: note: Returning from 'get_ns_id' id = get_ns_id(pid, &time_for_children_ns_desc, &supported); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ criu-3.14/criu/namespaces.c:692:7: note: Branch condition evaluates to a garbage value if (!supported || !id) { ^~~~~~~~~~ 690| protobuf_c_boolean supported; 691| id = get_ns_id(pid, &time_for_children_ns_desc, &supported); 692|-> if (!supported || !id) { 693| pr_err("Can't make timens id\n"); 694| Signed-off-by: Adrian Reber --- criu/namespaces.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/criu/namespaces.c b/criu/namespaces.c index 89d97c7bc..04f242505 100644 --- a/criu/namespaces.c +++ b/criu/namespaces.c @@ -687,7 +687,7 @@ int dump_task_ns_ids(struct pstree_item *item) } if (ids->has_time_ns_id) { unsigned int id; - protobuf_c_boolean supported; + protobuf_c_boolean supported = false; id = get_ns_id(pid, &time_for_children_ns_desc, &supported); if (!supported || !id) { pr_err("Can't make timens id\n"); From faf6dbf33e04eb8a0907f44b2787022b14a840e0 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Fri, 15 May 2020 18:21:25 -0700 Subject: [PATCH 0432/2030] close_service_fd: rename to status_ready The name 
close_service_fd() is misleading, as it not just closes the status_fd, but also writes to it. On a high level, though, it signals the other side that we are ready, so rename to status_ready. Signed-off-by: Kir Kolyshkin --- criu/cr-service.c | 2 +- criu/include/util.h | 2 +- criu/uffd.c | 2 +- criu/util.c | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/criu/cr-service.c b/criu/cr-service.c index 7c2ff9835..7201b549a 100644 --- a/criu/cr-service.c +++ b/criu/cr-service.c @@ -1394,7 +1394,7 @@ int cr_service(bool daemon_mode) if (setup_sigchld_handler()) goto err; - if (close_status_fd()) + if (status_ready()) goto err; while (1) { diff --git a/criu/include/util.h b/criu/include/util.h index d67f6d39d..1b22d9e0b 100644 --- a/criu/include/util.h +++ b/criu/include/util.h @@ -177,7 +177,7 @@ extern int cr_system(int in, int out, int err, char *cmd, char *const argv[], un extern int cr_system_userns(int in, int out, int err, char *cmd, char *const argv[], unsigned flags, int userns_pid); extern int cr_daemon(int nochdir, int noclose, int close_fd); -extern int close_status_fd(void); +extern int status_ready(void); extern int is_root_user(void); extern void set_proc_self_fd(int fd); diff --git a/criu/uffd.c b/criu/uffd.c index 99373c04d..33b34ba25 100644 --- a/criu/uffd.c +++ b/criu/uffd.c @@ -1456,7 +1456,7 @@ int cr_lazy_pages(bool daemon) } } - if (close_status_fd()) + if (status_ready()) return -1; /* diff --git a/criu/util.c b/criu/util.c index 517f0fc25..0a60fa105 100644 --- a/criu/util.c +++ b/criu/util.c @@ -643,7 +643,7 @@ out: return ret; } -int close_status_fd(void) +int status_ready(void) { char c = 0; @@ -1105,7 +1105,7 @@ int run_tcp_server(bool daemon_mode, int *ask, int cfd, int sk) } } - if (close_status_fd()) + if (status_ready()) return -1; if (sk >= 0) { From 62c03530c9d6e0a1012a589ed1a26c2612113238 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Fri, 15 May 2020 18:12:30 -0700 Subject: [PATCH 0433/2030] swrk: send 
notification instead of using status fd When we use swrk, we have a mechanism to send notifications over RPC. It is cleaner and more straightforward than sending \0 to status fd. For now, both mechanisms are supported, although status fd request option is now deprecated, so a warning is logged in case it's used. Guess we can remove it in a few years. Signed-off-by: Kir Kolyshkin --- criu/action-scripts.c | 1 + criu/cr-service.c | 2 ++ criu/include/action-scripts.h | 1 + criu/util.c | 4 ++++ 4 files changed, 8 insertions(+) diff --git a/criu/action-scripts.c b/criu/action-scripts.c index 2f7617c0f..5337efa64 100644 --- a/criu/action-scripts.c +++ b/criu/action-scripts.c @@ -29,6 +29,7 @@ static const char *action_names[ACT_MAX] = { [ ACT_PRE_RESUME ] = "pre-resume", [ ACT_POST_RESUME ] = "post-resume", [ ACT_ORPHAN_PTS_MASTER ] = "orphan-pts-master", + [ ACT_STATUS_READY ] = "status-ready", }; struct script { diff --git a/criu/cr-service.c b/criu/cr-service.c index 7201b549a..56be6bcd3 100644 --- a/criu/cr-service.c +++ b/criu/cr-service.c @@ -678,6 +678,8 @@ static int setup_opts_from_req(int sk, CriuOpts *req) } if (req->has_status_fd) { + pr_warn("status_fd is obsoleted; use status-ready notification instead\n"); + sprintf(status_fd, "/proc/%d/fd/%d", ids.pid, req->status_fd); opts.status_fd = open(status_fd, O_WRONLY); if (opts.status_fd < 0) { diff --git a/criu/include/action-scripts.h b/criu/include/action-scripts.h index 40b09b160..c2e8850aa 100644 --- a/criu/include/action-scripts.h +++ b/criu/include/action-scripts.h @@ -15,6 +15,7 @@ enum script_actions { ACT_POST_RESUME, ACT_PRE_RESUME, ACT_ORPHAN_PTS_MASTER, + ACT_STATUS_READY, ACT_MAX }; diff --git a/criu/util.c b/criu/util.c index 0a60fa105..4c1f3b4ca 100644 --- a/criu/util.c +++ b/criu/util.c @@ -45,6 +45,7 @@ #include "pstree.h" #include "cr-errno.h" +#include "action-scripts.h" #define VMA_OPT_LEN 128 @@ -647,6 +648,9 @@ int status_ready(void) { char c = 0; + if (run_scripts(ACT_STATUS_READY)) + 
return -1; + if (opts.status_fd < 0) return 0; From e57e74a18df0b7bbfb2fa556941fc7c8715e57d1 Mon Sep 17 00:00:00 2001 From: ZeyadYasser Date: Thu, 16 Apr 2020 15:58:18 +0200 Subject: [PATCH 0434/2030] criu: optimize find_unix_sk_by_ino() Fixes: #339 Replaced the linear search with a hashtable lookup. Signed-off-by: Zeyad Yasser --- criu/files.c | 1 + criu/include/sockets.h | 2 ++ criu/sk-unix.c | 19 ++++++++++++++++++- 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/criu/files.c b/criu/files.c index a1fd26764..2cfc9040e 100644 --- a/criu/files.c +++ b/criu/files.c @@ -1749,5 +1749,6 @@ struct collect_image_info files_cinfo = { int prepare_files(void) { init_fdesc_hash(); + init_sk_info_hash(); return collect_image(&files_cinfo); } diff --git a/criu/include/sockets.h b/criu/include/sockets.h index cd98d18e0..e971f3efd 100644 --- a/criu/include/sockets.h +++ b/criu/include/sockets.h @@ -62,6 +62,8 @@ extern int unix_sk_id_add(unsigned int ino); extern int unix_sk_ids_parse(char *optarg); extern int unix_prepare_root_shared(void); +extern void init_sk_info_hash(void); + extern int do_dump_opt(int sk, int level, int name, void *val, int len); #define dump_opt(s, l, n, f) do_dump_opt(s, l, n, f, sizeof(*f)) extern int do_restore_opt(int sk, int level, int name, void *val, int len); diff --git a/criu/sk-unix.c b/criu/sk-unix.c index 048ff44ae..cbcf1f66c 100644 --- a/criu/sk-unix.c +++ b/criu/sk-unix.c @@ -903,6 +903,7 @@ struct unix_sk_info { struct unix_sk_info *peer; struct pprep_head peer_resolve; /* XXX : union with the above? 
*/ struct file_desc d; + struct hlist_node hash; /* To lookup socket by ino */ struct list_head connected; /* List of sockets, connected to me */ struct list_head node; /* To link in peer's connected list */ struct list_head scm_fles; @@ -934,11 +935,25 @@ struct scm_fle { #define USK_PAIR_SLAVE 0x2 #define USK_GHOST_FDSTORE 0x4 /* bound but removed address */ +#define SK_INFO_HASH_SIZE 32 + +static struct hlist_head sk_info_hash[SK_INFO_HASH_SIZE]; + +void init_sk_info_hash(void) +{ + int i; + + for (i = 0; i < SK_INFO_HASH_SIZE; i++) + INIT_HLIST_HEAD(&sk_info_hash[i]); +} + static struct unix_sk_info *find_unix_sk_by_ino(int ino) { struct unix_sk_info *ui; + struct hlist_head *chain; - list_for_each_entry(ui, &unix_sockets, list) { + chain = &sk_info_hash[ino % SK_INFO_HASH_SIZE]; + hlist_for_each_entry(ui, chain, hash) { if (ui->ue->ino == ino) return ui; } @@ -2044,6 +2059,7 @@ static int init_unix_sk_info(struct unix_sk_info *ui, UnixSkEntry *ue) INIT_LIST_HEAD(&ui->node); INIT_LIST_HEAD(&ui->scm_fles); INIT_LIST_HEAD(&ui->ghost_node); + INIT_HLIST_NODE(&ui->hash); return 0; } @@ -2135,6 +2151,7 @@ static int collect_one_unixsk(void *o, ProtobufCMessage *base, struct cr_img *i) list_add_tail(&ui->ghost_node, &unix_ghost_addr); } + hlist_add_head(&ui->hash, &sk_info_hash[ui->ue->ino % SK_INFO_HASH_SIZE]); list_add_tail(&ui->list, &unix_sockets); return file_desc_add(&ui->d, ui->ue->id, &unix_desc_ops); } From 55f71b8667043fb8241bc500a96c20644d478eba Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Thu, 23 Apr 2020 14:13:15 +0000 Subject: [PATCH 0435/2030] lib/c: add criu_get_version() Although the CRIU version is exported in macros in version.h it only contains the CRIU version of libcriu during build time. As it is possible that CRIU is upgraded since the last time something was built against libcriu, this adds functions to query the actual CRIU binary about its version. 
Signed-off-by: Adrian Reber --- lib/c/criu.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++++ lib/c/criu.h | 32 ++++++++++++++++++++++++++ 2 files changed, 97 insertions(+) diff --git a/lib/c/criu.c b/lib/c/criu.c index de57a65dc..d052f8d1f 100644 --- a/lib/c/criu.c +++ b/lib/c/criu.c @@ -1668,3 +1668,68 @@ int criu_restore_child(void) { return criu_local_restore_child(global_opts); } + +int criu_local_get_version(criu_opts *opts) +{ + int ret = -1; + CriuReq req = CRIU_REQ__INIT; + CriuResp *resp = NULL; + + saved_errno = 0; + + req.type = CRIU_REQ_TYPE__VERSION; + req.opts = opts->rpc; + + ret = send_req_and_recv_resp(opts, &req, &resp); + if (ret) + goto exit; + + if (resp->success) { + ret = resp->version->major_number * 10000; + ret += resp->version->minor_number * 100; + if (resp->version->has_sublevel) + ret += resp->version->sublevel; + if (resp->version->gitid) { + /* Taken from runc: a git release -> minor + 1 */ + ret -= (ret % 100); + ret += 100; + } + } else { + ret = -EBADE; + } + +exit: + if (resp) + criu_resp__free_unpacked(resp, NULL); + + swrk_wait(opts); + + errno = saved_errno; + + return ret; +} + +int criu_get_version(void) +{ + return criu_local_get_version(global_opts); +} + +int criu_local_check_version(criu_opts *opts, int minimum) +{ + int version; + + version = criu_local_get_version(opts); + + if (version < 0) + return version; + + if (minimum <= version) + return 1; + + return 0; +} + +int criu_check_version(int minimum) +{ + return criu_local_check_version(global_opts, minimum); +} diff --git a/lib/c/criu.h b/lib/c/criu.h index 3a9204f5b..49f7a7005 100644 --- a/lib/c/criu.h +++ b/lib/c/criu.h @@ -158,6 +158,35 @@ int criu_restore_child(void); typedef void *criu_predump_info; int criu_dump_iters(int (*more)(criu_predump_info pi)); +/* + * Get the version of the actual binary used for RPC. 
+ * + * As this library is just forwarding all tasks to an + * independent (of this library) CRIU binary, the actual + * version of the CRIU binary can be different than the + * hardcoded values in the library (version.h). + * To be able to easily check the version of the CRIU binary + * the function criu_get_version() returns the version + * in the following format: + * + * (major * 10000) + (minor * 100) + sublevel + * + * If the CRIU binary has been built from a git checkout + * minor will be increased by one. + */ +int criu_get_version(void); + +/* + * Check if the version of the CRIU binary is at least + * 'minimum'. Version has to be in the same format as + * described for criu_get_version(). + * + * Returns 1 if CRIU is at least 'minimum'. + * Returns 0 if CRIU is too old. + * Returns < 0 if there was an error. + */ +int criu_check_version(int minimum); + /* * Same as the list above, but lets you have your very own options * structure and lets you set individual options in it. @@ -229,6 +258,9 @@ int criu_local_restore(criu_opts *opts); int criu_local_restore_child(criu_opts *opts); int criu_local_dump_iters(criu_opts *opts, int (*more)(criu_predump_info pi)); +int criu_local_get_version(criu_opts *opts); +int criu_local_check_version(criu_opts *opts, int minimum); + #ifdef __GNUG__ } #endif From 047ecd3a15f83b15535943c2c87e0c55b4866dd9 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Thu, 23 Apr 2020 14:28:00 +0000 Subject: [PATCH 0436/2030] test/others/libcriu: test version library calls This adds the previously added libcriu version functions to the libcriu tests.
Signed-off-by: Adrian Reber --- test/others/libcriu/lib.c | 7 ++++++- test/others/libcriu/lib.h | 1 + test/others/libcriu/test_self.c | 13 +++++++++++++ 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/test/others/libcriu/lib.c b/test/others/libcriu/lib.c index 33aa4090d..0c7929cda 100644 --- a/test/others/libcriu/lib.c +++ b/test/others/libcriu/lib.c @@ -2,6 +2,8 @@ #include #include +#include "criu.h" + void what_err_ret_mean(int ret) { /* NOTE: errno is set by libcriu */ @@ -44,4 +46,7 @@ int chk_exit(int status, int want) return 1; } - +int get_version() +{ + printf("Using a CRIU binary with version %d\n", criu_get_version()); +} diff --git a/test/others/libcriu/lib.h b/test/others/libcriu/lib.h index 67b784bff..6fdf8aef2 100644 --- a/test/others/libcriu/lib.h +++ b/test/others/libcriu/lib.h @@ -1,2 +1,3 @@ void what_err_ret_mean(int ret); int chk_exit(int status, int want); +int get_version(void); diff --git a/test/others/libcriu/test_self.c b/test/others/libcriu/test_self.c index c9d2a2e64..374a4b545 100644 --- a/test/others/libcriu/test_self.c +++ b/test/others/libcriu/test_self.c @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -23,6 +24,18 @@ int main(int argc, char *argv[]) criu_init_opts(); criu_set_service_binary(argv[1]); + + get_version(); + if (!criu_check_version(31400)) { + printf("CRIU version check failed. CRIU too old\n"); + return 1; + } + + if (criu_check_version(INT_MAX)) { + printf("CRIU version check failed. CRIU too new.\n"); + return 1; + } + criu_set_images_dir_fd(fd); criu_set_log_level(4); From d72428b7c4254c2e3587d8d84f16626302e7e111 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Thu, 23 Apr 2020 09:11:48 +0000 Subject: [PATCH 0437/2030] Also report clone3() errors correctly Without clone3() CRIU was able to detect a process with a wrong PID only in the already created child process. With clone3() this error can happen before the process is created. 
In the case of EEXIST this error will now be correctly forwarded to an RPC client. This was detected by running test/others/libcriu on a clone3() system. Signed-off-by: Adrian Reber --- criu/clone-noasan.c | 1 + criu/cr-restore.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/criu/clone-noasan.c b/criu/clone-noasan.c index a2190ba0a..35c40c21d 100644 --- a/criu/clone-noasan.c +++ b/criu/clone-noasan.c @@ -70,6 +70,7 @@ int clone3_with_pid_noasan(int (*fn)(void *), void *arg, int flags, if (!(flags & CLONE_PARENT)) { if (exit_signal != SIGCHLD) { pr_err("Exit signal not SIGCHLD\n"); + errno = EINVAL; return -1; } c_args.exit_signal = exit_signal; diff --git a/criu/cr-restore.c b/criu/cr-restore.c index ec00bf71b..e44ba308d 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -1439,6 +1439,8 @@ static inline int fork_with_pid(struct pstree_item *item) if (ret < 0) { pr_perror("Can't fork for %d", pid); + if (errno == EEXIST) + set_cr_errno(EEXIST); goto err_unlock; } From cbf099400a24debe4eaf830bd81138bb73a46a00 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Sat, 8 Feb 2020 16:58:36 +0100 Subject: [PATCH 0438/2030] Travis: use Vagrant to run VMs This adds the minimal configuration to run Fedora 31 based VMs on Travis. This can be used to test cgroupv2 based tests, tests with vdso=off and probably much more which requires booting a newer kernel. As an example this builds CRIU on Fedora 31 and reconfigures it to boot without VDSO support and runs one single test. 
Signed-off-by: Adrian Reber --- .travis.yml | 1 + scripts/travis/Makefile | 8 ++++++ scripts/travis/vagrant.sh | 53 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 62 insertions(+) create mode 100755 scripts/travis/vagrant.sh diff --git a/.travis.yml b/.travis.yml index e71afa0a3..8ada90193 100644 --- a/.travis.yml +++ b/.travis.yml @@ -12,6 +12,7 @@ env: - TR_ARCH=x86_64 - TR_ARCH=x86_64 CLANG=1 - TR_ARCH=openj9-test + - TR_ARCH=vagrant-fedora-no-vdso jobs: include: - os: linux diff --git a/scripts/travis/Makefile b/scripts/travis/Makefile index 17abb703a..1af60fe8d 100644 --- a/scripts/travis/Makefile +++ b/scripts/travis/Makefile @@ -64,5 +64,13 @@ podman-test: openj9-test: restart-docker ./openj9-test.sh +setup-vagrant: + ./vagrant.sh setup + +vagrant-fedora-no-vdso: setup-vagrant + ./vagrant.sh fedora-no-vdso + +.PHONY: setup-vagrant vagrant-fedora-no-vdso + %: $(MAKE) -C ../build $@$(target-suffix) diff --git a/scripts/travis/vagrant.sh b/scripts/travis/vagrant.sh new file mode 100755 index 000000000..943a8b9a3 --- /dev/null +++ b/scripts/travis/vagrant.sh @@ -0,0 +1,53 @@ +#!/bin/bash + +# This script is used to run vagrant based tests on Travis. +# This script is started via sudo from .travis.yml + +set -e +set -x + +VAGRANT_VERSION=2.2.7 +FEDORA_VERSION=31 +FEDORA_BOX_VERSION=31.20191023.0 + +setup() { + apt-get -qq update + # Load the kvm modules for vagrant to use qemu + modprobe kvm kvm_intel + + # Tar up the git checkout to have vagrant rsync it to the VM + tar cf criu.tar ../../../criu + wget https://releases.hashicorp.com/vagrant/${VAGRANT_VERSION}/vagrant_${VAGRANT_VERSION}_$(uname -m).deb -O /tmp/vagrant.deb && \ + dpkg -i /tmp/vagrant.deb + + apt-get -qq install -y libvirt-bin libvirt-dev qemu-utils qemu + systemctl restart libvirt-bin + vagrant plugin install vagrant-libvirt + vagrant init fedora/${FEDORA_VERSION}-cloud-base --box-version ${FEDORA_BOX_VERSION} + # The default libvirt Vagrant VM uses 512MB. 
+ # Travis VMs should have around 7.5GB. + # Increasing it to 4GB should work. + sed -i Vagrantfile -e 's,^end$, config.vm.provider :libvirt do |libvirt|'"\n"' libvirt.memory = 4096;end'"\n"'end,g' + vagrant up --provider=libvirt + mkdir -p /root/.ssh + vagrant ssh-config >> /root/.ssh/config + ssh default sudo dnf install -y gcc git gnutls-devel nftables-devel libaio-devel \ + libasan libcap-devel libnet-devel libnl3-devel make protobuf-c-devel \ + protobuf-devel python3-flake8 python3-future python3-protobuf \ + python3-junit_xml rubygem-asciidoctor iptables libselinux-devel + # Disable sssd to avoid zdtm test failures in pty04 due to sssd socket + ssh default sudo systemctl mask sssd + ssh default cat /proc/cmdline +} + +fedora-no-vdso() { + ssh default sudo grubby --update-kernel ALL --args="vdso=0" + vagrant reload + ssh default cat /proc/cmdline + ssh default 'cd /vagrant; tar xf criu.tar; cd criu; make -j 4' + # Excluding the VDSO test as we are running without VDSO + # Excluding two cgroup tests which seem to fail because of cgroup2 + ssh default 'cd /vagrant/criu/test; sudo ./zdtm.py run -a -x zdtm/static/cgroup04 -x zdtm/static/cgroup_ifpriomap -x zdtm/static/vdso01 --keep-going' +} + +$1 From 1d9438aefbd1609ee765f7e958b88883f402a662 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Tue, 28 Apr 2020 19:00:42 -0700 Subject: [PATCH 0439/2030] criu swrk: fix usage, allow common options TL;DR: this makes possible -v with criu swrk, and removes showing usage which is useless in swrk mode. 1. Since criu swrk command is not described in usage, there is no sense in showing it. Instead, show a one-line hint about how to use it. 2. In case some global options (like -v) are used, argv[1] might not point to "swrk". Use optind to point to a correct non-option argument. 3. While at it, also error out in case we have extra arguments. 
Signed-off-by: Kir Kolyshkin --- criu/crtools.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/criu/crtools.c b/criu/crtools.c index 76172f350..b696898e7 100644 --- a/criu/crtools.c +++ b/criu/crtools.c @@ -100,12 +100,18 @@ int main(int argc, char *argv[], char *envp[]) return 1; if (ret == 2) goto usage; + if (optind >= argc) { + pr_err("command is required\n"); + goto usage; + } log_set_loglevel(opts.log_level); - if (!strcmp(argv[1], "swrk")) { - if (argc < 3) - goto usage; + if (optind < argc && !strcmp(argv[optind], "swrk")) { + if (argc != optind+2) { + fprintf(stderr, "Usage: criu swrk \n"); + return 1; + } /* * This is to start criu service worker from libcriu calls. * The usage is "criu swrk " and is not for CLI/scripts. @@ -113,7 +119,7 @@ int main(int argc, char *argv[], char *envp[]) * corresponding lib call change. */ opts.swrk_restore = true; - return cr_service_work(atoi(argv[2])); + return cr_service_work(atoi(argv[optind+1])); } if (check_options()) @@ -125,11 +131,6 @@ int main(int argc, char *argv[], char *envp[]) if (opts.work_dir == NULL) SET_CHAR_OPTS(work_dir, opts.imgs_dir); - if (optind >= argc) { - pr_err("command is required\n"); - goto usage; - } - has_sub_command = (argc - optind) > 1; if (has_exec_cmd) { From 6ee4b72382f72362ab746876cf32a70712eb89f9 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 18 May 2020 11:19:23 +0300 Subject: [PATCH 0440/2030] arch/x86: Fix calculation of xstate_size The layout of xsave frame in a standard format is predefined by the hardware. Let's make sure we're increasing in frame offsets and use latest offset where appropriate.
https://github.com/checkpoint-restore/criu/issues/1042 Reported-by: Ashutosh Mehra Signed-off-by: Cyrill Gorcunov --- criu/arch/x86/crtools.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/criu/arch/x86/crtools.c b/criu/arch/x86/crtools.c index 9c8beeedd..bc8022535 100644 --- a/criu/arch/x86/crtools.c +++ b/criu/arch/x86/crtools.c @@ -437,6 +437,7 @@ int restore_fpu(struct rt_sigframe *sigframe, CoreEntry *core) void *from = xsave->member; \ size_t size = pb_repeated_size(xsave, member); \ size_t xsize = (size_t)compel_fpu_feature_size(feature); \ + size_t xstate_size_next = off + xsize; \ if (xsize != size) { \ if (size) { \ pr_err("%s reported %zu bytes (expecting %zu)\n",\ @@ -448,7 +449,8 @@ int restore_fpu(struct rt_sigframe *sigframe, CoreEntry *core) } \ } \ xstate_bv |= (1UL << feature); \ - xstate_size += xsize; \ + BUG_ON(xstate_size > xstate_size_next); \ + xstate_size = xstate_size_next; \ memcpy(to, from, size); \ } \ } while (0) @@ -485,6 +487,11 @@ int restore_fpu(struct rt_sigframe *sigframe, CoreEntry *core) UserX86XsaveEntry *xsave = core->thread_info->fpregs->xsave; uint8_t *extended_state_area = (void *)x; + /* + * Note the order does matter here and bound + * to the increasing offsets of XFEATURE_x + * inside memory layout (xstate_size calculation). + */ assign_xsave(XFEATURE_YMM, xsave, ymmh_space, extended_state_area); assign_xsave(XFEATURE_BNDREGS, xsave, bndreg_state, extended_state_area); assign_xsave(XFEATURE_BNDCSR, xsave, bndcsr_state, extended_state_area); From 808684c99eb4c0cf12a5725cd6bbe3fea191273c Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Tue, 9 Jun 2020 16:52:10 +0300 Subject: [PATCH 0441/2030] Add CONTRIBUTING.md Move the existing contribution guidelines to a dedicated file for future extensions. 
Signed-off-by: Mike Rapoport --- CONTRIBUTING.md | 19 +++++++++++++++++++ README.md | 20 +++++--------------- 2 files changed, 24 insertions(+), 15 deletions(-) create mode 100644 CONTRIBUTING.md diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 000000000..342619e88 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,19 @@ +[![master](https://travis-ci.org/checkpoint-restore/criu.svg?branch=master)](https://travis-ci.org/checkpoint-restore/criu) +[![development](https://travis-ci.org/checkpoint-restore/criu.svg?branch=criu-dev)](https://travis-ci.org/checkpoint-restore/criu) +[![Codacy Badge](https://api.codacy.com/project/badge/Grade/55251ec7db28421da4481fc7c1cb0cee)](https://www.codacy.com/app/xemul/criu?utm_source=github.com&utm_medium=referral&utm_content=xemul/criu&utm_campaign=Badge_Grade) +

+ +## How to contribute to CRIU + +CRIU project is (almost) the never-ending story, because we have to always keep up with the +Linux kernel supporting checkpoint and restore for all the features it provides. Thus we're +looking for contributors of all kinds -- feedback, bug reports, testing, coding, writing, etc. +Here are some useful hints to get involved. + +* We have both -- [very simple](https://github.com/checkpoint-restore/criu/issues?q=is%3Aissue+is%3Aopen+label%3Aenhancement) and [more sophisticated](https://github.com/checkpoint-restore/criu/issues?q=is%3Aissue+is%3Aopen+label%3A%22new+feature%22) coding tasks; +* CRIU does need [extensive testing](https://github.com/checkpoint-restore/criu/issues?q=is%3Aissue+is%3Aopen+label%3Atesting); +* Documentation is always hard, we have [some information](https://criu.org/Category:Empty_articles) that is to be extracted from people's heads into wiki pages as well as [some texts](https://criu.org/Category:Editor_help_needed) that all need to be converted into useful articles; +* Feedback is expected on the github issues page and on the [mailing list](https://lists.openvz.org/mailman/listinfo/criu); +* We accept github pull requests and this is the preferred way to contribute to CRIU. If you prefer to send patches by email, you are welcome to send them to [the devel list](http://criu.org/How_to_submit_patches); +* Spread the word about CRIU in [social networks](http://criu.org/Contacts); +* If you're giving a talk about CRIU -- let us know, we'll mention it on the [wiki main page](https://criu.org/News/events); diff --git a/README.md b/README.md index 6a578b953..d703638ec 100644 --- a/README.md +++ b/README.md @@ -14,6 +14,11 @@ project is that it is mainly implemented in user space. There are some more proj doing C/R for Linux, and so far CRIU [appears to be](https://criu.org/Comparison_to_other_CR_projects) the most feature-rich and up-to-date with the kernel. 
+CRIU project is (almost) the never-ending story, because we have to always keep up with the +Linux kernel supporting checkpoint and restore for all the features it provides. Thus we're +looking for contributors of all kinds -- feedback, bug reports, testing, coding, writing, etc. +Please refer to [CONTRIBUTING.md](CONTRIBUTING.md) if you would like to get involved. + The project [started](https://criu.org/History) as the way to do live migration for OpenVZ Linux containers, but later grew to more sophisticated and flexible tool. It is currently used by (integrated into) OpenVZ, LXC/LXD, Docker, and other software, project gets tremendous @@ -56,21 +61,6 @@ One of the CRIU features is the ability to save and restore state of a TCP socke without breaking the connection. This functionality is considered to be useful by itself, and we have it available as the [libsoccr library](https://criu.org/Libsoccr). -## How to contribute - -CRIU project is (almost) the never-ending story, because we have to always keep up with the -Linux kernel supporting checkpoint and restore for all the features it provides. Thus we're -looking for contributors of all kinds -- feedback, bug reports, testing, coding, writing, etc. -Here are some useful hints to get involved. 
- -* We have both -- [very simple](https://github.com/checkpoint-restore/criu/issues?q=is%3Aissue+is%3Aopen+label%3Aenhancement) and [more sophisticated](https://github.com/checkpoint-restore/criu/issues?q=is%3Aissue+is%3Aopen+label%3A%22new+feature%22) coding tasks; -* CRIU does need [extensive testing](https://github.com/checkpoint-restore/criu/issues?q=is%3Aissue+is%3Aopen+label%3Atesting); -* Documentation is always hard, we have [some information](https://criu.org/Category:Empty_articles) that is to be extracted from people's heads into wiki pages as well as [some texts](https://criu.org/Category:Editor_help_needed) that all need to be converted into useful articles; -* Feedback is expected on the github issues page and on the [mailing list](https://lists.openvz.org/mailman/listinfo/criu); -* We accept github pull requests and this is the preferred way to contribute to CRIU. If you prefer to send patches by email, you are welcome to send them to [the devel list](http://criu.org/How_to_submit_patches); -* Spread the word about CRIU in [social networks](http://criu.org/Contacts); -* If you're giving a talk about CRIU -- let us know, we'll mention it on the [wiki main page](https://criu.org/News/events); - ## Licence The project is licensed under GPLv2 (though files sitting in the lib/ directory are LGPLv2.1). From d0fcb01d47de8d5c659cc09d0ab5d994d10b5ffa Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Tue, 9 Jun 2020 16:52:11 +0300 Subject: [PATCH 0442/2030] CONTRIBUTING.md: import "How to submit patches" from criu.org Import "How to submit patches" article from CRIU wiki and update its format to match GitHub markdown. Signed-off-by: Mike Rapoport --- CONTRIBUTING.md | 200 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 199 insertions(+), 1 deletion(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 342619e88..edb7ecb48 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -14,6 +14,204 @@ Here are some useful hints to get involved. 
* CRIU does need [extensive testing](https://github.com/checkpoint-restore/criu/issues?q=is%3Aissue+is%3Aopen+label%3Atesting); * Documentation is always hard, we have [some information](https://criu.org/Category:Empty_articles) that is to be extracted from people's heads into wiki pages as well as [some texts](https://criu.org/Category:Editor_help_needed) that all need to be converted into useful articles; * Feedback is expected on the github issues page and on the [mailing list](https://lists.openvz.org/mailman/listinfo/criu); -* We accept github pull requests and this is the preferred way to contribute to CRIU. If you prefer to send patches by email, you are welcome to send them to [the devel list](http://criu.org/How_to_submit_patches); +* We accept github pull requests and this is the preferred way to contribute to CRIU. If you prefer to send patches by email, you are welcome to send them to [CRIU development mailing list](https://lists.openvz.org/mailman/listinfo/criu). +Below we describe in more detail recommend practices for CRIU developemnt. * Spread the word about CRIU in [social networks](http://criu.org/Contacts); * If you're giving a talk about CRIU -- let us know, we'll mention it on the [wiki main page](https://criu.org/News/events); + +### Seting up the developemnt environment + +Although criu could be run as non-root (see [Security](https://criu.org/Security), development is better to be done as root. For example, some tests require root. So, it would be a good idea to set up some recent Linux distro on a virtual machine. + +### Get the source code + +The CRIU sources are tracked by Git. Official CRIU repo is at https://github.com/checkpoint-restore/criu. + +The repository may contain multiple branches. Development happens in the **criu-dev** branch. 
+ +To clone CRIU repo and switch to the proper branch, run: + +``` + git clone https://github.com/checkpoint-restore/criu criu + cd criu + git checkout criu-dev +``` + +### Compile + +First, you need to install compile-time dependencies. Check [Installation dependencies](https://criu.org/Installation#Dependencies) for more info. + +To compile CRIU, run: + +``` + make +``` + +This should create the `./criu/criu` executable. + +## Edit the source code + +If you use ctags, you can generate the ctags file by running + +``` + make tags +``` + +When you change the source code, please keep in mind the following code conventions: + +* we prefer tabs and indentations to be 8 characters width +* CRIU mostly follows [Linux kernel coding style](https://www.kernel.org/doc/Documentation/process/coding-style.rst), but we are less strict than the kernel community. + +Other conventions can be learned from the source code itself. In short, make sure your new code +looks similar to what is already there. + +## Test your changes + +CRIU comes with an extensive test suite. To check whether your changes introduce any regressions, run + +``` + make test +``` + +The command runs [ZDTM Test Suite](https://criu.org/ZDTM_Test_Suite). Check for any error messages produced by it. + +In case you'd rather have someone else run the tests, you can use travis-ci for your +own github fork of CRIU. It will check the compilation for various supported platforms, +as well as run most of the tests from the suite. See https://travis-ci.org/checkpoint-restore/criu +for more details. + +## Sign your work + +To improve tracking of who did what, we ask you to sign off the patches +that are to be emailed. + +The sign-off is a simple line at the end of the explanation for the +patch, which certifies that you wrote it or otherwise have the right to +pass it on as an open-source patch. 
The rules are pretty simple: if you +can certify the below: + +### Developer's Certificate of Origin 1.1 + By making a contribution to this project, I certify that: + + (a) The contribution was created in whole or in part by me and I + have the right to submit it under the open source license + indicated in the file; or + + (b) The contribution is based upon previous work that, to the best + of my knowledge, is covered under an appropriate open source + license and I have the right under that license to submit that + work with modifications, whether created in whole or in part + by me, under the same open source license (unless I am + permitted to submit under a different license), as indicated + in the file; or + + (c) The contribution was provided directly to me by some other + person who certified (a), (b) or (c) and I have not modified + it. + + (d) I understand and agree that this project and the contribution + are public and that a record of the contribution (including all + personal information I submit with it, including my sign-off) is + maintained indefinitely and may be redistributed consistent with + this project or the open source license(s) involved. + +then you just add a line saying + +``` + Signed-off-by: Random J Developer +``` + +using your real name (please, no pseudonyms or anonymous contributions if +it is possible). + +Hint: you can use `git commit -s` to add a Signed-off-by line to your +commit message. To append such a line to a commit you already made, use +`git commit --amend -s`. + +``` + From: Random J Developer + Subject: [PATCH] Short patch description + + Long patch description (could be skipped if patch + is trivial enough) + + Signed-off-by: Random J Developer + --- + Patch body here +``` + +## Submit your work upstream + +We accept github pull requests and this is the preferred way to contribute to CRIU.
+For that you should push your work to your fork of CRIU at [GitHub](https://github.com) and create a [pull request](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/about-pull-requests) + +Historically, CRIU worked with mailing lists and patches so if you still prefer this way continue reading till the end of this section. + +### Make a patch + +To create a patch, run + +``` + git format-patch --signoff origin/criu-dev +``` + +You might need to read GIT documentation on how to prepare patches +for mail submission. Take a look at http://book.git-scm.com/ and/or +http://git-scm.com/documentation for details. It should not be hard +at all. + +We recommend to post patches using `git send-email` + +``` + git send-email --cover-letter --no-chain-reply-to --annotate \ + --confirm=always --to=criu@openvz.org criu-dev +``` + +Note that the `git send-email` subcommand may not be in +the main git package and using it may require installation of a +separate package, for example the "git-email" package in Fedora and +Debian. + +If this is your first time using git send-email, you might need to +configure it to point it to your SMTP server with something like: + +``` + git config --global sendemail.smtpServer smtp.example.net +``` + +If you get tired of typing `--to=criu@openvz.org` all the time, +you can configure that to be automatically handled as well: + +``` + git config sendemail.to criu@openvz.org +``` + +If a developer is sending another version of the patch (e.g. to address +review comments), they are advised to note differences to previous versions +after the `---` line in the patch so that it helps reviewers but +doesn't become part of git history. Moreover, such patch needs to be prefixed +correctly with `--subject-prefix=PATCHv2` appended to +`git send-email` (substitute `v2` with the correct +version if needed though). + +### Mail patches + +The patches should be sent to CRIU development mailing list, `criu AT openvz.org`.
Note that you need to be subscribed first in order to post. The list web interface is available at https://openvz.org/mailman/listinfo/criu; you can also use standard mailman aliases to work with it. + +Please make sure the email client you're using doesn't screw your patch (line wrapping and so on). + +{{Note| When sending a patch set that consists of more than one patch, please, push your changes in your local repo and provide the URL of the branch in the cover-letter}} + +### Wait for response + +Be patient. Most CRIU developers are pretty busy people so if +there is no immediate response on your patch — don't be surprised, +sometimes a patch may fly around a week before it gets reviewed. + +## Continuous integration + +Wiki article: [Continuous integration](https://criu.org/Continuous_integration) + +CRIU tests are run for each series sent to the mailing list. If you get a message from our patchwork that patches failed to pass the tests, you have to investigate what is wrong. + +We also recommend you to [enable Travis CI for your repo](https://criu.org/Continuous_integration#Enable_Travis_CI_for_your_repo) to check patches in your git branch, before sending them to the mailing list. From 2e5805878bbf1f80b2297b4b60a6859d15055142 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Sun, 14 Jun 2020 09:31:15 +0300 Subject: [PATCH 0443/2030] CONTRIBUTING.md: minor formatting fixes * Mark lowercase criu as code in the environment section * Add missing brace around the reference to https://criu.org/Security * Fixup an admonition block that GitHub cannot render * Spelling fixups * s/github/GitHub/g Signed-off-by: Mike Rapoport --- CONTRIBUTING.md | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index edb7ecb48..de4f3e1ea 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -13,15 +13,15 @@ Here are some useful hints to get involved.
* We have both -- [very simple](https://github.com/checkpoint-restore/criu/issues?q=is%3Aissue+is%3Aopen+label%3Aenhancement) and [more sophisticated](https://github.com/checkpoint-restore/criu/issues?q=is%3Aissue+is%3Aopen+label%3A%22new+feature%22) coding tasks; * CRIU does need [extensive testing](https://github.com/checkpoint-restore/criu/issues?q=is%3Aissue+is%3Aopen+label%3Atesting); * Documentation is always hard, we have [some information](https://criu.org/Category:Empty_articles) that is to be extracted from people's heads into wiki pages as well as [some texts](https://criu.org/Category:Editor_help_needed) that all need to be converted into useful articles; -* Feedback is expected on the github issues page and on the [mailing list](https://lists.openvz.org/mailman/listinfo/criu); -* We accept github pull requests and this is the preferred way to contribute to CRIU. If you prefer to send patches by email, you are welcome to send them to [CRIU development mailing list](https://lists.openvz.org/mailman/listinfo/criu). -Below we describe in more detail recommend practices for CRIU developemnt. +* Feedback is expected on the GitHub issues page and on the [mailing list](https://lists.openvz.org/mailman/listinfo/criu); +* We accept GitHub pull requests and this is the preferred way to contribute to CRIU. If you prefer to send patches by email, you are welcome to send them to [CRIU development mailing list](https://lists.openvz.org/mailman/listinfo/criu). +Below we describe in more detail the recommended practices for CRIU development. * Spread the word about CRIU in [social networks](http://criu.org/Contacts); * If you're giving a talk about CRIU -- let us know, we'll mention it on the [wiki main page](https://criu.org/News/events); -### Seting up the developemnt environment +### Setting up the development environment -Although criu could be run as non-root (see [Security](https://criu.org/Security), development is better to be done as root.
For example, some tests require root. So, it would be a good idea to set up some recent Linux distro on a virtual machine. +Although `criu` could be run as non-root (see [Security](https://criu.org/Security)), development is better to be done as root. For example, some tests require root. So, it would be a good idea to set up some recent Linux distro on a virtual machine. ### Get the source code @@ -76,14 +76,14 @@ CRIU comes with an extensive test suite. To check whether your changes introduce The command runs [ZDTM Test Suite](https://criu.org/ZDTM_Test_Suite). Check for any error messages produced by it. In case you'd rather have someone else run the tests, you can use travis-ci for your -own github fork of CRIU. It will check the compilation for various supported platforms, +own GitHub fork of CRIU. It will check the compilation for various supported platforms, as well as run most of the tests from the suite. See https://travis-ci.org/checkpoint-restore/criu for more details. ## Sign your work -To improve tracking of who did what, we ask you to sign off the patches -that are to be emailed. +To improve tracking of who did what, we ask you to sign off the commits in +your fork of CRIU or the patches that are to be emailed. The sign-off is a simple line at the end of the explanation for the patch, which certifies that you wrote it or otherwise have the right to @@ -142,7 +142,7 @@ commit message. To append such line to a commit you already made, use ## Submit your work upstream -We accept github pull requests and this is the preferred way to contribute to CRIU. +We accept GitHub pull requests and this is the preferred way to contribute to CRIU. 
For that you should push your work to your fork of CRIU at [GitHub](https://github.com) and create a [pull request](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/about-pull-requests) Historically, CRIU worked with mailing lists and patches so if you still prefer this way continue reading till the end of this section. @@ -200,7 +200,7 @@ The patches should be sent to CRIU development mailing list, `criu AT openvz.org Please make sure the email client you're using doesn't screw your patch (line wrapping and so on). -{{Note| When sending a patch set that consists of more than one patch, please, push your changes in your local repo and provide the URL of the branch in the cover-letter}} +> **Note:** When sending a patch set that consists of more than one patch, please, push your changes in your local repo and provide the URL of the branch in the cover-letter ### Wait for response From 35f8c056ac49ab62b8575a2d1f04cbbd94ccff5b Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Sun, 14 Jun 2020 11:22:19 +0300 Subject: [PATCH 0444/2030] CONTRIBUTING.md: add sections about patch description and splitting Shamelessly stolen from the Linux kernel [1], shortened a bit and relaxed to match CRIU. [1] https://www.kernel.org/doc/html/latest/process/submitting-patches.html Signed-off-by: Mike Rapoport --- CONTRIBUTING.md | 71 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index de4f3e1ea..1c731b7f8 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -80,6 +80,77 @@ own GitHub fork of CRIU. It will check the compilation for various supported pla as well as run most of the tests from the suite. See https://travis-ci.org/checkpoint-restore/criu for more details. +## Describe your changes + +Describe your problem. Whether your change is a one-line bug fix or +5000 lines of a new feature, there must be an underlying problem that +motivated you to do this work. 
Convince the reviewer that there is a +problem worth fixing and that it makes sense for them to read past the +first paragraph. + +Once the problem is established, describe what you are actually doing +about it in technical detail. It's important to describe the change +in plain English for the reviewer to verify that the code is behaving +as you intend it to. + +Solve only one problem per commit. If your description starts to get +long, that's a sign that you probably need to split up your commit. +See [Separate your changes](#separate-your-changes). + +Describe your changes in imperative mood, e.g. "make xyzzy do frotz" +instead of "[This commit] makes xyzzy do frotz" or "[I] changed xyzzy +to do frotz", as if you are giving orders to the codebase to change +its behaviour. + +If your change fixes a bug in a specific commit, e.g. you found an issue using +`git bisect`, please use the `Fixes:` tag with the abbreviation of +the SHA-1 ID, and the one line summary. For example: + +``` + Fixes: 9433b7b9db3e ("make: use cflags/ldflags for config.h detection mechanism") +``` + +The following `git config` settings can be used to add a pretty format for +outputting the above style in the `git log` or `git show` commands: + +``` + [pretty] + fixes = Fixes: %h (\"%s\") +``` + +If your change addresses an issue listed on GitHub, please use the `Fixes:` tag with the number of the issue. For instance: + +``` + Fixes: #339 +``` + +You may refer to the [How to Write a Git Commit +Message](https://chris.beams.io/posts/git-commit/) article for +recommendations on writing a good commit message. + +## Separate your changes + +Separate each **logical change** into a separate commit. + +For example, if your changes include both bug fixes and performance +enhancements for a single driver, separate those changes into two +or more commits. If your changes include an API update, and a new +driver which uses that new API, separate those into two commits.
+ +On the other hand, if you make a single change to numerous files, +group those changes into a single commit. Thus a single logical change +is contained within a single commit. + +The point to remember is that each commit should make an easily understood +change that can be verified by reviewers. Each commit should be justifiable +on its own merits. + +When dividing your change into a series of commits, take special care to +ensure that CRIU builds and runs properly after each commit in the +series. Developers using `git bisect` to track down a problem can end up +splitting your patch series at any point; they will not thank you if you +introduce bugs in the middle. + ## Sign your work To improve tracking of who did what, we ask you to sign off the commits in From 6815aa958d1ac01f0dd0c81d55475d44aabfff88 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Sun, 14 Jun 2020 11:26:14 +0300 Subject: [PATCH 0445/2030] CONTRIBUTING.md: add pull request guidelines Following the discussion at [1] describe best practices for pull request creation. [1] https://github.com/checkpoint-restore/criu/pull/1096 Signed-off-by: Mike Rapoport --- CONTRIBUTING.md | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 1c731b7f8..d40f0014c 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -216,6 +216,44 @@ commit message. To append such line to a commit you already made, use We accept GitHub pull requests and this is the preferred way to contribute to CRIU. For that you should push your work to your fork of CRIU at [GitHub](https://github.com) and create a [pull request](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/about-pull-requests) +### Pull request guidelines + +Pull request comment should contain description of the problem your changes +solve and a brief outline of the changes included in the pull request. 
+ +Please avoid pushing fixup commits to an existing pull request. Each commit +should be self-contained and there should not be fixup commits in a patch +series. Pull requests that contain one commit which breaks something +and another commit which fixes it, will be rejected. + +Please merge the fixup commits into the commits that have introduced the +problem before creating a pull request. + +It may happen that the reviewers were not completely happy with your +changes and requested changes to your patches. After you updated your +changes please close the old pull request and create a new one that +contains the following: + +* Description of the problem your changes solve and a brief outline of the + changes +* Link to the previous version of the pull request +* Brief description of the changes between old and new versions of the pull + request. If there was more than one previous pull request, all the + revisions should be listed. For example: + +``` + v3: rebase on the current criu-dev + v2: add commit to foo() and update bar() coding style +``` + +If there are only minor updates to the commits in a pull request, it is +possible to force-push them into an existing pull request. This only applies +to small changes and should be used with care. If you update an existing +pull request, remember to add the description of the changes from the +previous version. + +### Mailing list submission + Historically, CRIU worked with mailing lists and patches so if you still prefer this way continue reading till the end of this section. ### Make a patch From ce22e0f37dfce44bcdd6c9b8a94713441f139158 Mon Sep 17 00:00:00 2001 From: Angie Ni Date: Thu, 4 Jun 2020 11:24:23 -0600 Subject: [PATCH 0446/2030] uffd: uffd_open prints info, caller prints error When uffd_open is called from kerndat_uffd, userfaultfd failure is not considered an error, so the goal is to suppress the error message -- instead, we print this message as info.
If the function fails, it is the responsibility of the caller to print the error message. Signed-off-by: Angie Ni --- criu/uffd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/criu/uffd.c b/criu/uffd.c index 33b34ba25..5f4c15a60 100644 --- a/criu/uffd.c +++ b/criu/uffd.c @@ -269,7 +269,7 @@ int uffd_open(int flags, unsigned long *features) uffd = syscall(SYS_userfaultfd, flags); if (uffd == -1) { - pr_perror("Lazy pages are not available"); + pr_info("Lazy pages are not available: %s\n", strerror(errno)); return -errno; } From 41b535d312828eac6fd79481d04abe20163b0cc8 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Tue, 16 Jun 2020 14:26:06 +0000 Subject: [PATCH 0447/2030] test: skip vdso test on non-vdso systems Signed-off-by: Adrian Reber --- scripts/travis/vagrant.sh | 3 +-- test/zdtm/static/vdso01.checkskip | 3 +++ 2 files changed, 4 insertions(+), 2 deletions(-) create mode 100755 test/zdtm/static/vdso01.checkskip diff --git a/scripts/travis/vagrant.sh b/scripts/travis/vagrant.sh index 943a8b9a3..46740efc3 100755 --- a/scripts/travis/vagrant.sh +++ b/scripts/travis/vagrant.sh @@ -45,9 +45,8 @@ fedora-no-vdso() { vagrant reload ssh default cat /proc/cmdline ssh default 'cd /vagrant; tar xf criu.tar; cd criu; make -j 4' - # Excluding the VDSO test as we are running without VDSO # Excluding two cgroup tests which seem to fail because of cgroup2 - ssh default 'cd /vagrant/criu/test; sudo ./zdtm.py run -a -x zdtm/static/cgroup04 -x zdtm/static/cgroup_ifpriomap -x zdtm/static/vdso01 --keep-going' + ssh default 'cd /vagrant/criu/test; sudo ./zdtm.py run -a -x zdtm/static/cgroup04 -x zdtm/static/cgroup_ifpriomap --keep-going' } $1 diff --git a/test/zdtm/static/vdso01.checkskip b/test/zdtm/static/vdso01.checkskip new file mode 100755 index 000000000..a00df6231 --- /dev/null +++ b/test/zdtm/static/vdso01.checkskip @@ -0,0 +1,3 @@ +#!/bin/bash + +grep -q "\[vdso\]" /proc/self/maps From d38046b0035d977f44f99d68bb5e3c3d4fb4fd9a Mon Sep 17 00:00:00 2001 
From: Cyrill Gorcunov Date: Sat, 22 Jul 2017 11:33:12 +0300 Subject: [PATCH 0448/2030] mount: restore_task_mnt_ns - Lookup for mount namespace conditionally In case if our parent is a dead task (zombie) we should lookup for parent ids which will be inherited on restore. Otherwise parent->ids may be nil and SIGSEGV produced. Signed-off-by: Cyrill Gorcunov Rework and port from vzcriu: 87b320964 ("vz7: mount: restore_task_mnt_ns - Lookup for mount namespace conditionally") Fixes: #1066 Signed-off-by: Pavel Tikhomirov --- criu/mount.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/criu/mount.c b/criu/mount.c index 89b8cff59..a0b8b2e06 100644 --- a/criu/mount.c +++ b/criu/mount.c @@ -3094,19 +3094,19 @@ int restore_task_mnt_ns(struct pstree_item *current) return 0; if (current->ids && current->ids->has_mnt_ns_id) { + struct pstree_item *parent = current->parent; unsigned int id = current->ids->mnt_ns_id; struct ns_id *nsid; - /* - * Regardless of the namespace a task wants to - * live in, by that point they all will live in - * root's one (see prepare_pstree_kobj_ids() + - * get_clone_mask()). So if the current task's - * target namespace is the root's one -- it's - * already there, otherwise it will have to do - * setns(). + /* Zombies and helpers can have ids == 0 so we skip them */ + while (parent && !parent->ids) + parent = parent->parent; + + /** + * Our parent had restored the mount namespace before forking + * us and if we have the same mntns we just stay there. 
*/ - if (current->parent && id == current->parent->ids->mnt_ns_id) + if (parent && id == parent->ids->mnt_ns_id) return 0; nsid = lookup_ns_by_id(id, &mnt_ns_desc); From f0438f47f28d73a75e0fff4c74ee50f80b0c70d1 Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Mon, 22 Jun 2020 18:50:55 +0300 Subject: [PATCH 0449/2030] cgroup: make prepare_task_cgroup lookup current cgset in ancestors In case if our parent is a dead task (zombie) or a helper which in it's turn has zombie parent, and parent thus has zero cg_set we should look for current cgset deeper. Fixes: #1066 Signed-off-by: Pavel Tikhomirov --- criu/cgroup.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/criu/cgroup.c b/criu/cgroup.c index d4c712167..3737772df 100644 --- a/criu/cgroup.c +++ b/criu/cgroup.c @@ -1210,14 +1210,19 @@ static int move_in_cgroup(CgSetEntry *se, bool setup_cgns) int prepare_task_cgroup(struct pstree_item *me) { + struct pstree_item *parent = me->parent; CgSetEntry *se; u32 current_cgset; if (!rsti(me)->cg_set) return 0; - if (me->parent) - current_cgset = rsti(me->parent)->cg_set; + /* Zombies and helpers can have cg_set == 0 so we skip them */ + while (parent && !rsti(parent)->cg_set) + parent = parent->parent; + + if (parent) + current_cgset = rsti(parent)->cg_set; else current_cgset = root_cg_set; From 99c84878374e90e52ed1723f12564a5d16b1281d Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Mon, 22 Jun 2020 14:20:29 +0300 Subject: [PATCH 0450/2030] zdtm: add zombie_leader test Create a session leader and it's child - session member, make leader zombie. To restore this criu will need to create a helper task a child of our zombie so that member can inherit session. 
Before fixes in this patchset we segfault on empty ids and fail to restore cgroups because of empty cg_set Signed-off-by: Pavel Tikhomirov --- test/zdtm/static/Makefile | 1 + test/zdtm/static/zombie_leader.c | 83 +++++++++++++++++++++++++++++ test/zdtm/static/zombie_leader.desc | 1 + 3 files changed, 85 insertions(+) create mode 100644 test/zdtm/static/zombie_leader.c create mode 100644 test/zdtm/static/zombie_leader.desc diff --git a/test/zdtm/static/Makefile b/test/zdtm/static/Makefile index 7d72673c3..45a0df784 100644 --- a/test/zdtm/static/Makefile +++ b/test/zdtm/static/Makefile @@ -229,6 +229,7 @@ TST_NOFILE := \ time \ timens_nested \ timens_for_kids \ + zombie_leader \ # jobctl00 \ ifneq ($(ARCH),arm) diff --git a/test/zdtm/static/zombie_leader.c b/test/zdtm/static/zombie_leader.c new file mode 100644 index 000000000..d94b2af04 --- /dev/null +++ b/test/zdtm/static/zombie_leader.c @@ -0,0 +1,83 @@ +#include +#include +#include +#include +#include +#include + +#include "zdtmtst.h" + +const char *test_doc = "Check non-empty session with zombie leader"; +const char *test_author = "Pavel Tikhomirov "; + +int child(void) +{ + while (1) + sleep(1); + + return 0; +} + +int zombie_leader(int *cpid) +{ + int pid; + + setsid(); + + pid = fork(); + if (pid < 0) { + pr_perror("Failed to fork child"); + return 1; + } else if (pid == 0) { + exit(child()); + } + + *cpid = pid; + return 0; +} + +int main(int argc, char **argv) +{ + int ret = -1, status; + int pid, *cpid; + siginfo_t infop; + + test_init(argc, argv); + + cpid = (int *)mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_SHARED, -1, 0); + *cpid = 0; + + pid = fork(); + if (pid < 0) { + pr_perror("Failed to fork zombie"); + return 1; + } else if (pid == 0) { + exit(zombie_leader(cpid)); + } + + if (waitid(P_PID, pid, &infop, WNOWAIT | WEXITED) < 0) { + pr_perror("Failed to waitid zombie"); + goto err; + } + + if (!*cpid) { + pr_err("Don't know grand child's pid"); + goto err; + } + + 
test_daemon(); + test_waitsig(); + + ret = 0; +err: + waitpid(pid, &status, 0); + + if (*cpid) + kill(*cpid, SIGKILL); + + if (!ret) + pass(); + + return 0; +} diff --git a/test/zdtm/static/zombie_leader.desc b/test/zdtm/static/zombie_leader.desc new file mode 100644 index 000000000..6c4afe5f0 --- /dev/null +++ b/test/zdtm/static/zombie_leader.desc @@ -0,0 +1 @@ +{'flavor': 'ns uns'} From 4e7ec3c88b518e7bbf986228bb822bed6869ad9c Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Tue, 5 May 2020 14:53:08 +0000 Subject: [PATCH 0451/2030] pidns: add pidns image file definition TODO: create correct magic Signed-off-by: Adrian Reber --- criu/image-desc.c | 1 + criu/include/image-desc.h | 1 + criu/include/magic.h | 1 + criu/include/protobuf-desc.h | 1 + criu/protobuf-desc.c | 1 + images/Makefile | 1 + images/pidns.proto | 5 +++++ lib/py/images/images.py | 1 + 8 files changed, 12 insertions(+) create mode 100644 images/pidns.proto diff --git a/criu/image-desc.c b/criu/image-desc.c index 617b95355..c9581f5b8 100644 --- a/criu/image-desc.c +++ b/criu/image-desc.c @@ -103,6 +103,7 @@ struct cr_fd_desc_tmpl imgset_template[CR_FD_MAX] = { FD_ENTRY(NETNF_EXP, "netns-exp-%u"), FD_ENTRY(FILES, "files"), FD_ENTRY(TIMENS, "timens-%u"), + FD_ENTRY(PIDNS, "pidns-%u"), [CR_FD_STATS] = { .fmt = "stats-%s", diff --git a/criu/include/image-desc.h b/criu/include/image-desc.h index 6283a576d..e37d535c2 100644 --- a/criu/include/image-desc.h +++ b/criu/include/image-desc.h @@ -27,6 +27,7 @@ enum { CR_FD_MNTS, CR_FD_USERNS, CR_FD_TIMENS, + CR_FD_PIDNS, _CR_FD_IPCNS_FROM, CR_FD_IPC_VAR, diff --git a/criu/include/magic.h b/criu/include/magic.h index d078ec422..ac5cd8033 100644 --- a/criu/include/magic.h +++ b/criu/include/magic.h @@ -96,6 +96,7 @@ #define FILES_MAGIC 0x56303138 /* Toropets */ #define MEMFD_INODE_MAGIC 0x48453499 /* Dnipro */ #define TIMENS_MAGIC 0x43114433 /* Beslan */ +#define PIDNS_MAGIC 0x12345678 #define IFADDR_MAGIC RAW_IMAGE_MAGIC #define ROUTE_MAGIC RAW_IMAGE_MAGIC diff 
--git a/criu/include/protobuf-desc.h b/criu/include/protobuf-desc.h index 43d961731..46f7f8cea 100644 --- a/criu/include/protobuf-desc.h +++ b/criu/include/protobuf-desc.h @@ -66,6 +66,7 @@ enum { PB_TIMENS, PB_IMG_STREAMER_REQUEST, PB_IMG_STREAMER_REPLY, + PB_PIDNS, /* PB_AUTOGEN_STOP */ diff --git a/criu/protobuf-desc.c b/criu/protobuf-desc.c index 13655264a..223a6149f 100644 --- a/criu/protobuf-desc.c +++ b/criu/protobuf-desc.c @@ -38,6 +38,7 @@ #include "images/timer.pb-c.h" #include "images/utsns.pb-c.h" #include "images/timens.pb-c.h" +#include "images/pidns.pb-c.h" #include "images/ipc-var.pb-c.h" #include "images/ipc-shm.pb-c.h" #include "images/ipc-msg.pb-c.h" diff --git a/images/Makefile b/images/Makefile index 5458e4679..9ce7198c0 100644 --- a/images/Makefile +++ b/images/Makefile @@ -55,6 +55,7 @@ proto-obj-y += rpc.o proto-obj-y += ext-file.o proto-obj-y += cgroup.o proto-obj-y += userns.o +proto-obj-y += pidns.o proto-obj-y += google/protobuf/descriptor.o # To make protoc-c happy and compile opts.proto proto-obj-y += opts.o proto-obj-y += seccomp.o diff --git a/images/pidns.proto b/images/pidns.proto new file mode 100644 index 000000000..7ff049749 --- /dev/null +++ b/images/pidns.proto @@ -0,0 +1,5 @@ +syntax = "proto2"; + +message pidns_entry { + optional string ext_key = 1; +} diff --git a/lib/py/images/images.py b/lib/py/images/images.py index ca6f207bb..7faefbb96 100644 --- a/lib/py/images/images.py +++ b/lib/py/images/images.py @@ -467,6 +467,7 @@ handlers = { 'CREDS': entry_handler(pb.creds_entry), 'UTSNS': entry_handler(pb.utsns_entry), 'TIMENS': entry_handler(pb.timens_entry), + 'PIDNS': entry_handler(pb.pidns_entry), 'IPC_VAR': entry_handler(pb.ipc_var_entry), 'FS': entry_handler(pb.fs_entry), 'GHOST_FILE': ghost_file_handler(), From f1e6b103692e20a031fde1257193aab3d1f45ef4 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Tue, 5 May 2020 15:16:19 +0000 Subject: [PATCH 0452/2030] pidns: write and read pidns information This loads and stores 
the key for an external PID namespace if specified by the user using: --external pid[<inode>]:<key>