diff --git a/criu/Makefile.crtools b/criu/Makefile.crtools
index 1a6e0b5b5..5c25b8928 100644
--- a/criu/Makefile.crtools
+++ b/criu/Makefile.crtools
@@ -87,6 +87,7 @@ obj-y += config.o
 obj-y			+= servicefd.o
 obj-y			+= pie-util-vdso.o
 obj-y			+= vdso.o
+obj-y			+= timens.o
 obj-$(CONFIG_COMPAT)	+= pie-util-vdso-elf32.o
 CFLAGS_pie-util-vdso-elf32.o	+= -DCONFIG_VDSO_32
 obj-$(CONFIG_COMPAT)	+= vdso-compat.o
diff --git a/criu/cr-check.c b/criu/cr-check.c
index 80df3f7cd..b790c2ffb 100644
--- a/criu/cr-check.c
+++ b/criu/cr-check.c
@@ -1266,6 +1266,17 @@ static int check_kcmp_epoll(void)
 	return 0;
 }
 
+/* Feature check: fails (-1) unless kdat.has_timens is set. */
+static int check_time_namespace(void)
+{
+	if (!kdat.has_timens) {
+		pr_err("Time namespaces are not supported\n");
+		return -1;
+	}
+
+	return 0;
+}
+
 static int check_net_diag_raw(void)
 {
 	check_sock_diag();
@@ -1384,6 +1395,7 @@ int cr_check(void)
 		ret |= check_kcmp_epoll();
 		ret |= check_net_diag_raw();
 		ret |= check_clone3_set_tid();
+		ret |= check_time_namespace();
 	}
 
 	/*
@@ -1486,6 +1498,7 @@ static struct feature_list feature_list[] = {
 	{ "nsid", check_nsid },
 	{ "link_nsid", check_link_nsid},
 	{ "kcmp_epoll", check_kcmp_epoll},
+	{ "timens", check_time_namespace},
 	{ "external_net_ns", check_external_net_ns},
 	{ "clone3_set_tid", check_clone3_set_tid},
 	{ NULL, NULL },
diff --git a/criu/cr-restore.c b/criu/cr-restore.c
index 74be1a5ca..ce6e667d7 100644
--- a/criu/cr-restore.c
+++ b/criu/cr-restore.c
@@ -76,6 +76,7 @@
 #include "fdstore.h"
 #include "string.h"
 #include "memfd.h"
+#include "timens.h"
 
 #include "parasite-syscall.h"
 #include "files-reg.h"
@@ -1406,7 +1407,7 @@ static inline int fork_with_pid(struct pstree_item *item)
 	if (kdat.has_clone3_set_tid) {
 		ret = clone3_with_pid_noasan(restore_task_with_children,
 				&ca, (ca.clone_flags &
-					~(CLONE_NEWNET | CLONE_NEWCGROUP)),
+					~(CLONE_NEWNET | CLONE_NEWCGROUP | CLONE_NEWTIME)),
 				SIGCHLD, pid);
 	} else {
 		/*
@@ -1424,7 +1425,7 @@ static inline int fork_with_pid(struct pstree_item *item)
 		close_pid_proc();
 		ret = clone_noasan(restore_task_with_children,
 				(ca.clone_flags &
-					~(CLONE_NEWNET | CLONE_NEWCGROUP)) | SIGCHLD,
+					~(CLONE_NEWNET | CLONE_NEWCGROUP | CLONE_NEWTIME)) | SIGCHLD,
 				&ca);
 	}
 
@@ -1745,6 +1746,11 @@ static int restore_task_with_children(void *_arg)
 		}
 	}
 
+	if (root_ns_mask & CLONE_NEWTIME) {
+		if (prepare_timens(current->ids->time_ns_id))
+			goto err;
+	}
+
 	/* Wait prepare_userns */
 	if (restore_finish_ns_stage(CR_STATE_ROOT_TASK, CR_STATE_PREPARE_NAMESPACES) < 0)
 		goto err;
diff --git a/criu/image-desc.c b/criu/image-desc.c
index ac627a829..617b95355 100644
--- a/criu/image-desc.c
+++ b/criu/image-desc.c
@@ -102,6 +102,7 @@ struct cr_fd_desc_tmpl imgset_template[CR_FD_MAX] = {
 	FD_ENTRY(NETNF_CT,	"netns-ct-%u"),
 	FD_ENTRY(NETNF_EXP,	"netns-exp-%u"),
 	FD_ENTRY(FILES,		"files"),
+	FD_ENTRY(TIMENS,	"timens-%u"),
 
 	[CR_FD_STATS] = {
 		.fmt	= "stats-%s",
diff --git a/criu/include/image-desc.h b/criu/include/image-desc.h
index ce6ef1529..6283a576d 100644
--- a/criu/include/image-desc.h
+++ b/criu/include/image-desc.h
@@ -26,6 +26,7 @@ enum {
 	CR_FD_UTSNS,
 	CR_FD_MNTS,
 	CR_FD_USERNS,
+	CR_FD_TIMENS,
 
 	_CR_FD_IPCNS_FROM,
 	CR_FD_IPC_VAR,
diff --git a/criu/include/kerndat.h b/criu/include/kerndat.h
index 27c870bb8..ad5f7d324 100644
--- a/criu/include/kerndat.h
+++ b/criu/include/kerndat.h
@@ -67,6 +67,7 @@ struct kerndat_s {
 	bool has_kcmp_epoll_tfd;
 	bool has_fsopen;
 	bool has_clone3_set_tid;
+	bool has_timens;
 };
 
 extern struct kerndat_s kdat;
diff --git a/criu/include/magic.h b/criu/include/magic.h
index bdaca968d..d078ec422 100644
--- a/criu/include/magic.h
+++ b/criu/include/magic.h
@@ -95,6 +95,7 @@
 #define AUTOFS_MAGIC		0x49353943 /* Sochi */
 #define FILES_MAGIC		0x56303138 /* Toropets */
 #define MEMFD_INODE_MAGIC	0x48453499 /* Dnipro */
+#define TIMENS_MAGIC		0x43114433 /* Beslan */
 
 #define IFADDR_MAGIC		RAW_IMAGE_MAGIC
 #define ROUTE_MAGIC		RAW_IMAGE_MAGIC
diff --git a/criu/include/namespaces.h b/criu/include/namespaces.h
index a9a970a9b..e570aa0ab 100644
--- a/criu/include/namespaces.h
+++ b/criu/include/namespaces.h
@@ -34,7 +34,13 @@
 #define CLONE_NEWCGROUP	0x02000000
 #endif
 
-#define CLONE_ALLNS	(CLONE_NEWPID | CLONE_NEWNET | CLONE_NEWIPC | CLONE_NEWUTS | CLONE_NEWNS | CLONE_NEWUSER | CLONE_NEWCGROUP)
+#ifndef CLONE_NEWTIME
+#define CLONE_NEWTIME	0x00000080
+#endif
+
+#define CLONE_ALLNS	(CLONE_NEWPID | CLONE_NEWNET | CLONE_NEWIPC | \
+			 CLONE_NEWUTS | CLONE_NEWNS | CLONE_NEWUSER | \
+			 CLONE_NEWCGROUP | CLONE_NEWTIME)
 
 /* Nested namespaces are supported only for these types */
 #define CLONE_SUBNS	(CLONE_NEWNS | CLONE_NEWNET)
@@ -146,6 +152,7 @@ extern bool check_ns_proc(struct fd_link *link);
 
 extern struct ns_desc pid_ns_desc;
 extern struct ns_desc user_ns_desc;
+extern struct ns_desc time_ns_desc;
 extern unsigned long root_ns_mask;
 
 extern const struct fdtype_ops nsfile_dump_ops;
diff --git a/criu/include/proc_parse.h b/criu/include/proc_parse.h
index fd50ff47e..25a57df6c 100644
--- a/criu/include/proc_parse.h
+++ b/criu/include/proc_parse.h
@@ -102,4 +102,6 @@ extern bool is_vma_range_fmt(char *line);
 extern void parse_vmflags(char *buf, u32 *flags, u64 *madv, int *io_pf);
 extern int parse_uptime(uint64_t *upt);
 
+extern int parse_timens_offsets(struct timespec *boff, struct timespec *moff);
+
 #endif /* __CR_PROC_PARSE_H__ */
diff --git a/criu/include/protobuf-desc.h b/criu/include/protobuf-desc.h
index 7e0385ef4..ee4135d65 100644
--- a/criu/include/protobuf-desc.h
+++ b/criu/include/protobuf-desc.h
@@ -63,6 +63,7 @@ enum {
 	PB_FILE,
 	PB_MEMFD_FILE,
 	PB_MEMFD_INODE, /* 60 */
+	PB_TIMENS,
 
 	/* PB_AUTOGEN_STOP */
 
diff --git a/criu/include/timens.h b/criu/include/timens.h
new file mode 100644
index 000000000..22a4a5220
--- /dev/null
+++ b/criu/include/timens.h
@@ -0,0 +1,11 @@
+#ifndef __CR_TIME_NS_H__
+#define __CR_TIME_NS_H__
+
+/* Write the timens image for namespace ns_id; returns 0 or -1. */
+extern int dump_time_ns(int ns_id);
+/* Takes a time namespace id (callers pass ids->time_ns_id, not a pid); returns 0 or -1. */
+extern int prepare_timens(int id);
+
+extern struct ns_desc time_ns_desc;
+
+#endif /* __CR_TIME_NS_H__ */
diff --git a/criu/kerndat.c b/criu/kerndat.c
index 2ad72c350..0c6910da9 100644
--- a/criu/kerndat.c
+++ b/criu/kerndat.c
@@ -783,6 +783,25 @@ out:
 	return ret;
 }
 
+/*
+ * Probe for time namespaces via /proc/self/timens_offsets. A missing file
+ * (ENOENT) just clears kdat.has_timens; any other access() error returns -1.
+ */
+static int has_time_namespace(void)
+{
+	if (access("/proc/self/timens_offsets", F_OK) < 0) {
+		if (errno == ENOENT) {
+			pr_debug("Time namespaces are not supported.\n");
+			kdat.has_timens = false;
+			return 0;
+		}
+		pr_perror("Unable to access /proc/self/timens_offsets");
+		return -1;
+	}
+	kdat.has_timens = true;
+	return 0;
+}
+
 int __attribute__((weak)) kdat_x86_has_ptrace_fpu_xsave_bug(void)
 {
 	return 0;
@@ -1091,6 +1110,8 @@ int kerndat_init(void)
 		ret = kerndat_has_fsopen();
 	if (!ret)
 		ret = kerndat_has_clone3_set_tid();
+	if (!ret)
+		ret = has_time_namespace();
 
 	kerndat_lsm();
 	kerndat_mmap_min_addr();
diff --git a/criu/namespaces.c b/criu/namespaces.c
index 2db805b2f..e376feaca 100644
--- a/criu/namespaces.c
+++ b/criu/namespaces.c
@@ -20,6 +20,7 @@
 #include "imgset.h"
 #include "uts_ns.h"
 #include "ipc_ns.h"
+#include "timens.h"
 #include "mount.h"
 #include "pstree.h"
 #include "namespaces.h"
@@ -39,6 +40,7 @@ static struct ns_desc *ns_desc_array[] = {
 	&pid_ns_desc,
 	&user_ns_desc,
 	&mnt_ns_desc,
+	&time_ns_desc,
 	&cgroup_ns_desc,
 };
 
@@ -157,6 +159,9 @@ int join_ns_add(const char *type, char *ns_file, char *extra_opts)
 	} else if (!strncmp(type, "uts", 4)) {
 		jn->nd = &uts_ns_desc;
 		join_ns_flags |= CLONE_NEWUTS;
+	} else if (!strncmp(type, "time", 5)) {
+		jn->nd = &time_ns_desc;
+		join_ns_flags |= CLONE_NEWTIME;
 	} else if (!strncmp(type, "ipc", 4)) {
 		jn->nd = &ipc_ns_desc;
 		join_ns_flags |= CLONE_NEWIPC;
@@ -568,6 +573,10 @@ static int open_ns_fd(struct file_desc *d, int *new_fd)
 			item = t;
 			nd = &cgroup_ns_desc;
 			break;
+		} else if (ids->time_ns_id == nfi->nfe->ns_id) {
+			item = t;
+			nd = &time_ns_desc;
+			break;
 		}
 	}
 
@@ -671,6 +680,13 @@ int dump_task_ns_ids(struct pstree_item *item)
 		return -1;
 	}
 
+	ids->has_time_ns_id = true;
+	ids->time_ns_id = get_ns_id(pid, &time_ns_desc, NULL);
+	if (!ids->time_ns_id) {
+		pr_err("Can't make timens id\n");
+		return -1;
+	}
+
 	ids->has_mnt_ns_id = true;
ids->mnt_ns_id = get_ns_id(pid, &mnt_ns_desc, NULL); if (!ids->mnt_ns_id) { @@ -914,6 +930,9 @@ static int check_user_ns(int pid) if ((root_ns_mask & CLONE_NEWUTS) && switch_ns(pid, &uts_ns_desc, NULL)) exit(1); + if ((root_ns_mask & CLONE_NEWTIME) && + switch_ns(pid, &time_ns_desc, NULL)) + exit(1); if ((root_ns_mask & CLONE_NEWIPC) && switch_ns(pid, &ipc_ns_desc, NULL)) exit(1); @@ -1002,6 +1021,11 @@ static int do_dump_namespaces(struct ns_id *ns) ns->id, ns->ns_pid); ret = dump_uts_ns(ns->id); break; + case CLONE_NEWTIME: + pr_info("Dump TIME namespace %d via %d\n", + ns->id, ns->ns_pid); + ret = dump_time_ns(ns->id); + break; case CLONE_NEWIPC: pr_info("Dump IPC namespace %d via %d\n", ns->id, ns->ns_pid); diff --git a/criu/proc_parse.c b/criu/proc_parse.c index 60aba8788..c73fa9776 100644 --- a/criu/proc_parse.c +++ b/criu/proc_parse.c @@ -1472,6 +1472,44 @@ static bool should_skip_mount(const char *mountpoint) return false; } +int parse_timens_offsets(struct timespec *boff, struct timespec *moff) +{ + int exit_code = -1; + FILE *f; + + f = fopen_proc(PROC_SELF, "timens_offsets"); + if (!f) { + pr_perror("Unable to open /proc/self/timens_offsets"); + goto out; + } + while (fgets(buf, BUF_SIZE, f)) { + int64_t sec, nsec; + int clockid; + + if (sscanf(buf, "%d %"PRId64" %"PRId64"\n", &clockid, &sec, &nsec) != 3) { + pr_err("Unable to parse: %s\n", buf); + goto out; + } + switch (clockid) { + case CLOCK_MONOTONIC: + moff->tv_sec = sec; + moff->tv_nsec = nsec; + break; + case CLOCK_BOOTTIME: + boff->tv_sec = sec; + boff->tv_nsec = nsec; + break; + default: + pr_err("Unknown clockid: %d\n", clockid); + goto out; + } + } + exit_code = 0; +out: + fclose(f); + return exit_code; +} + struct mount_info *parse_mountinfo(pid_t pid, struct ns_id *nsid, bool for_dump) { struct mount_info *list = NULL; diff --git a/criu/protobuf-desc.c b/criu/protobuf-desc.c index 41c208037..2ee81e5db 100644 --- a/criu/protobuf-desc.c +++ b/criu/protobuf-desc.c @@ -37,6 +37,7 @@ #include 
"images/creds.pb-c.h" #include "images/timer.pb-c.h" #include "images/utsns.pb-c.h" +#include "images/timens.pb-c.h" #include "images/ipc-var.pb-c.h" #include "images/ipc-shm.pb-c.h" #include "images/ipc-msg.pb-c.h" diff --git a/criu/pstree.c b/criu/pstree.c index 19cf5ad38..d0e81bfad 100644 --- a/criu/pstree.c +++ b/criu/pstree.c @@ -814,6 +814,8 @@ static unsigned long get_clone_mask(TaskKobjIdsEntry *i, mask |= CLONE_NEWIPC; if (i->uts_ns_id != p->uts_ns_id) mask |= CLONE_NEWUTS; + if (i->time_ns_id != p->time_ns_id) + mask |= CLONE_NEWTIME; if (i->mnt_ns_id != p->mnt_ns_id) mask |= CLONE_NEWNS; if (i->user_ns_id != p->user_ns_id) diff --git a/criu/timens.c b/criu/timens.c new file mode 100644 index 000000000..79ba6a2ce --- /dev/null +++ b/criu/timens.c @@ -0,0 +1,130 @@ +#include +#include + +#include "types.h" +#include "proc_parse.h" +#include "namespaces.h" +#include "timens.h" + +#include "protobuf.h" +#include "images/timens.pb-c.h" + +int dump_time_ns(int ns_id) +{ + struct cr_img *img; + TimensEntry te = TIMENS_ENTRY__INIT; + Timespec b = TIMESPEC__INIT, m = TIMESPEC__INIT; + struct timespec ts; + int ret; + + img = open_image(CR_FD_TIMENS, O_DUMP, ns_id); + if (!img) + return -1; + + clock_gettime(CLOCK_MONOTONIC, &ts); + te.monotonic = &m; + te.monotonic->tv_sec = ts.tv_sec; + te.monotonic->tv_nsec = ts.tv_nsec; + clock_gettime(CLOCK_BOOTTIME, &ts); + te.boottime = &b; + te.boottime->tv_sec = ts.tv_sec; + te.boottime->tv_nsec = ts.tv_nsec; + + ret = pb_write_one(img, &te, PB_TIMENS); + close_image(img); + + return ret < 0 ? 
-1 : 0; +} + +static void normalize_timespec(struct timespec *ts) +{ + while (ts->tv_nsec >= NSEC_PER_SEC) { + ts->tv_nsec -= NSEC_PER_SEC; + ++ts->tv_sec; + } + while (ts->tv_nsec < 0) { + ts->tv_nsec += NSEC_PER_SEC; + --ts->tv_sec; + } +} + + +int prepare_timens(int id) +{ + int exit_code = -1; + int ret, fd = -1; + struct cr_img *img; + TimensEntry *te; + struct timespec ts; + struct timespec prev_moff = {}, prev_boff = {}; + + img = open_image(CR_FD_TIMENS, O_RSTR, id); + if (!img) + return -1; + + ret = pb_read_one(img, &te, PB_TIMENS); + close_image(img); + if (ret < 0) + goto err; + + if (unshare(CLONE_NEWTIME)) { + pr_perror("Unable to create a new time namespace"); + return -1; + } + + if (parse_timens_offsets(&prev_boff, &prev_moff)) + goto err; + + fd = open_proc_rw(PROC_SELF, "timens_offsets"); + if (fd < 0) + goto err; + + clock_gettime(CLOCK_MONOTONIC, &ts); + ts.tv_sec = ts.tv_sec - prev_moff.tv_sec; + ts.tv_nsec = ts.tv_nsec - prev_moff.tv_nsec; + + ts.tv_sec = te->monotonic->tv_sec - ts.tv_sec; + ts.tv_nsec = te->monotonic->tv_nsec - ts.tv_nsec; + normalize_timespec(&ts); + + pr_debug("timens: %d %ld %ld\n", + CLOCK_MONOTONIC, ts.tv_sec, ts.tv_nsec); + if (dprintf(fd, "%d %ld %ld\n", + CLOCK_MONOTONIC, ts.tv_sec, ts.tv_nsec) < 0) { + pr_perror("Unable to set a monotonic clock offset"); + goto err; + } + + clock_gettime(CLOCK_BOOTTIME, &ts); + + ts.tv_sec = ts.tv_sec - prev_boff.tv_sec; + ts.tv_nsec = ts.tv_nsec - prev_boff.tv_nsec; + + ts.tv_sec = te->boottime->tv_sec - ts.tv_sec; + ts.tv_nsec = te->boottime->tv_nsec - ts.tv_nsec; + normalize_timespec(&ts); + + pr_debug("timens: %d %ld %ld\n", + CLOCK_BOOTTIME, ts.tv_sec, ts.tv_nsec); + if (dprintf(fd, "%d %ld %ld\n", + CLOCK_BOOTTIME, ts.tv_sec, ts.tv_nsec) < 0) { + pr_perror("Unable to set a boottime clock offset"); + goto err; + } + + timens_entry__free_unpacked(te, NULL); + close_safe(&fd); + + fd = open_proc(PROC_SELF, "ns/time_for_children"); + if (fd < 0) { + pr_perror("Unable to open 
ns/time_for_children"); + goto err; + } + if (switch_ns_by_fd(fd, &time_ns_desc, NULL)) + goto err; + exit_code = 0; +err: + close_safe(&fd); + return exit_code; +} +struct ns_desc time_ns_desc = NS_DESC_ENTRY(CLONE_NEWTIME, "time"); diff --git a/criu/util.c b/criu/util.c index 1646ce1c4..6f6a6dde7 100644 --- a/criu/util.c +++ b/criu/util.c @@ -967,6 +967,8 @@ const char *ns_to_string(unsigned int ns) return "user"; case CLONE_NEWUTS: return "uts"; + case CLONE_NEWTIME: + return "time"; default: return NULL; } diff --git a/images/Makefile b/images/Makefile index e7f0580cf..5ddd37664 100644 --- a/images/Makefile +++ b/images/Makefile @@ -64,6 +64,7 @@ proto-obj-y += autofs.o proto-obj-y += macvlan.o proto-obj-y += sit.o proto-obj-y += memfd.o +proto-obj-y += timens.o CFLAGS += -iquote $(obj)/ diff --git a/images/core.proto b/images/core.proto index e90522914..22c2a9f1f 100644 --- a/images/core.proto +++ b/images/core.proto @@ -70,6 +70,7 @@ message task_kobj_ids_entry { optional uint32 mnt_ns_id = 9; optional uint32 user_ns_id = 10; optional uint32 cgroup_ns_id = 11; + optional uint32 time_ns_id = 12; } message thread_sas_entry { diff --git a/images/timens.proto b/images/timens.proto new file mode 100644 index 000000000..a8272609b --- /dev/null +++ b/images/timens.proto @@ -0,0 +1,10 @@ +syntax = "proto2"; + +message timespec { + required uint64 tv_sec = 1; + required uint64 tv_nsec = 2; +} +message timens_entry { + required timespec monotonic = 1; + required timespec boottime = 2; +} diff --git a/lib/py/images/images.py b/lib/py/images/images.py index dca080657..ca6f207bb 100644 --- a/lib/py/images/images.py +++ b/lib/py/images/images.py @@ -466,6 +466,7 @@ handlers = { 'IDS': entry_handler(pb.task_kobj_ids_entry), 'CREDS': entry_handler(pb.creds_entry), 'UTSNS': entry_handler(pb.utsns_entry), + 'TIMENS': entry_handler(pb.timens_entry), 'IPC_VAR': entry_handler(pb.ipc_var_entry), 'FS': entry_handler(pb.fs_entry), 'GHOST_FILE': ghost_file_handler(),