criu: Add support for time namespaces

The time namespace allows for per-namespace offsets to the system
monotonic and boot-time clocks.

C/R of time namespaces is very straightforward. On dump, criu enters the
target time namespace and dumps the current clock values; then, on restore,
criu creates a new namespace and restores the clock values.

Signed-off-by: Andrei Vagin <avagin@gmail.com>
This commit is contained in:
Andrei Vagin 2019-08-14 07:40:40 +03:00
parent 0e9b42acf9
commit 4127ef4ab7
22 changed files with 272 additions and 3 deletions

View file

@ -87,6 +87,7 @@ obj-y += config.o
obj-y += servicefd.o
obj-y += pie-util-vdso.o
obj-y += vdso.o
obj-y += timens.o
obj-$(CONFIG_COMPAT) += pie-util-vdso-elf32.o
CFLAGS_pie-util-vdso-elf32.o += -DCONFIG_VDSO_32
obj-$(CONFIG_COMPAT) += vdso-compat.o

View file

@ -1266,6 +1266,16 @@ static int check_kcmp_epoll(void)
return 0;
}
/*
 * Feature check for time namespaces.
 *
 * Returns 0 when kdat.has_timens is set, otherwise logs an error and
 * returns -1.
 */
static int check_time_namespace(void)
{
	if (kdat.has_timens)
		return 0;

	pr_err("Time namespaces are not supported\n");
	return -1;
}
static int check_net_diag_raw(void)
{
check_sock_diag();
@ -1384,6 +1394,7 @@ int cr_check(void)
ret |= check_kcmp_epoll();
ret |= check_net_diag_raw();
ret |= check_clone3_set_tid();
ret |= check_time_namespace();
}
/*
@ -1486,6 +1497,7 @@ static struct feature_list feature_list[] = {
{ "nsid", check_nsid },
{ "link_nsid", check_link_nsid},
{ "kcmp_epoll", check_kcmp_epoll},
{ "timens", check_time_namespace},
{ "external_net_ns", check_external_net_ns},
{ "clone3_set_tid", check_clone3_set_tid},
{ NULL, NULL },

View file

@ -76,6 +76,7 @@
#include "fdstore.h"
#include "string.h"
#include "memfd.h"
#include "timens.h"
#include "parasite-syscall.h"
#include "files-reg.h"
@ -1406,7 +1407,7 @@ static inline int fork_with_pid(struct pstree_item *item)
if (kdat.has_clone3_set_tid) {
ret = clone3_with_pid_noasan(restore_task_with_children,
&ca, (ca.clone_flags &
~(CLONE_NEWNET | CLONE_NEWCGROUP)),
~(CLONE_NEWNET | CLONE_NEWCGROUP | CLONE_NEWTIME)),
SIGCHLD, pid);
} else {
/*
@ -1424,7 +1425,7 @@ static inline int fork_with_pid(struct pstree_item *item)
close_pid_proc();
ret = clone_noasan(restore_task_with_children,
(ca.clone_flags &
~(CLONE_NEWNET | CLONE_NEWCGROUP)) | SIGCHLD,
~(CLONE_NEWNET | CLONE_NEWCGROUP | CLONE_NEWTIME)) | SIGCHLD,
&ca);
}
@ -1745,6 +1746,11 @@ static int restore_task_with_children(void *_arg)
}
}
if (root_ns_mask & CLONE_NEWTIME) {
if (prepare_timens(current->ids->time_ns_id))
goto err;
}
/* Wait prepare_userns */
if (restore_finish_ns_stage(CR_STATE_ROOT_TASK, CR_STATE_PREPARE_NAMESPACES) < 0)
goto err;

View file

@ -102,6 +102,7 @@ struct cr_fd_desc_tmpl imgset_template[CR_FD_MAX] = {
FD_ENTRY(NETNF_CT, "netns-ct-%u"),
FD_ENTRY(NETNF_EXP, "netns-exp-%u"),
FD_ENTRY(FILES, "files"),
FD_ENTRY(TIMENS, "timens-%u"),
[CR_FD_STATS] = {
.fmt = "stats-%s",

View file

@ -26,6 +26,7 @@ enum {
CR_FD_UTSNS,
CR_FD_MNTS,
CR_FD_USERNS,
CR_FD_TIMENS,
_CR_FD_IPCNS_FROM,
CR_FD_IPC_VAR,

View file

@ -67,6 +67,7 @@ struct kerndat_s {
bool has_kcmp_epoll_tfd;
bool has_fsopen;
bool has_clone3_set_tid;
bool has_timens;
};
extern struct kerndat_s kdat;

View file

@ -95,6 +95,7 @@
#define AUTOFS_MAGIC 0x49353943 /* Sochi */
#define FILES_MAGIC 0x56303138 /* Toropets */
#define MEMFD_INODE_MAGIC 0x48453499 /* Dnipro */
#define TIMENS_MAGIC 0x43114433 /* Beslan */
#define IFADDR_MAGIC RAW_IMAGE_MAGIC
#define ROUTE_MAGIC RAW_IMAGE_MAGIC

View file

@ -34,7 +34,13 @@
#define CLONE_NEWCGROUP 0x02000000
#endif
#define CLONE_ALLNS (CLONE_NEWPID | CLONE_NEWNET | CLONE_NEWIPC | CLONE_NEWUTS | CLONE_NEWNS | CLONE_NEWUSER | CLONE_NEWCGROUP)
/* Fallback definition for builds whose system headers do not provide CLONE_NEWTIME. */
#ifndef CLONE_NEWTIME
#define CLONE_NEWTIME 0x00000080
#endif
/* Union of all the namespace clone flags listed below, time namespaces included. */
#define CLONE_ALLNS (CLONE_NEWPID | CLONE_NEWNET | CLONE_NEWIPC | \
CLONE_NEWUTS | CLONE_NEWNS | CLONE_NEWUSER | \
CLONE_NEWCGROUP | CLONE_NEWTIME)
/* Nested namespaces are supported only for these types */
#define CLONE_SUBNS (CLONE_NEWNS | CLONE_NEWNET)
@ -146,6 +152,7 @@ extern bool check_ns_proc(struct fd_link *link);
extern struct ns_desc pid_ns_desc;
extern struct ns_desc user_ns_desc;
extern struct ns_desc time_ns_desc;
extern unsigned long root_ns_mask;
extern const struct fdtype_ops nsfile_dump_ops;

View file

@ -102,4 +102,6 @@ extern bool is_vma_range_fmt(char *line);
extern void parse_vmflags(char *buf, u32 *flags, u64 *madv, int *io_pf);
extern int parse_uptime(uint64_t *upt);
extern int parse_timens_offsets(struct timespec *boff, struct timespec *moff);
#endif /* __CR_PROC_PARSE_H__ */

View file

@ -63,6 +63,7 @@ enum {
PB_FILE,
PB_MEMFD_FILE,
PB_MEMFD_INODE, /* 60 */
PB_TIMENS,
/* PB_AUTOGEN_STOP */

9
criu/include/timens.h Normal file
View file

@ -0,0 +1,9 @@
#ifndef __CR_TIME_NS_H__
#define __CR_TIME_NS_H__

/*
 * Dump the monotonic and boot-time clock values into the timens image
 * identified by @ns_id. Returns 0 on success, -1 on error.
 */
extern int dump_time_ns(int ns_id);

/*
 * Create a new time namespace and set its clock offsets from the timens
 * image identified by @id. Callers pass a time namespace id here (not a
 * process id). Returns 0 on success, -1 on error.
 */
extern int prepare_timens(int id);

/* Descriptor of the time namespace type; defined in timens.c. */
extern struct ns_desc time_ns_desc;

#endif /* __CR_TIME_NS_H__ */

View file

@ -783,6 +783,21 @@ out:
return ret;
}
/*
 * Probe for time namespace support by testing whether
 * /proc/self/timens_offsets exists, and record the answer in
 * kdat.has_timens.
 *
 * Returns 0 when the probe gave a definite answer (present, or absent with
 * ENOENT) and -1 when access() failed for any other reason.
 */
static int has_time_namespace(void)
{
	if (access("/proc/self/timens_offsets", F_OK) >= 0) {
		kdat.has_timens = true;
		return 0;
	}

	/* Anything but "no such file" is an unexpected failure. */
	if (errno != ENOENT) {
		pr_perror("Unable to access /proc/self/timens_offsets");
		return -1;
	}

	pr_debug("Time namespaces are not supported.\n");
	kdat.has_timens = false;
	return 0;
}
int __attribute__((weak)) kdat_x86_has_ptrace_fpu_xsave_bug(void)
{
return 0;
@ -1091,6 +1106,8 @@ int kerndat_init(void)
ret = kerndat_has_fsopen();
if (!ret)
ret = kerndat_has_clone3_set_tid();
if (!ret)
ret = has_time_namespace();
kerndat_lsm();
kerndat_mmap_min_addr();

View file

@ -20,6 +20,7 @@
#include "imgset.h"
#include "uts_ns.h"
#include "ipc_ns.h"
#include "timens.h"
#include "mount.h"
#include "pstree.h"
#include "namespaces.h"
@ -39,6 +40,7 @@ static struct ns_desc *ns_desc_array[] = {
&pid_ns_desc,
&user_ns_desc,
&mnt_ns_desc,
&time_ns_desc,
&cgroup_ns_desc,
};
@ -157,6 +159,9 @@ int join_ns_add(const char *type, char *ns_file, char *extra_opts)
} else if (!strncmp(type, "uts", 4)) {
jn->nd = &uts_ns_desc;
join_ns_flags |= CLONE_NEWUTS;
} else if (!strncmp(type, "time", 5)) {
jn->nd = &time_ns_desc;
join_ns_flags |= CLONE_NEWTIME;
} else if (!strncmp(type, "ipc", 4)) {
jn->nd = &ipc_ns_desc;
join_ns_flags |= CLONE_NEWIPC;
@ -568,6 +573,10 @@ static int open_ns_fd(struct file_desc *d, int *new_fd)
item = t;
nd = &cgroup_ns_desc;
break;
} else if (ids->time_ns_id == nfi->nfe->ns_id) {
item = t;
nd = &time_ns_desc;
break;
}
}
@ -671,6 +680,13 @@ int dump_task_ns_ids(struct pstree_item *item)
return -1;
}
ids->has_time_ns_id = true;
ids->time_ns_id = get_ns_id(pid, &time_ns_desc, NULL);
if (!ids->time_ns_id) {
pr_err("Can't make timens id\n");
return -1;
}
ids->has_mnt_ns_id = true;
ids->mnt_ns_id = get_ns_id(pid, &mnt_ns_desc, NULL);
if (!ids->mnt_ns_id) {
@ -914,6 +930,9 @@ static int check_user_ns(int pid)
if ((root_ns_mask & CLONE_NEWUTS) &&
switch_ns(pid, &uts_ns_desc, NULL))
exit(1);
if ((root_ns_mask & CLONE_NEWTIME) &&
switch_ns(pid, &time_ns_desc, NULL))
exit(1);
if ((root_ns_mask & CLONE_NEWIPC) &&
switch_ns(pid, &ipc_ns_desc, NULL))
exit(1);
@ -1002,6 +1021,11 @@ static int do_dump_namespaces(struct ns_id *ns)
ns->id, ns->ns_pid);
ret = dump_uts_ns(ns->id);
break;
case CLONE_NEWTIME:
pr_info("Dump TIME namespace %d via %d\n",
ns->id, ns->ns_pid);
ret = dump_time_ns(ns->id);
break;
case CLONE_NEWIPC:
pr_info("Dump IPC namespace %d via %d\n",
ns->id, ns->ns_pid);

View file

@ -1472,6 +1472,44 @@ static bool should_skip_mount(const char *mountpoint)
return false;
}
/*
 * Read the per-clock offsets from /proc/self/timens_offsets.
 *
 * Every line is parsed as "<clockid> <sec> <nsec>" with a numeric clock id.
 *
 * @boff: receives the CLOCK_BOOTTIME offset when such a line is present
 * @moff: receives the CLOCK_MONOTONIC offset when such a line is present
 *
 * An output is left untouched if the file has no line for its clock.
 *
 * Returns 0 on success; -1 if the file cannot be opened, a line cannot be
 * parsed, or a line carries an unknown clock id.
 */
int parse_timens_offsets(struct timespec *boff, struct timespec *moff)
{
	int exit_code = -1;
	FILE *f;

	f = fopen_proc(PROC_SELF, "timens_offsets");
	if (!f) {
		pr_perror("Unable to open /proc/self/timens_offsets");
		/* No stream to close: fclose(NULL) is undefined behaviour. */
		return -1;
	}

	while (fgets(buf, BUF_SIZE, f)) {
		int64_t sec, nsec;
		int clockid;

		/* SCNd64 is the scanf-family conversion for int64_t. */
		if (sscanf(buf, "%d %" SCNd64 " %" SCNd64 "\n", &clockid, &sec, &nsec) != 3) {
			pr_err("Unable to parse: %s\n", buf);
			goto out;
		}

		switch (clockid) {
		case CLOCK_MONOTONIC:
			moff->tv_sec = sec;
			moff->tv_nsec = nsec;
			break;
		case CLOCK_BOOTTIME:
			boff->tv_sec = sec;
			boff->tv_nsec = nsec;
			break;
		default:
			pr_err("Unknown clockid: %d\n", clockid);
			goto out;
		}
	}

	exit_code = 0;
out:
	fclose(f);
	return exit_code;
}
struct mount_info *parse_mountinfo(pid_t pid, struct ns_id *nsid, bool for_dump)
{
struct mount_info *list = NULL;

View file

@ -37,6 +37,7 @@
#include "images/creds.pb-c.h"
#include "images/timer.pb-c.h"
#include "images/utsns.pb-c.h"
#include "images/timens.pb-c.h"
#include "images/ipc-var.pb-c.h"
#include "images/ipc-shm.pb-c.h"
#include "images/ipc-msg.pb-c.h"

View file

@ -814,6 +814,8 @@ static unsigned long get_clone_mask(TaskKobjIdsEntry *i,
mask |= CLONE_NEWIPC;
if (i->uts_ns_id != p->uts_ns_id)
mask |= CLONE_NEWUTS;
if (i->time_ns_id != p->time_ns_id)
mask |= CLONE_NEWTIME;
if (i->mnt_ns_id != p->mnt_ns_id)
mask |= CLONE_NEWNS;
if (i->user_ns_id != p->user_ns_id)

130
criu/timens.c Normal file
View file

@ -0,0 +1,130 @@
#include <time.h>
#include <sched.h>
#include "types.h"
#include "proc_parse.h"
#include "namespaces.h"
#include "timens.h"
#include "protobuf.h"
#include "images/timens.pb-c.h"
/*
 * Dump the current CLOCK_MONOTONIC and CLOCK_BOOTTIME values into the
 * timens image identified by @ns_id.
 *
 * The clocks are read with clock_gettime() in the calling context.
 *
 * Returns 0 on success, -1 if the image cannot be opened, a clock cannot be
 * read, or the entry cannot be written.
 */
int dump_time_ns(int ns_id)
{
	struct cr_img *img;
	TimensEntry te = TIMENS_ENTRY__INIT;
	Timespec b = TIMESPEC__INIT, m = TIMESPEC__INIT;
	struct timespec ts;
	int ret = -1;

	img = open_image(CR_FD_TIMENS, O_DUMP, ns_id);
	if (!img)
		return -1;

	/* Never store an uninitialised timespec if the clock read fails. */
	if (clock_gettime(CLOCK_MONOTONIC, &ts)) {
		pr_perror("Unable to read the monotonic clock");
		goto out;
	}
	te.monotonic = &m;
	te.monotonic->tv_sec = ts.tv_sec;
	te.monotonic->tv_nsec = ts.tv_nsec;

	if (clock_gettime(CLOCK_BOOTTIME, &ts)) {
		pr_perror("Unable to read the boottime clock");
		goto out;
	}
	te.boottime = &b;
	te.boottime->tv_sec = ts.tv_sec;
	te.boottime->tv_nsec = ts.tv_nsec;

	ret = pb_write_one(img, &te, PB_TIMENS);
out:
	close_image(img);

	return ret < 0 ? -1 : 0;
}
/*
 * Bring ts->tv_nsec into the range [0, NSEC_PER_SEC) by moving whole
 * seconds between tv_nsec and tv_sec.
 */
static void normalize_timespec(struct timespec *ts)
{
	/* Carry surplus nanoseconds up into the seconds field. */
	for (; ts->tv_nsec >= NSEC_PER_SEC; ++ts->tv_sec)
		ts->tv_nsec -= NSEC_PER_SEC;

	/* Borrow from the seconds field until the nanoseconds are non-negative. */
	for (; ts->tv_nsec < 0; --ts->tv_sec)
		ts->tv_nsec += NSEC_PER_SEC;
}
int prepare_timens(int id)
{
int exit_code = -1;
int ret, fd = -1;
struct cr_img *img;
TimensEntry *te;
struct timespec ts;
struct timespec prev_moff = {}, prev_boff = {};
img = open_image(CR_FD_TIMENS, O_RSTR, id);
if (!img)
return -1;
ret = pb_read_one(img, &te, PB_TIMENS);
close_image(img);
if (ret < 0)
goto err;
if (unshare(CLONE_NEWTIME)) {
pr_perror("Unable to create a new time namespace");
return -1;
}
if (parse_timens_offsets(&prev_boff, &prev_moff))
goto err;
fd = open_proc_rw(PROC_SELF, "timens_offsets");
if (fd < 0)
goto err;
clock_gettime(CLOCK_MONOTONIC, &ts);
ts.tv_sec = ts.tv_sec - prev_moff.tv_sec;
ts.tv_nsec = ts.tv_nsec - prev_moff.tv_nsec;
ts.tv_sec = te->monotonic->tv_sec - ts.tv_sec;
ts.tv_nsec = te->monotonic->tv_nsec - ts.tv_nsec;
normalize_timespec(&ts);
pr_debug("timens: %d %ld %ld\n",
CLOCK_MONOTONIC, ts.tv_sec, ts.tv_nsec);
if (dprintf(fd, "%d %ld %ld\n",
CLOCK_MONOTONIC, ts.tv_sec, ts.tv_nsec) < 0) {
pr_perror("Unable to set a monotonic clock offset");
goto err;
}
clock_gettime(CLOCK_BOOTTIME, &ts);
ts.tv_sec = ts.tv_sec - prev_boff.tv_sec;
ts.tv_nsec = ts.tv_nsec - prev_boff.tv_nsec;
ts.tv_sec = te->boottime->tv_sec - ts.tv_sec;
ts.tv_nsec = te->boottime->tv_nsec - ts.tv_nsec;
normalize_timespec(&ts);
pr_debug("timens: %d %ld %ld\n",
CLOCK_BOOTTIME, ts.tv_sec, ts.tv_nsec);
if (dprintf(fd, "%d %ld %ld\n",
CLOCK_BOOTTIME, ts.tv_sec, ts.tv_nsec) < 0) {
pr_perror("Unable to set a boottime clock offset");
goto err;
}
timens_entry__free_unpacked(te, NULL);
close_safe(&fd);
fd = open_proc(PROC_SELF, "ns/time_for_children");
if (fd < 0) {
pr_perror("Unable to open ns/time_for_children");
goto err;
}
if (switch_ns_by_fd(fd, &time_ns_desc, NULL))
goto err;
exit_code = 0;
err:
close_safe(&fd);
return exit_code;
}
/* Time namespace descriptor, built by NS_DESC_ENTRY from the CLONE_NEWTIME flag and the "time" name. */
struct ns_desc time_ns_desc = NS_DESC_ENTRY(CLONE_NEWTIME, "time");

View file

@ -967,6 +967,8 @@ const char *ns_to_string(unsigned int ns)
return "user";
case CLONE_NEWUTS:
return "uts";
case CLONE_NEWTIME:
return "time";
default:
return NULL;
}

View file

@ -64,6 +64,7 @@ proto-obj-y += autofs.o
proto-obj-y += macvlan.o
proto-obj-y += sit.o
proto-obj-y += memfd.o
proto-obj-y += timens.o
CFLAGS += -iquote $(obj)/

View file

@ -70,6 +70,7 @@ message task_kobj_ids_entry {
optional uint32 mnt_ns_id = 9;
optional uint32 user_ns_id = 10;
optional uint32 cgroup_ns_id = 11;
optional uint32 time_ns_id = 12;
}
message thread_sas_entry {

10
images/timens.proto Normal file
View file

@ -0,0 +1,10 @@
syntax = "proto2";
// A seconds/nanoseconds pair; dump_time_ns() fills it from the struct
// timespec returned by clock_gettime().
message timespec {
required uint64 tv_sec = 1;
required uint64 tv_nsec = 2;
}
// Contents of a timens image: the CLOCK_MONOTONIC and CLOCK_BOOTTIME values
// read at dump time, from which prepare_timens() computes the offsets to set.
message timens_entry {
required timespec monotonic = 1;
required timespec boottime = 2;
}

View file

@ -466,6 +466,7 @@ handlers = {
'IDS': entry_handler(pb.task_kobj_ids_entry),
'CREDS': entry_handler(pb.creds_entry),
'UTSNS': entry_handler(pb.utsns_entry),
'TIMENS': entry_handler(pb.timens_entry),
'IPC_VAR': entry_handler(pb.ipc_var_entry),
'FS': entry_handler(pb.fs_entry),
'GHOST_FILE': ghost_file_handler(),