criu/include/namespaces.h
Tycho Andersen f79f4546cf sysctl: move sysctl calls to usernsd
When in a userns, tasks can't write to certain sysctl files:

(00.009653)      1: Error (sysctl.c:142): Can't open sysctl kernel/hostname: Permission denied

See inline comments for details on affected namespaces.

Mostly for my own education in what is required to port something to be
userns restorable, I ported the sysctl stuff. A potential concern for this
patch is that copying structures with pointers around is kind of gory. I
did it ad-hoc here, but it may be worth inventing some mechanisms to make
it easier, although I'm not sure what exactly that would look like
(potentially re-using some of the protobuf bits; I'll investigate this more
if it looks helpful when doing the cgroup user namespaces port?).

Another issue is that there is not a great way to return non-fd stuff in
memory right now from userns_call; one of the little hacks in this code
would be "simplified" if we invented a way to do this.

v2: coalesce the individual struct sysctl_req requests into one big
    sysctl_userns_req that is in a contiguous region of memory so that we
    can pass it via userns_call. Hopefully nobody finds my little ascii
    diagram too offensive :)
v3: use the fork/setns trick to change the sysctl values in the right ns for
    IPC/UTS namespaces; see inline comment for details
v4: only use sysctl_userns_req when actually doing a userns_call.

Signed-off-by: Tycho Andersen <tycho.andersen@canonical.com>
Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
2015-10-05 13:16:14 +03:00

120 lines
3.2 KiB
C

#ifndef __CR_NS_H__
#define __CR_NS_H__
#include "files.h"
/*
 * Descriptor of one namespace kind: a flag value plus a name string
 * and the length of that string.
 */
struct ns_desc {
	unsigned int	cflag;	/* flag value for this kind (presumably a CLONE_NEW* bit -- confirm) */
	char		*str;	/* name string */
	size_t		len;	/* NS_DESC_ENTRY() fills this with sizeof(str) - 1 */
};
/*
 * Type tag kept in struct ns_id::type.  The numeric values are
 * written out explicitly and start at zero.
 */
enum ns_type {
	NS_UNKNOWN	= 0,
	NS_CRIU		= 1,
	NS_ROOT		= 2,
	NS_OTHER	= 3,
};
/*
 * One namespace id record.  Records point to their descriptor (@nd)
 * and to a further record (@next).
 */
struct ns_id {
	unsigned int	kid;	/* NOTE(review): presumably the kernel-side id -- confirm */
	unsigned int	id;	/* NOTE(review): presumably the id used in images -- confirm */
	pid_t		ns_pid;
	struct ns_desc	*nd;
	struct ns_id	*next;
	enum ns_type	type;

	/*
	 * Mount namespaces, restore only: signals that this
	 * namespace has been created (every mount in it is
	 * mounted), so other tasks are free to setns() into
	 * it and continue.
	 */
	futex_t		ns_created;

	/* Per-kind data; which member is valid depends on the namespace kind. */
	union {
		struct {
			struct mount_info	*mntinfo_list;
			struct mount_info	*mntinfo_tree;
		} mnt;

		struct {
			int	nlsk;	/* for sockets collection */
			int	seqsk;	/* to talk to parasite daemons */
		} net;
	};
};
/*
 * Global pointer to struct ns_id records -- presumably the head of the
 * list chained through ns_id::next (confirm against the definition).
 */
extern struct ns_id *ns_ids;
/*
 * Expands to a brace-enclosed initializer for a struct ns_desc.
 *
 * @_cflag: value for .cflag
 * @_str:   value for .str.  .len is computed as sizeof(_str) - 1, so it
 *          equals the string length only when _str is a string literal
 *          (or a char array) -- not when it is a plain char pointer.
 */
#define NS_DESC_ENTRY(_cflag, _str)			\
	{						\
		.cflag	= _cflag,			\
		.str	= _str,				\
		.len	= sizeof(_str) - 1,		\
	}
/* Boolean check on a struct fd_link; the exact criteria live with the definition. */
extern bool check_ns_proc(struct fd_link *link);
/* Descriptors for two namespace kinds (pid and user, going by the names). */
extern struct ns_desc pid_ns_desc;
extern struct ns_desc user_ns_desc;
/* NOTE(review): presumably a bitmask of namespace flags for the root task -- confirm. */
extern unsigned long root_ns_mask;
/* NOTE(review): presumably the dump ops and image-collect info for namespace files -- confirm. */
extern const struct fdtype_ops nsfile_dump_ops;
extern struct collect_image_info nsfile_cinfo;
/*
 * Namespace API.  Semantics are defined by the implementations; the
 * grouping below follows the function names only.
 */

/* Walking and collecting. */
extern int walk_namespaces(struct ns_desc *nd,
			   int (*cb)(struct ns_id *, void *),
			   void *oarg);
extern int collect_namespaces(bool for_dump);
extern int collect_mnt_namespaces(bool for_dump);

/* Dump side. */
extern int dump_mnt_namespaces(void);
extern int dump_namespaces(struct pstree_item *item, unsigned int ns_flags);
extern int dump_task_ns_ids(struct pstree_item *item);
extern int predump_task_ns_ids(struct pstree_item *item);
extern int try_show_namespaces(int pid);

/* Restore side. */
extern int prepare_namespace_before_tasks(void);
extern int prepare_namespace(struct pstree_item *item, unsigned long clone_flags);
extern struct ns_id *rst_new_ns_id(unsigned int id, pid_t pid, struct ns_desc *nd);
extern int rst_add_ns_id(unsigned int id, struct pstree_item *item, struct ns_desc *nd);

/* Switching into a namespace and back (@rst is filled by switch_ns, consumed by restore_ns -- confirm). */
extern int switch_ns(int pid, struct ns_desc *nd, int *rst);
extern int restore_ns(int rst, struct ns_desc *nd);

/* Lookup. */
extern struct ns_id *lookup_ns_by_id(unsigned int id, struct ns_desc *nd);
/*
 * User namespace helpers.  Grouped by name; see the definitions for
 * the actual semantics.
 */

/* Collect / dump. */
extern int collect_user_namespaces(bool for_dump);
extern int dump_user_ns(pid_t pid, int ns_id);

/* Restore and the usernsd helper process. */
extern int prepare_userns(struct pstree_item *item);
extern int stop_usernsd(void);

/* uid/gid translation (NOTE(review): direction of the mapping is not visible here -- confirm). */
extern int userns_uid(int uid);
extern int userns_gid(int gid);

/* Cleanup. */
extern void free_userns_maps(void);
/*
 * Callback type taken by userns_call(): receives an opaque argument,
 * a file descriptor and a pid, and returns an int.
 * NOTE(review): which pid is passed is not visible in this header -- confirm.
 */
typedef int (*uns_call_t)(void *arg, int fd, pid_t pid);
/*
 * UNS_ASYNC -- asynchronous call.  The caller may get control back
 * before the call has even started; the only guarantee is that the
 * call is done by the time CR_STATE_COMPLETE happens.
 *
 * Without this flag the call is synchronous: the function returns
 * strictly after the call has finished.
 */
#define UNS_ASYNC		0x1

/*
 * UNS_FDOUT -- the call produces a file descriptor which has to be
 * sent back to the caller.  Cannot be combined with UNS_ASYNC.
 */
#define UNS_FDOUT		0x2

#define MAX_UNSFD_MSG_SIZE	4096
/*
 * While restoring inside a user namespace, certain operations are
 * not permitted there because the task lacks the needed capabilities.
 * If such an operation can be handed over to another process,
 * userns_call() provides the means to do so.
 *
 * When we are not in a userns, the callback is simply invoked
 * immediately, in the context of the calling task.
 */
extern int userns_call(uns_call_t call, int flags, void *arg, size_t arg_size, int fd);
#endif /* __CR_NS_H__ */