mirror of
https://github.com/checkpoint-restore/criu.git
synced 2026-01-23 02:14:37 +00:00
ipc_sysctl: Prioritize restoring IPC variables using non usernsd approach
Since commit 5563cabdde, a user with enough capabilities can open IPC sysctl files and write to them. Therefore, we no longer need the usernsd process in the outer user namespace to help with that. Furthermore, some later commits, 1f5c135ee5 and 0889f44e28, bind the IPC namespace to the opened file descriptor of the IPC sysctl at open() time, so the changed value no longer depends on the IPC namespace at write() time. This breaks the current usernsd approach. So, we prioritize opening/writing IPC sysctl files directly in the context of the restored process, without usernsd help. This approach succeeds on newer kernels, since the restored process has enough capabilities at this restore stage. On older kernels, the open() fails and we fall back to the usernsd approach. Signed-off-by: Bui Quang Minh <minhquangbui99@gmail.com>
This commit is contained in:
parent
3db8d1a6c6
commit
840735aa08
3 changed files with 45 additions and 8 deletions
|
|
@ -34,8 +34,9 @@ enum {
|
|||
/*
|
||||
* Some entries might be missing mark them as optional.
|
||||
*/
|
||||
#define CTL_FLAGS_OPTIONAL 1
|
||||
#define CTL_FLAGS_HAS 2
|
||||
#define CTL_FLAGS_READ_EIO_SKIP 4
|
||||
#define CTL_FLAGS_OPTIONAL 1
|
||||
#define CTL_FLAGS_HAS 2
|
||||
#define CTL_FLAGS_READ_EIO_SKIP 4
|
||||
/*
 * Fall back to the userns approach when opening an IPC sysctl fails
 * with EACCES. Flags are tested with '&', so this must be a bit of
 * its own and may not share bits with CTL_FLAGS_OPTIONAL (1),
 * CTL_FLAGS_HAS (2) or CTL_FLAGS_READ_EIO_SKIP (4).
 */
#define CTL_FLAGS_IPC_EACCES_SKIP 8
|
||||
|
||||
#endif /* __CR_SYSCTL_H__ */
|
||||
|
|
|
|||
|
|
@ -292,6 +292,8 @@ static void pr_info_ipc_shm(const IpcShmEntry *shm)
|
|||
|
||||
static int ipc_sysctl_req(IpcVarEntry *e, int op)
|
||||
{
|
||||
int i;
|
||||
|
||||
struct sysctl_req req[] = {
|
||||
{ "kernel/sem", e->sem_ctls, CTL_U32A(e->n_sem_ctls) },
|
||||
{ "kernel/msgmax", &e->msg_ctlmax, CTL_U32 },
|
||||
|
|
@ -332,6 +334,9 @@ static int ipc_sysctl_req(IpcVarEntry *e, int op)
|
|||
if (e->has_shm_next_id)
|
||||
req[nr++] = req[16];
|
||||
|
||||
for (i = 0; i < nr; i++)
|
||||
req[i].flags = CTL_FLAGS_IPC_EACCES_SKIP;
|
||||
|
||||
return sysctl_op(req, nr, op, CLONE_NEWIPC);
|
||||
}
|
||||
|
||||
|
|
@ -570,7 +575,7 @@ static int prepare_ipc_sem_desc(struct cr_img *img, const IpcSemEntry *sem)
|
|||
{
|
||||
int ret, id;
|
||||
struct sysctl_req req[] = {
|
||||
{ "kernel/sem_next_id", &sem->desc->id, CTL_U32 },
|
||||
{ "kernel/sem_next_id", &sem->desc->id, CTL_U32, CTL_FLAGS_IPC_EACCES_SKIP },
|
||||
};
|
||||
struct semid_ds semid;
|
||||
|
||||
|
|
@ -703,7 +708,7 @@ static int prepare_ipc_msg_queue(struct cr_img *img, const IpcMsgEntry *msq)
|
|||
{
|
||||
int ret, id;
|
||||
struct sysctl_req req[] = {
|
||||
{ "kernel/msg_next_id", &msq->desc->id, CTL_U32 },
|
||||
{ "kernel/msg_next_id", &msq->desc->id, CTL_U32, CTL_FLAGS_IPC_EACCES_SKIP },
|
||||
};
|
||||
struct msqid_ds msqid;
|
||||
|
||||
|
|
@ -841,7 +846,7 @@ static int prepare_ipc_shm_seg(struct cr_img *img, const IpcShmEntry *shm)
|
|||
{
|
||||
int ret, id, hugetlb_flag = 0;
|
||||
struct sysctl_req req[] = {
|
||||
{ "kernel/shm_next_id", &shm->desc->id, CTL_U32 },
|
||||
{ "kernel/shm_next_id", &shm->desc->id, CTL_U32, CTL_FLAGS_IPC_EACCES_SKIP },
|
||||
};
|
||||
struct shmid_ds shmid;
|
||||
|
||||
|
|
|
|||
|
|
@ -203,6 +203,17 @@ static int __userns_sysctl_op(void *arg, int proc_fd, pid_t pid)
|
|||
* 2. forks a task
|
||||
* 3. setns()es to the UTS/IPC namespace of the caller
|
||||
* 4. write()s to the files and exits
|
||||
*
|
||||
* For the IPC namespace, since
|
||||
* https://github.com/torvalds/linux/commit/5563cabdde, user with
|
||||
* enough capability can open IPC sysctl files and write to them. Later
|
||||
* commit https://github.com/torvalds/linux/commit/1f5c135ee5 and
|
||||
* https://github.com/torvalds/linux/commit/0889f44e28 bind the IPC
|
||||
* namespace at the open() time so the changed value does not depend
|
||||
* on the IPC namespace at the write() time. Also, the permission check
|
||||
* changes a little bit which makes the above approach unusable but we
|
||||
* can simply use the nonuserns version for restoring IPC sysctls, as the
|
||||
* restored process currently has enough capability.
|
||||
*/
|
||||
dir = open("/proc/sys", O_RDONLY, O_DIRECTORY);
|
||||
if (dir < 0) {
|
||||
|
|
@ -335,9 +346,12 @@ out:
|
|||
return ret;
|
||||
}
|
||||
|
||||
static int __nonuserns_sysctl_op(struct sysctl_req *req, size_t nr_req, int op)
|
||||
/* exit_code = 1 in case nonuserns failed but we want to fall back to the userns approach */
|
||||
static int __nonuserns_sysctl_op(struct sysctl_req **orig_req, size_t *orig_nr_req, int op)
|
||||
{
|
||||
int ret, exit_code = -1;
|
||||
struct sysctl_req *req = *orig_req;
|
||||
size_t nr_req = *orig_nr_req;
|
||||
|
||||
while (nr_req--) {
|
||||
int fd;
|
||||
|
|
@ -351,6 +365,14 @@ static int __nonuserns_sysctl_op(struct sysctl_req *req, size_t nr_req, int op)
|
|||
req++;
|
||||
continue;
|
||||
}
|
||||
if (errno == EACCES && (req->flags & CTL_FLAGS_IPC_EACCES_SKIP)) {
|
||||
/* The remaining requests are restored using userns approach */
|
||||
*orig_req = req;
|
||||
*orig_nr_req = nr_req + 1;
|
||||
exit_code = 1;
|
||||
goto out;
|
||||
}
|
||||
|
||||
pr_perror("Can't open sysctl %s", req->name);
|
||||
goto out;
|
||||
}
|
||||
|
|
@ -404,7 +426,16 @@ int sysctl_op(struct sysctl_req *req, size_t nr_req, int op, unsigned int ns)
|
|||
* so we can do those in process as well.
|
||||
*/
|
||||
if (!ns || ns & CLONE_NEWNET || op == CTL_READ)
|
||||
return __nonuserns_sysctl_op(req, nr_req, op);
|
||||
return __nonuserns_sysctl_op(&req, &nr_req, op);
|
||||
|
||||
/* Try to use nonuserns for restoring IPC sysctl and fallback to
|
||||
* userns approach when the returned code is 1.
|
||||
*/
|
||||
if (ns & CLONE_NEWIPC && op == CTL_WRITE) {
|
||||
ret = __nonuserns_sysctl_op(&req, &nr_req, op);
|
||||
if (ret <= 0)
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* In order to avoid lots of opening of /proc/sys for each struct sysctl_req,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue