restore: add infrastructure to enable shadow stack

There are several gotchas when restoring a task with shadow stack:
* depending on the compiler options, glibc version and glibc tunables
  CRIU can run with or without shadow stack.
* shadow stack VMAs are special, they must be created using a dedicated
  map_shadow_stack() system call and can be modified only by a special
  instruction (wrss) that is only available when shadow stack is
  enabled.
* once shadow stack is enabled, it is not writable even with wrss;
  writes to shadow stack can be only enabled with ptrace() and only when
  shadow stack is enabled in the tracee.
* if the shadow stack is enabled during restore rather than by glibc,
  calling retq after arch_prctl() that enables the shadow stack causes
  #CP, so the function that enables shadow stack can never return.

Add the infrastructure required to cope with all of those:

* modify the restore code to allow trampoline (arch_shstk_trampoline)
  that will enable shadow stack and call restore_task_with_children().
* add call to arch_shstk_unlock() right after the tasks are clone()ed;
  this will allow unlocking shadow stack features and making shadow
  stack writable.
* add stubs for architectures that do not support shadow stacks
* add implementation of arch_shstk_trampoline() and arch_shstk_unlock()
  for x86, but keep it disabled; it will be enabled along with the addition
  of the code that will restore shadow stack in the restorer blob

Signed-off-by: Mike Rapoport (IBM) <rppt@kernel.org>
This commit is contained in:
Mike Rapoport (IBM) 2022-05-31 12:45:09 +03:00 committed by Andrei Vagin
parent f47899c9ef
commit 7dd5830023
5 changed files with 176 additions and 1 deletions

View file

@ -66,4 +66,13 @@ int arch_shstk_prepare(struct pstree_item *item, CoreEntry *core,
struct task_restore_args *ta);
#define arch_shstk_prepare arch_shstk_prepare
/*
 * The x86 implementations below are compiled out for now. Each prototype is
 * paired with a same-named #define so that a generic header can detect the
 * arch override with #ifndef and skip its own no-op fallback.
 */
#if 0
/*
 * Called for a newly created task with its real pid; returns 0 on success
 * and a negative value on failure.
 */
int arch_shstk_unlock(struct pstree_item *item, CoreEntry *core, pid_t pid);
#define arch_shstk_unlock arch_shstk_unlock
/*
 * Runs func(arg) for the given task after adjusting the shadow stack state.
 * When the shadow stack gets enabled, this call does not return to its
 * caller: the process exits with the value returned by func.
 */
int arch_shstk_trampoline(struct pstree_item *item, CoreEntry *core,
int (*func)(void *arg), void *arg);
#define arch_shstk_trampoline arch_shstk_trampoline
#endif
#endif /* __CR_ASM_SHSTK_H__ */

View file

@ -1,3 +1,6 @@
#include <sys/ptrace.h>
#include <sys/wait.h>
#include <common/list.h>
#include <compel/cpu.h>
@ -88,3 +91,133 @@ int arch_shstk_prepare(struct pstree_item *item, CoreEntry *core,
return 0;
}
/*
 * Unlock the shadow stack features of a freshly created task.
 *
 * The caller waits until the task announces through the shstk_enable futex
 * that it is ready, then seizes and interrupts it with ptrace, asks the
 * kernel to unlock ARCH_SHSTK_SHSTK and ARCH_SHSTK_WRSS for it, detaches
 * and finally releases the task through the shstk_unlock futex.
 *
 * @item: process tree entry of the task
 * @core: core image entry of the task
 * @pid:  real pid of the task to operate on
 *
 * Returns 0 when there is nothing to do or when every step succeeded,
 * -1 if attaching, interrupting, waiting, unlocking or detaching failed.
 * Once the wait on shstk_enable is over the task is always woken up via
 * shstk_unlock, including on failure, so it is never left blocked.
 */
int arch_shstk_unlock(struct pstree_item *item, CoreEntry *core, pid_t pid)
{
	unsigned long features;
	int status;
	int ret = -1;

	/*
	 * CRIU runs with no shadow stack and the task does not need one,
	 * nothing to do.
	 */
	if (!kdat.has_shstk && !task_needs_shstk(item, core))
		return 0;

	futex_wait_until(&rsti(item)->shstk_enable, 1);

	if (ptrace(PTRACE_SEIZE, pid, 0, 0)) {
		pr_perror("Cannot attach to %d", pid);
		goto futex_wake;
	}

	if (ptrace(PTRACE_INTERRUPT, pid, 0, 0)) {
		pr_perror("Cannot interrupt the %d task", pid);
		goto detach;
	}

	if (wait4(pid, &status, __WALL, NULL) != pid) {
		pr_perror("waitpid(%d) failed", pid);
		goto detach;
	}

	features = ARCH_SHSTK_SHSTK | ARCH_SHSTK_WRSS;
	if (ptrace(PTRACE_ARCH_PRCTL, pid, features, ARCH_SHSTK_UNLOCK)) {
		pr_perror("Cannot unlock CET for %d task", pid);
		goto detach;
	}

	/*
	 * Only the fall-through path is a success; the error paths above
	 * jump straight to the detach label and must keep ret == -1.
	 */
	ret = 0;

detach:
	if (ptrace(PTRACE_DETACH, pid, NULL, 0)) {
		pr_perror("Unable to detach %d", pid);
		ret = -1;
	}

futex_wake:
	futex_set_and_wake(&rsti(item)->shstk_unlock, 1);
	return ret;
}
/*
 * Two-step handshake on the futexes stored in the task's restore info:
 * first set shstk_enable to 1 and wake its waiters, then block until
 * shstk_unlock becomes 1. The order of the two calls is significant.
 */
static void shstk_sync_unlock(struct pstree_item *item)
{
/* notify parent that shadow stack is enabled ... */
futex_set_and_wake(&rsti(item)->shstk_enable, 1);
/* ... and wait until it unlocks its features with ptrace */
futex_wait_until(&rsti(item)->shstk_unlock, 1);
}
/*
 * Final stage of the trampoline: synchronize with the parent, run the
 * payload and terminate the process with the payload's result.
 *
 * This function must not return; a return from here would raise #CP,
 * so the result of func is handed directly to exit().
 */
static void __arch_shstk_enable(struct pstree_item *item,
				int (*func)(void *arg), void *arg)
{
	shstk_sync_unlock(item);

	exit(func(arg));
}
/*
 * Turn the shadow stack off for the current task and lock the shadow
 * stack controls afterwards.
 *
 * Synchronizes with the parent first (see shstk_sync_unlock()), then
 * issues ARCH_SHSTK_DISABLE followed by ARCH_SHSTK_LOCK.
 *
 * Returns 0 on success, -1 if either arch_prctl() call fails.
 */
static int shstk_disable(struct pstree_item *item)
{
	long rc;

	shstk_sync_unlock(item);

	/* disabling the shadow stack implicitly clears ARCH_SHSTK_WRSS too */
	rc = syscall(__NR_arch_prctl, ARCH_SHSTK_DISABLE, ARCH_SHSTK_SHSTK);
	if (rc != 0) {
		pr_perror("Failed to disable shadow stack");
		return -1;
	}

	rc = syscall(__NR_arch_prctl, ARCH_SHSTK_LOCK,
		     ARCH_SHSTK_SHSTK | ARCH_SHSTK_WRSS);
	if (rc != 0) {
		pr_perror("Failed to lock shadow stack controls");
		return -1;
	}

	return 0;
}
/*
 * Run func(arg) with the shadow stack state the task requires.
 *
 * @item: process tree entry of the task being restored
 * @core: core image entry of the task
 * @func: payload to run
 * @arg:  opaque argument passed to func
 *
 * If the task does not need a shadow stack, the shadow stack is disabled
 * when CRIU itself runs with one, and the result of func(arg) is returned
 * (or -1 if disabling failed).
 *
 * If the task needs a shadow stack, it is enabled with an open-coded
 * arch_prctl() system call. On success control moves to
 * __arch_shstk_enable() and this function never returns: the process exits
 * with the value returned by func. On failure the assembly returns to the
 * caller directly with the non-zero system call result in %rax.
 */
int arch_shstk_trampoline(struct pstree_item *item, CoreEntry *core,
			  int (*func)(void *arg), void *arg)
{
	unsigned long features = ARCH_SHSTK_SHSTK;
	int code = ARCH_SHSTK_ENABLE;

	/*
	 * If task does not need shadow stack but CRIU runs with shadow
	 * stack enabled, we should disable it before continuing with
	 * restore
	 */
	if (!task_needs_shstk(item, core)) {
		if (kdat.has_shstk && shstk_disable(item))
			return -1;
		return func(arg);
	}

	/*
	 * Calling sys_arch_prctl() means there will be use of retq
	 * instruction after shadow stack is enabled and this will cause
	 * Control Protection fault. Open code sys_arch_prctl() in
	 * assembly.
	 *
	 * code and features should be in %rdi and %rsi and will be passed
	 * to the system call as is.
	 *
	 * The syscall instruction overwrites %rcx and %r11 and the result
	 * comes back in %rax, so all three are declared as clobbered;
	 * otherwise the compiler is free to keep live values (for example
	 * item, func or arg) in them across the asm statement.
	 *
	 * NOTE(review): the retq on the failure path assumes that %rsp
	 * points at this function's return address at that moment, i.e.
	 * that the compiler did not push anything in the prologue - confirm
	 * against the generated code.
	 */
	asm volatile("movq $"__stringify(__NR_arch_prctl)", %%rax \n"
		     "syscall \n"
		     "cmpq $0, %%rax \n"
		     "je 1f \n"
		     "retq \n"
		     "1: \n"
		     :
		     : "D"(code), "S"(features)
		     : "rax", "rcx", "r11", "cc", "memory");

	__arch_shstk_enable(item, func, arg);

	/* never reached */
	return -1;
}

View file

@ -1498,6 +1498,8 @@ static inline int fork_with_pid(struct pstree_item *item)
pr_debug("PID: real %d virt %d\n", item->pid->real, vpid(item));
}
arch_shstk_unlock(item, ca.core, pid);
err_unlock:
if (!(ca.clone_flags & CLONE_NEWPID))
unlock_last_pid();
@ -1764,7 +1766,7 @@ static int create_children_and_session(void)
return 0;
}
static int restore_task_with_children(void *_arg)
static int __restore_task_with_children(void *_arg)
{
struct cr_clone_arg *ca = _arg;
pid_t pid;
@ -1956,6 +1958,16 @@ err:
exit(1);
}
/*
 * Entry point for a restored task: hands the real worker,
 * __restore_task_with_children(), to the architecture's shadow stack
 * trampoline together with the task's tree item and core entry taken
 * from the clone argument.
 */
static int restore_task_with_children(void *_arg)
{
	struct cr_clone_arg *ca = _arg;

	return arch_shstk_trampoline(ca->item, ca->core,
				     __restore_task_with_children, ca);
}
static int attach_to_tasks(bool root_seized)
{
struct pstree_item *item;

View file

@ -20,4 +20,22 @@ static inline int arch_shstk_prepare(struct pstree_item *item,
#define arch_shstk_prepare arch_shstk_prepare
#endif
#ifndef arch_shstk_unlock
/*
 * Fallback used when the architecture header does not define
 * arch_shstk_unlock: there is nothing to unlock, always reports success.
 */
static inline int arch_shstk_unlock(struct pstree_item *item,
CoreEntry *core, pid_t pid)
{
return 0;
}
#define arch_shstk_unlock arch_shstk_unlock
#endif
#ifndef arch_shstk_trampoline
/*
 * Fallback used when the architecture header does not define
 * arch_shstk_trampoline: item and core are ignored and func(arg) is called
 * directly, its result being returned to the caller.
 */
static inline int arch_shstk_trampoline(struct pstree_item *item, CoreEntry *core,
int (*func)(void *arg), void *arg)
{
return func(arg);
}
#define arch_shstk_trampoline arch_shstk_trampoline
#endif
#endif

View file

@ -75,6 +75,9 @@ struct rst_info {
struct rst_rseq *rseqe;
futex_t shstk_enable;
futex_t shstk_unlock;
void *breakpoint;
};