criu/restore: gcs: adds restore implementation for Guarded Control Stack

This commit finalizes AArch64 Guarded Control Stack (GCS)
support by wiring the full dump and restore flow.

The restore path adds the following steps:

 - Define shared AArch64 GCS types and constants in a dedicated header
   for both compel and CRIU inclusion
 - compel: add get/set NT_ARM_GCS via ptrace, enabling user-space
   GCS state save and restore.
 - During restore switch to the new GCS (via GCSSTR) to place capability
   token sa_restorer address
 - arch_shstk_trampoline() — we enable GCS in a trampoline using
   prctl(PR_SET_SHADOW_STACK_STATUS, ...) via inline SVC. The trampoline
   is needed because we can’t RET without a valid GCS.
 - restorer: map the recorded GCS VMA, populate contents top-down with
   GCSSTR, write the signal capability at GCSPR_EL0 and the valid token at
   GCSPR_EL0-8, then switch to the rebuilt GCS (GCSSS1)
 - Save and restore registers via ptrace
 - Extend restorer argument structures to carry GCS state
   into post-restore execution
 - Add shstk_set_restorer_stack(): sets tmp_gcs to temporary restorer
   shadow stack start
 - Add gcs_vma_restore implementation (required for mremap of the GCS VMA)

Tested with:
    GCS_ENABLE=1 ./zdtm.py run -t zdtm/static/env00

Signed-off-by: Igor Svilenkov Bozic <svilenkov@gmail.com>
This commit is contained in:
Igor Svilenkov Bozic 2025-08-12 20:13:28 +02:00 committed by Alexander Mikhalitsyn
parent 2f27b12407
commit d73198d87f
No known key found for this signature in database
GPG key ID: B1F47F5CB05B4FA3
7 changed files with 387 additions and 0 deletions

View file

@ -136,6 +136,9 @@ int compel_set_task_ext_regs(pid_t pid, user_fpregs_struct_t *ext_regs)
{
struct iovec iov;
struct user_gcs gcs;
struct iovec gcs_iov = { .iov_base = &gcs, .iov_len = sizeof(gcs) };
pr_info("Restoring GP/FPU registers for %d\n", pid);
iov.iov_base = &ext_regs->fpstate;
@ -144,6 +147,33 @@ int compel_set_task_ext_regs(pid_t pid, user_fpregs_struct_t *ext_regs)
pr_perror("Failed to set FPU registers for %d", pid);
return -1;
}
if (ptrace(PTRACE_GETREGSET, pid, NT_ARM_GCS, &gcs_iov) < 0) {
pr_warn("gcs: Failed to get GCS for %d\n", pid);
} else {
ext_regs->gcs = gcs;
compel_set_task_gcs_regs(pid, ext_regs);
}
return 0;
}
/*
 * Push the GCS register set held in ext_regs->gcs into the traced task
 * @pid via PTRACE_SETREGSET with the NT_ARM_GCS regset.
 *
 * Returns 0 on success, -1 when the ptrace request reports failure.
 */
int compel_set_task_gcs_regs(pid_t pid, user_fpregs_struct_t *ext_regs)
{
	struct iovec gcs_iov = {
		.iov_base = &ext_regs->gcs,
		.iov_len = sizeof(ext_regs->gcs),
	};

	pr_info("gcs: restoring GCS registers for %d\n", pid);
	pr_info("gcs: restoring GCS: gcspr=%llx features=%llx\n",
		ext_regs->gcs.gcspr_el0, ext_regs->gcs.features_enabled);

	/* Any non-zero ptrace result is treated as an error. */
	if (ptrace(PTRACE_SETREGSET, pid, NT_ARM_GCS, &gcs_iov) == 0)
		return 0;

	pr_perror("gcs: Failed to set GCS registers for %d", pid);
	return -1;
}

View file

@ -124,3 +124,4 @@ openat2 437 437 (int dirfd, char *pathname, struct open_how *how, size_t size
pidfd_getfd 438 438 (int pidfd, int targetfd, unsigned int flags)
rseq 293 398 (void *rseq, uint32_t rseq_len, int flags, uint32_t sig)
membarrier 283 389 (int cmd, unsigned int flags, int cpu_id)
map_shadow_stack 453 ! (unsigned long addr, unsigned long size, unsigned int flags)

View file

@ -72,6 +72,7 @@ extern bool arch_can_dump_task(struct parasite_ctl *ctl);
extern int compel_get_task_regs(pid_t pid, user_regs_struct_t *regs, user_fpregs_struct_t *ext_regs, save_regs_t save,
void *arg, unsigned long flags);
extern int compel_set_task_ext_regs(pid_t pid, user_fpregs_struct_t *ext_regs);
extern int compel_set_task_gcs_regs(pid_t pid, user_fpregs_struct_t *ext_regs);
extern int arch_fetch_sas(struct parasite_ctl *ctl, struct rt_sigframe *s);
extern int sigreturn_prep_regs_plain(struct rt_sigframe *sigframe, user_regs_struct_t *regs,
user_fpregs_struct_t *fpregs);

View file

@ -6,3 +6,4 @@ obj-y += cpu.o
obj-y += crtools.o
obj-y += sigframe.o
obj-y += bitops.o
obj-y += gcs.o

157
criu/arch/aarch64/gcs.c Normal file
View file

@ -0,0 +1,157 @@
#include <sys/ptrace.h>
#include <sys/wait.h>
#include <common/list.h>
#include <compel/cpu.h>
#include "asm/gcs-types.h"
#include "pstree.h"
#include "restorer.h"
#include "rst-malloc.h"
#include "vma.h"
#include <sys/auxv.h>
#include <stdbool.h>
/*
 * Tell whether the dumped GCS entry says the shadow stack was enabled:
 * @gcs must be non-NULL and carry PR_SHADOW_STACK_ENABLE in
 * features_enabled.
 */
static bool task_has_gcs_enabled(UserAarch64GcsEntry *gcs)
{
	if (!gcs)
		return false;

	return (gcs->features_enabled & PR_SHADOW_STACK_ENABLE) ? true : false;
}
/* Report whether the HWCAP_GCS bit is present in this process's AT_HWCAP auxv entry. */
static bool host_supports_gcs(void)
{
	return (getauxval(AT_HWCAP) & HWCAP_GCS) ? true : false;
}
/*
 * Decide whether GCS has to be set up for @item on restore.
 *
 * Returns true only when the task is alive, its core image has GCS
 * enabled, and the host advertises GCS support. A GCS-enabled image on a
 * host without GCS yields a one-time warning and false.
 */
static bool task_needs_gcs(struct pstree_item *item, CoreEntry *core)
{
	if (!task_alive(item))
		return false;

	if (!task_has_gcs_enabled(core->ti_aarch64->gcs)) {
		pr_info("Restoring a task without GCS\n");
		return false;
	}

	if (!host_supports_gcs()) {
		pr_warn_once("Restoring task with GCS on non-GCS host\n");
		return false;
	}

	pr_info("Restoring task with GCS\n");
	return true;
}
/*
 * Find the shadow-stack VMA that contains gcs->gcspr_el0 and record its
 * start, length and premapped address in @gcs.
 *
 * Returns 0 when a matching VMA is found, -1 (with an error logged)
 * otherwise.
 */
static int gcs_prepare_task(struct vm_area_list *vmas,
			    struct rst_shstk_info *gcs)
{
	struct vma_area *area;

	list_for_each_entry(area, &vmas->h, list) {
		if (!vma_area_is(area, VMA_AREA_SHSTK))
			continue;
		if (!in_vma_area(area, gcs->gcspr_el0))
			continue;

		gcs->vma_start = area->e->start;
		gcs->vma_size = vma_area_len(area);
		gcs->premapped_addr = area->premmaped_addr;
		return 0;
	}

	pr_err("Unable to find a shadow stack vma: %lx\n", gcs->gcspr_el0);
	return -1;
}
/*
 * Copy the GCS pointer and feature flags from @src into @info, then look
 * up the VMA that holds that pointer. Returns gcs_prepare_task()'s result.
 */
static int gcs_fill_info(struct vm_area_list *vmas, struct rst_shstk_info *info,
			 CoreEntry *src)
{
	info->gcspr_el0 = src->ti_aarch64->gcs->gcspr_el0;
	info->features_enabled = src->ti_aarch64->gcs->features_enabled;

	return gcs_prepare_task(vmas, info);
}

/*
 * Fill in the shadow-stack restore info for a task and all of its threads.
 *
 * The per-thread argument array is taken to start right after @ta
 * (&ta[1]). Nothing is done (0 is returned) when the task does not need
 * GCS. Returns -1 if a GCS VMA cannot be found for the task or any thread.
 */
int arch_gcs_prepare(struct pstree_item *item, CoreEntry *core,
		     struct task_restore_args *ta)
{
	struct thread_restore_args *threads = (struct thread_restore_args *)(&ta[1]);
	struct vm_area_list *vmas = &rsti(item)->vmas;
	int t;

	if (!task_needs_gcs(item, core))
		return 0;

	/* Task-wide slot first, populated from the core passed by the caller. */
	if (gcs_fill_info(vmas, &ta->shstk, core)) {
		pr_err("gcs: failed to prepare shadow stack memory\n");
		return -1;
	}

	/* Then one slot per thread, each from that thread's own core entry. */
	for (t = 0; t < item->nr_threads; t++) {
		if (gcs_fill_info(vmas, &threads[t].shstk, item->core[t])) {
			pr_err("gcs: failed to prepare GCS memory\n");
			return -1;
		}
	}

	return 0;
}
/*
 * Run @func(@arg), enabling GCS for the current task first when needed.
 *
 * When the task does not need GCS, this simply returns func(arg).
 *
 * Otherwise GCS is enabled with prctl(PR_SET_SHADOW_STACK_STATUS, ...)
 * issued through an inline SVC rather than a libc call, and @func is run
 * afterwards. In that case this function does not return to its caller on
 * success: the process exits with @func's return value.
 *
 * Returns -1 if enabling GCS fails.
 */
int arch_shstk_trampoline(struct pstree_item *item, CoreEntry *core,
			  int (*func)(void *arg), void *arg)
{
	int fret;
	unsigned long flags = PR_SHADOW_STACK_ENABLE |
			      PR_SHADOW_STACK_PUSH |
			      PR_SHADOW_STACK_WRITE;
	long ret, x1_after, x8_after;

	/* If task doesn't need GCS, just call func */
	if (!task_needs_gcs(item, core))
		return func(arg);

	pr_debug("gcs: GCS enable SVC about to fire: x8=%d x0=%d x1=0x%lx\n",
		 __NR_prctl, PR_SET_SHADOW_STACK_STATUS, flags);

	asm volatile(
		"mov x0, %3\n" // x0 = PR_SET_SHADOW_STACK_STATUS (75)
		"mov x1, %4\n" // x1 = flags
		"mov x2, xzr\n" // x2 = 0
		"mov x3, xzr\n" // x3 = 0
		"mov x4, xzr\n" // x4 = 0
		"mov x8, %5\n" // x8 = __NR_prctl (167)
		"svc #0\n" // Invoke syscall
		"mov %0, x0\n" // Capture return value
		"mov %1, x1\n" // Capture x1 after
		"mov %2, x8\n" // Capture x8 after
		: "=r"(ret), "=r"(x1_after), "=r"(x8_after)
		: "i"(PR_SET_SHADOW_STACK_STATUS), // x0 - %3rd
		  "r"(flags), // x1 - %4th
		  "i"(__NR_prctl) // x8 - %5th
		: "x0", "x1", "x2", "x3", "x4", "x8", "memory", "cc");

	pr_info("gcs: after SVC: ret=%ld x1=%ld x8=%ld\n", ret, x1_after, x8_after);

	if (ret != 0) {
		/*
		 * The syscall was made with a raw SVC, so the kernel's
		 * negative error code is in ret and libc's errno was never
		 * updated; derive the error number from ret instead.
		 */
		int err = ret < 0 ? (int)-ret : 0;

		pr_err("gcs: failed to enable GCS: ret=%ld errno=%d (%s)\n", ret, err, strerror(err));
		return -1;
	}

	fret = func(arg);
	/* Exit instead of returning once GCS has been enabled in this frame. */
	exit(fret);

	return -1;
}

View file

@ -0,0 +1,196 @@
#ifndef __CR_ASM_GCS_H__
#define __CR_ASM_GCS_H__
#include <asm/gcs-types.h>
/*
 * Per-task / per-thread Guarded Control Stack state used on restore.
 *
 * vma_start, vma_size and premapped_addr are filled in from the GCS VMA
 * that contains gcspr_el0; gcspr_el0 and features_enabled are copied from
 * the core image; tmp_gcs is set through shstk_set_restorer_stack().
 */
struct rst_shstk_info {
unsigned long vma_start; /* start of GCS VMA */
unsigned long vma_size; /* size of GCS VMA */
unsigned long premapped_addr; /* premapped buffer */
unsigned long tmp_gcs; /* temp area for GCS if needed */
u64 gcspr_el0; /* GCS pointer */
u64 features_enabled; /* GCS flags */
};
/*
 * Self-named macro: makes the struct name visible to the preprocessor.
 * NOTE(review): presumably so generic code can test for an arch-provided
 * definition with #ifdef -- confirm against the generic headers.
 */
#define rst_shstk_info rst_shstk_info
/* Forward declarations: only pointers to these types appear in the prototypes below. */
struct task_restore_args;
struct pstree_item;
/* Fill the GCS restore info in @ta for @item; exposed under the generic name arch_shstk_prepare. */
int arch_gcs_prepare(struct pstree_item *item, CoreEntry *core,
struct task_restore_args *ta);
#define arch_shstk_prepare arch_gcs_prepare
/* Run @func(@arg), enabling GCS beforehand when the task needs it. */
int arch_shstk_trampoline(struct pstree_item *item, CoreEntry *core,
int (*func)(void *arg), void *arg);
#define arch_shstk_trampoline arch_shstk_trampoline
/* Remember @ptr, converted to an integer address, as the temporary GCS area in @gcs. */
static always_inline void shstk_set_restorer_stack(struct rst_shstk_info *gcs, void *ptr)
{
	unsigned long start = (long unsigned)ptr;

	gcs->tmp_gcs = start;
}
#define shstk_set_restorer_stack shstk_set_restorer_stack
/* Size of the temporary restorer shadow stack area: one page. */
static always_inline long shstk_restorer_stack_size(void)
{
	long size = PAGE_SIZE;

	return size;
}
#define shstk_restorer_stack_size shstk_restorer_stack_size
#ifdef CR_NOGLIBC
#include <compel/plugins/std/syscall.h>
#include <compel/cpu.h>
#include "vma.h"
/*
 * Map a shadow stack region with the map_shadow_stack syscall.
 *
 * @addr:  requested address, or 0 to accept whatever address is returned
 * @size:  length of the region in bytes
 * @flags: flags passed straight through to the syscall
 *
 * Returns the mapped address on success. Returns (unsigned long)-1 when
 * the syscall result is negative, or when a non-zero @addr was requested
 * and a different address came back.
 */
static inline unsigned long gcs_map(unsigned long addr, unsigned long size, unsigned int flags)
{
	long mapped = sys_map_shadow_stack(addr, size, flags);

	pr_info("gcs: syscall: map_shadow_stack at=%lx size=%ld\n", addr, size);

	if (mapped >= 0 && (addr == 0 || (unsigned long)mapped == addr)) {
		pr_info("gcs: mmapped GCS at %lx\n", mapped);
		return mapped;
	}

	if (mapped < 0)
		pr_err("gcs: failed to map GCS at %lx: %ld\n", addr, mapped);
	else
		pr_err("gcs: address mismatch: need %lx, got %lx\n", addr, mapped);

	return -1;
}
/* clang-format off */
/*
 * Issue the system instruction "sys #3, C7, C7, #2" with @Xt as operand.
 * The function name and its callers in this header indicate this is the
 * GCSSS1 (GCS switch stack, step 1) operation: callers pass the address of
 * the slot on the target stack that they want to switch to.
 * NOTE(review): encoding-to-mnemonic mapping taken from the name -- confirm
 * against the Arm ARM.
 */
static always_inline void gcsss1(unsigned long *Xt)
{
asm volatile (
"sys #3, C7, C7, #2, %0\n"
:
/* "rZ": operand may be a general register or the zero register */
: "rZ" (Xt)
/* "memory" clobber keeps the compiler from reordering memory accesses across it */
: "memory");
}
/*
 * Issue "SYSL Xt, #3, C7, C7, #3" and return the value it produces as a
 * pointer. By its name this is the GCSSS2 (GCS switch stack, step 2)
 * operation. It is not called anywhere in the visible part of this header.
 * NOTE(review): meaning of the returned value is not shown here -- confirm
 * against the Arm ARM before relying on it.
 */
static always_inline unsigned long *gcsss2(void)
{
unsigned long *Xt;
asm volatile (
"SYSL %0, #3, C7, C7, #3\n"
: "=r" (Xt)
:
: "memory");
return Xt;
}
/*
 * Store @val at @addr using the GCSSTR instruction.
 *
 * The instruction is emitted as a raw opcode with fixed registers, so the
 * operands are first moved into x0 (address) and x1 (value); x0 is zeroed
 * afterwards. Both registers are declared clobbered.
 * NOTE(review): the raw .inst is presumably used so the file builds with
 * assemblers that lack the GCS mnemonics -- confirm.
 */
static inline void gcsstr(unsigned long addr, unsigned long val)
{
asm volatile(
"mov x0, %0\n"
"mov x1, %1\n"
".inst 0xd91f1c01\n" // GCSSTR x1, [x0]
"mov x0, #0\n"
:
: "r"(addr), "r"(val)
: "x0", "x1", "memory");
}
/* clang-format on */
/*
 * Write two entries just below the saved GCS pointer and switch to the
 * rebuilt stack.
 *
 * Does nothing and returns 0 when @gcs is NULL or PR_SHADOW_STACK_ENABLE
 * is not set in gcs->features_enabled. Otherwise:
 *   - stores ALIGN_DOWN(GCS_SIGNAL_CAP(gcspr_el0 - 8), 8) at gcspr_el0 - 8;
 *   - stores that same value OR-ed with GCS_CAP_VALID_TOKEN at
 *     gcspr_el0 - 16;
 *   - calls gcsss1() on gcspr_el0 - 16.
 *
 * Always returns 0; neither the stores nor the switch are checked.
 */
static always_inline int gcs_restore(struct rst_shstk_info *gcs)
{
unsigned long gcspr, val;
if (!(gcs && gcs->features_enabled & PR_SHADOW_STACK_ENABLE)) {
return 0;
}
/* First slot: one 8-byte entry below the saved GCS pointer. */
gcspr = gcs->gcspr_el0 - 8;
val = ALIGN_DOWN(GCS_SIGNAL_CAP(gcspr), 8);
pr_debug("gcs: [0] GCSSTR VAL=%lx write at GCSPR=%lx\n", val, gcspr);
gcsstr(gcspr, val);
/*
 * Second slot: the value is computed from the first slot's address
 * (gcspr is only decremented afterwards) and tagged as a valid token.
 */
val = ALIGN_DOWN(GCS_SIGNAL_CAP(gcspr), 8) | GCS_CAP_VALID_TOKEN;
gcspr -= 8;
pr_debug("gcs: [1] GCSSTR VAL=%lx write at GCSPR=%lx\n", val, gcspr);
gcsstr(gcspr, val);
/* Switch to the stack via the slot that now holds the valid token. */
pr_debug("gcs: about to switch stacks via GCSSS1 to: %lx\n", gcspr);
gcsss1((unsigned long *)gcspr);
return 0;
}
/* Expose gcs_restore under the generic shadow-stack hook name. */
#define arch_shstk_restore gcs_restore
/*
 * Rebuild a GCS VMA from its premapped copy.
 *
 * A new shadow stack of the VMA's length is mapped at a kernel-chosen
 * address, the saved contents are copied into it word by word with
 * GCSSTR, the premapped buffer is unmapped, and the entry's premapped
 * start is pointed at the new shadow stack.
 *
 * Returns 0 on success, -1 if the shadow stack cannot be mapped, or the
 * munmap error code if the premapped buffer cannot be released.
 */
static always_inline int gcs_vma_restore(VmaEntry *vma_entry)
{
	unsigned long shstk, i;
	long ret;
	unsigned long *gcs_data = (void *)vma_premmaped_start(vma_entry);
	unsigned long vma_size = vma_entry_len(vma_entry);

	shstk = gcs_map(0, vma_size, SHADOW_STACK_SET_TOKEN);
	/*
	 * gcs_map() signals failure with (unsigned long)-1; an unsigned
	 * "< 0" test can never fire, and we must not fall through to
	 * writing at that bogus address.
	 */
	if (shstk == (unsigned long)-1) {
		pr_err("Failed to map shadow stack at %lx: %ld\n", shstk, (long)shstk);
		return -1;
	}

	/* restore shadow stack contents */
	for (i = 0; i < vma_size / 8; i++)
		gcsstr(shstk + i * 8, gcs_data[i]);

	pr_debug("unmap %lx %ld\n", (unsigned long)gcs_data, vma_size);

	/* Keep the result signed so a negative error code is detectable. */
	ret = sys_munmap(gcs_data, vma_size);
	if (ret < 0) {
		pr_err("Failed to unmap premmaped shadow stack\n");
		return ret;
	}

	vma_premmaped_start(vma_entry) = shstk;

	return 0;
}
#define shstk_vma_restore gcs_vma_restore
/*
 * Replace the page at gcs->tmp_gcs with a one-page shadow stack and switch
 * to it.
 *
 * Returns 0 without doing anything when @gcs is NULL or
 * PR_SHADOW_STACK_ENABLE is not set. Returns -1 if tmp_gcs is not
 * page-aligned, if the existing page cannot be unmapped, or if the shadow
 * stack cannot be mapped at that address. On success the switch is made
 * through the last 8-byte slot of the new page.
 */
static always_inline int gcs_switch_to_restorer(struct rst_shstk_info *gcs)
{
	unsigned long base, mapped;
	int rc;

	if (!gcs || !(gcs->features_enabled & PR_SHADOW_STACK_ENABLE))
		return 0;

	pr_debug("gcs->premapped_addr + gcs->vma_size = %lx\n", gcs->premapped_addr + gcs->vma_size);
	pr_debug("gcs->tmp_gcs = %lx\n", gcs->tmp_gcs);

	base = gcs->tmp_gcs;
	if (base % PAGE_SIZE) {
		pr_err("gcs: 0x%lx not page-aligned to size 0x%lx\n", base, PAGE_SIZE);
		return -1;
	}

	/* Free the placeholder page so the shadow stack can take its address. */
	rc = sys_munmap((void *)base, PAGE_SIZE);
	if (rc < 0) {
		pr_err("gcs: Failed to unmap aarea for dumpee GCS VMAs\n");
		return -1;
	}

	mapped = gcs_map(base, PAGE_SIZE, SHADOW_STACK_SET_TOKEN);
	if (mapped == (unsigned long)-1) {
		pr_err("gcs: failed to gcs_map(%lx, %lx)\n", (unsigned long)base, PAGE_SIZE);
		return -1;
	}

	/* Switch using the top 8-byte slot of the freshly mapped page. */
	gcsss1((unsigned long *)(base + PAGE_SIZE - 8));

	return 0;
}
#define arch_shstk_switch_to_restorer gcs_switch_to_restorer
#endif /* CR_NOGLIBC */
#endif /* __CR_ASM_GCS_H__ */

View file

@ -5,6 +5,7 @@
#include <sys/ucontext.h>
#include "asm/types.h"
#include "asm/gcs.h"
#include "images/core.pb-c.h"
#include <compel/asm/sigframe.h>