mirror of
https://github.com/checkpoint-restore/criu.git
synced 2026-01-23 02:14:37 +00:00
Recent kernels allow for user to read proc pagemap file, but zero pfns in it. Support this mode for user dumps. https://github.com/xemul/criu/issues/101 Signed-off-by: Pavel Emelyanov <xemul@virtuozzo.com> Acked-by: Andrew Vagin <avagin@virtuozzo.com>
320 lines
7.9 KiB
C
320 lines
7.9 KiB
C
#include <stdlib.h>
|
|
#include <stdio.h>
|
|
#include <unistd.h>
|
|
#include <string.h>
|
|
#include <elf.h>
|
|
#include <fcntl.h>
|
|
|
|
#include <sys/types.h>
|
|
#include <sys/stat.h>
|
|
#include <sys/mman.h>
|
|
|
|
#include "asm/types.h"
|
|
#include "asm/parasite-syscall.h"
|
|
|
|
#include "parasite-syscall.h"
|
|
#include "parasite.h"
|
|
#include "compiler.h"
|
|
#include "kerndat.h"
|
|
#include "vdso.h"
|
|
#include "util.h"
|
|
#include "log.h"
|
|
#include "mem.h"
|
|
#include "vma.h"
|
|
|
|
#ifdef LOG_PREFIX
|
|
# undef LOG_PREFIX
|
|
#endif
|
|
#define LOG_PREFIX "vdso: "
|
|
|
|
/*
 * Symbol table describing the vDSO/VVAR of the current (runtime) process.
 * Starts out as VDSO_SYMTABLE_INIT and is filled by vdso_init() through
 * vdso_fill_self_symtable(), which parses the process's own "maps" file.
 */
struct vdso_symtable vdso_sym_rt = VDSO_SYMTABLE_INIT;
|
|
/*
 * Value stored by vaddr_to_pfn() for vdso_sym_rt.vma_start; vdso_init()
 * only resolves it when kdat.pmap == PM_FULL, otherwise it stays
 * VDSO_BAD_PFN. parasite_fixup_vdso() compares pfns read from a task's
 * pagemap against it to recognise vDSO areas.
 */
u64 vdso_pfn = VDSO_BAD_PFN;
|
|
/*
|
|
* The VMAs list might have proxy vdso/vvar areas left
|
|
* from previous dump/restore cycle so we need to detect
|
|
* them and eliminated from the VMAs list, they will be
|
|
* generated again on restore if needed.
|
|
*/
|
|
int parasite_fixup_vdso(struct parasite_ctl *ctl, pid_t pid,
|
|
struct vm_area_list *vma_area_list)
|
|
{
|
|
unsigned long proxy_vdso_addr = VDSO_BAD_ADDR;
|
|
unsigned long proxy_vvar_addr = VVAR_BAD_ADDR;
|
|
struct vma_area *proxy_vdso_marked = NULL;
|
|
struct vma_area *proxy_vvar_marked = NULL;
|
|
struct parasite_vdso_vma_entry *args;
|
|
int fd = -1, ret, exit_code = -1;
|
|
u64 pfn = VDSO_BAD_PFN;
|
|
struct vma_area *vma;
|
|
off_t off;
|
|
|
|
args = parasite_args(ctl, struct parasite_vdso_vma_entry);
|
|
if (kdat.pmap == PM_FULL) {
|
|
BUG_ON(vdso_pfn == VDSO_BAD_PFN);
|
|
fd = open_proc(pid, "pagemap");
|
|
if (fd < 0)
|
|
return -1;
|
|
} else
|
|
pr_info("Pagemap is unavailable, trying a slow way\n");
|
|
|
|
list_for_each_entry(vma, &vma_area_list->h, list) {
|
|
if (!vma_area_is(vma, VMA_AREA_REGULAR))
|
|
continue;
|
|
|
|
if (vma_area_is(vma, VMA_FILE_SHARED) ||
|
|
vma_area_is(vma, VMA_FILE_PRIVATE))
|
|
continue;
|
|
/*
|
|
* It might be possible VVAR area from marked
|
|
* vDSO zone, we need to detect it earlier than
|
|
* VDSO_PROT test because VVAR_PROT is a subset
|
|
* of it but don't yield continue here,
|
|
* sigh... what a mess.
|
|
*/
|
|
BUILD_BUG_ON(!(VDSO_PROT & VVAR_PROT));
|
|
|
|
if ((vma->e->prot & VVAR_PROT) == VVAR_PROT) {
|
|
if (proxy_vvar_addr != VVAR_BAD_ADDR &&
|
|
proxy_vvar_addr == vma->e->start) {
|
|
BUG_ON(proxy_vvar_marked);
|
|
proxy_vvar_marked = vma;
|
|
continue;
|
|
}
|
|
}
|
|
|
|
if ((vma->e->prot & VDSO_PROT) != VDSO_PROT)
|
|
continue;
|
|
|
|
if (vma->e->start > kdat.task_size)
|
|
continue;
|
|
|
|
if (vma->e->flags & MAP_GROWSDOWN)
|
|
continue;
|
|
|
|
/*
|
|
* I need to poke every potentially marked vma,
|
|
* otherwise if task never called for vdso functions
|
|
* page frame number won't be reported.
|
|
*
|
|
* Moreover, if page frame numbers are not accessible
|
|
* we have to scan the vma zone for vDSO elf structure
|
|
* which gonna be a slow way.
|
|
*/
|
|
args->start = vma->e->start;
|
|
args->len = vma_area_len(vma);
|
|
args->try_fill_symtable = (fd < 0) ? true : false;
|
|
args->is_vdso = false;
|
|
|
|
if (parasite_execute_daemon(PARASITE_CMD_CHECK_VDSO_MARK, ctl)) {
|
|
pr_err("Parasite failed to poke for mark\n");
|
|
goto err;
|
|
}
|
|
|
|
/*
|
|
* Defer handling marked vdso until we walked over
|
|
* all vmas and restore potentially remapped vDSO
|
|
* area status.
|
|
*/
|
|
if (unlikely(args->is_marked)) {
|
|
if (proxy_vdso_marked) {
|
|
pr_err("Ow! Second vdso mark detected!\n");
|
|
goto err;
|
|
}
|
|
proxy_vdso_marked = vma;
|
|
proxy_vdso_addr = args->proxy_vdso_addr;
|
|
proxy_vvar_addr = args->proxy_vvar_addr;
|
|
continue;
|
|
}
|
|
|
|
/*
|
|
* If we have an access to pagemap we can handle vDSO
|
|
* status early. Otherwise, in worst scenario, where
|
|
* the dumpee has been remapping vdso on its own and
|
|
* the kernel version is < 3.16, the vdso won't be
|
|
* detected via procfs status so we have to parse
|
|
* symbols in parasite code.
|
|
*/
|
|
if (fd >= 0) {
|
|
off = (vma->e->start / PAGE_SIZE) * sizeof(u64);
|
|
ret = pread(fd, &pfn, sizeof(pfn), off);
|
|
if (ret < 0 || ret != sizeof(pfn)) {
|
|
pr_perror("Can't read pme for pid %d", pid);
|
|
goto err;
|
|
}
|
|
|
|
pfn = PME_PFRAME(pfn);
|
|
if (!pfn) {
|
|
pr_err("Unexpected page fram number 0 for pid %d\n", pid);
|
|
goto err;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Setup proper VMA status. Note starting with 3.16
|
|
* the [vdso]/[vvar] marks are reported correctly
|
|
* even when they are remapped into a new place,
|
|
* but only since that particular version of the
|
|
* kernel!
|
|
*/
|
|
if ((pfn == vdso_pfn && pfn != VDSO_BAD_PFN) || args->is_vdso) {
|
|
if (!vma_area_is(vma, VMA_AREA_VDSO)) {
|
|
pr_debug("Restore vDSO status by pfn/symtable at %lx\n",
|
|
(long)vma->e->start);
|
|
vma->e->status |= VMA_AREA_VDSO;
|
|
}
|
|
} else {
|
|
if (unlikely(vma_area_is(vma, VMA_AREA_VDSO))) {
|
|
pr_debug("Drop mishinted vDSO status at %lx\n",
|
|
(long)vma->e->start);
|
|
vma->e->status &= ~VMA_AREA_VDSO;
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
* There is marked vdso, it means such vdso is autogenerated
|
|
* and must be dropped from vma list.
|
|
*/
|
|
if (proxy_vdso_marked) {
|
|
pr_debug("vdso: Found marked at %lx (proxy vDSO at %lx VVAR at %lx)\n",
|
|
(long)proxy_vdso_marked->e->start,
|
|
(long)proxy_vdso_addr, (long)proxy_vvar_addr);
|
|
|
|
/*
|
|
* Don't forget to restore the proxy vdso/vvar status, since
|
|
* it's unknown to the kernel.
|
|
*/
|
|
list_for_each_entry(vma, &vma_area_list->h, list) {
|
|
if (vma->e->start == proxy_vdso_addr) {
|
|
vma->e->status |= VMA_AREA_REGULAR | VMA_AREA_VDSO;
|
|
pr_debug("vdso: Restore proxy vDSO status at %lx\n",
|
|
(long)vma->e->start);
|
|
} else if (vma->e->start == proxy_vvar_addr) {
|
|
vma->e->status |= VMA_AREA_REGULAR | VMA_AREA_VVAR;
|
|
pr_debug("vdso: Restore proxy VVAR status at %lx\n",
|
|
(long)vma->e->start);
|
|
}
|
|
}
|
|
|
|
pr_debug("vdso: Droppping marked vdso at %lx\n",
|
|
(long)proxy_vdso_marked->e->start);
|
|
list_del(&proxy_vdso_marked->list);
|
|
xfree(proxy_vdso_marked);
|
|
vma_area_list->nr--;
|
|
|
|
if (proxy_vvar_marked) {
|
|
pr_debug("vdso: Droppping marked vvar at %lx\n",
|
|
(long)proxy_vvar_marked->e->start);
|
|
list_del(&proxy_vvar_marked->list);
|
|
xfree(proxy_vvar_marked);
|
|
vma_area_list->nr--;
|
|
}
|
|
}
|
|
exit_code = 0;
|
|
err:
|
|
close(fd);
|
|
return exit_code;
|
|
}
|
|
|
|
static int vdso_fill_self_symtable(struct vdso_symtable *s)
|
|
{
|
|
char buf[512];
|
|
int ret, exit_code = -1;
|
|
FILE *maps;
|
|
|
|
*s = (struct vdso_symtable)VDSO_SYMTABLE_INIT;
|
|
|
|
maps = fopen_proc(PROC_SELF, "maps");
|
|
if (!maps) {
|
|
pr_perror("Can't open self-vma");
|
|
return -1;
|
|
}
|
|
|
|
while (fgets(buf, sizeof(buf), maps)) {
|
|
unsigned long start, end;
|
|
char *has_vdso, *has_vvar;
|
|
|
|
has_vdso = strstr(buf, "[vdso]");
|
|
if (!has_vdso)
|
|
has_vvar = strstr(buf, "[vvar]");
|
|
else
|
|
has_vvar = NULL;
|
|
|
|
if (!has_vdso && !has_vvar)
|
|
continue;
|
|
|
|
ret = sscanf(buf, "%lx-%lx", &start, &end);
|
|
if (ret != 2) {
|
|
pr_err("Can't find vDSO/VVAR bounds\n");
|
|
goto err;
|
|
}
|
|
|
|
if (has_vdso) {
|
|
if (s->vma_start != VDSO_BAD_ADDR) {
|
|
pr_err("Got second vDSO entry\n");
|
|
goto err;
|
|
}
|
|
s->vma_start = start;
|
|
s->vma_end = end;
|
|
|
|
ret = vdso_fill_symtable((void *)start, end - start, s);
|
|
if (ret)
|
|
goto err;
|
|
} else {
|
|
if (s->vvar_start != VVAR_BAD_ADDR) {
|
|
pr_err("Got second VVAR entry\n");
|
|
goto err;
|
|
}
|
|
s->vvar_start = start;
|
|
s->vvar_end = end;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Validate its structure -- for new vDSO format the
|
|
* structure must be like
|
|
*
|
|
* 7fff1f5fd000-7fff1f5fe000 r-xp 00000000 00:00 0 [vdso]
|
|
* 7fff1f5fe000-7fff1f600000 r--p 00000000 00:00 0 [vvar]
|
|
*
|
|
* The areas may be in reverse order.
|
|
*
|
|
* 7fffc3502000-7fffc3504000 r--p 00000000 00:00 0 [vvar]
|
|
* 7fffc3504000-7fffc3506000 r-xp 00000000 00:00 0 [vdso]
|
|
*
|
|
*/
|
|
if (s->vma_start != VDSO_BAD_ADDR) {
|
|
if (s->vvar_start != VVAR_BAD_ADDR) {
|
|
if (s->vma_end != s->vvar_start &&
|
|
s->vvar_end != s->vma_start) {
|
|
pr_err("Unexpected rt vDSO area bounds\n");
|
|
goto err;
|
|
}
|
|
}
|
|
} else {
|
|
pr_err("Can't find rt vDSO\n");
|
|
goto err;
|
|
}
|
|
|
|
pr_debug("rt [vdso] %lx-%lx [vvar] %lx-%lx\n",
|
|
s->vma_start, s->vma_end,
|
|
s->vvar_start, s->vvar_end);
|
|
|
|
exit_code = 0;
|
|
err:
|
|
fclose(maps);
|
|
return exit_code;
|
|
}
|
|
|
|
int vdso_init(void)
|
|
{
|
|
if (vdso_fill_self_symtable(&vdso_sym_rt))
|
|
return -1;
|
|
|
|
if (kdat.pmap != PM_FULL)
|
|
pr_info("VDSO detection turned off\n");
|
|
else if (vaddr_to_pfn(vdso_sym_rt.vma_start, &vdso_pfn))
|
|
return -1;
|
|
|
|
return 0;
|
|
}
|