sync criu upstream v4.2

This commit is contained in:
Brandon Smith 2025-12-05 23:13:38 -06:00
commit 2d19a7b430
83 changed files with 1782 additions and 595 deletions

View file

@ -9,10 +9,11 @@ concurrency:
jobs:
build:
runs-on: ubuntu-22.04
strategy:
matrix:
os: [ubuntu-22.04, ubuntu-22.04-arm]
target: [GCC=1, CLANG=1]
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v4

View file

@ -14,7 +14,7 @@ jobs:
image: registry.fedoraproject.org/fedora:latest
steps:
- name: Install tools
run: sudo dnf -y install git make ruff xz clang-tools-extra which codespell git-clang-format ShellCheck
run: sudo dnf -y install git make ruff xz clang-tools-extra codespell git-clang-format ShellCheck
- uses: actions/checkout@v4

View file

@ -8,8 +8,8 @@ Here are some useful hints to get involved.
* We have both -- [very simple](https://github.com/checkpoint-restore/criu/issues?q=is%3Aissue+is%3Aopen+label%3Aenhancement) and [more sophisticated](https://github.com/checkpoint-restore/criu/issues?q=is%3Aissue+is%3Aopen+label%3A%22new+feature%22) coding tasks;
* CRIU does need [extensive testing](https://github.com/checkpoint-restore/criu/issues?q=is%3Aissue+is%3Aopen+label%3Atesting);
* Documentation is always hard, we have [some information](https://criu.org/Category:Empty_articles) that is to be extracted from people's heads into wiki pages as well as [some texts](https://criu.org/Category:Editor_help_needed) that all need to be converted into useful articles;
* Feedback is expected on the GitHub issues page and on the [mailing list](https://lists.openvz.org/mailman/listinfo/criu);
* We accept GitHub pull requests and this is the preferred way to contribute to CRIU. If you prefer to send patches by email, you are welcome to send them to [CRIU development mailing list](https://lists.openvz.org/mailman/listinfo/criu).
* Feedback is expected on the GitHub issues page and on the [mailing list](https://lore.kernel.org/criu);
* We accept GitHub pull requests and this is the preferred way to contribute to CRIU. If you prefer to send patches by email, you are welcome to send them to [CRIU development mailing list](https://lore.kernel.org/criu).
Below we describe in more detail the recommended practices for CRIU development.
* Spread the word about CRIU in [social networks](http://criu.org/Contacts);
* If you're giving a talk about CRIU -- let us know, we'll mention it on the [wiki main page](https://criu.org/News/events);
@ -366,7 +366,7 @@ We recommend to post patches using `git send-email`
```
git send-email --cover-letter --no-chain-reply-to --annotate \
--confirm=always --to=criu@openvz.org criu-dev
--confirm=always --to=criu@lists.linux.dev criu-dev
```
Note that the `git send-email` subcommand may not be in
@ -381,11 +381,11 @@ configure it to point it to your SMTP server with something like:
git config --global sendemail.smtpServer smtp.example.net
```
If you get tired of typing `--to=criu@openvz.org` all the time,
If you get tired of typing `--to=criu@lists.linux.dev` all the time,
you can configure that to be automatically handled as well:
```
git config sendemail.to criu@openvz.org
git config sendemail.to criu@lists.linux.dev
```
If a developer is sending another version of the patch (e.g. to address
@ -398,7 +398,7 @@ version if needed though).
### Mail patches
The patches should be sent to CRIU development mailing list, `criu AT openvz.org`. Note that you need to be subscribed first in order to post. The list web interface is available at https://openvz.org/mailman/listinfo/criu; you can also use standard mailman aliases to work with it.
The patches should be sent to CRIU development mailing list, `criu AT lists.linux.dev`. Note that you need to be subscribed first in order to post. The list web interface is available at https://lore.kernel.org/criu; you can also use standard mailman aliases to work with it.
Please make sure the email client you're using doesn't screw your patch (line wrapping and so on).

View file

@ -465,6 +465,30 @@ The 'mode' may be one of the following:
*skip*::: Don't lock the network. If *--tcp-close* is not used, the network
must be locked externally to allow CRIU to dump TCP connections.
*--allow-uprobes*::
Allow dumping when uprobes vma is present. When used on dump, this option is
required on restore as well.
A uprobes vma is automatically created by the kernel once a uprobe is
triggered. This mapping is not removed even once the uprobe is deleted. So,
even if a process once had uprobes attached to it, and they're removed by
the time the process is dumped, this option is still required because criu
has no way of knowing whether there are active uprobes or not.
When using this option on restore, make sure the uprobes (if any) active on
the dumped processes are still active. Otherwise, when execution reaches
a uprobe'd location in any of the restored processes, that process will be
sent a SIGTRAP.
As an example, say a uprobe is set at function foo in the executable of the
process p_bar. Whenever execution in p_bar reaches function foo, the uprobe
is triggered. If the uprobe has been triggered at least once, then the kernel
will have created the uprobes vma. To dump p_bar, this option is
necessary. After dumping, say the uprobe is deleted. Now, on restoring with
this option, once execution reaches function foo, SIGTRAP will be sent to
the restored p_bar. Unless it has a signal handler installed for SIGTRAP,
it will be terminated and core dumped.
*restore*
~~~~~~~~~
Restores previously checkpointed processes.
@ -692,6 +716,10 @@ The 'mode' may be one of the following:
*--skip-file-rwx-check*::
Skip checking file permissions (r/w/x for u/g/o) on restore.
*--allow-uprobes*::
Required when dumped with this option. Refer to this option in the section
on dumping for more details.
*check*
~~~~~~~
Checks whether the kernel supports the features needed by *criu* to

View file

@ -451,6 +451,10 @@ ruff:
test/zdtm.py \
test/inhfd/*.py \
test/others/rpc/config_file.py \
test/others/action-script/check_actions.py \
test/others/pycriu/*.py \
lib/pycriu/criu.py \
lib/pycriu/__init__.py \
lib/pycriu/images/pb2dict.py \
lib/pycriu/images/images.py \
scripts/criu-ns \
@ -488,7 +492,7 @@ lint: ruff shellcheck codespell
! git --no-pager grep -E '\s+$$' \*.c \*.h
.PHONY: lint ruff shellcheck codespell
codecov: SHELL := $(shell which bash)
codecov: SHELL := $(shell command -v bash)
codecov:
curl -Os https://uploader.codecov.io/latest/linux/codecov
chmod +x codecov

View file

@ -46,9 +46,13 @@ endif
endif
# Default flags for pip install:
# --upgrade: Upgrade crit/pycriu packages
# --ignore-installed: Ignore existing packages and reinstall them
PIPFLAGS ?= --upgrade --ignore-installed
# --ignore-installed: Overwrite already installed pycriu/crit packages
# --no-build-isolation: Use current Python environment to build pycriu/crit packages
# --no-deps: Don't install any dependencies
# --no-index: Don't use PyPI index to find packages
# --progress-bar: Cleaner output
# --upgrade: Treat the install as an upgrade when replacing the installed version
PIPFLAGS ?= --ignore-installed --no-build-isolation --no-deps --no-index --progress-bar off --upgrade
export SKIP_PIP_INSTALL PIPFLAGS

View file

@ -1,10 +1,10 @@
#
# CRIU version.
CRIU_VERSION_MAJOR := 4
CRIU_VERSION_MINOR := 0
CRIU_VERSION_MINOR := 2
CRIU_VERSION_SUBLEVEL :=
CRIU_VERSION_EXTRA :=
CRIU_VERSION_NAME := CRIUDA
CRIU_VERSION_NAME := CRIUTIBILITY
CRIU_VERSION := $(CRIU_VERSION_MAJOR)$(if $(CRIU_VERSION_MINOR),.$(CRIU_VERSION_MINOR))$(if $(CRIU_VERSION_SUBLEVEL),.$(CRIU_VERSION_SUBLEVEL))$(if $(CRIU_VERSION_EXTRA),.$(CRIU_VERSION_EXTRA))
export CRIU_VERSION_MAJOR CRIU_VERSION_MINOR CRIU_VERSION_SUBLEVEL

View file

@ -1,10 +1,11 @@
#ifndef UAPI_COMPEL_ASM_SIGFRAME_H__
#define UAPI_COMPEL_ASM_SIGFRAME_H__
#include <asm/sigcontext.h>
#include <signal.h>
#include <sys/ucontext.h>
#include <stdint.h>
#include <asm/types.h>
/* Copied from the kernel header arch/arm64/include/uapi/asm/sigcontext.h */

View file

@ -6,6 +6,7 @@ apk add --no-cache \
build-base \
coreutils \
e2fsprogs \
elfutils-dev \
git \
gnutls-dev \
go \
@ -20,8 +21,11 @@ apk add --no-cache \
libdrm-dev \
libnet-dev \
libnl3-dev \
libtraceevent-dev \
libtracefs-dev \
nftables \
nftables-dev \
perl \
pkgconfig \
procps \
protobuf-c-compiler \

View file

@ -14,6 +14,8 @@ fi
libc6-"${DEBIAN_ARCH}"-cross \
libc6-dev-"${DEBIAN_ARCH}"-cross \
libcap-dev:"${DEBIAN_ARCH}" \
libdrm-dev:"${DEBIAN_ARCH}" \
libelf-dev:"${DEBIAN_ARCH}" \
libexpat1-dev:"${DEBIAN_ARCH}" \
libgnutls28-dev:"${DEBIAN_ARCH}" \
libnet-dev:"${DEBIAN_ARCH}" \
@ -23,9 +25,10 @@ fi
libprotobuf-c-dev:"${DEBIAN_ARCH}" \
libprotobuf-dev:"${DEBIAN_ARCH}" \
libssl-dev:"${DEBIAN_ARCH}" \
libtraceevent-dev:"${DEBIAN_ARCH}" \
libtracefs-dev:"${DEBIAN_ARCH}" \
ncurses-dev:"${DEBIAN_ARCH}" \
uuid-dev:"${DEBIAN_ARCH}" \
libdrm-dev:"${DEBIAN_ARCH}" \
build-essential \
pkg-config \
git \

View file

@ -19,6 +19,7 @@ fi
libbsd-dev \
libcap-dev \
libdrm-dev \
libelf-dev \
libgnutls28-dev \
libgnutls30 \
libnet-dev \
@ -28,6 +29,8 @@ fi
libprotobuf-c-dev \
libprotobuf-dev \
libselinux-dev \
libtraceevent-dev \
libtracefs-dev \
pkg-config \
protobuf-c-compiler \
protobuf-compiler \

View file

@ -3,6 +3,7 @@
dnf install -y \
asciidoc \
binutils \
elfutils-libelf-devel \
gcc \
git \
glibc-devel \
@ -18,6 +19,8 @@ dnf install -y \
libnet-devel \
libnl3-devel \
libselinux-devel \
libtraceevent-devel \
libtracefs-devel \
libuuid-devel \
make \
nftables \
@ -31,5 +34,7 @@ dnf install -y \
python3-importlib-metadata \
python3-protobuf \
python3-pyyaml \
python3-setuptools \
python3-wheel \
rubygem-asciidoctor \
xmlto

View file

@ -15,8 +15,11 @@ pacman -Syu --noconfirm \
libbsd \
libcap \
libdrm \
libelf \
libnet \
libnl \
libtraceevent \
libtracefs \
nftables \
pkg-config \
protobuf \

View file

@ -418,7 +418,7 @@ resolve_path() {
local p
p="${2}"
if which realpath > /dev/null; then
if command -v realpath > /dev/null; then
p=$(realpath "${p}")
fi
${ECHO} "${1}: ${p}"
@ -427,7 +427,7 @@ resolve_path() {
resolve_cmd() {
local cpath
cpath=$(which "${2}")
cpath=$(command -v "${2}")
resolve_path "${1}" "${cpath}"
}

View file

@ -55,6 +55,7 @@ status = {
"VMA_AREA_VVAR": 1 << 12,
"VMA_AREA_AIORING": 1 << 13,
"VMA_AREA_MEMFD": 1 << 14,
"VMA_AREA_UPROBES": 1 << 17,
"VMA_AREA_UNSUPP": 1 << 31
}
@ -793,7 +794,9 @@ class coredump_generator:
off = 0 # in pages
for m in pagemap[1:]:
found = False
for i in range(m["nr_pages"]):
num_pages = m.get("nr_pages", m["compat_nr_pages"])
for i in range(num_pages):
if m["vaddr"] + i * PAGESIZE == page_no * PAGESIZE:
found = True
break

View file

@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta"
name = "crit"
description = "CRiu Image Tool"
authors = [
{name = "CRIU team", email = "criu@openvz.org"},
{name = "CRIU team", email = "criu@lists.linux.dev"},
]
license = {text = "GPLv2"}
dynamic = ["version"]

View file

@ -7,7 +7,7 @@
name = crit
description = CRiu Image Tool
author = CRIU team
author_email = criu@openvz.org
author_email = criu@lists.linux.dev
license = GPLv2
version = attr: crit.__version__

View file

@ -1,7 +1,7 @@
#ifndef __CR_ASM_RESTORER_H__
#define __CR_ASM_RESTORER_H__
#include <asm/sigcontext.h>
#include <signal.h>
#include <sys/ucontext.h>
#include "asm/types.h"

View file

@ -73,6 +73,23 @@ int arch_shstk_trampoline(struct pstree_item *item, CoreEntry *core,
int (*func)(void *arg), void *arg);
#define arch_shstk_trampoline arch_shstk_trampoline
/*
 * Size, in bytes, reserved for the restorer's temporary shadow stack:
 * exactly one page.
 */
static always_inline long shstk_restorer_stack_size(void)
{
return PAGE_SIZE;
}
#define shstk_restorer_stack_size shstk_restorer_stack_size
/*
 * Record @ptr in @info->tmp_shstk as the address of the temporary
 * shadow stack.
 */
static always_inline void shstk_set_restorer_stack(struct rst_shstk_info *info, void *ptr)
{
info->tmp_shstk = (unsigned long)ptr;
}
#define shstk_set_restorer_stack shstk_set_restorer_stack
/*
 * Pick the minimal address to be used as a lower bound for new mappings.
 *
 * Returns @def when the ARCH_SHSTK_SHSTK bit is not set in @info->cet,
 * and 4 GiB (4UL << 30) when it is.
 *
 * NOTE(review): the 4 GiB floor presumably mirrors a kernel placement
 * rule for shadow stack mappings -- confirm.
 */
static always_inline long shstk_min_mmap_addr(struct rst_shstk_info *info, unsigned long __maybe_unused def)
{
	if (info->cet & ARCH_SHSTK_SHSTK)
		return 4UL << 30;

	return def;
}
#define shstk_min_mmap_addr shstk_min_mmap_addr
#ifdef CR_NOGLIBC
#include <compel/plugins/std/syscall.h>
@ -146,33 +163,53 @@ static inline int shstk_finalize(void)
return ret;
}
/*
 * Turn the premapped anonymous (non-shstk) copy of a shadow stack VMA
 * into a real shadow stack mapping with the same contents.
 *
 * A shadow stack of the VMA's length is mapped (address argument 0,
 * SHADOW_STACK_SET_TOKEN), filled from the premapped data and the
 * premapped data is then unmapped.
 *
 * Returns 0 on success, -1 when the shadow stack cannot be mapped, or
 * the sys_munmap() error value when dropping the premapped data fails.
 */
static always_inline int shstk_vma_restore(VmaEntry *vma_entry)
{
	unsigned long *src = (void *)vma_premmaped_start(vma_entry);
	unsigned long len = vma_entry_len(vma_entry);
	long dst, idx;
	long err;

	dst = sys_map_shadow_stack(0, len, SHADOW_STACK_SET_TOKEN);
	if (dst < 0) {
		pr_err("Failed to map shadow stack: %ld\n", dst);
		return -1;
	}

	/* copy the saved contents with wrssq, 8 bytes per iteration */
	idx = 0;
	while (idx < len / 8) {
		wrssq(dst + idx * 8, src[idx]);
		idx++;
	}

	err = sys_munmap(src, len);
	if (err < 0) {
		pr_err("Failed to unmap premmaped shadow stack\n");
		return err;
	}

	/*
	 * The anonymous copy is gone; from now on the premapped start of
	 * this vma refers to the shadow stack mapping, which still has to
	 * be mremap()-ed to its final location.
	 */
	vma_premmaped_start(vma_entry) = dst;

	return 0;
}
#define shstk_vma_restore shstk_vma_restore
/*
* Restore contents of the shadow stack and set shadow stack pointer
*/
static always_inline int shstk_restore(struct rst_shstk_info *cet)
{
unsigned long *shstk_data = (unsigned long *)cet->premmaped_addr;
unsigned long ssp = cet->vma_start + cet->vma_size - 8;
unsigned long shstk_top = cet->vma_size / 8 - 1;
unsigned long val;
long ret;
unsigned long ssp, val;
if (!(cet->cet & ARCH_SHSTK_SHSTK))
return 0;
if (shstk_map(cet->vma_start, cet->vma_size))
return -1;
/*
* Switch shadow stack from temporary location to the actual task's
* shadow stack VMA
*/
shstk_switch_ssp(ssp);
/* restore shadow stack contents */
for (; ssp >= cet->ssp; ssp -= 8, shstk_top--)
wrssq(ssp, shstk_data[shstk_top]);
/*
* Add tokens for sigreturn frame and for switch of the shadow stack.
* The sigreturn token will be checked by the kernel during
@ -182,6 +219,7 @@ static always_inline int shstk_restore(struct rst_shstk_info *cet)
*/
/* token for sigreturn frame */
ssp = cet->ssp - 8;
val = ALIGN_DOWN(cet->ssp, 8) | SHSTK_DATA_BIT;
wrssq(ssp, val);
@ -193,12 +231,6 @@ static always_inline int shstk_restore(struct rst_shstk_info *cet)
/* reset shadow stack pointer to the proper location */
shstk_switch_ssp(ssp);
ret = sys_munmap(shstk_data, cet->vma_size + PAGE_SIZE);
if (ret < 0) {
pr_err("Failed to unmap premmaped shadow stack\n");
return ret;
}
return shstk_finalize();
}
#define arch_shstk_restore shstk_restore

View file

@ -45,7 +45,6 @@ static int shstk_prepare_task(struct vm_area_list *vmas,
shstk->vma_start = vma->e->start;
shstk->vma_size = size;
shstk->premmaped_addr = premmaped_addr;
shstk->tmp_shstk = premmaped_addr + size;
break;
}

View file

@ -18,6 +18,7 @@
#include "cr_options.h"
#include "filesystems.h"
#include "file-lock.h"
#include "image.h"
#include "irmap.h"
#include "mount.h"
#include "mount-v2.h"
@ -703,7 +704,8 @@ int parse_options(int argc, char **argv, bool *usage_error, bool *has_exec_cmd,
BOOL_OPT("mntns-compat-mode", &opts.mntns_compat_mode),
BOOL_OPT("unprivileged", &opts.unprivileged),
BOOL_OPT("ghost-fiemap", &opts.ghost_fiemap),
{ "posix-sem-migration", no_argument, 0, 1101 },
{ "posix-sem-migration", no_argument, 0, 1101 },
BOOL_OPT(OPT_ALLOW_UPROBES, &opts.allow_uprobes),
{},
};
@ -1044,9 +1046,9 @@ int parse_options(int argc, char **argv, bool *usage_error, bool *has_exec_cmd,
return 1;
}
break;
case 1101:
opts.posix_sem_migration = true;
break;
case 1101:
opts.posix_sem_migration = true;
break;
case 'V':
pr_msg("Version: %s\n", CRIU_VERSION);
if (strcmp(CRIU_GITID, "0"))

View file

@ -2319,6 +2319,10 @@ int cr_dump_tasks(pid_t pid)
goto err;
he.has_pre_dump_mode = false;
if (found_uprobes_vma()) {
he.has_allow_uprobes = true;
he.allow_uprobes = true;
}
ret = write_img_inventory(&he);
if (ret)

View file

@ -2431,16 +2431,15 @@ err:
return ret;
}
static long restorer_get_vma_hint(struct list_head *tgt_vma_list, struct list_head *self_vma_list, long vma_len)
static long restorer_get_vma_hint(struct list_head *tgt_vma_list, struct list_head *self_vma_list, long min_addr, long vma_len)
{
struct vma_area *t_vma, *s_vma;
long prev_vma_end = 0;
long prev_vma_end = min_addr;
struct vma_area end_vma;
VmaEntry end_e;
end_vma.e = &end_e;
end_e.start = end_e.end = kdat.task_size;
prev_vma_end = kdat.mmap_min_addr;
s_vma = list_first_entry(self_vma_list, struct vma_area, list);
t_vma = list_first_entry(tgt_vma_list, struct vma_area, list);
@ -3196,7 +3195,7 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns
rst_mem_size = rst_mem_lock();
memzone_size = round_up(sizeof(struct restore_mem_zone) * current->nr_threads, page_size());
task_args->bootstrap_len = restorer_len + memzone_size + alen + rst_mem_size;
task_args->bootstrap_len = restorer_len + memzone_size + alen + rst_mem_size + shstk_restorer_stack_size();
BUG_ON(task_args->bootstrap_len & (PAGE_SIZE - 1));
pr_info("%d threads require %ldK of memory\n", current->nr_threads, KBYTES(task_args->bootstrap_len));
@ -3226,7 +3225,9 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns
* or inited from scratch).
*/
mem = (void *)restorer_get_vma_hint(&vmas->h, &self_vmas.h, task_args->bootstrap_len);
mem = (void *)restorer_get_vma_hint(&vmas->h, &self_vmas.h,
shstk_min_mmap_addr(&task_args->shstk, kdat.mmap_min_addr),
task_args->bootstrap_len);
if (mem == (void *)-1) {
pr_err("No suitable area for task_restore bootstrap (%ldK)\n", task_args->bootstrap_len);
goto err;
@ -3465,6 +3466,10 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns
* self-vmas are unmaped.
*/
mem += rst_mem_size;
shstk_set_restorer_stack(&task_args->shstk, mem);
mem += shstk_restorer_stack_size();
task_args->vdso_rt_parked_at = (unsigned long)mem;
task_args->vdso_maps_rt = vdso_maps_rt;
task_args->vdso_rt_size = vdso_rt_size;

View file

@ -283,15 +283,122 @@ int exec_rpc_query_external_files(char *name, int sk)
return ret;
}
static char images_dir[PATH_MAX];
/*
 * Work out which images directory applies to an RPC request and store
 * its path in @images_dir_path (a buffer of at least PATH_MAX bytes).
 *
 * Sources, highest priority first:
 *   1. opts.imgs_dir, when @imgs_changed_by_rpc_conf is set, i.e. the
 *      value came from the RPC configuration file;
 *   2. /proc/<peer_pid>/fd/<images_dir_fd>, when the request carries an
 *      images_dir_fd other than the -1 default;
 *   3. the images_dir string from the request.
 *
 * Only the RPC configuration file may override RPC settings; options
 * are applied in this order:
 *   * apply_config(global_conf)
 *   * apply_config(user_conf)
 *   * apply_config(environment variable)
 *   * apply_rpc_options()
 *   * apply_config(rpc_conf)
 *
 * Returns 0 on success and -1 when no source is available. In CHECK
 * mode a missing images directory is not an error: 0 is returned and
 * @images_dir_path is left untouched.
 */
static int resolve_images_dir_path(char *images_dir_path,
				   bool imgs_changed_by_rpc_conf,
				   const CriuOpts *req,
				   pid_t peer_pid)
{
	const char *src;

	if (imgs_changed_by_rpc_conf) {
		src = opts.imgs_dir;
	} else if (req->images_dir_fd != -1) {
		snprintf(images_dir_path, PATH_MAX, "/proc/%d/fd/%d", peer_pid, req->images_dir_fd);
		return 0;
	} else if (req->images_dir) {
		src = req->images_dir;
	} else if (opts.mode == CR_CHECK) {
		/*
		 * Images dir is optional in CHECK mode; the work dir is
		 * sorted out later by setup_images_and_workdir().
		 */
		return 0;
	} else {
		pr_err("Neither images_dir_fd nor images_dir was passed by RPC client.\n");
		return -1;
	}

	/* strncpy() does not terminate on truncation, so do it by hand */
	strncpy(images_dir_path, src, PATH_MAX - 1);
	images_dir_path[PATH_MAX - 1] = '\0';

	return 0;
}
/*
 * Open the images directory (skipped in CHECK mode) and chdir() into the
 * work directory for an RPC request.
 *
 * The work directory is taken from, highest priority first:
 *   1. opts.work_dir, when @work_changed_by_rpc_conf is set;
 *   2. /proc/<peer_pid>/fd/<work_dir_fd>, when the request has work_dir_fd;
 *   3. opts.work_dir, when it is set at all;
 *   4. @images_dir_path, when it is not an empty string.
 *
 * Every copy into the local buffer is bounded by its size, so an
 * over-long source is truncated instead of overflowing the buffer.
 *
 * Returns 0 on success, -1 if the images directory cannot be opened, no
 * work directory could be determined, or chdir() fails.
 */
static int setup_images_and_workdir(const char *images_dir_path,
				    bool work_changed_by_rpc_conf,
				    CriuOpts *req,
				    pid_t peer_pid)
{
	char work_dir_path[PATH_MAX] = "";

	/* We don't need to open images dir in CHECK mode. */
	if (opts.mode != CR_CHECK) {
		/*
		 * Image streaming is not supported with CRIU's service feature as
		 * the streamer must be started for each dump/restore operation.
		 * It is unclear how to do that with RPC, so we punt for now.
		 * This explains why we provide the argument mode=-1 instead of
		 * O_RSTR or O_DUMP.
		 */
		if (open_image_dir(images_dir_path, -1) < 0) {
			pr_perror("Can't open images directory");
			return -1;
		}
	}

	if (work_changed_by_rpc_conf)
		snprintf(work_dir_path, sizeof(work_dir_path), "%s", opts.work_dir);
	else if (req->has_work_dir_fd)
		snprintf(work_dir_path, sizeof(work_dir_path), "/proc/%d/fd/%d", peer_pid, req->work_dir_fd);
	else if (opts.work_dir)
		snprintf(work_dir_path, sizeof(work_dir_path), "%s", opts.work_dir);
	else if (images_dir_path[0] != '\0')
		snprintf(work_dir_path, sizeof(work_dir_path), "%s", images_dir_path);

	/* None of the sources above produced a path. */
	if (work_dir_path[0] == '\0') {
		pr_err("images-dir or work-dir is required when using log file\n");
		return -1;
	}

	if (chdir(work_dir_path)) {
		pr_perror("Can't chdir to work_dir");
		return -1;
	}

	return 0;
}
/*
 * Select the log destination and log level for an RPC request and
 * initialise logging.
 *
 * Destination:
 *   - when @output_changed_by_rpc_conf is set, neither log_file nor
 *     log_to_stderr from the request is applied;
 *   - otherwise the request's log_file is used if present (it must not
 *     contain a '/'), else stderr if the request asks for log_to_stderr;
 *   - if neither applies and opts.output is still unset,
 *     DEFAULT_LOG_FILENAME is used.
 *
 * The log level is always taken from the request.
 *
 * Returns 0 on success, -1 on an invalid log_file name or when
 * log_init() fails.
 */
static int setup_logging_from_req(CriuOpts *req, bool output_changed_by_rpc_conf)
{
	if (req->log_file && !output_changed_by_rpc_conf) {
		if (strchr(req->log_file, '/')) {
			/* validation failure: no call set errno, so don't use pr_perror() */
			pr_err("No subdirs are allowed in log_file name\n");
			return -1;
		}

		SET_CHAR_OPTS(output, req->log_file);
	} else if (req->has_log_to_stderr && req->log_to_stderr && !output_changed_by_rpc_conf) {
		xfree(opts.output);
		opts.output = NULL; /* log_init(NULL) writes to stderr */
	} else if (!opts.output) {
		SET_CHAR_OPTS(output, DEFAULT_LOG_FILENAME);
	}

	opts.log_level = req->log_level;
	log_set_loglevel(opts.log_level);

	if (log_init(opts.output)) {
		pr_perror("Can't initiate log");
		return -1;
	}

	return 0;
}
static int setup_opts_from_req(int sk, CriuOpts *req)
{
struct ucred ids;
struct stat st;
socklen_t ids_len = sizeof(struct ucred);
char images_dir_path[PATH_MAX];
char work_dir_path[PATH_MAX];
char images_dir_path[PATH_MAX] = "";
char status_fd[PATH_MAX];
bool output_changed_by_rpc_conf = false;
bool work_changed_by_rpc_conf = false;
@ -304,6 +411,23 @@ static int setup_opts_from_req(int sk, CriuOpts *req)
goto err;
}
/*
* The options relevant in CHECK mode are: log_file, log_to_stderr, and log_level.
* When logging to a file, we also need to resolve images_dir and work_dir.
*/
if (opts.mode == CR_CHECK) {
if (!req)
return 0; /* nothing to do */
/*
* A log file is needed only if:
* - log_file is explicitly set, or
* - log_to_stderr is NOT requested (i.e., using DEFAULT_LOG_FILENAME)
*/
if (!req->log_file || (req->has_log_to_stderr && req->log_to_stderr))
return 0; /* no log file, don't require images_dir or work_dir */
}
if (fstat(sk, &st)) {
pr_perror("Can't get socket stat");
goto err;
@ -312,161 +436,9 @@ static int setup_opts_from_req(int sk, CriuOpts *req)
BUG_ON(st.st_ino == -1);
service_sk_ino = st.st_ino;
/*
* Evaluate an additional configuration file if specified.
* This needs to happen twice, because it is needed early to detect
* things like work_dir, imgs_dir and logfile. The second parsing
* of the optional RPC configuration file happens at the end and
* overwrites all options set via RPC.
*/
if (req->config_file) {
char *tmp_output = opts.output;
char *tmp_work = opts.work_dir;
char *tmp_imgs = opts.imgs_dir;
opts.output = NULL;
opts.work_dir = NULL;
opts.imgs_dir = NULL;
rpc_cfg_file = req->config_file;
i = parse_options(0, NULL, &dummy, &dummy, PARSING_RPC_CONF);
if (i) {
xfree(tmp_output);
xfree(tmp_work);
xfree(tmp_imgs);
goto err;
}
/* If this is non-NULL, the RPC configuration file had a value, use it.*/
if (opts.output)
output_changed_by_rpc_conf = true;
/* If this is NULL, use the old value if it was set. */
if (!opts.output && tmp_output) {
opts.output = tmp_output;
tmp_output = NULL;
}
if (opts.work_dir)
work_changed_by_rpc_conf = true;
if (!opts.work_dir && tmp_work) {
opts.work_dir = tmp_work;
tmp_work = NULL;
}
if (opts.imgs_dir)
imgs_changed_by_rpc_conf = true;
/*
* As the images directory is a required RPC setting, it is not
* necessary to use the value from other configuration files.
* Either it is set in the RPC configuration file or it is set
* via RPC.
*/
xfree(tmp_output);
xfree(tmp_work);
xfree(tmp_imgs);
}
/*
* open images_dir - images_dir_fd is a required RPC parameter
*
* This assumes that if opts.imgs_dir is set we have a value
* from the configuration file parser. The test to see that
* imgs_changed_by_rpc_conf is true is used to make sure the value
* is from the RPC configuration file.
* The idea is that only the RPC configuration file is able to
* overwrite RPC settings:
* * apply_config(global_conf)
* * apply_config(user_conf)
* * apply_config(environment variable)
* * apply_rpc_options()
* * apply_config(rpc_conf)
*/
if (imgs_changed_by_rpc_conf)
strncpy(images_dir_path, opts.imgs_dir, PATH_MAX - 1);
else if (req->images_dir_fd != -1)
sprintf(images_dir_path, "/proc/%d/fd/%d", ids.pid, req->images_dir_fd);
else if (req->images_dir)
strncpy(images_dir_path, req->images_dir, PATH_MAX - 1);
else {
pr_err("Neither images_dir_fd nor images_dir was passed by RPC client.\n");
goto err;
}
if (req->parent_img)
SET_CHAR_OPTS(img_parent, req->parent_img);
if (req->stream)
opts.stream = true;
if (open_image_dir(images_dir_path, image_dir_mode()) < 0) {
pr_perror("Can't open images directory");
goto err;
}
/* get full path to images_dir to use in process title */
if (readlink(images_dir_path, images_dir, PATH_MAX) == -1) {
pr_perror("Can't readlink %s", images_dir_path);
goto err;
}
/* chdir to work dir */
if (work_changed_by_rpc_conf)
/* Use the value from the RPC configuration file first. */
strncpy(work_dir_path, opts.work_dir, PATH_MAX - 1);
else if (req->has_work_dir_fd)
/* Use the value set via RPC. */
sprintf(work_dir_path, "/proc/%d/fd/%d", ids.pid, req->work_dir_fd);
else if (opts.work_dir)
/* Use the value from one of the other configuration files. */
strncpy(work_dir_path, opts.work_dir, PATH_MAX - 1);
else
/* Use the images directory a work directory. */
strcpy(work_dir_path, images_dir_path);
if (chdir(work_dir_path)) {
pr_perror("Can't chdir to work_dir");
goto err;
}
/* initiate log file in work dir */
if (req->log_file && !output_changed_by_rpc_conf) {
/*
* If RPC sets a log file and if there nothing from the
* RPC configuration file, use the RPC value.
*/
if (strchr(req->log_file, '/')) {
pr_perror("No subdirs are allowed in log_file name");
goto err;
}
SET_CHAR_OPTS(output, req->log_file);
} else if (req->has_log_to_stderr && req->log_to_stderr && !output_changed_by_rpc_conf) {
xfree(opts.output);
opts.output = NULL;
} else if (!opts.output) {
SET_CHAR_OPTS(output, DEFAULT_LOG_FILENAME);
}
/* This is needed later to correctly set the log_level */
opts.log_level = req->log_level;
log_set_loglevel(req->log_level);
if (log_init(opts.output) == -1) {
pr_perror("Can't initiate log");
goto err;
}
if (req->config_file) {
pr_debug("Would overwrite RPC settings with values from %s\n", req->config_file);
}
if (req->has_unprivileged)
opts.unprivileged = req->unprivileged;
if (check_caps())
return 1;
if (kerndat_init())
return 1;
if (log_keep_err()) {
pr_perror("Can't tune log");
goto err;
@ -749,14 +721,6 @@ static int setup_opts_from_req(int sk, CriuOpts *req)
if (req->empty_ns & ~(CLONE_NEWNET))
goto err;
}
if (req->n_irmap_scan_paths) {
for (i = 0; i < req->n_irmap_scan_paths; i++) {
if (irmap_scan_path_add(req->irmap_scan_paths[i]))
goto err;
}
}
if (req->has_status_fd) {
pr_warn("status_fd is obsoleted; use status-ready notification instead\n");
@ -768,28 +732,95 @@ static int setup_opts_from_req(int sk, CriuOpts *req)
}
}
if (req->has_pidfd_store_sk && init_pidfd_store_sk(ids.pid, req->pidfd_store_sk))
goto err;
if (req->orphan_pts_master)
opts.orphan_pts_master = true;
if (req->has_display_stats)
opts.display_stats = req->display_stats;
/* Evaluate additional configuration file a second time to overwrite
* all RPC settings. */
/* Evaluate additional configuration file (e.g., runc.conf) to overwrite all RPC settings. */
if (req->config_file) {
char *tmp_output = opts.output;
char *tmp_work = opts.work_dir;
opts.output = NULL;
opts.work_dir = NULL;
/*
* As the images directory is a required RPC setting, it is not
* necessary to use the value from other configuration files.
* Either it is set in the RPC configuration file or it is set
* via RPC.
*/
xfree(opts.imgs_dir);
opts.imgs_dir = NULL;
pr_debug("Would overwrite RPC settings with values from %s\n", req->config_file);
rpc_cfg_file = req->config_file;
i = parse_options(0, NULL, &dummy, &dummy, PARSING_RPC_CONF);
if (i)
if (i) {
xfree(tmp_output);
xfree(tmp_work);
goto err;
}
/* If opts.{output,work_dir} is non-NULL, the RPC configuration file had a value, use it.*/
/* If opts.{output,work_dir} is NULL, use the old value if it was set. */
if (opts.output) {
output_changed_by_rpc_conf = true;
} else {
opts.output = tmp_output;
tmp_output = NULL;
}
if (opts.work_dir) {
work_changed_by_rpc_conf = true;
} else {
opts.work_dir = tmp_work;
tmp_work = NULL;
}
if (opts.imgs_dir)
imgs_changed_by_rpc_conf = true;
xfree(tmp_output);
xfree(tmp_work);
}
if (resolve_images_dir_path(images_dir_path, imgs_changed_by_rpc_conf, req, ids.pid) < 0)
goto err;
if (req->parent_img)
SET_CHAR_OPTS(img_parent, req->parent_img);
if (setup_images_and_workdir(images_dir_path, work_changed_by_rpc_conf, req, ids.pid))
goto err;
if (req->n_irmap_scan_paths) {
for (i = 0; i < req->n_irmap_scan_paths; i++) {
if (irmap_scan_path_add(req->irmap_scan_paths[i]))
goto err;
}
}
/* initiate log file in work dir */
if (setup_logging_from_req(req, output_changed_by_rpc_conf))
goto err;
if (check_caps())
goto err;
if (kerndat_init())
goto err;
/* init_pidfd_store_sk must be called after kerndat_init. */
if (req->has_pidfd_store_sk && init_pidfd_store_sk(ids.pid, req->pidfd_store_sk))
goto err;
if (req->mntns_compat_mode)
opts.mntns_compat_mode = true;
log_set_loglevel(opts.log_level);
if (check_options())
goto err;
@ -809,7 +840,7 @@ static int dump_using_req(int sk, CriuOpts *req)
if (setup_opts_from_req(sk, req))
goto exit;
__setproctitle("dump --rpc -t %d -D %s", req->pid, images_dir);
__setproctitle("dump --rpc -t %d", req->pid);
if (init_pidfd_store_hash())
goto pidfd_store_err;
@ -852,7 +883,7 @@ static int restore_using_req(int sk, CriuOpts *req)
if (setup_opts_from_req(sk, req))
goto exit;
__setproctitle("restore --rpc -D %s", images_dir);
__setproctitle("restore --rpc");
if (cr_restore_tasks())
goto exit;
@ -947,7 +978,7 @@ static int pre_dump_using_req(int sk, CriuOpts *req, bool single)
if (setup_opts_from_req(sk, req))
goto cout;
__setproctitle("pre-dump --rpc -t %d -D %s", req->pid, images_dir);
__setproctitle("pre-dump --rpc -t %d", req->pid);
if (init_pidfd_store_hash())
goto pidfd_store_err;
@ -956,9 +987,9 @@ static int pre_dump_using_req(int sk, CriuOpts *req, bool single)
goto cout;
ret = 0;
cout:
cout:
free_pidfd_store();
pidfd_store_err:
pidfd_store_err:
exit(ret);
}
@ -1048,7 +1079,7 @@ static int start_page_server_req(int sk, CriuOpts *req, bool daemon_mode)
}
ret = 0;
out_ch:
out_ch:
if (daemon_mode && ret < 0 && pid > 0)
kill(pid, SIGKILL);
close(start_pipe[1]);
@ -1283,14 +1314,13 @@ static int handle_cpuinfo(int sk, CriuReq *msg)
if (setup_opts_from_req(sk, msg->opts))
goto cout;
__setproctitle("cpuinfo %s --rpc -D %s", msg->type == CRIU_REQ_TYPE__CPUINFO_DUMP ? "dump" : "check",
images_dir);
__setproctitle("cpuinfo %s --rpc", msg->type == CRIU_REQ_TYPE__CPUINFO_DUMP ? "dump" : "check");
if (msg->type == CRIU_REQ_TYPE__CPUINFO_DUMP)
ret = cpuinfo_dump();
else
ret = cpuinfo_check();
cout:
cout:
exit(ret);
}

View file

@ -426,7 +426,7 @@ usage:
" --network-lock METHOD network locking/unlocking method; argument\n"
" can be 'nftables' or 'iptables' (default).\n"
" --unprivileged accept limitations when running as non-root\n"
" consult documentation for further details\n"
" --allow-uprobes allow dump/restore with uprobes vma\n"
"\n"
"* External resources support:\n"
" --external RES dump objects from this list as external resources:\n"
@ -453,9 +453,9 @@ usage:
" --evasive-devices use any path to a device file if the original one\n"
" is inaccessible\n"
" --link-remap allow one to link unlinked files back when possible\n"
" --posix-sem-migration enable POSIX semaphore migration support for cross host\n"
" migration (uses object based dump/restore instead of\n"
" link-remap for migration purposes)\n"
" --posix-sem-migration enable POSIX semaphore migration support for cross host\n"
" migration (uses object based dump/restore instead of\n"
" link-remap for migration purposes)\n"
" --ghost-limit size limit max size of deleted file contents inside image\n"
" --ghost-fiemap enable dumping of deleted files using fiemap\n"
" --action-script FILE add an external action script\n"

View file

@ -1788,9 +1788,9 @@ static int collect_one_file(void *o, ProtobufCMessage *base, struct cr_img *i)
case FD_TYPES__PIDFD:
ret = collect_one_file_entry(fe, fe->pidfd->id, &fe->pidfd->base, &pidfd_cinfo);
break;
case FD_TYPES__POSIX_SEM:
ret = collect_one_file_entry(fe, fe->psm->fd_id, &fe->psm->base, &posix_sem_cinfo);
break;
case FD_TYPES__POSIX_SEM:
ret = collect_one_file_entry(fe, fe->psm->fd_id, &fe->psm->base, &posix_sem_cinfo);
break;
#ifdef CONFIG_HAS_LIBBPF
case FD_TYPES__BPFMAP:
ret = collect_one_file_entry(fe, fe->bpf->id, &fe->bpf->base, &bpfmap_cinfo);
@ -1815,9 +1815,9 @@ int prepare_files(void)
init_sk_info_hash();
init_dead_pidfd_hash();
if (collect_image(&files_cinfo))
return -1;
/* get posix semaphore info */
if (collect_image(&posix_sem_cinfo))
return -1;
return 0;
return -1;
/* get posix semaphore info */
if (collect_image(&posix_sem_cinfo))
return -1;
return 0;
}

View file

@ -25,100 +25,100 @@
}
struct cr_fd_desc_tmpl imgset_template[CR_FD_MAX] = {
FD_ENTRY(INVENTORY, "inventory"),
FD_ENTRY(FDINFO, "fdinfo-%u"),
FD_ENTRY(PAGEMAP, "pagemap-%lu"),
FD_ENTRY(SHMEM_PAGEMAP, "pagemap-shmem-%lu"),
FD_ENTRY(REG_FILES, "reg-files"),
FD_ENTRY(EXT_FILES, "ext-files"),
FD_ENTRY(NS_FILES, "ns-files"),
FD_ENTRY(EVENTFD_FILE, "eventfd"),
FD_ENTRY(EVENTPOLL_FILE,"eventpoll"),
FD_ENTRY(EVENTPOLL_TFD, "eventpoll-tfd"),
FD_ENTRY(SIGNALFD, "signalfd"),
FD_ENTRY(INOTIFY_FILE, "inotify"),
FD_ENTRY(INOTIFY_WD, "inotify-wd"),
FD_ENTRY(FANOTIFY_FILE, "fanotify"),
FD_ENTRY(FANOTIFY_MARK, "fanotify-mark"),
FD_ENTRY(CORE, "core-%u"),
FD_ENTRY(IDS, "ids-%u"),
FD_ENTRY(MM, "mm-%u"),
FD_ENTRY(VMAS, "vmas-%u"),
FD_ENTRY(PIPES, "pipes"),
FD_ENTRY_F(PIPES_DATA, "pipes-data", O_NOBUF), /* splices data */
FD_ENTRY(FIFO, "fifo"),
FD_ENTRY_F(FIFO_DATA, "fifo-data", O_NOBUF), /* the same */
FD_ENTRY(PSTREE, "pstree"),
FD_ENTRY(SIGACT, "sigacts-%u"),
FD_ENTRY(UNIXSK, "unixsk"),
FD_ENTRY(INETSK, "inetsk"),
FD_ENTRY(PACKETSK, "packetsk"),
FD_ENTRY(NETLINK_SK, "netlinksk"),
FD_ENTRY_F(SK_QUEUES, "sk-queues", O_NOBUF), /* lseeks the image */
FD_ENTRY(ITIMERS, "itimers-%u"),
FD_ENTRY(POSIX_TIMERS, "posix-timers-%u"),
FD_ENTRY(CREDS, "creds-%u"),
FD_ENTRY(UTSNS, "utsns-%u"),
FD_ENTRY(IPC_VAR, "ipcns-var-%u"),
FD_ENTRY_F(IPCNS_SHM, "ipcns-shm-%u", O_NOBUF), /* writes segments of data */
FD_ENTRY(IPCNS_MSG, "ipcns-msg-%u"),
FD_ENTRY(IPCNS_SEM, "ipcns-sem-%u"),
FD_ENTRY(FS, "fs-%u"),
FD_ENTRY(REMAP_FPATH, "remap-fpath"),
FD_ENTRY_F(GHOST_FILE, "ghost-file-%x", O_NOBUF),
FD_ENTRY_F(MEMFD_INODE, "memfd", O_NOBUF),
FD_ENTRY(TCP_STREAM, "tcp-stream-%x"),
FD_ENTRY(MNTS, "mountpoints-%u"),
FD_ENTRY(NETDEV, "netdev-%u"),
FD_ENTRY(NETNS, "netns-%u"),
FD_ENTRY_F(IFADDR, "ifaddr-%u", O_NOBUF),
FD_ENTRY_F(ROUTE, "route-%u", O_NOBUF),
FD_ENTRY_F(ROUTE6, "route6-%u", O_NOBUF),
FD_ENTRY_F(RULE, "rule-%u", O_NOBUF),
FD_ENTRY_F(IPTABLES, "iptables-%u", O_NOBUF),
FD_ENTRY_F(IP6TABLES, "ip6tables-%u", O_NOBUF),
FD_ENTRY_F(NFTABLES, "nftables-%u", O_NOBUF),
FD_ENTRY_F(TMPFS_IMG, "tmpfs-%u.tar.gz", O_NOBUF),
FD_ENTRY_F(TMPFS_DEV, "tmpfs-dev-%u.tar.gz", O_NOBUF),
FD_ENTRY_F(AUTOFS, "autofs-%u", O_NOBUF),
FD_ENTRY(INVENTORY, "inventory"),
FD_ENTRY(FDINFO, "fdinfo-%u"),
FD_ENTRY(PAGEMAP, "pagemap-%lu"),
FD_ENTRY(SHMEM_PAGEMAP, "pagemap-shmem-%lu"),
FD_ENTRY(REG_FILES, "reg-files"),
FD_ENTRY(EXT_FILES, "ext-files"),
FD_ENTRY(NS_FILES, "ns-files"),
FD_ENTRY(EVENTFD_FILE, "eventfd"),
FD_ENTRY(EVENTPOLL_FILE, "eventpoll"),
FD_ENTRY(EVENTPOLL_TFD, "eventpoll-tfd"),
FD_ENTRY(SIGNALFD, "signalfd"),
FD_ENTRY(INOTIFY_FILE, "inotify"),
FD_ENTRY(INOTIFY_WD, "inotify-wd"),
FD_ENTRY(FANOTIFY_FILE, "fanotify"),
FD_ENTRY(FANOTIFY_MARK, "fanotify-mark"),
FD_ENTRY(CORE, "core-%u"),
FD_ENTRY(IDS, "ids-%u"),
FD_ENTRY(MM, "mm-%u"),
FD_ENTRY(VMAS, "vmas-%u"),
FD_ENTRY(PIPES, "pipes"),
FD_ENTRY_F(PIPES_DATA, "pipes-data", O_NOBUF), /* splices data */
FD_ENTRY(FIFO, "fifo"),
FD_ENTRY_F(FIFO_DATA, "fifo-data", O_NOBUF), /* the same */
FD_ENTRY(PSTREE, "pstree"),
FD_ENTRY(SIGACT, "sigacts-%u"),
FD_ENTRY(UNIXSK, "unixsk"),
FD_ENTRY(INETSK, "inetsk"),
FD_ENTRY(PACKETSK, "packetsk"),
FD_ENTRY(NETLINK_SK, "netlinksk"),
FD_ENTRY_F(SK_QUEUES, "sk-queues", O_NOBUF), /* lseeks the image */
FD_ENTRY(ITIMERS, "itimers-%u"),
FD_ENTRY(POSIX_TIMERS, "posix-timers-%u"),
FD_ENTRY(CREDS, "creds-%u"),
FD_ENTRY(UTSNS, "utsns-%u"),
FD_ENTRY(IPC_VAR, "ipcns-var-%u"),
FD_ENTRY_F(IPCNS_SHM, "ipcns-shm-%u", O_NOBUF), /* writes segments of data */
FD_ENTRY(IPCNS_MSG, "ipcns-msg-%u"),
FD_ENTRY(IPCNS_SEM, "ipcns-sem-%u"),
FD_ENTRY(FS, "fs-%u"),
FD_ENTRY(REMAP_FPATH, "remap-fpath"),
FD_ENTRY_F(GHOST_FILE, "ghost-file-%x", O_NOBUF),
FD_ENTRY_F(MEMFD_INODE, "memfd", O_NOBUF),
FD_ENTRY(TCP_STREAM, "tcp-stream-%x"),
FD_ENTRY(MNTS, "mountpoints-%u"),
FD_ENTRY(NETDEV, "netdev-%u"),
FD_ENTRY(NETNS, "netns-%u"),
FD_ENTRY_F(IFADDR, "ifaddr-%u", O_NOBUF),
FD_ENTRY_F(ROUTE, "route-%u", O_NOBUF),
FD_ENTRY_F(ROUTE6, "route6-%u", O_NOBUF),
FD_ENTRY_F(RULE, "rule-%u", O_NOBUF),
FD_ENTRY_F(IPTABLES, "iptables-%u", O_NOBUF),
FD_ENTRY_F(IP6TABLES, "ip6tables-%u", O_NOBUF),
FD_ENTRY_F(NFTABLES, "nftables-%u", O_NOBUF),
FD_ENTRY_F(TMPFS_IMG, "tmpfs-%u.tar.gz", O_NOBUF),
FD_ENTRY_F(TMPFS_DEV, "tmpfs-dev-%u.tar.gz", O_NOBUF),
FD_ENTRY_F(AUTOFS, "autofs-%u", O_NOBUF),
FD_ENTRY(BINFMT_MISC_OLD, "binfmt-misc-%u"),
FD_ENTRY(BINFMT_MISC, "binfmt-misc"),
FD_ENTRY(TTY_FILES, "tty"),
FD_ENTRY(TTY_INFO, "tty-info"),
FD_ENTRY_F(TTY_DATA, "tty-data", O_NOBUF),
FD_ENTRY(FILE_LOCKS, "filelocks"),
FD_ENTRY(RLIMIT, "rlimit-%u"),
FD_ENTRY_F(PAGES, "pages-%u", O_NOBUF),
FD_ENTRY_F(PAGES_OLD, "pages-%d", O_NOBUF),
FD_ENTRY(BINFMT_MISC, "binfmt-misc"),
FD_ENTRY(TTY_FILES, "tty"),
FD_ENTRY(TTY_INFO, "tty-info"),
FD_ENTRY_F(TTY_DATA, "tty-data", O_NOBUF),
FD_ENTRY(FILE_LOCKS, "filelocks"),
FD_ENTRY(RLIMIT, "rlimit-%u"),
FD_ENTRY_F(PAGES, "pages-%u", O_NOBUF),
FD_ENTRY_F(PAGES_OLD, "pages-%d", O_NOBUF),
FD_ENTRY_F(SHM_PAGES_OLD, "pages-shmem-%ld", O_NOBUF),
FD_ENTRY(SIGNAL, "signal-s-%u"),
FD_ENTRY(PSIGNAL, "signal-p-%u"),
FD_ENTRY(TUNFILE, "tunfile"),
FD_ENTRY(CGROUP, "cgroup"),
FD_ENTRY(TIMERFD, "timerfd"),
FD_ENTRY(CPUINFO, "cpuinfo"),
FD_ENTRY(SECCOMP, "seccomp"),
FD_ENTRY(USERNS, "userns-%u"),
FD_ENTRY(NETNF_CT, "netns-ct-%u"),
FD_ENTRY(NETNF_EXP, "netns-exp-%u"),
FD_ENTRY(FILES, "files"),
FD_ENTRY(TIMENS, "timens-%u"),
FD_ENTRY(PIDNS, "pidns-%u"),
FD_ENTRY_F(BPFMAP_FILE, "bpfmap-file", O_NOBUF),
FD_ENTRY_F(BPFMAP_DATA, "bpfmap-data", O_NOBUF),
FD_ENTRY(APPARMOR, "apparmor"),
FD_ENTRY(PIDFD, "pidfd"),
FD_ENTRY(POSIX_SEM, "posix-sem"),
FD_ENTRY(SIGNAL, "signal-s-%u"),
FD_ENTRY(PSIGNAL, "signal-p-%u"),
FD_ENTRY(TUNFILE, "tunfile"),
FD_ENTRY(CGROUP, "cgroup"),
FD_ENTRY(TIMERFD, "timerfd"),
FD_ENTRY(CPUINFO, "cpuinfo"),
FD_ENTRY(SECCOMP, "seccomp"),
FD_ENTRY(USERNS, "userns-%u"),
FD_ENTRY(NETNF_CT, "netns-ct-%u"),
FD_ENTRY(NETNF_EXP, "netns-exp-%u"),
FD_ENTRY(FILES, "files"),
FD_ENTRY(TIMENS, "timens-%u"),
FD_ENTRY(PIDNS, "pidns-%u"),
FD_ENTRY_F(BPFMAP_FILE, "bpfmap-file", O_NOBUF),
FD_ENTRY_F(BPFMAP_DATA, "bpfmap-data", O_NOBUF),
FD_ENTRY(APPARMOR, "apparmor"),
FD_ENTRY(PIDFD, "pidfd"),
FD_ENTRY(POSIX_SEM, "posix-sem"),
[CR_FD_STATS] = {
.fmt = "stats-%s",
.magic = STATS_MAGIC,
.fmt = "stats-%s",
.magic = STATS_MAGIC,
.oflags = O_SERVICE | O_FORCE_LOCAL,
},
[CR_FD_IRMAP_CACHE] = {
.fmt = "irmap-cache",
.magic = IRMAP_CACHE_MAGIC,
.fmt = "irmap-cache",
.magic = IRMAP_CACHE_MAGIC,
.oflags = O_SERVICE | O_FORCE_LOCAL,
},
};

View file

@ -95,6 +95,11 @@ int check_img_inventory(bool restore)
goto out_err;
}
if (restore && he->allow_uprobes && !opts.allow_uprobes) {
pr_err("Dumped with --" OPT_ALLOW_UPROBES ". Need to set it on restore as well.\n");
goto out_err;
}
if (restore) {
if (!he->has_network_lock_method) {
/*
@ -712,7 +717,7 @@ struct cr_img *img_from_fd(int fd)
* This is used when opts.stream is enabled for picking the right streamer
* socket name. `mode` is ignored when opts.stream is not enabled.
*/
int open_image_dir(char *dir, int mode)
int open_image_dir(const char *dir, int mode)
{
int fd, ret;

View file

@ -196,6 +196,7 @@ struct cr_options {
char *work_dir;
int network_lock_method;
int skip_file_rwx_check;
int allow_uprobes;
/*
* When we schedule some functionality for removal we first

View file

@ -77,14 +77,20 @@
* about virtual address space ranges covered by
* MADV_GUARD_INSTALL guards. These ones must be always at
* the end of the vma_area_list and properly skipped a.e.
* - uprobes
* stands for a "[uprobes]" vma that's automatically mapped by
* the kernel when an active uprobe is hit. Contents of this vma
* are not dumped and neither are its madvise bits restored,
* because the kernel is in complete control of this vma. This is
* just used to track the existence of the uprobes vma.
*/
#define VMA_AREA_NONE (0 << 0)
#define VMA_AREA_REGULAR (1 << 0)
#define VMA_AREA_STACK (1 << 1)
#define VMA_AREA_VSYSCALL (1 << 2)
#define VMA_AREA_VDSO (1 << 3)
#define VMA_AREA_NONE (0 << 0)
#define VMA_AREA_REGULAR (1 << 0)
#define VMA_AREA_STACK (1 << 1)
#define VMA_AREA_VSYSCALL (1 << 2)
#define VMA_AREA_VDSO (1 << 3)
#define VMA_AREA_POSIX_SEM (1 << 4)
#define VMA_AREA_HEAP (1 << 5)
#define VMA_AREA_HEAP (1 << 5)
#define VMA_FILE_PRIVATE (1 << 6)
#define VMA_FILE_SHARED (1 << 7)
@ -98,6 +104,7 @@
#define VMA_AREA_MEMFD (1 << 14)
#define VMA_AREA_SHSTK (1 << 15)
#define VMA_AREA_GUARD (1 << 16)
#define VMA_AREA_UPROBES (1 << 17)
#define VMA_EXT_PLUGIN (1 << 27)
#define VMA_FORCE_READ (1 << 28)
@ -112,6 +119,8 @@
#define CR_PARENT_LINK "parent"
#define OPT_ALLOW_UPROBES "allow-uprobes"
extern bool ns_per_id;
extern bool img_common_magic;
@ -161,7 +170,7 @@ static inline int img_raw_fd(struct cr_img *img)
extern off_t img_raw_size(struct cr_img *img);
extern int open_image_dir(char *dir, int mode);
extern int open_image_dir(const char *dir, int mode);
extern void close_image_dir(void);
/*
* Return -1 -- parent symlink points to invalid target

View file

@ -105,4 +105,6 @@ extern int parse_uptime(uint64_t *upt);
extern int parse_timens_offsets(struct timespec *boff, struct timespec *moff);
extern bool found_uprobes_vma(void);
#endif /* __CR_PROC_PARSE_H__ */

View file

@ -9,6 +9,7 @@ extern int arch_set_thread_regs_nosigrt(struct pid *pid);
struct task_restore_args;
struct pstree_item;
struct rst_shstk_info;
#ifndef arch_shstk_prepare
static inline int arch_shstk_prepare(struct pstree_item *item,
@ -38,4 +39,25 @@ static inline int arch_shstk_trampoline(struct pstree_item *item, CoreEntry *cor
#define arch_shstk_trampoline arch_shstk_trampoline
#endif
#ifndef shstk_restorer_stack_size
/*
 * Generic fallback, compiled only when no macro named
 * shstk_restorer_stack_size is already defined (see the enclosing #ifndef).
 *
 * Takes no arguments and always returns 0.
 * NOTE(review): presumably 0 means "no extra restorer stack space is needed
 * for a shadow stack" -- confirm against the arch-specific definition.
 */
static always_inline long shstk_restorer_stack_size(void)
{
return 0;
}
#endif
#ifndef shstk_set_restorer_stack
/*
 * Generic fallback, compiled only when no macro named
 * shstk_set_restorer_stack is already defined (see the enclosing #ifndef).
 *
 * @info: unused by this fallback
 * @ptr:  unused by this fallback
 *
 * Does nothing and always returns 0.
 * NOTE(review): presumably 0 is the success value callers expect -- confirm
 * against the arch-specific definition and its call sites.
 */
static always_inline long shstk_set_restorer_stack(struct rst_shstk_info *info, void *ptr)
{
return 0;
}
#endif
#ifndef shstk_min_mmap_addr
/*
 * Generic fallback, compiled only when no macro named shstk_min_mmap_addr
 * is already defined (see the enclosing #ifndef).
 *
 * @info: unused by this fallback
 * @def:  value handed back to the caller
 *
 * Returns @def unchanged. Note that @def is unsigned long while the return
 * type is long, so the value is converted on return.
 */
static always_inline long shstk_min_mmap_addr(struct rst_shstk_info *info, unsigned long def)
{
return def;
}
#endif
#endif

View file

@ -357,4 +357,11 @@ static inline int arch_shstk_restore(struct rst_shstk_info *shstk)
#define arch_shstk_restore arch_shstk_restore
#endif
#ifndef shstk_vma_restore
/*
 * Generic fallback, compiled only when no macro named shstk_vma_restore is
 * already defined (see the enclosing #ifndef).
 *
 * @vma_entry: unused by this fallback
 *
 * Always returns -1.
 * NOTE(review): presumably this signals "shadow stack VMAs cannot be
 * restored here" to the caller -- confirm against the call sites.
 */
static always_inline int shstk_vma_restore(VmaEntry *vma_entry)
{
return -1;
}
#endif
#endif /* __CR_RESTORER_H__ */

View file

@ -190,7 +190,7 @@ void flush_early_log_buffer(int fd)
* with reading the log_level.
*/
struct early_log_hdr *hdr = (void *)early_log_buffer + pos;
pos += sizeof(hdr);
pos += sizeof(*hdr);
if (hdr->level <= current_loglevel) {
size_t size = 0;
while (size < hdr->len) {
@ -202,7 +202,7 @@ void flush_early_log_buffer(int fd)
}
pos += hdr->len;
}
if (early_log_buf_off == EARLY_LOG_BUF_LEN)
if ((early_log_buf_off + sizeof(struct early_log_hdr)) >= EARLY_LOG_BUF_LEN)
pr_warn("The early log buffer is full, some messages may have been lost\n");
early_log_buf_off = 0;
}
@ -320,10 +320,10 @@ unsigned int log_get_loglevel(void)
static void early_vprint(const char *format, unsigned int loglevel, va_list params)
{
unsigned int log_size = 0;
int log_size = 0, log_space;
struct early_log_hdr *hdr;
if ((early_log_buf_off + sizeof(hdr)) >= EARLY_LOG_BUF_LEN)
if ((early_log_buf_off + sizeof(*hdr)) >= EARLY_LOG_BUF_LEN)
return;
/* Save loglevel */
@ -331,7 +331,8 @@ static void early_vprint(const char *format, unsigned int loglevel, va_list para
hdr = (void *)early_log_buffer + early_log_buf_off;
hdr->level = loglevel;
/* Skip the log entry size */
early_log_buf_off += sizeof(hdr);
early_log_buf_off += sizeof(*hdr);
log_space = EARLY_LOG_BUF_LEN - early_log_buf_off;
if (loglevel >= LOG_TIMESTAMP) {
/*
* If logging is not yet setup we just write zeros
@ -339,12 +340,17 @@ static void early_vprint(const char *format, unsigned int loglevel, va_list para
* keep the same format as the other messages on
* log levels with timestamps (>=LOG_TIMESTAMP).
*/
log_size = snprintf(early_log_buffer + early_log_buf_off, sizeof(early_log_buffer) - early_log_buf_off,
log_size = snprintf(early_log_buffer + early_log_buf_off, log_space,
"(00.000000) ");
}
log_size += vsnprintf(early_log_buffer + early_log_buf_off + log_size,
sizeof(early_log_buffer) - early_log_buf_off - log_size, format, params);
if (log_size < log_space)
log_size += vsnprintf(early_log_buffer + early_log_buf_off + log_size,
log_space - log_size, format, params);
if (log_size > log_space) {
/* vsnprintf always adds the terminating null byte. */
log_size = log_space - 1;
}
/* Save log entry size */
hdr->len = log_size;

View file

@ -787,8 +787,6 @@ int prepare_mm_pid(struct pstree_item *i)
ri->vmas.rst_priv_size += vma_area_len(vma);
if (vma_has_guard_gap_hidden(vma))
ri->vmas.rst_priv_size += PAGE_SIZE;
if (vma_area_is(vma, VMA_AREA_SHSTK))
ri->vmas.rst_priv_size += PAGE_SIZE;
}
pr_info("vma 0x%" PRIx64 " 0x%" PRIx64 "\n", vma->e->start, vma->e->end);
@ -931,13 +929,6 @@ static int premap_private_vma(struct pstree_item *t, struct vma_area *vma, void
size = vma_entry_len(vma->e);
/*
* map an extra page for shadow stack VMAs, it will be used as a
* temporary shadow stack
*/
if (vma_area_is(vma, VMA_AREA_SHSTK))
size += PAGE_SIZE;
if (!vma_inherited(vma)) {
int flag = 0;
/*

View file

@ -1139,13 +1139,17 @@ static int page_server_get_pages(int sk, struct page_server_iov *pi)
{
struct pstree_item *item;
struct page_pipe *pp;
unsigned long len;
unsigned long len, nr_pages;
int ret;
item = pstree_item_by_virt(pi->dst_id);
pp = dmpi(item)->mem_pp;
ret = page_pipe_read(pp, &pipe_read_dest, pi->vaddr, &pi->nr_pages, PPB_LAZY);
/* page_pipe_read() uses 'unsigned long *' but pi->nr_pages is u64.
* Use a temporary variable to fix the incompatible pointer type
* on 32-bit platforms (e.g. armv7). */
nr_pages = pi->nr_pages;
ret = page_pipe_read(pp, &pipe_read_dest, pi->vaddr, &nr_pages, PPB_LAZY);
if (ret)
return ret;
@ -1154,6 +1158,7 @@ static int page_server_get_pages(int sk, struct page_server_iov *pi)
* .dst_id all remain intact.
*/
pi->nr_pages = nr_pages;
if (pi->nr_pages == 0) {
pr_debug("no iovs found, zero pages\n");
return -1;

View file

@ -162,7 +162,7 @@ static int seek_pagemap(struct page_read *pr, unsigned long vaddr)
if (end <= vaddr)
skip_pagemap_pages(pr, end - pr->cvaddr);
adv:; /* otherwise "label at end of compound stmt" gcc error */
adv:; /* otherwise "label at end of compound stmt" gcc error */
} while (advance(pr));
return 0;
@ -171,7 +171,7 @@ static int seek_pagemap(struct page_read *pr, unsigned long vaddr)
static inline void pagemap_bound_check(PagemapEntry *pe, unsigned long vaddr, unsigned long int nr)
{
if (vaddr < pe->vaddr || (vaddr - pe->vaddr) / PAGE_SIZE + nr > pe->nr_pages) {
pr_err("Page read err %" PRIx64 ":%lx vs %lx:%lx\n", pe->vaddr, pe->nr_pages, vaddr, nr);
pr_err("Page read err %" PRIx64 ":%" PRIx64 " vs %lx:%lx\n", pe->vaddr, pe->nr_pages, vaddr, nr);
BUG();
}
}
@ -539,7 +539,7 @@ static int process_async_reads(struct page_read *pr)
pr_debug("Read piov iovs %d, from %ju, len %ju, first %p:%zu\n", piov->nr, piov->from,
piov->end - piov->from, piov->to->iov_base, piov->to->iov_len);
more:
more:
ret = preadv(fd, piov->to, piov->nr, piov->from);
if (fault_injected(FI_PARTIAL_PAGES)) {
/*

View file

@ -1131,6 +1131,23 @@ static int vma_remap(VmaEntry *vma_entry, int uffd)
pr_info("Remap %lx->%lx len %lx\n", src, dst, len);
/*
* SHSTK VMAs are a bit special: the shstk vma is created right in
* shstk_vma_restore() and populated with contents from a premapped VMA
* (which in turn is just a normal anonymous VMA!). Then this premapped VMA
* is munmap()ed. After that, vma_premmaped_start(vma_entry) needs to be
* adjusted to point to the created shstk vma, which vma_remap() then treats
* as a premapped one.
*/
if (vma_entry_is(vma_entry, VMA_AREA_SHSTK)) {
if (shstk_vma_restore(vma_entry)) {
pr_err("Unable to prepare shadow stack vma for remap %lx -> %lx\n", src, dst);
return -1;
}
/* shstk_vma_restore() modifies vma premapped address */
src = vma_premmaped_start(vma_entry);
}
if (src - dst < len)
guard = dst;
else if (dst - src < len)
@ -1830,13 +1847,6 @@ __visible long __export_restore_task(struct task_restore_args *args)
if (vma_entry->start > vma_entry->shmid)
break;
/*
* shadow stack VMAs cannot be remapped, they must be
* recreated with map_shadow_stack system call
*/
if (vma_entry_is(vma_entry, VMA_AREA_SHSTK))
continue;
if (vma_remap(vma_entry, args->uffd))
goto core_restore_end;
}
@ -1854,13 +1864,6 @@ __visible long __export_restore_task(struct task_restore_args *args)
if (vma_entry->start < vma_entry->shmid)
break;
/*
* shadow stack VMAs cannot be remapped, they must be
* recreated with map_shadow_stack system call
*/
if (vma_entry_is(vma_entry, VMA_AREA_SHSTK))
continue;
if (vma_remap(vma_entry, args->uffd))
goto core_restore_end;
}

View file

@ -75,6 +75,8 @@ struct buffer {
static struct buffer __buf;
static char *buf = __buf.buf;
/* only ever goes from false to true, if at all */
static bool uprobes_vma_exists = false;
/*
* This is what AIO ring buffers look like in proc
@ -203,8 +205,10 @@ static void parse_vma_vmflags(char *buf, struct vma_area *vma_area)
* vmsplice doesn't work for VM_IO and VM_PFNMAP mappings, the
* only exception is VVAR area that mapped by the kernel as
* VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP
*
* The uprobes vma is also mapped by the kernel with VM_IO, among other flags
*/
if (io_pf && !vma_area_is(vma_area, VMA_AREA_VVAR) && !vma_entry_is(vma_area->e, VMA_FILE_SHARED))
if (io_pf && !vma_area_is(vma_area, VMA_AREA_VVAR) && !vma_entry_is(vma_area->e, VMA_FILE_SHARED) && !vma_area_is(vma_area, VMA_AREA_UPROBES))
vma_area->e->status |= VMA_UNSUPP;
if (vma_area->e->madv)
@ -604,25 +608,24 @@ static int handle_vma(pid_t pid, struct vma_area *vma_area, const char *file_pat
goto err;
} else if (!strcmp(file_path, "[heap]")) {
vma_area->e->status |= VMA_AREA_REGULAR | VMA_AREA_HEAP;
} else if (vfi->dev_maj == 0 && vfi->dev_min == 0 && vfi->ino == 0 &&
file_path[0] != '/') {
} else if (vfi->dev_maj == 0 && vfi->dev_min == 0 && vfi->ino == 0 &&
file_path[0] != '/') {
/* Anonymous mapping with special name */
vma_area->e->status = VMA_AREA_REGULAR;
} else if (file_path[0] == '/' && strstr(file_path, "/dev/shm/sem.")) {
pr_info("Found POSIX semaphore VMA mapping: %s\n", file_path);
if (opts.posix_sem_migration) {
pr_info("POSIX semaphore migration mode enabled, dumping as object: %s\n", file_path);
if (access(file_path, F_OK) != 0) {
pr_info("POSIX semaphore VMA mapping for deleted semaphore: %s\n", file_path);
/* create a POSIX semaphore file entry for this VMA */
if (try_dump_posix_semaphore(file_path + 1, *vm_file_fd, vma_area->vmst->st_ino,
&(struct fd_parms){
.stat = *vma_area->vmst,
.mnt_id = vma_area->mnt_id,
.fs_type = TMPFS_MAGIC
}) == 1) {
if (try_dump_posix_semaphore(file_path + 1, *vm_file_fd, vma_area->vmst->st_ino,
&(struct fd_parms){
.stat = *vma_area->vmst,
.mnt_id = vma_area->mnt_id,
.fs_type = TMPFS_MAGIC }) == 1) {
pr_info("Skipping refular file processing for POSIX semaphore VMA\n");
close_safe(vm_file_fd);
vma_area->e->status = VMA_AREA_REGULAR | VMA_AREA_POSIX_SEM;
@ -635,9 +638,17 @@ static int handle_vma(pid_t pid, struct vma_area *vma_area, const char *file_pat
vma_area->e->status = VMA_AREA_REGULAR | VMA_AREA_POSIX_SEM;
}
} else {
/* link remap would be needed for this case */
/* link remap would be needed for this case */
vma_area->e->status = VMA_AREA_REGULAR;
}
} else if (!strcmp(file_path, "[uprobes]")) {
uprobes_vma_exists = true;
if (!opts.allow_uprobes) {
pr_err("PID %d has uprobes vma. Consider using --" OPT_ALLOW_UPROBES ".\n",
pid);
goto err;
}
vma_area->e->status |= VMA_AREA_UPROBES;
} else {
vma_area->e->status = VMA_AREA_REGULAR;
}
@ -774,6 +785,10 @@ static int vma_list_add(struct vma_area *vma_area, struct vm_area_list *vma_area
*/
pr_debug("Device file mapping %016" PRIx64 "-%016" PRIx64 " supported via device plugins\n",
vma_area->e->start, vma_area->e->end);
} else if (vma_area->e->status & VMA_AREA_UPROBES) {
pr_debug("Skipping uprobes vma %016" PRIx64 "-%016" PRIx64 "\n", vma_area->e->start,
vma_area->e->end);
return 0;
} else if (vma_area->e->status & VMA_UNSUPP) {
pr_err("Unsupported mapping found %016" PRIx64 "-%016" PRIx64 "\n", vma_area->e->start,
vma_area->e->end);
@ -1476,7 +1491,7 @@ static void cure_path(char *path)
if (off)
path[i - off] = path[i];
continue;
replace:
replace:
off += 3;
i += 3;
}
@ -1755,7 +1770,7 @@ struct mount_info *parse_mountinfo(pid_t pid, struct ns_id *nsid, bool for_dump)
goto end;
}
}
end:
end:
if (fsname)
free(fsname);
@ -2833,7 +2848,7 @@ int collect_controllers(struct list_head *cgroups, unsigned int *n_cgroups)
nc->controllers[nc->n_controllers - 1] = n;
}
skip:
skip:
if (!off)
break;
controllers = off + 1;
@ -2964,3 +2979,8 @@ int parse_uptime(uint64_t *upt)
fclose(f);
return 0;
}
/*
 * Report whether a "[uprobes]" vma has been seen so far.
 *
 * Returns the current value of the file-local uprobes_vma_exists flag.
 * That flag starts out false and is set to true by handle_vma() when it
 * meets a mapping named "[uprobes]"; nothing in this file resets it.
 */
bool found_uprobes_vma(void)
{
return uprobes_vma_exists;
}

View file

@ -195,6 +195,7 @@ static void vma_opt_str(const struct vma_area *v, char *opt)
opt2s(VMA_ANON_PRIVATE, "ap");
opt2s(VMA_AREA_SYSVIPC, "sysv");
opt2s(VMA_AREA_SOCKET, "sk");
opt2s(VMA_AREA_UPROBES, "uprobes");
#undef opt2s
}

View file

@ -33,4 +33,5 @@ message inventory_entry {
// This is currently used to delete the correct nftables
// network locking rule.
optional string dump_criu_run_id = 13;
optional bool allow_uprobes = 14;
}

View file

@ -2041,3 +2041,22 @@ void criu_set_empty_ns(int namespaces)
{
criu_local_set_empty_ns(global_opts, namespaces);
}
/*
 * Set the config file path stored in the RPC options of @opts.
 *
 * @opts: options object whose rpc->config_file is replaced
 * @path: path to remember; it is duplicated, so the caller keeps ownership
 *        of its own string
 *
 * Returns 0 on success or -ENOMEM if the duplicate cannot be allocated, in
 * which case the previously stored path is left untouched.
 */
int criu_local_set_config_file(criu_opts *opts, const char *path)
{
	char *copy = strdup(path);

	if (copy == NULL)
		return -ENOMEM;

	/* Drop the old string only once the replacement is known to exist. */
	free(opts->rpc->config_file);
	opts->rpc->config_file = copy;

	return 0;
}
/*
 * Same as criu_local_set_config_file(), but operates on global_opts
 * instead of a caller-supplied options object.
 *
 * @path: config file path, passed through unchanged
 *
 * Returns whatever criu_local_set_config_file() returns.
 */
int criu_set_config_file(const char *path)
{
return criu_local_set_config_file(global_opts, path);
}

View file

@ -116,6 +116,7 @@ void criu_set_pidfd_store_sk(int sk);
int criu_set_network_lock(enum criu_network_lock_method method);
int criu_join_ns_add(const char *ns, const char *ns_file, const char *extra_opt);
void criu_set_mntns_compat_mode(bool val);
int criu_set_config_file(const char *path);
/*
* The criu_notify_arg_t na argument is an opaque
@ -281,6 +282,7 @@ void criu_local_set_pidfd_store_sk(criu_opts *opts, int sk);
int criu_local_set_network_lock(criu_opts *opts, enum criu_network_lock_method method);
int criu_local_join_ns_add(criu_opts *opts, const char *ns, const char *ns_file, const char *extra_opt);
void criu_local_set_mntns_compat_mode(criu_opts *opts, bool val);
int criu_local_set_config_file(criu_opts *opts, const char *path);
void criu_local_set_notify_cb(criu_opts *opts, int (*cb)(char *action, criu_notify_arg_t na));

View file

@ -1,4 +1,15 @@
from . import rpc_pb2 as rpc
from . import images
from .criu import *
from .version import __version__
from .criu import criu, CRIUExceptionExternal, CRIUException
from .criu import CR_DEFAULT_SERVICE_ADDRESS
from .version import __version__
__all__ = (
"rpc",
"images",
"criu",
"CRIUExceptionExternal",
"CRIUException",
"CR_DEFAULT_SERVICE_ADDRESS",
"__version__",
)

View file

@ -8,6 +8,7 @@ import struct
import pycriu.rpc_pb2 as rpc
CR_DEFAULT_SERVICE_ADDRESS = "./criu_service.socket"
class _criu_comm:
"""
@ -45,7 +46,14 @@ class _criu_comm_sk(_criu_comm):
def connect(self, daemon):
self.sk = socket.socket(socket.AF_UNIX, socket.SOCK_SEQPACKET)
self.sk.connect(self.comm)
try:
self.sk.connect(self.comm)
except FileNotFoundError:
raise FileNotFoundError("Socket file not found.")
except ConnectionRefusedError:
raise ConnectionRefusedError("Service not running.")
return self.sk
@ -103,7 +111,7 @@ class _criu_comm_bin(_criu_comm):
os.close(2)
css[0].send(struct.pack('i', os.getpid()))
os.execv(self.comm,
os.execvp(self.comm,
[self.comm, 'swrk',
"%d" % css[0].fileno()])
os._exit(1)
@ -181,15 +189,14 @@ class CRIUExceptionExternal(CRIUException):
if self.errno == errno.EBADRQC:
s += "Bad options"
if self.typ == rpc.DUMP:
if self.errno == errno.ESRCH:
s += "No process with such pid"
elif self.typ == rpc.DUMP and self.errno == errno.ESRCH:
s += "No process with such pid"
if self.typ == rpc.RESTORE:
if self.errno == errno.EEXIST:
s += "Process with requested pid already exists"
elif self.typ == rpc.RESTORE and self.errno == errno.EEXIST:
s += "Process with requested pid already exists"
s += "Unknown"
else:
s += "Unknown"
return s
@ -204,10 +211,11 @@ class criu:
def __init__(self):
self.use_binary('criu')
self.opts = rpc.criu_opts()
# images_dir_fd is required field with default value of -1
self.opts = rpc.criu_opts(images_dir_fd=-1)
self.sk = None
def use_sk(self, sk_name):
def use_sk(self, sk_name=CR_DEFAULT_SERVICE_ADDRESS):
"""
Access criu using the unix socket that belongs to the criu service daemon.
"""
@ -266,6 +274,7 @@ class criu:
"""
req = rpc.criu_req()
req.type = rpc.CHECK
req.opts.MergeFrom(self.opts)
resp = self._send_req_and_recv_resp(req)

View file

@ -105,6 +105,7 @@ mmap_status_map = [
('VMA_AREA_AIORING', 1 << 13),
('VMA_AREA_MEMFD', 1 << 14),
('VMA_AREA_SHSTK', 1 << 15),
('VMA_AREA_UPROBES', 1 << 17),
('VMA_UNSUPP', 1 << 31),
]

View file

@ -6,11 +6,12 @@ build-backend = "setuptools.build_meta"
name = "pycriu"
description = "Python bindings for CRIU"
authors = [
{name = "CRIU team", email = "criu@openvz.org"},
{name = "CRIU team", email = "criu@lists.linux.dev"},
]
license = {text = "GPLv2"}
license = {text = "LGPLv2.1"}
dynamic = ["version"]
requires-python = ">=3.6"
dependencies = ["protobuf"]
[tool.setuptools]
packages = ["pycriu", "pycriu.images"]

View file

@ -7,10 +7,12 @@
name = pycriu
description = Python bindings for CRIU
author = CRIU team
author_email = criu@openvz.org
license = GPLv2
author_email = criu@lists.linux.dev
license = LGPLv2.1
version = attr: pycriu.__version__
[options]
packages = find:
python_requires = >=3.6
install_requires =
protobuf

View file

@ -66,18 +66,6 @@ bool plugin_added_to_inventory = false;
bool plugin_disabled = false;
/*
* In the case of a single process (common case), this optimization can effectively
* reduce the restore latency with parallel restore. In the case of multiple processes,
* states are already restored in parallel within different processes. Therefore, this
* optimization does not introduce further improvement and will be disabled by default
* in this case. The flag, parallel_disabled, is used to control whether the
* optimization is enabled or disabled.
*/
bool parallel_disabled = false;
pthread_t parallel_thread = 0;
int parallel_thread_result = 0;
/**************************************************************************************************/
/* Call ioctl, restarting if it is interrupted */
@ -527,8 +515,8 @@ void free_and_unmap(uint64_t size, amdgpu_bo_handle h_bo, amdgpu_va_handle h_va,
}
static int sdma_copy_bo(struct kfd_criu_bo_bucket bo_bucket, FILE *storage_fp,
void *buffer, size_t buffer_size, amdgpu_device_handle h_dev,
uint64_t max_copy_size, enum sdma_op_type type)
void *buffer, size_t buffer_size, amdgpu_device_handle h_dev,
uint64_t max_copy_size, enum sdma_op_type type)
{
uint64_t size, src_bo_size, dst_bo_size, buffer_bo_size, bytes_remain, buffer_space_remain;
uint64_t gpu_addr_src, gpu_addr_dst, gpu_addr_ib, copy_src, copy_dst, copy_size;
@ -1243,7 +1231,6 @@ int amdgpu_plugin_dump_file(int fd, int id)
/* Check whether this plugin was called for kfd or render nodes */
if (major(st.st_rdev) != major(st_kfd.st_rdev) || minor(st.st_rdev) != 0) {
/* This is RenderD dumper plugin, for now just save renderD
* minor number to be used during restore. In later phases this
* needs to save more data for video decode etc.
@ -1691,7 +1678,7 @@ int amdgpu_plugin_restore_file(int id)
fd = node_get_drm_render_device(tp_node);
if (fd < 0)
pr_err("Failed to open render device (minor:%d)\n", tp_node->drm_render_minor);
fail:
fail:
criu_render_node__free_unpacked(rd, NULL);
xfree(buf);
/*
@ -1898,25 +1885,6 @@ int amdgpu_plugin_resume_devices_late(int target_pid)
if (plugin_disabled)
return -ENOTSUP;
if (!parallel_disabled) {
pr_info("Close parallel restore server\n");
if (close_parallel_restore_server()) {
pr_err("Close parallel restore server fail\n");
return -1;
}
exit_code = pthread_join(parallel_thread, NULL);
if (exit_code) {
pr_err("Failed to join parallel thread ret:%d\n", exit_code);
return -1;
}
if (parallel_thread_result) {
pr_err("Parallel restore fail\n");
return parallel_thread_result;
}
}
pr_info("Inside %s for target pid = %d\n", __func__, target_pid);
fd = open(AMDGPU_KFD_DEVICE, O_RDWR | O_CLOEXEC);
@ -2183,4 +2151,4 @@ int amdgpu_plugin_post_forking(void)
return back_thread_create(&parallel_thread, restore_device_parallel_worker, &parallel_thread_result);
}
CR_PLUGIN_REGISTER_HOOK(CR_PLUGIN_HOOK__POST_FORKING, amdgpu_plugin_post_forking)
CR_PLUGIN_REGISTER_HOOK(CR_PLUGIN_HOOK__POST_FORKING, amdgpu_plugin_post_forking)

View file

@ -15,8 +15,7 @@ dnf install -y \
python-unversioned-command \
redhat-rpm-config \
sudo \
tar \
which
tar
# /tmp is no longer 755 in the rawhide container image and breaks CI - fix it
chmod 1777 /tmp

View file

@ -23,7 +23,7 @@ MAKE := make
MKDIR := mkdir -p
AWK := awk
PERL := perl
FULL_PYTHON := $(shell which python3 2>/dev/null)
FULL_PYTHON := $(shell command -v python3 2>/dev/null)
PYTHON ?= $(shell basename $(FULL_PYTHON))
FIND := find
SH := $(shell if [ -x "$$BASH" ]; then echo $$BASH; \
@ -36,7 +36,7 @@ CTAGS := ctags
export RM HOSTLD LD HOSTCC CC CPP AS AR STRIP OBJCOPY OBJDUMP
export NM SH MAKE MKDIR AWK PERL PYTHON SH CSCOPE
export USE_ASCIIDOCTOR ?= $(shell which asciidoctor 2>/dev/null)
export USE_ASCIIDOCTOR ?= $(shell command -v asciidoctor 2>/dev/null)
#
# Footer.

View file

@ -1,41 +0,0 @@
#!/usr/bin/env python3
import sys
import os
actions = set(['pre-dump', 'pre-restore', 'post-dump', 'setup-namespaces', \
'post-setup-namespaces', 'post-restore', 'post-resume', \
'network-lock', 'network-unlock' ])
errors = []
af = os.path.dirname(os.path.abspath(__file__)) + '/actions_called.txt'
for act in open(af):
act = act.strip().split()
act.append('EMPTY')
act.append('EMPTY')
if act[0] == 'EMPTY':
raise Exception("Error in test, bogus actions line")
if act[1] == 'EMPTY':
errors.append('Action %s misses CRTOOLS_IMAGE_DIR' % act[0])
if act[0] in ('post-dump', 'setup-namespaces', 'post-setup-namespaces', \
'post-restore', 'post-resume', 'network-lock', 'network-unlock'):
if act[2] == 'EMPTY':
errors.append('Action %s misses CRTOOLS_INIT_PID' % act[0])
elif not act[2].isdigit() or int(act[2]) == 0:
errors.append('Action %s PID is not number (%s)' %
(act[0], act[2]))
actions -= set([act[0]])
if actions:
errors.append('Not all actions called: %r' % actions)
if errors:
for x in errors:
print(x)
sys.exit(1)
print('PASS')

View file

@ -1,8 +0,0 @@
# Check how crit de/encodes images
set -e
source `dirname $0`/criu-lib.sh
# prep
rm -f actions_called.txt
./test/zdtm.py run -t zdtm/static/env00 --script "$(pwd)/test/show_action.sh" || fail
./test/check_actions.py || fail
exit 0

View file

@ -1 +1 @@
img-dir-*
actions_called.txt

View file

@ -1,5 +1,3 @@
run:
@make -C .. loop
./run.sh
.PHONY: run

View file

@ -1,2 +0,0 @@
#!/bin/bash
touch action-hook-"$CRTOOLS_SCRIPT_ACTION"

View file

@ -0,0 +1,54 @@
#!/usr/bin/env python3
"""Validate the action-script log (actions_called.txt next to this script).

Each line is expected to hold "<action> <image dir> <init pid>".  The
actions must appear exactly in the order given by EXPECTED_ACTIONS.
"""
import os
import sys

# Hooks in the exact order in which they must appear in the log.
EXPECTED_ACTIONS = [
    'pre-dump',
    'network-lock',
    'post-dump',
    'pre-restore',
    'setup-namespaces',
    'post-setup-namespaces',
    'post-restore',
    'network-unlock',
    'pre-resume',
    'post-resume',
]


def check_actions(lines):
    """Check an iterable of log lines and return a list of error strings.

    Raises ValueError when a line is malformed (empty, or more than three
    whitespace-separated fields) or when an action is out of order or
    unexpected.  Missing CRTOOLS_IMAGE_DIR / CRTOOLS_INIT_PID values and
    missing trailing actions are reported in the returned list instead.
    """
    errors = []
    actions_called = []

    for index, line in enumerate(lines):
        parts = line.strip().split()
        # More than three fields would otherwise fail with an opaque
        # "too many values to unpack" error.
        if len(parts) > 3:
            raise ValueError(f"Error in test: too many fields in actions line: {line!r}")
        # Pad so that missing fields can be detected by name below.
        parts += ['EMPTY'] * (3 - len(parts))
        action_hook, image_dir, pid = parts

        if action_hook == 'EMPTY':
            raise ValueError("Error in test: bogus actions line")

        expected_action = EXPECTED_ACTIONS[index] if index < len(EXPECTED_ACTIONS) else None
        if action_hook != expected_action:
            raise ValueError(f"Invalid action: {action_hook} != {expected_action}")

        if image_dir == 'EMPTY':
            errors.append(f'Action {action_hook} misses CRTOOLS_IMAGE_DIR')

        # pre-restore is the only hook whose PID field is not validated.
        if action_hook != 'pre-restore':
            if pid == 'EMPTY':
                errors.append(f'Action {action_hook} misses CRTOOLS_INIT_PID')
            elif not pid.isdigit() or int(pid) == 0:
                errors.append(f'Action {action_hook} PID is not a valid number ({pid})')

        actions_called.append(action_hook)

    # Order is enforced above, so the called actions are always a prefix of
    # EXPECTED_ACTIONS; whatever follows that prefix was never called.
    missing = EXPECTED_ACTIONS[len(actions_called):]
    if missing:
        errors.append(f'Not all actions called, missing: {missing!r}')

    return errors


def main():
    """Check actions_called.txt and return the process exit status."""
    actions_called_file = os.path.join(os.path.dirname(__file__), 'actions_called.txt')
    with open(actions_called_file) as f:
        errors = check_actions(f)

    if errors:
        print('\n'.join(errors))
        return 1

    print('Check Actions PASS')
    return 0


if __name__ == '__main__':
    sys.exit(main())

View file

@ -1,60 +1,11 @@
#!/bin/bash
set -ebm
set -e
# shellcheck source=test/others/env.sh
source ../env.sh || exit 1
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
SELFDIR="$(dirname "$(readlink -f "$0")")"
SCRIPT="$SELFDIR/action-script.sh"
IMGDIR="$SELFDIR/img-dir-$$"
rm -f "${SCRIPT_DIR}"/actions_called.txt
"${SCRIPT_DIR}"/../../zdtm.py run -t zdtm/static/env00 -f ns --script "$SCRIPT_DIR/show_action.sh" || exit 1
"${SCRIPT_DIR}"/check_actions.py || exit 1
rm -rf "$IMGDIR"
mkdir "$IMGDIR"
trap "cleanup" QUIT TERM INT HUP EXIT
# shellcheck disable=SC2317
# https://github.com/koalaman/shellcheck/issues/2660
function cleanup()
{
if [[ -n "$PID" ]]; then
kill -9 "$PID"
fi
}
PID=$(../loop)
if ! $CRIU dump -v4 -o dump.log -t "$PID" -D "$IMGDIR" --action-script "$SCRIPT"; then
echo "Failed to checkpoint process $PID"
cat dump.log
kill -9 "$PID"
exit 1
fi
if ! $CRIU restore -v4 -o restore.log -D "$IMGDIR" -d --pidfile test.pidfile --action-script "$SCRIPT"; then
echo "CRIU restore failed"
echo FAIL
exit 1
fi
PID=$(cat "$IMGDIR"/test.pidfile)
found_missing_file=false
hooks=("pre-dump" "post-dump" "pre-restore" "pre-resume" "post-restore" "post-resume")
for hook in "${hooks[@]}"
do
if [ ! -e "$IMGDIR/action-hook-$hook" ]; then
echo "ERROR: action-hook-$hook does not exist"
found_missing_file=true
fi
done
if [ "$found_missing_file" = true ]; then
exit 1
fi
echo PASS
rm -rf "$IMGDIR"
exit 0

View file

@ -1,3 +1,4 @@
#!/bin/bash
echo "${CRTOOLS_SCRIPT_ACTION} ${CRTOOLS_IMAGE_DIR} ${CRTOOLS_INIT_PID}" \
>> "$(dirname $0)/actions_called.txt"
>> "$(dirname "$0")/actions_called.txt"

View file

@ -8,3 +8,4 @@ test_pre_dump
test_feature_check
output/
libcriu.so.*
test_rpc_config

View file

@ -3,10 +3,12 @@ include ../../../../criu/Makefile.versions
TESTS += test_sub
TESTS += test_self
TESTS += test_notify
TESTS += test_rpc_config
TESTS += test_iters
TESTS += test_errno
TESTS += test_join_ns
TESTS += test_pre_dump
TESTS += test_check
TESTS += test_feature_check
all: $(TESTS)

View file

@ -55,6 +55,7 @@ run_test() {
run_test test_sub
run_test test_self
run_test test_notify
run_test test_rpc_config
if [ "$(uname -m)" = "x86_64" ]; then
# Skip this on aarch64 as aarch64 has no dirty page tracking
run_test test_iters
@ -62,6 +63,7 @@ if [ "$(uname -m)" = "x86_64" ]; then
fi
run_test test_errno
run_test test_join_ns
run_test test_check
if criu check --feature mem_dirty_track > /dev/null; then
export CRIU_FEATURE_MEM_TRACK=1
fi

View file

@ -0,0 +1,17 @@
#include <stdio.h>
#include "criu.h"
#include "lib.h"
/*
 * Run a CRIU "check" request through libcriu.
 *
 * argv[1] is the path handed to criu_set_service_binary().
 * Returns 0 when criu_check() returns 0 and -1 otherwise (including when
 * the binary path argument is missing).
 */
int main(int argc, char **argv)
{
	/* argv[1] is NULL without an argument; do not hand that to libcriu */
	if (argc < 2) {
		fprintf(stderr, "Usage: test_check <criu-binary>\n");
		return -1;
	}

	printf("--- Start check ---\n");
	criu_init_opts();
	criu_set_service_binary(argv[1]);

	if (criu_check())
		return -1;

	return 0;
}

View file

@ -0,0 +1,223 @@
#include "criu.h"
#include "lib.h"
#include <fcntl.h>
#include <stdio.h>
#include <errno.h>
#include <stdlib.h>
#include <signal.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/wait.h>
#include <string.h>
#include <time.h>
#define RANDOM_NAME_LEN 6
#define PATH_BUF_SIZE 128
static volatile sig_atomic_t stop = 0;
static char base_name[RANDOM_NAME_LEN + 1];
static char log_file[PATH_BUF_SIZE];
static char conf_file[PATH_BUF_SIZE];
/*
 * Signal handler: sets the file-scope `stop` flag (a volatile sig_atomic_t)
 * so that the loop polling it can terminate. The signal number is ignored.
 */
static void handle_signal(int sig)
{
(void)sig;
stop = 1;
}
/*
 * Fill base_name with RANDOM_NAME_LEN characters drawn via rand() from
 * [a-zA-Z0-9] and derive the /tmp log and config file paths from it.
 * The caller is responsible for seeding rand().
 */
static void generate_random_base_name(void)
{
	const char alphabet[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
	const size_t alphabet_len = sizeof(alphabet) - 1; /* without the NUL */
	char *out = base_name;
	int remaining = RANDOM_NAME_LEN;

	while (remaining-- > 0)
		*out++ = alphabet[rand() % alphabet_len];
	*out = '\0';

	snprintf(log_file, sizeof(log_file), "/tmp/criu-%s.log", base_name);
	snprintf(conf_file, sizeof(conf_file), "/tmp/criu-%s.conf", base_name);
}
/*
 * Create a fresh config file (conf_file) containing a single
 * "log-file=<log_file>" line.
 *
 * The random names are generated here; O_EXCL makes the call fail rather
 * than reuse an existing file. Returns 0 on success and -1 on failure, in
 * which case no config file is left behind by this function.
 */
static int create_criu_config_file(void)
{
	int write_failed;
	FILE *fp;
	int fd;

	srand(time(NULL));
	generate_random_base_name();

	fd = open(conf_file, O_CREAT | O_EXCL | O_WRONLY, 0600);
	if (fd < 0) {
		perror("Failed to create config file");
		return -1;
	}

	fp = fdopen(fd, "w");
	if (!fp) {
		perror("fdopen failed");
		close(fd);
		unlink(conf_file);
		return -1;
	}

	write_failed = fprintf(fp, "log-file=%s\n", log_file) < 0;

	/* fclose() flushes the stream, so its result tells whether data hit the file */
	if (fclose(fp) != 0)
		write_failed = 1;

	if (write_failed) {
		perror("Failed to write config file");
		unlink(conf_file);
		return -1;
	}

	return 0;
}
/*
 * Verify that the log file named by log_file exists and is not empty,
 * then remove it so that a later check starts from a clean state.
 *
 * Returns 0 when the file was present and non-empty, -1 otherwise.
 */
static int check_log_file(void)
{
	struct stat st;

	if (stat(log_file, &st) < 0) {
		perror("Log file does not exist");
		return -1;
	}

	if (st.st_size == 0) {
		fprintf(stderr, "Log file is empty\n");
		return -1;
	}

	unlink(log_file);
	return 0;
}
/*
 * Dump and restore a small looping child through libcriu while an RPC
 * config file redirects the CRIU log file, and check after each step that
 * the log file named in the config file was actually written.
 *
 * argv[1]: path handed to criu_set_service_binary()
 * argv[2]: images directory
 *
 * Returns EXIT_FAILURE on any error; otherwise the result of chk_exit() on
 * the wait status of the restored child.
 */
int main(int argc, char **argv)
{
	int pipe_fd[2];
	pid_t pid = -1;
	int ret = -1;
	int child_ret;
	int img_fd;
	int have_child = 0; /* a child is alive (or a zombie) and must be reaped */
	int failed = 1;	    /* cleared only after every step succeeded */
	char eof_probe;

	if (argc < 3) {
		fprintf(stderr, "Usage: test_rpc_config <criu-binary> <images-dir>\n");
		return EXIT_FAILURE;
	}

	img_fd = open(argv[2], O_DIRECTORY);
	if (img_fd < 0) {
		/* Nothing has been created yet, so there is nothing to clean up */
		perror("Failed to open images directory");
		return EXIT_FAILURE;
	}

	if (create_criu_config_file() < 0) {
		printf("Failed to create config file\n");
		close(img_fd);
		return EXIT_FAILURE;
	}

	if (pipe(pipe_fd) < 0) {
		perror("pipe");
		goto cleanup;
	}

	pid = fork();
	if (pid < 0) {
		perror("fork failed");
		close(pipe_fd[0]);
		close(pipe_fd[1]);
		goto cleanup;
	}

	if (pid == 0) {
		/** child process **/
		printf(" `- loop: initializing\n");

		if (setsid() < 0 || signal(SIGUSR1, handle_signal) == SIG_ERR) {
			_exit(EXIT_FAILURE);
		}

		close(STDIN_FILENO);
		close(STDOUT_FILENO);
		close(STDERR_FILENO);
		close(pipe_fd[0]);

		child_ret = SUCC_ECODE;
		if (write(pipe_fd[1], &child_ret, sizeof(child_ret)) != sizeof(child_ret))
			_exit(EXIT_FAILURE);
		close(pipe_fd[1]);

		while (!stop) {
			sleep(1);
		}

		_exit(SUCC_ECODE);
	}

	/** parent process **/
	have_child = 1;
	close(pipe_fd[1]);

	if (read(pipe_fd[0], &ret, sizeof(ret)) != sizeof(ret) || ret != SUCC_ECODE) {
		printf("Error starting loop\n");
		close(pipe_fd[0]);
		/* Make sure the waitpid() in cleanup cannot block on a live child */
		kill(pid, SIGKILL);
		goto cleanup;
	}

	/* Wait for EOF, i.e. until the child has closed its end of the pipe */
	if (read(pipe_fd[0], &eof_probe, 1) < 0)
		perror("read");
	close(pipe_fd[0]);

	printf("--- Loop process started (pid: %d) ---\n", pid);

	printf("--- Checkpoint ---\n");
	criu_init_opts();
	criu_set_service_binary(argv[1]);
	criu_set_images_dir_fd(img_fd);
	criu_set_pid(pid);
	criu_set_log_level(CRIU_LOG_DEBUG);

	/* The RPC config file should overwrite the log-file set below */
	printf("Setting dump RPC config file: %s\n", conf_file);
	criu_set_config_file(conf_file);
	criu_set_log_file("dump.log");

	ret = criu_dump();
	if (ret < 0) {
		what_err_ret_mean(ret);
		kill(pid, SIGKILL);
		printf("criu dump failed\n");
		goto cleanup;
	}

	printf(" `- Dump succeeded\n");
	waitpid(pid, NULL, 0);
	/* The dumped child has been reaped; it must not be waited for again */
	have_child = 0;

	if (check_log_file()) {
		printf("Error: log file not overwritten by RPC config file\n");
		goto cleanup;
	}

	printf("--- Restore loop ---\n");
	criu_init_opts();
	criu_set_images_dir_fd(img_fd);
	criu_set_log_level(CRIU_LOG_DEBUG);

	/* The RPC config file should overwrite the log-file set below */
	printf("Setting restore RPC config file: %s\n", conf_file);
	criu_set_config_file(conf_file);
	criu_set_log_file("restore.log");

	pid = criu_restore_child();
	if (pid <= 0) {
		what_err_ret_mean(pid);
		goto cleanup;
	}
	have_child = 1;

	printf(" `- Restore returned pid %d\n", pid);
	kill(pid, SIGUSR1);

	/*
	 * A failed log check must fail the test no matter how the restored
	 * child exits, hence the separate `failed` flag.
	 */
	if (check_log_file()) {
		printf("Error: log file not overwritten by RPC config file\n");
		goto cleanup;
	}

	failed = 0;

cleanup:
	if (have_child && waitpid(pid, &ret, 0) < 0) {
		perror("waitpid failed");
		failed = 1;
	}

	printf("Remove RPC config file: %s\n", conf_file);
	unlink(conf_file);
	close(img_fd);

	if (failed)
		return EXIT_FAILURE;

	return chk_exit(ret, SUCC_ECODE);
}

1
test/others/pycriu/.gitignore vendored Normal file
View file

@ -0,0 +1 @@
build/

View file

@ -0,0 +1,63 @@
# Test driver for the pycriu "check" tests: starts a CRIU service inside
# $(BUILD_DIR), runs the Python tests against its socket and stops it again.
#
# Every recipe runs in a single shell invocation (.ONESHELL) with
# errexit, nounset and pipefail enabled (.SHELLFLAGS).
.SHELLFLAGS := -eu -o pipefail -c
.ONESHELL:
# Overridable settings; the socket, pidfile and log names are passed to
# "criu service" together with -W $(BUILD_DIR).
CRIU ?= ../../../criu/criu
BUILD_DIR ?= build
SOCKET_NAME ?= criu_service.socket
PIDFILE_NAME ?= pidfile
SERVICE_LOG ?= service.log
PYTHON ?= python3
# Paths as seen from this directory.
PIDFILE := $(BUILD_DIR)/$(PIDFILE_NAME)
CRIU_SOCKET := $(BUILD_DIR)/$(SOCKET_NAME)
# FIFO and descriptor number used for the service's --status-fd.
STATUS_FIFO := $(BUILD_DIR)/startup.status
STATUS_FD := 200
# run: start the service, run all tests, and always invoke "stop" when the
# recipe shell exits or is interrupted (failures of "stop" are ignored).
run: start
cleanup() { $(MAKE) --no-print-directory stop || true; }
trap cleanup EXIT INT TERM
"$(PYTHON)" test_check.py
"$(PYTHON)" test_check_fail.py
"$(PYTHON)" test_check_images_dir.py
"$(PYTHON)" test_check_work_dir_fd.py
# start: no-op when the pid in the pidfile is still alive; otherwise create
# the status FIFO, open it read-write on $(STATUS_FD) and launch the service
# as a daemon. read.py is then run on the FIFO (presumably to wait for the
# service's startup notification -- read.py is not part of this file).
start:
mkdir -p "$(BUILD_DIR)"
if [ -s "$(PIDFILE)" ] && kill -0 "$$(cat "$(PIDFILE)")" 2>/dev/null; then
echo "Service running (PID $$(cat "$(PIDFILE)"))."
exit 0
fi
if ! command -v "$(CRIU)" >/dev/null 2>&1; then
echo "CRIU not found at $(CRIU)"
exit 1
fi
mkfifo "$(STATUS_FIFO)"
exec $(STATUS_FD)<>"$(STATUS_FIFO)"
"$(CRIU)" service \
-v4 \
-W "$(BUILD_DIR)" \
--address "$(SOCKET_NAME)" \
-d \
--pidfile "$(PIDFILE_NAME)" \
-o "$(SERVICE_LOG)" \
--status-fd "$(STATUS_FD)"
"$(PYTHON)" read.py "$(STATUS_FIFO)"
# stop: fails when there is no usable pidfile; otherwise kills the service
# if it is still alive and removes the pidfile, socket and status FIFO.
stop:
if [ ! -s "$(PIDFILE)" ]; then
echo "pidfile missing or empty"
exit 1
fi
pid=$$(cat "$(PIDFILE)")
if kill -0 "$$pid" 2>/dev/null; then
kill -9 "$$pid" || true
fi
rm -f "$(PIDFILE)" "$(CRIU_SOCKET)" "$(STATUS_FIFO)"
# clean: kill a still-running service (if any) and remove $(BUILD_DIR).
clean:
if [ -s "$(PIDFILE)" ] && kill -0 "$$(cat "$(PIDFILE)")" 2>/dev/null; then
kill -9 "$$(cat "$(PIDFILE)")" || true
fi
rm -rf "$(BUILD_DIR)"
.PHONY: start stop clean run

1
test/others/pycriu/read.py Symbolic link
View file

@ -0,0 +1 @@
../rpc/read.py

View file

@ -0,0 +1,29 @@
#!/usr/bin/env python3
import os
import sys
# Add ../../../lib so we can import pycriu
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
LIB_DIR = os.path.normpath(os.path.join(SCRIPT_DIR, "../../../lib"))
if LIB_DIR not in sys.path:
sys.path.insert(0, LIB_DIR)
import pycriu # noqa: E402
def main():
    """Send a check request over the service socket in build/.

    Returns 0 (after printing PASS) when check() does not raise and
    1 (after printing the failure) when it does.
    """
    client = pycriu.criu()
    client.use_sk(os.path.join(SCRIPT_DIR, "build", "criu_service.socket"))

    try:
        client.check()
    except Exception as e:
        print(f"FAIL: {e}")
        return 1
    else:
        print("PASS")
        return 0
if __name__ == "__main__":
sys.exit(main())

View file

@ -0,0 +1,32 @@
#!/usr/bin/env python3
import os
import sys
# Add ../../../lib so we can import pycriu
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
LIB_DIR = os.path.normpath(os.path.join(SCRIPT_DIR, "../../../lib"))
if LIB_DIR not in sys.path:
sys.path.insert(0, LIB_DIR)
import pycriu # noqa: E402
def main():
    """Expect check() to fail when only log_file is configured.

    Returns 0 (PASS) when check() raises and 1 (FAIL) when it succeeds.
    """
    client = pycriu.criu()
    client.use_sk(os.path.join(SCRIPT_DIR, "build", "criu_service.socket"))

    # Intentionally set only log_file (no images/work dir) to ensure check() fails
    client.opts.log_file = "check.log"

    check_raised = False
    try:
        client.check()
    except Exception:
        check_raised = True

    if check_raised:
        print("PASS")
        return 0

    print("FAIL: check() did not fail when log_file is set without images/work dir")
    return 1
if __name__ == "__main__":
sys.exit(main())

View file

@ -0,0 +1,44 @@
#!/usr/bin/env python3
import os
import sys
# Add ../../../lib so we can import pycriu
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
LIB_DIR = os.path.normpath(os.path.join(SCRIPT_DIR, "../../../lib"))
if LIB_DIR not in sys.path:
sys.path.insert(0, LIB_DIR)
import pycriu # noqa: E402
def _log_path(images_dir, log_file):
return log_file if os.path.isabs(log_file) else os.path.join(images_dir, log_file)
def main():
    """Run check() with images_dir set and verify a non-empty log was written.

    Returns 0 (PASS) when check() succeeds and the log file exists with a
    size greater than zero; returns 1 otherwise.
    """
    build_dir = os.path.join(SCRIPT_DIR, "build")

    client = pycriu.criu()
    client.use_sk(os.path.join(build_dir, "criu_service.socket"))
    client.opts.images_dir = build_dir
    client.opts.log_file = "check.log"
    client.opts.log_level = 4

    try:
        client.check()
    except Exception as e:
        log_path = _log_path(build_dir, client.opts.log_file)
        hint = 'see log: ' + log_path if os.path.exists(log_path) else 'no log found'
        print(f"FAIL: {e} ({hint})")
        return 1

    log_path = _log_path(build_dir, client.opts.log_file)
    if not os.path.isfile(log_path) or os.path.getsize(log_path) <= 0:
        print(f"FAIL: log file missing or empty: {log_path}")
        return 1

    print("PASS")
    return 0
if __name__ == "__main__":
sys.exit(main())

View file

@ -0,0 +1,44 @@
#!/usr/bin/env python3
import os
import sys
# Add ../../../lib so we can import pycriu
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
LIB_DIR = os.path.normpath(os.path.join(SCRIPT_DIR, "../../../lib"))
if LIB_DIR not in sys.path:
sys.path.insert(0, LIB_DIR)
import pycriu # noqa: E402
def main():
    """Run check() with work_dir_fd pointing at the build directory.

    Returns 0 (PASS) when check() does not raise and 1 (FAIL) when it does.
    The directory descriptor is closed in either case.
    """
    build_dir = os.path.join(SCRIPT_DIR, "build")
    sock = os.path.join(build_dir, "criu_service.socket")
    os.makedirs(build_dir, exist_ok=True)

    # Open a directory FD to use as work_dir_fd (prefer O_PATH if available)
    open_flags = getattr(os, "O_PATH", 0) or os.O_RDONLY
    dir_fd = os.open(build_dir, open_flags)

    client = pycriu.criu()
    client.use_sk(sock)
    client.opts.work_dir_fd = dir_fd
    client.opts.log_file = "check.log"
    client.opts.log_level = 4

    status = 0
    try:
        client.check()
    except Exception as e:
        print(f"FAIL: {e}")
        status = 1
    finally:
        # Best-effort close; a failure here must not change the verdict.
        try:
            os.close(dir_fd)
        except Exception:
            pass

    if status == 0:
        print("PASS")
    return status
if __name__ == "__main__":
sys.exit(main())

View file

@ -12,6 +12,7 @@ run: all
chmod a+rwx build
chmod a+rwx build/{imgs_errno,imgs_ps,imgs_c,imgs_loop,imgs_py}
rm -f build/status
rm -f build/_marker_*
@# Create all log files to be accessible for anybody
@# so that they can be displayed by any user.
for i in imgs_errno/criu.log imgs_ps/page-server.log imgs_ps/dump.log \

View file

@ -0,0 +1,17 @@
#!/bin/bash
# Action script that detects an action hook being run twice.
#
# For every action a marker file "_marker_<action>" is created in the
# current working directory. Exit status:
#   0 - first run for this action (marker created)
#   1 - the marker already exists, i.e. the hook ran before
#   2 - CRTOOLS_SCRIPT_ACTION is unset or empty
MARKER_FILE="_marker_${CRTOOLS_SCRIPT_ACTION}"
if [ -z "$CRTOOLS_SCRIPT_ACTION" ]; then
echo "Error: CRTOOLS_SCRIPT_ACTION is not set."
exit 2
fi
# The marker's existence is the only state kept between invocations.
if [ ! -f "$MARKER_FILE" ]; then
touch "$MARKER_FILE"
else
echo "Error: Running the same action hook for the second time"
exit 1
fi
exit 0

View file

@ -13,6 +13,9 @@ from setup_swrk import setup_swrk
log_file = 'config_file_test.log'
does_not_exist = 'does-not.exist'
script_path = os.path.dirname(os.path.abspath(__file__))
action_script_file = os.path.join(script_path, 'action-script.sh')
def setup_config_file(content):
# Creating a temporary file which will be used as configuration file.
@ -89,29 +92,37 @@ def test_broken_configuration_file():
sys.exit(-1)
def search_in_log_file(log, message):
with open(os.path.join(args['dir'], log)) as f:
def search_in_log_file(log_path, message):
with open(log_path) as f:
if message not in f.read():
print(
'FAIL: Missing the expected error message (%s) in the log file'
% message)
print('FAIL: Missing the expected error message (%s) in the log file' % message)
sys.exit(-1)
def print_log_file(log_path):
    """Print the whole file at log_path to stdout between begin/end banners."""
    banner = "\n--- Begin log file: %s ---" % log_path
    print(banner)
    with open(log_path, 'r') as log:
        contents = log.read()
        print(contents)
    print("--- End log file ---\n")
def check_results(resp, log):
# Check if the specified log file exists
if not os.path.isfile(os.path.join(args['dir'], log)):
log_path = os.path.join(args['dir'], log)
if not os.path.isfile(log_path):
print('FAIL: Expected log file %s does not exist' % log)
sys.exit(-1)
# Dump should have failed with: 'The criu itself is within dumped tree'
if resp.type != rpc.DUMP:
print('FAIL: Unexpected msg type %r' % resp.type)
print_log_file(log_path)
sys.exit(-1)
if 'The criu itself is within dumped tree' not in resp.cr_errmsg:
print('FAIL: Missing the expected error message in RPC response')
print_log_file(log_path)
sys.exit(-1)
# Look into the log file for the same message
search_in_log_file(log, 'The criu itself is within dumped tree')
search_in_log_file(log_path, 'The criu itself is within dumped tree')
def test_rpc_without_configuration_file():
@ -156,6 +167,7 @@ def test_rpc_with_configuration_file_overwriting_rpc():
# file settings in the default configuration.
log = does_not_exist
content = 'log-file ' + log + '\n'
content += 'action-script ' + action_script_file + '\n'
content += 'no-tcp-established\nno-shell-job'
path = setup_config_file(content)
# Only set the configuration file via RPC;
@ -180,11 +192,18 @@ args = vars(parser.parse_args())
cleanup_output(args['dir'])
print("*** Test broken config file ***")
test_broken_configuration_file()
cleanup_output(args['dir'])
print("*** Test RPC without config file ***")
test_rpc_without_configuration_file()
cleanup_output(args['dir'])
print("*** Test RPC with config file ***")
test_rpc_with_configuration_file()
cleanup_output(args['dir'])
print("*** Test configuration file overwriting RPC ***")
test_rpc_with_configuration_file_overwriting_rpc()
cleanup_output(args['dir'])

View file

@ -53,6 +53,9 @@ class test:
if errmsg and errmsg not in resp.cr_errmsg:
raise Exception('Unexpected cr_msg \'' + str(resp.cr_errmsg) + '\'')
if errmsg and errmsg not in str(resp.cr_errmsg):
raise Exception('Unexpected cr_msg \'' + str(resp.cr_errmsg) + '\'')
def no_process(self):
print('Try to dump unexisting process')
# Get pid of non-existing process.
@ -151,6 +154,21 @@ class test:
print('Success')
def child_first_err(self):
    """Send a CHECK request with a log_file containing a directory part and
    expect the response to carry the matching error message."""
    print('Receive correct first error message')

    request = self.get_base_req()
    request.type = rpc.CHECK
    # Log file must not have subdirectory
    request.opts.log_file = "/foo/bar.log"
    self.send_req(request)

    self.check_resp(self.recv_resp(), rpc.CHECK, None,
                    "No subdirs are allowed in log_file name")

    print('Success')
def run(self):
self.no_process()
self.process_exists()

0
test/others/rpc/read.py Normal file → Executable file
View file

View file

@ -66,6 +66,11 @@ endif
export PKG_CONFIG_PATH
endif
ifeq ($(SHSTK_ENABLE),1)
CFLAGS += -mshstk
LDFLAGS += -Wl,-z,shstk
endif
define pkg-libs
$(shell PKG_CONFIG_PATH="$(PKG_CONFIG_PATH)" $(PKG_CONFIG) --libs $(1))
endef

View file

@ -290,6 +290,7 @@ TST_NOFILE := \
PKG_CONFIG ?= pkg-config
pkg-config-check = $(shell sh -c '$(PKG_CONFIG) $(1) && echo y')
pkg-config-atleast-version = $(shell sh -c '$(PKG_CONFIG) --atleast-version=$(2) $(1) && echo y')
ifeq ($(call pkg-config-check,libbpf),y)
TST_NOFILE += \
bpf_hash \
@ -298,7 +299,10 @@ endif
ifneq ($(ARCH),arm)
ifneq ($(COMPAT_TEST),y)
TST_NOFILE += maps03
TST_NOFILE += maps03
ifeq ($(call pkg-config-atleast-version,libtracefs,1.7),y)
TST_NOFILE += uprobes
endif
endif
endif
@ -382,6 +386,7 @@ TST_FILE = \
sk-unix-listen02 \
sk-unix-listen03 \
sk-unix-listen04 \
sk-unix-restore-fs-share \
mnt_ext_file_bind_auto \
TST_DIR = \
@ -726,6 +731,9 @@ sk-unix-listen04: CFLAGS += -DSK_UNIX_LISTEN02 -DSK_UNIX_LISTEN03
cgroupv2_01: LDLIBS += -pthread
uprobes: CFLAGS += $(call pkg-cflags, libtracefs libtraceevent)
uprobes: LDLIBS += $(call pkg-libs, libtracefs libelf)
$(LIB): force
$(Q) $(MAKE) -C $(LIBDIR)

View file

@ -0,0 +1,196 @@
#include <stdio.h>
#include <string.h>
#include <signal.h>
#include <unistd.h>
#include <sys/wait.h>
#include <sys/mman.h>
#include <sys/socket.h>
#include <sys/un.h>
#include "zdtmtst.h"
const char *test_doc = "Test non-empty process group with terminated parent and unix socket";
const char *test_author = "Qiao Ma <mqaio@linux.alibaba.com>";
char *filename;
TEST_OPTION(filename, string, "socket file name", 1);
/*
 * Create a stream unix socket and connect it to the path in `filename`.
 *
 * Returns 0 on success; the connected descriptor is left open and is not
 * handed back to the caller. Returns -1 on failure, in which case the
 * descriptor has been closed.
 */
static int create_and_connect(void)
{
	struct sockaddr_un peer;
	int written;
	int sk;

	sk = socket(AF_UNIX, SOCK_STREAM, 0);
	if (sk == -1) {
		pr_perror("socket");
		return -1;
	}

	memset(&peer, 0, sizeof(peer));
	peer.sun_family = AF_UNIX;

	written = snprintf(peer.sun_path, sizeof(peer.sun_path), "%s", filename);
	if (written >= (int)sizeof(peer.sun_path)) {
		pr_err("Socket path too long\n");
		goto err;
	}

	if (connect(sk, (struct sockaddr *)&peer, sizeof(peer)) == -1) {
		pr_perror("connect");
		goto err;
	}

	return 0;

err:
	close(sk);
	return -1;
}
/*
 * Body of the socket-holding process: bind and listen on a unix socket at
 * `filename`, connect to it once, report readiness by writing one byte to
 * ready_fd and then block in pause().
 *
 * Returns EXIT_SUCCESS if pause() returns and EXIT_FAILURE on any setup
 * error. The listening socket is closed and the socket file unlinked on
 * every path taken after the socket has been created.
 */
static int child(int ready_fd)
{
	struct sockaddr_un addr;
	int status = EXIT_FAILURE;
	size_t path_len;
	int sk;

	sk = socket(AF_UNIX, SOCK_STREAM, 0);
	if (sk == -1) {
		pr_perror("socket");
		return EXIT_FAILURE;
	}

	memset(&addr, 0, sizeof(addr));
	addr.sun_family = AF_UNIX;

	path_len = strlen(filename);
	if (path_len >= sizeof(addr.sun_path)) {
		pr_err("Socket path too long\n");
		goto out;
	}
	/* addr was zeroed above, so the copied path stays NUL-terminated */
	memcpy(addr.sun_path, filename, path_len);

	unlink(filename); /* Ignore error if file doesn't exist */

	if (bind(sk, (struct sockaddr *)&addr, sizeof(addr)) == -1) {
		pr_perror("bind");
		goto out;
	}

	if (listen(sk, 5) == -1) {
		pr_perror("listen");
		goto out;
	}

	if (create_and_connect() != 0) {
		pr_err("Failed to create and connect\n");
		goto out;
	}

	/* Signal parent that socket is ready */
	if (write(ready_fd, "1", 1) != 1) {
		pr_perror("write ready_fd");
		goto out;
	}

	/* Wait indefinitely */
	pause();
	status = EXIT_SUCCESS;

out:
	close(sk);
	unlink(filename);
	return status;
}
/*
 * Become a process group leader, fork the socket-holding child and wait
 * until it reports readiness over a pipe.
 *
 * On success the child's pid is stored in *cpid and EXIT_SUCCESS is
 * returned; otherwise EXIT_FAILURE is returned and *cpid is not written.
 */
static int zombie_leader(int *cpid)
{
	int ready_pipe[2];
	ssize_t got;
	char token;
	pid_t kid;

	if (pipe(ready_pipe) == -1) {
		pr_perror("pipe");
		return EXIT_FAILURE;
	}

	if (setpgid(0, 0) == -1) {
		pr_perror("setpgid");
		return EXIT_FAILURE;
	}

	kid = fork();
	if (kid == 0) {
		/* Child only writes to the pipe */
		close(ready_pipe[0]);
		exit(child(ready_pipe[1]));
	}
	if (kid < 0) {
		pr_perror("Failed to fork child");
		return EXIT_FAILURE;
	}

	/* Parent only reads from the pipe */
	close(ready_pipe[1]);

	/* Block until the child has set up its socket */
	got = read(ready_pipe[0], &token, 1);
	if (got != 1) {
		pr_err("Failed to receive readiness signal from child\n");
		close(ready_pipe[0]);
		return EXIT_FAILURE;
	}
	close(ready_pipe[0]);

	*cpid = kid;
	return EXIT_SUCCESS;
}
/*
 * Fork a process that makes itself a group leader, starts the
 * socket-holding grandchild and exits; reap it, then run the usual
 * test_daemon()/test_waitsig() sequence while the grandchild stays alive.
 *
 * The grandchild's pid travels back through a MAP_SHARED anonymous page
 * and the grandchild is killed before returning.
 */
int main(int argc, char **argv)
{
	int exit_code = EXIT_FAILURE;
	int *grandchild;
	pid_t leader;
	int wstatus;

	test_init(argc, argv);

	grandchild = mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_SHARED, -1, 0);
	if (grandchild == MAP_FAILED) {
		pr_perror("mmap");
		return EXIT_FAILURE;
	}
	*grandchild = 0;

	leader = fork();
	if (leader == 0)
		exit(zombie_leader(grandchild));
	if (leader < 0) {
		pr_perror("Failed to fork zombie");
		goto out;
	}

	if (waitpid(leader, &wstatus, 0) < 0) {
		pr_perror("Failed to waitpid zombie");
		goto out;
	}

	if (!(WIFEXITED(wstatus) && WEXITSTATUS(wstatus) == EXIT_SUCCESS)) {
		pr_err("Unexpected exit code: %d\n", WEXITSTATUS(wstatus));
		goto out;
	}

	if (*grandchild == 0) {
		pr_err("Don't know grandchild's pid\n");
		goto out;
	}

	test_daemon();
	test_waitsig();

	exit_code = EXIT_SUCCESS;
	pass();

out:
	/* Clean up */
	if (*grandchild != 0)
		kill(*grandchild, SIGKILL);
	munmap(grandchild, sizeof(int));
	return exit_code;
}

View file

@ -0,0 +1 @@
{'flavor': 'ns uns'}

295
test/zdtm/static/uprobes.c Normal file
View file

@ -0,0 +1,295 @@
#include <fcntl.h>
#include <gelf.h>
#include <libelf.h>
#include <limits.h>
#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <tracefs.h>
#include <unistd.h>
#include "zdtmtst.h"
const char *test_doc = "Test the --allow-uprobes option";
const char *test_author = "Shashank Balaji <shashank.mahadasyam@sony.com>";
#define UPROBE_GROUP_NAME "zdtm"
#define UPROBE_EVENT_NAME "uprobes_test"
#define UPROBED_FUNCTION uprobe_target
/*
 * Target function for the uprobe.
 *
 * A uprobe can be set at the start of a function, but not every first
 * instruction triggers the creation of a uprobes vma.
 *
 * Examples:
 * - aarch64: if the function is a single `ret`, then no vma creation
 * - x64: if the function is `nop; ret`, then no vma creation
 *
 * So to guarantee vma creation, create a volatile dummy variable (to prevent
 * compiler optimization) and use it (to prevent "unused variable" warning).
 * Do not simplify the body.
 */
void UPROBED_FUNCTION(void) {
volatile int dummy = 0;
dummy += 1;
}
/* Calling via volatile function pointer ensures noinline at callsite */
typedef void (*func_ptr)(void);
volatile func_ptr uprobe_target_alias = UPROBED_FUNCTION;
struct uprobe_context {
struct tracefs_instance *instance;
struct tracefs_dynevent *uprobe;
};
volatile bool got_sigtrap = false;
/*
 * Returns the file offset of a symbol in the executable of this program,
 * found by walking the ELF symbol table (SHT_SYMTAB) of /proc/self/exe.
 *
 * Returns 0 on failure; an error is logged in that case.
 */
uint64_t calc_sym_offset(const char *sym_name)
{
	GElf_Shdr section_header;
	Elf_Scn *section = NULL;
	Elf_Data *symtab_data;
	uint64_t offset = 0;
	char buf[PATH_MAX];
	const char *name;
	GElf_Sym symbol;
	ssize_t n_bytes;
	int n_entries;
	Elf *elf;
	int fd;
	int i;

	if (elf_version(EV_CURRENT) == EV_NONE) {
		pr_err("ELF version of libelf is lower than that of the program\n");
		return 0;
	}

	/* readlink() does not NUL-terminate: keep one byte for the terminator */
	n_bytes = readlink("/proc/self/exe", buf, sizeof(buf) - 1);
	if (n_bytes < 0) {
		pr_perror("Failed to readlink /proc/self/exe");
		return 0;
	}
	buf[n_bytes] = '\0';

	fd = open(buf, O_RDONLY);
	if (fd < 0) {
		pr_perror("Failed to open self-executable");
		return 0;
	}

	elf = elf_begin(fd, ELF_C_READ, NULL);
	if (!elf) {
		pr_err("%s\n", elf_errmsg(elf_errno()));
		goto out_fd;
	}

	/* Look for the symbol table section and its header */
	while ((section = elf_nextscn(elf, section)) != NULL) {
		if (!gelf_getshdr(section, &section_header)) {
			pr_err("%s\n", elf_errmsg(elf_errno()));
			goto out_elf;
		}
		if (section_header.sh_type == SHT_SYMTAB)
			break;
	}
	if (!section) {
		pr_err("Failed to find symbol table\n");
		goto out_elf;
	}

	symtab_data = elf_getdata(section, NULL);
	/* sh_entsize is used as a divisor below, so it must not be zero */
	if (!symtab_data || !section_header.sh_entsize) {
		pr_err("Failed to read symbol table\n");
		goto out_elf;
	}
	n_entries = section_header.sh_size / section_header.sh_entsize;

	/* Look for a symbol with the required name */
	for (i = 0; i < n_entries; i++) {
		if (!gelf_getsym(symtab_data, i, &symbol))
			continue;
		/* Symbol table's sh_link is the index of the string table section header */
		name = elf_strptr(elf, section_header.sh_link, symbol.st_name);
		if (name && !strcmp(sym_name, name))
			break;
	}
	if (i == n_entries) {
		pr_err("Failed to find symbol \"%s\"\n", sym_name);
		goto out_elf;
	}

	/* Get the section the symbol belongs to (mostly .text) */
	section = elf_getscn(elf, symbol.st_shndx);
	if (!section || !gelf_getshdr(section, &section_header)) {
		pr_err("Failed to find the section of symbol \"%s\"\n", sym_name);
		goto out_elf;
	}
	offset = symbol.st_value - section_header.sh_addr + section_header.sh_offset;

out_elf:
	elf_end(elf);
out_fd:
	close(fd);
	return offset;
}
/*
* Set and enable a uprobe on the file at the given offset
* Returns struct uprobe_context with members set to NULL on failure
*/
struct uprobe_context enable_uprobe(const char *file, uint64_t offset)
{
struct tracefs_instance *trace_instance;
struct tracefs_dynevent *uprobe;
struct uprobe_context context = {};
trace_instance = tracefs_instance_create("zdtm_uprobes_test");
if (!trace_instance) {
pr_perror("Failed to create tracefs instance");
return context;
}
tracefs_instance_reset(trace_instance);
uprobe = tracefs_uprobe_alloc(UPROBE_GROUP_NAME, UPROBE_EVENT_NAME, file, offset, NULL);
if (!uprobe) {
pr_perror("Failed to allocate uprobe");
goto instance_destroy;
}
if (tracefs_dynevent_create(uprobe)) {
pr_perror("Failed to create uprobe");
goto uprobe_free;
}
if (tracefs_event_enable(trace_instance, UPROBE_GROUP_NAME, UPROBE_EVENT_NAME)) {
pr_perror("Failed to enable uprobe");
goto uprobe_destroy;
}
context.instance = trace_instance;
context.uprobe = uprobe;
return context;
uprobe_destroy:
tracefs_dynevent_destroy(uprobe, false);
uprobe_free:
tracefs_dynevent_free(uprobe);
instance_destroy:
tracefs_instance_destroy(trace_instance);
tracefs_instance_free(trace_instance);
return context;
}
/*
 * Tear down what enable_uprobe() set up: destroy and free the uprobe
 * first, then destroy and free the tracefs instance.
 */
void destroy_uprobe(struct uprobe_context context)
{
tracefs_dynevent_destroy(context.uprobe, true);
tracefs_dynevent_free(context.uprobe);
tracefs_instance_destroy(context.instance);
tracefs_instance_free(context.instance);
}
/*
 * Scan /proc/self/maps for a line containing "[uprobes]".
 * Returns 1 if such a line exists, 0 if the whole file was read without
 * finding one, and -1 if the file could not be opened or read to its end.
 */
int uprobes_vma_exists(void)
{
	char line[LINE_MAX];
	int found = 0;
	FILE *maps;

	maps = fopen("/proc/self/maps", "r");
	if (!maps) {
		pr_perror("Failed to open /proc/self/maps");
		return -1;
	}

	/* Stop reading as soon as the vma has been seen */
	while (!found && fgets(line, sizeof(line), maps))
		found = strstr(line, "[uprobes]") != NULL;

	/* fgets() stopped without a match: make sure it was a real EOF */
	if (!found && !feof(maps)) {
		pr_err("Failed to finish reading /proc/self/maps\n");
		found = -1;
	}

	fclose(maps);
	return found;
}
/*
 * SIGTRAP is sent if execution reaches a previously set uprobed location, and
 * the corresponding uprobe is not active. We don't want this to happen on
 * restore, so a kernel-generated SIGTRAP marks the test as failed.
 */
void sigtrap_handler(int signo, siginfo_t *info, void* context)
{
	/* Only SIGTRAPs with si_code SI_KERNEL are treated as failures */
	if (info->si_code != SI_KERNEL)
		return;

	got_sigtrap = true;
	fail("SIGTRAP on attempting to call uprobed function");
}
/*
 * Set a uprobe on UPROBED_FUNCTION in this executable, trigger it once so
 * that the uprobes vma shows up in /proc/self/maps, go through the
 * test_daemon()/test_waitsig() sequence and call the function again.
 * The test passes when no kernel-generated SIGTRAP was seen.
 *
 * Returns 0 on pass and 1 on any failure.
 */
int main(int argc, char **argv)
{
	struct uprobe_context context;
	struct sigaction sa;
	char buf[PATH_MAX];
	uint64_t offset;
	ssize_t n_bytes;
	int ret = 1;

	test_init(argc, argv);

	offset = calc_sym_offset(__stringify(UPROBED_FUNCTION));
	if (!offset)
		return 1;

	/* readlink() does not NUL-terminate: keep one byte for the terminator */
	n_bytes = readlink("/proc/self/exe", buf, sizeof(buf) - 1);
	if (n_bytes < 0) {
		pr_perror("Failed to readlink /proc/self/exe");
		return 1;
	}
	buf[n_bytes] = '\0';

	/* Start from a fully defined structure before setting the used fields */
	memset(&sa, 0, sizeof(sa));
	sa.sa_flags = SA_SIGINFO;
	sa.sa_sigaction = sigtrap_handler;
	sigemptyset(&sa.sa_mask);
	if (sigaction(SIGTRAP, &sa, NULL)) {
		pr_perror("Failed to set SIGTRAP handler");
		return 1;
	}

	context = enable_uprobe(buf, offset);
	if (!context.instance)
		return 1;

	/*
	 * Execution must reach the uprobed location at least once
	 * for the kernel to create the uprobes vma
	 */
	uprobe_target_alias();

	switch (uprobes_vma_exists()) {
	case 1:
		test_msg("Found uprobes vma\n");
		break;
	case 0:
		pr_err("uprobes vma does not exist\n");
		goto out_uprobe;
	default:
		/* -1: uprobes_vma_exists() has already logged the error */
		goto out_uprobe;
	}

	test_daemon();
	test_waitsig();

	/*
	 * Calling the uprobed function after restore should not cause
	 * a SIGTRAP, since the uprobe is still active
	 */
	uprobe_target_alias();

	if (!got_sigtrap) {
		pass();
		ret = 0;
	}

out_uprobe:
	destroy_uprobe(context);
	return ret;
}

View file

@ -0,0 +1,6 @@
{
'feature': 'cgroupns',
'flags': 'suid nouser',
'flavor': 'h',
'opts': '--allow-uprobes'
}

View file

@ -70,6 +70,7 @@ static int parse_maps(struct vm_area *vmas)
#endif
v->is_vvar_or_vdso |= strstr(buf, "[vdso]") != NULL;
v->is_vvar_or_vdso |= strstr(buf, "[vvar]") != NULL;
v->is_vvar_or_vdso |= strstr(buf, "[vvar_vclock]") != NULL;
test_msg("[NOTE]\tVMA: [%#" PRIx64 ", %#" PRIx64 "]\n", v->start, v->end);
}
@ -86,42 +87,35 @@ static int parse_maps(struct vm_area *vmas)
return i;
}
int compare_vmas(struct vm_area *vmax, struct vm_area *vmay)
{
if (vmax->start > vmay->start)
return 1;
if (vmax->start < vmay->start)
return -1;
if (vmax->end > vmay->end)
return 1;
if (vmax->end < vmay->end)
return -1;
return 0;
}
static int check_vvar_vdso(struct vm_area *before, struct vm_area *after)
static int check_vvar_vdso(struct vm_area *before, int nr_before, struct vm_area *after, int nr_after)
{
int i, j = 0;
for (i = 0; i < MAX_VMAS && j < MAX_VMAS; i++, j++) {
int cmp = compare_vmas(&before[i], &after[j]);
if (cmp == 0)
continue;
if (cmp < 0) { /* Lost mapping */
for (i = 0, j = 0; i < nr_before || j < nr_after;) {
if (j == nr_after || before[i].start < after[j].start) {
test_msg("[NOTE]\tLost mapping: %#" PRIx64 "-%#" PRIx64 "\n", before[i].start, before[i].end);
j--;
if (before[i].is_vvar_or_vdso) {
fail("Lost vvar/vdso mapping");
return -1;
}
i++;
continue;
}
test_msg("[NOTE]\tNew mapping appeared: %#" PRIx64 "-%#" PRIx64 "\n", after[j].start, after[j].end);
i--;
if (i == nr_before || before[i].start > after[j].start) {
test_msg("[NOTE]\tNew mapping appeared: %#" PRIx64 "-%#" PRIx64 "\n", after[j].start, after[j].end);
j++;
continue;
}
if (before[i].end == after[j].end) {
i++;
j++;
} else if (before[i].end > after[j].end) {
before[i].start = after[j].end;
j++;
} else {
after[j].start = before[i].end;
i++;
}
}
return 0;
@ -129,11 +123,10 @@ static int check_vvar_vdso(struct vm_area *before, struct vm_area *after)
static struct vm_area vmas_before[MAX_VMAS];
static struct vm_area vmas_after[MAX_VMAS];
static int nr_before, nr_after;
int main(int argc, char *argv[])
{
int nr_before, nr_after;
test_init(argc, argv);
test_msg("[NOTE]\tMappings before:\n");
@ -154,7 +147,7 @@ int main(int argc, char *argv[])
}
/* After restore vDSO/VVAR blobs must remain in the old place. */
if (check_vvar_vdso(vmas_before, vmas_after))
if (check_vvar_vdso(vmas_before, nr_before, vmas_after, nr_after))
return -1;
if (nr_before + 2 < nr_after) {