mirror of
https://github.com/checkpoint-restore/criu.git
synced 2026-01-23 02:14:37 +00:00
Compare commits
196 commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9e5fbcd668 | ||
|
|
21a6758268 | ||
|
|
07af3304fd | ||
|
|
fb59ae504e | ||
|
|
b208bec12d | ||
|
|
9885fb3c75 | ||
|
|
71fe85ec90 | ||
|
|
36f1e9d38c | ||
|
|
ddf7a170ff | ||
|
|
2dd66866e3 | ||
|
|
974c1bc898 | ||
|
|
b1a51489dd | ||
|
|
fc1867c44d | ||
|
|
2e5f9facf9 | ||
|
|
d4e8114130 | ||
|
|
30acbabcdd | ||
|
|
f66e59ee5c | ||
|
|
f78bea8d34 | ||
|
|
d591e320e0 | ||
|
|
2429d49e67 | ||
|
|
41ecb7ac71 | ||
|
|
92e6e523b5 | ||
|
|
2f676d20e4 | ||
|
|
6bb856b0af | ||
|
|
73ca071483 | ||
|
|
501b714f76 | ||
|
|
90300748ef | ||
|
|
09bb362664 | ||
|
|
bf82389de3 | ||
|
|
2cf8f13ca1 | ||
|
|
62aadb22ab | ||
|
|
1db7eed69f | ||
|
|
29525f8cb3 | ||
|
|
e4a5e164b4 | ||
|
|
f56ccfd2d6 | ||
|
|
6ed49894c5 | ||
|
|
77e6558ddb | ||
|
|
690b610432 | ||
|
|
ff35a9126e | ||
|
|
9e404e2083 | ||
|
|
d43217dadb | ||
|
|
db0ec806d1 | ||
|
|
5eb61e1b14 | ||
|
|
0b7ca29c19 | ||
|
|
fb02dbf685 | ||
|
|
7a4ee0ae8e | ||
|
|
920437205c | ||
|
|
4a3a695dfb | ||
|
|
33ed774c8d | ||
|
|
6386140754 | ||
|
|
ddbb3dbd8d | ||
|
|
3c7d4fa013 | ||
|
|
0a7e7d09dd | ||
|
|
e689d902b3 | ||
|
|
6344e8d71c | ||
|
|
a525b3c32e | ||
|
|
ce680fc6c7 | ||
|
|
1d08ff8ca7 | ||
|
|
cb8e1da3f4 | ||
|
|
0fa6ff3d18 | ||
|
|
567f70ce19 | ||
|
|
a1dc885027 | ||
|
|
3c841af2cf | ||
|
|
f7ccb63bdd | ||
|
|
9371c4a789 | ||
|
|
72ca94db4d | ||
|
|
5966ffe8a7 | ||
|
|
60a731ab38 | ||
|
|
ee4100c09f | ||
|
|
71a637923f | ||
|
|
d2c46b92b0 | ||
|
|
7aad7317b4 | ||
|
|
3f97cfe876 | ||
|
|
2878faa74c | ||
|
|
07ad2473f2 | ||
|
|
afcfcd3bf6 | ||
|
|
6860181474 | ||
|
|
d3dfb663b1 | ||
|
|
f74e68daf9 | ||
|
|
f824dc735b | ||
|
|
d5c81f8108 | ||
|
|
540c631dd0 | ||
|
|
a5ae3c184b | ||
|
|
697c31abe4 | ||
|
|
6fd71b9ee9 | ||
|
|
abf4a71d99 | ||
|
|
02462c19c4 | ||
|
|
b18c07d8a8 | ||
|
|
f29cb750db | ||
|
|
3365c7c025 | ||
|
|
bb9a7202a7 | ||
|
|
9d072222ef | ||
|
|
c03c08d1bc | ||
|
|
dcce9bd0e2 | ||
|
|
f548d3af4a | ||
|
|
aeec40bf02 | ||
|
|
bab72af9a5 | ||
|
|
74bf40feeb | ||
|
|
0ff2e0a66e | ||
|
|
7bf402f6b3 | ||
|
|
520266d895 | ||
|
|
790b3cf425 | ||
|
|
77553f07d3 | ||
|
|
3379c122e5 | ||
|
|
7a4b35a910 | ||
|
|
76394e93a8 | ||
|
|
0a81dc8bbe | ||
|
|
b25ff1d336 | ||
|
|
25f8be0f60 | ||
|
|
67751bc11b | ||
|
|
91758a68e9 | ||
|
|
2d2168fc9c | ||
|
|
2e26b36d44 | ||
|
|
7e0da4d975 | ||
|
|
afb2e6c3f9 | ||
|
|
c7395f4cbe | ||
|
|
a8c5e11715 | ||
|
|
80c280610e | ||
|
|
053a22a23b | ||
|
|
a779417a3f | ||
|
|
254ba3e8cc | ||
|
|
4b73985955 | ||
|
|
fa1b399064 | ||
|
|
2ba3430106 | ||
|
|
dcee5bd6ff | ||
|
|
98f2bd525a | ||
|
|
01265cfc69 | ||
|
|
9c0f725a62 | ||
|
|
59b4d662ae | ||
|
|
63c7029686 | ||
|
|
cc047d595f | ||
|
|
5843cbf975 | ||
|
|
42580fcb16 | ||
|
|
1873e8f502 | ||
|
|
4fc07a8a41 | ||
|
|
2bb77daa92 | ||
|
|
fce491113b | ||
|
|
5f94dd71e7 | ||
|
|
c6c6f6f231 | ||
|
|
d586b30c6b | ||
|
|
2762b21e4a | ||
|
|
0d1e280d09 | ||
|
|
64276874d8 | ||
|
|
95d5e2e59b | ||
|
|
22c83e3eba | ||
|
|
066bf7bf3c | ||
|
|
21c3b9c005 | ||
|
|
7fbf7b2be4 | ||
|
|
455c677399 | ||
|
|
e31828ed8c | ||
|
|
3dc865bc80 | ||
|
|
a80c544845 | ||
|
|
677a568919 | ||
|
|
87bd09a0d1 | ||
|
|
45d09ae17e | ||
|
|
4f057a6aeb | ||
|
|
4c7d42f67a | ||
|
|
922754dffd | ||
|
|
a79b33d0c5 | ||
|
|
99ba6db89b | ||
|
|
fcbaac0598 | ||
|
|
fbfed312e0 | ||
|
|
5f18ca1bbe | ||
|
|
dfa0ce1808 | ||
|
|
4f9dcfb9c8 | ||
|
|
b90cfc1a80 | ||
|
|
6476488a51 | ||
|
|
af5412a433 | ||
|
|
2b8951a9cf | ||
|
|
1fdff7c7a6 | ||
|
|
ae1395de18 | ||
|
|
7a5b3d1f41 | ||
|
|
a61116fd93 | ||
|
|
e8ba7c103a | ||
|
|
1fd1b670c4 | ||
|
|
e257d04974 | ||
|
|
497109eb4e | ||
|
|
427c0dc27b | ||
|
|
d57d40a5ad | ||
|
|
fddca67cc6 | ||
|
|
366d73a4c2 | ||
|
|
1eaa870cce | ||
|
|
b458a5c1ad | ||
|
|
5a725266ac | ||
|
|
6b3826a6fb | ||
|
|
88cb552f69 | ||
|
|
b6dca31162 | ||
|
|
5de61a721f | ||
|
|
b9da95b0b2 | ||
|
|
74799ae023 | ||
|
|
6805841660 | ||
|
|
e7aee3c5c7 | ||
|
|
5ff52326e1 | ||
|
|
9a1e979666 | ||
|
|
daa548bbfb | ||
|
|
34226fd243 |
249 changed files with 11922 additions and 1808 deletions
37
.cirrus.yml
37
.cirrus.yml
|
|
@ -13,9 +13,8 @@ task:
|
|||
nested_virtualization: true
|
||||
|
||||
setup_script: |
|
||||
scripts/ci/apt-install make gcc pkg-config git perl-modules iproute2 kmod wget cpu-checker
|
||||
contrib/apt-install make gcc pkg-config git perl-modules iproute2 kmod wget cpu-checker
|
||||
sudo kvm-ok
|
||||
ln -sf /usr/include/google/protobuf/descriptor.proto images/google/protobuf/descriptor.proto
|
||||
build_script: |
|
||||
make -C scripts/ci vagrant-fedora-no-vdso
|
||||
|
||||
|
|
@ -33,10 +32,9 @@ task:
|
|||
memory: 8G
|
||||
|
||||
setup_script: |
|
||||
ln -sf /usr/include/google/protobuf/descriptor.proto images/google/protobuf/descriptor.proto
|
||||
dnf config-manager --set-enabled crb # Same as CentOS 8 powertools
|
||||
dnf -y install epel-release epel-next-release
|
||||
dnf -y install --allowerasing asciidoc gcc git gnutls-devel libaio-devel libasan libcap-devel libnet-devel libnl3-devel libbsd-devel libselinux-devel make protobuf-c-devel protobuf-devel python-devel python-PyYAML python-protobuf python-junit_xml python3-importlib-metadata xmlto libdrm-devel libuuid-devel
|
||||
contrib/dependencies/dnf-packages.sh
|
||||
# The image has a too old version of nettle which does not work with gnutls.
|
||||
# Just upgrade to the latest to make the error go away.
|
||||
dnf -y upgrade nettle nettle-devel
|
||||
|
|
@ -65,9 +63,8 @@ task:
|
|||
nested_virtualization: true
|
||||
|
||||
setup_script: |
|
||||
scripts/ci/apt-install make gcc pkg-config git perl-modules iproute2 kmod wget cpu-checker
|
||||
contrib/apt-install make gcc pkg-config git perl-modules iproute2 kmod wget cpu-checker
|
||||
sudo kvm-ok
|
||||
ln -sf /usr/include/google/protobuf/descriptor.proto images/google/protobuf/descriptor.proto
|
||||
build_script: |
|
||||
make -C scripts/ci vagrant-fedora-rawhide
|
||||
|
||||
|
|
@ -86,36 +83,11 @@ task:
|
|||
nested_virtualization: true
|
||||
|
||||
setup_script: |
|
||||
scripts/ci/apt-install make gcc pkg-config git perl-modules iproute2 kmod wget cpu-checker
|
||||
contrib/apt-install make gcc pkg-config git perl-modules iproute2 kmod wget cpu-checker
|
||||
sudo kvm-ok
|
||||
ln -sf /usr/include/google/protobuf/descriptor.proto images/google/protobuf/descriptor.proto
|
||||
build_script: |
|
||||
make -C scripts/ci vagrant-fedora-non-root
|
||||
|
||||
task:
|
||||
name: aarch64 build GCC (native)
|
||||
arm_container:
|
||||
image: docker.io/library/ubuntu:jammy
|
||||
cpu: 4
|
||||
memory: 4G
|
||||
script: uname -a
|
||||
build_script: |
|
||||
scripts/ci/apt-install make
|
||||
ln -sf /usr/include/google/protobuf/descriptor.proto images/google/protobuf/descriptor.proto
|
||||
make -C scripts/ci local
|
||||
|
||||
task:
|
||||
name: aarch64 build CLANG (native)
|
||||
arm_container:
|
||||
image: docker.io/library/ubuntu:jammy
|
||||
cpu: 4
|
||||
memory: 4G
|
||||
script: uname -a
|
||||
build_script: |
|
||||
scripts/ci/apt-install make
|
||||
ln -sf /usr/include/google/protobuf/descriptor.proto images/google/protobuf/descriptor.proto
|
||||
make -C scripts/ci local CLANG=1
|
||||
|
||||
task:
|
||||
name: aarch64 Fedora Rawhide
|
||||
arm_container:
|
||||
|
|
@ -125,6 +97,5 @@ task:
|
|||
script: uname -a
|
||||
build_script: |
|
||||
scripts/ci/prepare-for-fedora-rawhide.sh
|
||||
ln -sf /usr/include/google/protobuf/descriptor.proto images/google/protobuf/descriptor.proto
|
||||
make -C scripts/ci/ local CC=gcc SKIP_CI_PREP=1 SKIP_CI_TEST=1 CD_TO_TOP=1
|
||||
make -C test/zdtm -j 4
|
||||
|
|
|
|||
|
|
@ -1,3 +1,3 @@
|
|||
[codespell]
|
||||
skip = ./.git,./test/pki,./tags
|
||||
skip = ./.git,./test/pki,./tags,./plugins/amdgpu/amdgpu_drm.h,./plugins/amdgpu/drm.h,./plugins/amdgpu/drm_mode.h
|
||||
ignore-words-list = creat,fpr,fle,ue,bord,parms,nd,te,testng,inh,wronly,renderd,bui,clen,sems
|
||||
|
|
|
|||
|
|
@ -1,43 +1,25 @@
|
|||
name: Actuated aarch64 test
|
||||
name: aarch64 test
|
||||
|
||||
on: [push, pull_request]
|
||||
|
||||
# Cancel any preceding run on the pull request.
|
||||
concurrency:
|
||||
group: actuated-test-${{ github.event.pull_request.number || github.ref }}
|
||||
group: aarch64-test-${{ github.event.pull_request.number || github.ref }}
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/criu-dev' }}
|
||||
|
||||
jobs:
|
||||
build:
|
||||
# Actuated runners are not available in all repositories.
|
||||
if: ${{ github.repository == 'checkpoint-restore/criu' }}
|
||||
# The memory size and the number of CPUs can be freely selected.
|
||||
# 3GB and 4 CPUs seems to be enough according to the result from 'vmmeter'.
|
||||
runs-on: actuated-arm64-4cpu-3gb
|
||||
strategy:
|
||||
matrix:
|
||||
os: [ubuntu-24.04-arm, ubuntu-22.04-arm]
|
||||
target: [GCC=1, CLANG=1]
|
||||
|
||||
runs-on: ${{ matrix.os }}
|
||||
|
||||
steps:
|
||||
# https://gist.github.com/alexellis/1f33e581c75e11e161fe613c46180771#file-metering-gha-md
|
||||
# vmmeter start
|
||||
- name: Prepare arkade
|
||||
uses: alexellis/arkade-get@master
|
||||
with:
|
||||
crane: latest
|
||||
print-summary: false
|
||||
|
||||
- name: Install vmmeter
|
||||
run: |
|
||||
crane export --platform linux/arm64 ghcr.io/openfaasltd/vmmeter:latest | sudo tar -xvf - -C /usr/local/bin
|
||||
|
||||
- name: Run vmmeter
|
||||
uses: self-actuated/vmmeter-action@master
|
||||
# vmmeter end
|
||||
|
||||
- uses: actions/checkout@v4
|
||||
- name: Run Tests ${{ matrix.target }}
|
||||
# Following tests are failing on the actuated VMs:
|
||||
- name: Run Tests ${{ matrix.target }} on ${{ matrix.os }}
|
||||
# Following tests are failing on the VMs:
|
||||
# ./change_mnt_context --pidfile=change_mnt_context.pid --outfile=change_mnt_context.out
|
||||
# 45: ERR: change_mnt_context.c:23: mount (errno = 22 (Invalid argument))
|
||||
#
|
||||
3
.github/workflows/alpine-test.yml
vendored
3
.github/workflows/alpine-test.yml
vendored
|
|
@ -9,10 +9,11 @@ concurrency:
|
|||
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ubuntu-22.04
|
||||
strategy:
|
||||
matrix:
|
||||
os: [ubuntu-22.04, ubuntu-22.04-arm]
|
||||
target: [GCC=1, CLANG=1]
|
||||
runs-on: ${{ matrix.os }}
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
|
|
|||
2
.github/workflows/check-commits.yml
vendored
2
.github/workflows/check-commits.yml
vendored
|
|
@ -19,7 +19,7 @@ jobs:
|
|||
# Checkout pull request HEAD commit instead of merge commit
|
||||
ref: ${{ github.event.pull_request.head.sha }}
|
||||
- name: Install dependencies
|
||||
run: sudo scripts/ci/apt-install libprotobuf-dev libprotobuf-c-dev protobuf-c-compiler protobuf-compiler python3-protobuf libnl-3-dev libnet-dev libcap-dev uuid-dev
|
||||
run: sudo contrib/apt-install libprotobuf-dev libprotobuf-c-dev protobuf-c-compiler protobuf-compiler python3-protobuf libnl-3-dev libnet-dev libcap-dev uuid-dev
|
||||
- name: Configure git user details
|
||||
run: |
|
||||
git config --global user.email "checkpoint-restore@users.noreply.github.com"
|
||||
|
|
|
|||
2
.github/workflows/codeql.yml
vendored
2
.github/workflows/codeql.yml
vendored
|
|
@ -34,7 +34,7 @@ jobs:
|
|||
- name: Install Packages (cpp)
|
||||
if: ${{ matrix.language == 'cpp' }}
|
||||
run: |
|
||||
sudo scripts/ci/apt-install protobuf-c-compiler libprotobuf-c-dev libprotobuf-dev build-essential libprotobuf-dev libprotobuf-c-dev protobuf-c-compiler protobuf-compiler python3-protobuf libnet-dev pkg-config libnl-3-dev libbsd0 libbsd-dev iproute2 libcap-dev libaio-dev libbsd-dev python3-yaml libnl-route-3-dev gnutls-dev
|
||||
sudo contrib/apt-install protobuf-c-compiler libprotobuf-c-dev libprotobuf-dev build-essential libprotobuf-dev libprotobuf-c-dev protobuf-c-compiler protobuf-compiler python3-protobuf libnet-dev pkg-config libnl-3-dev libbsd0 libbsd-dev iproute2 libcap-dev libaio-dev libbsd-dev python3-yaml libnl-route-3-dev gnutls-dev
|
||||
- name: Initialize CodeQL
|
||||
uses: github/codeql-action/init@v3
|
||||
with:
|
||||
|
|
|
|||
2
.github/workflows/lint.yml
vendored
2
.github/workflows/lint.yml
vendored
|
|
@ -14,7 +14,7 @@ jobs:
|
|||
image: registry.fedoraproject.org/fedora:latest
|
||||
steps:
|
||||
- name: Install tools
|
||||
run: sudo dnf -y install git make ruff xz clang-tools-extra which codespell git-clang-format ShellCheck
|
||||
run: sudo dnf -y install git make ruff xz clang-tools-extra codespell git-clang-format ShellCheck
|
||||
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
|
|
|
|||
2
.github/workflows/nftables-test.yml
vendored
2
.github/workflows/nftables-test.yml
vendored
|
|
@ -15,7 +15,7 @@ jobs:
|
|||
- name: Remove iptables
|
||||
run: sudo apt remove -y iptables
|
||||
- name: Install libnftables-dev
|
||||
run: sudo scripts/ci/apt-install libnftables-dev
|
||||
run: sudo contrib/apt-install libnftables-dev
|
||||
- name: chmod 755 /home/runner
|
||||
# CRIU's tests are sometimes running as some random user and need
|
||||
# to be able to access the test files.
|
||||
|
|
|
|||
2
.gitignore
vendored
2
.gitignore
vendored
|
|
@ -20,8 +20,6 @@ compel/compel
|
|||
compel/compel-host-bin
|
||||
images/*.c
|
||||
images/*.h
|
||||
images/google/protobuf/*.c
|
||||
images/google/protobuf/*.h
|
||||
.gitid
|
||||
criu/criu
|
||||
criu/unittest/unittest
|
||||
|
|
|
|||
|
|
@ -23,8 +23,3 @@ extraction:
|
|||
- "python3-yaml"
|
||||
- "libnl-route-3-dev"
|
||||
- "gnutls-dev"
|
||||
configure:
|
||||
command:
|
||||
- "ls -laR images/google"
|
||||
- "ln -s /usr/include/google/protobuf/descriptor.proto images/google/protobuf/descriptor.proto"
|
||||
- "ls -laR images/google"
|
||||
|
|
|
|||
35
.travis.yml
35
.travis.yml
|
|
@ -1,35 +0,0 @@
|
|||
language: c
|
||||
os: linux
|
||||
dist: bionic
|
||||
services:
|
||||
- docker
|
||||
jobs:
|
||||
include:
|
||||
- os: linux
|
||||
arch: ppc64le
|
||||
env: TR_ARCH=local
|
||||
dist: bionic
|
||||
- os: linux
|
||||
arch: ppc64le
|
||||
env: TR_ARCH=local CLANG=1
|
||||
dist: bionic
|
||||
- os: linux
|
||||
arch: s390x
|
||||
env: TR_ARCH=local
|
||||
dist: bionic
|
||||
- os: linux
|
||||
arch: arm64-graviton2
|
||||
env: TR_ARCH=local RUN_TESTS=1
|
||||
dist: focal
|
||||
group: edge
|
||||
virt: vm
|
||||
- os: linux
|
||||
arch: arm64-graviton2
|
||||
env: TR_ARCH=local CLANG=1 RUN_TESTS=1
|
||||
group: edge
|
||||
virt: vm
|
||||
dist: bionic
|
||||
script:
|
||||
- sudo make -C scripts/ci $TR_ARCH
|
||||
after_success:
|
||||
- make -C scripts/ci after_success
|
||||
1
CLAUDE.md
Symbolic link
1
CLAUDE.md
Symbolic link
|
|
@ -0,0 +1 @@
|
|||
GEMINI.md
|
||||
|
|
@ -8,8 +8,8 @@ Here are some useful hints to get involved.
|
|||
* We have both -- [very simple](https://github.com/checkpoint-restore/criu/issues?q=is%3Aissue+is%3Aopen+label%3Aenhancement) and [more sophisticated](https://github.com/checkpoint-restore/criu/issues?q=is%3Aissue+is%3Aopen+label%3A%22new+feature%22) coding tasks;
|
||||
* CRIU does need [extensive testing](https://github.com/checkpoint-restore/criu/issues?q=is%3Aissue+is%3Aopen+label%3Atesting);
|
||||
* Documentation is always hard, we have [some information](https://criu.org/Category:Empty_articles) that is to be extracted from people's heads into wiki pages as well as [some texts](https://criu.org/Category:Editor_help_needed) that all need to be converted into useful articles;
|
||||
* Feedback is expected on the GitHub issues page and on the [mailing list](https://lists.openvz.org/mailman/listinfo/criu);
|
||||
* We accept GitHub pull requests and this is the preferred way to contribute to CRIU. If you prefer to send patches by email, you are welcome to send them to [CRIU development mailing list](https://lists.openvz.org/mailman/listinfo/criu).
|
||||
* Feedback is expected on the GitHub issues page and on the [mailing list](https://lore.kernel.org/criu);
|
||||
* We accept GitHub pull requests and this is the preferred way to contribute to CRIU. If you prefer to send patches by email, you are welcome to send them to [CRIU development mailing list](https://lore.kernel.org/criu).
|
||||
Below we describe in more detail the recommended practices for CRIU development.
|
||||
* Spread the word about CRIU in [social networks](http://criu.org/Contacts);
|
||||
* If you're giving a talk about CRIU -- let us know, we'll mention it on the [wiki main page](https://criu.org/News/events);
|
||||
|
|
@ -27,19 +27,43 @@ The repository may contain multiple branches. Development happens in the **criu-
|
|||
To clone CRIU repo and switch to the proper branch, run:
|
||||
|
||||
```
|
||||
git clone https://github.com/checkpoint-restore/criu criu
|
||||
cd criu
|
||||
git checkout criu-dev
|
||||
git clone https://github.com/checkpoint-restore/criu criu
|
||||
cd criu
|
||||
git checkout criu-dev
|
||||
```
|
||||
|
||||
### Compile
|
||||
### Building from source
|
||||
|
||||
First, you need to install compile-time dependencies. Check [Installation dependencies](https://criu.org/Installation#Dependencies) for more info.
|
||||
Follow these steps to compile CRIU from source code.
|
||||
|
||||
To compile CRIU, run:
|
||||
#### Installing build dependencies
|
||||
|
||||
First, you need to install the required build dependencies. We provide scripts to simplify this process for several Linux distributions in [contrib/dependencies](contrib/dependencies). For a complete list of dependencies, please refer to the [installation guide](https://criu.org/Installation).
|
||||
|
||||
##### On Ubuntu/Debian-based systems:
|
||||
|
||||
```
|
||||
make
|
||||
./contrib/dependencies/apt-packages.sh
|
||||
```
|
||||
|
||||
##### On Fedora/CentOS-based systems:
|
||||
|
||||
```
|
||||
./contrib/dependencies/dnf-packages.sh
|
||||
```
|
||||
|
||||
##### Using Nix:
|
||||
|
||||
```
|
||||
nix develop
|
||||
```
|
||||
|
||||
#### Compiling CRIU
|
||||
|
||||
Once the dependencies are installed, you can compile CRIU by running the `make` command from the root of the source directory:
|
||||
|
||||
```
|
||||
make
|
||||
```
|
||||
|
||||
This should create the `./criu/criu` executable.
|
||||
|
|
@ -63,7 +87,7 @@ The following command can be used to automatically run a code linter for Python
|
|||
text spelling (codespell), and a number of CRIU-specific checks (usage of print macros and EOL whitespace for C files).
|
||||
|
||||
```
|
||||
make lint
|
||||
make lint
|
||||
```
|
||||
|
||||
In addition, we have adopted a [clang-format configuration file](https://www.kernel.org/doc/Documentation/process/clang-format.rst)
|
||||
|
|
@ -73,7 +97,7 @@ results in decreased readability, we may choose to ignore these errors.
|
|||
Run the following command to check if your changes are compliant with the clang-format rules:
|
||||
|
||||
```
|
||||
make indent
|
||||
make indent
|
||||
```
|
||||
|
||||
This command is built upon the `git-clang-format` tool and supports two options `BASE` and `OPTS`. The `BASE` option allows you to
|
||||
|
|
@ -83,7 +107,7 @@ can use `BASE=origin/criu-dev`. The `OPTS` option can be used to pass additional
|
|||
to check the last *N* commits for formatting errors, without applying the changes to the codebase you can use the following command.
|
||||
|
||||
```
|
||||
make indent OPTS=--diff BASE=HEAD~N
|
||||
make indent OPTS=--diff BASE=HEAD~N
|
||||
```
|
||||
|
||||
Note that for pull requests, the "Run code linter" workflow runs these checks for all commits. If a clang-format error is detected
|
||||
|
|
@ -96,7 +120,7 @@ Here are some bad examples of clang-format-ing:
|
|||
```
|
||||
@@ -58,8 +59,7 @@ static int register_membarriers(void)
|
||||
}
|
||||
|
||||
|
||||
if (!all_ok) {
|
||||
- fail("can't register membarrier()s - tried %#x, kernel %#x",
|
||||
- barriers_registered, barriers_supported);
|
||||
|
|
@ -129,16 +153,11 @@ Here are some bad examples of clang-format-ing:
|
|||
CRIU comes with an extensive test suite. To check whether your changes introduce any regressions, run
|
||||
|
||||
```
|
||||
make test
|
||||
make test
|
||||
```
|
||||
|
||||
The command runs [ZDTM Test Suite](https://criu.org/ZDTM_Test_Suite). Check for any error messages produced by it.
|
||||
|
||||
In case you'd rather have someone else run the tests, you can use travis-ci for your
|
||||
own GitHub fork of CRIU. It will check the compilation for various supported platforms,
|
||||
as well as run most of the tests from the suite. See https://travis-ci.org/checkpoint-restore/criu
|
||||
for more details.
|
||||
|
||||
## Describe your changes
|
||||
|
||||
Describe your problem. Whether your change is a one-line bug fix or
|
||||
|
|
@ -166,21 +185,21 @@ If your change fixes a bug in a specific commit, e.g. you found an issue using
|
|||
the SHA-1 ID, and the one line summary. For example:
|
||||
|
||||
```
|
||||
Fixes: 9433b7b9db3e ("make: use cflags/ldflags for config.h detection mechanism")
|
||||
Fixes: 9433b7b9db3e ("make: use cflags/ldflags for config.h detection mechanism")
|
||||
```
|
||||
|
||||
The following `git config` settings can be used to add a pretty format for
|
||||
outputting the above style in the `git log` or `git show` commands:
|
||||
|
||||
```
|
||||
[pretty]
|
||||
fixes = Fixes: %h (\"%s\")
|
||||
[pretty]
|
||||
fixes = Fixes: %h (\"%s\")
|
||||
```
|
||||
|
||||
If your change addresses an issue listed on GitHub, please use the `Fixes:` tag with the number of the issue. For instance:
|
||||
|
||||
```
|
||||
Fixes: #339
|
||||
Fixes: #339
|
||||
```
|
||||
|
||||
The `Fixes:` tags should be put at the end of the detailed description.
|
||||
|
|
@ -263,7 +282,7 @@ can certify the below:
|
|||
then you just add a line saying
|
||||
|
||||
```
|
||||
Signed-off-by: Random J Developer <random at developer.example.org>
|
||||
Signed-off-by: Random J Developer <random at developer.example.org>
|
||||
```
|
||||
|
||||
using your real name (please, no pseudonyms or anonymous contributions if
|
||||
|
|
@ -275,14 +294,14 @@ commit message. To append such line to a commit you already made, use
|
|||
|
||||
```
|
||||
From: Random J Developer <random at developer.example.org>
|
||||
Subject: [PATCH] component: Short patch description
|
||||
Subject: [PATCH] component: Short patch description
|
||||
|
||||
Long patch description (could be skipped if patch
|
||||
is trivial enough)
|
||||
Long patch description (could be skipped if patch
|
||||
is trivial enough)
|
||||
|
||||
Signed-off-by: Random J Developer <random at developer.example.org>
|
||||
---
|
||||
Patch body here
|
||||
Signed-off-by: Random J Developer <random at developer.example.org>
|
||||
---
|
||||
Patch body here
|
||||
```
|
||||
|
||||
## Submit your work upstream
|
||||
|
|
@ -316,8 +335,8 @@ contains the following:
|
|||
revisions should be listed. For example:
|
||||
|
||||
```
|
||||
v3: rebase on the current criu-dev
|
||||
v2: add commit to foo() and update bar() coding style
|
||||
v3: rebase on the current criu-dev
|
||||
v2: add commit to foo() and update bar() coding style
|
||||
```
|
||||
|
||||
If there are only minor updates to the commits in a pull request, it is
|
||||
|
|
@ -335,7 +354,7 @@ Historically, CRIU worked with mailing lists and patches so if you still prefer
|
|||
To create a patch, run
|
||||
|
||||
```
|
||||
git format-patch --signoff origin/criu-dev
|
||||
git format-patch --signoff origin/criu-dev
|
||||
```
|
||||
|
||||
You might need to read GIT documentation on how to prepare patches
|
||||
|
|
@ -346,8 +365,8 @@ at all.
|
|||
We recommend posting patches using `git send-email`
|
||||
|
||||
```
|
||||
git send-email --cover-letter --no-chain-reply-to --annotate \
|
||||
--confirm=always --to=criu@openvz.org criu-dev
|
||||
git send-email --cover-letter --no-chain-reply-to --annotate \
|
||||
--confirm=always --to=criu@lists.linux.dev criu-dev
|
||||
```
|
||||
|
||||
Note that the `git send-email` subcommand may not be in
|
||||
|
|
@ -359,14 +378,14 @@ If this is your first time using git send-email, you might need to
|
|||
configure it to point it to your SMTP server with something like:
|
||||
|
||||
```
|
||||
git config --global sendemail.smtpServer stmp.example.net
|
||||
git config --global sendemail.smtpServer smtp.example.net
|
||||
```
|
||||
|
||||
If you get tired of typing `--to=criu@openvz.org` all the time,
|
||||
If you get tired of typing `--to=criu@lists.linux.dev` all the time,
|
||||
you can configure that to be automatically handled as well:
|
||||
|
||||
```
|
||||
git config sendemail.to criu@openvz.org
|
||||
git config sendemail.to criu@lists.linux.dev
|
||||
```
|
||||
|
||||
If a developer is sending another version of the patch (e.g. to address
|
||||
|
|
@ -379,7 +398,7 @@ version if needed though).
|
|||
|
||||
### Mail patches
|
||||
|
||||
The patches should be sent to CRIU development mailing list, `criu AT openvz.org`. Note that you need to be subscribed first in order to post. The list web interface is available at https://openvz.org/mailman/listinfo/criu; you can also use standard mailman aliases to work with it.
|
||||
The patches should be sent to CRIU development mailing list, `criu AT lists.linux.dev`. Note that you need to be subscribed first in order to post. The list web interface is available at https://lore.kernel.org/criu; you can also use standard mailman aliases to work with it.
|
||||
|
||||
Please make sure the email client you're using doesn't screw your patch (line wrapping and so on).
|
||||
|
||||
|
|
@ -396,5 +415,3 @@ sometimes a patch may fly around a week before it gets reviewed.
|
|||
Wiki article: [Continuous integration](https://criu.org/Continuous_integration)
|
||||
|
||||
CRIU tests are run for each series sent to the mailing list. If you get a message from our patchwork that patches failed to pass the tests, you have to investigate what is wrong.
|
||||
|
||||
We also recommend you to [enable Travis CI for your repo](https://criu.org/Continuous_integration#Enable_Travis_CI_for_your_repo) to check patches in your git branch, before sending them to the mailing list.
|
||||
|
|
|
|||
|
|
@ -15,6 +15,7 @@ Checkpoint / Restore inside a docker container
|
|||
Pytorch
|
||||
Tensorflow
|
||||
Using CRIU Image Streamer
|
||||
Parallel Restore
|
||||
|
||||
DESCRIPTION
|
||||
-----------
|
||||
|
|
|
|||
|
|
@ -465,6 +465,30 @@ The 'mode' may be one of the following:
|
|||
*skip*::: Don't lock the network. If *--tcp-close* is not used, the network
|
||||
must be locked externally to allow CRIU to dump TCP connections.
|
||||
|
||||
*--allow-uprobes*::
|
||||
Allow dumping when uprobes vma is present. When used on dump, this option is
|
||||
required on restore as well.
|
||||
|
||||
A uprobes vma is automatically created by the kernel once a uprobe is
|
||||
triggered. This mapping is not removed even once the uprobe is deleted. So,
|
||||
even if a process once had uprobes attached to it, and they're removed by
|
||||
the time the process is dumped, this option is still required because criu
|
||||
has no way of knowing whether there are active uprobes or not.
|
||||
|
||||
When using this option on restore, make sure the uprobes (if any) active on
|
||||
the dumped processes are still active. Otherwise, when execution reaches
|
||||
a uprobe'd location in any of the restored processes, that process will be
|
||||
sent a SIGTRAP.
|
||||
|
||||
As an example, say a uprobe is set at function foo in the executable of the
|
||||
process p_bar. Whenever execution in p_bar reaches function foo, the uprobe
|
||||
is triggered. If the uprobe has been triggered at least once, then the kernel
|
||||
will have created the uprobes vma. To dump p_bar, this option is
|
||||
necessary. After dumping, say the uprobe is deleted. Now, on restoring with
|
||||
this option, once execution reaches function foo, SIGTRAP will be sent to
|
||||
the restored p_bar. Unless it has a signal handler installed for SIGTRAP,
|
||||
it will be terminated and core dumped.
|
||||
|
||||
*restore*
|
||||
~~~~~~~~~
|
||||
Restores previously checkpointed processes.
|
||||
|
|
@ -478,8 +502,8 @@ Restores previously checkpointed processes.
|
|||
The 'resource' argument can be one of the following:
|
||||
+
|
||||
- **tty[**__rdev__**:**__dev__**]**
|
||||
- **pipe[**__inode__**]**
|
||||
- **socket[**__inode__*]*
|
||||
- **pipe:[**__inode__**]**
|
||||
- **socket:[**__inode__*]*
|
||||
- **file[**__mnt_id__**:**__inode__**]**
|
||||
- 'path/to/file'
|
||||
|
||||
|
|
@ -692,6 +716,10 @@ The 'mode' may be one of the following:
|
|||
*--skip-file-rwx-check*::
|
||||
Skip checking file permissions (r/w/x for u/g/o) on restore.
|
||||
|
||||
*--allow-uprobes*::
|
||||
Required when dumped with this option. Refer to this option in the section
|
||||
on dumping for more details.
|
||||
|
||||
*check*
|
||||
~~~~~~~
|
||||
Checks whether the kernel supports the features needed by *criu* to
|
||||
|
|
|
|||
136
Documentation/logo.svg
Normal file
136
Documentation/logo.svg
Normal file
|
|
@ -0,0 +1,136 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<!-- Generator: Adobe Illustrator 16.0.1, SVG Export Plug-In . SVG Version: 6.00 Build 0) -->
|
||||
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
|
||||
<svg version="1.1" id="Layer_1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px" y="0px"
|
||||
width="560px" height="560px" viewBox="0 0 560 560" enable-background="new 0 0 560 560" xml:space="preserve">
|
||||
<path opacity="0.3" fill="#990000" d="M315.137,360.271c-18.771-7.159-41.548-8.85-68.479-8.85c-16.661,0-46.255,2.939-74.654,3.38
|
||||
c11.209-4.884,20.734-10.265,24.842-16.87c14.531-23.346,17.645-65.893,17.645-65.893l-20.758,3.114c0,0-2.591,35.8-16.085,47.733
|
||||
c-5.35,4.736-15.96,7.834-27.916,10.856c2.447-26.071,29.477-57.552,29.477-57.552l-14.874-3.966l-5.88-7.448
|
||||
c0,0-3.011,1.761-7.588,5.315c-18.298,4.208-75.946,20.443-75.946,57.983c0,15.292,5.77,26.308,14.768,34.244
|
||||
c-22.858,26.966-20.755,61.618-20.755,61.618s-8.945,16.61-8.021,31.254c2.083,32.973,34.931,25.097,44.313,26.374
|
||||
c9.644,1.313,34.313-4.18,34.313-4.18s-16.276-2.639-15.329-18.562c0.5-8.369-0.947-27.628-21.404-37.307
|
||||
c-1.13-10.066,2.111-18.309,6.379-28.015c18.452,45.263,92.601,53.97,92.601,53.97c0.393-0.097-10.269,20.047,0.221,35.632
|
||||
c4.652,6.915,18.284,10.019,22.436,19.356c4.151,9.341,2.199,30.354,2.199,30.354s21.267-16.864,27.239-30.18
|
||||
c3.334-7.432,25.989,0.926,25.989-34.047c0-14.077-12.26-26.841-13.675-29.815c-20.858-20.334-5.427-4.743,2.677-8.236
|
||||
c12.758-5.499,35.412,11.657,35.412,11.657s-10.402-20.119-11.437-31.013c-0.795-8.335-4.537-16.816-16.624-30.042
|
||||
c7.166-0.752,20.362,2.327,20.362,2.327s-5.202,11.251-0.879,25.515c3.588,11.84,7.193,7.193,14.736,14.737
|
||||
c6.599,6.598,3.146,26.284,3.146,26.284s4.674-4.513,18.081-18.235c9.072-9.29,23.645-16.717,23.645-47.86
|
||||
C355.312,365.969,334.97,360.979,315.137,360.271z M134.108,285.901c-11.5,13.048-23.667,32.329-28.23,58.293
|
||||
c-4.821-3.519-7.613-8.1-7.613-14.043C98.265,309.699,117.078,295.016,134.108,285.901z"/>
|
||||
<path fill="#990000" d="M382.184,115.435c3.654,1.208,7.327,2.37,10.968,3.444c14.16,4.183,26.745-9.798,26.745-9.798
|
||||
s-8.785-2.243-17.857-3.497c12.173-2.653,21.085-18.66,21.085-18.66s-17.366,4.819-27.224,5.087
|
||||
c-2.042,0.057-4.107,0.118-6.189,0.186c2.464-0.37,4.925-0.847,7.361-1.485c14.201-3.714,21.505-23.382,21.505-23.382
|
||||
s-15.411,6.743-24.951,9.239c-2.694,0.703-5.438,1.437-8.197,2.185c3.038-1.071,6.008-2.306,8.815-3.82
|
||||
c12.922-6.965,12.241-29.347,12.241-29.347s-10.162,11.926-18.844,16.605c-3.557,1.916-7.199,3.904-10.846,5.911
|
||||
c3.798-2.277,7.45-4.743,10.596-7.569c10.918-9.814,7.722-29.605,7.722-29.605s-9.801,12.54-17.135,19.131
|
||||
c-8.939,8.037-18.775,14.104-27.014,21.81c-6.427,6.011-25.14,35.236-36.812,46.283c-11.671,11.047-18.301,12.476-19.159,14.388
|
||||
c-0.863,1.913,1.006,30.46-14.078,39.145c-16.476-21.583-50.565-44.007-53.101-72.033c-2.079-22.959,5.209-34.055,19.149-35.316
|
||||
c14.994-1.359,15.998,24.507,15.998,24.507s-1.379,1.064-1.708,6.391c-0.097,0.629-0.145,1.272-0.083,1.934
|
||||
c0.004,0.031,0.008,0.06,0.011,0.091c-0.014,1.674,0.065,3.664,0.278,6.039c1.131,12.474,4.53,14.574,4.53,14.574l2.075-0.722
|
||||
c0,0-2.24-4.079-2.554-7.529c-0.172-1.917-0.187-3.556-0.079-4.977c0.45,0.067,0.949,0.081,1.506,0.031
|
||||
c4.398-0.399,6.049-4.141,5.65-8.539c-0.042-0.45-0.069-0.885-0.094-1.316c2.485-26.032-1.756-29.637,4.788-41.391
|
||||
c9.032-16.218,17.279-16.015,17.279-16.015l1.402-8.155c0,0-6.817,2.462-14.819,13.652c-8.833,12.354-8.983,26.229-9.066,47.958
|
||||
c-0.188-0.761-0.502-1.37-1.017-1.784c-2.457-11.192-9.087-32.13-24.112-30.77c-16.72,1.514-29.419,14.974-26.773,44.171
|
||||
c3.609,39.832,26.186,52.701,29.829,80.84c-13.47-2.349-23.883-10.656-30.866-20.282c-7.803-10.749-7.297-22.949-8.324-24.779
|
||||
c-1.027-1.829-7.761-2.662-20.367-12.627c-12.605-9.965-33.845-37.41-40.78-42.824c-8.895-6.942-19.229-12.111-28.848-19.32
|
||||
c-7.892-5.915-18.769-17.531-18.769-17.531s-1.419,19.995,10.323,28.8c3.386,2.536,7.246,4.665,11.229,6.597
|
||||
c-3.808-1.674-7.616-3.33-11.327-4.925c-9.062-3.887-20.246-14.861-20.246-14.861s1.31,22.353,14.803,28.143
|
||||
c2.931,1.257,6,2.223,9.12,3.019c-2.818-0.5-5.615-0.985-8.357-1.447c-9.728-1.636-25.677-6.981-25.677-6.981
|
||||
s9.025,18.94,23.5,21.376c2.485,0.417,4.975,0.674,7.466,0.822c-2.08,0.118-4.148,0.242-6.183,0.368
|
||||
c-9.843,0.61-27.566-2.645-27.566-2.645S85.667,120.333,110,120c-8.922,2.057-25.678,6.008-25.678,6.008s13.778,12.806,27.508,7.38
|
||||
c3.533-1.394,7.087-2.876,10.62-4.404c-3.726,1.804-7.424,3.581-11.005,5.273c-8.963,4.243-19.428,10.176-19.428,10.176
|
||||
s15.069,9.759,27.305,1.497c0.558-0.378,3.121-1.76,3.678-2.143c-7.904,5.808-19.754,14.937-19.754,14.937
|
||||
s15.802,6.027,27.092-3.354c4.663-3.875,8.104-7.185,12.238-11.618c-3.773,4.55-6.699,8.018-10.634,12.106
|
||||
c-6.839,7.104-13.06,19.791-13.06,19.791s15.597,0.39,24.359-11.388c4.488-6.035,7.482-11.633,10.974-18.191
|
||||
c-3.113,6.479-5.468,11.95-8.911,17.788c-5.018,8.49-7.574,22.624-7.574,22.624s15.342-3.655,21.07-17.17
|
||||
c2.231-5.266,2.107-9.783,3.694-15.291c-1.257,5.272-0.666,9.475-2.24,14.319c-3.045,9.379,0.011,25.554,0.011,25.554
|
||||
s9.713-5.855,10.359-20.52c0.006-0.153,0.5-8.47,0.5-8.625L171,171.496c0,9.917,6.295,23.276,6.295,23.276
|
||||
s11.459-10.649,9.369-25.266c-0.188-1.31-0.1-2.627-0.305-3.947c0.408,1.507,0.998,3.016,1.493,4.524
|
||||
c3.075,9.429,3.5,15.957,3.5,15.957s6.483,1.251,8.73-1.594c0.764,5.625-0.843,10.2-0.843,10.2s5.471-1.1,8.893-3.756
|
||||
c0.705,5.331,0.155,8.789,0.155,8.789s5.106-1.603,8.419-4.323c0.611,4.642,1.764,7.542,1.764,7.542s6.398-0.88,9.021-5.393
|
||||
c0.199,0.038,0.395,0.079,0.59,0.117c2.269,4.875,1.438,8.517,1.438,8.517s7.492-2.14,9.492-6.14c0.003,0,0.007,0,0.01,0
|
||||
c1.798,4,2.727,6.102,2.727,6.102s4.853-2.349,7.093-6.064c0.189,0.009,0.364-0.093,0.547-0.086
|
||||
c-4.702,19.629-23.62,29.658-42.207,42.764c-1.392,0.981-2.712,1.925-3.97,2.884c-2.891,1.512-6.788,3.495-11.311,5.724
|
||||
c-9.829,3.363-23.7,6.057-41.038,4.084c-9.798-1.115-21.037,10.02-21.037,10.02s6.87,4.843,16.565,5.028
|
||||
c-8.819,3.621-17.438,12.632-17.438,12.632s0.045,0.019,0.069,0.029c-27.096,11.688-51.621,29.917-47.651,57.105
|
||||
c2.375,16.27,14.692,25.475,31.704,30.254c-17.81,14.742-32.921,36.129-30.707,60.59c0.134,1.487,0.309,2.916,0.508,4.311
|
||||
c-2.209,5.6-3.288,17.842-2.674,24.886c0.949,10.838,13.686,8.662,18.219,6.729c14.139,12.202,32.258,10.252,32.258,10.252
|
||||
s-17.301,1.211-30.306-11.156c5.551-2.659,6.424-3.925,6.788-11.579c0.36-7.61-9.104-20.759-20.57-21.966
|
||||
c-1.25-20.07,9.861-43.32,30.603-60.203c0.02,0.249,0.023,0.491,0.048,0.742c4.248,46.957,30.584,54.634,81.148,63.26
|
||||
c12.603,2.15,22.04,5.821,29.042,10.457c-3.844,5.388-5.706,21.559-2.895,32.325c3.045,11.655,12.647,14.53,19.429,14.955
|
||||
c-3.304,16.035-11.235,29.024-11.235,29.024s10.015-11.628,15.04-29.016c0.48-0.031,0.928-0.069,1.319-0.114
|
||||
c10.922-1.262,16.17-11.338,14.743-23.071c-1.195-9.826-13.974-24.54-28.598-25.992c-33.117-21.52-109.104-9.05-113.877-61.769
|
||||
c-0.341-3.746-0.517-7.367-0.571-10.888c5.709,1.111,11.782,1.844,18.104,2.244c14.111,28.517,62.158,22.269,95.818,20.694
|
||||
c1.764,3.09,7.043,7.064,13.929,9.779c11.751,4.633,14.889,3.742,18.869,1.502c1.484-0.835,2.828-1.92,3.979-3.155
|
||||
c10.822,10.456,25.37,30.251,25.37,30.251s-12.29-22.284-22.733-33.97c2.601-4.923,2.433-10.619-2.559-13.297
|
||||
c-6.956-3.732-31.321,1.581-36.316,4.981c-30.811,1.668-71.853,6.551-89.576-16.474c41.005,1.192,88.786-9.133,102.385-10.365
|
||||
c21.726-1.966,47.319,1.367,64.887,8.228c-0.783,5.681,1.867,18.47,4.641,25.318c3.316,8.197,11.561,5.887,16.562,3.028
|
||||
c-0.588,13.3-4.495,22.638-4.495,22.638s7.86-14.125,9.117-26.183c4.354-4.041,4.774-5.562,2.904-12.887
|
||||
c-1.849-7.24-14.317-16.821-25.47-15.096c-21.855-8.906-54.594-11.087-75.74-9.175c-18.253,1.653-61.404,10.802-97.611,10.237
|
||||
c-1.895-3.338-3.402-7.122-4.412-11.479c5.113-2.364,10.551-4.388,16.307-5.975c30.999-8.551,40.97-29.258,42.943-48.579
|
||||
c1.127,1.303,1.938,2.069,1.938,2.069s7.087-12.679,5.522-27.275c-0.264-2.469-0.429-4.737-0.553-6.911
|
||||
c2.499,6.741,7.778,13.001,7.778,13.001s16.438-20.208,5.846-27.268c-11.583-7.714-6.836-13.283-4.31-15.299
|
||||
c3.354-1.984,6.973-3.94,10.859-5.817c26.561-12.817,59.903-20.002,64.443-40.039c0.265-1.172,0.388-2.34,0.443-3.507
|
||||
c3.701,2.396,9.165,2.053,9.165,2.053s-0.367-2.88-0.601-7.556c3.747,2.081,8.874,1.758,8.874,1.758s-0.986-2.319-1.255-7.689
|
||||
c3.846,1.998,8.434,2.278,8.434,2.278s-0.725-2.246-1.24-5.573c3.788,0.719,8.84,0.419,8.84,0.419s-3.543-7.302-1.316-16.965
|
||||
c0.357-1.547,0.666-3.09,0.938-4.626c-0.087,1.332-0.169,2.662-0.238,3.985c-0.783,14.742,10.85,24.47,10.85,24.47
|
||||
S337,172.178,337,162.303c0-0.021,0-0.042,0-0.061c0,0.153-0.804,0.309-0.782,0.46c1.951,14.548,13.499,20.839,13.499,20.839
|
||||
s2.388-16.471-1.478-25.542c-1.998-4.686-3.966-9.742-5.688-14.881c2.068,5.344,4.374,10.673,7.067,15.72
|
||||
c6.909,12.952,20.498,15.406,20.498,15.406s-1.832-14.029-7.581-22.041c-3.952-5.505-7.874-11.654-11.551-17.83
|
||||
c4.059,6.22,8.622,12.438,13.631,18.048c9.774,10.953,25.27,9.178,25.27,9.178s-7.323-12.085-14.767-18.552
|
||||
c-4.283-3.722-8.589-7.824-12.754-12.019c4.513,4.047,9.319,7.944,14.31,11.39c12.077,8.341,27.281,0.931,27.281,0.931
|
||||
s-10.533-7.219-18.926-12.302c0.595,0.332,1.186,0.662,1.777,0.988c12.922,7.14,28.146-3.013,28.146-3.013
|
||||
s-12.036-5.887-21.343-9.313C389.896,118.341,386.055,116.903,382.184,115.435z M116.917,367.418
|
||||
c-0.172,0.131-0.344,0.268-0.516,0.398c-17.301-3.899-29.646-12.415-31.124-28.752c-2.244-24.777,21.669-42.631,47.562-54.59
|
||||
c3.553,1,9.203,1.919,15.541,0.503c-4.694,4.817-7.998,9.859-7.998,9.859s2.076,0.564,5.3,0.733
|
||||
C133.582,308.673,115.917,333.715,116.917,367.418z M146.295,295.598c1.834,0.062,3.979-0.014,6.326-0.386
|
||||
c-0.141,0.365-0.274,0.72-0.401,1.069c-10.511,14.57-18.745,34.363-17.404,59.912c-4.522,2.267-9.248,5.074-13.939,8.343
|
||||
C122.237,330.3,136.218,307.613,146.295,295.598z M121.776,368.86c4.131-2.979,8.589-5.697,13.361-8.115
|
||||
c0.358,3.527,1.032,6.741,2.025,9.634C131.805,370.131,126.629,369.657,121.776,368.86z M150.478,350.278
|
||||
c-3.791,0.864-8.16,2.403-12.812,4.546c-0.062-0.425-0.168-0.803-0.224-1.236c-2.557-19.875,3.873-37.276,13.005-51.347
|
||||
c0,0.005-0.007,0.032-0.007,0.032s13.533-3.395,23.088-14.017c-1.715,7.205,0.158,14.79,0.158,14.79s9.774-5.185,16.654-15.216
|
||||
c-0.131,5.548,2.84,10.803,5.451,14.331C193.303,321.731,182.711,342.934,150.478,350.278z M259.516,275.357
|
||||
c0.846-4.127,1.649-8.135,2.42-12.012c2.199-4.002,5.203-6.524,9.011-7.55c3.808-1.04,7.78-1.559,11.919-1.559l1.739-17.042
|
||||
c-5.942,0.378-11.657,1.419-17.144,3.105c-5.492,1.672-10.946,3.611-16.369,5.8c-4.526,4.131-7.915,8.875-10.169,14.237
|
||||
c-2.262,5.359-3.755,11.051-4.655,17.055c-0.906,6.007-1.268,12.17-1.268,18.489v18.209c0,3.23,0.201,6.368,0.779,9.393
|
||||
c0.584,3.045,1.728,5.66,3.543,7.85c3.614,2.588,7.203,3.85,10.822,3.771c3.619-0.066,7.224-0.712,10.842-1.925
|
||||
c3.611-1.23,7.162-2.757,10.647-4.558c3.484-1.811,6.904-3.293,10.266-4.457l7.159-14.521c-2.066,0.505-4.2,1.23-6.394,2.127
|
||||
c-2.199,0.9-4.453,1.643-6.777,2.224c-2.322,0.585-4.649,0.773-6.977,0.585c-2.322-0.189-4.649-1.2-6.976-2.994
|
||||
c-2.063-3.626-3.355-7.475-3.87-11.541c-0.519-4.065-0.612-8.165-0.289-12.296C258.1,283.619,258.674,279.488,259.516,275.357z
|
||||
M367.6,320.582c-0.196-3.025-1.001-5.908-2.42-8.623c-1.031-3.608-2.649-6.588-4.846-8.905c-2.193-2.333-4.682-4.162-7.458-5.516
|
||||
c-2.773-1.358-5.712-2.364-8.812-3.014c-3.098-0.643-6.004-1.056-8.717-1.259c-2.711-0.188-5.101-0.285-7.166-0.285
|
||||
s-3.419-0.062-4.064-0.189c0.25-1.037,0.449-2.302,0.574-3.783c0.133-1.481,0.322-2.866,0.584-4.162
|
||||
c0.258-1.419,0.512-2.977,0.773-4.65c6.326,0,12.073-0.581,17.242-1.749c5.165-1.148,9.688-3.059,13.558-5.705
|
||||
c3.876-2.646,7.135-6.131,9.781-10.469c2.649-4.318,4.558-9.583,5.715-15.776c-5.684,0-11.596,0.029-17.727,0.093
|
||||
s-12.328,0.158-18.593,0.284c-6.266,0.143-12.431,0.332-18.5,0.583c-6.066,0.27-11.812,0.584-17.236,0.979
|
||||
c0.128,0,0.221,1.387,0.293,4.161c0.062,2.775,0.062,6.465,0,11.035c-0.072,4.588-0.2,9.788-0.386,15.589
|
||||
c-0.199,5.819-0.49,11.73-0.875,17.734c-0.386,6.007-0.878,11.901-1.451,17.72c-0.584,5.815-1.262,10.908-2.035,15.304
|
||||
c5.552-0.268,11.432-0.488,17.624-0.677c2.162-0.065,4.33-0.127,6.503-0.176l1.247-5.547c0.385-2.192,0.708-4.776,0.969-7.739
|
||||
c0.259-2.979,0.513-5.754,0.773-8.338c0.259-3.093,0.386-6.196,0.386-9.286c0.646-0.127,1.677-0.206,3.103-0.206
|
||||
c1.547,0,3.225,0.269,5.039,0.773c1.804,0.519,3.68,1.292,5.612,2.334c1.938,1.041,3.615,2.522,5.034,4.46
|
||||
c1.42,1.925,2.45,4.352,3.104,7.252c0.638,2.914,0.638,6.495,0,10.75l0.631,5.39c1.609,0.033,3.207,0.079,4.796,0.144
|
||||
c6.068,0.189,11.812,0.471,17.234,0.866C367.891,326.747,367.795,323.609,367.6,320.582z M327.506,263.345
|
||||
c0.707-4.397,1.323-8.133,1.835-11.238c1.168-0.521,2.522-0.835,4.069-0.962c1.549-0.125,3.103-0.205,4.65-0.205
|
||||
c1.677,0,3.291,0.031,4.845,0.112c1.547,0.062,2.901,0.093,4.069,0.093c0,1.151-0.041,2.586-0.103,4.256
|
||||
c-0.066,1.688-0.189,3.42-0.389,5.232c-0.189,1.815-0.512,3.578-0.97,5.331c-0.446,1.732-1.127,3.182-2.034,4.347
|
||||
c-0.896,0.918-2.128,1.657-3.681,2.224c-1.543,0.584-3.159,1.042-4.84,1.357c-1.677,0.33-3.291,0.55-4.838,0.677
|
||||
c-1.555,0.141-2.78,0.207-3.682,0.207C326.439,271.542,326.798,267.727,327.506,263.345z M393.035,246.385
|
||||
c-2.517,0.33-4.84,0.584-6.97,0.773c-2.135,0.205-3.781,0.172-4.939-0.096l3.678,2.711c0.899,5.423,1.356,11.051,1.356,16.851
|
||||
c0,5.818-0.195,11.695-0.584,17.642c-0.385,5.941-0.872,11.805-1.45,17.624c-0.581,5.801-1,11.427-1.261,16.85
|
||||
c-0.907,4.522-1.519,9.238-1.835,14.139c-0.331,4.901-0.843,9.713-1.554,14.425c-0.708,4.712-1.812,9.3-3.297,13.761
|
||||
c-1.48,4.443-3.773,8.481-6.869,12.107l-2.908,1.543c0.513,0.52,1.323,0.993,2.42,1.45c1.093,0.457,1.842,0.678,2.23,0.678
|
||||
c2.708-3.23,4.712-6.558,6.004-9.978c1.286-3.419,2.64-6.746,4.069-9.963c1.544-2.711,2.969-5.626,4.261-8.716
|
||||
c1.286-3.107,2.774-6.008,4.455-8.719c1.671-2.708,3.681-5.045,6.008-6.984c2.322-1.938,5.285-3.15,8.903-3.67
|
||||
c0.386-6.319,0.836-13.114,1.354-20.335c0.517-7.235,1.001-14.534,1.451-21.896c0.457-7.361,0.846-14.596,1.168-21.689
|
||||
c0.323-7.111,0.482-13.684,0.482-19.769c-2.713,0-5.458,0.143-8.229,0.394C398.196,245.785,395.553,246.07,393.035,246.385z
|
||||
M483.002,245c0,4-0.061,5.618-0.188,7.038c-0.135,1.419-0.323,3.525-0.581,5.259c-0.261,1.751-0.584,4.166-0.972,6.752
|
||||
c-0.386,2.584-0.843,6.388-1.354,11.165c-0.519,4.791-1.135,11.551-1.839,19.167c-0.715,7.612-1.519,18.619-2.427,29.619h-32.15
|
||||
c0-15,1.065-26.686,3.192-39.535c2.138-12.847,4.101-25.911,5.911-38.695c-5.034,0.52-9.85,1.042-14.427,1.812
|
||||
c-4.589,0.773-9.136,0.898-13.662,0.52c-0.513,13.682-1.543,27.507-3.097,41.521c-1.553,13.998-3.23,27.586-5.038,40.749
|
||||
c4.52,0,9.396-0.166,14.631-0.496c5.224-0.316,10.292-0.479,15.2-0.479c0.649,1.152,1.285,2.776,1.942,4.838
|
||||
c0.638,2.065,1.22,4.318,1.738,6.779c0.517,2.457,0.997,5.027,1.454,7.753c0.447,2.715,0.873,5.424,1.258,8.135
|
||||
c0.9,6.32,1.681,13.102,2.327,20.336c2.192-6.196,4.454-12.28,6.777-18.209c1.938-5.045,4.004-10.262,6.196-15.699
|
||||
c2.199-5.423,4.327-10.073,6.393-13.936c2.323,0.254,4.649,0.316,6.974,0.188c2.326-0.124,4.681-0.25,7.071-0.392
|
||||
c2.386-0.127,4.775-0.127,7.163,0c2.389,0.142,4.681,0.52,6.88,1.165c-0.257-6.716-0.164-13.619,0.293-20.728
|
||||
c0.449-7.093,1.096-14.204,1.932-21.297c0.841-7.111,1.707-15.14,2.615-22.062c0.907-6.901,1.742-13.27,2.522-21.27H483.002z"/>
|
||||
</svg>
|
||||
|
After Width: | Height: | Size: 15 KiB |
136
GEMINI.md
Normal file
136
GEMINI.md
Normal file
|
|
@ -0,0 +1,136 @@
|
|||
# CRIU (Checkpoint/Restore In User-space)
|
||||
|
||||
CRIU is a tool for saving the state of a running application to a set of files
|
||||
(checkpointing) and restoring it back to a live state. It is primarily used for
|
||||
live migration of containers, in-place updates, and fast application startup.
|
||||
|
||||
It is implemented as a command-line tool called `criu`. The two primary commands
|
||||
are `dump` and `restore`.
|
||||
|
||||
- `dump`: Saves a process tree and all its related resources (file
|
||||
descriptors, IPC, sockets, namespaces, etc.) into a collection of image
|
||||
files.
|
||||
- `restore`: Restores processes from image files to the same state they were
|
||||
in before the dump.
|
||||
|
||||
## Quick Start
|
||||
|
||||
To get a feel for `criu`, you can try checkpointing and restoring a simple
|
||||
process.
|
||||
|
||||
1. **Run a simple process:**
|
||||
Open a terminal and run a command that will run for a while. Find its PID.
|
||||
```bash
|
||||
sleep 1000 &
|
||||
[1] 12345
|
||||
```
|
||||
|
||||
2. **Dump the process:**
|
||||
As root, use `criu dump` with the process ID (`-t`) and a directory for the
|
||||
image files (`-D`).
|
||||
```bash
|
||||
sudo criu dump -t 12345 -D /tmp/sleep_images -v4 --shell-job
|
||||
```
|
||||
The `sleep` process will no longer be running.
|
||||
|
||||
3. **Restore the process:**
|
||||
Use `criu restore` to bring the process back to life from the images.
|
||||
```bash
|
||||
sudo criu restore -D /tmp/sleep_images -v4 --shell-job
|
||||
```
|
||||
The `sleep` process will be running again as if nothing happened.
|
||||
|
||||
# For Developers and Contributors
|
||||
|
||||
This section contains more technical details about CRIU's internals and
|
||||
development process.
|
||||
|
||||
## Dump Process
|
||||
|
||||
On dump, CRIU uses available kernel interfaces to collect information about
|
||||
processes. For properties that can only be retrieved from within the process
|
||||
itself, CRIU injects a binary blob (called a "parasite") into the process's
|
||||
address space and executes it in the context of one of the process's threads.
|
||||
This injection is handled by a subproject called **Compel**.
|
||||
|
||||
## Restore Process
|
||||
|
||||
On restore, CRIU reads the image files to reconstruct the processes. The goal is
|
||||
to restore them to the exact state they were in before the dump. The restore
|
||||
process is divided into several stages (defined as `CR_STATE_*` in
|
||||
`./criu/include/restorer.h`).
|
||||
|
||||
The main `criu` process acts as a coordinator. It first restores resources with
|
||||
inter-process dependencies (file descriptors, sockets, shared memory,
|
||||
namespaces, etc.). It then forks the process tree and sets up namespaces.
|
||||
Finally, it restores process-specific resources like file descriptors and memory
|
||||
mappings.
|
||||
|
||||
A key step involves a small, self-contained binary called the "restorer". All
|
||||
restored processes switch to executing this code, which unmaps the CRIU-specific
|
||||
memory and restores the application's original memory mappings. On the final
|
||||
step, the restorer calls `sigreturn` on a prepared signal frame to resume the
|
||||
process with the state it had at the moment of the dump.
|
||||
|
||||
## Compel
|
||||
|
||||
Compel is a subproject responsible for generating the binary blobs used for the
|
||||
parasite code (for dumping) and the restorer code (for restoring). It provides a
|
||||
library for injecting and executing this code within the target process's
|
||||
address space. It is a separate project because the logic for generating and
|
||||
injecting Position-Independent Executable (PIE) code is complex and
|
||||
self-contained.
|
||||
|
||||
## Coding Style
|
||||
|
||||
The C code in the CRIU project follows the
|
||||
[Linux Kernel Coding Style](https://www.kernel.org/doc/html/latest/process/coding-style.html).
|
||||
Here are some of the main points:
|
||||
|
||||
- **Indentation**: Use tabs, which are set to 8 characters.
|
||||
- **Line Length**: The preferred line limit is 80 characters, but it can be
|
||||
extended to 120 if it improves code readability.
|
||||
- **Braces**:
|
||||
- The opening brace for a function goes on a new line.
|
||||
- The opening brace for a block (like `if`, `for`, `while`, `switch`) goes
|
||||
on the same line.
|
||||
- **Spaces**: Use spaces around operators (`+`, `-`, `*`, `/`, `%`, `<`, `>`,
|
||||
`=`, etc.).
|
||||
- **Naming**: Use descriptive names for functions and variables.
|
||||
- **Comments**: Use C-style comments (`/* ... */`). For multi-line comments,
|
||||
the preferred format is:
|
||||
```c
|
||||
/*
|
||||
* This is a multi-line
|
||||
* comment.
|
||||
*/
|
||||
```
|
||||
|
||||
## Code Layout
|
||||
|
||||
The code is organized into the following directories:
|
||||
|
||||
- `./compel`: The Compel sub-project.
|
||||
- `./criu`: The main `criu` tool source code.
|
||||
- `./images`: Protobuf descriptions for the image files.
|
||||
- `./test`: All tests.
|
||||
- `./test/zdtm`: The Zero-Downtime Migration (ZDTM) test suite.
|
||||
- `./test/zdtm.py`: The executor script for ZDTM tests.
|
||||
- `./scripts`: Helper scripts.
|
||||
- `./scripts/build`: Docker image files used for CI and cross-compilation
|
||||
checks.
|
||||
- `./crit`: A tool to inspect and manipulate CRIU image files.
|
||||
- `./soccr`: A library for TCP socket checkpoint/restore.
|
||||
|
||||
## Tests
|
||||
|
||||
The main test suite is ZDTM. Here is an example of how to run a single test:
|
||||
|
||||
```bash
|
||||
sudo ./test/zdtm.py run -t zdtm/static/env00
|
||||
```
|
||||
|
||||
Each ZDTM test has three stages: preparation, C/R, and results checks. During
|
||||
the test, a process calls `test_daemon()` to signal it is ready for C/R, then
|
||||
calls `test_waitsig()` to wait for the C/R stage to complete. After being
|
||||
restored, the test checks that all its resources are still in a valid state.
|
||||
13
Makefile
13
Makefile
|
|
@ -43,7 +43,7 @@ ifeq ($(ARCH),arm)
|
|||
endif
|
||||
|
||||
ifeq ($(ARMV),8)
|
||||
# Running 'setarch linux32 uname -m' returns armv8l on travis aarch64.
|
||||
# Running 'setarch linux32 uname -m' returns armv8l on aarch64.
|
||||
# This tells CRIU to handle armv8l just as armv7hf. Right now this is
|
||||
# only used for compile testing. No further verification of armv8l exists.
|
||||
ARCHCFLAGS += -march=armv7-a
|
||||
|
|
@ -64,6 +64,8 @@ endif
|
|||
|
||||
ifeq ($(ARCH),aarch64)
|
||||
DEFINES := -DCONFIG_AARCH64
|
||||
CC_MBRANCH_PROT := $(shell $(CC) -c -x c /dev/null -mbranch-protection=none -o /dev/null >/dev/null 2>&1 && echo "-mbranch-protection=none")
|
||||
CFLAGS_PIE := $(CC_MBRANCH_PROT)
|
||||
endif
|
||||
|
||||
ifeq ($(ARCH),ppc64)
|
||||
|
|
@ -449,6 +451,10 @@ ruff:
|
|||
test/zdtm.py \
|
||||
test/inhfd/*.py \
|
||||
test/others/rpc/config_file.py \
|
||||
test/others/action-script/check_actions.py \
|
||||
test/others/pycriu/*.py \
|
||||
lib/pycriu/criu.py \
|
||||
lib/pycriu/__init__.py \
|
||||
lib/pycriu/images/pb2dict.py \
|
||||
lib/pycriu/images/images.py \
|
||||
scripts/criu-ns \
|
||||
|
|
@ -462,7 +468,8 @@ ruff:
|
|||
shellcheck:
|
||||
shellcheck --version
|
||||
shellcheck scripts/*.sh
|
||||
shellcheck scripts/ci/*.sh scripts/ci/apt-install
|
||||
shellcheck scripts/ci/*.sh
|
||||
shellcheck contrib/apt-install contrib/dependencies/*.sh
|
||||
shellcheck -x test/others/crit/*.sh
|
||||
shellcheck -x test/others/libcriu/*.sh
|
||||
shellcheck -x test/others/crit/*.sh test/others/criu-coredump/*.sh
|
||||
|
|
@ -485,7 +492,7 @@ lint: ruff shellcheck codespell
|
|||
! git --no-pager grep -E '\s+$$' \*.c \*.h
|
||||
.PHONY: lint ruff shellcheck codespell
|
||||
|
||||
codecov: SHELL := $(shell which bash)
|
||||
codecov: SHELL := $(shell command -v bash)
|
||||
codecov:
|
||||
curl -Os https://uploader.codecov.io/latest/linux/codecov
|
||||
chmod +x codecov
|
||||
|
|
|
|||
|
|
@ -50,8 +50,8 @@ compel/plugins/%: $(compel-deps) .FORCE
|
|||
|
||||
#
|
||||
# GNU make 4.x supports targets matching via wide
|
||||
# match targeting, where GNU make 3.x series (used on
|
||||
# Travis) is not, so we have to write them here explicitly.
|
||||
# match targeting, whereas the GNU make 3.x series does not,
|
||||
# so we have to write them here explicitly.
|
||||
compel/plugins/std.lib.a: $(compel-deps) .FORCE
|
||||
$(Q) $(MAKE) $(build)=compel/plugins $@
|
||||
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ ifeq ($(call try-cc,$(FEATURE_TEST_LIBBSD_DEV),-lbsd),true)
|
|||
LIBS_FEATURES += -lbsd
|
||||
FEATURE_DEFINES += -DCONFIG_HAS_LIBBSD
|
||||
else
|
||||
$(info Note: Building without setproctitle() and strlcpy() support.)
|
||||
$(info Note: Building without setproctitle() support.)
|
||||
$(info $S Install libbsd-devel (RPM) / libbsd-dev (DEB) to fix.)
|
||||
endif
|
||||
|
||||
|
|
@ -84,7 +84,7 @@ endif
|
|||
export DEFINES += $(FEATURE_DEFINES)
|
||||
export CFLAGS += $(FEATURE_DEFINES)
|
||||
|
||||
FEATURES_LIST := TCP_REPAIR STRLCPY STRLCAT PTRACE_PEEKSIGINFO \
|
||||
FEATURES_LIST := TCP_REPAIR PTRACE_PEEKSIGINFO \
|
||||
SETPROCTITLE_INIT TCP_REPAIR_WINDOW MEMFD_CREATE \
|
||||
OPENAT2 NO_LIBC_RSEQ_DEFS
|
||||
|
||||
|
|
|
|||
|
|
@ -46,9 +46,13 @@ endif
|
|||
endif
|
||||
|
||||
# Default flags for pip install:
|
||||
# --upgrade: Upgrade crit/pycriu packages
|
||||
# --ignore-installed: Ignore existing packages and reinstall them
|
||||
PIPFLAGS ?= --upgrade --ignore-installed
|
||||
# --ignore-installed: Overwrite already installed pycriu/crit packages
|
||||
# --no-build-isolation: Use current Python environment to build pycriu/crit packages
|
||||
# --no-deps: Don't install any dependencies
|
||||
# --no-index: Don't use PyPI index to find packages
|
||||
# --progress-bar off: Disable the progress bar for cleaner output
|
||||
# --upgrade: Treat the install as an upgrade when replacing the installed version
|
||||
PIPFLAGS ?= --ignore-installed --no-build-isolation --no-deps --no-index --progress-bar off --upgrade
|
||||
|
||||
export SKIP_PIP_INSTALL PIPFLAGS
|
||||
|
||||
|
|
|
|||
|
|
@ -1,10 +1,10 @@
|
|||
#
|
||||
# CRIU version.
|
||||
CRIU_VERSION_MAJOR := 4
|
||||
CRIU_VERSION_MINOR := 1
|
||||
CRIU_VERSION_SUBLEVEL := 1
|
||||
CRIU_VERSION_MINOR := 2
|
||||
CRIU_VERSION_SUBLEVEL :=
|
||||
CRIU_VERSION_EXTRA :=
|
||||
CRIU_VERSION_NAME := CRISCV
|
||||
CRIU_VERSION_NAME := CRIUTIBILITY
|
||||
CRIU_VERSION := $(CRIU_VERSION_MAJOR)$(if $(CRIU_VERSION_MINOR),.$(CRIU_VERSION_MINOR))$(if $(CRIU_VERSION_SUBLEVEL),.$(CRIU_VERSION_SUBLEVEL))$(if $(CRIU_VERSION_EXTRA),.$(CRIU_VERSION_EXTRA))
|
||||
|
||||
export CRIU_VERSION_MAJOR CRIU_VERSION_MINOR CRIU_VERSION_SUBLEVEL
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@
|
|||
[](
|
||||
https://circleci.com/gh/checkpoint-restore/criu)
|
||||
|
||||
<p align="center"><img src="https://criu.org/w/images/1/1c/CRIU.svg" width="256px"/></p>
|
||||
<p align="center"><img src="Documentation/logo.svg" width="256px"/></p>
|
||||
|
||||
## CRIU -- A project to implement checkpoint/restore functionality for Linux
|
||||
|
||||
|
|
|
|||
47
compel/arch/aarch64/src/lib/include/uapi/asm/gcs-types.h
Normal file
47
compel/arch/aarch64/src/lib/include/uapi/asm/gcs-types.h
Normal file
|
|
@ -0,0 +1,47 @@
|
|||
#ifndef __UAPI_ASM_GCS_TYPES_H__
|
||||
#define __UAPI_ASM_GCS_TYPES_H__
|
||||
|
||||
#ifndef NT_ARM_GCS
|
||||
#define NT_ARM_GCS 0x410 /* ARM GCS state */
|
||||
#endif
|
||||
|
||||
/* Shadow Stack/Guarded Control Stack interface */
|
||||
#define PR_GET_SHADOW_STACK_STATUS 74
|
||||
#define PR_SET_SHADOW_STACK_STATUS 75
|
||||
#define PR_LOCK_SHADOW_STACK_STATUS 76
|
||||
|
||||
/*
 * Each constant below is wrapped in #ifndef so that a definition already
 * made by an earlier-included header is kept as is.
 */

/* When set PR_SHADOW_STACK_ENABLE flag allocates a Guarded Control Stack */
#ifndef PR_SHADOW_STACK_ENABLE
#define PR_SHADOW_STACK_ENABLE (1UL << 0)
#endif

/* Allows explicit GCS stores (eg. using GCSSTR) */
#ifndef PR_SHADOW_STACK_WRITE
#define PR_SHADOW_STACK_WRITE (1UL << 1)
#endif

/* Allows explicit GCS pushes (eg. using GCSPUSHM) */
#ifndef PR_SHADOW_STACK_PUSH
#define PR_SHADOW_STACK_PUSH (1UL << 2)
#endif

#ifndef SHADOW_STACK_SET_TOKEN
#define SHADOW_STACK_SET_TOKEN 0x1 /* Set up a restore token in the shadow stack */
#endif

/*
 * Mask with every PR_SHADOW_STACK_* mode bit defined above set.
 *
 * The replacement list is parenthesized so that the mask stays a single
 * operand when it is combined with operators that bind tighter than '|'
 * (for example "flags & PR_SHADOW_STACK_ALL_MODES" or
 * "~PR_SHADOW_STACK_ALL_MODES").
 */
#define PR_SHADOW_STACK_ALL_MODES \
	(PR_SHADOW_STACK_ENABLE | PR_SHADOW_STACK_WRITE | PR_SHADOW_STACK_PUSH)
|
||||
|
||||
/* based on: arch/arm64/include/asm/sysreg.h */
#define GCS_CAP_VALID_TOKEN 0x1
#define GCS_CAP_ADDR_MASK 0xFFFFFFFFFFFFF000ULL
/*
 * Both helpers convert their argument to unsigned long and keep only the
 * bits selected by GCS_CAP_ADDR_MASK; GCS_CAP() additionally ORs in
 * GCS_CAP_VALID_TOKEN.
 *
 * The argument is parenthesized before the cast so that an expression
 * such as "ptr + 1" is evaluated first and converted as a whole, instead
 * of the cast binding to "ptr" alone.
 */
#define GCS_CAP(x) ((((unsigned long)(x)) & GCS_CAP_ADDR_MASK) | GCS_CAP_VALID_TOKEN)
#define GCS_SIGNAL_CAP(addr) (((unsigned long)(addr)) & GCS_CAP_ADDR_MASK)
|
||||
|
||||
#include <asm/hwcap.h>
|
||||
|
||||
#ifndef HWCAP_GCS
|
||||
#define HWCAP_GCS (1UL << 32)
|
||||
#endif
|
||||
|
||||
#endif /* __UAPI_ASM_GCS_TYPES_H__ */
|
||||
|
|
@ -2,6 +2,7 @@
|
|||
#define UAPI_COMPEL_ASM_TYPES_H__
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdbool.h>
|
||||
#include <signal.h>
|
||||
#include <sys/mman.h>
|
||||
#include <asm/ptrace.h>
|
||||
|
|
@ -16,7 +17,24 @@
|
|||
*/
|
||||
|
||||
typedef struct user_pt_regs user_regs_struct_t;
|
||||
typedef struct user_fpsimd_state user_fpregs_struct_t;
|
||||
|
||||
/*
|
||||
* GCS (Guarded Control Stack)
|
||||
*
|
||||
* This mirrors the kernel definition but renamed to cr_user_gcs
|
||||
* to avoid conflict with kernel headers (/usr/include/asm/ptrace.h).
|
||||
*/
|
||||
struct cr_user_gcs {
|
||||
__u64 features_enabled;
|
||||
__u64 features_locked;
|
||||
__u64 gcspr_el0;
|
||||
};
|
||||
|
||||
struct user_fpregs_struct {
|
||||
struct user_fpsimd_state fpstate;
|
||||
struct cr_user_gcs gcs;
|
||||
};
|
||||
typedef struct user_fpregs_struct user_fpregs_struct_t;
|
||||
|
||||
#define __compel_arch_fetch_thread_area(tid, th) 0
|
||||
#define compel_arch_fetch_thread_area(tctl) 0
|
||||
|
|
@ -39,4 +57,12 @@ typedef struct user_fpsimd_state user_fpregs_struct_t;
|
|||
__NR_##syscall; \
|
||||
})
|
||||
|
||||
extern bool __compel_host_supports_gcs(void);
|
||||
#define compel_host_supports_gcs __compel_host_supports_gcs
|
||||
|
||||
struct parasite_ctl;
|
||||
extern int __parasite_setup_shstk(struct parasite_ctl *ctl,
|
||||
user_fpregs_struct_t *ext_regs);
|
||||
#define parasite_setup_shstk __parasite_setup_shstk
|
||||
|
||||
#endif /* UAPI_COMPEL_ASM_TYPES_H__ */
|
||||
|
|
|
|||
|
|
@ -1,19 +1,29 @@
|
|||
#ifndef UAPI_COMPEL_ASM_SIGFRAME_H__
|
||||
#define UAPI_COMPEL_ASM_SIGFRAME_H__
|
||||
|
||||
#include <asm/sigcontext.h>
|
||||
#include <signal.h>
|
||||
#include <sys/ucontext.h>
|
||||
|
||||
#include <stdint.h>
|
||||
#include <asm/types.h>
|
||||
|
||||
/* Copied from the kernel header arch/arm64/include/uapi/asm/sigcontext.h */
|
||||
|
||||
#define FPSIMD_MAGIC 0x46508001
|
||||
#define GCS_MAGIC 0x47435300
|
||||
|
||||
typedef struct fpsimd_context fpu_state_t;
|
||||
|
||||
struct gcs_context {
|
||||
struct _aarch64_ctx head;
|
||||
__u64 gcspr;
|
||||
__u64 features_enabled;
|
||||
__u64 reserved;
|
||||
};
|
||||
|
||||
struct aux_context {
|
||||
struct fpsimd_context fpsimd;
|
||||
struct gcs_context gcs;
|
||||
/* additional context to be added before "end" */
|
||||
struct _aarch64_ctx end;
|
||||
};
|
||||
|
|
@ -62,6 +72,7 @@ struct cr_sigcontext {
|
|||
#define RT_SIGFRAME_AUX_CONTEXT(rt_sigframe) ((struct aux_context *)&(RT_SIGFRAME_SIGCONTEXT(rt_sigframe)->__reserved))
|
||||
#define RT_SIGFRAME_FPU(rt_sigframe) (&RT_SIGFRAME_AUX_CONTEXT(rt_sigframe)->fpsimd)
|
||||
#define RT_SIGFRAME_OFFSET(rt_sigframe) 0
|
||||
#define RT_SIGFRAME_GCS(rt_sigframe) (&RT_SIGFRAME_AUX_CONTEXT(rt_sigframe)->gcs)
|
||||
|
||||
#define rt_sigframe_erase_sigset(sigframe) memset(&sigframe->uc.uc_sigmask, 0, sizeof(k_rtsigset_t))
|
||||
#define rt_sigframe_copy_sigset(sigframe, from) memcpy(&sigframe->uc.uc_sigmask, from, sizeof(k_rtsigset_t))
|
||||
|
|
|
|||
|
|
@ -2,8 +2,8 @@
|
|||
#include <sys/ptrace.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/uio.h>
|
||||
#include <sys/auxv.h>
|
||||
#include <asm/ptrace.h>
|
||||
#include <linux/elf.h>
|
||||
|
||||
#include <compel/plugins/std/syscall-codes.h>
|
||||
#include "common/page.h"
|
||||
|
|
@ -13,6 +13,8 @@
|
|||
#include "infect.h"
|
||||
#include "infect-priv.h"
|
||||
#include "asm/breakpoints.h"
|
||||
#include "asm/gcs-types.h"
|
||||
#include <linux/prctl.h>
|
||||
|
||||
unsigned __page_size = 0;
|
||||
unsigned __page_shift = 0;
|
||||
|
|
@ -33,24 +35,54 @@ static inline void __always_unused __check_code_syscall(void)
|
|||
BUILD_BUG_ON(!is_log2(sizeof(code_syscall)));
|
||||
}
|
||||
|
||||
bool __compel_host_supports_gcs(void)
|
||||
{
|
||||
unsigned long hwcap = getauxval(AT_HWCAP);
|
||||
return (hwcap & HWCAP_GCS) != 0;
|
||||
}
|
||||
|
||||
static bool __compel_gcs_enabled(struct cr_user_gcs *gcs)
|
||||
{
|
||||
if (!compel_host_supports_gcs())
|
||||
return false;
|
||||
|
||||
return gcs && (gcs->features_enabled & PR_SHADOW_STACK_ENABLE) != 0;
|
||||
}
|
||||
|
||||
/*
 * Fill @sigframe from the general-purpose registers in @regs and the
 * extended state in @fpregs.
 *
 * Three parts of the frame are written: the machine context (regs, sp, pc,
 * pstate), the FPSIMD record and the GCS record of the auxiliary context.
 * When GCS is not enabled for the supplied state the GCS record is zeroed.
 *
 * Always returns 0.
 */
int sigreturn_prep_regs_plain(struct rt_sigframe *sigframe, user_regs_struct_t *regs, user_fpregs_struct_t *fpregs)
{
	struct gcs_context *gcs_rec = RT_SIGFRAME_GCS(sigframe);
	struct fpsimd_context *fpu_rec = RT_SIGFRAME_FPU(sigframe);

	/* General-purpose state goes straight into the machine context. */
	memcpy(sigframe->uc.uc_mcontext.regs, regs->regs, sizeof(regs->regs));

	pr_debug("sigreturn_prep_regs_plain: sp %lx pc %lx\n", (long)regs->sp, (long)regs->pc);

	sigframe->uc.uc_mcontext.pstate = regs->pstate;
	sigframe->uc.uc_mcontext.pc = regs->pc;
	sigframe->uc.uc_mcontext.sp = regs->sp;

	/* FPSIMD record: header, control/status words, then the vector registers. */
	fpu_rec->head.magic = FPSIMD_MAGIC;
	fpu_rec->head.size = sizeof(*fpu_rec);
	fpu_rec->fpcr = fpregs->fpstate.fpcr;
	fpu_rec->fpsr = fpregs->fpstate.fpsr;
	memcpy(fpu_rec->vregs, fpregs->fpstate.vregs, 32 * sizeof(__uint128_t));

	/* Without GCS the record is left blank (magic and size are zero). */
	if (!__compel_gcs_enabled(&fpregs->gcs)) {
		pr_debug("sigframe gcspr=[disabled]\n");
		memset(gcs_rec, 0, sizeof(*gcs_rec));
		return 0;
	}

	gcs_rec->head.magic = GCS_MAGIC;
	gcs_rec->head.size = sizeof(*gcs_rec);
	gcs_rec->reserved = 0;
	gcs_rec->features_enabled = fpregs->gcs.features_enabled;
	/*
	 * The frame records the slot 8 bytes below the task's GCSPR_EL0.
	 * NOTE(review): presumably this is where the cap token lives - confirm
	 * against inject_gcs_cap_token().
	 */
	gcs_rec->gcspr = fpregs->gcs.gcspr_el0 - 8;

	pr_debug("sigframe gcspr=%llx features_enabled=%llx\n", fpregs->gcs.gcspr_el0 - 8, fpregs->gcs.features_enabled);

	return 0;
}
|
||||
|
||||
|
|
@ -59,7 +91,7 @@ int sigreturn_prep_fpu_frame_plain(struct rt_sigframe *sigframe, struct rt_sigfr
|
|||
return 0;
|
||||
}
|
||||
|
||||
int compel_get_task_regs(pid_t pid, user_regs_struct_t *regs, user_fpregs_struct_t *fpsimd, save_regs_t save,
|
||||
int compel_get_task_regs(pid_t pid, user_regs_struct_t *regs, user_fpregs_struct_t *ext_regs, save_regs_t save,
|
||||
void *arg, __maybe_unused unsigned long flags)
|
||||
{
|
||||
struct iovec iov;
|
||||
|
|
@ -74,14 +106,28 @@ int compel_get_task_regs(pid_t pid, user_regs_struct_t *regs, user_fpregs_struct
|
|||
goto err;
|
||||
}
|
||||
|
||||
iov.iov_base = fpsimd;
|
||||
iov.iov_len = sizeof(*fpsimd);
|
||||
iov.iov_base = &ext_regs->fpstate;
|
||||
iov.iov_len = sizeof(ext_regs->fpstate);
|
||||
if ((ret = ptrace(PTRACE_GETREGSET, pid, NT_PRFPREG, &iov))) {
|
||||
pr_perror("Failed to obtain FPU registers for %d", pid);
|
||||
goto err;
|
||||
}
|
||||
|
||||
ret = save(pid, arg, regs, fpsimd);
|
||||
memset(&ext_regs->gcs, 0, sizeof(ext_regs->gcs));
|
||||
|
||||
iov.iov_base = &ext_regs->gcs;
|
||||
iov.iov_len = sizeof(ext_regs->gcs);
|
||||
if (ptrace(PTRACE_GETREGSET, pid, NT_ARM_GCS, &iov) == 0) {
|
||||
pr_info("gcs: GCSPR_EL0 for %d: 0x%llx, features: 0x%llx\n",
|
||||
pid, ext_regs->gcs.gcspr_el0, ext_regs->gcs.features_enabled);
|
||||
|
||||
if (!__compel_gcs_enabled(&ext_regs->gcs))
|
||||
pr_info("gcs: GCS is NOT enabled\n");
|
||||
} else {
|
||||
pr_info("gcs: GCS state not available for %d\n", pid);
|
||||
}
|
||||
|
||||
ret = save(pid, arg, regs, ext_regs);
|
||||
err:
|
||||
return ret;
|
||||
}
|
||||
|
|
@ -90,14 +136,44 @@ int compel_set_task_ext_regs(pid_t pid, user_fpregs_struct_t *ext_regs)
|
|||
{
|
||||
struct iovec iov;
|
||||
|
||||
struct cr_user_gcs gcs;
|
||||
struct iovec gcs_iov = { .iov_base = &gcs, .iov_len = sizeof(gcs) };
|
||||
|
||||
pr_info("Restoring GP/FPU registers for %d\n", pid);
|
||||
|
||||
iov.iov_base = ext_regs;
|
||||
iov.iov_len = sizeof(*ext_regs);
|
||||
iov.iov_base = &ext_regs->fpstate;
|
||||
iov.iov_len = sizeof(ext_regs->fpstate);
|
||||
if (ptrace(PTRACE_SETREGSET, pid, NT_PRFPREG, &iov)) {
|
||||
pr_perror("Failed to set FPU registers for %d", pid);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (ptrace(PTRACE_GETREGSET, pid, NT_ARM_GCS, &gcs_iov) < 0) {
|
||||
pr_warn("gcs: Failed to get GCS for %d\n", pid);
|
||||
} else {
|
||||
ext_regs->gcs = gcs;
|
||||
compel_set_task_gcs_regs(pid, ext_regs);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Write the GCS register set held in ext_regs->gcs into task @pid using
 * PTRACE_SETREGSET with the NT_ARM_GCS note type.
 *
 * Returns 0 on success and -1 (after logging errno) if ptrace fails.
 */
int compel_set_task_gcs_regs(pid_t pid, user_fpregs_struct_t *ext_regs)
{
	struct iovec gcs_vec = {
		.iov_base = &ext_regs->gcs,
		.iov_len = sizeof(ext_regs->gcs),
	};

	pr_info("gcs: restoring GCS registers for %d\n", pid);
	pr_info("gcs: restoring GCS: gcspr=%llx features=%llx\n",
		ext_regs->gcs.gcspr_el0, ext_regs->gcs.features_enabled);

	if (ptrace(PTRACE_SETREGSET, pid, NT_ARM_GCS, &gcs_vec) == 0)
		return 0;

	pr_perror("gcs: Failed to set GCS registers for %d", pid);
	return -1;
}
|
||||
|
||||
|
|
@ -286,3 +362,68 @@ int ptrace_flush_breakpoints(pid_t pid)
|
|||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int inject_gcs_cap_token(struct parasite_ctl *ctl, pid_t pid, struct cr_user_gcs *gcs)
|
||||
{
|
||||
struct iovec gcs_iov = { .iov_base = gcs, .iov_len = sizeof(*gcs) };
|
||||
|
||||
uint64_t token_addr = gcs->gcspr_el0 - 8;
|
||||
uint64_t sigtramp_addr = gcs->gcspr_el0 - 16;
|
||||
|
||||
uint64_t cap_token = ALIGN_DOWN(GCS_SIGNAL_CAP(token_addr), 8);
|
||||
unsigned long restorer_addr;
|
||||
|
||||
pr_info("gcs: (setup) CAP token: 0x%lx at addr: 0x%lx\n", cap_token, token_addr);
|
||||
|
||||
/* Inject capability token at gcspr_el0 - 8 */
|
||||
if (ptrace(PTRACE_POKEDATA, pid, (void *)token_addr, cap_token)) {
|
||||
pr_perror("gcs: (setup) Inject GCS cap token failed");
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Inject restorer trampoline address (gcspr_el0 - 16) */
|
||||
restorer_addr = ctl->parasite_ip;
|
||||
if (ptrace(PTRACE_POKEDATA, pid, (void *)sigtramp_addr, restorer_addr)) {
|
||||
pr_perror("gcs: (setup) Inject GCS restorer failed");
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Update GCSPR_EL0 */
|
||||
gcs->gcspr_el0 = token_addr;
|
||||
if (ptrace(PTRACE_SETREGSET, pid, NT_ARM_GCS, &gcs_iov)) {
|
||||
pr_perror("gcs: PTRACE_SETREGS FAILED");
|
||||
return -1;
|
||||
}
|
||||
|
||||
pr_debug("gcs: parasite_ip=%#lx sp=%#llx gcspr_el0=%#llx\n",
|
||||
ctl->parasite_ip, ctl->orig.regs.sp, gcs->gcspr_el0);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int parasite_setup_shstk(struct parasite_ctl *ctl, user_fpregs_struct_t *ext_regs)
|
||||
{
|
||||
struct cr_user_gcs gcs;
|
||||
struct iovec gcs_iov = { .iov_base = &gcs, .iov_len = sizeof(gcs) };
|
||||
pid_t pid = ctl->rpid;
|
||||
|
||||
if(!__compel_host_supports_gcs())
|
||||
return 0;
|
||||
|
||||
if (ptrace(PTRACE_GETREGSET, pid, NT_ARM_GCS, &gcs_iov) != 0) {
|
||||
pr_perror("GCS state not available for %d", pid);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (!__compel_gcs_enabled(&gcs))
|
||||
return 0;
|
||||
|
||||
if (inject_gcs_cap_token(ctl, pid, &gcs)) {
|
||||
pr_perror("Failed to inject GCS cap token for %d", pid);
|
||||
return -1;
|
||||
}
|
||||
|
||||
pr_info("gcs: GCS enabled for %d\n", pid);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -85,7 +85,7 @@ timer_settime 110 258 (kernel_timer_t timer_id, int flags, const struct itimer
|
|||
timer_gettime 108 259 (int timer_id, const struct itimerspec *setting)
|
||||
timer_getoverrun 109 260 (int timer_id)
|
||||
timer_delete 111 261 (kernel_timer_t timer_id)
|
||||
clock_gettime 113 263 (const clockid_t which_clock, const struct timespec *tp)
|
||||
clock_gettime 113 263 (clockid_t which_clock, struct timespec *tp)
|
||||
exit_group 94 248 (int error_code)
|
||||
set_robust_list 99 338 (struct robust_list_head *head, size_t len)
|
||||
get_robust_list 100 339 (int pid, struct robust_list_head **head_ptr, size_t *len_ptr)
|
||||
|
|
@ -124,3 +124,4 @@ openat2 437 437 (int dirfd, char *pathname, struct open_how *how, size_t size
|
|||
pidfd_getfd 438 438 (int pidfd, int targetfd, unsigned int flags)
|
||||
rseq 293 398 (void *rseq, uint32_t rseq_len, int flags, uint32_t sig)
|
||||
membarrier 283 389 (int cmd, unsigned int flags, int cpu_id)
|
||||
map_shadow_stack 453 ! (unsigned long addr, unsigned long size, unsigned int flags)
|
||||
|
|
@ -46,7 +46,7 @@ __NR_sys_timer_gettime 108 sys_timer_gettime (int timer_id, const struct itimer
|
|||
__NR_sys_timer_getoverrun 109 sys_timer_getoverrun (int timer_id)
|
||||
__NR_sys_timer_settime 110 sys_timer_settime (kernel_timer_t timer_id, int flags, const struct itimerspec *new_setting, struct itimerspec *old_setting)
|
||||
__NR_sys_timer_delete 111 sys_timer_delete (kernel_timer_t timer_id)
|
||||
__NR_clock_gettime 113 sys_clock_gettime (const clockid_t which_clock, const struct timespec *tp)
|
||||
__NR_clock_gettime 113 sys_clock_gettime (clockid_t which_clock, struct timespec *tp)
|
||||
__NR_sched_setscheduler 119 sys_sched_setscheduler (int pid, int policy, struct sched_param *p)
|
||||
__NR_restart_syscall 128 sys_restart_syscall (void)
|
||||
__NR_kill 129 sys_kill (long pid, int sig)
|
||||
|
|
|
|||
|
|
@ -84,7 +84,7 @@ __NR_sys_timer_settime 5217 sys_timer_settime (kernel_timer_t timer_id, int fl
|
|||
__NR_sys_timer_gettime 5218 sys_timer_gettime (int timer_id, const struct itimerspec *setting)
|
||||
__NR_sys_timer_getoverrun 5219 sys_timer_getoverrun (int timer_id)
|
||||
__NR_sys_timer_delete 5220 sys_timer_delete (kernel_timer_t timer_id)
|
||||
__NR_clock_gettime 5222 sys_clock_gettime (const clockid_t which_clock, const struct timespec *tp)
|
||||
__NR_clock_gettime 5222 sys_clock_gettime (clockid_t which_clock, struct timespec *tp)
|
||||
__NR_exit_group 5205 sys_exit_group (int error_code)
|
||||
__NR_set_thread_area 5242 sys_set_thread_area (unsigned long *addr)
|
||||
__NR_openat 5247 sys_openat (int dfd, const char *filename, int flags, int mode)
|
||||
|
|
|
|||
|
|
@ -5,18 +5,31 @@
|
|||
#include "piegen.h"
|
||||
#include "log.h"
|
||||
|
||||
static const unsigned char __maybe_unused elf_ident_64_le[EI_NIDENT] = {
|
||||
0x7f, 0x45, 0x4c, 0x46, 0x02, 0x01, 0x01, 0x00, /* clang-format */
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
};
|
||||
|
||||
extern int __handle_elf(void *mem, size_t size);
|
||||
|
||||
/*
 * Validate the ELF identification of the image at @mem (@size bytes) and
 * pass it on to __handle_elf().
 *
 * Only 64-bit little-endian ELF images are accepted. A non-zero ABI version
 * is reported with a warning but does not reject the image.
 *
 * Returns the result of __handle_elf() for an accepted image, or -EINVAL
 * when the buffer is missing, too small to hold an ELF header, carries a
 * wrong magic, or has an unsupported class or data encoding.
 */
int handle_binary(void *mem, size_t size)
{
	Elf64_Ehdr *ehdr = (Elf64_Ehdr *)mem;

	/* e_ident is inspected below, so the header must really be there */
	if (!mem || size < sizeof(*ehdr)) {
		pr_err("ELF image is too small to hold a header\n");
		return -EINVAL;
	}

	/* check ELF magic */
	if (ehdr->e_ident[EI_MAG0] != ELFMAG0 ||
	    ehdr->e_ident[EI_MAG1] != ELFMAG1 ||
	    ehdr->e_ident[EI_MAG2] != ELFMAG2 ||
	    ehdr->e_ident[EI_MAG3] != ELFMAG3) {
		pr_err("Invalid ELF magic\n");
		return -EINVAL;
	}

	/* check ELF class and data encoding */
	if (ehdr->e_ident[EI_CLASS] != ELFCLASS64 ||
	    ehdr->e_ident[EI_DATA] != ELFDATA2LSB) {
		pr_err("Unsupported ELF class or data encoding\n");
		return -EINVAL;
	}

	/* an unexpected ABI version is worth a note but is not fatal */
	if (ehdr->e_ident[EI_ABIVERSION] != 0)
		pr_warn("Unusual ABI version: %d\n", ehdr->e_ident[EI_ABIVERSION]);

	return __handle_elf(mem, size);
}
|
||||
|
|
|
|||
|
|
@ -82,7 +82,7 @@ __NR_sys_timer_settime 241 sys_timer_settime (kernel_timer_t timer_id, int flag
|
|||
__NR_sys_timer_gettime 242 sys_timer_gettime (int timer_id, const struct itimerspec *setting)
|
||||
__NR_sys_timer_getoverrun 243 sys_timer_getoverrun (int timer_id)
|
||||
__NR_sys_timer_delete 244 sys_timer_delete (kernel_timer_t timer_id)
|
||||
__NR_clock_gettime 246 sys_clock_gettime (const clockid_t which_clock, const struct timespec *tp)
|
||||
__NR_clock_gettime 246 sys_clock_gettime (clockid_t which_clock, struct timespec *tp)
|
||||
__NR_exit_group 234 sys_exit_group (int error_code)
|
||||
__NR_waitid 272 sys_waitid (int which, pid_t pid, struct siginfo *infop, int options, struct rusage *ru)
|
||||
__NR_set_robust_list 300 sys_set_robust_list (struct robust_list_head *head, size_t len)
|
||||
|
|
|
|||
|
|
@ -85,7 +85,7 @@ timer_settime 110 258 (kernel_timer_t timer_id, int flags, const struct itimer
|
|||
timer_gettime 108 259 (int timer_id, const struct itimerspec *setting)
|
||||
timer_getoverrun 109 260 (int timer_id)
|
||||
timer_delete 111 261 (kernel_timer_t timer_id)
|
||||
clock_gettime 113 263 (const clockid_t which_clock, const struct timespec *tp)
|
||||
clock_gettime 113 263 (clockid_t which_clock, struct timespec *tp)
|
||||
exit_group 94 248 (int error_code)
|
||||
set_robust_list 99 338 (struct robust_list_head *head, size_t len)
|
||||
get_robust_list 100 339 (int pid, struct robust_list_head **head_ptr, size_t *len_ptr)
|
||||
|
|
|
|||
|
|
@ -82,7 +82,7 @@ __NR_sys_timer_settime 255 sys_timer_settime (kernel_timer_t timer_id, int flag
|
|||
__NR_sys_timer_gettime 256 sys_timer_gettime (int timer_id, const struct itimerspec *setting)
|
||||
__NR_sys_timer_getoverrun 257 sys_timer_getoverrun (int timer_id)
|
||||
__NR_sys_timer_delete 258 sys_timer_delete (kernel_timer_t timer_id)
|
||||
__NR_clock_gettime 260 sys_clock_gettime (const clockid_t which_clock, const struct timespec *tp)
|
||||
__NR_clock_gettime 260 sys_clock_gettime (clockid_t which_clock, struct timespec *tp)
|
||||
__NR_exit_group 248 sys_exit_group (int error_code)
|
||||
__NR_waitid 281 sys_waitid (int which, pid_t pid, struct siginfo *infop, int options, struct rusage *ru)
|
||||
__NR_set_robust_list 304 sys_set_robust_list (struct robust_list_head *head, size_t len)
|
||||
|
|
|
|||
|
|
@ -85,7 +85,7 @@ __NR_sys_timer_settime 223 sys_timer_settime (kernel_timer_t timer_id, int fla
|
|||
__NR_sys_timer_gettime 224 sys_timer_gettime (int timer_id, const struct itimerspec *setting)
|
||||
__NR_sys_timer_getoverrun 225 sys_timer_getoverrun (int timer_id)
|
||||
__NR_sys_timer_delete 226 sys_timer_delete (kernel_timer_t timer_id)
|
||||
__NR_clock_gettime 228 sys_clock_gettime (const clockid_t which_clock, const struct timespec *tp)
|
||||
__NR_clock_gettime 228 sys_clock_gettime (clockid_t which_clock, struct timespec *tp)
|
||||
__NR_exit_group 231 sys_exit_group (int error_code)
|
||||
__NR_openat 257 sys_openat (int dfd, const char *filename, int flags, int mode)
|
||||
__NR_waitid 247 sys_waitid (int which, pid_t pid, struct siginfo *infop, int options, struct rusage *ru)
|
||||
|
|
|
|||
|
|
@ -761,7 +761,7 @@ bool __compel_shstk_enabled(user_fpregs_struct_t *ext_regs)
|
|||
return false;
|
||||
}
|
||||
|
||||
int parasite_setup_shstk(struct parasite_ctl *ctl, user_fpregs_struct_t *ext_regs)
|
||||
int parasite_setup_shstk(struct parasite_ctl *ctl, __maybe_unused user_fpregs_struct_t *ext_regs)
|
||||
{
|
||||
pid_t pid = ctl->rpid;
|
||||
unsigned long sa_restorer = ctl->parasite_ip;
|
||||
|
|
|
|||
|
|
@ -72,6 +72,7 @@ extern bool arch_can_dump_task(struct parasite_ctl *ctl);
|
|||
extern int compel_get_task_regs(pid_t pid, user_regs_struct_t *regs, user_fpregs_struct_t *ext_regs, save_regs_t save,
|
||||
void *arg, unsigned long flags);
|
||||
extern int compel_set_task_ext_regs(pid_t pid, user_fpregs_struct_t *ext_regs);
|
||||
extern int compel_set_task_gcs_regs(pid_t pid, user_fpregs_struct_t *ext_regs);
|
||||
extern int arch_fetch_sas(struct parasite_ctl *ctl, struct rt_sigframe *s);
|
||||
extern int sigreturn_prep_regs_plain(struct rt_sigframe *sigframe, user_regs_struct_t *regs,
|
||||
user_fpregs_struct_t *fpregs);
|
||||
|
|
|
|||
|
|
@ -13,6 +13,15 @@
|
|||
|
||||
#define PARASITE_START_AREA_MIN (4096)
|
||||
|
||||
#define PARASITE_STACK_SIZE (16 << 10)
|
||||
/*
|
||||
* A stack redzone is a small, protected region of memory located immediately
|
||||
* after a parasite stack. It is intended to remain unchanged. While it can be
|
||||
* implemented as a guard page, we want to avoid the overhead of additional
|
||||
* remote system calls.
|
||||
*/
|
||||
#define PARASITE_STACK_REDZONE 128
|
||||
|
||||
extern int __must_check compel_interrupt_task(int pid);
|
||||
|
||||
struct seize_task_status {
|
||||
|
|
@ -183,6 +192,14 @@ void compel_set_thread_ip(struct parasite_thread_ctl *tctl, uint64_t v);
|
|||
|
||||
extern void compel_get_stack(struct parasite_ctl *ctl, void **rstack, void **r_thread_stack);
|
||||
|
||||
/*
 * Generic fallback used when no compel_host_supports_gcs macro has been
 * defined before this point: GCS is reported as unsupported.
 *
 * The macro is defined to its own name. That keeps "#ifndef" guards working
 * while calls such as compel_host_supports_gcs() still reach the inline
 * function below. An empty definition would make the preprocessor erase the
 * identifier and leave just "()" behind.
 */
#ifndef compel_host_supports_gcs
static inline bool compel_host_supports_gcs(void)
{
	return false;
}
#define compel_host_supports_gcs compel_host_supports_gcs
#endif
|
||||
|
||||
#ifndef compel_shstk_enabled
|
||||
static inline bool compel_shstk_enabled(user_fpregs_struct_t *ext_regs)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -38,8 +38,6 @@
|
|||
#define UNIX_PATH_MAX (sizeof(struct sockaddr_un) - (size_t)((struct sockaddr_un *)0)->sun_path)
|
||||
#endif
|
||||
|
||||
#define PARASITE_STACK_SIZE (16 << 10)
|
||||
|
||||
#ifndef SECCOMP_MODE_DISABLED
|
||||
#define SECCOMP_MODE_DISABLED 0
|
||||
#endif
|
||||
|
|
@ -1056,6 +1054,16 @@ int compel_infect_no_daemon(struct parasite_ctl *ctl, unsigned long nr_threads,
|
|||
|
||||
memcpy(ctl->local_map, ctl->pblob.hdr.mem, ctl->pblob.hdr.bsize);
|
||||
compel_relocs_apply(ctl->local_map, ctl->remote_map, &ctl->pblob);
|
||||
/*
|
||||
* Ensure the infected thread sees the updated code.
|
||||
*
|
||||
* On architectures like ARM64, the Data Cache (D-cache) and
|
||||
* Instruction Cache (I-cache) are not automatically coherent.
|
||||
* Modifications land in the D-cache, so we must flush (clean) the
|
||||
* D-cache to push changes to RAM to ensure the CPU fetches the updated
|
||||
* instructions.
|
||||
*/
|
||||
__builtin___clear_cache(ctl->local_map, ctl->local_map + ctl->pblob.hdr.bsize);
|
||||
|
||||
p = parasite_size;
|
||||
|
||||
|
|
@ -1064,7 +1072,7 @@ int compel_infect_no_daemon(struct parasite_ctl *ctl, unsigned long nr_threads,
|
|||
|
||||
p += RESTORE_STACK_SIGFRAME;
|
||||
p += PARASITE_STACK_SIZE;
|
||||
ctl->rstack = ctl->remote_map + p;
|
||||
ctl->rstack = ctl->remote_map + p - PARASITE_STACK_REDZONE;
|
||||
|
||||
/*
|
||||
* x86-64 ABI requires a 16 bytes aligned stack.
|
||||
|
|
@ -1078,7 +1086,7 @@ int compel_infect_no_daemon(struct parasite_ctl *ctl, unsigned long nr_threads,
|
|||
|
||||
if (nr_threads > 1) {
|
||||
p += PARASITE_STACK_SIZE;
|
||||
ctl->r_thread_stack = ctl->remote_map + p;
|
||||
ctl->r_thread_stack = ctl->remote_map + p - PARASITE_STACK_REDZONE;
|
||||
}
|
||||
|
||||
ret = arch_fetch_sas(ctl, ctl->rsigframe);
|
||||
|
|
|
|||
|
|
@ -3,6 +3,11 @@ CFLAGS ?= -O2 -g -Wall -Werror
|
|||
|
||||
COMPEL := ../../../compel/compel-host
|
||||
|
||||
ifeq ($(GCS_ENABLE),1)
|
||||
CFLAGS += -mbranch-protection=standard -DGCS_TEST_ENABLE=1
|
||||
LDFLAGS += -z experimental-gcs=check
|
||||
endif
|
||||
|
||||
all: victim spy
|
||||
|
||||
run:
|
||||
|
|
@ -17,7 +22,7 @@ clean:
|
|||
rm -f parasite.o
|
||||
|
||||
victim: victim.c
|
||||
$(CC) $(CFLAGS) -o $@ $^
|
||||
$(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS)
|
||||
|
||||
spy: spy.c parasite.h
|
||||
$(CC) $(CFLAGS) $(shell $(COMPEL) includes) -o $@ $< $(shell $(COMPEL) --static libs)
|
||||
|
|
|
|||
|
|
@ -112,6 +112,9 @@ int main(int argc, char **argv)
|
|||
return -1;
|
||||
}
|
||||
|
||||
#ifdef GCS_TEST_ENABLE
|
||||
setenv("GLIBC_TUNABLES", "glibc.cpu.aarch64_gcs=1:glibc.cpu.aarch64_gcs_policy=2", 1);
|
||||
#endif
|
||||
pid = vfork();
|
||||
if (pid == 0) {
|
||||
close(p_in[1]);
|
||||
|
|
|
|||
|
|
@ -50,70 +50,6 @@ static void *get_parasite_rstack_start(struct parasite_ctl *ctl)
|
|||
return rstack_start;
|
||||
}
|
||||
|
||||
static int page_writable(struct parasite_ctl *ctl, int pid, void *page)
|
||||
{
|
||||
FILE *maps;
|
||||
size_t maps_line_len = 0;
|
||||
char *maps_line = NULL;
|
||||
char victim_maps_path[6 + 11 + 5 + 1];
|
||||
int written;
|
||||
int ret = 0;
|
||||
|
||||
if (((uintptr_t)page & (page_size() - 1)) != 0) {
|
||||
fprintf(stderr, "Page address not aligned\n");
|
||||
ret = -1;
|
||||
goto done;
|
||||
}
|
||||
|
||||
written = snprintf(victim_maps_path, sizeof(victim_maps_path), "/proc/%d/maps", pid);
|
||||
if (written < 0 || written >= sizeof(victim_maps_path)) {
|
||||
fprintf(stderr, "Failed to create path string to victim's /proc/%d/maps file\n", pid);
|
||||
ret = -1;
|
||||
goto done;
|
||||
}
|
||||
|
||||
maps = fopen(victim_maps_path, "r");
|
||||
if (maps == NULL) {
|
||||
perror("Can't open victim's /proc/$pid/maps");
|
||||
ret = -1;
|
||||
goto done;
|
||||
}
|
||||
|
||||
while (getline(&maps_line, &maps_line_len, maps) != -1) {
|
||||
unsigned long vmstart, vmend;
|
||||
char r, w;
|
||||
|
||||
if (sscanf(maps_line, "%lx-%lx %c%c", &vmstart, &vmend, &r, &w) < 4) {
|
||||
fprintf(stderr, "Can't parse victim's /proc/%d/maps; line: %s\n", pid, maps_line);
|
||||
ret = -1;
|
||||
goto free_linebuf;
|
||||
}
|
||||
|
||||
if (page >= (void *)vmstart && page < (void *)vmend) {
|
||||
if (w == 'w') {
|
||||
if (r != 'r') {
|
||||
fprintf(stderr, "Expecting writable memory to also be readable");
|
||||
ret = -1;
|
||||
goto free_linebuf;
|
||||
}
|
||||
ret = 1;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (errno) {
|
||||
perror("Can't read victim's /proc/$pid/maps");
|
||||
ret = -1;
|
||||
}
|
||||
|
||||
free_linebuf:
|
||||
free(maps_line);
|
||||
fclose(maps);
|
||||
done:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void *read_proc_mem(int pid, void *offset, size_t len)
|
||||
{
|
||||
char victim_mem_path[6 + 11 + 4 + 1];
|
||||
|
|
@ -153,51 +89,6 @@ freebuf:
|
|||
return NULL;
|
||||
}
|
||||
|
||||
static int save_data_near_stack(struct parasite_ctl *ctl, int pid, void *stack, void **saved_data,
|
||||
size_t *saved_data_size)
|
||||
{
|
||||
size_t page_mask = page_size() - 1;
|
||||
size_t saved_size = 0;
|
||||
size_t stack_size_last_page = (uintptr_t)stack & page_mask;
|
||||
void *next_page = stack;
|
||||
|
||||
if (stack_size_last_page != 0) {
|
||||
size_t empty_space_last_page = page_size() - stack_size_last_page;
|
||||
saved_size = min(empty_space_last_page, (size_t)SAVED_DATA_MAX);
|
||||
next_page += page_size() - stack_size_last_page;
|
||||
}
|
||||
|
||||
while (saved_size < SAVED_DATA_MAX && next_page != NULL) {
|
||||
switch (page_writable(ctl, pid, next_page)) {
|
||||
case 1:
|
||||
saved_size = min((size_t)(saved_size + page_size()), (size_t)SAVED_DATA_MAX);
|
||||
next_page += page_size();
|
||||
break;
|
||||
case 0:
|
||||
next_page = NULL;
|
||||
break;
|
||||
default:
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
if (saved_size > 0) {
|
||||
void *sd;
|
||||
|
||||
sd = read_proc_mem(pid, stack, saved_size);
|
||||
if (sd == NULL)
|
||||
return -1;
|
||||
|
||||
*saved_data = sd;
|
||||
} else {
|
||||
*saved_data = NULL;
|
||||
}
|
||||
|
||||
*saved_data_size = saved_size;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int check_saved_data(struct parasite_ctl *ctl, int pid, void *stack, void *saved_data, size_t saved_data_size)
|
||||
{
|
||||
if (saved_data != NULL) {
|
||||
|
|
@ -221,7 +112,7 @@ static int do_infection(int pid)
|
|||
struct infect_ctx *ictx;
|
||||
int *arg;
|
||||
void *stack;
|
||||
size_t saved_data_size;
|
||||
size_t saved_data_size = PARASITE_STACK_REDZONE;
|
||||
int saved_data_check;
|
||||
|
||||
compel_log_init(print_vmsg, COMPEL_LOG_DEBUG);
|
||||
|
|
@ -257,8 +148,6 @@ static int do_infection(int pid)
|
|||
err_and_ret("Can't register cleanup function with atexit\n");
|
||||
|
||||
stack = get_parasite_rstack_start(ctl);
|
||||
if (save_data_near_stack(ctl, pid, stack, &saved_data, &saved_data_size))
|
||||
err_and_ret("Can't save data above stack\n");
|
||||
|
||||
if (compel_start_daemon(ctl))
|
||||
err_and_ret("Can't start daemon in victim\n");
|
||||
|
|
|
|||
|
|
@ -1,19 +0,0 @@
|
|||
# Required packages for development in Debian
|
||||
build-essential
|
||||
libprotobuf-dev
|
||||
libprotobuf-c-dev
|
||||
protobuf-c-compiler
|
||||
protobuf-compiler
|
||||
python3-protobuf
|
||||
libnet-dev
|
||||
|
||||
# Extra packages, required for testing and building other tools
|
||||
pkg-config
|
||||
libnl-3-dev
|
||||
libbsd0
|
||||
libbsd-dev
|
||||
iproute2
|
||||
libcap-dev
|
||||
libaio-dev
|
||||
python3-yaml
|
||||
libnl-route-3-dev
|
||||
42
contrib/dependencies/apk-packages.sh
Executable file
42
contrib/dependencies/apk-packages.sh
Executable file
|
|
@ -0,0 +1,42 @@
|
|||
#!/usr/bin/env sh
|
||||
|
||||
apk add --no-cache \
|
||||
asciidoctor \
|
||||
bash \
|
||||
build-base \
|
||||
coreutils \
|
||||
e2fsprogs \
|
||||
elfutils-dev \
|
||||
git \
|
||||
gnutls-dev \
|
||||
go \
|
||||
ip6tables \
|
||||
iproute2 \
|
||||
iptables \
|
||||
iptables-legacy \
|
||||
libaio-dev \
|
||||
libbsd-dev \
|
||||
libcap-dev \
|
||||
libcap-utils \
|
||||
libdrm-dev \
|
||||
libnet-dev \
|
||||
libnl3-dev \
|
||||
libtraceevent-dev \
|
||||
libtracefs-dev \
|
||||
nftables \
|
||||
nftables-dev \
|
||||
perl \
|
||||
pkgconfig \
|
||||
procps \
|
||||
protobuf-c-compiler \
|
||||
protobuf-c-dev \
|
||||
protobuf-dev \
|
||||
py3-importlib-metadata \
|
||||
py3-pip \
|
||||
py3-protobuf \
|
||||
py3-yaml \
|
||||
python3 \
|
||||
sudo \
|
||||
tar \
|
||||
util-linux \
|
||||
util-linux-dev
|
||||
37
contrib/dependencies/apt-cross-packages.sh
Executable file
37
contrib/dependencies/apt-cross-packages.sh
Executable file
|
|
@ -0,0 +1,37 @@
|
|||
#!/usr/bin/env sh
|
||||
|
||||
APT_INSTALL="$(cd "$(dirname "$0")/.." >/dev/null 2>&1 && pwd)/apt-install"
|
||||
if [ ! -x "$APT_INSTALL" ]; then
|
||||
echo "Error: apt-install not found or not executable"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
"$APT_INSTALL" \
|
||||
crossbuild-essential-"${DEBIAN_ARCH}" \
|
||||
iproute2:"${DEBIAN_ARCH}" \
|
||||
libaio-dev:"${DEBIAN_ARCH}" \
|
||||
libbz2-dev:"${DEBIAN_ARCH}" \
|
||||
libc6-"${DEBIAN_ARCH}"-cross \
|
||||
libc6-dev-"${DEBIAN_ARCH}"-cross \
|
||||
libcap-dev:"${DEBIAN_ARCH}" \
|
||||
libdrm-dev:"${DEBIAN_ARCH}" \
|
||||
libelf-dev:"${DEBIAN_ARCH}" \
|
||||
libexpat1-dev:"${DEBIAN_ARCH}" \
|
||||
libgnutls28-dev:"${DEBIAN_ARCH}" \
|
||||
libnet-dev:"${DEBIAN_ARCH}" \
|
||||
libnftables-dev:"${DEBIAN_ARCH}" \
|
||||
libnl-3-dev:"${DEBIAN_ARCH}" \
|
||||
libnl-route-3-dev:"${DEBIAN_ARCH}" \
|
||||
libprotobuf-c-dev:"${DEBIAN_ARCH}" \
|
||||
libprotobuf-dev:"${DEBIAN_ARCH}" \
|
||||
libssl-dev:"${DEBIAN_ARCH}" \
|
||||
libtraceevent-dev:"${DEBIAN_ARCH}" \
|
||||
libtracefs-dev:"${DEBIAN_ARCH}" \
|
||||
ncurses-dev:"${DEBIAN_ARCH}" \
|
||||
uuid-dev:"${DEBIAN_ARCH}" \
|
||||
build-essential \
|
||||
pkg-config \
|
||||
git \
|
||||
protobuf-c-compiler \
|
||||
protobuf-compiler \
|
||||
python3-protobuf
|
||||
44
contrib/dependencies/apt-packages.sh
Executable file
44
contrib/dependencies/apt-packages.sh
Executable file
|
|
@ -0,0 +1,44 @@
|
|||
#!/usr/bin/env sh
|
||||
|
||||
APT_INSTALL="$(cd "$(dirname "$0")/.." >/dev/null 2>&1 && pwd)/apt-install"
|
||||
if [ ! -x "$APT_INSTALL" ]; then
|
||||
echo "Error: apt-install not found or not executable"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
"$APT_INSTALL" \
|
||||
asciidoctor \
|
||||
bash \
|
||||
bsdmainutils \
|
||||
build-essential \
|
||||
gdb \
|
||||
git-core \
|
||||
iproute2 \
|
||||
iptables \
|
||||
kmod \
|
||||
libaio-dev \
|
||||
libbsd-dev \
|
||||
libcap-dev \
|
||||
libdrm-dev \
|
||||
libelf-dev \
|
||||
libgnutls28-dev \
|
||||
libgnutls30 \
|
||||
libnet-dev \
|
||||
libnl-3-dev \
|
||||
libnl-route-3-dev \
|
||||
libperl-dev \
|
||||
libprotobuf-c-dev \
|
||||
libprotobuf-dev \
|
||||
libselinux-dev \
|
||||
libtraceevent-dev \
|
||||
libtracefs-dev \
|
||||
pkg-config \
|
||||
protobuf-c-compiler \
|
||||
protobuf-compiler \
|
||||
python3-importlib-metadata \
|
||||
python3-pip \
|
||||
python3-protobuf \
|
||||
python3-yaml \
|
||||
time \
|
||||
util-linux \
|
||||
uuid-dev
|
||||
40
contrib/dependencies/dnf-packages.sh
Executable file
40
contrib/dependencies/dnf-packages.sh
Executable file
|
|
@ -0,0 +1,40 @@
|
|||
#!/usr/bin/env sh
|
||||
|
||||
dnf install -y \
|
||||
asciidoc \
|
||||
binutils \
|
||||
elfutils-libelf-devel \
|
||||
gcc \
|
||||
git \
|
||||
glibc-devel \
|
||||
gnutls-devel \
|
||||
iproute \
|
||||
iptables \
|
||||
libaio-devel \
|
||||
libasan \
|
||||
libbpf-devel \
|
||||
libbsd-devel \
|
||||
libcap-devel \
|
||||
libdrm-devel \
|
||||
libnet-devel \
|
||||
libnl3-devel \
|
||||
libselinux-devel \
|
||||
libtraceevent-devel \
|
||||
libtracefs-devel \
|
||||
libuuid-devel \
|
||||
make \
|
||||
nftables \
|
||||
pkg-config \
|
||||
protobuf \
|
||||
protobuf-c \
|
||||
protobuf-c-devel \
|
||||
protobuf-compiler \
|
||||
protobuf-devel \
|
||||
python-devel \
|
||||
python3-importlib-metadata \
|
||||
python3-protobuf \
|
||||
python3-pyyaml \
|
||||
python3-setuptools \
|
||||
python3-wheel \
|
||||
rubygem-asciidoctor \
|
||||
xmlto
|
||||
34
contrib/dependencies/pacman-packages.sh
Executable file
34
contrib/dependencies/pacman-packages.sh
Executable file
|
|
@ -0,0 +1,34 @@
|
|||
#!/usr/bin/env sh
|
||||
|
||||
pacman -Syu --noconfirm \
|
||||
asciidoctor \
|
||||
base-devel \
|
||||
bash \
|
||||
coreutils \
|
||||
diffutils \
|
||||
git \
|
||||
gnutls \
|
||||
go \
|
||||
iproute2 \
|
||||
iptables \
|
||||
libaio \
|
||||
libbsd \
|
||||
libcap \
|
||||
libdrm \
|
||||
libelf \
|
||||
libnet \
|
||||
libnl \
|
||||
libtraceevent \
|
||||
libtracefs \
|
||||
nftables \
|
||||
pkg-config \
|
||||
protobuf \
|
||||
protobuf-c \
|
||||
python-importlib-metadata \
|
||||
python-pip \
|
||||
python-protobuf \
|
||||
python-yaml \
|
||||
sudo \
|
||||
tar \
|
||||
util-linux \
|
||||
util-linux-libs
|
||||
|
|
@ -418,7 +418,7 @@ resolve_path() {
|
|||
local p
|
||||
|
||||
p="${2}"
|
||||
if which realpath > /dev/null; then
|
||||
if command -v realpath > /dev/null; then
|
||||
p=$(realpath "${p}")
|
||||
fi
|
||||
${ECHO} "${1}: ${p}"
|
||||
|
|
@ -427,7 +427,7 @@ resolve_path() {
|
|||
resolve_cmd() {
|
||||
local cpath
|
||||
|
||||
cpath=$(which "${2}")
|
||||
cpath=$(command -v "${2}")
|
||||
resolve_path "${1}" "${cpath}"
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -55,6 +55,7 @@ status = {
|
|||
"VMA_AREA_VVAR": 1 << 12,
|
||||
"VMA_AREA_AIORING": 1 << 13,
|
||||
"VMA_AREA_MEMFD": 1 << 14,
|
||||
"VMA_AREA_UPROBES": 1 << 17,
|
||||
"VMA_AREA_UNSUPP": 1 << 31
|
||||
}
|
||||
|
||||
|
|
@ -793,7 +794,9 @@ class coredump_generator:
|
|||
off = 0 # in pages
|
||||
for m in pagemap[1:]:
|
||||
found = False
|
||||
for i in range(m["nr_pages"]):
|
||||
num_pages = m.get("nr_pages", m["compat_nr_pages"])
|
||||
|
||||
for i in range(num_pages):
|
||||
if m["vaddr"] + i * PAGESIZE == page_no * PAGESIZE:
|
||||
found = True
|
||||
break
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta"
|
|||
name = "crit"
|
||||
description = "CRiu Image Tool"
|
||||
authors = [
|
||||
{name = "CRIU team", email = "criu@openvz.org"},
|
||||
{name = "CRIU team", email = "criu@lists.linux.dev"},
|
||||
]
|
||||
license = {text = "GPLv2"}
|
||||
dynamic = ["version"]
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@
|
|||
name = crit
|
||||
description = CRiu Image Tool
|
||||
author = CRIU team
|
||||
author_email = criu@openvz.org
|
||||
author_email = criu@lists.linux.dev
|
||||
license = GPLv2
|
||||
version = attr: crit.__version__
|
||||
|
||||
|
|
|
|||
|
|
@ -6,3 +6,4 @@ obj-y += cpu.o
|
|||
obj-y += crtools.o
|
||||
obj-y += sigframe.o
|
||||
obj-y += bitops.o
|
||||
obj-y += gcs.o
|
||||
|
|
@ -12,6 +12,7 @@
|
|||
#include "common/compiler.h"
|
||||
#include <compel/ptrace.h>
|
||||
#include "asm/dump.h"
|
||||
#include "asm/gcs-types.h"
|
||||
#include "protobuf.h"
|
||||
#include "images/core.pb-c.h"
|
||||
#include "images/creds.pb-c.h"
|
||||
|
|
@ -22,6 +23,50 @@
|
|||
#include "restorer.h"
|
||||
#include "compel/infect.h"
|
||||
#include "pstree.h"
|
||||
#include <stdbool.h>
|
||||
|
||||
/*
|
||||
* cr_user_pac_* are a copy of the corresponding uapi structs
|
||||
* in arch/arm64/include/uapi/asm/ptrace.h
|
||||
*/
|
||||
struct cr_user_pac_address_keys {
|
||||
__uint128_t apiakey;
|
||||
__uint128_t apibkey;
|
||||
__uint128_t apdakey;
|
||||
__uint128_t apdbkey;
|
||||
};
|
||||
|
||||
struct cr_user_pac_generic_keys {
|
||||
__uint128_t apgakey;
|
||||
};
|
||||
|
||||
/*
|
||||
* The following HWCAP constants are copied from
|
||||
* arch/arm64/include/uapi/asm/hwcap.h
|
||||
*/
|
||||
#ifndef HWCAP_PACA
|
||||
#define HWCAP_PACA (1 << 30)
|
||||
#endif
|
||||
|
||||
#ifndef HWCAP_PACG
|
||||
#define HWCAP_PACG (1UL << 31)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* The following NT_ARM_PAC constants are copied from
|
||||
* include/uapi/linux/elf.h
|
||||
*/
|
||||
#ifndef NT_ARM_PACA_KEYS
|
||||
#define NT_ARM_PACA_KEYS 0x407 /* ARM pointer authentication address keys */
|
||||
#endif
|
||||
|
||||
#ifndef NT_ARM_PACG_KEYS
|
||||
#define NT_ARM_PACG_KEYS 0x408
|
||||
#endif
|
||||
|
||||
#ifndef NT_ARM_PAC_ENABLED_KEYS
|
||||
#define NT_ARM_PAC_ENABLED_KEYS 0x40a /* AArch64 pointer authentication enabled keys. */
|
||||
#endif
|
||||
|
||||
extern unsigned long getauxval(unsigned long type);
|
||||
|
||||
|
|
@ -29,8 +74,8 @@ extern unsigned long getauxval(unsigned long type);
|
|||
|
||||
static int save_pac_keys(int pid, CoreEntry *core)
|
||||
{
|
||||
struct user_pac_address_keys paca;
|
||||
struct user_pac_generic_keys pacg;
|
||||
struct cr_user_pac_address_keys paca;
|
||||
struct cr_user_pac_generic_keys pacg;
|
||||
PacKeys *pac_entry;
|
||||
long pac_enabled_key;
|
||||
struct iovec iov;
|
||||
|
|
@ -103,6 +148,11 @@ static int save_pac_keys(int pid, CoreEntry *core)
|
|||
int save_task_regs(pid_t pid, void *x, user_regs_struct_t *regs, user_fpregs_struct_t *fpsimd)
|
||||
{
|
||||
int i;
|
||||
struct cr_user_gcs gcs_live;
|
||||
struct iovec gcs_iov = {
|
||||
.iov_base = &gcs_live,
|
||||
.iov_len = sizeof(gcs_live),
|
||||
};
|
||||
CoreEntry *core = x;
|
||||
|
||||
// Save the Aarch64 CPU state
|
||||
|
|
@ -114,14 +164,25 @@ int save_task_regs(pid_t pid, void *x, user_regs_struct_t *regs, user_fpregs_str
|
|||
|
||||
// Save the FP/SIMD state
|
||||
for (i = 0; i < 32; ++i) {
|
||||
core->ti_aarch64->fpsimd->vregs[2 * i] = fpsimd->vregs[i];
|
||||
core->ti_aarch64->fpsimd->vregs[2 * i + 1] = fpsimd->vregs[i] >> 64;
|
||||
core->ti_aarch64->fpsimd->vregs[2 * i] = fpsimd->fpstate.vregs[i];
|
||||
core->ti_aarch64->fpsimd->vregs[2 * i + 1] = fpsimd->fpstate.vregs[i] >> 64;
|
||||
}
|
||||
assign_reg(core->ti_aarch64->fpsimd, fpsimd, fpsr);
|
||||
assign_reg(core->ti_aarch64->fpsimd, fpsimd, fpcr);
|
||||
assign_reg(core->ti_aarch64->fpsimd, &fpsimd->fpstate, fpsr);
|
||||
assign_reg(core->ti_aarch64->fpsimd, &fpsimd->fpstate, fpcr);
|
||||
|
||||
if (save_pac_keys(pid, core))
|
||||
return -1;
|
||||
|
||||
/* Save the GCS state */
|
||||
if (compel_host_supports_gcs()) {
|
||||
if (ptrace(PTRACE_GETREGSET, pid, NT_ARM_GCS, &gcs_iov) < 0) {
|
||||
pr_perror("Failed to get GCS for %d", pid);
|
||||
return -1;
|
||||
}
|
||||
core->ti_aarch64->gcs->gcspr_el0 = gcs_live.gcspr_el0;
|
||||
core->ti_aarch64->gcs->features_enabled = gcs_live.features_enabled;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
@ -130,6 +191,7 @@ int arch_alloc_thread_info(CoreEntry *core)
|
|||
ThreadInfoAarch64 *ti_aarch64;
|
||||
UserAarch64RegsEntry *gpregs;
|
||||
UserAarch64FpsimdContextEntry *fpsimd;
|
||||
UserAarch64GcsEntry *gcs;
|
||||
|
||||
ti_aarch64 = xmalloc(sizeof(*ti_aarch64));
|
||||
if (!ti_aarch64)
|
||||
|
|
@ -159,6 +221,15 @@ int arch_alloc_thread_info(CoreEntry *core)
|
|||
if (!fpsimd->vregs)
|
||||
goto err;
|
||||
|
||||
/* Allocate & init GCS */
|
||||
if (compel_host_supports_gcs()) {
|
||||
gcs = xmalloc(sizeof(*gcs));
|
||||
if (!gcs)
|
||||
goto err;
|
||||
user_aarch64_gcs_entry__init(gcs);
|
||||
ti_aarch64->gcs = gcs;
|
||||
}
|
||||
|
||||
return 0;
|
||||
err:
|
||||
return -1;
|
||||
|
|
@ -188,6 +259,7 @@ int restore_fpu(struct rt_sigframe *sigframe, CoreEntry *core)
|
|||
{
|
||||
int i;
|
||||
struct fpsimd_context *fpsimd = RT_SIGFRAME_FPU(sigframe);
|
||||
struct gcs_context *gcs;
|
||||
|
||||
if (core->ti_aarch64->fpsimd->n_vregs != 64)
|
||||
return 1;
|
||||
|
|
@ -201,6 +273,18 @@ int restore_fpu(struct rt_sigframe *sigframe, CoreEntry *core)
|
|||
fpsimd->head.magic = FPSIMD_MAGIC;
|
||||
fpsimd->head.size = sizeof(*fpsimd);
|
||||
|
||||
if (compel_host_supports_gcs()) {
|
||||
gcs = RT_SIGFRAME_GCS(sigframe);
|
||||
|
||||
pr_debug("sigframe gcspr %llx enabled %llx\n", gcs->gcspr, gcs->features_enabled);
|
||||
|
||||
gcs->head.magic = GCS_MAGIC;
|
||||
gcs->head.size = sizeof(*gcs);
|
||||
gcs->reserved = 0;
|
||||
gcs->gcspr = core->ti_aarch64->gcs->gcspr_el0 - 8;
|
||||
gcs->features_enabled = core->ti_aarch64->gcs->features_enabled;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
@ -224,8 +308,8 @@ int restore_gpregs(struct rt_sigframe *f, UserRegsEntry *r)
|
|||
int arch_ptrace_restore(int pid, struct pstree_item *item)
|
||||
{
|
||||
unsigned long hwcaps = getauxval(AT_HWCAP);
|
||||
struct user_pac_address_keys upaca;
|
||||
struct user_pac_generic_keys upacg;
|
||||
struct cr_user_pac_address_keys upaca;
|
||||
struct cr_user_pac_generic_keys upacg;
|
||||
PacAddressKeys *paca;
|
||||
PacGenericKeys *pacg;
|
||||
long pac_enabled_keys;
|
||||
|
|
|
|||
157
criu/arch/aarch64/gcs.c
Normal file
157
criu/arch/aarch64/gcs.c
Normal file
|
|
@ -0,0 +1,157 @@
|
|||
#include <sys/ptrace.h>
|
||||
#include <sys/wait.h>
|
||||
|
||||
#include <common/list.h>
|
||||
#include <compel/cpu.h>
|
||||
|
||||
#include "asm/gcs-types.h"
|
||||
#include "pstree.h"
|
||||
#include "restorer.h"
|
||||
#include "rst-malloc.h"
|
||||
#include "vma.h"
|
||||
|
||||
#include <sys/auxv.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
/*
 * Tell whether a dumped thread had GCS switched on: true only when a GCS
 * entry is present and PR_SHADOW_STACK_ENABLE is set in features_enabled.
 */
static bool task_has_gcs_enabled(UserAarch64GcsEntry *gcs)
{
	if (!gcs)
		return false;

	return (gcs->features_enabled & PR_SHADOW_STACK_ENABLE) ? true : false;
}
|
||||
|
||||
static bool host_supports_gcs(void)
|
||||
{
|
||||
unsigned long hwcap = getauxval(AT_HWCAP);
|
||||
return (hwcap & HWCAP_GCS) != 0;
|
||||
}
|
||||
|
||||
/*
 * Decide whether GCS has to be set up while restoring @item.
 *
 * Returns true only if the task is alive, its core image has GCS enabled
 * and the restoring host advertises GCS. Every other combination yields
 * false; a GCS-enabled image on a non-GCS host additionally logs a one-time
 * warning.
 */
static bool task_needs_gcs(struct pstree_item *item, CoreEntry *core)
{
	if (!task_alive(item))
		return false;

	if (!task_has_gcs_enabled(core->ti_aarch64->gcs)) {
		pr_info("Restoring a task without GCS\n");
		return false;
	}

	if (!host_supports_gcs()) {
		pr_warn_once("Restoring task with GCS on non-GCS host\n");
		return false;
	}

	pr_info("Restoring task with GCS\n");
	return true;
}
|
||||
|
||||
static int gcs_prepare_task(struct vm_area_list *vmas,
|
||||
struct rst_shstk_info *gcs)
|
||||
{
|
||||
struct vma_area *vma;
|
||||
|
||||
list_for_each_entry(vma, &vmas->h, list) {
|
||||
if (vma_area_is(vma, VMA_AREA_SHSTK) &&
|
||||
in_vma_area(vma, gcs->gcspr_el0)) {
|
||||
unsigned long premapped_addr = vma->premmaped_addr;
|
||||
unsigned long size = vma_area_len(vma);
|
||||
|
||||
gcs->vma_start = vma->e->start;
|
||||
gcs->vma_size = size;
|
||||
gcs->premapped_addr = premapped_addr;
|
||||
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
pr_err("Unable to find a shadow stack vma: %lx\n", gcs->gcspr_el0);
|
||||
return -1;
|
||||
}
|
||||
|
||||
/*
 * Fill in the GCS restore info for a task and for each of its threads.
 *
 * Does nothing (returns 0) when task_needs_gcs() says GCS is not needed.
 * Otherwise copies gcspr_el0/features_enabled from the core images into
 * ta->shstk and into every per-thread shstk slot, and resolves the backing
 * shadow-stack VMA for each of them. The per-thread argument array is taken
 * to start right after @ta (&ta[1]).
 *
 * Returns 0 on success, -1 if any shadow-stack VMA lookup fails.
 */
int arch_gcs_prepare(struct pstree_item *item, CoreEntry *core,
		     struct task_restore_args *ta)
{
	struct thread_restore_args *threads = (struct thread_restore_args *)(&ta[1]);
	struct vm_area_list *vmas = &rsti(item)->vmas;
	int t;

	if (!task_needs_gcs(item, core))
		return 0;

	/* Task-wide info comes from the core passed in by the caller. */
	ta->shstk.gcspr_el0 = core->ti_aarch64->gcs->gcspr_el0;
	ta->shstk.features_enabled = core->ti_aarch64->gcs->features_enabled;

	if (gcs_prepare_task(vmas, &ta->shstk)) {
		pr_err("gcs: failed to prepare shadow stack memory\n");
		return -1;
	}

	/* Per-thread info comes from each thread's own core image. */
	for (t = 0; t < item->nr_threads; t++) {
		UserAarch64GcsEntry *saved = item->core[t]->ti_aarch64->gcs;
		struct rst_shstk_info *info = &threads[t].shstk;

		info->gcspr_el0 = saved->gcspr_el0;
		info->features_enabled = saved->features_enabled;

		if (gcs_prepare_task(vmas, info)) {
			pr_err("gcs: failed to prepare GCS memory\n");
			return -1;
		}
	}

	return 0;
}
|
||||
|
||||
/*
 * Enable GCS for the current task when the dumped task needs it, then run
 * @func(@arg).
 *
 * If task_needs_gcs() reports that GCS is not needed, @func is simply called
 * and its result returned. Otherwise GCS is enabled with a hand-written
 * prctl(PR_SET_SHADOW_STACK_STATUS) SVC and, on success, the process exits
 * with @func's result rather than returning to the caller.
 * NOTE(review): presumably returning is avoided because the caller's frames
 * were created before GCS was switched on -- confirm.
 *
 * Returns @func's result in the non-GCS case, or -1 if enabling GCS failed.
 */
int arch_shstk_trampoline(struct pstree_item *item, CoreEntry *core,
			  int (*func)(void *arg), void *arg)
{
	int fret;
	unsigned long flags = PR_SHADOW_STACK_ENABLE |
			      PR_SHADOW_STACK_PUSH |
			      PR_SHADOW_STACK_WRITE;

	long ret, x1_after, x8_after;

	/* If task doesn't need GCS, just call func */
	if (!task_needs_gcs(item, core)) {
		return func(arg);
	}

	pr_debug("gcs: GCS enable SVC about to fire: x8=%d x0=%d x1=0x%lx\n",
		 __NR_prctl, PR_SET_SHADOW_STACK_STATUS, flags);

	asm volatile(
		"mov x0, %3\n" // x0 = PR_SET_SHADOW_STACK_STATUS (75)
		"mov x1, %4\n" // x1 = flags
		"mov x2, xzr\n" // x2 = 0
		"mov x3, xzr\n" // x3 = 0
		"mov x4, xzr\n" // x4 = 0
		"mov x8, %5\n" // x8 = __NR_prctl (167)
		"svc #0\n" // Invoke syscall
		"mov %0, x0\n" // Capture return value
		"mov %1, x1\n" // Capture x1 after
		"mov %2, x8\n" // Capture x8 after
		: "=r"(ret), "=r"(x1_after), "=r"(x8_after)
		: "i"(PR_SET_SHADOW_STACK_STATUS), // x0 - %3rd
		  "r"(flags), // x1 - %4th
		  "i"(__NR_prctl) // x8 - %5th
		: "x0", "x1", "x2", "x3", "x4", "x8", "memory", "cc");

	pr_info("gcs: after SVC: ret=%ld x1=%ld x8=%ld\n", ret, x1_after, x8_after);

	if (ret != 0) {
		/*
		 * The raw SVC bypasses libc, so errno is never updated here.
		 * The kernel reports failure as a negated error code in x0,
		 * so decode the error from the return value itself.
		 */
		int err = (int)-ret;

		pr_err("gcs: failed to enable GCS: ret=%ld errno=%d (%s)\n", ret, err, strerror(err));
		return -1;
	}

	fret = func(arg);
	exit(fret);

	/* Not reached: exit() does not return. */
	return -1;
}
|
||||
196
criu/arch/aarch64/include/asm/gcs.h
Normal file
196
criu/arch/aarch64/include/asm/gcs.h
Normal file
|
|
@ -0,0 +1,196 @@
|
|||
#ifndef __CR_ASM_GCS_H__
|
||||
#define __CR_ASM_GCS_H__
|
||||
|
||||
#include <asm/gcs-types.h>
|
||||
|
||||
/*
 * Per-task / per-thread GCS restore state.
 *
 * vma_start, vma_size and premapped_addr are filled in by gcs_prepare_task()
 * from the shadow-stack VMA containing gcspr_el0; tmp_gcs is set through
 * shstk_set_restorer_stack(); gcspr_el0 and features_enabled are copied from
 * the core image in arch_gcs_prepare().
 */
struct rst_shstk_info {
|
||||
unsigned long vma_start; /* start of GCS VMA */
|
||||
unsigned long vma_size; /* size of GCS VMA */
|
||||
unsigned long premapped_addr; /* premapped buffer */
|
||||
unsigned long tmp_gcs; /* temp area for GCS if needed */
|
||||
u64 gcspr_el0; /* GCS pointer */
|
||||
u64 features_enabled; /* GCS flags */
|
||||
};
|
||||
|
||||
#define rst_shstk_info rst_shstk_info
|
||||
|
||||
struct task_restore_args;
|
||||
struct pstree_item;
|
||||
|
||||
int arch_gcs_prepare(struct pstree_item *item, CoreEntry *core,
|
||||
struct task_restore_args *ta);
|
||||
#define arch_shstk_prepare arch_gcs_prepare
|
||||
|
||||
int arch_shstk_trampoline(struct pstree_item *item, CoreEntry *core,
|
||||
int (*func)(void *arg), void *arg);
|
||||
#define arch_shstk_trampoline arch_shstk_trampoline
|
||||
|
||||
static always_inline void shstk_set_restorer_stack(struct rst_shstk_info *gcs, void *ptr)
|
||||
{
|
||||
gcs->tmp_gcs = (long unsigned)ptr;
|
||||
}
|
||||
#define shstk_set_restorer_stack shstk_set_restorer_stack
|
||||
|
||||
/* Size of the restorer's temporary shadow-stack area: one page. */
static always_inline long shstk_restorer_stack_size(void)
|
||||
{
|
||||
return PAGE_SIZE;
|
||||
}
|
||||
#define shstk_restorer_stack_size shstk_restorer_stack_size
|
||||
|
||||
#ifdef CR_NOGLIBC
|
||||
#include <compel/plugins/std/syscall.h>
|
||||
#include <compel/cpu.h>
|
||||
#include "vma.h"
|
||||
|
||||
/*
 * Map a shadow stack of @size bytes via sys_map_shadow_stack().
 *
 * @addr may be 0 to accept whatever address the syscall returns; a non-zero
 * @addr must be honoured exactly or the call is treated as failed.
 *
 * Returns the mapped address, or (unsigned long)-1 on syscall failure or
 * address mismatch.
 */
static inline unsigned long gcs_map(unsigned long addr, unsigned long size, unsigned int flags)
{
	long mapped;

	mapped = sys_map_shadow_stack(addr, size, flags);
	pr_info("gcs: syscall: map_shadow_stack at=%lx size=%ld\n", addr, size);

	if (mapped < 0) {
		pr_err("gcs: failed to map GCS at %lx: %ld\n", addr, mapped);
		return -1;
	}

	/* A fixed address was requested but the mapping landed elsewhere. */
	if (addr != 0 && (unsigned long)mapped != addr) {
		pr_err("gcs: address mismatch: need %lx, got %lx\n", addr, mapped);
		return -1;
	}

	pr_info("gcs: mmapped GCS at %lx\n", mapped);

	return mapped;
}
|
||||
|
||||
/* clang-format off */
|
||||
/*
 * Issue the "sys #3, C7, C7, #2" system instruction with @Xt as its register
 * operand. The instruction is spelled as a generic SYS encoding rather than
 * by mnemonic.
 * NOTE(review): going by this wrapper's name and its callers, this is the
 * first half of a GCS stack switch (GCSSS1) -- confirm against the Arm ARM.
 */
static always_inline void gcsss1(unsigned long *Xt)
|
||||
{
|
||||
asm volatile (
|
||||
"sys #3, C7, C7, #2, %0\n"
|
||||
:
|
||||
: "rZ" (Xt)
|
||||
: "memory");
|
||||
}
|
||||
|
||||
/*
 * Issue the "SYSL ..., #3, C7, C7, #3" system instruction and return the
 * value it produces in its destination register.
 * NOTE(review): going by the name this is the second half of a GCS stack
 * switch (GCSSS2); nothing in this chunk calls it -- confirm intended use.
 */
static always_inline unsigned long *gcsss2(void)
|
||||
{
|
||||
unsigned long *Xt;
|
||||
|
||||
asm volatile (
|
||||
"SYSL %0, #3, C7, C7, #3\n"
|
||||
: "=r" (Xt)
|
||||
:
|
||||
: "memory");
|
||||
|
||||
return Xt;
|
||||
}
|
||||
|
||||
/*
 * Store @val at @addr using a GCS store instruction emitted as a raw opcode.
 *
 * The operands are moved into x0 (address) and x1 (value) by hand because the
 * .inst word hard-codes those registers; both are listed as clobbers. x0 is
 * zeroed after the store.
 * NOTE(review): confirm that opcode 0xd91f1c01 is the instruction named in
 * the trailing comment and not a sibling encoding.
 */
static inline void gcsstr(unsigned long addr, unsigned long val)
|
||||
{
|
||||
asm volatile(
|
||||
"mov x0, %0\n"
|
||||
"mov x1, %1\n"
|
||||
".inst 0xd91f1c01\n" // GCSSTR x1, [x0]
|
||||
"mov x0, #0\n"
|
||||
:
|
||||
: "r"(addr), "r"(val)
|
||||
: "x0", "x1", "memory");
|
||||
}
|
||||
/* clang-format on */
|
||||
|
||||
static always_inline int gcs_restore(struct rst_shstk_info *gcs)
|
||||
{
|
||||
unsigned long gcspr, val;
|
||||
|
||||
if (!(gcs && gcs->features_enabled & PR_SHADOW_STACK_ENABLE)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
gcspr = gcs->gcspr_el0 - 8;
|
||||
|
||||
val = ALIGN_DOWN(GCS_SIGNAL_CAP(gcspr), 8);
|
||||
pr_debug("gcs: [0] GCSSTR VAL=%lx write at GCSPR=%lx\n", val, gcspr);
|
||||
gcsstr(gcspr, val);
|
||||
|
||||
val = ALIGN_DOWN(GCS_SIGNAL_CAP(gcspr), 8) | GCS_CAP_VALID_TOKEN;
|
||||
gcspr -= 8;
|
||||
pr_debug("gcs: [1] GCSSTR VAL=%lx write at GCSPR=%lx\n", val, gcspr);
|
||||
gcsstr(gcspr, val);
|
||||
|
||||
pr_debug("gcs: about to switch stacks via GCSSS1 to: %lx\n", gcspr);
|
||||
gcsss1((unsigned long *)gcspr);
|
||||
return 0;
|
||||
}
|
||||
#define arch_shstk_restore gcs_restore
|
||||
|
||||
/*
 * Replace the premapped copy of a GCS VMA with a real shadow-stack mapping
 * that holds the same contents.
 *
 * A new shadow stack of the VMA's length is mapped at a kernel-chosen
 * address, the premapped data is copied into it word by word with gcsstr(),
 * the premapped area is unmapped, and the entry's premapped start is pointed
 * at the new mapping.
 *
 * Returns 0 on success, -1 if the shadow stack cannot be mapped, or the
 * negative sys_munmap() result if unmapping the premapped area fails.
 */
static always_inline int gcs_vma_restore(VmaEntry *vma_entry)
{
	unsigned long *gcs_data = (void *)vma_premmaped_start(vma_entry);
	unsigned long vma_size = vma_entry_len(vma_entry);
	unsigned long shstk, i;
	long ret; /* signed, so a negative syscall result is detectable */

	shstk = gcs_map(0, vma_size, SHADOW_STACK_SET_TOKEN);
	/*
	 * gcs_map() reports failure as (unsigned long)-1. Bail out here:
	 * otherwise the copy loop below would store through a bogus address.
	 */
	if (shstk == (unsigned long)-1) {
		pr_err("Failed to map shadow stack (%lu bytes)\n", vma_size);
		return -1;
	}

	/* restore shadow stack contents */
	for (i = 0; i < vma_size / 8; i++)
		gcsstr(shstk + i * 8, gcs_data[i]);

	pr_debug("unmap %lx %ld\n", (unsigned long)gcs_data, vma_size);
	ret = sys_munmap(gcs_data, vma_size);
	if (ret < 0) {
		pr_err("Failed to unmap premmaped shadow stack\n");
		return ret;
	}

	/* From here on the "premapped" location is the new shadow stack. */
	vma_premmaped_start(vma_entry) = shstk;

	return 0;
}
|
||||
#define shstk_vma_restore gcs_vma_restore
|
||||
|
||||
static always_inline int gcs_switch_to_restorer(struct rst_shstk_info *gcs)
|
||||
{
|
||||
int ret;
|
||||
unsigned long *ssp;
|
||||
unsigned long addr;
|
||||
unsigned long gcspr;
|
||||
|
||||
if (!(gcs && gcs->features_enabled & PR_SHADOW_STACK_ENABLE)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
pr_debug("gcs->premapped_addr + gcs->vma_size = %lx\n", gcs->premapped_addr + gcs->vma_size);
|
||||
pr_debug("gcs->tmp_gcs = %lx\n", gcs->tmp_gcs);
|
||||
addr = gcs->tmp_gcs;
|
||||
|
||||
if (addr % PAGE_SIZE != 0) {
|
||||
pr_err("gcs: 0x%lx not page-aligned to size 0x%lx\n", addr, PAGE_SIZE);
|
||||
return -1;
|
||||
}
|
||||
|
||||
ret = sys_munmap((void *)addr, PAGE_SIZE);
|
||||
if (ret < 0) {
|
||||
pr_err("gcs: Failed to unmap aarea for dumpee GCS VMAs\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
gcspr = gcs_map(addr, PAGE_SIZE, SHADOW_STACK_SET_TOKEN);
|
||||
|
||||
if (gcspr == -1) {
|
||||
pr_err("gcs: failed to gcs_map(%lx, %lx)\n", (unsigned long)addr, PAGE_SIZE);
|
||||
return -1;
|
||||
}
|
||||
|
||||
ssp = (unsigned long *)(addr + PAGE_SIZE - 8);
|
||||
gcsss1(ssp);
|
||||
|
||||
return 0;
|
||||
}
|
||||
#define arch_shstk_switch_to_restorer gcs_switch_to_restorer
|
||||
|
||||
#endif /* CR_NOGLIBC */
|
||||
|
||||
#endif /* __CR_ASM_GCS_H__ */
|
||||
|
|
@ -1,10 +1,11 @@
|
|||
#ifndef __CR_ASM_RESTORER_H__
|
||||
#define __CR_ASM_RESTORER_H__
|
||||
|
||||
#include <asm/sigcontext.h>
|
||||
#include <signal.h>
|
||||
#include <sys/ucontext.h>
|
||||
|
||||
#include "asm/types.h"
|
||||
#include "asm/gcs.h"
|
||||
#include "images/core.pb-c.h"
|
||||
|
||||
#include <compel/asm/sigframe.h>
|
||||
|
|
|
|||
|
|
@ -64,6 +64,12 @@ int cpu_validate_cpuinfo(void)
|
|||
if (!img)
|
||||
return -1;
|
||||
|
||||
if (empty_image(img)) {
|
||||
pr_err("No cpuinfo image\n");
|
||||
close_image(img);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (pb_read_one(img, &cpu_info, PB_CPUINFO) < 0)
|
||||
goto error;
|
||||
|
||||
|
|
|
|||
|
|
@ -87,6 +87,12 @@ int cpu_validate_cpuinfo(void)
|
|||
if (!img)
|
||||
return -1;
|
||||
|
||||
if (empty_image(img)) {
|
||||
pr_err("No cpuinfo image\n");
|
||||
close_image(img);
|
||||
return -1;
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
if (pb_read_one(img, &cpu_info, PB_CPUINFO) < 0)
|
||||
goto error;
|
||||
|
|
|
|||
|
|
@ -142,6 +142,29 @@ static void print_core_fp_regs(const char *msg, CoreEntry *core)
|
|||
print_core_ri_cb(core);
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocate floating point registers
|
||||
*/
|
||||
static UserS390FpregsEntry *allocate_fp_regs(void)
|
||||
{
|
||||
UserS390FpregsEntry *fpregs;
|
||||
|
||||
fpregs = xmalloc(sizeof(*fpregs));
|
||||
if (!fpregs)
|
||||
return NULL;
|
||||
user_s390_fpregs_entry__init(fpregs);
|
||||
|
||||
fpregs->n_fprs = 16;
|
||||
fpregs->fprs = xzalloc(16 * sizeof(uint64_t));
|
||||
if (!fpregs->fprs)
|
||||
goto fail_free_fpregs;
|
||||
return fpregs;
|
||||
|
||||
fail_free_fpregs:
|
||||
xfree(fpregs);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocate VxrsLow registers
|
||||
*/
|
||||
|
|
@ -294,7 +317,13 @@ int save_task_regs(pid_t pid, void *arg, user_regs_struct_t *u, user_fpregs_stru
|
|||
CoreEntry *core = arg;
|
||||
|
||||
gpregs = CORE_THREAD_ARCH_INFO(core)->gpregs;
|
||||
fpregs = CORE_THREAD_ARCH_INFO(core)->fpregs;
|
||||
/*
|
||||
* We delay allocating this until now because checkpointing can fail earlier.
|
||||
* When it fails we need to know if we reached here or not so that the cleanup
|
||||
* code doesn't restore FPRs that were never saved in the first place.
|
||||
*/
|
||||
fpregs = allocate_fp_regs();
|
||||
CORE_THREAD_ARCH_INFO(core)->fpregs = fpregs;
|
||||
|
||||
/* Vector registers */
|
||||
if (f->flags & USER_FPREGS_VXRS) {
|
||||
|
|
@ -399,36 +428,15 @@ int restore_fpu(struct rt_sigframe *f, CoreEntry *core)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocate floating point registers
|
||||
*/
|
||||
static UserS390FpregsEntry *allocate_fp_regs(void)
|
||||
{
|
||||
UserS390FpregsEntry *fpregs;
|
||||
|
||||
fpregs = xmalloc(sizeof(*fpregs));
|
||||
if (!fpregs)
|
||||
return NULL;
|
||||
user_s390_fpregs_entry__init(fpregs);
|
||||
|
||||
fpregs->n_fprs = 16;
|
||||
fpregs->fprs = xzalloc(16 * sizeof(uint64_t));
|
||||
if (!fpregs->fprs)
|
||||
goto fail_free_fpregs;
|
||||
return fpregs;
|
||||
|
||||
fail_free_fpregs:
|
||||
xfree(fpregs);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Free floating point registers
|
||||
*/
|
||||
static void free_fp_regs(UserS390FpregsEntry *fpregs)
|
||||
{
|
||||
xfree(fpregs->fprs);
|
||||
xfree(fpregs);
|
||||
if (fpregs) {
|
||||
xfree(fpregs->fprs);
|
||||
xfree(fpregs);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
@ -487,15 +495,17 @@ int arch_alloc_thread_info(CoreEntry *core)
|
|||
ti_s390->gpregs = allocate_gp_regs();
|
||||
if (!ti_s390->gpregs)
|
||||
goto fail_free_ti_s390;
|
||||
ti_s390->fpregs = allocate_fp_regs();
|
||||
if (!ti_s390->fpregs)
|
||||
goto fail_free_gp_regs;
|
||||
|
||||
/*
|
||||
* Delay allocating space until needed. Checkpointing can fail before that
|
||||
* and the cleanup code needs to be able to tell if FPRs were saved or not
|
||||
* before trying to restore the register state.
|
||||
*/
|
||||
ti_s390->fpregs = NULL;
|
||||
|
||||
CORE_THREAD_ARCH_INFO(core) = ti_s390;
|
||||
return 0;
|
||||
|
||||
fail_free_gp_regs:
|
||||
free_gp_regs(ti_s390->gpregs);
|
||||
fail_free_ti_s390:
|
||||
xfree(ti_s390);
|
||||
return -1;
|
||||
|
|
@ -678,14 +688,18 @@ static int set_task_regs(pid_t pid, CoreEntry *core)
|
|||
user_fpregs_struct_t fpregs;
|
||||
|
||||
memset(&fpregs, 0, sizeof(fpregs));
|
||||
/* Floating point registers */
|
||||
/*
|
||||
* Floating point registers
|
||||
* Optional on checkpoint; checkpoint may have failed and we may reach here as part of cleanup
|
||||
* so there's no guarantee that we saved FPRs for this thread.
|
||||
*/
|
||||
cfpregs = CORE_THREAD_ARCH_INFO(core)->fpregs;
|
||||
if (!cfpregs)
|
||||
return -1;
|
||||
fpregs.prfpreg.fpc = cfpregs->fpc;
|
||||
memcpy(fpregs.prfpreg.fprs, cfpregs->fprs, sizeof(fpregs.prfpreg.fprs));
|
||||
if (set_fp_regs(pid, &fpregs) < 0)
|
||||
return -1;
|
||||
if (cfpregs) {
|
||||
fpregs.prfpreg.fpc = cfpregs->fpc;
|
||||
memcpy(fpregs.prfpreg.fprs, cfpregs->fprs, sizeof(fpregs.prfpreg.fprs));
|
||||
if (set_fp_regs(pid, &fpregs) < 0)
|
||||
return -1;
|
||||
}
|
||||
/* Vector registers (optional) */
|
||||
cvxrs_low = CORE_THREAD_ARCH_INFO(core)->vxrs_low;
|
||||
if (cvxrs_low != NULL) {
|
||||
|
|
|
|||
|
|
@ -407,6 +407,12 @@ int cpu_validate_cpuinfo(void)
|
|||
if (!img)
|
||||
return -1;
|
||||
|
||||
if (empty_image(img)) {
|
||||
pr_err("No cpuinfo image\n");
|
||||
close_image(img);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (pb_read_one(img, &img_cpu_info, PB_CPUINFO) < 0)
|
||||
goto err;
|
||||
|
||||
|
|
|
|||
|
|
@ -73,6 +73,23 @@ int arch_shstk_trampoline(struct pstree_item *item, CoreEntry *core,
|
|||
int (*func)(void *arg), void *arg);
|
||||
#define arch_shstk_trampoline arch_shstk_trampoline
|
||||
|
||||
/* Size of the restorer's temporary shadow stack: one page. */
static always_inline long shstk_restorer_stack_size(void)
|
||||
{
|
||||
return PAGE_SIZE;
|
||||
}
|
||||
#define shstk_restorer_stack_size shstk_restorer_stack_size
|
||||
/* Record @ptr as the address of the temporary shadow stack (info->tmp_shstk). */
static always_inline void shstk_set_restorer_stack(struct rst_shstk_info *info, void *ptr)
|
||||
{
|
||||
info->tmp_shstk = (unsigned long)ptr;
|
||||
}
|
||||
#define shstk_set_restorer_stack shstk_set_restorer_stack
|
||||
|
||||
/*
 * Lowest address to use for mmap: 4 GiB (4UL << 30) when the task has the
 * shadow stack feature bit set in info->cet, otherwise the caller's default.
 */
static always_inline long shstk_min_mmap_addr(struct rst_shstk_info *info, unsigned long __maybe_unused def)
{
	if (info->cet & ARCH_SHSTK_SHSTK)
		return 4UL << 30;

	return def;
}
|
||||
#define shstk_min_mmap_addr shstk_min_mmap_addr
|
||||
|
||||
#ifdef CR_NOGLIBC
|
||||
|
||||
#include <compel/plugins/std/syscall.h>
|
||||
|
|
@ -146,33 +163,53 @@ static inline int shstk_finalize(void)
|
|||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Create shadow stack vma and restore its content from premmapped anonymous (non-shstk) vma
|
||||
*/
|
||||
static always_inline int shstk_vma_restore(VmaEntry *vma_entry)
{
	unsigned long *src = (void *)vma_premmaped_start(vma_entry);
	unsigned long vma_size = vma_entry_len(vma_entry);
	unsigned long nr_words = vma_size / 8;
	long shstk, i, ret;

	/* Let the kernel pick the address; the VMA is moved into place later. */
	shstk = sys_map_shadow_stack(0, vma_size, SHADOW_STACK_SET_TOKEN);
	if (shstk < 0) {
		pr_err("Failed to map shadow stack: %ld\n", shstk);
		return -1;
	}

	/* Copy the saved contents into the new shadow stack, 8 bytes at a time. */
	for (i = 0; i < nr_words; i++)
		wrssq(shstk + i * 8, src[i]);

	ret = sys_munmap(src, vma_size);
	if (ret < 0) {
		pr_err("Failed to unmap premmaped shadow stack\n");
		return ret;
	}

	/*
	 * The anonymous premapped copy is gone; the shadow stack mapping now
	 * stands in as the premapped VMA until it is remapped to its final
	 * location.
	 */
	vma_premmaped_start(vma_entry) = shstk;

	return 0;
}
|
||||
#define shstk_vma_restore shstk_vma_restore
|
||||
|
||||
/*
|
||||
* Restore contents of the shadow stack and set shadow stack pointer
|
||||
*/
|
||||
static always_inline int shstk_restore(struct rst_shstk_info *cet)
|
||||
{
|
||||
unsigned long *shstk_data = (unsigned long *)cet->premmaped_addr;
|
||||
unsigned long ssp = cet->vma_start + cet->vma_size - 8;
|
||||
unsigned long shstk_top = cet->vma_size / 8 - 1;
|
||||
unsigned long val;
|
||||
long ret;
|
||||
unsigned long ssp, val;
|
||||
|
||||
if (!(cet->cet & ARCH_SHSTK_SHSTK))
|
||||
return 0;
|
||||
|
||||
if (shstk_map(cet->vma_start, cet->vma_size))
|
||||
return -1;
|
||||
|
||||
/*
|
||||
* Switch shadow stack from temporary location to the actual task's
|
||||
* shadow stack VMA
|
||||
*/
|
||||
shstk_switch_ssp(ssp);
|
||||
|
||||
/* restore shadow stack contents */
|
||||
for (; ssp >= cet->ssp; ssp -= 8, shstk_top--)
|
||||
wrssq(ssp, shstk_data[shstk_top]);
|
||||
|
||||
/*
|
||||
* Add tokens for sigreturn frame and for switch of the shadow stack.
|
||||
* The sigreturn token will be checked by the kernel during
|
||||
|
|
@ -182,6 +219,7 @@ static always_inline int shstk_restore(struct rst_shstk_info *cet)
|
|||
*/
|
||||
|
||||
/* token for sigreturn frame */
|
||||
ssp = cet->ssp - 8;
|
||||
val = ALIGN_DOWN(cet->ssp, 8) | SHSTK_DATA_BIT;
|
||||
wrssq(ssp, val);
|
||||
|
||||
|
|
@ -193,12 +231,6 @@ static always_inline int shstk_restore(struct rst_shstk_info *cet)
|
|||
/* reset shadow stack pointer to the proper location */
|
||||
shstk_switch_ssp(ssp);
|
||||
|
||||
ret = sys_munmap(shstk_data, cet->vma_size + PAGE_SIZE);
|
||||
if (ret < 0) {
|
||||
pr_err("Failed to unmap premmaped shadow stack\n");
|
||||
return ret;
|
||||
}
|
||||
|
||||
return shstk_finalize();
|
||||
}
|
||||
#define arch_shstk_restore shstk_restore
|
||||
|
|
|
|||
|
|
@ -45,7 +45,6 @@ static int shstk_prepare_task(struct vm_area_list *vmas,
|
|||
shstk->vma_start = vma->e->start;
|
||||
shstk->vma_size = size;
|
||||
shstk->premmaped_addr = premmaped_addr;
|
||||
shstk->tmp_shstk = premmaped_addr + size;
|
||||
|
||||
break;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -18,6 +18,7 @@
|
|||
#include "cr_options.h"
|
||||
#include "filesystems.h"
|
||||
#include "file-lock.h"
|
||||
#include "image.h"
|
||||
#include "irmap.h"
|
||||
#include "mount.h"
|
||||
#include "mount-v2.h"
|
||||
|
|
@ -703,6 +704,7 @@ int parse_options(int argc, char **argv, bool *usage_error, bool *has_exec_cmd,
|
|||
BOOL_OPT("mntns-compat-mode", &opts.mntns_compat_mode),
|
||||
BOOL_OPT("unprivileged", &opts.unprivileged),
|
||||
BOOL_OPT("ghost-fiemap", &opts.ghost_fiemap),
|
||||
BOOL_OPT(OPT_ALLOW_UPROBES, &opts.allow_uprobes),
|
||||
{},
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -1392,6 +1392,14 @@ static int check_pagemap_scan(void)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int check_timer_cr_ids(void)
|
||||
{
|
||||
if (!kdat.has_timer_cr_ids)
|
||||
return -1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* musl doesn't have a statx wrapper... */
|
||||
struct staty {
|
||||
__u32 stx_dev_major;
|
||||
|
|
@ -1581,6 +1589,23 @@ static int check_overlayfs_maps(void)
|
|||
return status == 0 ? 0 : -1;
|
||||
}
|
||||
|
||||
static int check_breakpoints(void)
|
||||
{
|
||||
if (!kdat.has_breakpoints) {
|
||||
pr_warn("Hardware breakpoints don't seem to work\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int check_pagemap_scan_guard_pages(void)
|
||||
{
|
||||
kerndat_warn_about_madv_guards();
|
||||
|
||||
return kdat.has_pagemap_scan_guard_pages ? 0 : -1;
|
||||
}
|
||||
|
||||
static int (*chk_feature)(void);
|
||||
|
||||
/*
|
||||
|
|
@ -1608,6 +1633,7 @@ static int (*chk_feature)(void);
|
|||
return ret; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
int cr_check(void)
|
||||
{
|
||||
struct ns_id *ns;
|
||||
|
|
@ -1703,6 +1729,8 @@ int cr_check(void)
|
|||
ret |= check_ipv6_freebind();
|
||||
ret |= check_pagemap_scan();
|
||||
ret |= check_overlayfs_maps();
|
||||
ret |= check_timer_cr_ids();
|
||||
ret |= check_pagemap_scan_guard_pages();
|
||||
|
||||
if (kdat.lsm == LSMTYPE__APPARMOR)
|
||||
ret |= check_apparmor_stacking();
|
||||
|
|
@ -1715,6 +1743,10 @@ int cr_check(void)
|
|||
ret |= check_autofs();
|
||||
ret |= check_compat_cr();
|
||||
}
|
||||
/*
|
||||
* Category 4 - optional.
|
||||
*/
|
||||
check_breakpoints();
|
||||
|
||||
pr_msg("%s\n", ret ? CHECK_MAYBE : CHECK_GOOD);
|
||||
return ret;
|
||||
|
|
@ -1825,7 +1857,10 @@ static struct feature_list feature_list[] = {
|
|||
{ "get_rseq_conf", check_ptrace_get_rseq_conf },
|
||||
{ "ipv6_freebind", check_ipv6_freebind },
|
||||
{ "pagemap_scan", check_pagemap_scan },
|
||||
{ "timer_cr_ids", check_timer_cr_ids },
|
||||
{ "overlayfs_maps", check_overlayfs_maps },
|
||||
{ "breakpoints", check_breakpoints },
|
||||
{ "pagemap_scan_guard_pages", check_pagemap_scan_guard_pages },
|
||||
{ NULL, NULL },
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -87,7 +87,8 @@ static int cr_dedup_one_pagemap(unsigned long img_id, int flags)
|
|||
if (ret <= 0)
|
||||
goto exit;
|
||||
|
||||
pr_debug("dedup iovec base=%" PRIx64 ", len=%lu\n", pr.pe->vaddr, pagemap_len(pr.pe));
|
||||
pr_debug("dedup iovec %" PRIx64 " - %" PRIx64 "\n",
|
||||
pr.pe->vaddr, pr.pe->vaddr + pagemap_len(pr.pe));
|
||||
if (!pagemap_in_parent(pr.pe)) {
|
||||
ret = dedup_one_iovec(prp, pr.pe->vaddr, pagemap_len(pr.pe));
|
||||
if (ret)
|
||||
|
|
|
|||
|
|
@ -130,6 +130,23 @@ int collect_mappings(pid_t pid, struct vm_area_list *vma_area_list, dump_filemap
|
|||
if (ret < 0)
|
||||
goto err;
|
||||
|
||||
/*
|
||||
* In addition to real process VMAs we should keep an info about
|
||||
* madvise(MADV_GUARD_INSTALL) pages. While these are not represented
|
||||
* as a struct vm_area_struct in the kernel, it is convenient to treat
|
||||
* them as mappings in CRIU and reuse the same VMA images but with only
|
||||
* VMA_AREA_GUARD flag set.
|
||||
*
|
||||
* Also, we don't need to dump them during pre-dump.
|
||||
*/
|
||||
if (dump_file) {
|
||||
ret = collect_madv_guards(pid, vma_area_list);
|
||||
if (ret < 0) {
|
||||
pr_err("Collect MADV_GUARD_INSTALL pages (pid: %d) failed with %d\n", pid, ret);
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
|
||||
pr_info("Collected, longest area occupies %lu pages\n", vma_area_list->nr_priv_pages_longest);
|
||||
pr_info_vma_list(&vma_area_list->h);
|
||||
|
||||
|
|
@ -1396,7 +1413,7 @@ static int dump_zombies(void)
|
|||
item->sid = pps_buf.sid;
|
||||
item->pgid = pps_buf.pgid;
|
||||
|
||||
BUG_ON(!list_empty(&item->children));
|
||||
BUG_ON(has_children(item));
|
||||
|
||||
if (!item->sid) {
|
||||
pr_err("A session leader of zombie process %d(%d) is outside of its pid namespace\n",
|
||||
|
|
@ -2121,8 +2138,10 @@ int cr_dump_tasks(pid_t pid)
|
|||
InventoryEntry he = INVENTORY_ENTRY__INIT;
|
||||
InventoryEntry *parent_ie = NULL;
|
||||
struct pstree_item *item;
|
||||
int pre_dump_ret = 0;
|
||||
int ret = -1;
|
||||
int ret;
|
||||
int exit_code = -1;
|
||||
|
||||
kerndat_warn_about_madv_guards();
|
||||
|
||||
pr_info("========================================\n");
|
||||
pr_info("Dumping processes (pid: %d comm: %s)\n", pid, __task_comm_info(pid));
|
||||
|
|
@ -2140,9 +2159,9 @@ int cr_dump_tasks(pid_t pid)
|
|||
goto err;
|
||||
root_item->pid->real = pid;
|
||||
|
||||
pre_dump_ret = run_scripts(ACT_PRE_DUMP);
|
||||
if (pre_dump_ret != 0) {
|
||||
pr_err("Pre dump script failed with %d!\n", pre_dump_ret);
|
||||
ret = run_scripts(ACT_PRE_DUMP);
|
||||
if (ret != 0) {
|
||||
pr_err("Pre dump script failed with %d!\n", ret);
|
||||
goto err;
|
||||
}
|
||||
if (init_stats(DUMP_STATS))
|
||||
|
|
@ -2228,6 +2247,10 @@ int cr_dump_tasks(pid_t pid)
|
|||
goto err;
|
||||
}
|
||||
|
||||
ret = run_plugins(DUMP_DEVICES_LATE, pid);
|
||||
if (ret && ret != -ENOTSUP)
|
||||
goto err;
|
||||
|
||||
if (parent_ie) {
|
||||
inventory_entry__free_unpacked(parent_ie, NULL);
|
||||
parent_ie = NULL;
|
||||
|
|
@ -2264,49 +2287,44 @@ int cr_dump_tasks(pid_t pid)
|
|||
* ipc shared memory, but an ipc namespace is dumped in a child
|
||||
* process.
|
||||
*/
|
||||
ret = cr_dump_shmem();
|
||||
if (ret)
|
||||
if (cr_dump_shmem())
|
||||
goto err;
|
||||
|
||||
if (root_ns_mask) {
|
||||
ret = dump_namespaces(root_item, root_ns_mask);
|
||||
if (ret)
|
||||
if (dump_namespaces(root_item, root_ns_mask))
|
||||
goto err;
|
||||
}
|
||||
|
||||
if ((root_ns_mask & CLONE_NEWTIME) == 0) {
|
||||
ret = dump_time_ns(0);
|
||||
if (ret)
|
||||
if (dump_time_ns(0))
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (dump_aa_namespaces() < 0)
|
||||
goto err;
|
||||
|
||||
ret = dump_cgroups();
|
||||
if (ret)
|
||||
if (dump_cgroups())
|
||||
goto err;
|
||||
|
||||
ret = fix_external_unix_sockets();
|
||||
if (ret)
|
||||
if (fix_external_unix_sockets())
|
||||
goto err;
|
||||
|
||||
ret = tty_post_actions();
|
||||
if (ret)
|
||||
if (tty_post_actions())
|
||||
goto err;
|
||||
|
||||
ret = inventory_save_uptime(&he);
|
||||
if (ret)
|
||||
if (inventory_save_uptime(&he))
|
||||
goto err;
|
||||
|
||||
he.has_pre_dump_mode = false;
|
||||
if (found_uprobes_vma()) {
|
||||
he.has_allow_uprobes = true;
|
||||
he.allow_uprobes = true;
|
||||
}
|
||||
|
||||
ret = write_img_inventory(&he);
|
||||
if (ret)
|
||||
goto err;
|
||||
exit_code = write_img_inventory(&he);
|
||||
err:
|
||||
if (parent_ie)
|
||||
inventory_entry__free_unpacked(parent_ie, NULL);
|
||||
|
||||
return cr_dump_finish(ret);
|
||||
return cr_dump_finish(exit_code);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1238,7 +1238,7 @@ static inline int fork_with_pid(struct pstree_item *item)
|
|||
pr_debug("PID: real %d virt %d\n", item->pid->real, vpid(item));
|
||||
}
|
||||
|
||||
arch_shstk_unlock(item, ca.core, pid);
|
||||
arch_shstk_unlock(item, ca.core, ret);
|
||||
|
||||
err_unlock:
|
||||
if (!(ca.clone_flags & CLONE_NEWPID))
|
||||
|
|
@ -1820,6 +1820,7 @@ static int restore_rseq_cs(void)
|
|||
static int catch_tasks(bool root_seized)
|
||||
{
|
||||
struct pstree_item *item;
|
||||
bool nobp = fault_injected(FI_NO_BREAKPOINTS) || !kdat.has_breakpoints;
|
||||
|
||||
for_each_pstree_item(item) {
|
||||
int status, i, ret;
|
||||
|
|
@ -1847,7 +1848,7 @@ static int catch_tasks(bool root_seized)
|
|||
return -1;
|
||||
}
|
||||
|
||||
ret = compel_stop_pie(pid, rsti(item)->breakpoint, fault_injected(FI_NO_BREAKPOINTS));
|
||||
ret = compel_stop_pie(pid, rsti(item)->breakpoint, nobp);
|
||||
if (ret < 0)
|
||||
return -1;
|
||||
}
|
||||
|
|
@ -2119,7 +2120,7 @@ static int restore_root_task(struct pstree_item *init)
|
|||
* the '--empty-ns net' mode no iptables C/R is done and we
|
||||
* need to return these rules by hands.
|
||||
*/
|
||||
ret = network_lock_internal();
|
||||
ret = network_lock_internal(/* restore = */ true);
|
||||
if (ret)
|
||||
goto out_kill;
|
||||
}
|
||||
|
|
@ -2131,6 +2132,9 @@ static int restore_root_task(struct pstree_item *init)
|
|||
__restore_switch_stage(CR_STATE_FORKING);
|
||||
|
||||
skip_ns_bouncing:
|
||||
ret = run_plugins(POST_FORKING);
|
||||
if (ret < 0 && ret != -ENOTSUP)
|
||||
goto out_kill;
|
||||
|
||||
ret = restore_wait_inprogress_tasks();
|
||||
if (ret < 0)
|
||||
|
|
@ -2258,7 +2262,7 @@ skip_ns_bouncing:
|
|||
* might actually be a true error code but that would be also
|
||||
* captured in the plugin so no need to print the error here.
|
||||
*/
|
||||
if (ret < 0)
|
||||
if (ret < 0 && ret != -ENOTSUP)
|
||||
pr_debug("restore late stage hook for external plugin failed\n");
|
||||
}
|
||||
|
||||
|
|
@ -2362,41 +2366,47 @@ int cr_restore_tasks(void)
|
|||
return 1;
|
||||
|
||||
if (check_img_inventory(/* restore = */ true) < 0)
|
||||
goto err;
|
||||
|
||||
if (cr_plugin_init(CR_PLUGIN_STAGE__RESTORE))
|
||||
return -1;
|
||||
|
||||
if (init_stats(RESTORE_STATS))
|
||||
goto err;
|
||||
return -1;
|
||||
|
||||
if (lsm_check_opts())
|
||||
goto err;
|
||||
return -1;
|
||||
|
||||
timing_start(TIME_RESTORE);
|
||||
|
||||
if (cpu_init() < 0)
|
||||
goto err;
|
||||
return -1;
|
||||
|
||||
if (vdso_init_restore())
|
||||
goto err;
|
||||
return -1;
|
||||
|
||||
if (tty_init_restore())
|
||||
goto err;
|
||||
return -1;
|
||||
|
||||
if (opts.cpu_cap & CPU_CAP_IMAGE) {
|
||||
if (cpu_validate_cpuinfo())
|
||||
goto err;
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (prepare_task_entries() < 0)
|
||||
goto err;
|
||||
return -1;
|
||||
|
||||
if (prepare_pstree() < 0)
|
||||
goto err;
|
||||
return -1;
|
||||
|
||||
if (fdstore_init())
|
||||
goto err;
|
||||
return -1;
|
||||
|
||||
/*
|
||||
* For the AMDGPU plugin, its parallel restore feature needs to use fdstore to store
|
||||
* its socket file descriptor. This allows the main process and the target process to
|
||||
* communicate with each other through this file descriptor. Therefore, cr_plugin_init
|
||||
* must be initialized after fdstore_init.
|
||||
*/
|
||||
if (cr_plugin_init(CR_PLUGIN_STAGE__RESTORE))
|
||||
return -1;
|
||||
|
||||
if (inherit_fd_move_to_fdstore())
|
||||
goto err;
|
||||
|
|
@ -2421,23 +2431,24 @@ err:
|
|||
return ret;
|
||||
}
|
||||
|
||||
static long restorer_get_vma_hint(struct list_head *tgt_vma_list, struct list_head *self_vma_list, long vma_len)
|
||||
static long restorer_get_vma_hint(struct list_head *tgt_vma_list, struct list_head *self_vma_list, long min_addr, long vma_len)
|
||||
{
|
||||
struct vma_area *t_vma, *s_vma;
|
||||
long prev_vma_end = 0;
|
||||
long prev_vma_end = min_addr;
|
||||
struct vma_area end_vma;
|
||||
VmaEntry end_e;
|
||||
|
||||
end_vma.e = &end_e;
|
||||
end_e.start = end_e.end = kdat.task_size;
|
||||
prev_vma_end = kdat.mmap_min_addr;
|
||||
INIT_LIST_HEAD(&end_vma.list);
|
||||
|
||||
s_vma = list_first_entry(self_vma_list, struct vma_area, list);
|
||||
t_vma = list_first_entry(tgt_vma_list, struct vma_area, list);
|
||||
|
||||
while (1) {
|
||||
if (prev_vma_end + vma_len > s_vma->e->start) {
|
||||
if (s_vma->list.next == self_vma_list) {
|
||||
if ((s_vma->list.next == self_vma_list) ||
|
||||
vma_area_is(vma_next(s_vma), VMA_AREA_GUARD)) {
|
||||
s_vma = &end_vma;
|
||||
continue;
|
||||
}
|
||||
|
|
@ -2450,7 +2461,8 @@ static long restorer_get_vma_hint(struct list_head *tgt_vma_list, struct list_he
|
|||
}
|
||||
|
||||
if (prev_vma_end + vma_len > t_vma->e->start) {
|
||||
if (t_vma->list.next == tgt_vma_list) {
|
||||
if ((t_vma->list.next == tgt_vma_list) ||
|
||||
vma_area_is(vma_next(t_vma), VMA_AREA_GUARD)) {
|
||||
t_vma = &end_vma;
|
||||
continue;
|
||||
}
|
||||
|
|
@ -2559,6 +2571,17 @@ static int remap_restorer_blob(void *addr)
|
|||
restorer_setup_c_header_desc(&pbd, true);
|
||||
compel_relocs_apply(addr, addr, &pbd);
|
||||
|
||||
/*
|
||||
* Ensure the infected thread sees the updated code.
|
||||
*
|
||||
* On architectures like ARM64, the Data Cache (D-cache) and
|
||||
* Instruction Cache (I-cache) are not automatically coherent.
|
||||
* Modifications land in the D-cache, so we must flush (clean) the
|
||||
* D-cache to push changes to RAM to ensure the CPU fetches the updated
|
||||
* instructions.
|
||||
*/
|
||||
__builtin___clear_cache(addr, addr + pbd.hdr.bsize);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
@ -3173,7 +3196,7 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns
|
|||
|
||||
rst_mem_size = rst_mem_lock();
|
||||
memzone_size = round_up(sizeof(struct restore_mem_zone) * current->nr_threads, page_size());
|
||||
task_args->bootstrap_len = restorer_len + memzone_size + alen + rst_mem_size;
|
||||
task_args->bootstrap_len = restorer_len + memzone_size + alen + rst_mem_size + shstk_restorer_stack_size();
|
||||
BUG_ON(task_args->bootstrap_len & (PAGE_SIZE - 1));
|
||||
pr_info("%d threads require %ldK of memory\n", current->nr_threads, KBYTES(task_args->bootstrap_len));
|
||||
|
||||
|
|
@ -3203,7 +3226,9 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns
|
|||
* or inited from scratch).
|
||||
*/
|
||||
|
||||
mem = (void *)restorer_get_vma_hint(&vmas->h, &self_vmas.h, task_args->bootstrap_len);
|
||||
mem = (void *)restorer_get_vma_hint(&vmas->h, &self_vmas.h,
|
||||
shstk_min_mmap_addr(&task_args->shstk, kdat.mmap_min_addr),
|
||||
task_args->bootstrap_len);
|
||||
if (mem == (void *)-1) {
|
||||
pr_err("No suitable area for task_restore bootstrap (%ldK)\n", task_args->bootstrap_len);
|
||||
goto err;
|
||||
|
|
@ -3442,6 +3467,10 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns
|
|||
* self-vmas are unmaped.
|
||||
*/
|
||||
mem += rst_mem_size;
|
||||
|
||||
shstk_set_restorer_stack(&task_args->shstk, mem);
|
||||
mem += shstk_restorer_stack_size();
|
||||
|
||||
task_args->vdso_rt_parked_at = (unsigned long)mem;
|
||||
task_args->vdso_maps_rt = vdso_maps_rt;
|
||||
task_args->vdso_rt_size = vdso_rt_size;
|
||||
|
|
|
|||
|
|
@ -283,15 +283,122 @@ int exec_rpc_query_external_files(char *name, int sk)
|
|||
return ret;
|
||||
}
|
||||
|
||||
static char images_dir[PATH_MAX];
|
||||
static int resolve_images_dir_path(char *images_dir_path,
|
||||
bool imgs_changed_by_rpc_conf,
|
||||
const CriuOpts *req,
|
||||
pid_t peer_pid)
|
||||
{
|
||||
/*
|
||||
* images_dir_fd is a required RPC parameter with -1 as default value.
|
||||
*
|
||||
* This assumes that if opts.imgs_dir is set, we have a value
|
||||
* from the configuration file parser. The test to see that
|
||||
* imgs_changed_by_rpc_conf is true is used to make sure the value
|
||||
* is from the RPC configuration file. The idea is that only the
|
||||
* RPC configuration file is able to overwrite RPC settings:
|
||||
* * apply_config(global_conf)
|
||||
* * apply_config(user_conf)
|
||||
* * apply_config(environment variable)
|
||||
* * apply_rpc_options()
|
||||
* * apply_config(rpc_conf)
|
||||
*/
|
||||
if (imgs_changed_by_rpc_conf) {
|
||||
strncpy(images_dir_path, opts.imgs_dir, PATH_MAX - 1);
|
||||
images_dir_path[PATH_MAX - 1] = '\0';
|
||||
} else if (req->images_dir_fd != -1) {
|
||||
snprintf(images_dir_path, PATH_MAX, "/proc/%d/fd/%d", peer_pid, req->images_dir_fd);
|
||||
} else if (req->images_dir) {
|
||||
strncpy(images_dir_path, req->images_dir, PATH_MAX - 1);
|
||||
images_dir_path[PATH_MAX - 1] = '\0';
|
||||
} else {
|
||||
/*
|
||||
* Since images dir is not required in CHECK mode, we need to
|
||||
* check for work_dir_fd in setup_images_and_workdir()
|
||||
*/
|
||||
if (opts.mode == CR_CHECK)
|
||||
return 0;
|
||||
pr_err("Neither images_dir_fd nor images_dir was passed by RPC client.\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int setup_images_and_workdir(const char *images_dir_path,
|
||||
bool work_changed_by_rpc_conf,
|
||||
CriuOpts *req,
|
||||
pid_t peer_pid)
|
||||
{
|
||||
char work_dir_path[PATH_MAX] = "";
|
||||
|
||||
/* We don't need to open images dir in CHECK mode. */
|
||||
if (opts.mode != CR_CHECK) {
|
||||
/*
|
||||
* Image streaming is not supported with CRIU's service feature as
|
||||
* the streamer must be started for each dump/restore operation.
|
||||
* It is unclear how to do that with RPC, so we punt for now.
|
||||
* This explains why we provide the argument mode=-1 instead of
|
||||
* O_RSTR or O_DUMP.
|
||||
*/
|
||||
if (open_image_dir(images_dir_path, -1) < 0) {
|
||||
pr_perror("Can't open images directory");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
if (work_changed_by_rpc_conf)
|
||||
strncpy(work_dir_path, opts.work_dir, PATH_MAX - 1);
|
||||
else if (req->has_work_dir_fd)
|
||||
sprintf(work_dir_path, "/proc/%d/fd/%d", peer_pid, req->work_dir_fd);
|
||||
else if (opts.work_dir)
|
||||
strncpy(work_dir_path, opts.work_dir, PATH_MAX - 1);
|
||||
else if (images_dir_path[0] != '\0')
|
||||
strcpy(work_dir_path, images_dir_path);
|
||||
|
||||
if (work_dir_path[0] == '\0') {
|
||||
pr_err("images-dir or work-dir is required when using log file\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (chdir(work_dir_path)) {
|
||||
pr_perror("Can't chdir to work_dir");
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Select the log destination and log level for an RPC request and
 * initialise logging.
 *
 * opts.output is chosen as follows:
 *  - req->log_file, when it is set and the RPC configuration file did not
 *    already provide an output (output_changed_by_rpc_conf is false); the
 *    name must not contain '/';
 *  - NULL (stderr), when the request asks for log_to_stderr and the RPC
 *    configuration file did not provide an output;
 *  - DEFAULT_LOG_FILENAME, when nothing else has set opts.output.
 *
 * opts.log_level is taken from req->log_level.
 *
 * Returns 0 on success and -1 if the log file name is rejected or
 * log_init() fails.
 */
static int setup_logging_from_req(CriuOpts *req, bool output_changed_by_rpc_conf)
{
	if (req->log_file && !output_changed_by_rpc_conf) {
		if (strchr(req->log_file, '/')) {
			/*
			 * This is a validation failure, not a failed system
			 * call, so report it without an errno description.
			 */
			pr_err("No subdirs are allowed in log_file name\n");
			return -1;
		}
		SET_CHAR_OPTS(output, req->log_file);
	} else if (req->has_log_to_stderr && req->log_to_stderr && !output_changed_by_rpc_conf) {
		xfree(opts.output);
		opts.output = NULL; /* log_init(NULL) writes to stderr */
	} else if (!opts.output) {
		SET_CHAR_OPTS(output, DEFAULT_LOG_FILENAME);
	}

	opts.log_level = req->log_level;
	log_set_loglevel(opts.log_level);
	if (log_init(opts.output)) {
		pr_perror("Can't initiate log");
		return -1;
	}

	return 0;
}
|
||||
|
||||
static int setup_opts_from_req(int sk, CriuOpts *req)
|
||||
{
|
||||
struct ucred ids;
|
||||
struct stat st;
|
||||
socklen_t ids_len = sizeof(struct ucred);
|
||||
char images_dir_path[PATH_MAX];
|
||||
char work_dir_path[PATH_MAX];
|
||||
char images_dir_path[PATH_MAX] = "";
|
||||
char status_fd[PATH_MAX];
|
||||
bool output_changed_by_rpc_conf = false;
|
||||
bool work_changed_by_rpc_conf = false;
|
||||
|
|
@ -304,6 +411,23 @@ static int setup_opts_from_req(int sk, CriuOpts *req)
|
|||
goto err;
|
||||
}
|
||||
|
||||
/*
|
||||
* The options relevant in CHECK mode are: log_file, log_to_stderr, and log_level.
|
||||
* When logging to a file, we also need to resolve images_dir and work_dir.
|
||||
*/
|
||||
if (opts.mode == CR_CHECK) {
|
||||
if (!req)
|
||||
return 0; /* nothing to do */
|
||||
|
||||
/*
|
||||
* A log file is needed only if:
|
||||
* - log_file is explicitly set, or
|
||||
* - log_to_stderr is NOT requested (i.e., using DEFAULT_LOG_FILENAME)
|
||||
*/
|
||||
if (!req->log_file || (req->has_log_to_stderr && req->log_to_stderr))
|
||||
return 0; /* no log file, don't require images_dir or work_dir */
|
||||
}
|
||||
|
||||
if (fstat(sk, &st)) {
|
||||
pr_perror("Can't get socket stat");
|
||||
goto err;
|
||||
|
|
@ -312,165 +436,9 @@ static int setup_opts_from_req(int sk, CriuOpts *req)
|
|||
BUG_ON(st.st_ino == -1);
|
||||
service_sk_ino = st.st_ino;
|
||||
|
||||
/*
|
||||
* Evaluate an additional configuration file if specified.
|
||||
* This needs to happen twice, because it is needed early to detect
|
||||
* things like work_dir, imgs_dir and logfile. The second parsing
|
||||
* of the optional RPC configuration file happens at the end and
|
||||
* overwrites all options set via RPC.
|
||||
*/
|
||||
if (req->config_file) {
|
||||
char *tmp_output = opts.output;
|
||||
char *tmp_work = opts.work_dir;
|
||||
char *tmp_imgs = opts.imgs_dir;
|
||||
|
||||
opts.output = NULL;
|
||||
opts.work_dir = NULL;
|
||||
opts.imgs_dir = NULL;
|
||||
|
||||
rpc_cfg_file = req->config_file;
|
||||
i = parse_options(0, NULL, &dummy, &dummy, PARSING_RPC_CONF);
|
||||
if (i) {
|
||||
xfree(tmp_output);
|
||||
xfree(tmp_work);
|
||||
xfree(tmp_imgs);
|
||||
goto err;
|
||||
}
|
||||
/* If this is non-NULL, the RPC configuration file had a value, use it.*/
|
||||
if (opts.output)
|
||||
output_changed_by_rpc_conf = true;
|
||||
/* If this is NULL, use the old value if it was set. */
|
||||
if (!opts.output && tmp_output) {
|
||||
opts.output = tmp_output;
|
||||
tmp_output = NULL;
|
||||
}
|
||||
|
||||
if (opts.work_dir)
|
||||
work_changed_by_rpc_conf = true;
|
||||
if (!opts.work_dir && tmp_work) {
|
||||
opts.work_dir = tmp_work;
|
||||
tmp_work = NULL;
|
||||
}
|
||||
|
||||
if (opts.imgs_dir)
|
||||
imgs_changed_by_rpc_conf = true;
|
||||
/*
|
||||
* As the images directory is a required RPC setting, it is not
|
||||
* necessary to use the value from other configuration files.
|
||||
* Either it is set in the RPC configuration file or it is set
|
||||
* via RPC.
|
||||
*/
|
||||
xfree(tmp_output);
|
||||
xfree(tmp_work);
|
||||
xfree(tmp_imgs);
|
||||
}
|
||||
|
||||
/*
|
||||
* open images_dir - images_dir_fd is a required RPC parameter
|
||||
*
|
||||
* This assumes that if opts.imgs_dir is set we have a value
|
||||
* from the configuration file parser. The test to see that
|
||||
* imgs_changed_by_rpc_conf is true is used to make sure the value
|
||||
* is from the RPC configuration file.
|
||||
* The idea is that only the RPC configuration file is able to
|
||||
* overwrite RPC settings:
|
||||
* * apply_config(global_conf)
|
||||
* * apply_config(user_conf)
|
||||
* * apply_config(environment variable)
|
||||
* * apply_rpc_options()
|
||||
* * apply_config(rpc_conf)
|
||||
*/
|
||||
if (imgs_changed_by_rpc_conf)
|
||||
strncpy(images_dir_path, opts.imgs_dir, PATH_MAX - 1);
|
||||
else if (req->images_dir_fd != -1)
|
||||
sprintf(images_dir_path, "/proc/%d/fd/%d", ids.pid, req->images_dir_fd);
|
||||
else if (req->images_dir)
|
||||
strncpy(images_dir_path, req->images_dir, PATH_MAX - 1);
|
||||
else {
|
||||
pr_err("Neither images_dir_fd nor images_dir was passed by RPC client.\n");
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (req->parent_img)
|
||||
SET_CHAR_OPTS(img_parent, req->parent_img);
|
||||
|
||||
/*
|
||||
* Image streaming is not supported with CRIU's service feature as
|
||||
* the streamer must be started for each dump/restore operation.
|
||||
* It is unclear how to do that with RPC, so we punt for now.
|
||||
* This explains why we provide the argument mode=-1 instead of
|
||||
* O_RSTR or O_DUMP.
|
||||
*/
|
||||
if (open_image_dir(images_dir_path, -1) < 0) {
|
||||
pr_perror("Can't open images directory");
|
||||
goto err;
|
||||
}
|
||||
|
||||
/* get full path to images_dir to use in process title */
|
||||
if (readlink(images_dir_path, images_dir, PATH_MAX) == -1) {
|
||||
pr_perror("Can't readlink %s", images_dir_path);
|
||||
goto err;
|
||||
}
|
||||
|
||||
/* chdir to work dir */
|
||||
if (work_changed_by_rpc_conf)
|
||||
/* Use the value from the RPC configuration file first. */
|
||||
strncpy(work_dir_path, opts.work_dir, PATH_MAX - 1);
|
||||
else if (req->has_work_dir_fd)
|
||||
/* Use the value set via RPC. */
|
||||
sprintf(work_dir_path, "/proc/%d/fd/%d", ids.pid, req->work_dir_fd);
|
||||
else if (opts.work_dir)
|
||||
/* Use the value from one of the other configuration files. */
|
||||
strncpy(work_dir_path, opts.work_dir, PATH_MAX - 1);
|
||||
else
|
||||
/* Use the images directory a work directory. */
|
||||
strcpy(work_dir_path, images_dir_path);
|
||||
|
||||
if (chdir(work_dir_path)) {
|
||||
pr_perror("Can't chdir to work_dir");
|
||||
goto err;
|
||||
}
|
||||
|
||||
/* initiate log file in work dir */
|
||||
if (req->log_file && !output_changed_by_rpc_conf) {
|
||||
/*
|
||||
* If RPC sets a log file and if there nothing from the
|
||||
* RPC configuration file, use the RPC value.
|
||||
*/
|
||||
if (strchr(req->log_file, '/')) {
|
||||
pr_perror("No subdirs are allowed in log_file name");
|
||||
goto err;
|
||||
}
|
||||
|
||||
SET_CHAR_OPTS(output, req->log_file);
|
||||
} else if (req->has_log_to_stderr && req->log_to_stderr && !output_changed_by_rpc_conf) {
|
||||
xfree(opts.output);
|
||||
opts.output = NULL;
|
||||
} else if (!opts.output) {
|
||||
SET_CHAR_OPTS(output, DEFAULT_LOG_FILENAME);
|
||||
}
|
||||
|
||||
/* This is needed later to correctly set the log_level */
|
||||
opts.log_level = req->log_level;
|
||||
log_set_loglevel(req->log_level);
|
||||
if (log_init(opts.output) == -1) {
|
||||
pr_perror("Can't initiate log");
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (req->config_file) {
|
||||
pr_debug("Would overwrite RPC settings with values from %s\n", req->config_file);
|
||||
}
|
||||
|
||||
if (req->has_unprivileged)
|
||||
opts.unprivileged = req->unprivileged;
|
||||
|
||||
if (check_caps())
|
||||
return 1;
|
||||
|
||||
if (kerndat_init())
|
||||
return 1;
|
||||
|
||||
if (log_keep_err()) {
|
||||
pr_perror("Can't tune log");
|
||||
goto err;
|
||||
|
|
@ -753,14 +721,6 @@ static int setup_opts_from_req(int sk, CriuOpts *req)
|
|||
if (req->empty_ns & ~(CLONE_NEWNET))
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (req->n_irmap_scan_paths) {
|
||||
for (i = 0; i < req->n_irmap_scan_paths; i++) {
|
||||
if (irmap_scan_path_add(req->irmap_scan_paths[i]))
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
|
||||
if (req->has_status_fd) {
|
||||
pr_warn("status_fd is obsoleted; use status-ready notification instead\n");
|
||||
|
||||
|
|
@ -772,28 +732,95 @@ static int setup_opts_from_req(int sk, CriuOpts *req)
|
|||
}
|
||||
}
|
||||
|
||||
if (req->has_pidfd_store_sk && init_pidfd_store_sk(ids.pid, req->pidfd_store_sk))
|
||||
goto err;
|
||||
|
||||
if (req->orphan_pts_master)
|
||||
opts.orphan_pts_master = true;
|
||||
|
||||
if (req->has_display_stats)
|
||||
opts.display_stats = req->display_stats;
|
||||
|
||||
/* Evaluate additional configuration file a second time to overwrite
|
||||
* all RPC settings. */
|
||||
/* Evaluate additional configuration file (e.g., runc.conf) to overwrite all RPC settings. */
|
||||
if (req->config_file) {
|
||||
char *tmp_output = opts.output;
|
||||
char *tmp_work = opts.work_dir;
|
||||
|
||||
opts.output = NULL;
|
||||
opts.work_dir = NULL;
|
||||
|
||||
/*
|
||||
* As the images directory is a required RPC setting, it is not
|
||||
* necessary to use the value from other configuration files.
|
||||
* Either it is set in the RPC configuration file or it is set
|
||||
* via RPC.
|
||||
*/
|
||||
xfree(opts.imgs_dir);
|
||||
opts.imgs_dir = NULL;
|
||||
|
||||
pr_debug("Would overwrite RPC settings with values from %s\n", req->config_file);
|
||||
|
||||
rpc_cfg_file = req->config_file;
|
||||
i = parse_options(0, NULL, &dummy, &dummy, PARSING_RPC_CONF);
|
||||
if (i)
|
||||
if (i) {
|
||||
xfree(tmp_output);
|
||||
xfree(tmp_work);
|
||||
goto err;
|
||||
}
|
||||
|
||||
/* If opts.{output,work_dir} is non-NULL, the RPC configuration file had a value, use it.*/
|
||||
/* If opts.{output,work_dir} is NULL, use the old value if it was set. */
|
||||
if (opts.output) {
|
||||
output_changed_by_rpc_conf = true;
|
||||
} else {
|
||||
opts.output = tmp_output;
|
||||
tmp_output = NULL;
|
||||
}
|
||||
|
||||
if (opts.work_dir) {
|
||||
work_changed_by_rpc_conf = true;
|
||||
} else {
|
||||
opts.work_dir = tmp_work;
|
||||
tmp_work = NULL;
|
||||
}
|
||||
|
||||
if (opts.imgs_dir)
|
||||
imgs_changed_by_rpc_conf = true;
|
||||
|
||||
xfree(tmp_output);
|
||||
xfree(tmp_work);
|
||||
}
|
||||
|
||||
if (resolve_images_dir_path(images_dir_path, imgs_changed_by_rpc_conf, req, ids.pid) < 0)
|
||||
goto err;
|
||||
|
||||
if (req->parent_img)
|
||||
SET_CHAR_OPTS(img_parent, req->parent_img);
|
||||
|
||||
if (setup_images_and_workdir(images_dir_path, work_changed_by_rpc_conf, req, ids.pid))
|
||||
goto err;
|
||||
|
||||
if (req->n_irmap_scan_paths) {
|
||||
for (i = 0; i < req->n_irmap_scan_paths; i++) {
|
||||
if (irmap_scan_path_add(req->irmap_scan_paths[i]))
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
|
||||
/* initiate log file in work dir */
|
||||
if (setup_logging_from_req(req, output_changed_by_rpc_conf))
|
||||
goto err;
|
||||
|
||||
if (check_caps())
|
||||
goto err;
|
||||
|
||||
if (kerndat_init())
|
||||
goto err;
|
||||
|
||||
/* init_pidfd_store_sk must be called after kerndat_init. */
|
||||
if (req->has_pidfd_store_sk && init_pidfd_store_sk(ids.pid, req->pidfd_store_sk))
|
||||
goto err;
|
||||
|
||||
if (req->mntns_compat_mode)
|
||||
opts.mntns_compat_mode = true;
|
||||
|
||||
log_set_loglevel(opts.log_level);
|
||||
if (check_options())
|
||||
goto err;
|
||||
|
||||
|
|
@ -813,7 +840,7 @@ static int dump_using_req(int sk, CriuOpts *req)
|
|||
if (setup_opts_from_req(sk, req))
|
||||
goto exit;
|
||||
|
||||
__setproctitle("dump --rpc -t %d -D %s", req->pid, images_dir);
|
||||
__setproctitle("dump --rpc -t %d", req->pid);
|
||||
|
||||
if (init_pidfd_store_hash())
|
||||
goto pidfd_store_err;
|
||||
|
|
@ -856,7 +883,7 @@ static int restore_using_req(int sk, CriuOpts *req)
|
|||
if (setup_opts_from_req(sk, req))
|
||||
goto exit;
|
||||
|
||||
__setproctitle("restore --rpc -D %s", images_dir);
|
||||
__setproctitle("restore --rpc");
|
||||
|
||||
if (cr_restore_tasks())
|
||||
goto exit;
|
||||
|
|
@ -895,6 +922,11 @@ static int check(int sk, CriuOpts *req)
|
|||
|
||||
resp.type = CRIU_REQ_TYPE__CHECK;
|
||||
|
||||
if (log_keep_err()) {
|
||||
pr_perror("Can't tune log");
|
||||
goto out;
|
||||
}
|
||||
|
||||
pid = fork();
|
||||
if (pid < 0) {
|
||||
pr_perror("Can't fork");
|
||||
|
|
@ -919,6 +951,7 @@ static int check(int sk, CriuOpts *req)
|
|||
|
||||
resp.success = true;
|
||||
out:
|
||||
set_resp_err(&resp);
|
||||
return send_criu_msg(sk, &resp);
|
||||
}
|
||||
|
||||
|
|
@ -927,6 +960,11 @@ static int pre_dump_using_req(int sk, CriuOpts *req, bool single)
|
|||
int pid, status;
|
||||
bool success = false;
|
||||
|
||||
if (log_keep_err()) {
|
||||
pr_perror("Can't tune log");
|
||||
goto out;
|
||||
}
|
||||
|
||||
pid = fork();
|
||||
if (pid < 0) {
|
||||
pr_perror("Can't fork");
|
||||
|
|
@ -940,7 +978,7 @@ static int pre_dump_using_req(int sk, CriuOpts *req, bool single)
|
|||
if (setup_opts_from_req(sk, req))
|
||||
goto cout;
|
||||
|
||||
__setproctitle("pre-dump --rpc -t %d -D %s", req->pid, images_dir);
|
||||
__setproctitle("pre-dump --rpc -t %d", req->pid);
|
||||
|
||||
if (init_pidfd_store_hash())
|
||||
goto pidfd_store_err;
|
||||
|
|
@ -1005,6 +1043,11 @@ static int start_page_server_req(int sk, CriuOpts *req, bool daemon_mode)
|
|||
CriuPageServerInfo ps = CRIU_PAGE_SERVER_INFO__INIT;
|
||||
struct ps_info info;
|
||||
|
||||
if (log_keep_err()) {
|
||||
pr_perror("Can't tune log");
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (pipe(start_pipe)) {
|
||||
pr_perror("No start pipe");
|
||||
goto out;
|
||||
|
|
@ -1078,6 +1121,7 @@ static int start_page_server_req(int sk, CriuOpts *req, bool daemon_mode)
|
|||
out:
|
||||
resp.type = CRIU_REQ_TYPE__PAGE_SERVER;
|
||||
resp.success = success;
|
||||
set_resp_err(&resp);
|
||||
return send_criu_msg(sk, &resp);
|
||||
}
|
||||
|
||||
|
|
@ -1252,6 +1296,11 @@ static int handle_cpuinfo(int sk, CriuReq *msg)
|
|||
bool success = false;
|
||||
int pid, status;
|
||||
|
||||
if (log_keep_err()) {
|
||||
pr_perror("Can't tune log");
|
||||
goto out;
|
||||
}
|
||||
|
||||
pid = fork();
|
||||
if (pid < 0) {
|
||||
pr_perror("Can't fork");
|
||||
|
|
@ -1261,12 +1310,11 @@ static int handle_cpuinfo(int sk, CriuReq *msg)
|
|||
if (pid == 0) {
|
||||
int ret = 1;
|
||||
|
||||
opts.mode = CR_CPUINFO;
|
||||
opts.mode = (msg->type == CRIU_REQ_TYPE__CPUINFO_DUMP) ? CR_CPUINFO_DUMP : CR_CPUINFO_CHECK;
|
||||
if (setup_opts_from_req(sk, msg->opts))
|
||||
goto cout;
|
||||
|
||||
__setproctitle("cpuinfo %s --rpc -D %s", msg->type == CRIU_REQ_TYPE__CPUINFO_DUMP ? "dump" : "check",
|
||||
images_dir);
|
||||
__setproctitle("cpuinfo %s --rpc", msg->type == CRIU_REQ_TYPE__CPUINFO_DUMP ? "dump" : "check");
|
||||
|
||||
if (msg->type == CRIU_REQ_TYPE__CPUINFO_DUMP)
|
||||
ret = cpuinfo_dump();
|
||||
|
|
@ -1301,7 +1349,7 @@ static int handle_cpuinfo(int sk, CriuReq *msg)
|
|||
out:
|
||||
resp.type = msg->type;
|
||||
resp.success = success;
|
||||
|
||||
set_resp_err(&resp);
|
||||
return send_criu_msg(sk, &resp);
|
||||
}
|
||||
|
||||
|
|
|
|||
165
criu/crtools.c
165
criu/crtools.c
|
|
@ -54,19 +54,17 @@ void flush_early_log_to_stderr(void)
|
|||
flush_early_log_buffer(STDERR_FILENO);
|
||||
}
|
||||
|
||||
static int image_dir_mode(char *argv[], int optind)
|
||||
static int image_dir_mode(void)
|
||||
{
|
||||
switch (opts.mode) {
|
||||
case CR_DUMP:
|
||||
/* fallthrough */
|
||||
case CR_CPUINFO_DUMP:
|
||||
/* fallthrough */
|
||||
case CR_PRE_DUMP:
|
||||
return O_DUMP;
|
||||
case CR_RESTORE:
|
||||
return O_RSTR;
|
||||
case CR_CPUINFO:
|
||||
if (!strcmp(argv[optind + 1], "dump"))
|
||||
return O_DUMP;
|
||||
/* fallthrough */
|
||||
default:
|
||||
return -1;
|
||||
}
|
||||
|
|
@ -76,36 +74,55 @@ static int image_dir_mode(char *argv[], int optind)
|
|||
return -1;
|
||||
}
|
||||
|
||||
static int parse_criu_mode(char *mode)
|
||||
{
|
||||
if (!strcmp(mode, "dump"))
|
||||
opts.mode = CR_DUMP;
|
||||
else if (!strcmp(mode, "pre-dump"))
|
||||
opts.mode = CR_PRE_DUMP;
|
||||
else if (!strcmp(mode, "restore"))
|
||||
opts.mode = CR_RESTORE;
|
||||
else if (!strcmp(mode, "lazy-pages"))
|
||||
opts.mode = CR_LAZY_PAGES;
|
||||
else if (!strcmp(mode, "check"))
|
||||
opts.mode = CR_CHECK;
|
||||
else if (!strcmp(mode, "page-server"))
|
||||
opts.mode = CR_PAGE_SERVER;
|
||||
else if (!strcmp(mode, "service"))
|
||||
opts.mode = CR_SERVICE;
|
||||
else if (!strcmp(mode, "swrk"))
|
||||
opts.mode = CR_SWRK;
|
||||
else if (!strcmp(mode, "dedup"))
|
||||
opts.mode = CR_DEDUP;
|
||||
else if (!strcmp(mode, "cpuinfo"))
|
||||
opts.mode = CR_CPUINFO;
|
||||
else if (!strcmp(mode, "exec"))
|
||||
opts.mode = CR_EXEC_DEPRECATED;
|
||||
else if (!strcmp(mode, "show"))
|
||||
opts.mode = CR_SHOW_DEPRECATED;
|
||||
else
|
||||
return -1;
|
||||
struct {
|
||||
char *cmd;
|
||||
int mode;
|
||||
} commands[] = {
|
||||
{ "dump", CR_DUMP },
|
||||
{ "pre-dump", CR_PRE_DUMP },
|
||||
{ "restore", CR_RESTORE },
|
||||
{ "lazy-pages", CR_LAZY_PAGES },
|
||||
{ "check", CR_CHECK },
|
||||
{ "page-server", CR_PAGE_SERVER },
|
||||
{ "service", CR_SERVICE },
|
||||
{ "swrk", CR_SWRK },
|
||||
{ "dedup", CR_DEDUP },
|
||||
{ "exec", CR_EXEC_DEPRECATED },
|
||||
{ "show", CR_SHOW_DEPRECATED },
|
||||
};
|
||||
|
||||
return 0;
|
||||
static int parse_criu_mode(int argc, char **argv, int *optind)
|
||||
{
|
||||
char *cmd = argv[*optind];
|
||||
bool has_sub_command = (argc - *optind) > 1;
|
||||
char *subcommand = has_sub_command ? argv[*optind + 1] : NULL;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(commands); i++) {
|
||||
if (strcmp(cmd, commands[i].cmd))
|
||||
continue;
|
||||
opts.mode = commands[i].mode;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!strcmp(cmd, "cpuinfo")) {
|
||||
if (subcommand == NULL) {
|
||||
pr_err("cpuinfo requires an action: dump or check\n");
|
||||
return -1;
|
||||
}
|
||||
if (!strcmp(subcommand, "dump"))
|
||||
opts.mode = CR_CPUINFO_DUMP;
|
||||
else if (!strcmp(subcommand, "check"))
|
||||
opts.mode = CR_CPUINFO_CHECK;
|
||||
else {
|
||||
pr_err("unknown cpuinfo sub-command: %s\n", subcommand);
|
||||
return -1;
|
||||
}
|
||||
(*optind)++;
|
||||
return 0;
|
||||
}
|
||||
pr_err("unknown command: %s\n", argv[*optind]);
|
||||
return -1;
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[], char *envp[])
|
||||
|
|
@ -115,6 +132,7 @@ int main(int argc, char *argv[], char *envp[])
|
|||
bool has_exec_cmd = false;
|
||||
bool has_sub_command;
|
||||
int state = PARSING_GLOBAL_CONF;
|
||||
char *cmd;
|
||||
|
||||
BUILD_BUG_ON(CTL_32 != SYSCTL_TYPE__CTL_32);
|
||||
BUILD_BUG_ON(__CTL_STR != SYSCTL_TYPE__CTL_STR);
|
||||
|
|
@ -165,10 +183,11 @@ int main(int argc, char *argv[], char *envp[])
|
|||
return 1;
|
||||
}
|
||||
|
||||
if (parse_criu_mode(argv[optind])) {
|
||||
pr_err("unknown command: %s\n", argv[optind]);
|
||||
cmd = argv[optind];
|
||||
ret = parse_criu_mode(argc, argv, &optind);
|
||||
if (ret)
|
||||
goto usage;
|
||||
}
|
||||
|
||||
/*
|
||||
* util_init initializes criu_run_id and compel_run_id so that sockets
|
||||
* are generated with an unique name identifying the specific process
|
||||
|
|
@ -223,25 +242,19 @@ int main(int argc, char *argv[], char *envp[])
|
|||
return 1;
|
||||
memcpy(opts.exec_cmd, &argv[optind + 1], (argc - optind - 1) * sizeof(char *));
|
||||
opts.exec_cmd[argc - optind - 1] = NULL;
|
||||
} else {
|
||||
/* No subcommands except for cpuinfo and restore --exec-cmd */
|
||||
if (opts.mode != CR_CPUINFO && has_sub_command) {
|
||||
pr_err("excessive parameter%s for command %s\n", (argc - optind) > 2 ? "s" : "", argv[optind]);
|
||||
goto usage;
|
||||
} else if (opts.mode == CR_CPUINFO && !has_sub_command) {
|
||||
pr_err("cpuinfo requires an action: dump or check\n");
|
||||
goto usage;
|
||||
}
|
||||
} else if (has_sub_command) {
|
||||
pr_err("excessive parameter%s for command %s\n", (argc - optind) > 2 ? "s" : "", cmd);
|
||||
goto usage;
|
||||
}
|
||||
|
||||
if (opts.stream && image_dir_mode(argv, optind) == -1) {
|
||||
pr_err("--stream cannot be used with the %s command\n", argv[optind]);
|
||||
if (opts.stream && image_dir_mode() == -1) {
|
||||
pr_err("--stream cannot be used with the %s command\n", cmd);
|
||||
goto usage;
|
||||
}
|
||||
|
||||
/* We must not open imgs dir, if service is called */
|
||||
if (opts.mode != CR_SERVICE) {
|
||||
ret = open_image_dir(opts.imgs_dir, image_dir_mode(argv, optind));
|
||||
ret = open_image_dir(opts.imgs_dir, image_dir_mode());
|
||||
if (ret < 0) {
|
||||
pr_err("Couldn't open image dir %s\n", opts.imgs_dir);
|
||||
return 1;
|
||||
|
|
@ -286,14 +299,13 @@ int main(int argc, char *argv[], char *envp[])
|
|||
if (opts.img_parent)
|
||||
pr_info("Will do snapshot from %s\n", opts.img_parent);
|
||||
|
||||
if (opts.mode == CR_DUMP) {
|
||||
switch (opts.mode) {
|
||||
case CR_DUMP:
|
||||
if (!opts.tree_id)
|
||||
goto opt_pid_missing;
|
||||
|
||||
return cr_dump_tasks(opts.tree_id);
|
||||
}
|
||||
|
||||
if (opts.mode == CR_PRE_DUMP) {
|
||||
case CR_PRE_DUMP:
|
||||
if (!opts.tree_id)
|
||||
goto opt_pid_missing;
|
||||
|
||||
|
|
@ -303,9 +315,7 @@ int main(int argc, char *argv[], char *envp[])
|
|||
}
|
||||
|
||||
return cr_pre_dump_tasks(opts.tree_id) != 0;
|
||||
}
|
||||
|
||||
if (opts.mode == CR_RESTORE) {
|
||||
case CR_RESTORE:
|
||||
if (opts.tree_id)
|
||||
pr_warn("Using -t with criu restore is obsoleted\n");
|
||||
|
||||
|
|
@ -318,46 +328,41 @@ int main(int argc, char *argv[], char *envp[])
|
|||
}
|
||||
|
||||
return ret != 0;
|
||||
}
|
||||
|
||||
if (opts.mode == CR_LAZY_PAGES)
|
||||
case CR_LAZY_PAGES:
|
||||
return cr_lazy_pages(opts.daemon_mode) != 0;
|
||||
|
||||
if (opts.mode == CR_CHECK)
|
||||
case CR_CHECK:
|
||||
return cr_check() != 0;
|
||||
|
||||
if (opts.mode == CR_PAGE_SERVER)
|
||||
case CR_PAGE_SERVER:
|
||||
return cr_page_server(opts.daemon_mode, false, -1) != 0;
|
||||
|
||||
if (opts.mode == CR_SERVICE)
|
||||
case CR_SERVICE:
|
||||
return cr_service(opts.daemon_mode);
|
||||
|
||||
if (opts.mode == CR_DEDUP)
|
||||
case CR_DEDUP:
|
||||
return cr_dedup() != 0;
|
||||
|
||||
if (opts.mode == CR_CPUINFO) {
|
||||
if (!argv[optind + 1]) {
|
||||
pr_err("cpuinfo requires an action: dump or check\n");
|
||||
goto usage;
|
||||
}
|
||||
if (!strcmp(argv[optind + 1], "dump"))
|
||||
return cpuinfo_dump();
|
||||
else if (!strcmp(argv[optind + 1], "check"))
|
||||
return cpuinfo_check();
|
||||
}
|
||||
case CR_CPUINFO_DUMP:
|
||||
return cpuinfo_dump();
|
||||
|
||||
if (opts.mode == CR_EXEC_DEPRECATED) {
|
||||
case CR_CPUINFO_CHECK:
|
||||
return cpuinfo_check();
|
||||
|
||||
case CR_EXEC_DEPRECATED:
|
||||
pr_err("The \"exec\" action is deprecated by the Compel library.\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (opts.mode == CR_SHOW_DEPRECATED) {
|
||||
case CR_SHOW_DEPRECATED:
|
||||
pr_err("The \"show\" action is deprecated by the CRIT utility.\n");
|
||||
pr_err("To view an image use the \"crit decode -i $name --pretty\" command.\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
pr_err("unknown command: %s\n", argv[optind]);
|
||||
case CR_UNSET:
|
||||
default:
|
||||
pr_err("unknown command: %s\n", cmd);
|
||||
}
|
||||
usage:
|
||||
pr_msg("\n"
|
||||
"Usage:\n"
|
||||
|
|
@ -421,7 +426,7 @@ usage:
|
|||
" --network-lock METHOD network locking/unlocking method; argument\n"
|
||||
" can be 'nftables' or 'iptables' (default).\n"
|
||||
" --unprivileged accept limitations when running as non-root\n"
|
||||
" consult documentation for further details\n"
|
||||
" --allow-uprobes allow dump/restore with uprobes vma\n"
|
||||
"\n"
|
||||
"* External resources support:\n"
|
||||
" --external RES dump objects from this list as external resources:\n"
|
||||
|
|
@ -498,8 +503,8 @@ usage:
|
|||
" Inherit file descriptors, treating fd NUM as being\n"
|
||||
" already opened via an existing RES, which can be:\n"
|
||||
" tty[rdev:dev]\n"
|
||||
" pipe[inode]\n"
|
||||
" socket[inode]\n"
|
||||
" pipe:[inode]\n"
|
||||
" socket:[inode]\n"
|
||||
" file[mnt_id:inode]\n"
|
||||
" /memfd:name\n"
|
||||
" path/to/file\n"
|
||||
|
|
|
|||
|
|
@ -45,10 +45,11 @@ static int open_fd(struct file_desc *d, int *new_fd)
|
|||
{
|
||||
struct ext_file_info *xfi;
|
||||
int fd;
|
||||
bool retry_needed;
|
||||
|
||||
xfi = container_of(d, struct ext_file_info, d);
|
||||
|
||||
fd = run_plugins(RESTORE_EXT_FILE, xfi->xfe->id);
|
||||
fd = run_plugins(RESTORE_EXT_FILE, xfi->xfe->id, &retry_needed);
|
||||
if (fd < 0) {
|
||||
pr_err("Unable to restore %#x\n", xfi->xfe->id);
|
||||
return -1;
|
||||
|
|
@ -57,8 +58,11 @@ static int open_fd(struct file_desc *d, int *new_fd)
|
|||
if (restore_fown(fd, xfi->xfe->fown))
|
||||
return -1;
|
||||
|
||||
*new_fd = fd;
|
||||
return 0;
|
||||
if (!retry_needed)
|
||||
*new_fd = fd;
|
||||
else
|
||||
*new_fd = -1;
|
||||
return retry_needed;
|
||||
}
|
||||
|
||||
static struct file_desc_ops ext_desc_ops = {
|
||||
|
|
|
|||
|
|
@ -1329,7 +1329,6 @@ int prepare_fds(struct pstree_item *me)
|
|||
}
|
||||
}
|
||||
|
||||
BUG_ON(current->pid->state == TASK_HELPER);
|
||||
ret = open_fdinfos(me);
|
||||
|
||||
if (rsti(me)->fdt)
|
||||
|
|
|
|||
|
|
@ -95,6 +95,11 @@ int check_img_inventory(bool restore)
|
|||
goto out_err;
|
||||
}
|
||||
|
||||
if (restore && he->allow_uprobes && !opts.allow_uprobes) {
|
||||
pr_err("Dumped with --" OPT_ALLOW_UPROBES ". Need to set it on restore as well.\n");
|
||||
goto out_err;
|
||||
}
|
||||
|
||||
if (restore) {
|
||||
if (!he->has_network_lock_method) {
|
||||
/*
|
||||
|
|
@ -712,7 +717,7 @@ struct cr_img *img_from_fd(int fd)
|
|||
* This is used when opts.stream is enabled for picking the right streamer
|
||||
* socket name. `mode` is ignored when opts.stream is not enabled.
|
||||
*/
|
||||
int open_image_dir(char *dir, int mode)
|
||||
int open_image_dir(const char *dir, int mode)
|
||||
{
|
||||
int fd, ret;
|
||||
|
||||
|
|
|
|||
|
|
@ -125,7 +125,8 @@ enum criu_mode {
|
|||
CR_SERVICE,
|
||||
CR_SWRK,
|
||||
CR_DEDUP,
|
||||
CR_CPUINFO,
|
||||
CR_CPUINFO_DUMP,
|
||||
CR_CPUINFO_CHECK,
|
||||
CR_EXEC_DEPRECATED,
|
||||
CR_SHOW_DEPRECATED,
|
||||
};
|
||||
|
|
@ -195,6 +196,7 @@ struct cr_options {
|
|||
char *work_dir;
|
||||
int network_lock_method;
|
||||
int skip_file_rwx_check;
|
||||
int allow_uprobes;
|
||||
|
||||
/*
|
||||
* When we schedule some functionality for removal we first
|
||||
|
|
|
|||
|
|
@ -60,6 +60,12 @@ enum {
|
|||
|
||||
CR_PLUGIN_HOOK__CHECKPOINT_DEVICES = 11,
|
||||
|
||||
CR_PLUGIN_HOOK__POST_FORKING = 12,
|
||||
|
||||
CR_PLUGIN_HOOK__RESTORE_INIT = 13,
|
||||
|
||||
CR_PLUGIN_HOOK__DUMP_DEVICES_LATE = 14,
|
||||
|
||||
CR_PLUGIN_HOOK__MAX
|
||||
};
|
||||
|
||||
|
|
@ -68,7 +74,7 @@ enum {
|
|||
DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__DUMP_UNIX_SK, int fd, int id);
|
||||
DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__RESTORE_UNIX_SK, int id);
|
||||
DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__DUMP_EXT_FILE, int fd, int id);
|
||||
DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__RESTORE_EXT_FILE, int id);
|
||||
DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__RESTORE_EXT_FILE, int id, bool *retry_needed);
|
||||
DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__DUMP_EXT_MOUNT, char *mountpoint, int id);
|
||||
DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__RESTORE_EXT_MOUNT, int id, char *mountpoint, char *old_root, int *is_file);
|
||||
DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__DUMP_EXT_LINK, int index, int type, char *kind);
|
||||
|
|
@ -78,6 +84,9 @@ DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__UPDATE_VMA_MAP, const char *path, const
|
|||
DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__RESUME_DEVICES_LATE, int pid);
|
||||
DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__PAUSE_DEVICES, int pid);
|
||||
DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__CHECKPOINT_DEVICES, int pid);
|
||||
DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__POST_FORKING, void);
|
||||
DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__RESTORE_INIT, void);
|
||||
DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__DUMP_DEVICES_LATE, int id);
|
||||
|
||||
enum {
|
||||
CR_PLUGIN_STAGE__DUMP,
|
||||
|
|
@ -152,5 +161,6 @@ typedef int(cr_plugin_handle_device_vma_t)(int fd, const struct stat *stat);
|
|||
typedef int(cr_plugin_update_vma_map_t)(const char *path, const uint64_t addr, const uint64_t old_pgoff,
|
||||
uint64_t *new_pgoff, int *plugin_fd);
|
||||
typedef int(cr_plugin_resume_devices_late_t)(int pid);
|
||||
typedef int(cr_plugin_post_forking_t)(void);
|
||||
|
||||
#endif /* __CRIU_PLUGIN_H__ */
|
||||
|
|
|
|||
|
|
@ -68,6 +68,18 @@
|
|||
* processing exiting with error; while the rest of bits
|
||||
* are part of image ABI, this particular one must never
|
||||
* be used in image.
|
||||
* - guard
|
||||
* stands for a fake VMA (not represented in the kernel
|
||||
* by a struct vm_area_struct). Used to keep an information
|
||||
* about virtual address space ranges covered by
|
||||
* MADV_GUARD_INSTALL guards. These ones must be always at
|
||||
* the end of the vma_area_list and properly skipped a.e.
|
||||
* - uprobes
|
||||
* stands for a "[uprobes]" vma that's automatically mapped by
|
||||
* the kernel when an active uprobe is hit. Contents of this vma
|
||||
* are not dumped and neither are its madvise bits restored,
|
||||
* because the kernel is in complete control of this vma. This is
|
||||
* just used to track the existence of the uprobes vma.
|
||||
*/
|
||||
#define VMA_AREA_NONE (0 << 0)
|
||||
#define VMA_AREA_REGULAR (1 << 0)
|
||||
|
|
@ -87,6 +99,8 @@
|
|||
#define VMA_AREA_AIORING (1 << 13)
|
||||
#define VMA_AREA_MEMFD (1 << 14)
|
||||
#define VMA_AREA_SHSTK (1 << 15)
|
||||
#define VMA_AREA_GUARD (1 << 16)
|
||||
#define VMA_AREA_UPROBES (1 << 17)
|
||||
|
||||
#define VMA_EXT_PLUGIN (1 << 27)
|
||||
#define VMA_CLOSE (1 << 28)
|
||||
|
|
@ -100,6 +114,8 @@
|
|||
|
||||
#define CR_PARENT_LINK "parent"
|
||||
|
||||
#define OPT_ALLOW_UPROBES "allow-uprobes"
|
||||
|
||||
extern bool ns_per_id;
|
||||
extern bool img_common_magic;
|
||||
|
||||
|
|
@ -149,7 +165,7 @@ static inline int img_raw_fd(struct cr_img *img)
|
|||
|
||||
extern off_t img_raw_size(struct cr_img *img);
|
||||
|
||||
extern int open_image_dir(char *dir, int mode);
|
||||
extern int open_image_dir(const char *dir, int mode);
|
||||
extern void close_image_dir(void);
|
||||
/*
|
||||
* Return -1 -- parent symlink points to invalid target
|
||||
|
|
|
|||
|
|
@ -89,6 +89,10 @@ struct kerndat_s {
|
|||
bool has_pagemap_scan;
|
||||
bool has_shstk;
|
||||
bool has_close_range;
|
||||
bool has_timer_cr_ids;
|
||||
bool has_breakpoints;
|
||||
bool has_madv_guard;
|
||||
bool has_pagemap_scan_guard_pages;
|
||||
};
|
||||
|
||||
extern struct kerndat_s kdat;
|
||||
|
|
@ -111,4 +115,6 @@ extern int kerndat_fs_virtualized(unsigned int which, u32 kdev);
|
|||
|
||||
extern int kerndat_has_nspid(void);
|
||||
|
||||
extern void kerndat_warn_about_madv_guards(void);
|
||||
|
||||
#endif /* __CR_KERNDAT_H__ */
|
||||
|
|
|
|||
|
|
@ -31,10 +31,12 @@ extern int do_task_reset_dirty_track(int pid);
|
|||
extern unsigned long dump_pages_args_size(struct vm_area_list *vmas);
|
||||
extern int parasite_dump_pages_seized(struct pstree_item *item, struct vm_area_list *vma_area_list,
|
||||
struct mem_dump_ctl *mdc, struct parasite_ctl *ctl);
|
||||
extern int collect_madv_guards(pid_t pid, struct vm_area_list *vma_area_list);
|
||||
|
||||
#define PME_PRESENT (1ULL << 63)
|
||||
#define PME_SWAP (1ULL << 62)
|
||||
#define PME_FILE (1ULL << 61)
|
||||
#define PME_GUARD_REGION (1ULL << 58)
|
||||
#define PME_SOFT_DIRTY (1ULL << 55)
|
||||
#define PME_PSHIFT_BITS (6)
|
||||
#define PME_STATUS_BITS (3)
|
||||
|
|
@ -49,5 +51,11 @@ int prepare_vmas(struct pstree_item *t, struct task_restore_args *ta);
|
|||
int unmap_guard_pages(struct pstree_item *t);
|
||||
int prepare_mappings(struct pstree_item *t);
|
||||
|
||||
u64 should_dump_page(pmc_t *pmc, VmaEntry *vmae, u64 vaddr, bool *softdirty);
|
||||
struct page_info {
|
||||
u64 next;
|
||||
bool softdirty;
|
||||
};
|
||||
|
||||
int should_dump_page(pmc_t *pmc, VmaEntry *vmae, u64 vaddr, struct page_info *page_info);
|
||||
|
||||
#endif /* __CR_MEM_H__ */
|
||||
|
|
|
|||
|
|
@ -4,6 +4,9 @@
|
|||
#ifndef MAP_HUGETLB
|
||||
#define MAP_HUGETLB 0x40000
|
||||
#endif
|
||||
#ifndef MAP_DROPPABLE
|
||||
#define MAP_DROPPABLE 0x08
|
||||
#endif
|
||||
#ifndef MADV_HUGEPAGE
|
||||
#define MADV_HUGEPAGE 14
|
||||
#endif
|
||||
|
|
@ -13,5 +16,11 @@
|
|||
#ifndef MADV_DONTDUMP
|
||||
#define MADV_DONTDUMP 16
|
||||
#endif
|
||||
#ifndef MADV_WIPEONFORK
|
||||
#define MADV_WIPEONFORK 18
|
||||
#endif
|
||||
#ifndef MADV_GUARD_INSTALL
|
||||
#define MADV_GUARD_INSTALL 102
|
||||
#endif
|
||||
|
||||
#endif /* __CR_MMAN_H__ */
|
||||
|
|
|
|||
|
|
@ -31,7 +31,7 @@ extern int collect_net_namespaces(bool for_dump);
|
|||
|
||||
extern int network_lock(void);
|
||||
extern void network_unlock(void);
|
||||
extern int network_lock_internal(void);
|
||||
extern int network_lock_internal(bool restore);
|
||||
|
||||
extern struct ns_desc net_ns_desc;
|
||||
|
||||
|
|
|
|||
|
|
@ -92,9 +92,9 @@ struct kernel_pipe_buffer {
|
|||
struct page_pipe_buf {
|
||||
int p[2]; /* pipe with pages */
|
||||
unsigned int pipe_size; /* how many pages can be fit into pipe */
|
||||
unsigned int pipe_off; /* where this buf is started in a pipe */
|
||||
unsigned int pages_in; /* how many pages are there */
|
||||
unsigned int nr_segs; /* how many iov-s are busy */
|
||||
unsigned long pipe_off; /* where this buf is started in a pipe */
|
||||
unsigned long pages_in; /* how many pages are there */
|
||||
#define PPB_LAZY (1 << 0)
|
||||
unsigned int flags;
|
||||
struct iovec *iov; /* vaddr:len map */
|
||||
|
|
@ -149,7 +149,7 @@ struct pipe_read_dest {
|
|||
};
|
||||
|
||||
extern int pipe_read_dest_init(struct pipe_read_dest *prd);
|
||||
extern int page_pipe_read(struct page_pipe *pp, struct pipe_read_dest *prd, unsigned long addr, unsigned int *nr_pages,
|
||||
extern int page_pipe_read(struct page_pipe *pp, struct pipe_read_dest *prd, unsigned long addr, unsigned long *nr_pages,
|
||||
unsigned int ppb_flags);
|
||||
|
||||
#endif /* __CR_PAGE_PIPE_H__ */
|
||||
|
|
|
|||
|
|
@ -69,9 +69,9 @@ extern int check_parent_page_xfer(int fd_type, unsigned long id);
|
|||
*/
|
||||
|
||||
/* async request/receive of remote pages */
|
||||
extern int request_remote_pages(unsigned long img_id, unsigned long addr, int nr_pages);
|
||||
extern int request_remote_pages(unsigned long img_id, unsigned long addr, unsigned long nr_pages);
|
||||
|
||||
typedef int (*ps_async_read_complete)(unsigned long img_id, unsigned long vaddr, int nr_pages, void *);
|
||||
extern int page_server_start_read(void *buf, int nr_pages, ps_async_read_complete complete, void *priv, unsigned flags);
|
||||
typedef int (*ps_async_read_complete)(unsigned long img_id, unsigned long vaddr, unsigned long nr_pages, void *);
|
||||
extern int page_server_start_read(void *buf, unsigned long nr_pages, ps_async_read_complete complete, void *priv, unsigned flags);
|
||||
|
||||
#endif /* __CR_PAGE_XFER__H__ */
|
||||
|
|
|
|||
|
|
@ -44,7 +44,7 @@
|
|||
|
||||
struct page_read {
|
||||
/* reads page from current pagemap */
|
||||
int (*read_pages)(struct page_read *, unsigned long vaddr, int nr, void *, unsigned flags);
|
||||
int (*read_pages)(struct page_read *, unsigned long vaddr, unsigned long nr, void *, unsigned flags);
|
||||
/* Advance page_read to the next entry */
|
||||
int (*advance)(struct page_read *pr);
|
||||
void (*close)(struct page_read *);
|
||||
|
|
@ -52,8 +52,8 @@ struct page_read {
|
|||
int (*sync)(struct page_read *pr);
|
||||
int (*seek_pagemap)(struct page_read *pr, unsigned long vaddr);
|
||||
void (*reset)(struct page_read *pr);
|
||||
int (*io_complete)(struct page_read *, unsigned long vaddr, int nr);
|
||||
int (*maybe_read_page)(struct page_read *pr, unsigned long vaddr, int nr, void *buf, unsigned flags);
|
||||
int (*io_complete)(struct page_read *, unsigned long vaddr, unsigned long nr);
|
||||
int (*maybe_read_page)(struct page_read *pr, unsigned long vaddr, unsigned long nr, void *buf, unsigned flags);
|
||||
|
||||
/* Whether or not pages can be read in PIE code */
|
||||
bool pieok;
|
||||
|
|
|
|||
|
|
@ -14,6 +14,7 @@
|
|||
#define PAGE_IS_PFNZERO (1 << 5)
|
||||
#define PAGE_IS_HUGE (1 << 6)
|
||||
#define PAGE_IS_SOFT_DIRTY (1 << 7)
|
||||
#define PAGE_IS_GUARD (1 << 8)
|
||||
|
||||
/*
|
||||
* struct page_region - Page region with flags
|
||||
|
|
|
|||
|
|
@ -63,7 +63,7 @@ struct parasite_dump_pages_args {
|
|||
unsigned int add_prot;
|
||||
unsigned int off;
|
||||
unsigned int nr_segs;
|
||||
unsigned int nr_pages;
|
||||
unsigned long nr_pages;
|
||||
};
|
||||
|
||||
static inline struct parasite_vma_entry *pargs_vmas(struct parasite_dump_pages_args *a)
|
||||
|
|
|
|||
|
|
@ -97,4 +97,11 @@ struct prctl_mm_map {
|
|||
#define PR_GET_THP_DISABLE 42
|
||||
#endif
|
||||
|
||||
#ifndef PR_TIMER_CREATE_RESTORE_IDS
|
||||
#define PR_TIMER_CREATE_RESTORE_IDS 77
|
||||
# define PR_TIMER_CREATE_RESTORE_IDS_OFF 0
|
||||
# define PR_TIMER_CREATE_RESTORE_IDS_ON 1
|
||||
# define PR_TIMER_CREATE_RESTORE_IDS_GET 2
|
||||
#endif
|
||||
|
||||
#endif /* __CR_PRCTL_H__ */
|
||||
|
|
|
|||
|
|
@ -105,4 +105,6 @@ extern int parse_uptime(uint64_t *upt);
|
|||
|
||||
extern int parse_timens_offsets(struct timespec *boff, struct timespec *moff);
|
||||
|
||||
extern bool found_uprobes_vma(void);
|
||||
|
||||
#endif /* __CR_PROC_PARSE_H__ */
|
||||
|
|
|
|||
|
|
@ -104,6 +104,7 @@ extern void pstree_insert_pid(struct pid *pid_node);
|
|||
extern struct pid *pstree_pid_by_virt(pid_t pid);
|
||||
|
||||
extern struct pstree_item *root_item;
|
||||
extern bool has_children(struct pstree_item *item);
|
||||
extern struct pstree_item *pstree_item_next(struct pstree_item *item);
|
||||
#define for_each_pstree_item(pi) for (pi = root_item; pi != NULL; pi = pstree_item_next(pi))
|
||||
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@ extern int arch_set_thread_regs_nosigrt(struct pid *pid);
|
|||
|
||||
struct task_restore_args;
|
||||
struct pstree_item;
|
||||
struct rst_shstk_info;
|
||||
|
||||
#ifndef arch_shstk_prepare
|
||||
static inline int arch_shstk_prepare(struct pstree_item *item,
|
||||
|
|
@ -38,4 +39,25 @@ static inline int arch_shstk_trampoline(struct pstree_item *item, CoreEntry *cor
|
|||
#define arch_shstk_trampoline arch_shstk_trampoline
|
||||
#endif
|
||||
|
||||
#ifndef shstk_restorer_stack_size
|
||||
/*
 * Generic fallback, compiled only when no macro named
 * shstk_restorer_stack_size is defined (see the enclosing #ifndef).
 * It always reports 0.
 * NOTE(review): presumably "no extra restorer memory is needed for a
 * shadow stack" -- confirm against the arch-specific override.
 */
static always_inline long shstk_restorer_stack_size(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef shstk_set_restorer_stack
|
||||
/*
 * Generic fallback, compiled only when no macro named
 * shstk_set_restorer_stack is defined (see the enclosing #ifndef).
 * Both arguments are ignored and 0 is returned unconditionally.
 * NOTE(review): 0 presumably means success/no-op -- confirm with callers.
 */
static always_inline long shstk_set_restorer_stack(struct rst_shstk_info *info, void *ptr)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef shstk_min_mmap_addr
|
||||
/*
 * Generic fallback, compiled only when no macro named
 * shstk_min_mmap_addr is defined (see the enclosing #ifndef).
 * Ignores @info and hands back the caller-supplied default @def
 * unchanged (converted to the long return type).
 */
static always_inline long shstk_min_mmap_addr(struct rst_shstk_info *info, unsigned long def)
|
||||
{
|
||||
return def;
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -170,6 +170,7 @@ struct task_restore_args {
|
|||
|
||||
struct restore_posix_timer *posix_timers;
|
||||
unsigned int posix_timers_n;
|
||||
bool posix_timer_cr_ids;
|
||||
|
||||
struct restore_timerfd *timerfd;
|
||||
unsigned int timerfd_n;
|
||||
|
|
@ -356,4 +357,11 @@ static inline int arch_shstk_restore(struct rst_shstk_info *shstk)
|
|||
#define arch_shstk_restore arch_shstk_restore
|
||||
#endif
|
||||
|
||||
#ifndef shstk_vma_restore
|
||||
/*
 * Generic fallback, compiled only when no macro named shstk_vma_restore
 * is defined (see the enclosing #ifndef). Ignores @vma_entry and always
 * returns -1.
 * NOTE(review): -1 presumably signals that restoring a shadow stack VMA
 * is unsupported here -- confirm with the caller.
 */
static always_inline int shstk_vma_restore(VmaEntry *vma_entry)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __CR_RESTORER_H__ */
|
||||
|
|
|
|||
|
|
@ -25,7 +25,7 @@ struct socket_desc {
|
|||
};
|
||||
|
||||
extern int dump_socket(struct fd_parms *p, int lfd, FdinfoEntry *);
|
||||
extern int dump_socket_opts(int sk, SkOptsEntry *soe);
|
||||
extern int dump_socket_opts(int sk, int family, SkOptsEntry *soe);
|
||||
extern int restore_socket_opts(int sk, SkOptsEntry *soe);
|
||||
extern int sk_setbufs(int sk, uint32_t *bufs);
|
||||
extern void release_skopts(SkOptsEntry *);
|
||||
|
|
|
|||
|
|
@ -37,6 +37,6 @@ enum {
|
|||
#define CTL_FLAGS_OPTIONAL 1
|
||||
#define CTL_FLAGS_HAS 2
|
||||
#define CTL_FLAGS_READ_EIO_SKIP 4
|
||||
#define CTL_FLAGS_IPC_EACCES_SKIP 5
|
||||
#define CTL_FLAGS_IPC_EACCES_SKIP 8
|
||||
|
||||
#endif /* __CR_SYSCTL_H__ */
|
||||
|
|
|
|||
145
criu/kerndat.c
145
criu/kerndat.c
|
|
@ -31,6 +31,7 @@
|
|||
#include "kerndat.h"
|
||||
#include "fs-magic.h"
|
||||
#include "mem.h"
|
||||
#include "mman.h"
|
||||
#include "common/compiler.h"
|
||||
#include "sysctl.h"
|
||||
#include "cr_options.h"
|
||||
|
|
@ -86,6 +87,10 @@ static int check_pagemap(void)
|
|||
if (ioctl(fd, PAGEMAP_SCAN, &args) == 0) {
|
||||
pr_debug("PAGEMAP_SCAN is supported\n");
|
||||
kdat.has_pagemap_scan = true;
|
||||
|
||||
args.return_mask |= PAGE_IS_GUARD;
|
||||
if (ioctl(fd, PAGEMAP_SCAN, &args) == 0)
|
||||
kdat.has_pagemap_scan_guard_pages = true;
|
||||
} else {
|
||||
switch (errno) {
|
||||
case EINVAL:
|
||||
|
|
@ -1720,6 +1725,134 @@ static int kerndat_has_close_range(void)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int kerndat_has_timer_cr_ids(void)
|
||||
{
|
||||
if (prctl(PR_TIMER_CREATE_RESTORE_IDS,
|
||||
PR_TIMER_CREATE_RESTORE_IDS_GET, 0, 0, 0) == -1) {
|
||||
if (errno == EINVAL) {
|
||||
pr_debug("PR_TIMER_CREATE_RESTORE_IDS isn't supported\n");
|
||||
return 0;
|
||||
}
|
||||
pr_perror("prctl returned unexpected error code");
|
||||
return -1;
|
||||
}
|
||||
|
||||
kdat.has_timer_cr_ids = true;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Breakpoint target for kerndat_breakpoints(): its address is handed to
 * ptrace_set_breakpoint() and the traced child calls it.
 *
 * If the body is actually reached, the function stops itself with
 * SIGSTOP and, once resumed (or if raise() failed), exits with code 1.
 */
static void breakpoint_func(void)
{
	int rc = raise(SIGSTOP);

	if (rc != 0)
		pr_perror("Unable to kill itself with SIGSTOP");

	exit(1);
}
|
||||
|
||||
/*
|
||||
* kerndat_breakpoints checks that hardware breakpoints work as they should.
|
||||
* In some cases, they might not work in virtual machines if the hypervisor
|
||||
* doesn't virtualize them. For example, they don't work in AMD SEV virtual
|
||||
* machines if the Debug Virtualization extension isn't supported or isn't
|
||||
* enabled in SEV_FEATURES.
|
||||
*/
|
||||
static int kerndat_breakpoints(void)
|
||||
{
|
||||
int status, ret, exit_code = -1;
|
||||
pid_t pid;
|
||||
|
||||
pid = fork();
|
||||
if (pid == -1) {
|
||||
pr_perror("fork");
|
||||
return -1;
|
||||
}
|
||||
if (pid == 0) {
|
||||
if (ptrace(PTRACE_TRACEME, 0, 0, 0)) {
|
||||
pr_perror("ptrace(PTRACE_TRACEME)");
|
||||
exit(1);
|
||||
}
|
||||
raise(SIGSTOP);
|
||||
breakpoint_func();
|
||||
exit(1);
|
||||
}
|
||||
if (waitpid(pid, &status, 0) == -1) {
|
||||
pr_perror("waitpid for initial stop");
|
||||
goto err;
|
||||
}
|
||||
if (!WIFSTOPPED(status) || WSTOPSIG(status) != SIGSTOP) {
|
||||
pr_err("Child didn't stop as expected: status=%x\n", status);
|
||||
goto err;
|
||||
}
|
||||
ret = ptrace_set_breakpoint(pid, &breakpoint_func);
|
||||
if (ret < 0) {
|
||||
pr_err("Failed to set breakpoint\n");
|
||||
goto err;
|
||||
}
|
||||
if (ret == 0) {
|
||||
pr_debug("Hardware breakpoints appear to be disabled\n");
|
||||
goto out;
|
||||
}
|
||||
if (waitpid(pid, &status, 0) == -1) {
|
||||
pr_perror("waitpid for breakpoint trigger");
|
||||
goto err;
|
||||
}
|
||||
if (!WIFSTOPPED(status) || WSTOPSIG(status) != SIGTRAP) {
|
||||
pr_warn("Hardware breakpoints don't seem to work (status=%x)\n", status);
|
||||
goto out;
|
||||
}
|
||||
kdat.has_breakpoints = true;
|
||||
out:
|
||||
exit_code = 0;
|
||||
err:
|
||||
if (kill(pid, SIGKILL)) {
|
||||
pr_perror("Failed to kill the child process");
|
||||
exit_code = -1;
|
||||
}
|
||||
if (waitpid(pid, &status, 0) == -1) {
|
||||
pr_perror("Failed to wait for the child process");
|
||||
exit_code = -1;
|
||||
}
|
||||
if (!WIFSIGNALED(status) || WTERMSIG(status) != SIGKILL) {
|
||||
pr_err("The child exited with unexpected code: %x\n", status);
|
||||
exit_code = -1;
|
||||
}
|
||||
return exit_code;
|
||||
}
|
||||
|
||||
static int kerndat_has_madv_guard(void)
|
||||
{
|
||||
void *map;
|
||||
|
||||
map = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
|
||||
if (map == MAP_FAILED) {
|
||||
pr_perror("Can't mmap a page for has_madv_guard feature test");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (madvise(map, PAGE_SIZE, MADV_GUARD_INSTALL)) {
|
||||
if (errno != EINVAL) {
|
||||
pr_perror("madvise failed (has_madv_guard check)");
|
||||
goto mmap_cleanup;
|
||||
}
|
||||
} else {
|
||||
kdat.has_madv_guard = true;
|
||||
}
|
||||
|
||||
munmap(map, PAGE_SIZE);
|
||||
return 0;
|
||||
|
||||
mmap_cleanup:
|
||||
munmap(map, PAGE_SIZE);
|
||||
return -1;
|
||||
}
|
||||
|
||||
void kerndat_warn_about_madv_guards(void)
|
||||
{
|
||||
if (kdat.has_madv_guard && !kdat.has_pagemap_scan_guard_pages)
|
||||
pr_warn("ioctl(PAGEMAP_SCAN) doesn't support PAGE_IS_GUARD flag. "
|
||||
"CRIU dump will fail if dumped processes use madvise(MADV_GUARD_INSTALL). "
|
||||
"Please, consider updating your kernel.\n");
|
||||
}
|
||||
|
||||
/*
|
||||
* Some features depend on resource that can be dynamically changed
|
||||
* at the OS runtime. There are cases that we cannot determine the
|
||||
|
|
@ -1981,6 +2114,18 @@ int kerndat_init(void)
|
|||
pr_err("kerndat_has_close_range has failed when initializing kerndat.\n");
|
||||
ret = -1;
|
||||
}
|
||||
if (!ret && kerndat_has_timer_cr_ids()) {
|
||||
pr_err("kerndat_has_timer_cr_ids has failed when initializing kerndat.\n");
|
||||
ret = -1;
|
||||
}
|
||||
if (!ret && kerndat_breakpoints()) {
|
||||
pr_err("kerndat_breakpoints has failed when initializing kerndat.\n");
|
||||
ret = -1;
|
||||
}
|
||||
if (!ret && kerndat_has_madv_guard()) {
|
||||
pr_err("kerndat_has_madv_guard has failed when initializing kerndat.\n");
|
||||
ret = -1;
|
||||
}
|
||||
|
||||
kerndat_lsm();
|
||||
kerndat_mmap_min_addr();
|
||||
|
|
|
|||
26
criu/log.c
26
criu/log.c
|
|
@ -10,6 +10,7 @@
|
|||
#include <sys/time.h>
|
||||
#include <sys/resource.h>
|
||||
#include <sys/utsname.h>
|
||||
#include <sys/mman.h>
|
||||
|
||||
#include <fcntl.h>
|
||||
|
||||
|
|
@ -114,6 +115,9 @@ static struct str_and_lock *first_err;
|
|||
|
||||
int log_keep_err(void)
|
||||
{
|
||||
if (first_err)
|
||||
return 0;
|
||||
|
||||
first_err = shmalloc(sizeof(struct str_and_lock));
|
||||
if (first_err == NULL)
|
||||
return -1;
|
||||
|
|
@ -186,7 +190,7 @@ void flush_early_log_buffer(int fd)
|
|||
* with reading the log_level.
|
||||
*/
|
||||
struct early_log_hdr *hdr = (void *)early_log_buffer + pos;
|
||||
pos += sizeof(hdr);
|
||||
pos += sizeof(*hdr);
|
||||
if (hdr->level <= current_loglevel) {
|
||||
size_t size = 0;
|
||||
while (size < hdr->len) {
|
||||
|
|
@ -198,7 +202,7 @@ void flush_early_log_buffer(int fd)
|
|||
}
|
||||
pos += hdr->len;
|
||||
}
|
||||
if (early_log_buf_off == EARLY_LOG_BUF_LEN)
|
||||
if ((early_log_buf_off + sizeof(struct early_log_hdr)) >= EARLY_LOG_BUF_LEN)
|
||||
pr_warn("The early log buffer is full, some messages may have been lost\n");
|
||||
early_log_buf_off = 0;
|
||||
}
|
||||
|
|
@ -316,10 +320,10 @@ unsigned int log_get_loglevel(void)
|
|||
|
||||
static void early_vprint(const char *format, unsigned int loglevel, va_list params)
|
||||
{
|
||||
unsigned int log_size = 0;
|
||||
int log_size = 0, log_space;
|
||||
struct early_log_hdr *hdr;
|
||||
|
||||
if ((early_log_buf_off + sizeof(hdr)) >= EARLY_LOG_BUF_LEN)
|
||||
if ((early_log_buf_off + sizeof(*hdr)) >= EARLY_LOG_BUF_LEN)
|
||||
return;
|
||||
|
||||
/* Save loglevel */
|
||||
|
|
@ -327,7 +331,8 @@ static void early_vprint(const char *format, unsigned int loglevel, va_list para
|
|||
hdr = (void *)early_log_buffer + early_log_buf_off;
|
||||
hdr->level = loglevel;
|
||||
/* Skip the log entry size */
|
||||
early_log_buf_off += sizeof(hdr);
|
||||
early_log_buf_off += sizeof(*hdr);
|
||||
log_space = EARLY_LOG_BUF_LEN - early_log_buf_off;
|
||||
if (loglevel >= LOG_TIMESTAMP) {
|
||||
/*
|
||||
* If logging is not yet setup we just write zeros
|
||||
|
|
@ -335,12 +340,17 @@ static void early_vprint(const char *format, unsigned int loglevel, va_list para
|
|||
* keep the same format as the other messages on
|
||||
* log levels with timestamps (>=LOG_TIMESTAMP).
|
||||
*/
|
||||
log_size = snprintf(early_log_buffer + early_log_buf_off, sizeof(early_log_buffer) - early_log_buf_off,
|
||||
log_size = snprintf(early_log_buffer + early_log_buf_off, log_space,
|
||||
"(00.000000) ");
|
||||
}
|
||||
|
||||
log_size += vsnprintf(early_log_buffer + early_log_buf_off + log_size,
|
||||
sizeof(early_log_buffer) - early_log_buf_off - log_size, format, params);
|
||||
if (log_size < log_space)
|
||||
log_size += vsnprintf(early_log_buffer + early_log_buf_off + log_size,
|
||||
log_space - log_size, format, params);
|
||||
if (log_size > log_space) {
|
||||
/* vsnprintf always adds the terminating null byte. */
|
||||
log_size = log_space - 1;
|
||||
}
|
||||
|
||||
/* Save log entry size */
|
||||
hdr->len = log_size;
|
||||
|
|
|
|||
|
|
@ -29,7 +29,9 @@ static int apparmor_get_label(pid_t pid, char **profile_name)
|
|||
FILE *f;
|
||||
char *space;
|
||||
|
||||
f = fopen_proc(pid, "attr/current");
|
||||
f = fopen_proc(pid, "attr/apparmor/current");
|
||||
if (!f)
|
||||
f = fopen_proc(pid, "attr/current");
|
||||
if (!f)
|
||||
return -1;
|
||||
|
||||
|
|
|
|||
184
criu/mem.c
184
criu/mem.c
|
|
@ -10,6 +10,7 @@
|
|||
#include "cr_options.h"
|
||||
#include "servicefd.h"
|
||||
#include "mem.h"
|
||||
#include "mman.h"
|
||||
#include "parasite-syscall.h"
|
||||
#include "parasite.h"
|
||||
#include "page-pipe.h"
|
||||
|
|
@ -114,44 +115,74 @@ static bool should_dump_entire_vma(VmaEntry *vmae)
|
|||
}
|
||||
|
||||
/*
|
||||
* should_dump_page returns vaddr if an addressed page has to be dumped.
|
||||
* Otherwise, it returns an address that has to be inspected next.
|
||||
* should_dump_page writes vaddr in page_info->next if an addressed page has to be dumped.
|
||||
* Otherwise, it writes an address that has to be inspected next.
|
||||
*/
|
||||
u64 should_dump_page(pmc_t *pmc, VmaEntry *vmae, u64 vaddr, bool *softdirty)
|
||||
int should_dump_page(pmc_t *pmc, VmaEntry *vmae, u64 vaddr, struct page_info *page_info)
|
||||
{
|
||||
if (!page_info)
|
||||
goto err;
|
||||
|
||||
if (vaddr >= pmc->end && pmc_fill(pmc, vaddr, vmae->end))
|
||||
return -1;
|
||||
goto err;
|
||||
|
||||
if (pmc->regs) {
|
||||
while (1) {
|
||||
if (pmc->regs_idx == pmc->regs_len)
|
||||
return pmc->end;
|
||||
if (pmc->regs_idx == pmc->regs_len) {
|
||||
page_info->next = pmc->end;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (vaddr < pmc->regs[pmc->regs_idx].end)
|
||||
break;
|
||||
pmc->regs_idx++;
|
||||
}
|
||||
if (vaddr < pmc->regs[pmc->regs_idx].start)
|
||||
return pmc->regs[pmc->regs_idx].start;
|
||||
if (softdirty)
|
||||
*softdirty = pmc->regs[pmc->regs_idx].categories & PAGE_IS_SOFT_DIRTY;
|
||||
return vaddr;
|
||||
|
||||
if (vaddr < pmc->regs[pmc->regs_idx].start) {
|
||||
page_info->next = pmc->regs[pmc->regs_idx].start;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (pmc->regs[pmc->regs_idx].categories & PAGE_IS_GUARD)
|
||||
goto skip_guard_page;
|
||||
|
||||
page_info->softdirty = pmc->regs[pmc->regs_idx].categories & PAGE_IS_SOFT_DIRTY;
|
||||
page_info->next = vaddr;
|
||||
return 0;
|
||||
} else {
|
||||
u64 pme = pmc->map[PAGE_PFN(vaddr - pmc->start)];
|
||||
|
||||
if (pme & PME_GUARD_REGION)
|
||||
goto skip_guard_page;
|
||||
|
||||
/*
|
||||
* Optimisation for private mapping pages, that haven't
|
||||
* yet being COW-ed
|
||||
*/
|
||||
if (vma_entry_is(vmae, VMA_FILE_PRIVATE) && (pme & PME_FILE))
|
||||
return vaddr + PAGE_SIZE;
|
||||
if ((pme & (PME_PRESENT | PME_SWAP)) && !__page_is_zero(pme)) {
|
||||
if (softdirty)
|
||||
*softdirty = pme & PME_SOFT_DIRTY;
|
||||
return vaddr;
|
||||
if (vma_entry_is(vmae, VMA_FILE_PRIVATE) && (pme & PME_FILE)) {
|
||||
page_info->next = vaddr + PAGE_SIZE;
|
||||
return 0;
|
||||
}
|
||||
|
||||
return vaddr + PAGE_SIZE;
|
||||
if ((pme & (PME_PRESENT | PME_SWAP)) && !__page_is_zero(pme)) {
|
||||
page_info->softdirty = pme & PME_SOFT_DIRTY;
|
||||
page_info->next = vaddr;
|
||||
return 0;
|
||||
}
|
||||
|
||||
page_info->next = vaddr + PAGE_SIZE;
|
||||
return 0;
|
||||
}
|
||||
|
||||
err:
|
||||
pr_err("should_dump_page failed on vma "
|
||||
"%#016" PRIx64 "-%#016" PRIx64 " vaddr=%#016" PRIx64 "\n",
|
||||
vmae->start, vmae->end, vaddr);
|
||||
return -1;
|
||||
|
||||
skip_guard_page:
|
||||
page_info->next = vaddr + PAGE_SIZE;
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool page_is_zero(u64 pme)
|
||||
|
|
@ -201,14 +232,15 @@ static int generate_iovs(struct pstree_item *item, struct vma_area *vma, struct
|
|||
nr_scanned = 0;
|
||||
for (vaddr = *pvaddr; vaddr < vma->e->end; vaddr += PAGE_SIZE, nr_scanned++) {
|
||||
unsigned int ppb_flags = 0;
|
||||
bool softdirty = false;
|
||||
u64 next;
|
||||
struct page_info page_info = {};
|
||||
int st;
|
||||
|
||||
/* If dump_all_pages is true, should_dump_page is called to get pme. */
|
||||
next = should_dump_page(pmc, vma->e, vaddr, &softdirty);
|
||||
if (!dump_all_pages && next != vaddr) {
|
||||
vaddr = next - PAGE_SIZE;
|
||||
if (should_dump_page(pmc, vma->e, vaddr, &page_info))
|
||||
return -1;
|
||||
|
||||
if (!dump_all_pages && page_info.next != vaddr) {
|
||||
vaddr = page_info.next - PAGE_SIZE;
|
||||
continue;
|
||||
}
|
||||
|
||||
|
|
@ -222,7 +254,7 @@ static int generate_iovs(struct pstree_item *item, struct vma_area *vma, struct
|
|||
* page. The latter would be checked in page-xfer.
|
||||
*/
|
||||
|
||||
if (has_parent && page_in_parent(softdirty)) {
|
||||
if (has_parent && page_in_parent(page_info.softdirty)) {
|
||||
ret = page_pipe_add_hole(pp, vaddr, PP_HOLE_PARENT);
|
||||
st = 0;
|
||||
} else {
|
||||
|
|
@ -304,7 +336,7 @@ static int drain_pages(struct page_pipe *pp, struct parasite_ctl *ctl, struct pa
|
|||
list_for_each_entry(ppb, &pp->bufs, l) {
|
||||
args->nr_segs = ppb->nr_segs;
|
||||
args->nr_pages = ppb->pages_in;
|
||||
pr_debug("PPB: %d pages %d segs %u pipe %d off\n", args->nr_pages, args->nr_segs, ppb->pipe_size,
|
||||
pr_debug("PPB: %ld pages %d segs %u pipe %d off\n", args->nr_pages, args->nr_segs, ppb->pipe_size,
|
||||
args->off);
|
||||
|
||||
ret = compel_rpc_call(PARASITE_CMD_DUMPPAGES, ctl);
|
||||
|
|
@ -398,6 +430,17 @@ static int generate_vma_iovs(struct pstree_item *item, struct vma_area *vma, str
|
|||
if (vma_entry_is(vma->e, VMA_AREA_VVAR))
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* 9651fcedf7b9 ("mm: add MAP_DROPPABLE for designating always lazily freeable mappings")
|
||||
* tells us that:
|
||||
* Under memory pressure, mm can just drop the pages (so that they're
|
||||
* zero when read back again).
|
||||
*
|
||||
* Let's just skip MAP_DROPPABLE mappings pages dump logic.
|
||||
*/
|
||||
if (vma->e->flags & MAP_DROPPABLE)
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* To facilitate any combination of pre-dump modes to run after
|
||||
* one another, we need to take extra care as discussed below.
|
||||
|
|
@ -556,6 +599,9 @@ static int __parasite_dump_pages_seized(struct pstree_item *item, struct parasit
|
|||
parent_predump_mode = mdc->parent_ie->pre_dump_mode;
|
||||
|
||||
list_for_each_entry(vma_area, &vma_area_list->h, list) {
|
||||
if (vma_area_is(vma_area, VMA_AREA_GUARD))
|
||||
continue;
|
||||
|
||||
ret = generate_vma_iovs(item, vma_area, pp, &xfer, args, ctl, &pmc, has_parent, mdc->pre_dump,
|
||||
parent_predump_mode);
|
||||
if (ret < 0)
|
||||
|
|
@ -741,8 +787,6 @@ int prepare_mm_pid(struct pstree_item *i)
|
|||
ri->vmas.rst_priv_size += vma_area_len(vma);
|
||||
if (vma_has_guard_gap_hidden(vma))
|
||||
ri->vmas.rst_priv_size += PAGE_SIZE;
|
||||
if (vma_area_is(vma, VMA_AREA_SHSTK))
|
||||
ri->vmas.rst_priv_size += PAGE_SIZE;
|
||||
}
|
||||
|
||||
pr_info("vma 0x%" PRIx64 " 0x%" PRIx64 "\n", vma->e->start, vma->e->end);
|
||||
|
|
@ -818,14 +862,14 @@ static void prepare_cow_vmas_for(struct vm_area_list *vmas, struct vm_area_list
|
|||
/* <= here to shift from matching VMAs and ... */
|
||||
while (vma->e->start <= pvma->e->start) {
|
||||
vma = vma_next(vma);
|
||||
if (&vma->list == &vmas->h)
|
||||
if ((&vma->list == &vmas->h) || vma_area_is(vma, VMA_AREA_GUARD))
|
||||
return;
|
||||
}
|
||||
|
||||
/* ... no == here since we must stop on matching pair */
|
||||
while (pvma->e->start < vma->e->start) {
|
||||
pvma = vma_next(pvma);
|
||||
if (&pvma->list == &pvmas->h)
|
||||
if ((&pvma->list == &pvmas->h) || vma_area_is(pvma, VMA_AREA_GUARD))
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
|
@ -885,13 +929,6 @@ static int premap_private_vma(struct pstree_item *t, struct vma_area *vma, void
|
|||
|
||||
size = vma_entry_len(vma->e);
|
||||
|
||||
/*
|
||||
* map an extra page for shadow stack VMAs, it will be used as a
|
||||
* temporary shadow stack
|
||||
*/
|
||||
if (vma_area_is(vma, VMA_AREA_SHSTK))
|
||||
size += PAGE_SIZE;
|
||||
|
||||
if (!vma_inherited(vma)) {
|
||||
int flag = 0;
|
||||
/*
|
||||
|
|
@ -1026,6 +1063,9 @@ static int premap_priv_vmas(struct pstree_item *t, struct vm_area_list *vmas, vo
|
|||
filemap_ctx_init(true);
|
||||
|
||||
list_for_each_entry(vma, &vmas->h, list) {
|
||||
if (vma_area_is(vma, VMA_AREA_GUARD))
|
||||
continue;
|
||||
|
||||
if (task_size_check(vpid(t), vma->e)) {
|
||||
ret = -1;
|
||||
break;
|
||||
|
|
@ -1233,6 +1273,9 @@ err_read:
|
|||
unsigned long size, i = 0;
|
||||
void *addr = decode_pointer(vma->premmaped_addr);
|
||||
|
||||
if (vma_area_is(vma, VMA_AREA_GUARD))
|
||||
continue;
|
||||
|
||||
if (!vma_inherited(vma))
|
||||
continue;
|
||||
|
||||
|
|
@ -1496,3 +1539,72 @@ int prepare_vmas(struct pstree_item *t, struct task_restore_args *ta)
|
|||
|
||||
return prepare_vma_ios(t, ta);
|
||||
}
|
||||
|
||||
int collect_madv_guards(pid_t pid, struct vm_area_list *vma_area_list)
|
||||
{
|
||||
int pagemap_fd = -1;
|
||||
struct page_region *regs = NULL;
|
||||
long regs_len = 0;
|
||||
int i, ret = -1;
|
||||
|
||||
struct pm_scan_arg args = {
|
||||
.size = sizeof(struct pm_scan_arg),
|
||||
.flags = 0,
|
||||
.start = 0,
|
||||
.end = kdat.task_size,
|
||||
.walk_end = 0,
|
||||
.vec_len = 1000, /* this should be enough for most cases */
|
||||
.max_pages = 0,
|
||||
.category_mask = PAGE_IS_GUARD,
|
||||
.return_mask = PAGE_IS_GUARD,
|
||||
};
|
||||
|
||||
if (!kdat.has_pagemap_scan_guard_pages) {
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
|
||||
pagemap_fd = open_proc(pid, "pagemap");
|
||||
if (pagemap_fd < 0)
|
||||
goto out;
|
||||
|
||||
regs = xmalloc(args.vec_len * sizeof(struct page_region));
|
||||
if (!regs)
|
||||
goto out;
|
||||
args.vec = (long)regs;
|
||||
|
||||
do {
|
||||
/* start from where we finished the last time */
|
||||
args.start = args.walk_end;
|
||||
regs_len = ioctl(pagemap_fd, PAGEMAP_SCAN, &args);
|
||||
if (regs_len == -1) {
|
||||
pr_perror("PAGEMAP_SCAN");
|
||||
goto out;
|
||||
}
|
||||
|
||||
for (i = 0; i < regs_len; i++) {
|
||||
struct vma_area *vma;
|
||||
|
||||
BUG_ON(!(regs[i].categories & PAGE_IS_GUARD));
|
||||
|
||||
vma = alloc_vma_area();
|
||||
if (!vma)
|
||||
goto out;
|
||||
|
||||
vma->e->start = regs[i].start;
|
||||
vma->e->end = regs[i].end;
|
||||
vma->e->status = VMA_AREA_GUARD;
|
||||
|
||||
list_add_tail(&vma->list, &vma_area_list->h);
|
||||
vma_area_list->nr++;
|
||||
}
|
||||
} while (args.walk_end != kdat.task_size);
|
||||
|
||||
ret = 0;
|
||||
|
||||
out:
|
||||
xfree(regs);
|
||||
if (pagemap_fd >= 0)
|
||||
close(pagemap_fd);
|
||||
return ret;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -443,6 +443,7 @@ err:
|
|||
/* Mounts root container mount. */
|
||||
static int do_mount_root_v2(struct mount_info *mi)
|
||||
{
|
||||
unsigned long mflags = mi->flags & (~MS_PROPAGATE);
|
||||
unsigned long flags = MS_BIND;
|
||||
int fd;
|
||||
|
||||
|
|
@ -477,6 +478,11 @@ static int do_mount_root_v2(struct mount_info *mi)
|
|||
return -1;
|
||||
}
|
||||
|
||||
if (mflags && mount(NULL, mi->plain_mountpoint, NULL, MS_REMOUNT | MS_BIND | mflags, NULL)) {
|
||||
pr_perror("Unable to apply root mount options");
|
||||
return -1;
|
||||
}
|
||||
|
||||
mi->mounted = true;
|
||||
|
||||
return 0;
|
||||
|
|
|
|||
13
criu/mount.c
13
criu/mount.c
|
|
@ -888,7 +888,11 @@ static int resolve_external_mounts(struct mount_info *info)
|
|||
|
||||
cut_root = cut_root_for_bind(m->root, match->root);
|
||||
|
||||
p = xsprintf("%s/%s", match->ns_mountpoint + 1, cut_root);
|
||||
if (cut_root[0] == '\0') {
|
||||
p = xstrdup(match->ns_mountpoint + 1);
|
||||
} else {
|
||||
p = xsprintf("%s/%s", match->ns_mountpoint + 1, cut_root);
|
||||
}
|
||||
if (!p)
|
||||
return -1;
|
||||
|
||||
|
|
@ -2690,9 +2694,16 @@ shared:
|
|||
|
||||
static int do_mount_root(struct mount_info *mi)
|
||||
{
|
||||
unsigned long mflags = mi->flags & (~MS_PROPAGATE);
|
||||
|
||||
if (restore_shared_options(mi, !mi->shared_id && !mi->master_id, mi->shared_id, mi->master_id))
|
||||
return -1;
|
||||
|
||||
if (mflags && mount(NULL, service_mountpoint(mi), NULL, MS_REMOUNT | MS_BIND | mflags, NULL)) {
|
||||
pr_perror("Unable to apply root mount options");
|
||||
return -1;
|
||||
}
|
||||
|
||||
return fetch_rt_stat(mi, service_mountpoint(mi));
|
||||
}
|
||||
|
||||
|
|
|
|||
102
criu/net.c
102
criu/net.c
|
|
@ -2144,51 +2144,53 @@ static int ipv4_sysctls_op(SysctlEntry ***rsysctl, size_t *pn, int op)
|
|||
char path[ARRAY_SIZE(ipv4_sysctl_entries)][MAX_IPV4_SYSCTL_PATH] = {};
|
||||
struct sysctl_req req[ARRAY_SIZE(ipv4_sysctl_entries)] = {};
|
||||
SysctlEntry **sysctl = *rsysctl;
|
||||
size_t n = *pn;
|
||||
size_t n = *pn, ri;
|
||||
|
||||
if (n != ARRAY_SIZE(ipv4_sysctl_entries)) {
|
||||
pr_err("unix: Unexpected entries in sysctlig (%zu %zu)\n", n, ARRAY_SIZE(ipv4_sysctl_entries));
|
||||
pr_err("ipv4: Unexpected entries in sysctl (%zu %zu)\n", n, ARRAY_SIZE(ipv4_sysctl_entries));
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (opts.weak_sysctls || op == CTL_READ)
|
||||
flags = CTL_FLAGS_OPTIONAL;
|
||||
|
||||
for (i = 0; i < n; i++) {
|
||||
snprintf(path[i], MAX_IPV4_SYSCTL_PATH, IPV4_SYSCTL_FMT, ipv4_sysctl_entries[i]);
|
||||
req[i].name = path[i];
|
||||
req[i].flags = flags;
|
||||
for (i = 0, ri = 0; i < n; i++) {
|
||||
snprintf(path[ri], MAX_IPV4_SYSCTL_PATH, IPV4_SYSCTL_FMT, ipv4_sysctl_entries[i]);
|
||||
req[ri].name = path[ri];
|
||||
req[ri].flags = flags;
|
||||
|
||||
switch (sysctl[i]->type) {
|
||||
case SYSCTL_TYPE__CTL_STR:
|
||||
req[i].type = CTL_STR(MAX_STR_IPV4_SYSCTL_LEN);
|
||||
req[ri].type = CTL_STR(MAX_STR_IPV4_SYSCTL_LEN);
|
||||
|
||||
/* skip write if have no value */
|
||||
if (op == CTL_WRITE && !sysctl[i]->sarg)
|
||||
continue;
|
||||
|
||||
req[i].arg = sysctl[i]->sarg;
|
||||
req[ri].arg = sysctl[i]->sarg;
|
||||
break;
|
||||
default:
|
||||
pr_err("ipv4: Unknown sysctl type %d\n", sysctl[i]->type);
|
||||
return -1;
|
||||
}
|
||||
ri++;
|
||||
}
|
||||
|
||||
ret = sysctl_op(req, n, op, CLONE_NEWNET);
|
||||
ret = sysctl_op(req, ri, op, CLONE_NEWNET);
|
||||
if (ret < 0) {
|
||||
pr_err("unix: Failed to %s %s/<sysctls>\n", (op == CTL_READ) ? "read" : "write", IPV4_SYSCTL_BASE);
|
||||
pr_err("ipv4: Failed to %s %s/<sysctls>\n", (op == CTL_READ) ? "read" : "write", IPV4_SYSCTL_BASE);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (op == CTL_READ) {
|
||||
bool has_entries = false;
|
||||
|
||||
BUG_ON(ri != n);
|
||||
for (i = 0; i < n; i++) {
|
||||
if (req[i].flags & CTL_FLAGS_HAS) {
|
||||
sysctl[i]->has_iarg = true;
|
||||
if (!has_entries)
|
||||
has_entries = true;
|
||||
has_entries = true;
|
||||
} else {
|
||||
sysctl[i]->sarg = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -2201,6 +2203,42 @@ static int ipv4_sysctls_op(SysctlEntry ***rsysctl, size_t *pn, int op)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int ipv4_sysctls_ping_group_range_map_gid(SysctlEntry *ent, size_t size)
|
||||
{
|
||||
int start, end, ustart, uend, ret;
|
||||
|
||||
if (sscanf(ent->sarg, "%d %d", &start, &end) != 2) {
|
||||
pr_err("Failed to parse ping_group_range: %s\n", ent->sarg);
|
||||
return -1;
|
||||
}
|
||||
|
||||
/*
|
||||
* The default is "1 0", which means no group
|
||||
* is allowed to create ICMP Echo sockets.
|
||||
*/
|
||||
if (start == 1 && end == 0) {
|
||||
pr_debug("The ping_group_range is set to default, skipping it.\n");
|
||||
ent->sarg = NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!(root_ns_mask & CLONE_NEWUSER))
|
||||
return 0;
|
||||
|
||||
ustart = userns_gid(start);
|
||||
uend = userns_gid(end);
|
||||
pr_debug("Mapping ping_group_range %d %d to userns -> %d %d\n",
|
||||
start, end, ustart, uend);
|
||||
|
||||
ret = snprintf(ent->sarg, size, "%d\t%d\n", ustart, uend);
|
||||
if (ret < 0 || ret >= size) {
|
||||
pr_err("Failed to map ping_group_range: %d\t%d\n", ustart, uend);
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int dump_netns_conf(struct ns_id *ns, struct cr_imgset *fds)
|
||||
{
|
||||
void *buf, *o_buf;
|
||||
|
|
@ -2218,6 +2256,7 @@ static int dump_netns_conf(struct ns_id *ns, struct cr_imgset *fds)
|
|||
SysctlEntry *ipv4_sysctls = NULL;
|
||||
size_t ipv4_sysctl_size = ARRAY_SIZE(ipv4_sysctl_entries);
|
||||
char ping_group_range[MAX_STR_IPV4_SYSCTL_LEN + 1] = {};
|
||||
int ping_group_range_id = -1;
|
||||
NetnsId *ids;
|
||||
struct netns_id *p;
|
||||
|
||||
|
|
@ -2308,6 +2347,7 @@ static int dump_netns_conf(struct ns_id *ns, struct cr_imgset *fds)
|
|||
if (!strcmp(ipv4_sysctl_entries[i], "ping_group_range")) {
|
||||
netns.ipv4_sysctl[i]->type = SYSCTL_TYPE__CTL_STR;
|
||||
netns.ipv4_sysctl[i]->sarg = ping_group_range;
|
||||
ping_group_range_id = i;
|
||||
} else {
|
||||
/* Need to handle this case when we have more sysctls */
|
||||
BUG();
|
||||
|
|
@ -2336,6 +2376,12 @@ static int dump_netns_conf(struct ns_id *ns, struct cr_imgset *fds)
|
|||
if (ret < 0)
|
||||
goto err_free;
|
||||
|
||||
BUG_ON(ping_group_range_id == -1);
|
||||
ret = ipv4_sysctls_ping_group_range_map_gid(netns.ipv4_sysctl[ping_group_range_id],
|
||||
MAX_STR_IPV4_SYSCTL_LEN + 1);
|
||||
if (ret < 0)
|
||||
goto err_free;
|
||||
|
||||
ret = pb_write_one(img_from_set(fds, CR_FD_NETNS), &netns, PB_NETNS);
|
||||
err_free:
|
||||
xfree(o_buf);
|
||||
|
|
@ -3206,12 +3252,12 @@ static inline FILE *redirect_nftables_output(struct nft_ctx *nft)
|
|||
}
|
||||
#endif
|
||||
|
||||
static inline int nftables_lock_network_internal(void)
|
||||
static inline int nftables_lock_network_internal(bool restore)
|
||||
{
|
||||
#if defined(CONFIG_HAS_NFTABLES_LIB_API_0) || defined(CONFIG_HAS_NFTABLES_LIB_API_1)
|
||||
cleanup_file FILE *fp = NULL;
|
||||
struct nft_ctx *nft;
|
||||
int ret = 0;
|
||||
int ret = 0, exit_code = -1;
|
||||
char table[32];
|
||||
char buf[128];
|
||||
|
||||
|
|
@ -3224,11 +3270,16 @@ static inline int nftables_lock_network_internal(void)
|
|||
|
||||
fp = redirect_nftables_output(nft);
|
||||
if (!fp)
|
||||
goto out;
|
||||
goto err2;
|
||||
|
||||
snprintf(buf, sizeof(buf), "create table %s", table);
|
||||
if (NFT_RUN_CMD(nft, buf))
|
||||
ret = NFT_RUN_CMD(nft, buf);
|
||||
if (ret) {
|
||||
/* The network has been locked on dump. */
|
||||
if (restore && errno == EEXIST)
|
||||
return 0;
|
||||
goto err2;
|
||||
}
|
||||
|
||||
snprintf(buf, sizeof(buf), "add chain %s output { type filter hook output priority 0; policy drop; }", table);
|
||||
if (NFT_RUN_CMD(nft, buf))
|
||||
|
|
@ -3246,17 +3297,16 @@ static inline int nftables_lock_network_internal(void)
|
|||
if (NFT_RUN_CMD(nft, buf))
|
||||
goto err1;
|
||||
|
||||
goto out;
|
||||
|
||||
exit_code = 0;
|
||||
out:
|
||||
nft_ctx_free(nft);
|
||||
return exit_code;
|
||||
err1:
|
||||
snprintf(buf, sizeof(buf), "delete table %s", table);
|
||||
NFT_RUN_CMD(nft, buf);
|
||||
err2:
|
||||
ret = -1;
|
||||
pr_err("Locking network failed using nftables\n");
|
||||
out:
|
||||
nft_ctx_free(nft);
|
||||
return ret;
|
||||
goto out;
|
||||
#else
|
||||
pr_err("CRIU was built without libnftables support\n");
|
||||
return -1;
|
||||
|
|
@ -3288,7 +3338,7 @@ static int iptables_network_lock_internal(void)
|
|||
return ret;
|
||||
}
|
||||
|
||||
int network_lock_internal(void)
|
||||
int network_lock_internal(bool restore)
|
||||
{
|
||||
int ret = 0, nsret;
|
||||
|
||||
|
|
@ -3301,7 +3351,7 @@ int network_lock_internal(void)
|
|||
if (opts.network_lock_method == NETWORK_LOCK_IPTABLES)
|
||||
ret = iptables_network_lock_internal();
|
||||
else if (opts.network_lock_method == NETWORK_LOCK_NFTABLES)
|
||||
ret = nftables_lock_network_internal();
|
||||
ret = nftables_lock_network_internal(restore);
|
||||
|
||||
if (restore_ns(nsret, &net_ns_desc))
|
||||
ret = -1;
|
||||
|
|
@ -3427,7 +3477,7 @@ int network_lock(void)
|
|||
if (run_scripts(ACT_NET_LOCK))
|
||||
return -1;
|
||||
|
||||
return network_lock_internal();
|
||||
return network_lock_internal(false);
|
||||
}
|
||||
|
||||
void network_unlock(void)
|
||||
|
|
|
|||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue