diff --git a/.cirrus.yml b/.cirrus.yml index 72dbb3898..5e30ca2c2 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -13,8 +13,9 @@ task: nested_virtualization: true setup_script: | - contrib/apt-install make gcc pkg-config git perl-modules iproute2 kmod wget cpu-checker + scripts/ci/apt-install make gcc pkg-config git perl-modules iproute2 kmod wget cpu-checker sudo kvm-ok + ln -sf /usr/include/google/protobuf/descriptor.proto images/google/protobuf/descriptor.proto build_script: | make -C scripts/ci vagrant-fedora-no-vdso @@ -32,9 +33,10 @@ task: memory: 8G setup_script: | + ln -sf /usr/include/google/protobuf/descriptor.proto images/google/protobuf/descriptor.proto dnf config-manager --set-enabled crb # Same as CentOS 8 powertools dnf -y install epel-release epel-next-release - contrib/dependencies/dnf-packages.sh + dnf -y install --allowerasing asciidoc gcc git gnutls-devel libaio-devel libasan libcap-devel libnet-devel libnl3-devel libbsd-devel libselinux-devel make protobuf-c-devel protobuf-devel python-devel python-PyYAML python-protobuf python-junit_xml python3-importlib-metadata xmlto libdrm-devel # The image has a too old version of nettle which does not work with gnutls. # Just upgrade to the latest to make the error go away. 
dnf -y upgrade nettle nettle-devel @@ -63,8 +65,9 @@ task: nested_virtualization: true setup_script: | - contrib/apt-install make gcc pkg-config git perl-modules iproute2 kmod wget cpu-checker + scripts/ci/apt-install make gcc pkg-config git perl-modules iproute2 kmod wget cpu-checker sudo kvm-ok + ln -sf /usr/include/google/protobuf/descriptor.proto images/google/protobuf/descriptor.proto build_script: | make -C scripts/ci vagrant-fedora-rawhide @@ -83,11 +86,36 @@ task: nested_virtualization: true setup_script: | - contrib/apt-install make gcc pkg-config git perl-modules iproute2 kmod wget cpu-checker + scripts/ci/apt-install make gcc pkg-config git perl-modules iproute2 kmod wget cpu-checker sudo kvm-ok + ln -sf /usr/include/google/protobuf/descriptor.proto images/google/protobuf/descriptor.proto build_script: | make -C scripts/ci vagrant-fedora-non-root +task: + name: aarch64 build GCC (native) + arm_container: + image: docker.io/library/ubuntu:jammy + cpu: 4 + memory: 4G + script: uname -a + build_script: | + scripts/ci/apt-install make + ln -sf /usr/include/google/protobuf/descriptor.proto images/google/protobuf/descriptor.proto + make -C scripts/ci local + +task: + name: aarch64 build CLANG (native) + arm_container: + image: docker.io/library/ubuntu:jammy + cpu: 4 + memory: 4G + script: uname -a + build_script: | + scripts/ci/apt-install make + ln -sf /usr/include/google/protobuf/descriptor.proto images/google/protobuf/descriptor.proto + make -C scripts/ci local CLANG=1 + task: name: aarch64 Fedora Rawhide arm_container: @@ -97,5 +125,6 @@ task: script: uname -a build_script: | scripts/ci/prepare-for-fedora-rawhide.sh + ln -sf /usr/include/google/protobuf/descriptor.proto images/google/protobuf/descriptor.proto make -C scripts/ci/ local CC=gcc SKIP_CI_PREP=1 SKIP_CI_TEST=1 CD_TO_TOP=1 make -C test/zdtm -j 4 diff --git a/.codespellrc b/.codespellrc index 5def594b2..dd31dd851 100644 --- a/.codespellrc +++ b/.codespellrc @@ -1,3 +1,3 @@ [codespell] -skip = 
./.git,./test/pki,./tags,./plugins/amdgpu/amdgpu_drm.h,./plugins/amdgpu/drm.h,./plugins/amdgpu/drm_mode.h -ignore-words-list = creat,fpr,fle,ue,bord,parms,nd,te,testng,inh,wronly,renderd,bui,clen,sems +skip = ./.git,./test/pki +ignore-words-list = creat,fpr,fle,ue,bord,parms,nd,te,testng,inh,wronly,renderd,bui,clen diff --git a/.github/workflows/aarch64-test.yaml b/.github/workflows/actuated-aarch64-test.yaml similarity index 50% rename from .github/workflows/aarch64-test.yaml rename to .github/workflows/actuated-aarch64-test.yaml index ebbecadb3..8b0a63fc7 100644 --- a/.github/workflows/aarch64-test.yaml +++ b/.github/workflows/actuated-aarch64-test.yaml @@ -1,25 +1,43 @@ -name: aarch64 test +name: Actuated aarch64 test on: [push, pull_request] # Cancel any preceding run on the pull request. concurrency: - group: aarch64-test-${{ github.event.pull_request.number || github.ref }} + group: actuated-test-${{ github.event.pull_request.number || github.ref }} cancel-in-progress: ${{ github.ref != 'refs/heads/criu-dev' }} jobs: build: + # Actuated runners are not available in all repositories. + if: ${{ github.repository == 'checkpoint-restore/criu' }} + # The memory size and the number of CPUs can be freely selected. + # 3GB and 4 CPUs seems to be enough according to the result from 'vmmeter'. 
+ runs-on: actuated-arm64-4cpu-3gb strategy: matrix: - os: [ubuntu-24.04-arm, ubuntu-22.04-arm] target: [GCC=1, CLANG=1] - runs-on: ${{ matrix.os }} - steps: + # https://gist.github.com/alexellis/1f33e581c75e11e161fe613c46180771#file-metering-gha-md + # vmmeter start + - name: Prepare arkade + uses: alexellis/arkade-get@master + with: + crane: latest + print-summary: false + + - name: Install vmmeter + run: | + crane export --platform linux/arm64 ghcr.io/openfaasltd/vmmeter:latest | sudo tar -xvf - -C /usr/local/bin + + - name: Run vmmeter + uses: self-actuated/vmmeter-action@master + # vmmeter end + - uses: actions/checkout@v4 - - name: Run Tests ${{ matrix.target }} on ${{ matrix.os }} - # Following tests are failing on the VMs: + - name: Run Tests ${{ matrix.target }} + # Following tests are failing on the actuated VMs: # ./change_mnt_context --pidfile=change_mnt_context.pid --outfile=change_mnt_context.out # 45: ERR: change_mnt_context.c:23: mount (errno = 22 (Invalid argument)) # diff --git a/.github/workflows/alpine-test.yml b/.github/workflows/alpine-test.yml index 0f5c20f48..73530d79a 100644 --- a/.github/workflows/alpine-test.yml +++ b/.github/workflows/alpine-test.yml @@ -9,11 +9,10 @@ concurrency: jobs: build: + runs-on: ubuntu-22.04 strategy: matrix: - os: [ubuntu-22.04, ubuntu-22.04-arm] target: [GCC=1, CLANG=1] - runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v4 diff --git a/.github/workflows/check-commits.yml b/.github/workflows/check-commits.yml index bf7d06697..be2fbd285 100644 --- a/.github/workflows/check-commits.yml +++ b/.github/workflows/check-commits.yml @@ -12,14 +12,14 @@ jobs: # Check if pull request does not have label "not-selfcontained-ok" if: "!contains(github.event.pull_request.labels.*.name, 'not-selfcontained-ok')" steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v3 with: # Needed to rebase against the base branch fetch-depth: 0 # Checkout pull request HEAD commit instead of merge commit ref: ${{ 
github.event.pull_request.head.sha }} - name: Install dependencies - run: sudo contrib/apt-install libprotobuf-dev libprotobuf-c-dev protobuf-c-compiler protobuf-compiler python3-protobuf libnl-3-dev libnet-dev libcap-dev uuid-dev + run: sudo apt-get install -y libprotobuf-dev libprotobuf-c-dev protobuf-c-compiler protobuf-compiler python3-protobuf libnl-3-dev libnet-dev libcap-dev - name: Configure git user details run: | git config --global user.email "checkpoint-restore@users.noreply.github.com" diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 9c9e46c1b..518d9b8ae 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -29,22 +29,22 @@ jobs: steps: - name: Checkout - uses: actions/checkout@v4 + uses: actions/checkout@v3 - name: Install Packages (cpp) if: ${{ matrix.language == 'cpp' }} run: | - sudo contrib/apt-install protobuf-c-compiler libprotobuf-c-dev libprotobuf-dev build-essential libprotobuf-dev libprotobuf-c-dev protobuf-c-compiler protobuf-compiler python3-protobuf libnet-dev pkg-config libnl-3-dev libbsd0 libbsd-dev iproute2 libcap-dev libaio-dev libbsd-dev python3-yaml libnl-route-3-dev gnutls-dev + sudo scripts/ci/apt-install protobuf-c-compiler libprotobuf-c-dev libprotobuf-dev build-essential libprotobuf-dev libprotobuf-c-dev protobuf-c-compiler protobuf-compiler python3-protobuf libnet-dev pkg-config libnl-3-dev libbsd0 libbsd-dev iproute2 libcap-dev libaio-dev libbsd-dev python3-yaml libnl-route-3-dev gnutls-dev - name: Initialize CodeQL - uses: github/codeql-action/init@v3 + uses: github/codeql-action/init@v2 with: languages: ${{ matrix.language }} queries: +security-and-quality - name: Autobuild - uses: github/codeql-action/autobuild@v3 + uses: github/codeql-action/autobuild@v2 - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v3 + uses: github/codeql-action/analyze@v2 with: category: "/language:${{ matrix.language }}" diff --git 
a/.github/workflows/cross-compile-daily.yml b/.github/workflows/cross-compile-daily.yml index c709cca00..b8c8c86d4 100644 --- a/.github/workflows/cross-compile-daily.yml +++ b/.github/workflows/cross-compile-daily.yml @@ -10,7 +10,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - target: [armv7-stable-cross, aarch64-stable-cross, ppc64-stable-cross, mips64el-stable-cross, riscv64-stable-cross] + target: [armv7-stable-cross, aarch64-stable-cross, ppc64-stable-cross, mips64el-stable-cross] branches: [criu-dev, master] steps: diff --git a/.github/workflows/cross-compile.yml b/.github/workflows/cross-compile.yml index 96672b294..06b812823 100644 --- a/.github/workflows/cross-compile.yml +++ b/.github/workflows/cross-compile.yml @@ -21,7 +21,6 @@ jobs: aarch64-stable-cross, ppc64-stable-cross, mips64el-stable-cross, - riscv64-stable-cross, ] include: - experimental: true diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index f7da4f6f6..862d68245 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -14,7 +14,7 @@ jobs: image: registry.fedoraproject.org/fedora:latest steps: - name: Install tools - run: sudo dnf -y install git make ruff xz clang-tools-extra codespell git-clang-format ShellCheck + run: sudo dnf -y install git make ruff xz clang-tools-extra which codespell git-clang-format ShellCheck - uses: actions/checkout@v4 diff --git a/.github/workflows/nftables-test.yml b/.github/workflows/nftables-test.yml deleted file mode 100644 index 7a7d8bd30..000000000 --- a/.github/workflows/nftables-test.yml +++ /dev/null @@ -1,24 +0,0 @@ -name: Nftables bases testing - -on: [push, pull_request] - -# Cancel any preceding run on the pull request. 
-concurrency: - group: nftables-test-${{ github.event.pull_request.number || github.ref }} - cancel-in-progress: ${{ github.ref != 'refs/heads/criu-dev' }} - -jobs: - build: - runs-on: ubuntu-24.04 - steps: - - uses: actions/checkout@v4 - - name: Remove iptables - run: sudo apt remove -y iptables - - name: Install libnftables-dev - run: sudo contrib/apt-install libnftables-dev - - name: chmod 755 /home/runner - # CRIU's tests are sometimes running as some random user and need - # to be able to access the test files. - run: sudo chmod 755 /home/runner - - name: Build with nftables network locking backend - run: sudo make -C scripts/ci local COMPILE_FLAGS="NETWORK_LOCK_DEFAULT=NETWORK_LOCK_NFTABLES" diff --git a/.gitignore b/.gitignore index 94daa13ea..854657d1c 100644 --- a/.gitignore +++ b/.gitignore @@ -20,6 +20,8 @@ compel/compel compel/compel-host-bin images/*.c images/*.h +images/google/protobuf/*.c +images/google/protobuf/*.h .gitid criu/criu criu/unittest/unittest diff --git a/.lgtm.yml b/.lgtm.yml index 4beadcc63..0dd49cda4 100644 --- a/.lgtm.yml +++ b/.lgtm.yml @@ -23,3 +23,8 @@ extraction: - "python3-yaml" - "libnl-route-3-dev" - "gnutls-dev" + configure: + command: + - "ls -laR images/google" + - "ln -s /usr/include/google/protobuf/descriptor.proto images/google/protobuf/descriptor.proto" + - "ls -laR images/google" diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 000000000..94841b3f3 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,35 @@ +language: c +os: linux +dist: bionic +services: + - docker +jobs: + include: + - os: linux + arch: ppc64le + env: TR_ARCH=local + dist: bionic + - os: linux + arch: ppc64le + env: TR_ARCH=local CLANG=1 + dist: bionic + - os: linux + arch: s390x + env: TR_ARCH=local + dist: bionic + - os: linux + arch: arm64-graviton2 + env: TR_ARCH=local RUN_TESTS=1 + dist: focal + group: edge + virt: vm + - os: linux + arch: arm64-graviton2 + env: TR_ARCH=local CLANG=1 RUN_TESTS=1 + group: edge + virt: vm + dist: bionic 
+script: + - sudo make -C scripts/ci $TR_ARCH +after_success: + - make -C scripts/ci after_success diff --git a/CLAUDE.md b/CLAUDE.md deleted file mode 120000 index e3c5a92d9..000000000 --- a/CLAUDE.md +++ /dev/null @@ -1 +0,0 @@ -GEMINI.md \ No newline at end of file diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 03875639d..37965e5fb 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -8,8 +8,8 @@ Here are some useful hints to get involved. * We have both -- [very simple](https://github.com/checkpoint-restore/criu/issues?q=is%3Aissue+is%3Aopen+label%3Aenhancement) and [more sophisticated](https://github.com/checkpoint-restore/criu/issues?q=is%3Aissue+is%3Aopen+label%3A%22new+feature%22) coding tasks; * CRIU does need [extensive testing](https://github.com/checkpoint-restore/criu/issues?q=is%3Aissue+is%3Aopen+label%3Atesting); * Documentation is always hard, we have [some information](https://criu.org/Category:Empty_articles) that is to be extracted from people's heads into wiki pages as well as [some texts](https://criu.org/Category:Editor_help_needed) that all need to be converted into useful articles; -* Feedback is expected on the GitHub issues page and on the [mailing list](https://lore.kernel.org/criu); -* We accept GitHub pull requests and this is the preferred way to contribute to CRIU. If you prefer to send patches by email, you are welcome to send them to [CRIU development mailing list](https://lore.kernel.org/criu). +* Feedback is expected on the GitHub issues page and on the [mailing list](https://lists.openvz.org/mailman/listinfo/criu); +* We accept GitHub pull requests and this is the preferred way to contribute to CRIU. If you prefer to send patches by email, you are welcome to send them to [CRIU development mailing list](https://lists.openvz.org/mailman/listinfo/criu). Below we describe in more detail recommend practices for CRIU development. 
* Spread the word about CRIU in [social networks](http://criu.org/Contacts); * If you're giving a talk about CRIU -- let us know, we'll mention it on the [wiki main page](https://criu.org/News/events); @@ -27,43 +27,19 @@ The repository may contain multiple branches. Development happens in the **criu- To clone CRIU repo and switch to the proper branch, run: ``` -git clone https://github.com/checkpoint-restore/criu criu -cd criu -git checkout criu-dev + git clone https://github.com/checkpoint-restore/criu criu + cd criu + git checkout criu-dev ``` -### Building from source +### Compile -Follow these steps to compile CRIU from source code. +First, you need to install compile-time dependencies. Check [Installation dependencies](https://criu.org/Installation#Dependencies) for more info. -#### Installing build dependencies - -First, you need to install the required build dependencies. We provide scripts to simplify this process for several Linux distributions in [contrib/dependencies](contrib/dependencies). For a complete list of dependencies, please refer to the [installation guide](https://criu.org/Installation). - -##### On Ubuntu/Debian-based systems: +To compile CRIU, run: ``` -./contrib/dependencies/apt-packages.sh -``` - -##### On Fedora/CentOS-based systems: - -``` -./contrib/dependencies/dnf-packages.sh -``` - -##### Using Nix: - -``` -nix develop -``` - -#### Compiling CRIU - -Once the dependencies are installed, you can compile CRIU by running the `make` command from the root of the source directory: - -``` -make + make ``` This should create the `./criu/criu` executable. @@ -87,7 +63,7 @@ The following command can be used to automatically run a code linter for Python text spelling (codespell), and a number of CRIU-specific checks (usage of print macros and EOL whitespace for C files). 
``` -make lint + make lint ``` In addition, we have adopted a [clang-format configuration file](https://www.kernel.org/doc/Documentation/process/clang-format.rst) @@ -97,7 +73,7 @@ results in decreased readability, we may choose to ignore these errors. Run the following command to check if your changes are compliant with the clang-format rules: ``` -make indent + make indent ``` This command is built upon the `git-clang-format` tool and supports two options `BASE` and `OPTS`. The `BASE` option allows you to @@ -107,7 +83,7 @@ can use `BASE=origin/criu-dev`. The `OPTS` option can be used to pass additional to check the last *N* commits for formatting errors, without applying the changes to the codebase you can use the following command. ``` -make indent OPTS=--diff BASE=HEAD~N + make indent OPTS=--diff BASE=HEAD~N ``` Note that for pull requests, the "Run code linter" workflow runs these checks for all commits. If a clang-format error is detected @@ -120,7 +96,7 @@ Here are some bad examples of clang-format-ing: ``` @@ -58,8 +59,7 @@ static int register_membarriers(void) } - + if (!all_ok) { - fail("can't register membarrier()s - tried %#x, kernel %#x", - barriers_registered, barriers_supported); @@ -153,11 +129,16 @@ Here are some bad examples of clang-format-ing: CRIU comes with an extensive test suite. To check whether your changes introduce any regressions, run ``` -make test + make test ``` The command runs [ZDTM Test Suite](https://criu.org/ZDTM_Test_Suite). Check for any error messages produced by it. +In case you'd rather have someone else run the tests, you can use travis-ci for your +own GitHub fork of CRIU. It will check the compilation for various supported platforms, +as well as run most of the tests from the suite. See https://travis-ci.org/checkpoint-restore/criu +for more details. + ## Describe your changes Describe your problem. Whether your change is a one-line bug fix or @@ -185,21 +166,21 @@ If your change fixes a bug in a specific commit, e.g. 
you found an issue using the SHA-1 ID, and the one line summary. For example: ``` -Fixes: 9433b7b9db3e ("make: use cflags/ldflags for config.h detection mechanism") + Fixes: 9433b7b9db3e ("make: use cflags/ldflags for config.h detection mechanism") ``` The following `git config` settings can be used to add a pretty format for outputting the above style in the `git log` or `git show` commands: ``` -[pretty] - fixes = Fixes: %h (\"%s\") + [pretty] + fixes = Fixes: %h (\"%s\") ``` If your change address an issue listed in GitHub, please use `Fixes:` tag with the number of the issue. For instance: ``` -Fixes: #339 + Fixes: #339 ``` The `Fixes:` tags should be put at the end of the detailed description. @@ -282,7 +263,7 @@ can certify the below: then you just add a line saying ``` -Signed-off-by: Random J Developer + Signed-off-by: Random J Developer ``` using your real name (please, no pseudonyms or anonymous contributions if @@ -294,14 +275,14 @@ commit message. To append such line to a commit you already made, use ``` From: Random J Developer -Subject: [PATCH] component: Short patch description + Subject: [PATCH] component: Short patch description -Long patch description (could be skipped if patch -is trivial enough) + Long patch description (could be skipped if patch + is trivial enough) -Signed-off-by: Random J Developer ---- -Patch body here + Signed-off-by: Random J Developer + --- + Patch body here ``` ## Submit your work upstream @@ -335,8 +316,8 @@ contains the following: revisions should be listed. 
For example: ``` -v3: rebase on the current criu-dev -v2: add commit to foo() and update bar() coding style + v3: rebase on the current criu-dev + v2: add commit to foo() and update bar() coding style ``` If there are only minor updates to the commits in a pull request, it is @@ -354,7 +335,7 @@ Historically, CRIU worked with mailing lists and patches so if you still prefer To create a patch, run ``` -git format-patch --signoff origin/criu-dev + git format-patch --signoff origin/criu-dev ``` You might need to read GIT documentation on how to prepare patches @@ -365,8 +346,8 @@ at all. We recommend to post patches using `git send-email` ``` -git send-email --cover-letter --no-chain-reply-to --annotate \ - --confirm=always --to=criu@lists.linux.dev criu-dev + git send-email --cover-letter --no-chain-reply-to --annotate \ + --confirm=always --to=criu@openvz.org criu-dev ``` Note that the `git send-email` subcommand may not be in @@ -378,14 +359,14 @@ If this is your first time using git send-email, you might need to configure it to point it to your SMTP server with something like: ``` -git config --global sendemail.smtpServer stmp.example.net + git config --global sendemail.smtpServer stmp.example.net ``` -If you get tired of typing `--to=criu@lists.linux.dev` all the time, +If you get tired of typing `--to=criu@openvz.org` all the time, you can configure that to be automatically handled as well: ``` -git config sendemail.to criu@lists.linux.dev + git config sendemail.to criu@openvz.org ``` If a developer is sending another version of the patch (e.g. to address @@ -398,7 +379,7 @@ version if needed though). ### Mail patches -The patches should be sent to CRIU development mailing list, `criu AT lists.linux.dev`. Note that you need to be subscribed first in order to post. The list web interface is available at https://lore.kernel.org/criu; you can also use standard mailman aliases to work with it. 
+The patches should be sent to CRIU development mailing list, `criu AT openvz.org`. Note that you need to be subscribed first in order to post. The list web interface is available at https://openvz.org/mailman/listinfo/criu; you can also use standard mailman aliases to work with it. Please make sure the email client you're using doesn't screw your patch (line wrapping and so on). @@ -415,3 +396,5 @@ sometimes a patch may fly around a week before it gets reviewed. Wiki article: [Continuous integration](https://criu.org/Continuous_integration) CRIU tests are run for each series sent to the mailing list. If you get a message from our patchwork that patches failed to pass the tests, you have to investigate what is wrong. + +We also recommend you to [enable Travis CI for your repo](https://criu.org/Continuous_integration#Enable_Travis_CI_for_your_repo) to check patches in your git branch, before sending them to the mailing list. diff --git a/Documentation/criu-amdgpu-plugin.txt b/Documentation/criu-amdgpu-plugin.txt index fe76fc3bc..68803f3db 100644 --- a/Documentation/criu-amdgpu-plugin.txt +++ b/Documentation/criu-amdgpu-plugin.txt @@ -15,7 +15,6 @@ Checkpoint / Restore inside a docker container Pytorch Tensorflow Using CRIU Image Streamer -Parallel Restore DESCRIPTION ----------- diff --git a/Documentation/criu.txt b/Documentation/criu.txt index 0c9a9e527..606935790 100644 --- a/Documentation/criu.txt +++ b/Documentation/criu.txt @@ -465,30 +465,6 @@ The 'mode' may be one of the following: *skip*::: Don't lock the network. If *--tcp-close* is not used, the network must be locked externally to allow CRIU to dump TCP connections. -*--allow-uprobes*:: - Allow dumping when uprobes vma is present. When used on dump, this option is - required on restore as well. - - A uprobes vma is automatically created by the kernel once a uprobe is - triggered. This mapping is not removed even once the uprobe is deleted. 
So, - even if a process once had uprobes attached to it, and they're removed by - the time the process is dumped, this option is still required because criu - has no way of knowing whether there are active uprobes or not. - - When using this option on restore, make sure the uprobes (if any) active on - the dumped processes are still active. Otherwise, when execution reaches - a uprobe'd location in any of the restored processes, that process will be - sent a SIGTRAP. - - As an example, say a uprobe is set at function foo in the executable of the - process p_bar. Whenever execution in p_bar reaches function foo, the uprobe - is triggered. If the uprobe has been triggered at least once, then the kernel - will have created the uprobes vma. To dump p_bar, this option is - necessary. After dumping, say the uprobe is deleted. Now, on restoring with - this option, once execution reaches function foo, SIGTRAP will be sent to - the restored p_bar. Unless it has a signal handler installed for SIGTRAP, - it will be terminated and core dumped. - *restore* ~~~~~~~~~ Restores previously checkpointed processes. @@ -502,8 +478,8 @@ Restores previously checkpointed processes. The 'resource' argument can be one of the following: + - **tty[**__rdev__**:**__dev__**]** - - **pipe:[**__inode__**]** - - **socket:[**__inode__*]* + - **pipe[**__inode__**]** + - **socket[**__inode__*]* - **file[**__mnt_id__**:**__inode__**]** - 'path/to/file' @@ -716,10 +692,6 @@ The 'mode' may be one of the following: *--skip-file-rwx-check*:: Skip checking file permissions (r/w/x for u/g/o) on restore. -*--allow-uprobes*:: - Required when dumped with this option. Refer to this option in the section - on dumping for more details. 
- *check* ~~~~~~~ Checks whether the kernel supports the features needed by *criu* to diff --git a/Documentation/logo.svg b/Documentation/logo.svg deleted file mode 100644 index f713e72b7..000000000 --- a/Documentation/logo.svg +++ /dev/null @@ -1,136 +0,0 @@ - - - - - - - diff --git a/GEMINI.md b/GEMINI.md deleted file mode 100644 index e56c1de12..000000000 --- a/GEMINI.md +++ /dev/null @@ -1,136 +0,0 @@ -# CRIU (Checkpoint/Restore In User-space) - -CRIU is a tool for saving the state of a running application to a set of files -(checkpointing) and restoring it back to a live state. It is primarily used for -live migration of containers, in-place updates, and fast application startup. - -It is implemented as a command-line tool called `criu`. The two primary commands -are `dump` and `restore`. - -- `dump`: Saves a process tree and all its related resources (file - descriptors, IPC, sockets, namespaces, etc.) into a collection of image - files. -- `restore`: Restores processes from image files to the same state they were - in before the dump. - -## Quick Start - -To get a feel for `criu`, you can try checkpointing and restoring a simple -process. - -1. **Run a simple process:** - Open a terminal and run a command that will run for a while. Find its PID. - ```bash - sleep 1000 & - [1] 12345 - ``` - -2. **Dump the process:** - As root, use `criu dump` with the process ID (`-t`) and a directory for the - image files (`-D`). - ```bash - sudo criu dump -t 12345 -D /tmp/sleep_images -v4 --shell-job - ``` - The `sleep` process will no longer be running. - -3. **Restore the process:** - Use `criu restore` to bring the process back to life from the images. - ```bash - sudo criu restore -D /tmp/sleep_images -v4 --shell-job - ``` - The `sleep` process will be running again as if nothing happened. - -# For Developers and Contributors - -This section contains more technical details about CRIU's internals and -development process. 
- -## Dump Process - -On dump, CRIU uses available kernel interfaces to collect information about -processes. For properties that can only be retrieved from within the process -itself, CRIU injects a binary blob (called a "parasite") into the process's -address space and executes it in the context of one of the process's threads. -This injection is handled by a subproject called **Compel**. - -## Restore Process - -On restore, CRIU reads the image files to reconstruct the processes. The goal is -to restore them to the exact state they were in before the dump. The restore -process is divided into several stages (defined as `CR_STATE_*` in -`./criu/include/restorer.h`). - -The main `criu` process acts as a coordinator. It first restores resources with -inter-process dependencies (file descriptors, sockets, shared memory, -namespaces, etc.). It then forks the process tree and sets up namespaces. -Finally, it restores process-specific resources like file descriptors and memory -mappings. - -A key step involves a small, self-contained binary called the "restorer". All -restored processes switch to executing this code, which unmaps the CRIU-specific -memory and restores the application's original memory mappings. On the final -step, the restorer calls `sigreturn` on a prepared signal frame to resume the -process with the state it had at the moment of the dump. - -## Compel - -Compel is a subproject responsible for generating the binary blobs used for the -parasite code (for dumping) and the restorer code (for restoring). It provides a -library for injecting and executing this code within the target process's -address space. It is a separate project because the logic for generating and -injecting Position-Independent Executable (PIE) code is complex and -self-contained. - -## Coding Style - -The C code in the CRIU project follows the -[Linux Kernel Coding Style](https://www.kernel.org/doc/html/latest/process/coding-style.html). 
-Here are some of the main points: - -- **Indentation**: Use tabs, which are set to 8 characters. -- **Line Length**: The preferred line limit is 80 characters, but it can be - extended to 120 if it improves code readability. -- **Braces**: - - The opening brace for a function goes on a new line. - - The opening brace for a block (like `if`, `for`, `while`, `switch`) goes - on the same line. -- **Spaces**: Use spaces around operators (`+`, `-`, `*`, `/`, `%`, `<`, `>`, - `=`, etc.). -- **Naming**: Use descriptive names for functions and variables. -- **Comments**: Use C-style comments (`/* ... */`). For multi-line comments, - the preferred format is: - ```c - /* - * This is a multi-line - * comment. - */ - ``` - -## Code Layout - -The code is organized into the following directories: - -- `./compel`: The Compel sub-project. -- `./criu`: The main `criu` tool source code. -- `./images`: Protobuf descriptions for the image files. -- `./test`: All tests. -- `./test/zdtm`: The Zero-Downtime Migration (ZDTM) test suite. -- `./test/zdtm.py`: The executor script for ZDTM tests. -- `./scripts`: Helper scripts. -- `./scripts/build`: Docker image files used for CI and cross-compilation - checks. -- `./crit`: A tool to inspect and manipulate CRIU image files. -- `./soccr`: A library for TCP socket checkpoint/restore. - -## Tests - -The main test suite is ZDTM. Here is an example of how to run a single test: - -```bash -sudo ./test/zdtm.py run -t zdtm/static/env00 -``` - -Each ZDTM test has three stages: preparation, C/R, and results checks. During -the test, a process calls `test_daemon()` to signal it is ready for C/R, then -calls `test_waitsig()` to wait for the C/R stage to complete. After being -restored, the test checks that all its resources are still in a valid state. 
diff --git a/INSTALL.md b/INSTALL.md index af0702518..d786d06eb 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -1,31 +1,11 @@ -## Building CRIU from source code - -First, you need to install compile-time dependencies. Check [Installation dependencies](https://criu.org/Installation#Dependencies) for more info. - -To compile CRIU, run: -``` -make -``` -This should create the `./criu/criu` executable. - -To change the default behaviour of CRIU, the following variables can be passed -to the make command: - - * **NETWORK_LOCK_DEFAULT**, can be set to one of the following - values: `NETWORK_LOCK_IPTABLES`, `NETWORK_LOCK_NFTABLES`, - `NETWORK_LOCK_SKIP`. CRIU defaults to `NETWORK_LOCK_IPTABLES` - if nothing is specified. If another network locking backend is - needed, `make` can be called like this: - `make NETWORK_LOCK_DEFAULT=NETWORK_LOCK_NFTABLES` - ## Installing CRIU from source code Once CRIU is built one can easily setup the complete CRIU package (which includes executable itself, CRIT tool, libraries, manual and etc) simply typing -``` -make install -``` + + make install + this command accepts the following variables: * **DESTDIR**, to specify global root where all components will be placed under (empty by default); @@ -36,17 +16,17 @@ this command accepts the following variables: * **LIBDIR**, to specify directory where to put libraries (guess the correct path by default). Thus one can type -``` -make DESTDIR=/some/new/place install -``` + + make DESTDIR=/some/new/place install + and get everything installed under `/some/new/place`. ## Uninstalling CRIU To clean up previously installed CRIU instance one can type -``` -make uninstall -``` + + make uninstall + and everything should be removed. Note though that if some variable (**DESTDIR**, **BINDIR** and such) has been used during installation procedure, the same *must* be passed with uninstall action. 
diff --git a/Makefile b/Makefile index e26807158..97b4dc211 100644 --- a/Makefile +++ b/Makefile @@ -19,7 +19,7 @@ endif # # Supported Architectures -ifneq ($(filter-out x86 arm aarch64 ppc64 s390 mips loongarch64 riscv64,$(ARCH)),) +ifneq ($(filter-out x86 arm aarch64 ppc64 s390 mips loongarch64,$(ARCH)),) $(error "The architecture $(ARCH) isn't supported") endif @@ -43,7 +43,7 @@ ifeq ($(ARCH),arm) endif ifeq ($(ARMV),8) - # Running 'setarch linux32 uname -m' returns armv8l on aarch64. + # Running 'setarch linux32 uname -m' returns armv8l on travis aarch64. # This tells CRIU to handle armv8l just as armv7hf. Right now this is # only used for compile testing. No further verification of armv8l exists. ARCHCFLAGS += -march=armv7-a @@ -64,8 +64,6 @@ endif ifeq ($(ARCH),aarch64) DEFINES := -DCONFIG_AARCH64 - CC_MBRANCH_PROT := $(shell $(CC) -c -x c /dev/null -mbranch-protection=none -o /dev/null >/dev/null 2>&1 && echo "-mbranch-protection=none") - CFLAGS_PIE := $(CC_MBRANCH_PROT) endif ifeq ($(ARCH),ppc64) @@ -86,10 +84,6 @@ ifeq ($(ARCH),loongarch64) DEFINES := -DCONFIG_LOONGARCH64 endif -ifeq ($(ARCH),riscv64) - DEFINES := -DCONFIG_RISCV64 -endif - # # CFLAGS_PIE: # @@ -142,10 +136,6 @@ ifneq ($(GCOV),) CFLAGS += $(CFLAGS-GCOV) endif -ifneq ($(NETWORK_LOCK_DEFAULT),) - CFLAGS += -DNETWORK_LOCK_DEFAULT=$(NETWORK_LOCK_DEFAULT) -endif - ifeq ($(ASAN),1) CFLAGS-ASAN := -fsanitize=address export CFLAGS-ASAN @@ -447,14 +437,10 @@ help: ruff: @ruff --version - ruff check ${RUFF_FLAGS} --config=scripts/ruff.toml \ + ruff ${RUFF_FLAGS} --config=scripts/ruff.toml \ test/zdtm.py \ test/inhfd/*.py \ test/others/rpc/config_file.py \ - test/others/action-script/check_actions.py \ - test/others/pycriu/*.py \ - lib/pycriu/criu.py \ - lib/pycriu/__init__.py \ lib/pycriu/images/pb2dict.py \ lib/pycriu/images/images.py \ scripts/criu-ns \ @@ -468,8 +454,7 @@ ruff: shellcheck: shellcheck --version shellcheck scripts/*.sh - shellcheck scripts/ci/*.sh - shellcheck contrib/apt-install 
contrib/dependencies/*.sh + shellcheck scripts/ci/*.sh scripts/ci/apt-install shellcheck -x test/others/crit/*.sh shellcheck -x test/others/libcriu/*.sh shellcheck -x test/others/crit/*.sh test/others/criu-coredump/*.sh @@ -477,7 +462,7 @@ shellcheck: shellcheck -x test/others/action-script/*.sh codespell: - codespell + codespell -S tags lint: ruff shellcheck codespell # Do not append \n to pr_perror, pr_pwarn or fail @@ -492,7 +477,7 @@ lint: ruff shellcheck codespell ! git --no-pager grep -E '\s+$$' \*.c \*.h .PHONY: lint ruff shellcheck codespell -codecov: SHELL := $(shell command -v bash) +codecov: SHELL := $(shell which bash) codecov: curl -Os https://uploader.codecov.io/latest/linux/codecov chmod +x codecov diff --git a/Makefile.compel b/Makefile.compel index a4209edc5..764afadc8 100644 --- a/Makefile.compel +++ b/Makefile.compel @@ -50,8 +50,8 @@ compel/plugins/%: $(compel-deps) .FORCE # # GNU make 4.x supports targets matching via wide -# match targeting, where GNU make 3.x series is not, -# so we have to write them here explicitly. +# match targeting, where GNU make 3.x series (used on +# Travis) is not, so we have to write them here explicitly. compel/plugins/std.lib.a: $(compel-deps) .FORCE $(Q) $(MAKE) $(build)=compel/plugins $@ diff --git a/Makefile.config b/Makefile.config index 5cf4b8216..52c250b21 100644 --- a/Makefile.config +++ b/Makefile.config @@ -9,7 +9,7 @@ ifeq ($(call try-cc,$(FEATURE_TEST_LIBBSD_DEV),-lbsd),true) LIBS_FEATURES += -lbsd FEATURE_DEFINES += -DCONFIG_HAS_LIBBSD else - $(info Note: Building without setproctitle() support.) + $(info Note: Building without setproctitle() and strlcpy() support.) $(info $S Install libbsd-devel (RPM) / libbsd-dev (DEB) to fix.) 
endif @@ -59,10 +59,6 @@ endif export LIBS += $(LIBS_FEATURES) -ifneq ($(PLUGINDIR),) - FEATURE_DEFINES += -DCR_PLUGIN_DEFAULT="\"$(PLUGINDIR)\"" -endif - CONFIG_FILE = .config $(CONFIG_FILE): @@ -84,7 +80,7 @@ endif export DEFINES += $(FEATURE_DEFINES) export CFLAGS += $(FEATURE_DEFINES) -FEATURES_LIST := TCP_REPAIR PTRACE_PEEKSIGINFO \ +FEATURES_LIST := TCP_REPAIR STRLCPY STRLCAT PTRACE_PEEKSIGINFO \ SETPROCTITLE_INIT TCP_REPAIR_WINDOW MEMFD_CREATE \ OPENAT2 NO_LIBC_RSEQ_DEFS diff --git a/Makefile.install b/Makefile.install index 70c607ec6..455735f3b 100644 --- a/Makefile.install +++ b/Makefile.install @@ -46,13 +46,9 @@ endif endif # Default flags for pip install: -# --ignore-installed: Overwrite already installed pycriu/crit packages -# --no-build-isolation: Use current Python environment to build pycriu/crit packages -# --no-deps: Don't install any dependencies -# --no-index: Don't use PyPI index to find packages -# --progress-bar: Cleaner output -# --upgrade: Treat the install as an upgrade when replacing the installed version -PIPFLAGS ?= --ignore-installed --no-build-isolation --no-deps --no-index --progress-bar off --upgrade +# --upgrade: Upgrade crit/pycriu packages +# --ignore-installed: Ignore existing packages and reinstall them +PIPFLAGS ?= --upgrade --ignore-installed export SKIP_PIP_INSTALL PIPFLAGS diff --git a/Makefile.versions b/Makefile.versions index 3e6c9ed22..c5859801a 100644 --- a/Makefile.versions +++ b/Makefile.versions @@ -1,10 +1,10 @@ # # CRIU version. 
CRIU_VERSION_MAJOR := 4 -CRIU_VERSION_MINOR := 2 +CRIU_VERSION_MINOR := 0 CRIU_VERSION_SUBLEVEL := CRIU_VERSION_EXTRA := -CRIU_VERSION_NAME := CRIUTIBILITY +CRIU_VERSION_NAME := CRIUDA CRIU_VERSION := $(CRIU_VERSION_MAJOR)$(if $(CRIU_VERSION_MINOR),.$(CRIU_VERSION_MINOR))$(if $(CRIU_VERSION_SUBLEVEL),.$(CRIU_VERSION_SUBLEVEL))$(if $(CRIU_VERSION_EXTRA),.$(CRIU_VERSION_EXTRA)) export CRIU_VERSION_MAJOR CRIU_VERSION_MINOR CRIU_VERSION_SUBLEVEL diff --git a/README.md b/README.md index 6e2a0de9e..f578e745c 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ [![CircleCI](https://circleci.com/gh/checkpoint-restore/criu.svg?style=svg)]( https://circleci.com/gh/checkpoint-restore/criu) -

+

## CRIU -- A project to implement checkpoint/restore functionality for Linux diff --git a/compel/.gitignore b/compel/.gitignore index 5e770a86c..eab3337d6 100644 --- a/compel/.gitignore +++ b/compel/.gitignore @@ -4,9 +4,6 @@ arch/arm/plugins/std/syscalls/syscalls.S arch/aarch64/plugins/std/syscalls/syscalls.S arch/s390/plugins/std/syscalls/syscalls.S arch/ppc64/plugins/std/syscalls/syscalls.S -arch/mips/plugins/std/syscalls/syscalls-64.S -arch/loongarch64/plugins/std/syscalls/syscalls-64.S -arch/riscv64/plugins/std/syscalls/syscalls.S include/version.h plugins/include/uapi/std/asm/syscall-types.h plugins/include/uapi/std/syscall-64.h diff --git a/compel/Makefile b/compel/Makefile index c0b8a82a0..78ec4826a 100644 --- a/compel/Makefile +++ b/compel/Makefile @@ -32,8 +32,8 @@ ifeq ($(ARCH),x86) lib-y += arch/$(ARCH)/src/lib/thread_area.o endif -# handle_elf() has no support of ELF relocations on ARM and RISCV64 (yet?) -ifneq ($(filter arm aarch64 loongarch64 riscv64,$(ARCH)),) +# handle_elf() has no support of ELF relocations on ARM (yet?) +ifneq ($(filter arm aarch64 loongarch64,$(ARCH)),) CFLAGS += -DNO_RELOCS HOSTCFLAGS += -DNO_RELOCS endif diff --git a/compel/arch/aarch64/src/lib/include/uapi/asm/gcs-types.h b/compel/arch/aarch64/src/lib/include/uapi/asm/gcs-types.h deleted file mode 100644 index 9f9655e3b..000000000 --- a/compel/arch/aarch64/src/lib/include/uapi/asm/gcs-types.h +++ /dev/null @@ -1,47 +0,0 @@ -#ifndef __UAPI_ASM_GCS_TYPES_H__ -#define __UAPI_ASM_GCS_TYPES_H__ - -#ifndef NT_ARM_GCS -#define NT_ARM_GCS 0x410 /* ARM GCS state */ -#endif - -/* Shadow Stack/Guarded Control Stack interface */ -#define PR_GET_SHADOW_STACK_STATUS 74 -#define PR_SET_SHADOW_STACK_STATUS 75 -#define PR_LOCK_SHADOW_STACK_STATUS 76 - -/* When set PR_SHADOW_STACK_ENABLE flag allocates a Guarded Control Stack */ -#ifndef PR_SHADOW_STACK_ENABLE -#define PR_SHADOW_STACK_ENABLE (1UL << 0) -#endif - -/* Allows explicit GCS stores (eg. 
using GCSSTR) */ -#ifndef PR_SHADOW_STACK_WRITE -#define PR_SHADOW_STACK_WRITE (1UL << 1) -#endif - -/* Allows explicit GCS pushes (eg. using GCSPUSHM) */ -#ifndef PR_SHADOW_STACK_PUSH -#define PR_SHADOW_STACK_PUSH (1UL << 2) -#endif - -#ifndef SHADOW_STACK_SET_TOKEN -#define SHADOW_STACK_SET_TOKEN 0x1 /* Set up a restore token in the shadow stack */ -#endif - -#define PR_SHADOW_STACK_ALL_MODES \ - PR_SHADOW_STACK_ENABLE | PR_SHADOW_STACK_WRITE | PR_SHADOW_STACK_PUSH - -/* copied from: arch/arm64/include/asm/sysreg.h */ -#define GCS_CAP_VALID_TOKEN 0x1 -#define GCS_CAP_ADDR_MASK 0xFFFFFFFFFFFFF000ULL -#define GCS_CAP(x) ((((unsigned long)x) & GCS_CAP_ADDR_MASK) | GCS_CAP_VALID_TOKEN) -#define GCS_SIGNAL_CAP(addr) (((unsigned long)addr) & GCS_CAP_ADDR_MASK) - -#include - -#ifndef HWCAP_GCS -#define HWCAP_GCS (1UL << 32) -#endif - -#endif /* __UAPI_ASM_GCS_TYPES_H__ */ \ No newline at end of file diff --git a/compel/arch/aarch64/src/lib/include/uapi/asm/infect-types.h b/compel/arch/aarch64/src/lib/include/uapi/asm/infect-types.h index 606c92ffe..9d4ce7e2e 100644 --- a/compel/arch/aarch64/src/lib/include/uapi/asm/infect-types.h +++ b/compel/arch/aarch64/src/lib/include/uapi/asm/infect-types.h @@ -2,7 +2,6 @@ #define UAPI_COMPEL_ASM_TYPES_H__ #include -#include #include #include #include @@ -17,24 +16,7 @@ */ typedef struct user_pt_regs user_regs_struct_t; - -/* - * GCS (Guarded Control Stack) - * - * This mirrors the kernel definition but renamed to cr_user_gcs - * to avoid conflict with kernel headers (/usr/include/asm/ptrace.h). 
- */ -struct cr_user_gcs { - __u64 features_enabled; - __u64 features_locked; - __u64 gcspr_el0; -}; - -struct user_fpregs_struct { - struct user_fpsimd_state fpstate; - struct cr_user_gcs gcs; -}; -typedef struct user_fpregs_struct user_fpregs_struct_t; +typedef struct user_fpsimd_state user_fpregs_struct_t; #define __compel_arch_fetch_thread_area(tid, th) 0 #define compel_arch_fetch_thread_area(tctl) 0 @@ -57,12 +39,4 @@ typedef struct user_fpregs_struct user_fpregs_struct_t; __NR_##syscall; \ }) -extern bool __compel_host_supports_gcs(void); -#define compel_host_supports_gcs __compel_host_supports_gcs - -struct parasite_ctl; -extern int __parasite_setup_shstk(struct parasite_ctl *ctl, - user_fpregs_struct_t *ext_regs); -#define parasite_setup_shstk __parasite_setup_shstk - #endif /* UAPI_COMPEL_ASM_TYPES_H__ */ diff --git a/compel/arch/aarch64/src/lib/include/uapi/asm/sigframe.h b/compel/arch/aarch64/src/lib/include/uapi/asm/sigframe.h index 7efee528f..9152024fd 100644 --- a/compel/arch/aarch64/src/lib/include/uapi/asm/sigframe.h +++ b/compel/arch/aarch64/src/lib/include/uapi/asm/sigframe.h @@ -1,29 +1,19 @@ #ifndef UAPI_COMPEL_ASM_SIGFRAME_H__ #define UAPI_COMPEL_ASM_SIGFRAME_H__ -#include +#include #include #include -#include /* Copied from the kernel header arch/arm64/include/uapi/asm/sigcontext.h */ #define FPSIMD_MAGIC 0x46508001 -#define GCS_MAGIC 0x47435300 typedef struct fpsimd_context fpu_state_t; -struct gcs_context { - struct _aarch64_ctx head; - __u64 gcspr; - __u64 features_enabled; - __u64 reserved; -}; - struct aux_context { struct fpsimd_context fpsimd; - struct gcs_context gcs; /* additional context to be added before "end" */ struct _aarch64_ctx end; }; @@ -72,7 +62,6 @@ struct cr_sigcontext { #define RT_SIGFRAME_AUX_CONTEXT(rt_sigframe) ((struct aux_context *)&(RT_SIGFRAME_SIGCONTEXT(rt_sigframe)->__reserved)) #define RT_SIGFRAME_FPU(rt_sigframe) (&RT_SIGFRAME_AUX_CONTEXT(rt_sigframe)->fpsimd) #define RT_SIGFRAME_OFFSET(rt_sigframe) 0 -#define 
RT_SIGFRAME_GCS(rt_sigframe) (&RT_SIGFRAME_AUX_CONTEXT(rt_sigframe)->gcs) #define rt_sigframe_erase_sigset(sigframe) memset(&sigframe->uc.uc_sigmask, 0, sizeof(k_rtsigset_t)) #define rt_sigframe_copy_sigset(sigframe, from) memcpy(&sigframe->uc.uc_sigmask, from, sizeof(k_rtsigset_t)) diff --git a/compel/arch/aarch64/src/lib/infect.c b/compel/arch/aarch64/src/lib/infect.c index 42f593c79..812ba34a3 100644 --- a/compel/arch/aarch64/src/lib/infect.c +++ b/compel/arch/aarch64/src/lib/infect.c @@ -2,8 +2,8 @@ #include #include #include -#include #include +#include #include #include "common/page.h" @@ -13,8 +13,6 @@ #include "infect.h" #include "infect-priv.h" #include "asm/breakpoints.h" -#include "asm/gcs-types.h" -#include unsigned __page_size = 0; unsigned __page_shift = 0; @@ -35,54 +33,24 @@ static inline void __always_unused __check_code_syscall(void) BUILD_BUG_ON(!is_log2(sizeof(code_syscall))); } -bool __compel_host_supports_gcs(void) -{ - unsigned long hwcap = getauxval(AT_HWCAP); - return (hwcap & HWCAP_GCS) != 0; -} - -static bool __compel_gcs_enabled(struct cr_user_gcs *gcs) -{ - if (!compel_host_supports_gcs()) - return false; - - return gcs && (gcs->features_enabled & PR_SHADOW_STACK_ENABLE) != 0; -} - int sigreturn_prep_regs_plain(struct rt_sigframe *sigframe, user_regs_struct_t *regs, user_fpregs_struct_t *fpregs) { struct fpsimd_context *fpsimd = RT_SIGFRAME_FPU(sigframe); - struct gcs_context *gcs = RT_SIGFRAME_GCS(sigframe); memcpy(sigframe->uc.uc_mcontext.regs, regs->regs, sizeof(regs->regs)); - pr_debug("sigreturn_prep_regs_plain: sp %lx pc %lx\n", (long)regs->sp, (long)regs->pc); - sigframe->uc.uc_mcontext.sp = regs->sp; sigframe->uc.uc_mcontext.pc = regs->pc; sigframe->uc.uc_mcontext.pstate = regs->pstate; - memcpy(fpsimd->vregs, fpregs->fpstate.vregs, 32 * sizeof(__uint128_t)); + memcpy(fpsimd->vregs, fpregs->vregs, 32 * sizeof(__uint128_t)); - fpsimd->fpsr = fpregs->fpstate.fpsr; - fpsimd->fpcr = fpregs->fpstate.fpcr; + fpsimd->fpsr = 
fpregs->fpsr; + fpsimd->fpcr = fpregs->fpcr; fpsimd->head.magic = FPSIMD_MAGIC; fpsimd->head.size = sizeof(*fpsimd); - if (__compel_gcs_enabled(&fpregs->gcs)) { - gcs->head.magic = GCS_MAGIC; - gcs->head.size = sizeof(*gcs); - gcs->reserved = 0; - gcs->gcspr = fpregs->gcs.gcspr_el0 - 8; - gcs->features_enabled = fpregs->gcs.features_enabled; - - pr_debug("sigframe gcspr=%llx features_enabled=%llx\n", fpregs->gcs.gcspr_el0 - 8, fpregs->gcs.features_enabled); - } else { - pr_debug("sigframe gcspr=[disabled]\n"); - memset(gcs, 0, sizeof(*gcs)); - } - return 0; } @@ -91,7 +59,7 @@ int sigreturn_prep_fpu_frame_plain(struct rt_sigframe *sigframe, struct rt_sigfr return 0; } -int compel_get_task_regs(pid_t pid, user_regs_struct_t *regs, user_fpregs_struct_t *ext_regs, save_regs_t save, +int compel_get_task_regs(pid_t pid, user_regs_struct_t *regs, user_fpregs_struct_t *fpsimd, save_regs_t save, void *arg, __maybe_unused unsigned long flags) { struct iovec iov; @@ -106,28 +74,14 @@ int compel_get_task_regs(pid_t pid, user_regs_struct_t *regs, user_fpregs_struct goto err; } - iov.iov_base = &ext_regs->fpstate; - iov.iov_len = sizeof(ext_regs->fpstate); + iov.iov_base = fpsimd; + iov.iov_len = sizeof(*fpsimd); if ((ret = ptrace(PTRACE_GETREGSET, pid, NT_PRFPREG, &iov))) { pr_perror("Failed to obtain FPU registers for %d", pid); goto err; } - memset(&ext_regs->gcs, 0, sizeof(ext_regs->gcs)); - - iov.iov_base = &ext_regs->gcs; - iov.iov_len = sizeof(ext_regs->gcs); - if (ptrace(PTRACE_GETREGSET, pid, NT_ARM_GCS, &iov) == 0) { - pr_info("gcs: GCSPR_EL0 for %d: 0x%llx, features: 0x%llx\n", - pid, ext_regs->gcs.gcspr_el0, ext_regs->gcs.features_enabled); - - if (!__compel_gcs_enabled(&ext_regs->gcs)) - pr_info("gcs: GCS is NOT enabled\n"); - } else { - pr_info("gcs: GCS state not available for %d\n", pid); - } - - ret = save(pid, arg, regs, ext_regs); + ret = save(arg, regs, fpsimd); err: return ret; } @@ -136,44 +90,14 @@ int compel_set_task_ext_regs(pid_t pid, 
user_fpregs_struct_t *ext_regs) { struct iovec iov; - struct cr_user_gcs gcs; - struct iovec gcs_iov = { .iov_base = &gcs, .iov_len = sizeof(gcs) }; - pr_info("Restoring GP/FPU registers for %d\n", pid); - iov.iov_base = &ext_regs->fpstate; - iov.iov_len = sizeof(ext_regs->fpstate); + iov.iov_base = ext_regs; + iov.iov_len = sizeof(*ext_regs); if (ptrace(PTRACE_SETREGSET, pid, NT_PRFPREG, &iov)) { pr_perror("Failed to set FPU registers for %d", pid); return -1; } - - if (ptrace(PTRACE_GETREGSET, pid, NT_ARM_GCS, &gcs_iov) < 0) { - pr_warn("gcs: Failed to get GCS for %d\n", pid); - } else { - ext_regs->gcs = gcs; - compel_set_task_gcs_regs(pid, ext_regs); - } - - return 0; -} - -int compel_set_task_gcs_regs(pid_t pid, user_fpregs_struct_t *ext_regs) -{ - struct iovec iov; - - pr_info("gcs: restoring GCS registers for %d\n", pid); - pr_info("gcs: restoring GCS: gcspr=%llx features=%llx\n", - ext_regs->gcs.gcspr_el0, ext_regs->gcs.features_enabled); - - iov.iov_base = &ext_regs->gcs; - iov.iov_len = sizeof(ext_regs->gcs); - - if (ptrace(PTRACE_SETREGSET, pid, NT_ARM_GCS, &iov)) { - pr_perror("gcs: Failed to set GCS registers for %d", pid); - return -1; - } - return 0; } @@ -362,68 +286,3 @@ int ptrace_flush_breakpoints(pid_t pid) return 0; } - -int inject_gcs_cap_token(struct parasite_ctl *ctl, pid_t pid, struct cr_user_gcs *gcs) -{ - struct iovec gcs_iov = { .iov_base = gcs, .iov_len = sizeof(*gcs) }; - - uint64_t token_addr = gcs->gcspr_el0 - 8; - uint64_t sigtramp_addr = gcs->gcspr_el0 - 16; - - uint64_t cap_token = ALIGN_DOWN(GCS_SIGNAL_CAP(token_addr), 8); - unsigned long restorer_addr; - - pr_info("gcs: (setup) CAP token: 0x%lx at addr: 0x%lx\n", cap_token, token_addr); - - /* Inject capability token at gcspr_el0 - 8 */ - if (ptrace(PTRACE_POKEDATA, pid, (void *)token_addr, cap_token)) { - pr_perror("gcs: (setup) Inject GCS cap token failed"); - return -1; - } - - /* Inject restorer trampoline address (gcspr_el0 - 16) */ - restorer_addr = ctl->parasite_ip; - if 
(ptrace(PTRACE_POKEDATA, pid, (void *)sigtramp_addr, restorer_addr)) { - pr_perror("gcs: (setup) Inject GCS restorer failed"); - return -1; - } - - /* Update GCSPR_EL0 */ - gcs->gcspr_el0 = token_addr; - if (ptrace(PTRACE_SETREGSET, pid, NT_ARM_GCS, &gcs_iov)) { - pr_perror("gcs: PTRACE_SETREGS FAILED"); - return -1; - } - - pr_debug("gcs: parasite_ip=%#lx sp=%#llx gcspr_el0=%#llx\n", - ctl->parasite_ip, ctl->orig.regs.sp, gcs->gcspr_el0); - - return 0; -} - -int parasite_setup_shstk(struct parasite_ctl *ctl, user_fpregs_struct_t *ext_regs) -{ - struct cr_user_gcs gcs; - struct iovec gcs_iov = { .iov_base = &gcs, .iov_len = sizeof(gcs) }; - pid_t pid = ctl->rpid; - - if(!__compel_host_supports_gcs()) - return 0; - - if (ptrace(PTRACE_GETREGSET, pid, NT_ARM_GCS, &gcs_iov) != 0) { - pr_perror("GCS state not available for %d", pid); - return -1; - } - - if (!__compel_gcs_enabled(&gcs)) - return 0; - - if (inject_gcs_cap_token(ctl, pid, &gcs)) { - pr_perror("Failed to inject GCS cap token for %d", pid); - return -1; - } - - pr_info("gcs: GCS enabled for %d\n", pid); - - return 0; -} diff --git a/compel/arch/arm/plugins/std/syscalls/syscall.def b/compel/arch/arm/plugins/std/syscalls/syscall.def index f4deb02b2..9a33009eb 100644 --- a/compel/arch/arm/plugins/std/syscalls/syscall.def +++ b/compel/arch/arm/plugins/std/syscalls/syscall.def @@ -85,7 +85,7 @@ timer_settime 110 258 (kernel_timer_t timer_id, int flags, const struct itimer timer_gettime 108 259 (int timer_id, const struct itimerspec *setting) timer_getoverrun 109 260 (int timer_id) timer_delete 111 261 (kernel_timer_t timer_id) -clock_gettime 113 263 (clockid_t which_clock, struct timespec *tp) +clock_gettime 113 263 (const clockid_t which_clock, const struct timespec *tp) exit_group 94 248 (int error_code) set_robust_list 99 338 (struct robust_list_head *head, size_t len) get_robust_list 100 339 (int pid, struct robust_list_head **head_ptr, size_t *len_ptr) @@ -124,4 +124,3 @@ openat2 437 437 (int dirfd, char 
*pathname, struct open_how *how, size_t size pidfd_getfd 438 438 (int pidfd, int targetfd, unsigned int flags) rseq 293 398 (void *rseq, uint32_t rseq_len, int flags, uint32_t sig) membarrier 283 389 (int cmd, unsigned int flags, int cpu_id) -map_shadow_stack 453 ! (unsigned long addr, unsigned long size, unsigned int flags) \ No newline at end of file diff --git a/compel/arch/arm/src/lib/infect.c b/compel/arch/arm/src/lib/infect.c index a9fb639e2..8b810a88f 100644 --- a/compel/arch/arm/src/lib/infect.c +++ b/compel/arch/arm/src/lib/infect.c @@ -94,7 +94,7 @@ int compel_get_task_regs(pid_t pid, user_regs_struct_t *regs, user_fpregs_struct } } - ret = save(pid, arg, regs, vfp); + ret = save(arg, regs, vfp); err: return ret; } diff --git a/compel/arch/loongarch64/plugins/std/syscalls/syscall_64.tbl b/compel/arch/loongarch64/plugins/std/syscalls/syscall_64.tbl index 83dcdab4a..aa6ffb44d 100644 --- a/compel/arch/loongarch64/plugins/std/syscalls/syscall_64.tbl +++ b/compel/arch/loongarch64/plugins/std/syscalls/syscall_64.tbl @@ -46,7 +46,7 @@ __NR_sys_timer_gettime 108 sys_timer_gettime (int timer_id, const struct itimer __NR_sys_timer_getoverrun 109 sys_timer_getoverrun (int timer_id) __NR_sys_timer_settime 110 sys_timer_settime (kernel_timer_t timer_id, int flags, const struct itimerspec *new_setting, struct itimerspec *old_setting) __NR_sys_timer_delete 111 sys_timer_delete (kernel_timer_t timer_id) -__NR_clock_gettime 113 sys_clock_gettime (clockid_t which_clock, struct timespec *tp) +__NR_clock_gettime 113 sys_clock_gettime (const clockid_t which_clock, const struct timespec *tp) __NR_sched_setscheduler 119 sys_sched_setscheduler (int pid, int policy, struct sched_param *p) __NR_restart_syscall 128 sys_restart_syscall (void) __NR_kill 129 sys_kill (long pid, int sig) diff --git a/compel/arch/loongarch64/src/lib/infect.c b/compel/arch/loongarch64/src/lib/infect.c index 190c39227..8e3c19aff 100644 --- a/compel/arch/loongarch64/src/lib/infect.c +++ 
b/compel/arch/loongarch64/src/lib/infect.c @@ -91,7 +91,7 @@ int compel_get_task_regs(pid_t pid, user_regs_struct_t *regs, user_fpregs_struct goto err; } - ret = save(pid, arg, regs, fpregs); + ret = save(arg, regs, fpregs); err: return 0; } diff --git a/compel/arch/mips/plugins/std/syscalls/syscall_64.tbl b/compel/arch/mips/plugins/std/syscalls/syscall_64.tbl index ad3d44634..85faca5a9 100644 --- a/compel/arch/mips/plugins/std/syscalls/syscall_64.tbl +++ b/compel/arch/mips/plugins/std/syscalls/syscall_64.tbl @@ -84,7 +84,7 @@ __NR_sys_timer_settime 5217 sys_timer_settime (kernel_timer_t timer_id, int fl __NR_sys_timer_gettime 5218 sys_timer_gettime (int timer_id, const struct itimerspec *setting) __NR_sys_timer_getoverrun 5219 sys_timer_getoverrun (int timer_id) __NR_sys_timer_delete 5220 sys_timer_delete (kernel_timer_t timer_id) -__NR_clock_gettime 5222 sys_clock_gettime (clockid_t which_clock, struct timespec *tp) +__NR_clock_gettime 5222 sys_clock_gettime (const clockid_t which_clock, const struct timespec *tp) __NR_exit_group 5205 sys_exit_group (int error_code) __NR_set_thread_area 5242 sys_set_thread_area (unsigned long *addr) __NR_openat 5247 sys_openat (int dfd, const char *filename, int flags, int mode) diff --git a/compel/arch/mips/src/lib/handle-elf.c b/compel/arch/mips/src/lib/handle-elf.c index e086761c2..a605a5a45 100644 --- a/compel/arch/mips/src/lib/handle-elf.c +++ b/compel/arch/mips/src/lib/handle-elf.c @@ -5,31 +5,18 @@ #include "piegen.h" #include "log.h" +static const unsigned char __maybe_unused elf_ident_64_le[EI_NIDENT] = { + 0x7f, 0x45, 0x4c, 0x46, 0x02, 0x01, 0x01, 0x00, /* clang-format */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +}; + extern int __handle_elf(void *mem, size_t size); int handle_binary(void *mem, size_t size) { - Elf64_Ehdr *ehdr = (Elf64_Ehdr *)mem; + if (memcmp(mem, elf_ident_64_le, sizeof(elf_ident_64_le)) == 0) + return __handle_elf(mem, size); - /* check ELF magic */ - if (ehdr->e_ident[EI_MAG0] != ELFMAG0 
|| - ehdr->e_ident[EI_MAG1] != ELFMAG1 || - ehdr->e_ident[EI_MAG2] != ELFMAG2 || - ehdr->e_ident[EI_MAG3] != ELFMAG3) { - pr_err("Invalid ELF magic\n"); - return -EINVAL; - } - - /* check ELF class and data encoding */ - if (ehdr->e_ident[EI_CLASS] != ELFCLASS64 || - ehdr->e_ident[EI_DATA] != ELFDATA2LSB) { - pr_err("Unsupported ELF class or data encoding\n"); - return -EINVAL; - } - - if (ehdr->e_ident[EI_ABIVERSION] != 0) { - pr_warn("Unusual ABI version: %d\n", ehdr->e_ident[EI_ABIVERSION]); - } - - return __handle_elf(mem, size); + pr_err("Unsupported Elf format detected\n"); + return -EINVAL; } diff --git a/compel/arch/mips/src/lib/infect.c b/compel/arch/mips/src/lib/infect.c index a1d4865cc..0e98aaee3 100644 --- a/compel/arch/mips/src/lib/infect.c +++ b/compel/arch/mips/src/lib/infect.c @@ -149,7 +149,7 @@ int compel_get_task_regs(pid_t pid, user_regs_struct_t *regs, user_fpregs_struct regs->regs[0] = 0; } - ret = save(pid, arg, regs, xs); + ret = save(arg, regs, xs); return ret; } diff --git a/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl b/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl index 3deb41cf7..c56b4e6de 100644 --- a/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl +++ b/compel/arch/ppc64/plugins/std/syscalls/syscall-ppc64.tbl @@ -82,7 +82,7 @@ __NR_sys_timer_settime 241 sys_timer_settime (kernel_timer_t timer_id, int flag __NR_sys_timer_gettime 242 sys_timer_gettime (int timer_id, const struct itimerspec *setting) __NR_sys_timer_getoverrun 243 sys_timer_getoverrun (int timer_id) __NR_sys_timer_delete 244 sys_timer_delete (kernel_timer_t timer_id) -__NR_clock_gettime 246 sys_clock_gettime (clockid_t which_clock, struct timespec *tp) +__NR_clock_gettime 246 sys_clock_gettime (const clockid_t which_clock, const struct timespec *tp) __NR_exit_group 234 sys_exit_group (int error_code) __NR_waitid 272 sys_waitid (int which, pid_t pid, struct siginfo *infop, int options, struct rusage *ru) __NR_set_robust_list 300 
sys_set_robust_list (struct robust_list_head *head, size_t len) diff --git a/compel/arch/ppc64/src/lib/infect.c b/compel/arch/ppc64/src/lib/infect.c index 54abd48a4..84c2b1d7c 100644 --- a/compel/arch/ppc64/src/lib/infect.c +++ b/compel/arch/ppc64/src/lib/infect.c @@ -400,7 +400,7 @@ int compel_get_task_regs(pid_t pid, user_regs_struct_t *regs, user_fpregs_struct if (ret) return ret; - return save(pid, arg, regs, fpregs); + return save(arg, regs, fpregs); } int compel_set_task_ext_regs(pid_t pid, user_fpregs_struct_t *ext_regs) diff --git a/compel/arch/riscv64/plugins/include/asm/prologue.h b/compel/arch/riscv64/plugins/include/asm/prologue.h deleted file mode 100644 index 5c22b7b06..000000000 --- a/compel/arch/riscv64/plugins/include/asm/prologue.h +++ /dev/null @@ -1,35 +0,0 @@ -#ifndef __ASM_PROLOGUE_H__ -#define __ASM_PROLOGUE_H__ - -#ifndef __ASSEMBLY__ - -#include -#include -#include - -#include - -#define sys_recv(sockfd, ubuf, size, flags) sys_recvfrom(sockfd, ubuf, size, flags, NULL, NULL) - -typedef struct prologue_init_args { - struct sockaddr_un ctl_sock_addr; - unsigned int ctl_sock_addr_len; - - unsigned int arg_s; - void *arg_p; - - void *sigframe; -} prologue_init_args_t; - -#endif /* __ASSEMBLY__ */ - -/* - * Reserve enough space for sigframe. - * - * FIXME It is rather should be taken from sigframe header. 
- */ -#define PROLOGUE_SGFRAME_SIZE 4096 - -#define PROLOGUE_INIT_ARGS_SIZE 1024 - -#endif /* __ASM_PROLOGUE_H__ */ \ No newline at end of file diff --git a/compel/arch/riscv64/plugins/include/asm/syscall-types.h b/compel/arch/riscv64/plugins/include/asm/syscall-types.h deleted file mode 100644 index b9740a9ee..000000000 --- a/compel/arch/riscv64/plugins/include/asm/syscall-types.h +++ /dev/null @@ -1,28 +0,0 @@ -#ifndef COMPEL_ARCH_SYSCALL_TYPES_H__ -#define COMPEL_ARCH_SYSCALL_TYPES_H__ - -#define SA_RESTORER 0x04000000 - -typedef void rt_signalfn_t(int, siginfo_t *, void *); -typedef rt_signalfn_t *rt_sighandler_t; - -typedef void rt_restorefn_t(void); -typedef rt_restorefn_t *rt_sigrestore_t; - -#define _KNSIG 64 // number of signals -#define _NSIG_BPW 64 // number of signals per word - -#define _KNSIG_WORDS (_KNSIG / _NSIG_BPW) - -typedef struct { - unsigned long sig[_KNSIG_WORDS]; -} k_rtsigset_t; - -typedef struct { - rt_sighandler_t rt_sa_handler; - unsigned long rt_sa_flags; - rt_sigrestore_t rt_sa_restorer; - k_rtsigset_t rt_sa_mask; -} rt_sigaction_t; - -#endif /* COMPEL_ARCH_SYSCALL_TYPES_H__ */ \ No newline at end of file diff --git a/compel/arch/riscv64/plugins/include/features.h b/compel/arch/riscv64/plugins/include/features.h deleted file mode 100644 index 274cee52a..000000000 --- a/compel/arch/riscv64/plugins/include/features.h +++ /dev/null @@ -1,4 +0,0 @@ -#ifndef __COMPEL_ARCH_FEATURES_H -#define __COMPEL_ARCH_FEATURES_H - -#endif /* __COMPEL_ARCH_FEATURES_H */ \ No newline at end of file diff --git a/compel/arch/riscv64/plugins/std/parasite-head.S b/compel/arch/riscv64/plugins/std/parasite-head.S deleted file mode 100644 index 3e9d272e3..000000000 --- a/compel/arch/riscv64/plugins/std/parasite-head.S +++ /dev/null @@ -1,7 +0,0 @@ -#include "common/asm/linkage.h" - - .section .head.text, "ax" -ENTRY(__export_parasite_head_start) - jal parasite_service - ebreak -END(__export_parasite_head_start) \ No newline at end of file diff --git 
a/compel/arch/riscv64/plugins/std/syscalls/Makefile.syscalls b/compel/arch/riscv64/plugins/std/syscalls/Makefile.syscalls deleted file mode 100644 index 5af35bcb4..000000000 --- a/compel/arch/riscv64/plugins/std/syscalls/Makefile.syscalls +++ /dev/null @@ -1,59 +0,0 @@ -ccflags-y += -iquote $(PLUGIN_ARCH_DIR)/std/syscalls/ -asflags-y += -iquote $(PLUGIN_ARCH_DIR)/std/syscalls/ - -sys-types := $(obj)/include/uapi/std/syscall-types.h -sys-codes := $(obj)/include/uapi/std/syscall-codes.h -sys-proto := $(obj)/include/uapi/std/syscall.h - -sys-def := $(PLUGIN_ARCH_DIR)/std/syscalls/syscall.def -sys-asm-common-name := std/syscalls/syscall-common.S -sys-asm-common := $(PLUGIN_ARCH_DIR)/$(sys-asm-common-name) -sys-asm-types := $(obj)/include/uapi/std/asm/syscall-types.h -sys-exec-tbl = $(PLUGIN_ARCH_DIR)/std/sys-exec-tbl.c - -sys-gen := $(PLUGIN_ARCH_DIR)/std/syscalls/gen-syscalls.pl -sys-gen-tbl := $(PLUGIN_ARCH_DIR)/std/syscalls/gen-sys-exec-tbl.pl - -sys-asm := ./$(PLUGIN_ARCH_DIR)/std/syscalls/syscalls.S -std-lib-y += $(sys-asm:.S=).o - -ifeq ($(ARCH),arm) -arch_bits := 32 -else -arch_bits := 64 -endif - -sys-exec-tbl := sys-exec-tbl.c - -$(sys-asm) $(sys-types) $(sys-codes) $(sys-proto): $(sys-gen) $(sys-def) $(sys-asm-common) $(sys-asm-types) - $(E) " GEN " $@ - $(Q) perl \ - $(sys-gen) \ - $(sys-def) \ - $(sys-codes) \ - $(sys-proto) \ - $(sys-asm) \ - $(sys-asm-common-name) \ - $(sys-types) \ - $(arch_bits) - -$(sys-asm:.S=).o: $(sys-asm) - -$(sys-exec-tbl): $(sys-gen-tbl) $(sys-def) - $(E) " GEN " $@ - $(Q) perl \ - $(sys-gen-tbl) \ - $(sys-def) \ - $(sys-exec-tbl) \ - $(arch_bits) - -$(sys-asm-types): $(PLUGIN_ARCH_DIR)/include/asm/syscall-types.h - $(call msg-gen, $@) - $(Q) ln -s ../../../../../../$(PLUGIN_ARCH_DIR)/include/asm/syscall-types.h $(sys-asm-types) - $(Q) ln -s ../../../../../$(PLUGIN_ARCH_DIR)/std/syscalls/syscall-aux.S $(obj)/include/uapi/std/syscall-aux.S - $(Q) ln -s ../../../../../$(PLUGIN_ARCH_DIR)/std/syscalls/syscall-aux.h 
$(obj)/include/uapi/std/syscall-aux.h - -std-headers-deps += $(sys-asm) $(sys-codes) $(sys-proto) $(sys-asm-types) $(sys-codes) -mrproper-y += $(std-headers-deps) -mrproper-y += $(obj)/include/uapi/std/syscall-aux.S -mrproper-y += $(obj)/include/uapi/std/syscall-aux.h \ No newline at end of file diff --git a/compel/arch/riscv64/plugins/std/syscalls/gen-sys-exec-tbl.pl b/compel/arch/riscv64/plugins/std/syscalls/gen-sys-exec-tbl.pl deleted file mode 100755 index 61a807eb6..000000000 --- a/compel/arch/riscv64/plugins/std/syscalls/gen-sys-exec-tbl.pl +++ /dev/null @@ -1,43 +0,0 @@ -#!/usr/bin/perl - -use strict; -use warnings; - -my $in = $ARGV[0]; -my $tblout = $ARGV[1]; -my $bits = $ARGV[2]; - -my $code = "code$bits"; - -open TBLOUT, ">", $tblout or die $!; -open IN, "<", $in or die $!; - -print TBLOUT "/* Autogenerated, don't edit */\n"; -print TBLOUT "static struct syscall_exec_desc sc_exec_table[] = {\n"; - -for () { - if ($_ =~ /\#/) { - next; - } - - my $sys_name; - my $sys_num; - - if (/(?\S+)\s+(?\S+)\s+(?\d+|\!)\s+(?(?:\d+|\!))\s+\((?.+)\)/) { - $sys_name = $+{alias}; - } elsif (/(?\S+)\s+(?\d+|\!)\s+(?(?:\d+|\!))\s+\((?.+)\)/) { - $sys_name = $+{name}; - } else { - unlink $tblout; - die "Invalid syscall definition file: invalid entry $_\n"; - } - - $sys_num = $+{$code}; - - if ($sys_num ne "!") { - print TBLOUT "SYSCALL($sys_name, $sys_num)\n"; - } -} - -print TBLOUT " { }, /* terminator */"; -print TBLOUT "};" \ No newline at end of file diff --git a/compel/arch/riscv64/plugins/std/syscalls/gen-syscalls.pl b/compel/arch/riscv64/plugins/std/syscalls/gen-syscalls.pl deleted file mode 100755 index a53f1962f..000000000 --- a/compel/arch/riscv64/plugins/std/syscalls/gen-syscalls.pl +++ /dev/null @@ -1,99 +0,0 @@ -#!/usr/bin/perl - -use strict; -use warnings; - -my $in = $ARGV[0]; -my $codesout = $ARGV[1]; -my $codes = $ARGV[1]; -$codes =~ s/.*include\/uapi\//compel\/plugins\//g; -my $protosout = $ARGV[2]; -my $protos = $ARGV[2]; -$protos =~ 
s/.*include\/uapi\//compel\/plugins\//g; -my $asmout = $ARGV[3]; -my $asmcommon = $ARGV[4]; -my $prototypes = $ARGV[5]; -$prototypes =~ s/.*include\/uapi\//compel\/plugins\//g; -my $bits = $ARGV[6]; - -my $codesdef = $codes; -$codesdef =~ tr/.\-\//_/; -my $protosdef = $protos; -$protosdef =~ tr/.\-\//_/; -my $code = "code$bits"; -my $need_aux = 0; - -unlink $codesout; -unlink $protosout; -unlink $asmout; - -open CODESOUT, ">", $codesout or die $!; -open PROTOSOUT, ">", $protosout or die $!; -open ASMOUT, ">", $asmout or die $!; -open IN, "<", $in or die $!; - -print CODESOUT <<"END"; -/* Autogenerated, don't edit */ -#ifndef $codesdef -#define $codesdef -END - -print PROTOSOUT <<"END"; -/* Autogenerated, don't edit */ -#ifndef $protosdef -#define $protosdef -#include <$prototypes> -#include <$codes> -END - -print ASMOUT <<"END"; -/* Autogenerated, don't edit */ -#include <$codes> -#include "$asmcommon" -END - - -for () { - if ($_ =~ /\#/) { - next; - } - - my $code_macro; - my $sys_macro; - my $sys_name; - - if (/(?\S+)\s+(?\S+)\s+(?\d+|\!)\s+(?(?:\d+|\!))\s+\((?.+)\)/) { - $code_macro = "__NR_$+{name}"; - $sys_macro = "SYS_$+{name}"; - $sys_name = "sys_$+{alias}"; - } elsif (/(?\S+)\s+(?\d+|\!)\s+(?(?:\d+|\!))\s+\((?.+)\)/) { - $code_macro = "__NR_$+{name}"; - $sys_macro = "SYS_$+{name}"; - $sys_name = "sys_$+{name}"; - } else { - unlink $codesout; - unlink $protosout; - unlink $asmout; - - die "Invalid syscall definition file: invalid entry $_\n"; - } - - if ($+{$code} ne "!") { - print CODESOUT "#ifndef $code_macro\n#define $code_macro $+{$code}\n#endif\n"; - print CODESOUT "#ifndef $sys_macro\n#define $sys_macro $code_macro\n#endif\n"; - print ASMOUT "syscall $sys_name, $code_macro\n"; - - } else { - $need_aux = 1; - } - - print PROTOSOUT "extern long $sys_name($+{args});\n"; -} - -if ($need_aux == 1) { - print ASMOUT "#include \n"; - print CODESOUT "#include \n"; -} - -print CODESOUT "#endif /* $codesdef */"; -print PROTOSOUT "#endif /* $protosdef */"; \ No 
newline at end of file diff --git a/compel/arch/riscv64/plugins/std/syscalls/syscall-aux.S b/compel/arch/riscv64/plugins/std/syscalls/syscall-aux.S deleted file mode 100644 index 04160b7ac..000000000 --- a/compel/arch/riscv64/plugins/std/syscalls/syscall-aux.S +++ /dev/null @@ -1,37 +0,0 @@ -/** - * This source contains emulation of syscalls - * that are not implemented in the riscv64 Linux kernel - */ - -ENTRY(sys_open) - add a3, x0, a2 - add a2, x0, a1 - add a1, x0, a0 - addi a0, x0, -100 - j sys_openat -END(sys_open) - - -ENTRY(sys_mkdir) - add a3,x0, a2 - add a2, x0, a1 - add a1, x0, a0 - addi a0, x0, -100 - j sys_mkdirat -END(sys_mkdir) - - -ENTRY(sys_rmdir) - addi a2, x0, 0x200 // flags = AT_REMOVEDIR - add a1, x0, a0 - addi a0, x0, -100 - j sys_unlinkat -END(sys_rmdir) - - -ENTRY(sys_unlink) - addi a2, x0, 0 // flags = 0 - add a1, x0, a0 - addi a0, x0, -100 - j sys_unlinkat -END(sys_unlink) \ No newline at end of file diff --git a/compel/arch/riscv64/plugins/std/syscalls/syscall-aux.h b/compel/arch/riscv64/plugins/std/syscalls/syscall-aux.h deleted file mode 100644 index 881765bbb..000000000 --- a/compel/arch/riscv64/plugins/std/syscalls/syscall-aux.h +++ /dev/null @@ -1,3 +0,0 @@ -#ifndef __NR_openat -#define __NR_openat 56 -#endif \ No newline at end of file diff --git a/compel/arch/riscv64/plugins/std/syscalls/syscall-common.S b/compel/arch/riscv64/plugins/std/syscalls/syscall-common.S deleted file mode 100644 index fdef3b47a..000000000 --- a/compel/arch/riscv64/plugins/std/syscalls/syscall-common.S +++ /dev/null @@ -1,17 +0,0 @@ -#include "common/asm/linkage.h" - -syscall_common: - ecall - ret - -.macro syscall name, nr - ENTRY(\name) - li a7, \nr - j syscall_common - END(\name) -.endm - -ENTRY(__cr_restore_rt) - li a7, __NR_rt_sigreturn - ecall -END(__cr_restore_rt) \ No newline at end of file diff --git a/compel/arch/riscv64/plugins/std/syscalls/syscall.def b/compel/arch/riscv64/plugins/std/syscalls/syscall.def deleted file mode 100644 index 
967f097f9..000000000 --- a/compel/arch/riscv64/plugins/std/syscalls/syscall.def +++ /dev/null @@ -1,125 +0,0 @@ -# -# System calls table, please make sure the table consists of only the syscalls -# really used somewhere in the project. -# -# The template is (name and arguments are optional if you need only __NR_x -# defined, but no real entry point in syscalls lib). -# -# name/alias code64 code32 arguments -# ----------------------------------------------------------------------- -# -read 63 3 (int fd, void *buf, unsigned long count) -write 64 4 (int fd, const void *buf, unsigned long count) -open ! 5 (const char *filename, unsigned long flags, unsigned long mode) -close 57 6 (int fd) -lseek 62 19 (int fd, unsigned long offset, unsigned long origin) -mmap 222 ! (void *addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long offset) -mprotect 226 125 (const void *addr, unsigned long len, unsigned long prot) -munmap 215 91 (void *addr, unsigned long len) -brk 214 45 (void *addr) -rt_sigaction sigaction 134 174 (int signum, const rt_sigaction_t *act, rt_sigaction_t *oldact, size_t sigsetsize) -rt_sigprocmask sigprocmask 135 175 (int how, k_rtsigset_t *set, k_rtsigset_t *old, size_t sigsetsize) -rt_sigreturn 139 173 (void) -ioctl 29 54 (unsigned int fd, unsigned int cmd, unsigned long arg) -pread64 67 180 (unsigned int fd, char *buf, size_t count, loff_t pos) -ptrace 117 26 (long request, pid_t pid, void *addr, void *data) -mremap 216 163 (unsigned long addr, unsigned long old_len, unsigned long new_len, unsigned long flag, unsigned long new_addr) -mincore 232 219 (void *addr, unsigned long size, unsigned char *vec) -madvise 233 220 (unsigned long start, size_t len, int behavior) -shmat 196 305 (int shmid, void *shmaddr, int shmflag) -pause 1061 29 (void) -nanosleep 101 162 (struct timespec *req, struct timespec *rem) -getitimer 102 105 (int which, const struct itimerval *val) -setitimer 103 104 (int which, const struct 
itimerval *val, struct itimerval *old) -getpid 172 20 (void) -socket 198 281 (int domain, int type, int protocol) -connect 203 283 (int sockfd, struct sockaddr *addr, int addrlen) -sendto 206 290 (int sockfd, void *buff, size_t len, unsigned int flags, struct sockaddr *addr, int addr_len) -recvfrom 207 292 (int sockfd, void *ubuf, size_t size, unsigned int flags, struct sockaddr *addr, int *addr_len) -sendmsg 211 296 (int sockfd, const struct msghdr *msg, int flags) -recvmsg 212 297 (int sockfd, struct msghdr *msg, int flags) -shutdown 210 293 (int sockfd, int how) -bind 235 282 (int sockfd, const struct sockaddr *addr, int addrlen) -setsockopt 208 294 (int sockfd, int level, int optname, const void *optval, socklen_t optlen) -getsockopt 209 295 (int sockfd, int level, int optname, const void *optval, socklen_t *optlen) -clone 220 120 (unsigned long flags, void *child_stack, void *parent_tid, unsigned long newtls, void *child_tid) -exit 93 1 (unsigned long error_code) -wait4 260 114 (int pid, int *status, int options, struct rusage *ru) -waitid 95 280 (int which, pid_t pid, struct siginfo *infop, int options, struct rusage *ru) -kill 129 37 (long pid, int sig) -fcntl 25 55 (int fd, int type, long arg) -flock 32 143 (int fd, unsigned long cmd) -mkdir ! 39 (const char *name, int mode) -rmdir ! 40 (const char *name) -unlink ! 
10 (char *pathname) -readlinkat 78 332 (int fd, const char *path, char *buf, int bufsize) -umask 166 60 (int mask) -getgroups 158 205 (int gsize, unsigned int *groups) -setgroups 159 206 (int gsize, unsigned int *groups) -setresuid 147 164 (int uid, int euid, int suid) -getresuid 148 165 (int *uid, int *euid, int *suid) -setresgid 149 170 (int gid, int egid, int sgid) -getresgid 150 171 (int *gid, int *egid, int *sgid) -getpgid 155 132 (pid_t pid) -setfsuid 151 138 (int fsuid) -setfsgid 152 139 (int fsgid) -getsid 156 147 (void) -capget 90 184 (struct cap_header *h, struct cap_data *d) -capset 91 185 (struct cap_header *h, struct cap_data *d) -rt_sigqueueinfo 138 178 (pid_t pid, int sig, siginfo_t *info) -setpriority 140 97 (int which, int who, int nice) -sched_setscheduler 119 156 (int pid, int policy, struct sched_param *p) -sigaltstack 132 186 (const void *uss, void *uoss) -personality 92 136 (unsigned int personality) -prctl 167 172 (int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5) -arch_prctl ! 
17 (int option, unsigned long addr) -setrlimit 164 75 (int resource, struct krlimit *rlim) -mount 40 21 (char *dev_nmae, char *dir_name, char *type, unsigned long flags, void *data) -umount2 39 52 (char *name, int flags) -gettid 178 224 (void) -futex 98 240 (uint32_t *uaddr, int op, uint32_t val, struct timespec *utime, uint32_t *uaddr2, uint32_t val3) -set_tid_address 96 256 (int *tid_addr) -restart_syscall 128 0 (void) -timer_create 107 257 (clockid_t which_clock, struct sigevent *timer_event_spec, kernel_timer_t *created_timer_id) -timer_settime 110 258 (kernel_timer_t timer_id, int flags, const struct itimerspec *new_setting, struct itimerspec *old_setting) -timer_gettime 108 259 (int timer_id, const struct itimerspec *setting) -timer_getoverrun 109 260 (int timer_id) -timer_delete 111 261 (kernel_timer_t timer_id) -clock_gettime 113 263 (clockid_t which_clock, struct timespec *tp) -exit_group 94 248 (int error_code) -set_robust_list 99 338 (struct robust_list_head *head, size_t len) -get_robust_list 100 339 (int pid, struct robust_list_head **head_ptr, size_t *len_ptr) -signalfd4 74 355 (int fd, k_rtsigset_t *mask, size_t sizemask, int flags) -rt_tgsigqueueinfo 240 363 (pid_t tgid, pid_t pid, int sig, siginfo_t *info) -vmsplice 75 343 (int fd, const struct iovec *iov, unsigned long nr_segs, unsigned int flags) -timerfd_settime 86 353 (int ufd, int flags, const struct itimerspec *utmr, struct itimerspec *otmr) -fanotify_init 262 367 (unsigned int flags, unsigned int event_f_flags) -fanotify_mark 263 368 (int fanotify_fd, unsigned int flags, uint64_t mask, int dfd, const char *pathname) -open_by_handle_at 265 371 (int mountdirfd, struct file_handle *handle, int flags) -setns 268 375 (int fd, int nstype) -kcmp 272 378 (pid_t pid1, pid_t pid2, int type, unsigned long idx1, unsigned long idx2) -openat 56 322 (int dirfd, const char *pathname, int flags, mode_t mode) -mkdirat 34 323 (int dirfd, const char *pathname, mode_t mode) -unlinkat 35 328 (int dirfd, const 
char *pathname, int flags) -memfd_create 279 385 (const char *name, unsigned int flags) -io_setup 0 243 (unsigned nr_events, aio_context_t *ctx) -io_submit 2 246 (aio_context_t ctx_id, long nr, struct iocb **iocbpp) -io_getevents 4 245 (aio_context_t ctx, long min_nr, long nr, struct io_event *evs, struct timespec *tmo) -seccomp 277 383 (unsigned int op, unsigned int flags, const char *uargs) -gettimeofday 169 78 (struct timeval *tv, struct timezone *tz) -preadv_raw 69 361 (int fd, struct iovec *iov, unsigned long nr, unsigned long pos_l, unsigned long pos_h) -userfaultfd 282 388 (int flags) -fallocate 47 352 (int fd, int mode, loff_t offset, loff_t len) -cacheflush ! 983042 (void *start, void *end, int flags) -ppoll 73 336 (struct pollfd *fds, unsigned int nfds, const struct timespec *tmo, const sigset_t *sigmask, size_t sigsetsize) -fsopen 430 430 (char *fsname, unsigned int flags) -fsconfig 431 431 (int fd, unsigned int cmd, const char *key, const char *value, int aux) -fsmount 432 432 (int fd, unsigned int flags, unsigned int attr_flags) -clone3 435 435 (struct clone_args *uargs, size_t size) -pidfd_open 434 434 (pid_t pid, unsigned int flags) -pidfd_getfd 438 438 (int pidfd, int targetfd, unsigned int flags) -rseq 293 293 (void *rseq, uint32_t rseq_len, int flags, uint32_t sig) -move_mount 429 429 (int from_dfd, const char *from_pathname, int to_dfd, const char *to_pathname, int flags) -open_tree 428 428 (int dirfd, const char *pathname, unsigned int flags) -openat2 437 437 (int dirfd, char *pathname, struct open_how *how, size_t size) -membarrier 283 283 (int cmd, unsigned int flags, int cpu_id) diff --git a/compel/arch/riscv64/scripts/compel-pack.lds.S b/compel/arch/riscv64/scripts/compel-pack.lds.S deleted file mode 100644 index a61235b44..000000000 --- a/compel/arch/riscv64/scripts/compel-pack.lds.S +++ /dev/null @@ -1,32 +0,0 @@ -OUTPUT_ARCH(riscv) -EXTERN(__export_parasite_head_start) - -SECTIONS -{ - .crblob 0x0 : { - *(.head.text) - 
ASSERT(DEFINED(__export_parasite_head_start), - "Symbol __export_parasite_head_start is missing"); - *(.text*) - . = ALIGN(32); - *(.data*) - . = ALIGN(32); - *(.rodata*) - . = ALIGN(32); - *(.bss*) - . = ALIGN(32); - *(.got*) - . = ALIGN(32); - *(.toc*) - . = ALIGN(32); - } =0x00000000, - - /DISCARD/ : { - *(.debug*) - *(.comment*) - *(.note*) - *(.group*) - *(.eh_frame*) - *(*) - } -} \ No newline at end of file diff --git a/compel/arch/riscv64/src/lib/cpu.c b/compel/arch/riscv64/src/lib/cpu.c deleted file mode 100644 index 9a0291f70..000000000 --- a/compel/arch/riscv64/src/lib/cpu.c +++ /dev/null @@ -1,78 +0,0 @@ -#include -#include - -#include "compel-cpu.h" - -#include "common/bitops.h" - -#include "log.h" - -#undef LOG_PREFIX -#define LOG_PREFIX "cpu: " - -static compel_cpuinfo_t rt_info; - -static void fetch_rt_cpuinfo(void) -{ - static bool rt_info_done = false; - - if (!rt_info_done) { - compel_cpuid(&rt_info); - rt_info_done = true; - } -} - -void compel_set_cpu_cap(compel_cpuinfo_t *info, unsigned int feature) -{ -} -void compel_clear_cpu_cap(compel_cpuinfo_t *info, unsigned int feature) -{ -} -int compel_test_cpu_cap(compel_cpuinfo_t *info, unsigned int feature) -{ - return 0; -} -int compel_test_fpu_cap(compel_cpuinfo_t *info, unsigned int feature) -{ - return 0; -} -int compel_cpuid(compel_cpuinfo_t *info) -{ - return 0; -} - -bool compel_cpu_has_feature(unsigned int feature) -{ - fetch_rt_cpuinfo(); - return compel_test_cpu_cap(&rt_info, feature); -} - -bool compel_fpu_has_feature(unsigned int feature) -{ - fetch_rt_cpuinfo(); - return compel_test_fpu_cap(&rt_info, feature); -} - -uint32_t compel_fpu_feature_size(unsigned int feature) -{ - fetch_rt_cpuinfo(); - return 0; -} - -uint32_t compel_fpu_feature_offset(unsigned int feature) -{ - fetch_rt_cpuinfo(); - return 0; -} - -void compel_cpu_clear_feature(unsigned int feature) -{ - fetch_rt_cpuinfo(); - return compel_clear_cpu_cap(&rt_info, feature); -} - -void compel_cpu_copy_cpuinfo(compel_cpuinfo_t 
*c) -{ - fetch_rt_cpuinfo(); - memcpy(c, &rt_info, sizeof(rt_info)); -} \ No newline at end of file diff --git a/compel/arch/riscv64/src/lib/handle-elf-host.c b/compel/arch/riscv64/src/lib/handle-elf-host.c deleted file mode 120000 index fe4611886..000000000 --- a/compel/arch/riscv64/src/lib/handle-elf-host.c +++ /dev/null @@ -1 +0,0 @@ -handle-elf.c \ No newline at end of file diff --git a/compel/arch/riscv64/src/lib/handle-elf.c b/compel/arch/riscv64/src/lib/handle-elf.c deleted file mode 100644 index 22420bc78..000000000 --- a/compel/arch/riscv64/src/lib/handle-elf.c +++ /dev/null @@ -1,32 +0,0 @@ -#include -#include - -#include "handle-elf.h" -#include "piegen.h" -#include "log.h" - -static const unsigned char __maybe_unused elf_ident_64_le[EI_NIDENT] = { - 0x7f, 0x45, 0x4c, 0x46, 0x02, 0x01, 0x01, 0x00, /* clang-format */ - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -}; - -static const unsigned char __maybe_unused elf_ident_64_be[EI_NIDENT] = { - 0x7f, 0x45, 0x4c, 0x46, 0x02, 0x02, 0x01, 0x00, /* clang-format */ - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -}; - -int handle_binary(void *mem, size_t size) -{ - const unsigned char *elf_ident = -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - elf_ident_64_le; -#else - elf_ident_64_be; -#endif - - if (memcmp(mem, elf_ident, sizeof(elf_ident_64_le)) == 0) - return handle_elf_riscv64(mem, size); - - pr_err("Unsupported Elf format detected\n"); - return -EINVAL; -} \ No newline at end of file diff --git a/compel/arch/riscv64/src/lib/include/cpu.h b/compel/arch/riscv64/src/lib/include/cpu.h deleted file mode 100644 index e69de29bb..000000000 diff --git a/compel/arch/riscv64/src/lib/include/handle-elf.h b/compel/arch/riscv64/src/lib/include/handle-elf.h deleted file mode 100644 index 582770583..000000000 --- a/compel/arch/riscv64/src/lib/include/handle-elf.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef COMPEL_HANDLE_ELF_H__ -#define COMPEL_HANDLE_ELF_H__ - -#include "elf64-types.h" - -#define __handle_elf 
handle_elf_riscv64 -#define ELF_RISCV -#define arch_is_machine_supported(e_machine) (e_machine == EM_RISCV) - -extern int handle_elf_riscv64(void *mem, size_t size); - -#endif /* COMPEL_HANDLE_ELF_H__ */ \ No newline at end of file diff --git a/compel/arch/riscv64/src/lib/include/syscall.h b/compel/arch/riscv64/src/lib/include/syscall.h deleted file mode 100644 index 53f10525d..000000000 --- a/compel/arch/riscv64/src/lib/include/syscall.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef __COMPEL_SYSCALL_H__ -#define __COMPEL_SYSCALL_H__ -#define __NR(syscall, compat) \ - ({ \ - (void)compat; \ - __NR_##syscall; \ - }) -#endif \ No newline at end of file diff --git a/compel/arch/riscv64/src/lib/include/uapi/asm/breakpoints.h b/compel/arch/riscv64/src/lib/include/uapi/asm/breakpoints.h deleted file mode 100644 index f2ba799cb..000000000 --- a/compel/arch/riscv64/src/lib/include/uapi/asm/breakpoints.h +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef __COMPEL_BREAKPOINTS_H__ -#define __COMPEL_BREAKPOINTS_H__ -#define ARCH_SI_TRAP TRAP_BRKPT - -static inline int ptrace_set_breakpoint(pid_t pid, void *addr) -{ - return 0; -} - -static inline int ptrace_flush_breakpoints(pid_t pid) -{ - return 0; -} - -#endif \ No newline at end of file diff --git a/compel/arch/riscv64/src/lib/include/uapi/asm/cpu.h b/compel/arch/riscv64/src/lib/include/uapi/asm/cpu.h deleted file mode 100644 index ac58567e3..000000000 --- a/compel/arch/riscv64/src/lib/include/uapi/asm/cpu.h +++ /dev/null @@ -1,7 +0,0 @@ -#ifndef UAPI_COMPEL_ASM_CPU_H__ -#define UAPI_COMPEL_ASM_CPU_H__ - -typedef struct { -} compel_cpuinfo_t; - -#endif /* UAPI_COMPEL_ASM_CPU_H__ */ \ No newline at end of file diff --git a/compel/arch/riscv64/src/lib/include/uapi/asm/fpu.h b/compel/arch/riscv64/src/lib/include/uapi/asm/fpu.h deleted file mode 100644 index a74decc23..000000000 --- a/compel/arch/riscv64/src/lib/include/uapi/asm/fpu.h +++ /dev/null @@ -1,4 +0,0 @@ -#ifndef __CR_ASM_FPU_H__ -#define __CR_ASM_FPU_H__ - -#endif /* __CR_ASM_FPU_H__ 
*/ \ No newline at end of file diff --git a/compel/arch/riscv64/src/lib/include/uapi/asm/infect-types.h b/compel/arch/riscv64/src/lib/include/uapi/asm/infect-types.h deleted file mode 100644 index 192810cac..000000000 --- a/compel/arch/riscv64/src/lib/include/uapi/asm/infect-types.h +++ /dev/null @@ -1,52 +0,0 @@ -#ifndef UAPI_COMPEL_ASM_TYPES_H__ -#define UAPI_COMPEL_ASM_TYPES_H__ - -#include -#include -#include -#include - -#define SIGMAX 64 -#define SIGMAX_OLD 31 - -/* - * Copied from the Linux kernel header arch/riscv/include/uapi/asm/ptrace.h - * - * A thread RISC-V CPU context - */ -typedef struct user_regs_struct user_regs_struct_t; -typedef struct __riscv_d_ext_state user_fpregs_struct_t; - -#define __compel_arch_fetch_thread_area(tid, th) 0 -#define compel_arch_fetch_thread_area(tctl) 0 -#define compel_arch_get_tls_task(ctl, tls) -#define compel_arch_get_tls_thread(tctl, tls) - -#define REG_RES(registers) ((uint64_t)(registers).a0) -#define REG_IP(registers) ((uint64_t)(registers).pc) -#define SET_REG_IP(registers, val) ((registers).pc = (val)) - -/* - * REG_SP is also defined in riscv64-linux-gnu/include/sys/ucontext.h - * with a different meaning, and it's not used in CRIU. So we have to - * undefine it here. 
- */ -#ifdef REG_SP -#undef REG_SP -#endif - -#define REG_SP(registers) ((uint64_t)((registers).sp)) - -#define REG_SYSCALL_NR(registers) ((uint64_t)(registers).a7) - -#define user_regs_native(pregs) true - -#define ARCH_SI_TRAP TRAP_BRKPT - -#define __NR(syscall, compat) \ - ({ \ - (void)compat; \ - __NR_##syscall; \ - }) - -#endif /* UAPI_COMPEL_ASM_TYPES_H__ */ \ No newline at end of file diff --git a/compel/arch/riscv64/src/lib/include/uapi/asm/instruction_formats.h b/compel/arch/riscv64/src/lib/include/uapi/asm/instruction_formats.h deleted file mode 100644 index e231d0465..000000000 --- a/compel/arch/riscv64/src/lib/include/uapi/asm/instruction_formats.h +++ /dev/null @@ -1,26 +0,0 @@ -#ifndef COMPEL_RELOCATIONS_H__ -#define COMPEL_RELOCATIONS_H__ - -#include - -static inline uint32_t riscv_b_imm(uint32_t val) -{ - return (val & 0x00001000) << 19 | (val & 0x000007e0) << 20 | (val & 0x0000001e) << 7 | (val & 0x00000800) >> 4; -} - -static inline uint32_t riscv_i_imm(uint32_t val) -{ - return val << 20; -} - -static inline uint32_t riscv_u_imm(uint32_t val) -{ - return val & 0xfffff000; -} - -static inline uint32_t riscv_j_imm(uint32_t val) -{ - return (val & 0x00100000) << 11 | (val & 0x000007fe) << 20 | (val & 0x00000800) << 9 | (val & 0x000ff000); -} - -#endif /* COMPEL_RELOCATIONS_H__ */ \ No newline at end of file diff --git a/compel/arch/riscv64/src/lib/include/uapi/asm/processor-flags.h b/compel/arch/riscv64/src/lib/include/uapi/asm/processor-flags.h deleted file mode 100644 index e40fb6fce..000000000 --- a/compel/arch/riscv64/src/lib/include/uapi/asm/processor-flags.h +++ /dev/null @@ -1,4 +0,0 @@ -#ifndef UAPI_COMPEL_ASM_PROCESSOR_FLAGS_H__ -#define UAPI_COMPEL_ASM_PROCESSOR_FLAGS_H__ - -#endif /* UAPI_COMPEL_ASM_PROCESSOR_FLAGS_H__ */ \ No newline at end of file diff --git a/compel/arch/riscv64/src/lib/include/uapi/asm/sigframe.h b/compel/arch/riscv64/src/lib/include/uapi/asm/sigframe.h deleted file mode 100644 index 761a08f62..000000000 --- 
a/compel/arch/riscv64/src/lib/include/uapi/asm/sigframe.h +++ /dev/null @@ -1,68 +0,0 @@ -#ifndef UAPI_COMPEL_ASM_SIGFRAME_H__ -#define UAPI_COMPEL_ASM_SIGFRAME_H__ - -#include - -#include - -#include - -/* Copied from the kernel header arch/riscv/include/uapi/asm/sigcontext.h */ -/* - * Signal context structure - * - * This contains the context saved before a signal handler is invoked; - * it is restored by sys_sigreturn / sys_rt_sigreturn. - */ -// struct sigcontext { -// struct user_regs_struct sc_regs; -// union __riscv_fp_state sc_fpregs; -// /* -// * 4K + 128 reserved for vector state and future expansion. -// * This space is enough to store the vector context whose VLENB -// * is less or equal to 128. -// * (The size of the vector context is 4144 byte as VLENB is 128) -// */ -// __u8 __reserved[4224] __attribute__((__aligned__(16))); -// }; - -#define rt_sigcontext sigcontext - -#include - -/* Copied from the kernel source arch/riscv/kernel/signal.c */ -struct rt_sigframe { - siginfo_t info; - ucontext_t uc; //ucontext_t structure holds the user context, e.g., the signal mask, GP regs -}; - -/* - generates inline assembly code for triggering the rt_sigreturn system call. - used to return from a signal handler back to the normal execution flow of the process. 
-*/ -/* clang-format off */ -#define ARCH_RT_SIGRETURN(new_sp, rt_sigframe) \ - asm volatile( \ - "mv sp, %0\n" \ - "li a7, "__stringify(__NR_rt_sigreturn)" \n" \ - "ecall\n" \ - : \ - : "r"(new_sp) \ - : "a7", "memory") -/* clang-format on */ - -#define RT_SIGFRAME_UC(rt_sigframe) (&rt_sigframe->uc) -#define RT_SIGFRAME_REGIP(rt_sigframe) ((long unsigned int)(rt_sigframe)->uc.uc_mcontext.__gregs[REG_PC]) -#define RT_SIGFRAME_HAS_FPU(rt_sigframe) 1 -#define RT_SIGFRAME_OFFSET(rt_sigframe) 0 - -// #define RT_SIGFRAME_SIGCONTEXT(rt_sigframe) ((struct cr_sigcontext *)&(rt_sigframe)->uc.uc_mcontext) -// #define RT_SIGFRAME_AUX_CONTEXT(rt_sigframe) ((struct sigcontext *)&(RT_SIGFRAME_SIGCONTEXT(rt_sigframe)->__reserved)) -// #define RT_SIGFRAME_FPU(rt_sigframe) (&RT_SIGFRAME_AUX_CONTEXT(rt_sigframe)->fpsimd) - -#define rt_sigframe_erase_sigset(sigframe) \ - memset(&sigframe->uc.uc_sigmask, 0, sizeof(k_rtsigset_t)) // erase the signal mask -#define rt_sigframe_copy_sigset(sigframe, from) \ - memcpy(&sigframe->uc.uc_sigmask, from, sizeof(k_rtsigset_t)) // copy the signal mask - -#endif /* UAPI_COMPEL_ASM_SIGFRAME_H__ */ \ No newline at end of file diff --git a/compel/arch/riscv64/src/lib/infect.c b/compel/arch/riscv64/src/lib/infect.c deleted file mode 100644 index 3f3a4b7ec..000000000 --- a/compel/arch/riscv64/src/lib/infect.c +++ /dev/null @@ -1,224 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include "common/page.h" -#include "uapi/compel/asm/infect-types.h" -#include "log.h" -#include "errno.h" -#include "infect.h" -#include "infect-priv.h" - -unsigned __page_size = 0; -unsigned __page_shift = 0; - -/* - * Injected syscall instruction - */ -const char code_syscall[] = { - 0x73, 0x00, 0x00, 0x00, /* ecall */ - 0x73, 0x00, 0x10, 0x00 /* ebreak */ -}; - -static const int code_syscall_aligned = round_up(sizeof(code_syscall), sizeof(long)); - -static inline void __always_unused __check_code_syscall(void) -{ - BUILD_BUG_ON(code_syscall_aligned != 
BUILTIN_SYSCALL_SIZE); - BUILD_BUG_ON(!is_log2(sizeof(code_syscall))); -} - -int sigreturn_prep_regs_plain(struct rt_sigframe *sigframe, user_regs_struct_t *regs, user_fpregs_struct_t *fpregs) -{ - sigframe->uc.uc_mcontext.__gregs[0] = regs->pc; - sigframe->uc.uc_mcontext.__gregs[1] = regs->ra; - sigframe->uc.uc_mcontext.__gregs[2] = regs->sp; - sigframe->uc.uc_mcontext.__gregs[3] = regs->gp; - sigframe->uc.uc_mcontext.__gregs[4] = regs->tp; - sigframe->uc.uc_mcontext.__gregs[5] = regs->t0; - sigframe->uc.uc_mcontext.__gregs[6] = regs->t1; - sigframe->uc.uc_mcontext.__gregs[7] = regs->t2; - sigframe->uc.uc_mcontext.__gregs[8] = regs->s0; - sigframe->uc.uc_mcontext.__gregs[9] = regs->s1; - sigframe->uc.uc_mcontext.__gregs[10] = regs->a0; - sigframe->uc.uc_mcontext.__gregs[11] = regs->a1; - sigframe->uc.uc_mcontext.__gregs[12] = regs->a2; - sigframe->uc.uc_mcontext.__gregs[13] = regs->a3; - sigframe->uc.uc_mcontext.__gregs[14] = regs->a4; - sigframe->uc.uc_mcontext.__gregs[15] = regs->a5; - sigframe->uc.uc_mcontext.__gregs[16] = regs->a6; - sigframe->uc.uc_mcontext.__gregs[17] = regs->a7; - sigframe->uc.uc_mcontext.__gregs[18] = regs->s2; - sigframe->uc.uc_mcontext.__gregs[19] = regs->s3; - sigframe->uc.uc_mcontext.__gregs[20] = regs->s4; - sigframe->uc.uc_mcontext.__gregs[21] = regs->s5; - sigframe->uc.uc_mcontext.__gregs[22] = regs->s6; - sigframe->uc.uc_mcontext.__gregs[23] = regs->s7; - sigframe->uc.uc_mcontext.__gregs[24] = regs->s8; - sigframe->uc.uc_mcontext.__gregs[25] = regs->s9; - sigframe->uc.uc_mcontext.__gregs[26] = regs->s10; - sigframe->uc.uc_mcontext.__gregs[27] = regs->s11; - sigframe->uc.uc_mcontext.__gregs[28] = regs->t3; - sigframe->uc.uc_mcontext.__gregs[29] = regs->t4; - sigframe->uc.uc_mcontext.__gregs[30] = regs->t5; - sigframe->uc.uc_mcontext.__gregs[31] = regs->t6; - - memcpy(sigframe->uc.uc_mcontext.__fpregs.__d.__f, fpregs->f, sizeof(fpregs->f)); - sigframe->uc.uc_mcontext.__fpregs.__d.__fcsr = fpregs->fcsr; - - return 0; -} - -int 
sigreturn_prep_fpu_frame_plain(struct rt_sigframe *sigframe, struct rt_sigframe *rsigframe) -{ - return 0; -} - -int compel_get_task_regs(pid_t pid, user_regs_struct_t *regs, user_fpregs_struct_t *ext_regs, save_regs_t save, - void *arg, __maybe_unused unsigned long flags) -{ - user_fpregs_struct_t tmp, *fpsimd = ext_regs ? ext_regs : &tmp; - struct iovec iov; - int ret = -1; - - pr_info("Dumping FPU registers for %d\n", pid); - - iov.iov_base = fpsimd; - iov.iov_len = sizeof(*fpsimd); - if ((ret = ptrace(PTRACE_GETREGSET, pid, NT_PRFPREG, &iov))) { - pr_perror("Failed to obtain FPU registers for %d", pid); - return -1; - } - - ret = save(pid, arg, regs, fpsimd); - return ret; -} - -int compel_set_task_ext_regs(pid_t pid, user_fpregs_struct_t *ext_regs) -{ - struct iovec iov; - - pr_info("Restoring GP/FPU registers for %d\n", pid); - - iov.iov_base = ext_regs; - iov.iov_len = sizeof(*ext_regs); - if (ptrace(PTRACE_SETREGSET, pid, NT_PRFPREG, &iov)) { - pr_perror("Failed to set FPU registers for %d", pid); - return -1; - } - return 0; -} - -int compel_syscall(struct parasite_ctl *ctl, int nr, long *ret, unsigned long arg1, unsigned long arg2, - unsigned long arg3, unsigned long arg4, unsigned long arg5, unsigned long arg6) -{ - user_regs_struct_t regs = ctl->orig.regs; - int err; - - regs.a7 = (unsigned long)nr; - regs.a0 = arg1; - regs.a1 = arg2; - regs.a2 = arg3; - regs.a3 = arg4; - regs.a4 = arg5; - regs.a5 = arg6; - regs.a6 = 0; - - err = compel_execute_syscall(ctl, ®s, code_syscall); - - *ret = regs.a0; - return err; -} - -/* - * Calling the mmap system call in the context of the target (victim) process using the compel_syscall function. - * Used during the infection process to allocate memory for the parasite code. 
-*/ -void *remote_mmap(struct parasite_ctl *ctl, void *addr, size_t length, int prot, int flags, int fd, off_t offset) -{ - long map; - int err; - - err = compel_syscall(ctl, __NR_mmap, &map, (unsigned long)addr, length, prot, flags, fd, offset); - if (err < 0 || (long)map < 0) - map = 0; - - return (void *)map; -} - -void parasite_setup_regs(unsigned long new_ip, void *stack, user_regs_struct_t *regs) -{ - regs->pc = new_ip; - if (stack) - regs->sp = (unsigned long)stack; -} - -bool arch_can_dump_task(struct parasite_ctl *ctl) -{ - /* - * TODO: Add proper check here. - */ - return true; -} - -/* - * Fetch the signal alternate stack (sigaltstack), - * sas is a separate memory area for the signal handler to run on, - * avoiding potential issues with the main process stack -*/ -int arch_fetch_sas(struct parasite_ctl *ctl, struct rt_sigframe *s) -{ - long ret; - int err; - - err = compel_syscall(ctl, __NR_sigaltstack, &ret, 0, (unsigned long)&s->uc.uc_stack, 0, 0, 0, 0); - return err ? err : ret; -} - -/* - * Task size is the maximum virtual address space size that a process can occupy in the memory - * Refer to linux kernel arch/riscv/include/asm/pgtable.h, - * task size is: - * - 0x9fc00000 (~2.5GB) for RV32. 
- * - 0x4000000000 ( 256GB) for RV64 using SV39 mmu - * - 0x800000000000 ( 128TB) for RV64 using SV48 mmu - * - 0x100000000000000 ( 64PB) for RV64 using SV57 mmu - */ -#define TASK_SIZE_MIN (1UL << 38) -#define TASK_SIZE_MAX (1UL << 56) - -unsigned long compel_task_size(void) -{ - unsigned long task_size; - - for (task_size = TASK_SIZE_MIN; task_size < TASK_SIZE_MAX; task_size <<= 1) - if (munmap((void *)task_size, page_size())) - break; - return task_size; -} - -/* - * Get task registers (overwrites weak function) - */ -int ptrace_get_regs(int pid, user_regs_struct_t *regs) -{ - struct iovec iov; - - iov.iov_base = regs; - iov.iov_len = sizeof(user_regs_struct_t); - return ptrace(PTRACE_GETREGSET, pid, NT_PRSTATUS, &iov); -} - -/* - * Set task registers (overwrites weak function) - */ -int ptrace_set_regs(int pid, user_regs_struct_t *regs) -{ - struct iovec iov; - - iov.iov_base = regs; - iov.iov_len = sizeof(user_regs_struct_t); - return ptrace(PTRACE_SETREGSET, pid, NT_PRSTATUS, &iov); -} diff --git a/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl b/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl index ff2f33006..018d58a59 100644 --- a/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl +++ b/compel/arch/s390/plugins/std/syscalls/syscall-s390.tbl @@ -82,7 +82,7 @@ __NR_sys_timer_settime 255 sys_timer_settime (kernel_timer_t timer_id, int flag __NR_sys_timer_gettime 256 sys_timer_gettime (int timer_id, const struct itimerspec *setting) __NR_sys_timer_getoverrun 257 sys_timer_getoverrun (int timer_id) __NR_sys_timer_delete 258 sys_timer_delete (kernel_timer_t timer_id) -__NR_clock_gettime 260 sys_clock_gettime (clockid_t which_clock, struct timespec *tp) +__NR_clock_gettime 260 sys_clock_gettime (const clockid_t which_clock, const struct timespec *tp) __NR_exit_group 248 sys_exit_group (int error_code) __NR_waitid 281 sys_waitid (int which, pid_t pid, struct siginfo *infop, int options, struct rusage *ru) __NR_set_robust_list 304 
sys_set_robust_list (struct robust_list_head *head, size_t len) diff --git a/compel/arch/s390/src/lib/infect.c b/compel/arch/s390/src/lib/infect.c index a77b38917..85dfc3a4d 100644 --- a/compel/arch/s390/src/lib/infect.c +++ b/compel/arch/s390/src/lib/infect.c @@ -348,7 +348,7 @@ int compel_get_task_regs(pid_t pid, user_regs_struct_t *regs, user_fpregs_struct } } /* Call save_task_regs() */ - return save(pid, arg, regs, fpregs); + return save(arg, regs, fpregs); } int compel_set_task_ext_regs(pid_t pid, user_fpregs_struct_t *ext_regs) diff --git a/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl b/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl index 8c3620c2a..7fbfd69ad 100644 --- a/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl +++ b/compel/arch/x86/plugins/std/syscalls/syscall_64.tbl @@ -85,7 +85,7 @@ __NR_sys_timer_settime 223 sys_timer_settime (kernel_timer_t timer_id, int fla __NR_sys_timer_gettime 224 sys_timer_gettime (int timer_id, const struct itimerspec *setting) __NR_sys_timer_getoverrun 225 sys_timer_getoverrun (int timer_id) __NR_sys_timer_delete 226 sys_timer_delete (kernel_timer_t timer_id) -__NR_clock_gettime 228 sys_clock_gettime (clockid_t which_clock, struct timespec *tp) +__NR_clock_gettime 228 sys_clock_gettime (const clockid_t which_clock, const struct timespec *tp) __NR_exit_group 231 sys_exit_group (int error_code) __NR_openat 257 sys_openat (int dfd, const char *filename, int flags, int mode) __NR_waitid 247 sys_waitid (int which, pid_t pid, struct siginfo *infop, int options, struct rusage *ru) diff --git a/compel/arch/x86/src/lib/infect.c b/compel/arch/x86/src/lib/infect.c index afcf2c53b..a07b1c9f3 100644 --- a/compel/arch/x86/src/lib/infect.c +++ b/compel/arch/x86/src/lib/infect.c @@ -453,7 +453,7 @@ int compel_get_task_regs(pid_t pid, user_regs_struct_t *regs, user_fpregs_struct goto err; out: - ret = save(pid, arg, regs, xs); + ret = save(arg, regs, xs); err: return ret; } @@ -761,7 +761,7 @@ bool 
__compel_shstk_enabled(user_fpregs_struct_t *ext_regs) return false; } -int parasite_setup_shstk(struct parasite_ctl *ctl, __maybe_unused user_fpregs_struct_t *ext_regs) +int parasite_setup_shstk(struct parasite_ctl *ctl, user_fpregs_struct_t *ext_regs) { pid_t pid = ctl->rpid; unsigned long sa_restorer = ctl->parasite_ip; diff --git a/compel/include/infect-priv.h b/compel/include/infect-priv.h index 8e78a7f6c..9d3442839 100644 --- a/compel/include/infect-priv.h +++ b/compel/include/infect-priv.h @@ -72,7 +72,6 @@ extern bool arch_can_dump_task(struct parasite_ctl *ctl); extern int compel_get_task_regs(pid_t pid, user_regs_struct_t *regs, user_fpregs_struct_t *ext_regs, save_regs_t save, void *arg, unsigned long flags); extern int compel_set_task_ext_regs(pid_t pid, user_fpregs_struct_t *ext_regs); -extern int compel_set_task_gcs_regs(pid_t pid, user_fpregs_struct_t *ext_regs); extern int arch_fetch_sas(struct parasite_ctl *ctl, struct rt_sigframe *s); extern int sigreturn_prep_regs_plain(struct rt_sigframe *sigframe, user_regs_struct_t *regs, user_fpregs_struct_t *fpregs); diff --git a/compel/include/uapi/infect-util.h b/compel/include/uapi/infect-util.h index 658df9393..ace6f6b6b 100644 --- a/compel/include/uapi/infect-util.h +++ b/compel/include/uapi/infect-util.h @@ -3,20 +3,11 @@ #include "common/compiler.h" -/** - * The length of the hash is based on what libuuid provides. - * According to the manpage this is: - * - * The uuid_unparse() function converts the supplied UUID uu from the binary - * representation into a 36-byte string (plus trailing '\0') - */ -#define RUN_ID_HASH_LENGTH 37 - /* * compel_run_id is a unique value of the current run. It can be used to * generate resource ID-s to avoid conflicts with other processes. 
*/ -extern char compel_run_id[RUN_ID_HASH_LENGTH]; +extern uint64_t compel_run_id; struct parasite_ctl; extern int __must_check compel_util_send_fd(struct parasite_ctl *ctl, int fd); diff --git a/compel/include/uapi/infect.h b/compel/include/uapi/infect.h index d21c261b7..7e6134f4b 100644 --- a/compel/include/uapi/infect.h +++ b/compel/include/uapi/infect.h @@ -13,15 +13,6 @@ #define PARASITE_START_AREA_MIN (4096) -#define PARASITE_STACK_SIZE (16 << 10) -/* - * A stack redzone is a small, protected region of memory located immediately - * after a parasite stack. It is intended to remain unchanged. While it can be - * implemented as a guard page, we want to avoid the overhead of additional - * remote system calls. - */ -#define PARASITE_STACK_REDZONE 128 - extern int __must_check compel_interrupt_task(int pid); struct seize_task_status { @@ -106,7 +97,7 @@ extern k_rtsigset_t *compel_thread_sigmask(struct parasite_thread_ctl *tctl); struct rt_sigframe; typedef int (*open_proc_fn)(int pid, int mode, const char *fmt, ...) 
__attribute__((__format__(__printf__, 3, 4))); -typedef int (*save_regs_t)(pid_t pid, void *, user_regs_struct_t *, user_fpregs_struct_t *); +typedef int (*save_regs_t)(void *, user_regs_struct_t *, user_fpregs_struct_t *); typedef int (*make_sigframe_t)(void *, struct rt_sigframe *, struct rt_sigframe *, k_rtsigset_t *); struct infect_ctx { @@ -192,14 +183,6 @@ void compel_set_thread_ip(struct parasite_thread_ctl *tctl, uint64_t v); extern void compel_get_stack(struct parasite_ctl *ctl, void **rstack, void **r_thread_stack); -#ifndef compel_host_supports_gcs -static inline bool compel_host_supports_gcs(void) -{ - return false; -} -#define compel_host_supports_gcs -#endif - #ifndef compel_shstk_enabled static inline bool compel_shstk_enabled(user_fpregs_struct_t *ext_regs) { diff --git a/compel/src/lib/infect-util.c b/compel/src/lib/infect-util.c index dc57e28f7..00a7c83f7 100644 --- a/compel/src/lib/infect-util.c +++ b/compel/src/lib/infect-util.c @@ -7,7 +7,7 @@ #include "infect-rpc.h" #include "infect-util.h" -char compel_run_id[RUN_ID_HASH_LENGTH]; +uint64_t compel_run_id; int compel_util_send_fd(struct parasite_ctl *ctl, int fd) { diff --git a/compel/src/lib/infect.c b/compel/src/lib/infect.c index 22fcf24fa..1e3ffb967 100644 --- a/compel/src/lib/infect.c +++ b/compel/src/lib/infect.c @@ -38,6 +38,8 @@ #define UNIX_PATH_MAX (sizeof(struct sockaddr_un) - (size_t)((struct sockaddr_un *)0)->sun_path) #endif +#define PARASITE_STACK_SIZE (16 << 10) + #ifndef SECCOMP_MODE_DISABLED #define SECCOMP_MODE_DISABLED 0 #endif @@ -425,7 +427,7 @@ static int gen_parasite_saddr(struct sockaddr_un *saddr, int key) int sun_len; saddr->sun_family = AF_UNIX; - snprintf(saddr->sun_path, UNIX_PATH_MAX, "X/crtools-pr-%d-%s", key, compel_run_id); + snprintf(saddr->sun_path, UNIX_PATH_MAX, "X/crtools-pr-%d-%" PRIx64, key, compel_run_id); sun_len = SUN_LEN(saddr); *saddr->sun_path = '\0'; @@ -1054,16 +1056,6 @@ int compel_infect_no_daemon(struct parasite_ctl *ctl, unsigned long 
nr_threads, memcpy(ctl->local_map, ctl->pblob.hdr.mem, ctl->pblob.hdr.bsize); compel_relocs_apply(ctl->local_map, ctl->remote_map, &ctl->pblob); - /* - * Ensure the infected thread sees the updated code. - * - * On architectures like ARM64, the Data Cache (D-cache) and - * Instruction Cache (I-cache) are not automatically coherent. - * Modifications land in the D-cache, so we must flush (clean) the - * D-cache to push changes to RAM to ensure the CPU fetches the updated - * instructions. - */ - __builtin___clear_cache(ctl->local_map, ctl->local_map + ctl->pblob.hdr.bsize); p = parasite_size; @@ -1072,7 +1064,7 @@ int compel_infect_no_daemon(struct parasite_ctl *ctl, unsigned long nr_threads, p += RESTORE_STACK_SIGFRAME; p += PARASITE_STACK_SIZE; - ctl->rstack = ctl->remote_map + p - PARASITE_STACK_REDZONE; + ctl->rstack = ctl->remote_map + p; /* * x86-64 ABI requires a 16 bytes aligned stack. @@ -1086,7 +1078,7 @@ int compel_infect_no_daemon(struct parasite_ctl *ctl, unsigned long nr_threads, if (nr_threads > 1) { p += PARASITE_STACK_SIZE; - ctl->r_thread_stack = ctl->remote_map + p - PARASITE_STACK_REDZONE; + ctl->r_thread_stack = ctl->remote_map + p; } ret = arch_fetch_sas(ctl, ctl->rsigframe); @@ -1308,7 +1300,7 @@ struct plain_regs_struct { user_fpregs_struct_t fpregs; }; -static int save_regs_plain(pid_t pid, void *to, user_regs_struct_t *r, user_fpregs_struct_t *f) +static int save_regs_plain(void *to, user_regs_struct_t *r, user_fpregs_struct_t *f) { struct plain_regs_struct *prs = to; diff --git a/compel/src/main.c b/compel/src/main.c index 21e06d7dd..bc16c0ab4 100644 --- a/compel/src/main.c +++ b/compel/src/main.c @@ -60,9 +60,6 @@ static const flags_t flags = { #elif defined CONFIG_LOONGARCH64 .arch = "loongarch64", .cflags = COMPEL_CFLAGS_PIE, -#elif defined CONFIG_RISCV64 - .arch = "riscv64", - .cflags = COMPEL_CFLAGS_PIE, #else #error "CONFIG_ not defined, or unsupported ARCH" #endif diff --git a/compel/test/infect/Makefile 
b/compel/test/infect/Makefile index 85efa5fd9..bacfad962 100644 --- a/compel/test/infect/Makefile +++ b/compel/test/infect/Makefile @@ -3,11 +3,6 @@ CFLAGS ?= -O2 -g -Wall -Werror COMPEL := ../../../compel/compel-host -ifeq ($(GCS_ENABLE),1) -CFLAGS += -mbranch-protection=standard -DGCS_TEST_ENABLE=1 -LDFLAGS += -z experimental-gcs=check -endif - all: victim spy run: @@ -22,7 +17,7 @@ clean: rm -f parasite.o victim: victim.c - $(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS) + $(CC) $(CFLAGS) -o $@ $^ spy: spy.c parasite.h $(CC) $(CFLAGS) $(shell $(COMPEL) includes) -o $@ $< $(shell $(COMPEL) --static libs) diff --git a/compel/test/infect/spy.c b/compel/test/infect/spy.c index 143946941..b10db4d47 100644 --- a/compel/test/infect/spy.c +++ b/compel/test/infect/spy.c @@ -112,9 +112,6 @@ int main(int argc, char **argv) return -1; } -#ifdef GCS_TEST_ENABLE - setenv("GLIBC_TUNABLES", "glibc.cpu.aarch64_gcs=1:glibc.cpu.aarch64_gcs_policy=2", 1); -#endif pid = vfork(); if (pid == 0) { close(p_in[1]); diff --git a/compel/test/stack/spy.c b/compel/test/stack/spy.c index 184c8ab31..9b7c9a7f0 100644 --- a/compel/test/stack/spy.c +++ b/compel/test/stack/spy.c @@ -50,6 +50,70 @@ static void *get_parasite_rstack_start(struct parasite_ctl *ctl) return rstack_start; } +static int page_writable(struct parasite_ctl *ctl, int pid, void *page) +{ + FILE *maps; + size_t maps_line_len = 0; + char *maps_line = NULL; + char victim_maps_path[6 + 11 + 5 + 1]; + int written; + int ret = 0; + + if (((uintptr_t)page & (page_size() - 1)) != 0) { + fprintf(stderr, "Page address not aligned\n"); + ret = -1; + goto done; + } + + written = snprintf(victim_maps_path, sizeof(victim_maps_path), "/proc/%d/maps", pid); + if (written < 0 || written >= sizeof(victim_maps_path)) { + fprintf(stderr, "Failed to create path string to victim's /proc/%d/maps file\n", pid); + ret = -1; + goto done; + } + + maps = fopen(victim_maps_path, "r"); + if (maps == NULL) { + perror("Can't open victim's /proc/$pid/maps"); + ret = -1; 
+ goto done; + } + + while (getline(&maps_line, &maps_line_len, maps) != -1) { + unsigned long vmstart, vmend; + char r, w; + + if (sscanf(maps_line, "%lx-%lx %c%c", &vmstart, &vmend, &r, &w) < 4) { + fprintf(stderr, "Can't parse victim's /proc/%d/maps; line: %s\n", pid, maps_line); + ret = -1; + goto free_linebuf; + } + + if (page >= (void *)vmstart && page < (void *)vmend) { + if (w == 'w') { + if (r != 'r') { + fprintf(stderr, "Expecting writable memory to also be readable"); + ret = -1; + goto free_linebuf; + } + ret = 1; + } + break; + } + } + + if (errno) { + perror("Can't read victim's /proc/$pid/maps"); + ret = -1; + } + +free_linebuf: + free(maps_line); + fclose(maps); +done: + return ret; +} + static void *read_proc_mem(int pid, void *offset, size_t len) { char victim_mem_path[6 + 11 + 4 + 1]; @@ -89,6 +153,51 @@ freebuf: return NULL; } +static int save_data_near_stack(struct parasite_ctl *ctl, int pid, void *stack, void **saved_data, + size_t *saved_data_size) +{ + size_t page_mask = page_size() - 1; + size_t saved_size = 0; + size_t stack_size_last_page = (uintptr_t)stack & page_mask; + void *next_page = stack; + + if (stack_size_last_page != 0) { + size_t empty_space_last_page = page_size() - stack_size_last_page; + saved_size = min(empty_space_last_page, (size_t)SAVED_DATA_MAX); + next_page += page_size() - stack_size_last_page; + } + + while (saved_size < SAVED_DATA_MAX && next_page != NULL) { + switch (page_writable(ctl, pid, next_page)) { + case 1: + saved_size = min((size_t)(saved_size + page_size()), (size_t)SAVED_DATA_MAX); + next_page += page_size(); + break; + case 0: + next_page = NULL; + break; + default: + return -1; + } + } + + if (saved_size > 0) { + void *sd; + + sd = read_proc_mem(pid, stack, saved_size); + if (sd == NULL) + return -1; + + *saved_data = sd; + } else { + *saved_data = NULL; + } + + *saved_data_size = saved_size; + + return 0; +} + static int check_saved_data(struct parasite_ctl *ctl, int pid, void *stack, void 
*saved_data, size_t saved_data_size) { if (saved_data != NULL) { @@ -112,7 +221,7 @@ static int do_infection(int pid) struct infect_ctx *ictx; int *arg; void *stack; - size_t saved_data_size = PARASITE_STACK_REDZONE; + size_t saved_data_size; int saved_data_check; compel_log_init(print_vmsg, COMPEL_LOG_DEBUG); @@ -148,6 +257,8 @@ static int do_infection(int pid) err_and_ret("Can't register cleanup function with atexit\n"); stack = get_parasite_rstack_start(ctl); + if (save_data_near_stack(ctl, pid, stack, &saved_data, &saved_data_size)) + err_and_ret("Can't save data above stack\n"); if (compel_start_daemon(ctl)) err_and_ret("Can't start daemon in victim\n"); diff --git a/contrib/debian/dev-packages.lst b/contrib/debian/dev-packages.lst new file mode 100644 index 000000000..ce45f1b7c --- /dev/null +++ b/contrib/debian/dev-packages.lst @@ -0,0 +1,19 @@ +# Required packages for development in Debian +build-essential +libprotobuf-dev +libprotobuf-c-dev +protobuf-c-compiler +protobuf-compiler +python3-protobuf +libnet-dev + +# Extra packages, required for testing and building other tools +pkg-config +libnl-3-dev +libbsd0 +libbsd-dev +iproute2 +libcap-dev +libaio-dev +python3-yaml +libnl-route-3-dev diff --git a/contrib/dependencies/apk-packages.sh b/contrib/dependencies/apk-packages.sh deleted file mode 100755 index c47fb9fe0..000000000 --- a/contrib/dependencies/apk-packages.sh +++ /dev/null @@ -1,42 +0,0 @@ -#!/usr/bin/env sh - -apk add --no-cache \ - asciidoctor \ - bash \ - build-base \ - coreutils \ - e2fsprogs \ - elfutils-dev \ - git \ - gnutls-dev \ - go \ - ip6tables \ - iproute2 \ - iptables \ - iptables-legacy \ - libaio-dev \ - libbsd-dev \ - libcap-dev \ - libcap-utils \ - libdrm-dev \ - libnet-dev \ - libnl3-dev \ - libtraceevent-dev \ - libtracefs-dev \ - nftables \ - nftables-dev \ - perl \ - pkgconfig \ - procps \ - protobuf-c-compiler \ - protobuf-c-dev \ - protobuf-dev \ - py3-importlib-metadata \ - py3-pip \ - py3-protobuf \ - py3-yaml \ - python3 \ 
- sudo \ - tar \ - util-linux \ - util-linux-dev diff --git a/contrib/dependencies/apt-cross-packages.sh b/contrib/dependencies/apt-cross-packages.sh deleted file mode 100755 index 30ce6874c..000000000 --- a/contrib/dependencies/apt-cross-packages.sh +++ /dev/null @@ -1,37 +0,0 @@ -#!/usr/bin/env sh - -APT_INSTALL="$(cd "$(dirname "$0")/.." >/dev/null 2>&1 && pwd)/apt-install" -if [ ! -x "$APT_INSTALL" ]; then - echo "Error: apt-install not found or not executable" - exit 1 -fi - -"$APT_INSTALL" \ - crossbuild-essential-"${DEBIAN_ARCH}" \ - iproute2:"${DEBIAN_ARCH}" \ - libaio-dev:"${DEBIAN_ARCH}" \ - libbz2-dev:"${DEBIAN_ARCH}" \ - libc6-"${DEBIAN_ARCH}"-cross \ - libc6-dev-"${DEBIAN_ARCH}"-cross \ - libcap-dev:"${DEBIAN_ARCH}" \ - libdrm-dev:"${DEBIAN_ARCH}" \ - libelf-dev:"${DEBIAN_ARCH}" \ - libexpat1-dev:"${DEBIAN_ARCH}" \ - libgnutls28-dev:"${DEBIAN_ARCH}" \ - libnet-dev:"${DEBIAN_ARCH}" \ - libnftables-dev:"${DEBIAN_ARCH}" \ - libnl-3-dev:"${DEBIAN_ARCH}" \ - libnl-route-3-dev:"${DEBIAN_ARCH}" \ - libprotobuf-c-dev:"${DEBIAN_ARCH}" \ - libprotobuf-dev:"${DEBIAN_ARCH}" \ - libssl-dev:"${DEBIAN_ARCH}" \ - libtraceevent-dev:"${DEBIAN_ARCH}" \ - libtracefs-dev:"${DEBIAN_ARCH}" \ - ncurses-dev:"${DEBIAN_ARCH}" \ - uuid-dev:"${DEBIAN_ARCH}" \ - build-essential \ - pkg-config \ - git \ - protobuf-c-compiler \ - protobuf-compiler \ - python3-protobuf diff --git a/contrib/dependencies/apt-packages.sh b/contrib/dependencies/apt-packages.sh deleted file mode 100755 index 7963be7b4..000000000 --- a/contrib/dependencies/apt-packages.sh +++ /dev/null @@ -1,44 +0,0 @@ -#!/usr/bin/env sh - -APT_INSTALL="$(cd "$(dirname "$0")/.." >/dev/null 2>&1 && pwd)/apt-install" -if [ ! 
-x "$APT_INSTALL" ]; then - echo "Error: apt-install not found or not executable" - exit 1 -fi - -"$APT_INSTALL" \ - asciidoctor \ - bash \ - bsdmainutils \ - build-essential \ - gdb \ - git-core \ - iproute2 \ - iptables \ - kmod \ - libaio-dev \ - libbsd-dev \ - libcap-dev \ - libdrm-dev \ - libelf-dev \ - libgnutls28-dev \ - libgnutls30 \ - libnet-dev \ - libnl-3-dev \ - libnl-route-3-dev \ - libperl-dev \ - libprotobuf-c-dev \ - libprotobuf-dev \ - libselinux-dev \ - libtraceevent-dev \ - libtracefs-dev \ - pkg-config \ - protobuf-c-compiler \ - protobuf-compiler \ - python3-importlib-metadata \ - python3-pip \ - python3-protobuf \ - python3-yaml \ - time \ - util-linux \ - uuid-dev diff --git a/contrib/dependencies/dnf-packages.sh b/contrib/dependencies/dnf-packages.sh deleted file mode 100755 index 793f267a5..000000000 --- a/contrib/dependencies/dnf-packages.sh +++ /dev/null @@ -1,40 +0,0 @@ -#!/usr/bin/env sh - -dnf install -y \ - asciidoc \ - binutils \ - elfutils-libelf-devel \ - gcc \ - git \ - glibc-devel \ - gnutls-devel \ - iproute \ - iptables \ - libaio-devel \ - libasan \ - libbpf-devel \ - libbsd-devel \ - libcap-devel \ - libdrm-devel \ - libnet-devel \ - libnl3-devel \ - libselinux-devel \ - libtraceevent-devel \ - libtracefs-devel \ - libuuid-devel \ - make \ - nftables \ - pkg-config \ - protobuf \ - protobuf-c \ - protobuf-c-devel \ - protobuf-compiler \ - protobuf-devel \ - python-devel \ - python3-importlib-metadata \ - python3-protobuf \ - python3-pyyaml \ - python3-setuptools \ - python3-wheel \ - rubygem-asciidoctor \ - xmlto diff --git a/contrib/dependencies/pacman-packages.sh b/contrib/dependencies/pacman-packages.sh deleted file mode 100755 index 260797606..000000000 --- a/contrib/dependencies/pacman-packages.sh +++ /dev/null @@ -1,34 +0,0 @@ -#!/usr/bin/env sh - -pacman -Syu --noconfirm \ - asciidoctor \ - base-devel \ - bash \ - coreutils \ - diffutils \ - git \ - gnutls \ - go \ - iproute2 \ - iptables \ - libaio \ - libbsd \ - 
libcap \ - libdrm \ - libelf \ - libnet \ - libnl \ - libtraceevent \ - libtracefs \ - nftables \ - pkg-config \ - protobuf \ - protobuf-c \ - python-importlib-metadata \ - python-pip \ - python-protobuf \ - python-yaml \ - sudo \ - tar \ - util-linux \ - util-linux-libs diff --git a/contrib/docker_cr.sh b/contrib/docker_cr.sh index 04ef676cd..9b43d8ba1 100755 --- a/contrib/docker_cr.sh +++ b/contrib/docker_cr.sh @@ -418,7 +418,7 @@ resolve_path() { local p p="${2}" - if command -v realpath > /dev/null; then + if which realpath > /dev/null; then p=$(realpath "${p}") fi ${ECHO} "${1}: ${p}" @@ -427,7 +427,7 @@ resolve_path() { resolve_cmd() { local cpath - cpath=$(command -v "${2}") + cpath=$(which "${2}") resolve_path "${1}" "${cpath}" } diff --git a/coredump/coredump b/coredump/coredump index 5b3e6f366..3fbdafe81 100755 --- a/coredump/coredump +++ b/coredump/coredump @@ -6,8 +6,6 @@ import sys import criu_coredump -PLATFORMS = ["aarch64", "armv7l", "x86_64"] - def coredump(opts): generator = criu_coredump.coredump_generator() @@ -39,8 +37,8 @@ def main(): opts = vars(parser.parse_args()) - if platform.machine() not in PLATFORMS: - print("ERROR: %s is only supported on: %s" % (sys.argv[0], ', '.join(PLATFORMS))) + if platform.machine() != 'x86_64': + print('ERROR: %s only supported on x86_64' % sys.argv[0]) sys.exit(1) try: diff --git a/coredump/criu_coredump/coredump.py b/coredump/criu_coredump/coredump.py index acb806ace..20ec8e5dc 100644 --- a/coredump/criu_coredump/coredump.py +++ b/coredump/criu_coredump/coredump.py @@ -31,7 +31,6 @@ import io import sys import ctypes -import platform from pycriu import images from . 
import elf @@ -55,7 +54,6 @@ status = { "VMA_AREA_VVAR": 1 << 12, "VMA_AREA_AIORING": 1 << 13, "VMA_AREA_MEMFD": 1 << 14, - "VMA_AREA_UPROBES": 1 << 17, "VMA_AREA_UNSUPP": 1 << 31 } @@ -96,13 +94,8 @@ class coredump: buf.write(b"\0" * (8 - len(note.owner))) buf.write(note.data) - bits = platform.architecture()[0] # 32 or 64 bits - - ehdr = {"32bit": elf.Elf32_Ehdr, "64bit": elf.Elf64_Ehdr} - phdr = {"32bit": elf.Elf32_Phdr, "64bit": elf.Elf64_Phdr} - - offset = ctypes.sizeof(ehdr[bits]()) - offset += (len(self.vmas) + 1) * ctypes.sizeof(phdr[bits]()) + offset = ctypes.sizeof(elf.Elf64_Ehdr()) + offset += (len(self.vmas) + 1) * ctypes.sizeof(elf.Elf64_Phdr()) filesz = 0 for note in self.notes: @@ -137,20 +130,6 @@ class coredump_generator: reg_files = None # reg-files; pagemaps = {} # pagemap by pid; - # thread info key based on the current arch - thread_info_key = { - "aarch64": "ti_aarch64", - "armv7l": "ti_arm", - "x86_64": "thread_info", - } - - machine = platform.machine() # current arch - bits = platform.architecture()[0] # 32 or 64 bits - - ehdr = {"32bit": elf.Elf32_Ehdr, "64bit": elf.Elf64_Ehdr} # 32 or 64 bits Ehdr - nhdr = {"32bit": elf.Elf32_Nhdr, "64bit": elf.Elf64_Nhdr} # 32 or 64 bits Nhdr - phdr = {"32bit": elf.Elf32_Phdr, "64bit": elf.Elf64_Phdr} # 32 or 64 bits Phdr - def _img_open_and_strip(self, name, single=False, pid=None): """ Load criu image and strip it from magic and redundant list. @@ -222,62 +201,44 @@ class coredump_generator: """ Generate elf header for process pid with program headers phdrs. 
""" - ei_class = {"32bit": elf.ELFCLASS32, "64bit": elf.ELFCLASS64} - - ehdr = self.ehdr[self.bits]() + ehdr = elf.Elf64_Ehdr() ctypes.memset(ctypes.addressof(ehdr), 0, ctypes.sizeof(ehdr)) ehdr.e_ident[elf.EI_MAG0] = elf.ELFMAG0 ehdr.e_ident[elf.EI_MAG1] = elf.ELFMAG1 ehdr.e_ident[elf.EI_MAG2] = elf.ELFMAG2 ehdr.e_ident[elf.EI_MAG3] = elf.ELFMAG3 - ehdr.e_ident[elf.EI_CLASS] = ei_class[self.bits] + ehdr.e_ident[elf.EI_CLASS] = elf.ELFCLASS64 ehdr.e_ident[elf.EI_DATA] = elf.ELFDATA2LSB ehdr.e_ident[elf.EI_VERSION] = elf.EV_CURRENT - if self.machine == "armv7l": - ehdr.e_ident[elf.EI_OSABI] = elf.ELFOSABI_ARM - else: - ehdr.e_ident[elf.EI_OSABI] = elf.ELFOSABI_NONE - ehdr.e_type = elf.ET_CORE - ehdr.e_machine = self._get_e_machine() + ehdr.e_machine = elf.EM_X86_64 ehdr.e_version = elf.EV_CURRENT - ehdr.e_phoff = ctypes.sizeof(self.ehdr[self.bits]()) - ehdr.e_ehsize = ctypes.sizeof(self.ehdr[self.bits]()) - ehdr.e_phentsize = ctypes.sizeof(self.phdr[self.bits]()) + ehdr.e_phoff = ctypes.sizeof(elf.Elf64_Ehdr()) + ehdr.e_ehsize = ctypes.sizeof(elf.Elf64_Ehdr()) + ehdr.e_phentsize = ctypes.sizeof(elf.Elf64_Phdr()) # FIXME Case len(phdrs) > PN_XNUM should be handled properly. # See fs/binfmt_elf.c from linux kernel. ehdr.e_phnum = len(phdrs) return ehdr - def _get_e_machine(self): - """ - Get the e_machine field based on the current architecture. - """ - e_machine_dict = { - "aarch64": elf.EM_AARCH64, - "armv7l": elf.EM_ARM, - "x86_64": elf.EM_X86_64, - } - return e_machine_dict[self.machine] - def _gen_phdrs(self, pid, notes, vmas): """ Generate program headers for process pid. 
""" phdrs = [] - offset = ctypes.sizeof(self.ehdr[self.bits]()) - offset += (len(vmas) + 1) * ctypes.sizeof(self.phdr[self.bits]()) + offset = ctypes.sizeof(elf.Elf64_Ehdr()) + offset += (len(vmas) + 1) * ctypes.sizeof(elf.Elf64_Phdr()) filesz = 0 for note in notes: filesz += ctypes.sizeof(note.nhdr) + ctypes.sizeof(note.data) + 8 # PT_NOTE - phdr = self.phdr[self.bits]() + phdr = elf.Elf64_Phdr() ctypes.memset(ctypes.addressof(phdr), 0, ctypes.sizeof(phdr)) phdr.p_type = elf.PT_NOTE phdr.p_offset = offset @@ -297,7 +258,7 @@ class coredump_generator: for vma in vmas: offset += filesz filesz = vma.filesz - phdr = self.phdr[self.bits]() + phdr = elf.Elf64_Phdr() ctypes.memset(ctypes.addressof(phdr), 0, ctypes.sizeof(phdr)) phdr.p_type = elf.PT_LOAD phdr.p_align = PAGESIZE @@ -354,7 +315,7 @@ class coredump_generator: prpsinfo.pr_psargs = self._gen_cmdline(pid)[:80] prpsinfo.pr_fname = core["tc"]["comm"].encode() - nhdr = self.nhdr[self.bits]() + nhdr = elf.Elf64_Nhdr() nhdr.n_namesz = 5 nhdr.n_descsz = ctypes.sizeof(elf.elf_prpsinfo()) nhdr.n_type = elf.NT_PRPSINFO @@ -371,7 +332,7 @@ class coredump_generator: Generate NT_PRSTATUS note for thread tid of process pid. 
""" core = self.cores[tid] - regs = self._get_gpregs(core) + regs = core["thread_info"]["gpregs"] pstree = self.pstree[pid] prstatus = elf.elf_prstatus() @@ -384,9 +345,35 @@ class coredump_generator: prstatus.pr_pgrp = pstree["pgid"] prstatus.pr_sid = pstree["sid"] - self._set_pr_regset(prstatus.pr_reg, regs) + prstatus.pr_reg.r15 = regs["r15"] + prstatus.pr_reg.r14 = regs["r14"] + prstatus.pr_reg.r13 = regs["r13"] + prstatus.pr_reg.r12 = regs["r12"] + prstatus.pr_reg.rbp = regs["bp"] + prstatus.pr_reg.rbx = regs["bx"] + prstatus.pr_reg.r11 = regs["r11"] + prstatus.pr_reg.r10 = regs["r10"] + prstatus.pr_reg.r9 = regs["r9"] + prstatus.pr_reg.r8 = regs["r8"] + prstatus.pr_reg.rax = regs["ax"] + prstatus.pr_reg.rcx = regs["cx"] + prstatus.pr_reg.rdx = regs["dx"] + prstatus.pr_reg.rsi = regs["si"] + prstatus.pr_reg.rdi = regs["di"] + prstatus.pr_reg.orig_rax = regs["orig_ax"] + prstatus.pr_reg.rip = regs["ip"] + prstatus.pr_reg.cs = regs["cs"] + prstatus.pr_reg.eflags = regs["flags"] + prstatus.pr_reg.rsp = regs["sp"] + prstatus.pr_reg.ss = regs["ss"] + prstatus.pr_reg.fs_base = regs["fs_base"] + prstatus.pr_reg.gs_base = regs["gs_base"] + prstatus.pr_reg.ds = regs["ds"] + prstatus.pr_reg.es = regs["es"] + prstatus.pr_reg.fs = regs["fs"] + prstatus.pr_reg.gs = regs["gs"] - nhdr = self.nhdr[self.bits]() + nhdr = elf.Elf64_Nhdr() nhdr.n_namesz = 5 nhdr.n_descsz = ctypes.sizeof(elf.elf_prstatus()) nhdr.n_type = elf.NT_PRSTATUS @@ -398,83 +385,28 @@ class coredump_generator: return note - def _get_gpregs(self, core): - """ - Get the general purpose registers based on the current architecture. - """ - thread_info_key = self.thread_info_key[self.machine] - thread_info = core[thread_info_key] - - return thread_info["gpregs"] - - def _set_pr_regset(self, pr_reg, regs): - """ - Set the pr_reg struct based on the current architecture. 
- """ - if self.machine == "aarch64": - pr_reg.regs = (ctypes.c_ulonglong * len(regs["regs"]))(*regs["regs"]) - pr_reg.sp = regs["sp"] - pr_reg.pc = regs["pc"] - pr_reg.pstate = regs["pstate"] - elif self.machine == "armv7l": - pr_reg.r0 = regs["r0"] - pr_reg.r1 = regs["r1"] - pr_reg.r2 = regs["r2"] - pr_reg.r3 = regs["r3"] - pr_reg.r4 = regs["r4"] - pr_reg.r5 = regs["r5"] - pr_reg.r6 = regs["r6"] - pr_reg.r7 = regs["r7"] - pr_reg.r8 = regs["r8"] - pr_reg.r9 = regs["r9"] - pr_reg.r10 = regs["r10"] - pr_reg.fp = regs["fp"] - pr_reg.ip = regs["ip"] - pr_reg.sp = regs["sp"] - pr_reg.lr = regs["lr"] - pr_reg.pc = regs["pc"] - pr_reg.cpsr = regs["cpsr"] - pr_reg.orig_r0 = regs["orig_r0"] - elif self.machine == "x86_64": - pr_reg.r15 = regs["r15"] - pr_reg.r14 = regs["r14"] - pr_reg.r13 = regs["r13"] - pr_reg.r12 = regs["r12"] - pr_reg.rbp = regs["bp"] - pr_reg.rbx = regs["bx"] - pr_reg.r11 = regs["r11"] - pr_reg.r10 = regs["r10"] - pr_reg.r9 = regs["r9"] - pr_reg.r8 = regs["r8"] - pr_reg.rax = regs["ax"] - pr_reg.rcx = regs["cx"] - pr_reg.rdx = regs["dx"] - pr_reg.rsi = regs["si"] - pr_reg.rdi = regs["di"] - pr_reg.orig_rax = regs["orig_ax"] - pr_reg.rip = regs["ip"] - pr_reg.cs = regs["cs"] - pr_reg.eflags = regs["flags"] - pr_reg.rsp = regs["sp"] - pr_reg.ss = regs["ss"] - pr_reg.fs_base = regs["fs_base"] - pr_reg.gs_base = regs["gs_base"] - pr_reg.ds = regs["ds"] - pr_reg.es = regs["es"] - pr_reg.fs = regs["fs"] - pr_reg.gs = regs["gs"] - def _gen_fpregset(self, pid, tid): """ Generate NT_FPREGSET note for thread tid of process pid. 
""" core = self.cores[tid] - regs = self._get_fpregs(core) + regs = core["thread_info"]["fpregs"] fpregset = elf.elf_fpregset_t() ctypes.memset(ctypes.addressof(fpregset), 0, ctypes.sizeof(fpregset)) - self._set_fpregset(fpregset, regs) + fpregset.cwd = regs["cwd"] + fpregset.swd = regs["swd"] + fpregset.ftw = regs["twd"] + fpregset.fop = regs["fop"] + fpregset.rip = regs["rip"] + fpregset.rdp = regs["rdp"] + fpregset.mxcsr = regs["mxcsr"] + fpregset.mxcr_mask = regs["mxcsr_mask"] + fpregset.st_space = (ctypes.c_uint * len(regs["st_space"]))( + *regs["st_space"]) + fpregset.xmm_space = (ctypes.c_uint * len(regs["xmm_space"]))( + *regs["xmm_space"]) nhdr = elf.Elf64_Nhdr() nhdr.n_namesz = 5 @@ -488,86 +420,6 @@ class coredump_generator: return note - def _get_fpregs(self, core): - """ - Get the floating point register dictionary based on the current architecture. - """ - fpregs_key_dict = {"aarch64": "fpsimd", "x86_64": "fpregs"} - fpregs_key = fpregs_key_dict[self.machine] - - thread_info_key = self.thread_info_key[self.machine] - - return core[thread_info_key][fpregs_key] - - def _set_fpregset(self, fpregset, regs): - """ - Set the fpregset struct based on the current architecture. - """ - if self.machine == "aarch64": - fpregset.vregs = (ctypes.c_ulonglong * len(regs["vregs"]))(*regs["vregs"]) - fpregset.fpsr = regs["fpsr"] - fpregset.fpcr = regs["fpcr"] - elif self.machine == "x86_64": - fpregset.cwd = regs["cwd"] - fpregset.swd = regs["swd"] - fpregset.ftw = regs["twd"] - fpregset.fop = regs["fop"] - fpregset.rip = regs["rip"] - fpregset.rdp = regs["rdp"] - fpregset.mxcsr = regs["mxcsr"] - fpregset.mxcr_mask = regs["mxcsr_mask"] - fpregset.st_space = (ctypes.c_uint * len(regs["st_space"]))( - *regs["st_space"]) - fpregset.xmm_space = (ctypes.c_uint * len(regs["xmm_space"]))( - *regs["xmm_space"]) - - def _gen_arm_tls(self, tid): - """ - Generate NT_ARM_TLS note for thread tid of process pid. 
- """ - core = self.cores[tid] - tls = ctypes.c_ulonglong(core["ti_aarch64"]["tls"]) - - nhdr = elf.Elf64_Nhdr() - nhdr.n_namesz = 6 - nhdr.n_descsz = ctypes.sizeof(ctypes.c_ulonglong) - nhdr.n_type = elf.NT_ARM_TLS - - note = elf_note() - note.data = tls - note.owner = b"LINUX" - note.nhdr = nhdr - - return note - - def _gen_arm_vfp(self, tid): - """ - Generate NT_ARM_VFP note for thread tid of process pid. - """ - core = self.cores[tid] - fpstate = core["ti_arm"]["fpstate"] - - data = elf.vfp_hard_struct() - ctypes.memset(ctypes.addressof(data), 0, ctypes.sizeof(data)) - - data.vfp_regs = (ctypes.c_uint64 * len(fpstate["vfp_regs"]))(*fpstate["vfp_regs"]) - data.fpexc = fpstate["fpexc"] - data.fpscr = fpstate["fpscr"] - data.fpinst = fpstate["fpinst"] - data.fpinst2 = fpstate["fpinst2"] - - nhdr = elf.Elf32_Nhdr() - nhdr.n_namesz = 6 - nhdr.n_descsz = ctypes.sizeof(data) - nhdr.n_type = elf.NT_ARM_VFP - - note = elf_note() - note.data = data - note.owner = b"LINUX" - note.nhdr = nhdr - - return note - def _gen_x86_xstate(self, pid, tid): """ Generate NT_X86_XSTATE note for thread tid of process pid. 
@@ -617,7 +469,7 @@ class coredump_generator: # FIXME zeroify everything for now ctypes.memset(ctypes.addressof(siginfo), 0, ctypes.sizeof(siginfo)) - nhdr = self.nhdr[self.bits]() + nhdr = elf.Elf64_Nhdr() nhdr.n_namesz = 5 nhdr.n_descsz = ctypes.sizeof(elf.siginfo_t()) nhdr.n_type = elf.NT_SIGINFO @@ -636,22 +488,17 @@ class coredump_generator: mm = self.mms[pid] num_auxv = len(mm["mm_saved_auxv"]) // 2 - class elf32_auxv(ctypes.Structure): - _fields_ = [("auxv", elf.Elf32_auxv_t * num_auxv)] - - class elf64_auxv(ctypes.Structure): + class elf_auxv(ctypes.Structure): _fields_ = [("auxv", elf.Elf64_auxv_t * num_auxv)] - elf_auxv = {"32bit": elf32_auxv(), "64bit": elf64_auxv()} - - auxv = elf_auxv[self.bits] + auxv = elf_auxv() for i in range(num_auxv): auxv.auxv[i].a_type = mm["mm_saved_auxv"][i] auxv.auxv[i].a_val = mm["mm_saved_auxv"][i + 1] - nhdr = self.nhdr[self.bits]() + nhdr = elf.Elf64_Nhdr() nhdr.n_namesz = 5 - nhdr.n_descsz = ctypes.sizeof(elf_auxv[self.bits]) + nhdr.n_descsz = ctypes.sizeof(elf_auxv()) nhdr.n_type = elf.NT_AUXV note = elf_note() @@ -728,7 +575,7 @@ class coredump_generator: setattr(data, "file_ofs" + str(i), info.file_ofs) setattr(data, "name" + str(i), info.name.encode()) - nhdr = self.nhdr[self.bits]() + nhdr = elf.Elf64_Nhdr() nhdr.n_namesz = 5 # strlen + 1 nhdr.n_descsz = ctypes.sizeof(elf_files()) @@ -745,15 +592,9 @@ class coredump_generator: notes = [] notes.append(self._gen_prstatus(pid, tid)) - if self.machine != "armv7l": - notes.append(self._gen_fpregset(pid, tid)) + notes.append(self._gen_fpregset(pid, tid)) + notes.append(self._gen_x86_xstate(pid, tid)) notes.append(self._gen_siginfo(pid, tid)) - if self.machine == "aarch64": - notes.append(self._gen_arm_tls(tid)) - elif self.machine == "armv7l": - notes.append(self._gen_arm_vfp(tid)) - elif self.machine == "x86_64": - notes.append(self._gen_x86_xstate(pid, tid)) return notes @@ -794,9 +635,7 @@ class coredump_generator: off = 0 # in pages for m in pagemap[1:]: found = 
False - num_pages = m.get("nr_pages", m["compat_nr_pages"]) - - for i in range(num_pages): + for i in range(m["nr_pages"]): if m["vaddr"] + i * PAGESIZE == page_no * PAGESIZE: found = True break diff --git a/coredump/criu_coredump/elf.py b/coredump/criu_coredump/elf.py index 2911f491e..092b47857 100644 --- a/coredump/criu_coredump/elf.py +++ b/coredump/criu_coredump/elf.py @@ -1,14 +1,5 @@ # Define structures and constants for generating elf file. import ctypes -import platform - -MACHINE = platform.machine() - -Elf32_Half = ctypes.c_uint16 # typedef uint16_t Elf32_Half; -Elf32_Word = ctypes.c_uint32 # typedef uint32_t Elf32_Word; -Elf32_Addr = ctypes.c_uint32 # typedef uint32_t Elf32_Addr; -Elf32_Off = ctypes.c_uint32 # typedef uint32_t Elf32_Off; -Elf32_Xword = ctypes.c_uint64 # typedef uint64_t Elf32_Xword; Elf64_Half = ctypes.c_uint16 # typedef uint16_t Elf64_Half; Elf64_Word = ctypes.c_uint32 # typedef uint32_t Elf64_Word; @@ -16,7 +7,7 @@ Elf64_Addr = ctypes.c_uint64 # typedef uint64_t Elf64_Addr; Elf64_Off = ctypes.c_uint64 # typedef uint64_t Elf64_Off; Elf64_Xword = ctypes.c_uint64 # typedef uint64_t Elf64_Xword; -# Elf_Ehdr related constants. +# Elf64_Ehdr related constants. # e_ident size. EI_NIDENT = 16 # #define EI_NIDENT (16) @@ -37,50 +28,21 @@ EI_CLASS = 4 # #define EI_CLASS 4 /* File class byte index EI_DATA = 5 # #define EI_DATA 5 /* Data encoding byte index */ -EI_OSABI = 7 # #define EI_OSABI 7 /* OS ABI identification */ - EI_VERSION = 6 # #define EI_VERSION 6 /* File version byte index */ ELFDATA2LSB = 1 # #define ELFDATA2LSB 1 /* 2's complement, little endian */ -ELFCLASS32 = 1 # #define ELFCLASS32 1 /* 32-bit objects */ ELFCLASS64 = 2 # #define ELFCLASS64 2 /* 64-bit objects */ # Legal values for e_type (object file type). ET_CORE = 4 # #define ET_CORE 4 /* Core file */ # Legal values for e_machine (architecture). 
-EM_ARM = 40 # #define EM_ARM 40 /* ARM */ EM_X86_64 = 62 # #define EM_X86_64 62 /* AMD x86-64 architecture */ -EM_AARCH64 = 183 # #define EM_AARCH64 183 /* ARM AARCH64 */ # Legal values for e_version (version). EV_CURRENT = 1 # #define EV_CURRENT 1 /* Current version */ -# Legal values for e_osabi -ELFOSABI_NONE = 0 # #define ELFOSABI_NONE 0 /* UNIX System V ABI */ -ELFOSABI_ARM = 97 # #define ELFOSABI_ARM 97 /* ARM */ - - -class Elf32_Ehdr(ctypes.Structure): # typedef struct - _fields_ = [ - ("e_ident", - ctypes.c_ubyte * EI_NIDENT), # unsigned char e_ident[EI_NIDENT]; - ("e_type", Elf32_Half), # Elf32_Half e_type; - ("e_machine", Elf32_Half), # Elf32_Half e_machine; - ("e_version", Elf32_Word), # Elf32_Word e_version; - ("e_entry", Elf32_Addr), # Elf32_Addr e_entry; - ("e_phoff", Elf32_Off), # Elf32_Off e_phoff; - ("e_shoff", Elf32_Off), # Elf32_Off e_shoff; - ("e_flags", Elf32_Word), # Elf32_Word e_flags; - ("e_ehsize", Elf32_Half), # Elf32_Half e_ehsize; - ("e_phentsize", Elf32_Half), # Elf32_Half e_phentsize; - ("e_phnum", Elf32_Half), # Elf32_Half e_phnum; - ("e_shentsize", Elf32_Half), # Elf32_Half e_shentsize; - ("e_shnum", Elf32_Half), # Elf32_Half e_shnum; - ("e_shstrndx", Elf32_Half) # Elf32_Half e_shstrndx; - ] # } Elf32_Ehdr; - class Elf64_Ehdr(ctypes.Structure): # typedef struct _fields_ = [ @@ -102,7 +64,7 @@ class Elf64_Ehdr(ctypes.Structure): # typedef struct ] # } Elf64_Ehdr; -# Elf_Phdr related constants. +# Elf64_Phdr related constants. # Legal values for p_type (segment type). 
PT_LOAD = 1 # #define PT_LOAD 1 /* Loadable program segment */ @@ -114,19 +76,6 @@ PF_W = 1 << 1 # #define PF_W (1 << 1) /* Segment is writable PF_R = 1 << 2 # #define PF_R (1 << 2) /* Segment is readable */ -class Elf32_Phdr(ctypes.Structure): # typedef struct - _fields_ = [ - ("p_type", Elf32_Word), # Elf32_Word p_type; - ("p_offset", Elf32_Off), # Elf32_Off p_offset; - ("p_vaddr", Elf32_Addr), # Elf32_Addr p_vaddr; - ("p_paddr", Elf32_Addr), # Elf32_Addr p_paddr; - ("p_filesz", Elf32_Word), # Elf32_Word p_filesz; - ("p_memsz", Elf32_Word), # Elf32_Word p_memsz; - ("p_flags", Elf32_Word), # Elf32_Word p_flags; - ("p_align", Elf32_Word), # Elf32_Word p_align; - ] # } Elf32_Phdr; - - class Elf64_Phdr(ctypes.Structure): # typedef struct _fields_ = [ ("p_type", Elf64_Word), # Elf64_Word p_type; @@ -140,25 +89,7 @@ class Elf64_Phdr(ctypes.Structure): # typedef struct ] # } Elf64_Phdr; -# Elf_auxv_t related constants. - - -class _Elf32_auxv_t_U(ctypes.Union): - _fields_ = [("a_val", ctypes.c_uint32)] - - -class Elf32_auxv_t(ctypes.Structure): # typedef struct - _fields_ = [ - ("a_type", - ctypes.c_uint32), # uint32_t a_type; /* Entry type */ - ("a_un", _Elf32_auxv_t_U) # union - - # uint32_t a_val; /* Integer value */ - # /* We use to have pointer elements added here. We cannot do that, - # though, since it does not work when using 32-bit definitions - # on 64-bit platforms and vice versa. */ - # } a_un; - ] # } Elf32_auxv_t; +# Elf64_auxv_t related constants. class _Elf64_auxv_t_U(ctypes.Union): @@ -179,7 +110,7 @@ class Elf64_auxv_t(ctypes.Structure): # typedef struct ] # } Elf64_auxv_t; -# Elf_Nhdr related constants. +# Elf64_Nhdr related constants. 
NT_PRSTATUS = 1 # #define NT_PRSTATUS 1 /* Contains copy of prstatus struct */ NT_FPREGSET = 2 # #define NT_FPREGSET 2 /* Contains copy of fpregset struct */ @@ -188,22 +119,6 @@ NT_AUXV = 6 # #define NT_AUXV 6 /* Contains copy of auxv array */ NT_SIGINFO = 0x53494749 # #define NT_SIGINFO 0x53494749 /* Contains copy of siginfo_t, size might increase */ NT_FILE = 0x46494c45 # #define NT_FILE 0x46494c45 /* Contains information about mapped files */ NT_X86_XSTATE = 0x202 # #define NT_X86_XSTATE 0x202 /* x86 extended state using xsave */ -NT_ARM_VFP = 0x400 # #define NT_ARM_VFP 0x400 /* ARM VFP/NEON registers */ -NT_ARM_TLS = 0x401 # #define NT_ARM_TLS 0x401 /* ARM TLS register */ - - -class Elf32_Nhdr(ctypes.Structure): # typedef struct - _fields_ = [ - ( - "n_namesz", Elf32_Word - ), # Elf32_Word n_namesz; /* Length of the note's name. */ - ( - "n_descsz", Elf32_Word - ), # Elf32_Word n_descsz; /* Length of the note's descriptor. */ - ( - "n_type", Elf32_Word - ), # Elf32_Word n_type; /* Type of the note. */ - ] # } Elf32_Nhdr; class Elf64_Nhdr(ctypes.Structure): # typedef struct @@ -219,52 +134,7 @@ class Elf64_Nhdr(ctypes.Structure): # typedef struct ] # } Elf64_Nhdr; -# Elf_Shdr related constants. - - -class Elf32_Shdr(ctypes.Structure): - _fields_ = [ - ( - # Section name (string tbl index) - "sh_name", Elf32_Word - ), - ( - # Section type - "sh_type", Elf32_Word - ), - ( - # Section flags - "sh_flags", Elf32_Word - ), - ( - # Section virtual addr at execution - "sh_addr", Elf32_Addr - ), - ( - # Section file offset - "sh_offset", Elf32_Off - ), - ( - # Section size in bytes - "sh_size", Elf32_Word - ), - ( - # Link to another section - "sh_link", Elf32_Word - ), - ( - # Additional section information - "sh_info", Elf32_Word - ), - ( - # Section alignment - "sh_addralign", Elf32_Word - ), - ( - # Entry size if section holds table - "sh_entsize", Elf32_Word - ) - ] +# Elf64_Shdr related constants. 
class Elf64_Shdr(ctypes.Structure): @@ -348,7 +218,7 @@ class timeval(ctypes.Structure): # struct timeval ] -class x86_64_user_regs_struct(ctypes.Structure): # struct x86_64_user_regs_struct +class user_regs_struct(ctypes.Structure): # struct user_regs_struct _fields_ = [ ("r15", ctypes.c_ulonglong), # __extension__ unsigned long long int r15; @@ -407,73 +277,10 @@ class x86_64_user_regs_struct(ctypes.Structure): # struct x86_64_user_regs_stru ] -class aarch64_user_regs_struct(ctypes.Structure): # struct aarch64_user_regs_struct - _fields_ = [ - ("regs", - ctypes.c_ulonglong * 31), # unsigned long long int regs[31]; - ("sp", - ctypes.c_ulonglong), # unsigned long long int sp; - ("pc", - ctypes.c_ulonglong), # unsigned long long int pc; - ("pstate", - ctypes.c_ulonglong), # unsigned long long int pstate; - ] - - -class arm_user_regs_struct(ctypes.Structure): # struct arm_user_regs_struct - _fields_ = [ - ("r0", - ctypes.c_ulong), # unsigned ulong int r0; - ("r1", - ctypes.c_ulong), # unsigned ulong int r1; - ("r2", - ctypes.c_ulong), # unsigned ulong int r2; - ("r3", - ctypes.c_ulong), # unsigned ulong int r3; - ("r4", - ctypes.c_ulong), # unsigned ulong int r4; - ("r5", - ctypes.c_ulong), # unsigned ulong int r5; - ("r6", - ctypes.c_ulong), # unsigned ulong int r6; - ("r7", - ctypes.c_ulong), # unsigned ulong int r7; - ("r8", - ctypes.c_ulong), # unsigned ulong int r8; - ("r9", - ctypes.c_ulong), # unsigned ulong int r9; - ("r10", - ctypes.c_ulong), # unsigned ulong int r10; - ("fp", - ctypes.c_ulong), # unsigned ulong int fp; - ("ip", - ctypes.c_ulong), # unsigned ulong int ip; - ("sp", - ctypes.c_ulong), # unsigned ulong int sp; - ("lr", - ctypes.c_ulong), # unsigned ulong int lr; - ("pc", - ctypes.c_ulong), # unsigned ulong int pc; - ("cpsr", - ctypes.c_ulong), # unsigned ulong int cpsr; - ("orig_r0", - ctypes.c_ulong), # unsigned ulong int orig_r0; - ] - - # elf_greg_t = ctypes.c_ulonglong # ELF_NGREG = ctypes.sizeof(user_regs_struct)/ctypes.sizeof(elf_greg_t) 
# elf_gregset_t = elf_greg_t*ELF_NGREG -user_regs_dict = { - "aarch64": aarch64_user_regs_struct, - "armv7l": arm_user_regs_struct, - "x86_64": x86_64_user_regs_struct, -} - -try: - elf_gregset_t = user_regs_dict[MACHINE] -except KeyError: - raise ValueError("Current architecture %s is not supported." % MACHINE) +elf_gregset_t = user_regs_struct class elf_prstatus(ctypes.Structure): # struct elf_prstatus @@ -613,7 +420,7 @@ class elf_prpsinfo(ctypes.Structure): # struct elf_prpsinfo ] -class x86_64_user_fpregs_struct(ctypes.Structure): # struct x86_64_user_fpregs_struct +class user_fpregs_struct(ctypes.Structure): # struct user_fpregs_struct _fields_ = [ # unsigned short int cwd; ("cwd", ctypes.c_ushort), @@ -640,29 +447,7 @@ class x86_64_user_fpregs_struct(ctypes.Structure): # struct x86_64_user_fpregs_ ] -class aarch64_user_fpregs_struct(ctypes.Structure): # struct aarch64_user_fpregs_struct - _fields_ = [ - # unsigned long long int vregs[64]; - ("vregs", ctypes.c_ulonglong * 64), - # unsigned int fpsr; - ("fpsr", ctypes.c_uint), - # unsigned int fpcr; - ("fpcr", ctypes.c_uint), - # unsigned int padding[2]; - ("padding", ctypes.c_uint * 2), - ] - - -user_fpregs_dict = { - "aarch64": aarch64_user_fpregs_struct, - "armv7l": None, - "x86_64": x86_64_user_fpregs_struct, -} - -try: - elf_fpregset_t = user_fpregs_dict[MACHINE] -except KeyError: - raise ValueError("Current architecture %s is not supported." % MACHINE) +elf_fpregset_t = user_fpregs_struct # siginfo_t related constants. 
@@ -1057,13 +842,3 @@ class elf_xsave_struct(ctypes.Structure): # struct xsave_struct { # struct ymmh_struct ymmh; ("ymmh", ymmh_struct) ] # } __aligned(FP_MIN_ALIGN_BYTES) __packed; - - -class vfp_hard_struct(ctypes.Structure): # struct vfp_hard_struct { - _fields_ = [ - ("vfp_regs", ctypes.c_ulonglong * 32), # __u64 fpregs[32]; - ("fpexc", ctypes.c_ulong), # __u32 fpexc; - ("fpscr", ctypes.c_ulong), # __u32 fpscr; - ("fpinst", ctypes.c_ulong), # __u32 fpinst; - ("fpinst2", ctypes.c_ulong), # __u32 fpinst2; - ] # }; diff --git a/crit/pyproject.toml b/crit/pyproject.toml index f0b185eb7..9089f0a39 100644 --- a/crit/pyproject.toml +++ b/crit/pyproject.toml @@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta" name = "crit" description = "CRiu Image Tool" authors = [ - {name = "CRIU team", email = "criu@lists.linux.dev"}, + {name = "CRIU team", email = "criu@openvz.org"}, ] license = {text = "GPLv2"} dynamic = ["version"] diff --git a/crit/setup.cfg b/crit/setup.cfg index 37895923f..fbc9a5143 100644 --- a/crit/setup.cfg +++ b/crit/setup.cfg @@ -7,7 +7,7 @@ name = crit description = CRiu Image Tool author = CRIU team -author_email = criu@lists.linux.dev +author_email = criu@openvz.org license = GPLv2 version = attr: crit.__version__ diff --git a/criu/Makefile.crtools b/criu/Makefile.crtools index ba6132d2f..3ddf45cd7 100644 --- a/criu/Makefile.crtools +++ b/criu/Makefile.crtools @@ -101,7 +101,6 @@ obj-$(CONFIG_COMPAT) += vdso-compat.o CFLAGS_REMOVE_vdso-compat.o += $(CFLAGS-ASAN) $(CFLAGS-GCOV) obj-y += pidfd-store.o obj-y += hugetlb.o -obj-y += pidfd.o PROTOBUF_GEN := scripts/protobuf-gen.sh diff --git a/criu/Makefile.packages b/criu/Makefile.packages index 3e2e6efd1..7f6113c8f 100644 --- a/criu/Makefile.packages +++ b/criu/Makefile.packages @@ -6,7 +6,6 @@ REQ-RPM-PKG-NAMES += protobuf-devel REQ-RPM-PKG-NAMES += protobuf-python REQ-RPM-PKG-NAMES += libnl3-devel REQ-RPM-PKG-NAMES += libcap-devel -REQ-RPM-PKG-NAMES += libuuid-devel REQ-RPM-PKG-TEST-NAMES += 
libaio-devel @@ -17,7 +16,6 @@ REQ-DEB-PKG-NAMES += protobuf-compiler REQ-DEB-PKG-NAMES += $(PYTHON)-protobuf REQ-DEB-PKG-NAMES += libnl-3-dev REQ-DEB-PKG-NAMES += libcap-dev -REQ-DEB-PKG-NAMES += uuid-dev REQ-DEB-PKG-TEST-NAMES += $(PYTHON)-yaml REQ-DEB-PKG-TEST-NAMES += libaio-dev @@ -27,7 +25,7 @@ REQ-DEB-PKG-TEST-NAMES += libaio-dev REQ-RPM-PKG-TEST-NAMES += $(PYTHON)-PyYAML -export LIBS += -lprotobuf-c -ldl -lnl-3 -lsoccr -Lsoccr/ -lnet -luuid +export LIBS += -lprotobuf-c -ldl -lnl-3 -lsoccr -Lsoccr/ -lnet check-packages-failed: $(warning Can not find some of the required libraries) diff --git a/criu/arch/aarch64/Makefile b/criu/arch/aarch64/Makefile index b87fcaa5b..b26487367 100644 --- a/criu/arch/aarch64/Makefile +++ b/criu/arch/aarch64/Makefile @@ -6,4 +6,3 @@ obj-y += cpu.o obj-y += crtools.o obj-y += sigframe.o obj-y += bitops.o -obj-y += gcs.o \ No newline at end of file diff --git a/criu/arch/aarch64/crtools.c b/criu/arch/aarch64/crtools.c index 2e89f9ce3..e87b8629a 100644 --- a/criu/arch/aarch64/crtools.c +++ b/criu/arch/aarch64/crtools.c @@ -1,6 +1,5 @@ #include #include -#include #include @@ -12,7 +11,6 @@ #include "common/compiler.h" #include #include "asm/dump.h" -#include "asm/gcs-types.h" #include "protobuf.h" #include "images/core.pb-c.h" #include "images/creds.pb-c.h" @@ -22,137 +20,12 @@ #include "cpu.h" #include "restorer.h" #include "compel/infect.h" -#include "pstree.h" -#include - -/* - * cr_user_pac_* are a copy of the corresponding uapi structs - * in arch/arm64/include/uapi/asm/ptrace.h - */ -struct cr_user_pac_address_keys { - __uint128_t apiakey; - __uint128_t apibkey; - __uint128_t apdakey; - __uint128_t apdbkey; -}; - -struct cr_user_pac_generic_keys { - __uint128_t apgakey; -}; - -/* - * The following HWCAP constants are copied from - * arch/arm64/include/uapi/asm/hwcap.h - */ -#ifndef HWCAP_PACA -#define HWCAP_PACA (1 << 30) -#endif - -#ifndef HWCAP_PACG -#define HWCAP_PACG (1UL << 31) -#endif - -/* - * The following NT_ARM_PAC 
constants are copied from - * include/uapi/linux/elf.h - */ -#ifndef NT_ARM_PACA_KEYS -#define NT_ARM_PACA_KEYS 0x407 /* ARM pointer authentication address keys */ -#endif - -#ifndef NT_ARM_PACG_KEYS -#define NT_ARM_PACG_KEYS 0x408 -#endif - -#ifndef NT_ARM_PAC_ENABLED_KEYS -#define NT_ARM_PAC_ENABLED_KEYS 0x40a /* AArch64 pointer authentication enabled keys. */ -#endif - -extern unsigned long getauxval(unsigned long type); #define assign_reg(dst, src, e) dst->e = (__typeof__(dst->e))(src)->e -static int save_pac_keys(int pid, CoreEntry *core) -{ - struct cr_user_pac_address_keys paca; - struct cr_user_pac_generic_keys pacg; - PacKeys *pac_entry; - long pac_enabled_key; - struct iovec iov; - int ret; - - unsigned long hwcaps = getauxval(AT_HWCAP); - - pac_entry = xmalloc(sizeof(PacKeys)); - if (!pac_entry) - return -1; - core->ti_aarch64->pac_keys = pac_entry; - pac_keys__init(pac_entry); - - if (hwcaps & HWCAP_PACA) { - PacAddressKeys *pac_address_keys; - - pr_debug("%d: Dumping address authentication keys\n", pid); - iov.iov_base = &paca; - iov.iov_len = sizeof(paca); - if ((ret = ptrace(PTRACE_GETREGSET, pid, NT_ARM_PACA_KEYS, &iov))) { - pr_perror("Failed to get address authentication key for %d", pid); - return -1; - } - pac_address_keys = xmalloc(sizeof(PacAddressKeys)); - if (!pac_address_keys) - return -1; - pac_address_keys__init(pac_address_keys); - pac_entry->pac_address_keys = pac_address_keys; - pac_address_keys->apiakey_lo = paca.apiakey; - pac_address_keys->apiakey_hi = paca.apiakey >> 64; - pac_address_keys->apibkey_lo = paca.apibkey; - pac_address_keys->apibkey_hi = paca.apibkey >> 64; - pac_address_keys->apdakey_lo = paca.apdakey; - pac_address_keys->apdakey_hi = paca.apdakey >> 64; - pac_address_keys->apdbkey_lo = paca.apdbkey; - pac_address_keys->apdbkey_hi = paca.apdbkey >> 64; - - iov.iov_base = &pac_enabled_key; - iov.iov_len = sizeof(pac_enabled_key); - ret = ptrace(PTRACE_GETREGSET, pid, NT_ARM_PAC_ENABLED_KEYS, &iov); - if (ret) { - 
pr_perror("Failed to get authentication key mask for %d", pid); - return -1; - } - - pac_address_keys->pac_enabled_key = pac_enabled_key; - - } - if (hwcaps & HWCAP_PACG) { - PacGenericKeys *pac_generic_keys; - - pr_debug("%d: Dumping generic authentication keys\n", pid); - iov.iov_base = &pacg; - iov.iov_len = sizeof(pacg); - if ((ret = ptrace(PTRACE_GETREGSET, pid, NT_ARM_PACG_KEYS, &iov))) { - pr_perror("Failed to get a generic authantication key for %d", pid); - return -1; - } - pac_generic_keys = xmalloc(sizeof(PacGenericKeys)); - if (!pac_generic_keys) - return -1; - pac_generic_keys__init(pac_generic_keys); - pac_entry->pac_generic_keys = pac_generic_keys; - pac_generic_keys->apgakey_lo = pacg.apgakey; - pac_generic_keys->apgakey_hi = pacg.apgakey >> 64; - } - return 0; -} - -int save_task_regs(pid_t pid, void *x, user_regs_struct_t *regs, user_fpregs_struct_t *fpsimd) +int save_task_regs(void *x, user_regs_struct_t *regs, user_fpregs_struct_t *fpsimd) { int i; - struct cr_user_gcs gcs_live; - struct iovec gcs_iov = { - .iov_base = &gcs_live, - .iov_len = sizeof(gcs_live), - }; CoreEntry *core = x; // Save the Aarch64 CPU state @@ -164,24 +37,11 @@ int save_task_regs(pid_t pid, void *x, user_regs_struct_t *regs, user_fpregs_str // Save the FP/SIMD state for (i = 0; i < 32; ++i) { - core->ti_aarch64->fpsimd->vregs[2 * i] = fpsimd->fpstate.vregs[i]; - core->ti_aarch64->fpsimd->vregs[2 * i + 1] = fpsimd->fpstate.vregs[i] >> 64; - } - assign_reg(core->ti_aarch64->fpsimd, &fpsimd->fpstate, fpsr); - assign_reg(core->ti_aarch64->fpsimd, &fpsimd->fpstate, fpcr); - - if (save_pac_keys(pid, core)) - return -1; - - /* Save the GCS state */ - if (compel_host_supports_gcs()) { - if (ptrace(PTRACE_GETREGSET, pid, NT_ARM_GCS, &gcs_iov) < 0) { - pr_perror("Failed to get GCS for %d", pid); - return -1; - } - core->ti_aarch64->gcs->gcspr_el0 = gcs_live.gcspr_el0; - core->ti_aarch64->gcs->features_enabled = gcs_live.features_enabled; + core->ti_aarch64->fpsimd->vregs[2 * i] = 
fpsimd->vregs[i]; + core->ti_aarch64->fpsimd->vregs[2 * i + 1] = fpsimd->vregs[i] >> 64; } + assign_reg(core->ti_aarch64->fpsimd, fpsimd, fpsr); + assign_reg(core->ti_aarch64->fpsimd, fpsimd, fpcr); return 0; } @@ -191,7 +51,6 @@ int arch_alloc_thread_info(CoreEntry *core) ThreadInfoAarch64 *ti_aarch64; UserAarch64RegsEntry *gpregs; UserAarch64FpsimdContextEntry *fpsimd; - UserAarch64GcsEntry *gcs; ti_aarch64 = xmalloc(sizeof(*ti_aarch64)); if (!ti_aarch64) @@ -221,15 +80,6 @@ int arch_alloc_thread_info(CoreEntry *core) if (!fpsimd->vregs) goto err; - /* Allocate & init GCS */ - if (compel_host_supports_gcs()) { - gcs = xmalloc(sizeof(*gcs)); - if (!gcs) - goto err; - user_aarch64_gcs_entry__init(gcs); - ti_aarch64->gcs = gcs; - } - return 0; err: return -1; @@ -242,12 +92,6 @@ void arch_free_thread_info(CoreEntry *core) xfree(CORE_THREAD_ARCH_INFO(core)->fpsimd->vregs); xfree(CORE_THREAD_ARCH_INFO(core)->fpsimd); } - if (CORE_THREAD_ARCH_INFO(core)->pac_keys) { - PacKeys *pac_entry = CORE_THREAD_ARCH_INFO(core)->pac_keys; - xfree(pac_entry->pac_address_keys); - xfree(pac_entry->pac_generic_keys); - xfree(pac_entry); - } xfree(CORE_THREAD_ARCH_INFO(core)->gpregs->regs); xfree(CORE_THREAD_ARCH_INFO(core)->gpregs); xfree(CORE_THREAD_ARCH_INFO(core)); @@ -259,7 +103,6 @@ int restore_fpu(struct rt_sigframe *sigframe, CoreEntry *core) { int i; struct fpsimd_context *fpsimd = RT_SIGFRAME_FPU(sigframe); - struct gcs_context *gcs; if (core->ti_aarch64->fpsimd->n_vregs != 64) return 1; @@ -273,18 +116,6 @@ int restore_fpu(struct rt_sigframe *sigframe, CoreEntry *core) fpsimd->head.magic = FPSIMD_MAGIC; fpsimd->head.size = sizeof(*fpsimd); - if (compel_host_supports_gcs()) { - gcs = RT_SIGFRAME_GCS(sigframe); - - pr_debug("sigframe gcspr %llx enabled %llx\n", gcs->gcspr, gcs->features_enabled); - - gcs->head.magic = GCS_MAGIC; - gcs->head.size = sizeof(*gcs); - gcs->reserved = 0; - gcs->gcspr = core->ti_aarch64->gcs->gcspr_el0 - 8; - gcs->features_enabled = 
core->ti_aarch64->gcs->features_enabled; - } - return 0; } @@ -304,83 +135,3 @@ int restore_gpregs(struct rt_sigframe *f, UserRegsEntry *r) return 0; } - -int arch_ptrace_restore(int pid, struct pstree_item *item) -{ - unsigned long hwcaps = getauxval(AT_HWCAP); - struct cr_user_pac_address_keys upaca; - struct cr_user_pac_generic_keys upacg; - PacAddressKeys *paca; - PacGenericKeys *pacg; - long pac_enabled_keys; - struct iovec iov; - int ret; - - - pr_debug("%d: Restoring PAC keys\n", pid); - - paca = &rsti(item)->arch_info.pac_address_keys; - pacg = &rsti(item)->arch_info.pac_generic_keys; - if (rsti(item)->arch_info.has_paca) { - if (!(hwcaps & HWCAP_PACA)) { - pr_err("PACG support is required from the source system.\n"); - return 1; - } - pac_enabled_keys = rsti(item)->arch_info.pac_address_keys.pac_enabled_key; - - upaca.apiakey = paca->apiakey_lo + ((__uint128_t)paca->apiakey_hi << 64); - upaca.apibkey = paca->apibkey_lo + ((__uint128_t)paca->apibkey_hi << 64); - upaca.apdakey = paca->apdakey_lo + ((__uint128_t)paca->apdakey_hi << 64); - upaca.apdbkey = paca->apdbkey_lo + ((__uint128_t)paca->apdbkey_hi << 64); - - iov.iov_base = &upaca; - iov.iov_len = sizeof(upaca); - - if ((ret = ptrace(PTRACE_SETREGSET, pid, NT_ARM_PACA_KEYS, &iov))) { - pr_perror("Failed to set address authentication keys for %d", pid); - return 1; - } - iov.iov_base = &pac_enabled_keys; - iov.iov_len = sizeof(pac_enabled_keys); - if ((ret = ptrace(PTRACE_SETREGSET, pid, NT_ARM_PAC_ENABLED_KEYS, &iov))) { - pr_perror("Failed to set enabled key mask for %d", pid); - return 1; - } - } - - if (rsti(item)->arch_info.has_pacg) { - if (!(hwcaps & HWCAP_PACG)) { - pr_err("PACG support is required from the source system.\n"); - return 1; - } - upacg.apgakey = pacg->apgakey_lo + ((__uint128_t)pacg->apgakey_hi << 64); - iov.iov_base = &upacg; - iov.iov_len = sizeof(upacg); - if ((ret = ptrace(PTRACE_SETREGSET, pid, NT_ARM_PACG_KEYS, &iov))) { - pr_perror("Failed to set the generic authentication 
key for %d", pid); - return 1; - } - } - - return 0; -} - -void arch_rsti_init(struct pstree_item *p) -{ - PacKeys *pac_keys = p->core[0]->ti_aarch64->pac_keys; - - rsti(p)->arch_info.has_paca = false; - rsti(p)->arch_info.has_pacg = false; - - if (!pac_keys) - return; - - if (pac_keys->pac_address_keys) { - rsti(p)->arch_info.has_paca = true; - rsti(p)->arch_info.pac_address_keys = *pac_keys->pac_address_keys; - } - if (pac_keys->pac_generic_keys) { - rsti(p)->arch_info.has_pacg = true; - rsti(p)->arch_info.pac_generic_keys = *pac_keys->pac_generic_keys; - } -} diff --git a/criu/arch/aarch64/gcs.c b/criu/arch/aarch64/gcs.c deleted file mode 100644 index 4bdb9d2e4..000000000 --- a/criu/arch/aarch64/gcs.c +++ /dev/null @@ -1,157 +0,0 @@ -#include -#include - -#include -#include - -#include "asm/gcs-types.h" -#include "pstree.h" -#include "restorer.h" -#include "rst-malloc.h" -#include "vma.h" - -#include -#include - -static bool task_has_gcs_enabled(UserAarch64GcsEntry *gcs) -{ - return gcs && (gcs->features_enabled & PR_SHADOW_STACK_ENABLE) != 0; -} - -static bool host_supports_gcs(void) -{ - unsigned long hwcap = getauxval(AT_HWCAP); - return (hwcap & HWCAP_GCS) != 0; -} - -static bool task_needs_gcs(struct pstree_item *item, CoreEntry *core) -{ - UserAarch64GcsEntry *gcs; - - if (!task_alive(item)) - return false; - - gcs = core->ti_aarch64->gcs; - - if (task_has_gcs_enabled(gcs)) { - if (!host_supports_gcs()) { - pr_warn_once("Restoring task with GCS on non-GCS host\n"); - return false; - } - - pr_info("Restoring task with GCS\n"); - return true; - } - - pr_info("Restoring a task without GCS\n"); - return false; -} - -static int gcs_prepare_task(struct vm_area_list *vmas, - struct rst_shstk_info *gcs) -{ - struct vma_area *vma; - - list_for_each_entry(vma, &vmas->h, list) { - if (vma_area_is(vma, VMA_AREA_SHSTK) && - in_vma_area(vma, gcs->gcspr_el0)) { - unsigned long premapped_addr = vma->premmaped_addr; - unsigned long size = vma_area_len(vma); - - 
gcs->vma_start = vma->e->start; - gcs->vma_size = size; - gcs->premapped_addr = premapped_addr; - - return 0; - } - } - - pr_err("Unable to find a shadow stack vma: %lx\n", gcs->gcspr_el0); - return -1; -} - -int arch_gcs_prepare(struct pstree_item *item, CoreEntry *core, - struct task_restore_args *ta) -{ - int i; - struct thread_restore_args *args_array = (struct thread_restore_args *)(&ta[1]); - struct vm_area_list *vmas = &rsti(item)->vmas; - struct rst_shstk_info *gcs = &ta->shstk; - - if (!task_needs_gcs(item, core)) - return 0; - - gcs->gcspr_el0 = core->ti_aarch64->gcs->gcspr_el0; - gcs->features_enabled = core->ti_aarch64->gcs->features_enabled; - - if (gcs_prepare_task(vmas, gcs)) { - pr_err("gcs: failed to prepare shadow stack memory\n"); - return -1; - } - - for (i = 0; i < item->nr_threads; i++) { - struct thread_restore_args *thread_args = &args_array[i]; - - core = item->core[i]; - gcs = &thread_args->shstk; - - gcs->gcspr_el0 = core->ti_aarch64->gcs->gcspr_el0; - gcs->features_enabled = core->ti_aarch64->gcs->features_enabled; - - if (gcs_prepare_task(vmas, gcs)) { - pr_err("gcs: failed to prepare GCS memory\n"); - return -1; - } - } - - return 0; -} - -int arch_shstk_trampoline(struct pstree_item *item, CoreEntry *core, - int (*func)(void *arg), void *arg) -{ - int fret; - unsigned long flags = PR_SHADOW_STACK_ENABLE | - PR_SHADOW_STACK_PUSH | - PR_SHADOW_STACK_WRITE; - - long ret, x1_after, x8_after; - - /* If task doesn't need GCS, just call func */ - if (!task_needs_gcs(item, core)) { - return func(arg); - } - - pr_debug("gcs: GCS enable SVC about to fire: x8=%d x0=%d x1=0x%lx\n", - __NR_prctl, PR_SET_SHADOW_STACK_STATUS, flags); - - asm volatile( - "mov x0, %3\n" // x0 = PR_SET_SHADOW_STACK_STATUS (75) - "mov x1, %4\n" // x1 = flags - "mov x2, xzr\n" // x2 = 0 - "mov x3, xzr\n" // x3 = 0 - "mov x4, xzr\n" // x4 = 0 - "mov x8, %5\n" // x8 = __NR_prctl (167) - "svc #0\n" // Invoke syscall - "mov %0, x0\n" // Capture return value - "mov %1, x1\n" 
// Capture x1 after - "mov %2, x8\n" // Capture x8 after - : "=r"(ret), "=r"(x1_after), "=r"(x8_after) - : "i"(PR_SET_SHADOW_STACK_STATUS), // x0 - %3rd - "r"(flags), // x1 - %4th - "i"(__NR_prctl) // x8 - %5th - : "x0", "x1", "x2", "x3", "x4", "x8", "memory", "cc"); - - pr_info("gcs: after SVC: ret=%ld x1=%ld x8=%ld\n", ret, x1_after, x8_after); - - if (ret != 0) { - int err = errno; - pr_err("gcs: failed to enable GCS: ret=%ld errno=%d (%s)\n", ret, err, strerror(err)); - return -1; - } - - fret = func(arg); - exit(fret); - - return -1; -} diff --git a/criu/arch/aarch64/include/asm/dump.h b/criu/arch/aarch64/include/asm/dump.h index ecab061c3..90cd8bca8 100644 --- a/criu/arch/aarch64/include/asm/dump.h +++ b/criu/arch/aarch64/include/asm/dump.h @@ -1,7 +1,7 @@ #ifndef __CR_ASM_DUMP_H__ #define __CR_ASM_DUMP_H__ -extern int save_task_regs(pid_t pid, void *, user_regs_struct_t *, user_fpregs_struct_t *); +extern int save_task_regs(void *, user_regs_struct_t *, user_fpregs_struct_t *); extern int arch_alloc_thread_info(CoreEntry *core); extern void arch_free_thread_info(CoreEntry *core); diff --git a/criu/arch/aarch64/include/asm/gcs.h b/criu/arch/aarch64/include/asm/gcs.h deleted file mode 100644 index 28faa23b7..000000000 --- a/criu/arch/aarch64/include/asm/gcs.h +++ /dev/null @@ -1,196 +0,0 @@ -#ifndef __CR_ASM_GCS_H__ -#define __CR_ASM_GCS_H__ - -#include - -struct rst_shstk_info { - unsigned long vma_start; /* start of GCS VMA */ - unsigned long vma_size; /* size of GCS VMA */ - unsigned long premapped_addr; /* premapped buffer */ - unsigned long tmp_gcs; /* temp area for GCS if needed */ - u64 gcspr_el0; /* GCS pointer */ - u64 features_enabled; /* GCS flags */ -}; - -#define rst_shstk_info rst_shstk_info - -struct task_restore_args; -struct pstree_item; - -int arch_gcs_prepare(struct pstree_item *item, CoreEntry *core, - struct task_restore_args *ta); -#define arch_shstk_prepare arch_gcs_prepare - -int arch_shstk_trampoline(struct pstree_item *item, CoreEntry 
*core, - int (*func)(void *arg), void *arg); -#define arch_shstk_trampoline arch_shstk_trampoline - -static always_inline void shstk_set_restorer_stack(struct rst_shstk_info *gcs, void *ptr) -{ - gcs->tmp_gcs = (long unsigned)ptr; -} -#define shstk_set_restorer_stack shstk_set_restorer_stack - -static always_inline long shstk_restorer_stack_size(void) -{ - return PAGE_SIZE; -} -#define shstk_restorer_stack_size shstk_restorer_stack_size - -#ifdef CR_NOGLIBC -#include -#include -#include "vma.h" - -static inline unsigned long gcs_map(unsigned long addr, unsigned long size, unsigned int flags) -{ - long gcspr = sys_map_shadow_stack(addr, size, flags); - pr_info("gcs: syscall: map_shadow_stack at=%lx size=%ld\n", addr, size); - - if (gcspr < 0) { - pr_err("gcs: failed to map GCS at %lx: %ld\n", addr, gcspr); - return -1; - } - - if (addr && gcspr != addr) { - pr_err("gcs: address mismatch: need %lx, got %lx\n", addr, gcspr); - return -1; - } - - pr_info("gcs: mmapped GCS at %lx\n", gcspr); - - return gcspr; -} - -/* clang-format off */ -static always_inline void gcsss1(unsigned long *Xt) -{ - asm volatile ( - "sys #3, C7, C7, #2, %0\n" - : - : "rZ" (Xt) - : "memory"); -} - -static always_inline unsigned long *gcsss2(void) -{ - unsigned long *Xt; - - asm volatile ( - "SYSL %0, #3, C7, C7, #3\n" - : "=r" (Xt) - : - : "memory"); - - return Xt; -} - -static inline void gcsstr(unsigned long addr, unsigned long val) -{ - asm volatile( - "mov x0, %0\n" - "mov x1, %1\n" - ".inst 0xd91f1c01\n" // GCSSTR x1, [x0] - "mov x0, #0\n" - : - : "r"(addr), "r"(val) - : "x0", "x1", "memory"); -} -/* clang-format on */ - -static always_inline int gcs_restore(struct rst_shstk_info *gcs) -{ - unsigned long gcspr, val; - - if (!(gcs && gcs->features_enabled & PR_SHADOW_STACK_ENABLE)) { - return 0; - } - - gcspr = gcs->gcspr_el0 - 8; - - val = ALIGN_DOWN(GCS_SIGNAL_CAP(gcspr), 8); - pr_debug("gcs: [0] GCSSTR VAL=%lx write at GCSPR=%lx\n", val, gcspr); - gcsstr(gcspr, val); - - val = 
ALIGN_DOWN(GCS_SIGNAL_CAP(gcspr), 8) | GCS_CAP_VALID_TOKEN; - gcspr -= 8; - pr_debug("gcs: [1] GCSSTR VAL=%lx write at GCSPR=%lx\n", val, gcspr); - gcsstr(gcspr, val); - - pr_debug("gcs: about to switch stacks via GCSSS1 to: %lx\n", gcspr); - gcsss1((unsigned long *)gcspr); - return 0; -} -#define arch_shstk_restore gcs_restore - -static always_inline int gcs_vma_restore(VmaEntry *vma_entry) -{ - unsigned long shstk, i, ret; - unsigned long *gcs_data = (void *)vma_premmaped_start(vma_entry); - unsigned long vma_size = vma_entry_len(vma_entry); - - shstk = gcs_map(0, vma_size, SHADOW_STACK_SET_TOKEN); - if (shstk < 0) { - pr_err("Failed to map shadow stack at %lx: %ld\n", shstk, shstk); - } - - /* restore shadow stack contents */ - for (i = 0; i < vma_size / 8; i++) - gcsstr(shstk + i * 8, gcs_data[i]); - - pr_debug("unmap %lx %ld\n", (unsigned long)gcs_data, vma_size); - ret = sys_munmap(gcs_data, vma_size); - if (ret < 0) { - pr_err("Failed to unmap premmaped shadow stack\n"); - return ret; - } - - vma_premmaped_start(vma_entry) = shstk; - - return 0; -} -#define shstk_vma_restore gcs_vma_restore - -static always_inline int gcs_switch_to_restorer(struct rst_shstk_info *gcs) -{ - int ret; - unsigned long *ssp; - unsigned long addr; - unsigned long gcspr; - - if (!(gcs && gcs->features_enabled & PR_SHADOW_STACK_ENABLE)) { - return 0; - } - - pr_debug("gcs->premapped_addr + gcs->vma_size = %lx\n", gcs->premapped_addr + gcs->vma_size); - pr_debug("gcs->tmp_gcs = %lx\n", gcs->tmp_gcs); - addr = gcs->tmp_gcs; - - if (addr % PAGE_SIZE != 0) { - pr_err("gcs: 0x%lx not page-aligned to size 0x%lx\n", addr, PAGE_SIZE); - return -1; - } - - ret = sys_munmap((void *)addr, PAGE_SIZE); - if (ret < 0) { - pr_err("gcs: Failed to unmap aarea for dumpee GCS VMAs\n"); - return -1; - } - - gcspr = gcs_map(addr, PAGE_SIZE, SHADOW_STACK_SET_TOKEN); - - if (gcspr == -1) { - pr_err("gcs: failed to gcs_map(%lx, %lx)\n", (unsigned long)addr, PAGE_SIZE); - return -1; - } - - ssp = (unsigned 
long *)(addr + PAGE_SIZE - 8); - gcsss1(ssp); - - return 0; -} -#define arch_shstk_switch_to_restorer gcs_switch_to_restorer - -#endif /* CR_NOGLIBC */ - -#endif /* __CR_ASM_GCS_H__ */ diff --git a/criu/arch/aarch64/include/asm/restore.h b/criu/arch/aarch64/include/asm/restore.h index c79605c40..75e87996a 100644 --- a/criu/arch/aarch64/include/asm/restore.h +++ b/criu/arch/aarch64/include/asm/restore.h @@ -26,14 +26,4 @@ static inline void core_get_tls(CoreEntry *pcore, tls_t *ptls) int restore_fpu(struct rt_sigframe *sigframe, CoreEntry *core); -#define ARCH_RST_INFO y -struct rst_arch_info { - bool has_paca, has_pacg; - PacAddressKeys pac_address_keys; - PacGenericKeys pac_generic_keys; -}; - -int arch_ptrace_restore(int pid, struct pstree_item *item); -void arch_rsti_init(struct pstree_item *current); - #endif diff --git a/criu/arch/aarch64/include/asm/restorer.h b/criu/arch/aarch64/include/asm/restorer.h index 8f3edc257..64a9c24eb 100644 --- a/criu/arch/aarch64/include/asm/restorer.h +++ b/criu/arch/aarch64/include/asm/restorer.h @@ -1,11 +1,10 @@ #ifndef __CR_ASM_RESTORER_H__ #define __CR_ASM_RESTORER_H__ -#include +#include #include #include "asm/types.h" -#include "asm/gcs.h" #include "images/core.pb-c.h" #include diff --git a/criu/arch/aarch64/include/asm/types.h b/criu/arch/aarch64/include/asm/types.h index db118cafd..363c1cae2 100644 --- a/criu/arch/aarch64/include/asm/types.h +++ b/criu/arch/aarch64/include/asm/types.h @@ -33,16 +33,7 @@ static inline uint64_t encode_pointer(void *p) return (uint64_t)p; } -/** - * See also: - * * arch/arm64/include/uapi/asm/auxvec.h - * * include/linux/auxvec.h - * * include/linux/mm_types.h - */ -#define AT_VECTOR_SIZE_BASE 22 -#define AT_VECTOR_SIZE_ARCH 2 -#define AT_VECTOR_SIZE (2*(AT_VECTOR_SIZE_ARCH + AT_VECTOR_SIZE_BASE + 1)) - +#define AT_VECTOR_SIZE 40 typedef uint64_t auxv_t; typedef uint64_t tls_t; diff --git a/criu/arch/arm/crtools.c b/criu/arch/arm/crtools.c index 6a5e4c89a..26b94e157 100644 --- 
a/criu/arch/arm/crtools.c +++ b/criu/arch/arm/crtools.c @@ -22,7 +22,7 @@ #define assign_reg(dst, src, e) dst->e = (__typeof__(dst->e))((src)->ARM_##e) -int save_task_regs(pid_t pid, void *x, user_regs_struct_t *regs, user_fpregs_struct_t *fpregs) +int save_task_regs(void *x, user_regs_struct_t *regs, user_fpregs_struct_t *fpregs) { CoreEntry *core = x; diff --git a/criu/arch/arm/include/asm/dump.h b/criu/arch/arm/include/asm/dump.h index b0ac5715d..485986065 100644 --- a/criu/arch/arm/include/asm/dump.h +++ b/criu/arch/arm/include/asm/dump.h @@ -1,7 +1,7 @@ #ifndef __CR_ASM_DUMP_H__ #define __CR_ASM_DUMP_H__ -extern int save_task_regs(pid_t pid, void *, user_regs_struct_t *, user_fpregs_struct_t *); +extern int save_task_regs(void *, user_regs_struct_t *, user_fpregs_struct_t *); extern int arch_alloc_thread_info(CoreEntry *core); extern void arch_free_thread_info(CoreEntry *core); diff --git a/criu/arch/loongarch64/crtools.c b/criu/arch/loongarch64/crtools.c index 783951b5b..eeb0731ca 100644 --- a/criu/arch/loongarch64/crtools.c +++ b/criu/arch/loongarch64/crtools.c @@ -29,7 +29,7 @@ #define assign_reg(dst, src, e) (dst)->e = (__typeof__(dst->e))(src)->e -int save_task_regs(pid_t pid, void *x, user_regs_struct_t *regs, user_fpregs_struct_t *fpregs) +int save_task_regs(void *x, user_regs_struct_t *regs, user_fpregs_struct_t *fpregs) { int i; CoreEntry *core = x; diff --git a/criu/arch/loongarch64/include/asm/dump.h b/criu/arch/loongarch64/include/asm/dump.h index a1c0c4c58..04347155c 100644 --- a/criu/arch/loongarch64/include/asm/dump.h +++ b/criu/arch/loongarch64/include/asm/dump.h @@ -1,7 +1,7 @@ #ifndef __CR_ASM_DUMP_H__ #define __CR_ASM_DUMP_H__ -extern int save_task_regs(pid_t pid, void *, user_regs_struct_t *, user_fpregs_struct_t *); +extern int save_task_regs(void *, user_regs_struct_t *, user_fpregs_struct_t *); extern int arch_alloc_thread_info(CoreEntry *core); extern void arch_free_thread_info(CoreEntry *core); diff --git a/criu/arch/mips/crtools.c 
b/criu/arch/mips/crtools.c index eabbd85f4..ed4da9b7e 100644 --- a/criu/arch/mips/crtools.c +++ b/criu/arch/mips/crtools.c @@ -27,7 +27,7 @@ #include "images/core.pb-c.h" #include "images/creds.pb-c.h" -int save_task_regs(pid_t pid, void *x, user_regs_struct_t *regs, user_fpregs_struct_t *fpregs) +int save_task_regs(void *x, user_regs_struct_t *regs, user_fpregs_struct_t *fpregs) { CoreEntry *core = x; diff --git a/criu/arch/mips/include/asm/dump.h b/criu/arch/mips/include/asm/dump.h index ec59b051b..58015833d 100644 --- a/criu/arch/mips/include/asm/dump.h +++ b/criu/arch/mips/include/asm/dump.h @@ -1,7 +1,7 @@ #ifndef __CR_ASM_DUMP_H__ #define __CR_ASM_DUMP_H__ -extern int save_task_regs(pid_t pid, void *, user_regs_struct_t *, user_fpregs_struct_t *); +extern int save_task_regs(void *, user_regs_struct_t *, user_fpregs_struct_t *); extern int arch_alloc_thread_info(CoreEntry *core); extern void arch_free_thread_info(CoreEntry *core); extern int get_task_futex_robust_list_compat(pid_t pid, ThreadCoreEntry *info); diff --git a/criu/arch/ppc64/cpu.c b/criu/arch/ppc64/cpu.c index b87230f40..bb5b7256e 100644 --- a/criu/arch/ppc64/cpu.c +++ b/criu/arch/ppc64/cpu.c @@ -64,12 +64,6 @@ int cpu_validate_cpuinfo(void) if (!img) return -1; - if (empty_image(img)) { - pr_err("No cpuinfo image\n"); - close_image(img); - return -1; - } - if (pb_read_one(img, &cpu_info, PB_CPUINFO) < 0) goto error; diff --git a/criu/arch/ppc64/crtools.c b/criu/arch/ppc64/crtools.c index d57040008..a08a2ca5b 100644 --- a/criu/arch/ppc64/crtools.c +++ b/criu/arch/ppc64/crtools.c @@ -404,7 +404,7 @@ static int __copy_task_regs(user_regs_struct_t *regs, user_fpregs_struct_t *fpre return 0; } -int save_task_regs(pid_t pid, void *arg, user_regs_struct_t *u, user_fpregs_struct_t *f) +int save_task_regs(void *arg, user_regs_struct_t *u, user_fpregs_struct_t *f) { return __copy_task_regs(u, f, (CoreEntry *)arg); } diff --git a/criu/arch/ppc64/include/asm/dump.h b/criu/arch/ppc64/include/asm/dump.h index 
7393654fa..eb488900a 100644 --- a/criu/arch/ppc64/include/asm/dump.h +++ b/criu/arch/ppc64/include/asm/dump.h @@ -1,7 +1,7 @@ #ifndef __CR_ASM_DUMP_H__ #define __CR_ASM_DUMP_H__ -extern int save_task_regs(pid_t pid, void *, user_regs_struct_t *, user_fpregs_struct_t *); +extern int save_task_regs(void *, user_regs_struct_t *, user_fpregs_struct_t *); extern int arch_alloc_thread_info(CoreEntry *core); extern void arch_free_thread_info(CoreEntry *core); diff --git a/criu/arch/riscv64/Makefile b/criu/arch/riscv64/Makefile deleted file mode 100644 index d19895471..000000000 --- a/criu/arch/riscv64/Makefile +++ /dev/null @@ -1,8 +0,0 @@ -builtin-name := crtools.built-in.o - -ldflags-y += -r - -obj-y += cpu.o -obj-y += crtools.o -obj-y += sigframe.o -obj-y += vdso-lookup.o \ No newline at end of file diff --git a/criu/arch/riscv64/cpu.c b/criu/arch/riscv64/cpu.c deleted file mode 100644 index 97a883b8c..000000000 --- a/criu/arch/riscv64/cpu.c +++ /dev/null @@ -1,40 +0,0 @@ -#undef LOG_PREFIX -#define LOG_PREFIX "cpu: " - -#include -#include "cpu.h" - -int cpu_init(void) -{ - return 0; -} - -int cpu_dump_cpuinfo(void) -{ - return 0; -} - -int cpu_validate_cpuinfo(void) -{ - return 0; -} - -int cpu_dump_cpuinfo_single(void) -{ - return -ENOTSUP; -} - -int cpu_validate_image_cpuinfo_single(void) -{ - return -ENOTSUP; -} - -int cpuinfo_dump(void) -{ - return -ENOTSUP; -} - -int cpuinfo_check(void) -{ - return -ENOTSUP; -} diff --git a/criu/arch/riscv64/crtools.c b/criu/arch/riscv64/crtools.c deleted file mode 100644 index eea98d6de..000000000 --- a/criu/arch/riscv64/crtools.c +++ /dev/null @@ -1,171 +0,0 @@ -#include -#include - -#include - -#include "types.h" -#include - -#include -#include "asm/restorer.h" -#include "common/compiler.h" -#include -#include "asm/dump.h" -#include "protobuf.h" -#include "images/core.pb-c.h" -#include "images/creds.pb-c.h" -#include "parasite-syscall.h" -#include "log.h" -#include "util.h" -#include "cpu.h" -#include "restorer.h" -#include 
"compel/infect.h" - -#define assign_reg(dst, src, e) dst->e = (__typeof__(dst->e))(src)->e - -int save_task_regs(pid_t pid, void *x, user_regs_struct_t *regs, user_fpregs_struct_t *fpsimd) -{ - int i; - CoreEntry *core = x; - - // Save riscv64 gprs - assign_reg(core->ti_riscv64->gpregs, regs, pc); - assign_reg(core->ti_riscv64->gpregs, regs, ra); - assign_reg(core->ti_riscv64->gpregs, regs, sp); - assign_reg(core->ti_riscv64->gpregs, regs, gp); - assign_reg(core->ti_riscv64->gpregs, regs, tp); - assign_reg(core->ti_riscv64->gpregs, regs, t0); - assign_reg(core->ti_riscv64->gpregs, regs, t1); - assign_reg(core->ti_riscv64->gpregs, regs, t2); - assign_reg(core->ti_riscv64->gpregs, regs, s0); - assign_reg(core->ti_riscv64->gpregs, regs, s1); - assign_reg(core->ti_riscv64->gpregs, regs, a0); - assign_reg(core->ti_riscv64->gpregs, regs, a1); - assign_reg(core->ti_riscv64->gpregs, regs, a2); - assign_reg(core->ti_riscv64->gpregs, regs, a3); - assign_reg(core->ti_riscv64->gpregs, regs, a4); - assign_reg(core->ti_riscv64->gpregs, regs, a5); - assign_reg(core->ti_riscv64->gpregs, regs, a6); - assign_reg(core->ti_riscv64->gpregs, regs, a7); - assign_reg(core->ti_riscv64->gpregs, regs, s2); - assign_reg(core->ti_riscv64->gpregs, regs, s3); - assign_reg(core->ti_riscv64->gpregs, regs, s4); - assign_reg(core->ti_riscv64->gpregs, regs, s5); - assign_reg(core->ti_riscv64->gpregs, regs, s6); - assign_reg(core->ti_riscv64->gpregs, regs, s7); - assign_reg(core->ti_riscv64->gpregs, regs, s8); - assign_reg(core->ti_riscv64->gpregs, regs, s9); - assign_reg(core->ti_riscv64->gpregs, regs, s10); - assign_reg(core->ti_riscv64->gpregs, regs, s11); - assign_reg(core->ti_riscv64->gpregs, regs, t3); - assign_reg(core->ti_riscv64->gpregs, regs, t4); - assign_reg(core->ti_riscv64->gpregs, regs, t5); - assign_reg(core->ti_riscv64->gpregs, regs, t6); - - // Save riscv64 fprs - for (i = 0; i < 32; ++i) - assign_reg(core->ti_riscv64->fpsimd, fpsimd, f[i]); - assign_reg(core->ti_riscv64->fpsimd, 
fpsimd, fcsr); - - return 0; -} - -int arch_alloc_thread_info(CoreEntry *core) -{ - ThreadInfoRiscv64 *ti_riscv64; - UserRiscv64RegsEntry *gpregs; - UserRiscv64DExtEntry *fpsimd; - - ti_riscv64 = xmalloc(sizeof(*ti_riscv64)); - if (!ti_riscv64) - goto err; - thread_info_riscv64__init(ti_riscv64); - core->ti_riscv64 = ti_riscv64; - - gpregs = xmalloc(sizeof(*gpregs)); - if (!gpregs) - goto err; - user_riscv64_regs_entry__init(gpregs); - - ti_riscv64->gpregs = gpregs; - - fpsimd = xmalloc(sizeof(*fpsimd)); - if (!fpsimd) - goto err; - user_riscv64_d_ext_entry__init(fpsimd); - ti_riscv64->fpsimd = fpsimd; - fpsimd->f = xmalloc(32 * sizeof(fpsimd->f[0])); - fpsimd->n_f = 32; - if (!fpsimd->f) - goto err; - - return 0; -err: - return -1; -} - -void arch_free_thread_info(CoreEntry *core) -{ - if (core->ti_riscv64) { - if (core->ti_riscv64->fpsimd) { - xfree(core->ti_riscv64->fpsimd->f); - xfree(core->ti_riscv64->fpsimd); - } - xfree(core->ti_riscv64->gpregs); - xfree(core->ti_riscv64); - core->ti_riscv64 = NULL; - } -} - -int restore_fpu(struct rt_sigframe *sigframe, CoreEntry *core) -{ - int i; - UserRiscv64DExtEntry *fpsimd = core->ti_riscv64->fpsimd; - - if (fpsimd->n_f != 32) - return 1; - - for (i = 0; i < 32; ++i) - sigframe->uc.uc_mcontext.__fpregs.__d.__f[i] = fpsimd->f[i]; - sigframe->uc.uc_mcontext.__fpregs.__d.__fcsr = fpsimd->fcsr; - - return 0; -} - -int restore_gpregs(struct rt_sigframe *f, UserRiscv64RegsEntry *r) -{ - f->uc.uc_mcontext.__gregs[0] = r->pc; - f->uc.uc_mcontext.__gregs[1] = r->ra; - f->uc.uc_mcontext.__gregs[2] = r->sp; - f->uc.uc_mcontext.__gregs[3] = r->gp; - f->uc.uc_mcontext.__gregs[4] = r->tp; - f->uc.uc_mcontext.__gregs[5] = r->t0; - f->uc.uc_mcontext.__gregs[6] = r->t1; - f->uc.uc_mcontext.__gregs[7] = r->t2; - f->uc.uc_mcontext.__gregs[8] = r->s0; - f->uc.uc_mcontext.__gregs[9] = r->s1; - f->uc.uc_mcontext.__gregs[10] = r->a0; - f->uc.uc_mcontext.__gregs[11] = r->a1; - f->uc.uc_mcontext.__gregs[12] = r->a2; - 
f->uc.uc_mcontext.__gregs[13] = r->a3; - f->uc.uc_mcontext.__gregs[14] = r->a4; - f->uc.uc_mcontext.__gregs[15] = r->a5; - f->uc.uc_mcontext.__gregs[16] = r->a6; - f->uc.uc_mcontext.__gregs[17] = r->a7; - f->uc.uc_mcontext.__gregs[18] = r->s2; - f->uc.uc_mcontext.__gregs[19] = r->s3; - f->uc.uc_mcontext.__gregs[20] = r->s4; - f->uc.uc_mcontext.__gregs[21] = r->s5; - f->uc.uc_mcontext.__gregs[22] = r->s6; - f->uc.uc_mcontext.__gregs[23] = r->s7; - f->uc.uc_mcontext.__gregs[24] = r->s8; - f->uc.uc_mcontext.__gregs[25] = r->s9; - f->uc.uc_mcontext.__gregs[26] = r->s10; - f->uc.uc_mcontext.__gregs[27] = r->s11; - f->uc.uc_mcontext.__gregs[28] = r->t3; - f->uc.uc_mcontext.__gregs[29] = r->t4; - f->uc.uc_mcontext.__gregs[30] = r->t5; - f->uc.uc_mcontext.__gregs[31] = r->t6; - - return 0; -} diff --git a/criu/arch/riscv64/include/asm/dump.h b/criu/arch/riscv64/include/asm/dump.h deleted file mode 100644 index 4f0a2d209..000000000 --- a/criu/arch/riscv64/include/asm/dump.h +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef __CR_ASM_DUMP_H__ -#define __CR_ASM_DUMP_H__ - -extern int save_task_regs(pid_t pid, void *, user_regs_struct_t *, user_fpregs_struct_t *); -extern int arch_alloc_thread_info(CoreEntry *core); -extern void arch_free_thread_info(CoreEntry *core); - -static inline void core_put_tls(CoreEntry *core, tls_t tls) -{ - core->ti_riscv64->tls = tls; -} - -#define get_task_futex_robust_list_compat(pid, info) -1 - -#endif diff --git a/criu/arch/riscv64/include/asm/int.h b/criu/arch/riscv64/include/asm/int.h deleted file mode 100644 index 642804e9b..000000000 --- a/criu/arch/riscv64/include/asm/int.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __CR_ASM_INT_H__ -#define __CR_ASM_INT_H__ - -#include "asm-generic/int.h" - -#endif /* __CR_ASM_INT_H__ */ diff --git a/criu/arch/riscv64/include/asm/kerndat.h b/criu/arch/riscv64/include/asm/kerndat.h deleted file mode 100644 index bb70cf6cf..000000000 --- a/criu/arch/riscv64/include/asm/kerndat.h +++ /dev/null @@ -1,7 +0,0 @@ -#ifndef 
__CR_ASM_KERNDAT_H__ -#define __CR_ASM_KERNDAT_H__ - -#define kdat_compatible_cr() 0 -#define kdat_can_map_vdso() 0 - -#endif /* __CR_ASM_KERNDAT_H__ */ diff --git a/criu/arch/riscv64/include/asm/parasite-syscall.h b/criu/arch/riscv64/include/asm/parasite-syscall.h deleted file mode 100644 index 6008c3792..000000000 --- a/criu/arch/riscv64/include/asm/parasite-syscall.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __CR_ASM_PARASITE_SYSCALL_H__ -#define __CR_ASM_PARASITE_SYSCALL_H__ - -struct parasite_ctl; - -#endif diff --git a/criu/arch/riscv64/include/asm/parasite.h b/criu/arch/riscv64/include/asm/parasite.h deleted file mode 100644 index 4798cfd8a..000000000 --- a/criu/arch/riscv64/include/asm/parasite.h +++ /dev/null @@ -1,16 +0,0 @@ -#ifndef __ASM_PARASITE_H__ -#define __ASM_PARASITE_H__ - -/* - * This function is used to retrieve the value of the thread pointer (tp) - * in RISC-V architecture, which is typically used for thread-local storage (TLS). - * The value is then stored in the provided tls_t pointer. 
- */ -static inline void arch_get_tls(tls_t *ptls) -{ - tls_t tls; - asm("mv %0, tp" : "=r"(tls)); - *ptls = tls; -} - -#endif diff --git a/criu/arch/riscv64/include/asm/restore.h b/criu/arch/riscv64/include/asm/restore.h deleted file mode 100644 index e4f25a57b..000000000 --- a/criu/arch/riscv64/include/asm/restore.h +++ /dev/null @@ -1,29 +0,0 @@ -#ifndef __CR_ASM_RESTORE_H__ -#define __CR_ASM_RESTORE_H__ - -#include "asm/restorer.h" - -#include "images/core.pb-c.h" - -/* clang-format off */ -#define JUMP_TO_RESTORER_BLOB(new_sp, restore_task_exec_start, \ - task_args) \ - asm volatile( \ - "and sp, %0, ~15 \n" \ - "mv a0, %2 \n" \ - "jr %1 \n" \ - : \ - : "r"(new_sp), \ - "r"(restore_task_exec_start), \ - "r"(task_args) \ - : "a0", "memory") -/* clang-format on */ - -static inline void core_get_tls(CoreEntry *pcore, tls_t *ptls) -{ - *ptls = pcore->ti_riscv64->tls; -} - -int restore_fpu(struct rt_sigframe *sigframe, CoreEntry *core); - -#endif diff --git a/criu/arch/riscv64/include/asm/restorer.h b/criu/arch/riscv64/include/asm/restorer.h deleted file mode 100644 index 45fe847a9..000000000 --- a/criu/arch/riscv64/include/asm/restorer.h +++ /dev/null @@ -1,150 +0,0 @@ -#ifndef __CR_ASM_RESTORER_H__ -#define __CR_ASM_RESTORER_H__ - -#include - -#include "asm/types.h" -#include "images/core.pb-c.h" - -#include - -// kernel arg order for clone -// unsigned long clone_flags, -// unsigned long newsp, -// int __user * parent_tidptr, -// unsigned long tls, -// int __user * child_tidptr -/* clang-format off */ -#define RUN_CLONE_RESTORE_FN(ret, clone_flags, new_sp, parent_tid, \ - thread_args, clone_restore_fn) \ - asm volatile( \ - "clone_emul: \n" \ - "ld a1, %2 \n" \ - "andi a1, a1, ~15 \n" \ - "addi a1, a1, -16 \n" \ - "sd %5, 0(a1) \n" \ - "sd %6, 8(a1) \n" \ - "mv a0, %1 \n" \ - "mv a2, %3 \n" \ - "mv a3, %4 \n" \ - "li a7, "__stringify(__NR_clone)" \n" \ - "ecall \n" \ - \ - "beqz a0, thread_run \n" \ - \ - "mv %0, a0 \n" \ - "j clone_end \n" \ - \ - "thread_run: 
\n" \ - "ld a1, 0(sp) \n" \ - "ld a0, 8(sp) \n" \ - "jr a1 \n" \ - \ - "clone_end: \n" \ - : "=r"(ret) \ - : "r"(clone_flags), \ - "m"(new_sp), \ - "r"(&parent_tid), \ - "r"(&thread_args[i].pid), \ - "r"(clone_restore_fn), \ - "r"(&thread_args[i]) \ - : "a0", "a1", "a2", "a3", "a7", "memory") - -/* - * Based on sysdeps/unix/sysv/linux/riscv/clone.S - * - * int clone(int (*fn)(void *arg), x0 - * void *child_stack, x1 - * int flags, x2 - * void *arg, x3 - * pid_t *ptid, x4 - * struct user_desc *tls, x5 - * pid_t *ctid); x6 - * - * int clone3(struct clone_args *args, x0 - * size_t size); x1 - * - * Always consult the CLONE3 wrappers for other architectures - * for additional details. - * - */ -#define RUN_CLONE3_RESTORE_FN(ret, clone_args, size, args, \ - clone_restore_fn) \ - asm volatile( \ - /* In contrast to the clone() wrapper above this does not put - * the thread function and its arguments on the child stack, - * but uses registers to pass these parameters to the child process. - * Based on the glibc clone() wrapper at - * sysdeps/unix/sysv/linux/riscv/clone.S. - */ \ - "clone3_emul: \n" \ - /* - * Based on the glibc clone() wrapper, which uses x10 and x11 - * to save the arguments for the child process, this does the same. - * x10 for the thread function and x11 for the thread arguments. 
- */ \ - "mv t0, %3 /* clone_restore_fn */ \n" \ - "mv t1, %4 /* args */ \n" \ - "mv a0, %1 /* &clone_args */ \n" \ - "mv a1, %2 /* size */ \n" \ - /* Load syscall number */ \ - "li a7, "__stringify(__NR_clone3)" \n" \ - /* Do the syscall */ \ - "ecall \n" \ - \ - "beqz a0, clone3_thread_run \n" \ - \ - "mv %0, a0 \n" \ - "j clone3_end \n" \ - \ - "clone3_thread_run: \n" \ - /* Move args to a0 */ \ - "mv a0, t1 \n" \ - /* Jump to clone_restore_fn */ \ - "jr t0 \n" \ - \ - "clone3_end: \n" \ - : "=r"(ret) \ - : "r"(&clone_args), \ - "r"(size), \ - "r"(clone_restore_fn), \ - "r"(args) \ - : "a0", "a1", "a7", "t0", "t1", "memory") - -#define ARCH_FAIL_CORE_RESTORE \ - asm volatile( \ - "mv sp, %0 \n" \ - "li a0, 0 \n" \ - "jr x0 \n" \ - : \ - : "r"(ret) \ - : "sp", "a0", "memory") -/* clang-format on */ - -#define arch_map_vdso(map, compat) -1 - -int restore_gpregs(struct rt_sigframe *f, UserRiscv64RegsEntry *r); -int restore_nonsigframe_gpregs(UserRiscv64RegsEntry *r); - -static inline void restore_tls(tls_t *ptls) -{ - asm("mv tp, %0" : : "r"(*ptls)); -} - -static inline void *alloc_compat_syscall_stack(void) -{ - return NULL; -} -static inline void free_compat_syscall_stack(void *stack32) -{ -} -static inline int arch_compat_rt_sigaction(void *stack, int sig, void *act) -{ - return -1; -} -static inline int set_compat_robust_list(uint32_t head_ptr, uint32_t len) -{ - return -1; -} - -#endif \ No newline at end of file diff --git a/criu/arch/riscv64/include/asm/thread_pointer.h b/criu/arch/riscv64/include/asm/thread_pointer.h deleted file mode 100644 index f7e07066a..000000000 --- a/criu/arch/riscv64/include/asm/thread_pointer.h +++ /dev/null @@ -1,27 +0,0 @@ -/* __thread_pointer definition. Generic version. - Copyright (C) 2021 Free Software Foundation, Inc. - This file is part of the GNU C Library. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library. If not, see - . */ - -#ifndef _SYS_THREAD_POINTER_H -#define _SYS_THREAD_POINTER_H - -static inline void *__criu_thread_pointer(void) -{ - return __builtin_thread_pointer(); -} - -#endif /* _SYS_THREAD_POINTER_H */ diff --git a/criu/arch/riscv64/include/asm/types.h b/criu/arch/riscv64/include/asm/types.h deleted file mode 100644 index 83bb5f65f..000000000 --- a/criu/arch/riscv64/include/asm/types.h +++ /dev/null @@ -1,40 +0,0 @@ -#ifndef __CR_ASM_TYPES_H__ -#define __CR_ASM_TYPES_H__ - -#include -#include -#include -#include "images/core.pb-c.h" - -#include "page.h" -#include "bitops.h" -#include "asm/int.h" - -#include - -#define core_is_compat(core) false - -typedef UserRiscv64RegsEntry UserRegsEntry; - -#define CORE_ENTRY__MARCH CORE_ENTRY__MARCH__RISCV64 - -#define CORE_THREAD_ARCH_INFO(core) core->ti_riscv64 - -#define TI_SP(core) ((core)->ti_riscv64->gpregs->sp) - -#define TI_IP(core) ((core)->ti_riscv64->gpregs->pc) - -static inline void *decode_pointer(uint64_t v) -{ - return (void *)v; -} -static inline uint64_t encode_pointer(void *p) -{ - return (uint64_t)p; -} - -#define AT_VECTOR_SIZE 64 -typedef uint64_t auxv_t; -typedef uint64_t tls_t; - -#endif /* __CR_ASM_TYPES_H__ */ diff --git a/criu/arch/riscv64/include/asm/vdso.h b/criu/arch/riscv64/include/asm/vdso.h deleted file mode 100644 index 322149c6e..000000000 --- 
a/criu/arch/riscv64/include/asm/vdso.h +++ /dev/null @@ -1,28 +0,0 @@ -#ifndef __CR_ASM_VDSO_H__ -#define __CR_ASM_VDSO_H__ - -#include "asm/int.h" -#include "common/compiler.h" -#include "asm-generic/vdso.h" - -/* - * This is a minimal amount of symbols - * we should support at the moment. - */ -#define VDSO_SYMBOL_MAX 6 -#define VDSO_SYMBOL_GTOD 2 - -#define ARCH_VDSO_SYMBOLS_LIST \ - const char *rv64_vdso_symbol1 = "__vdso_clock_getres"; \ - const char *rv64_vdso_symbol2 = "__vdso_clock_gettime"; \ - const char *rv64_vdso_symbol3 = "__vdso_gettimeofday"; \ - const char *rv64_vdso_symbol4 = "__vdso_getcpu"; \ - const char *rv64_vdso_symbol5 = "__vdso_flush_icache"; \ - const char *rv64_vdso_symbol6 = "__vdso_rt_sigreturn"; - -#define ARCH_VDSO_SYMBOLS \ - rv64_vdso_symbol1, rv64_vdso_symbol2, rv64_vdso_symbol3, rv64_vdso_symbol4, rv64_vdso_symbol5, rv64_vdso_symbol6 - -extern void write_intraprocedure_branch(unsigned long to, unsigned long from); - -#endif /* __CR_ASM_VDSO_H__ */ \ No newline at end of file diff --git a/criu/arch/riscv64/restorer.c b/criu/arch/riscv64/restorer.c deleted file mode 100644 index d605f048d..000000000 --- a/criu/arch/riscv64/restorer.c +++ /dev/null @@ -1,14 +0,0 @@ -#include - -#include "restorer.h" -#include "asm/restorer.h" - -#include -#include "log.h" -#include -#include "cpu.h" - -int restore_nonsigframe_gpregs(UserRiscv64RegsEntry *r) -{ - return 0; -} diff --git a/criu/arch/riscv64/sigframe.c b/criu/arch/riscv64/sigframe.c deleted file mode 100644 index 8096fab66..000000000 --- a/criu/arch/riscv64/sigframe.c +++ /dev/null @@ -1,8 +0,0 @@ -#include "asm/types.h" -#include -#include "asm/sigframe.h" - -int sigreturn_prep_fpu_frame(struct rt_sigframe *sigframe, struct rt_sigframe *rsigframe) -{ - return 0; -} diff --git a/criu/arch/riscv64/vdso-lookup.S b/criu/arch/riscv64/vdso-lookup.S deleted file mode 100644 index 50d4ecf08..000000000 --- a/criu/arch/riscv64/vdso-lookup.S +++ /dev/null @@ -1,15 +0,0 @@ -#include 
"common/asm/linkage.h" - -.section .text - -/* Expects t0 to hold the index into the lookup table. */ -GLOBAL(riscv_vdso_lookup) - /* Get the beginning of the lookup table */ - la t1, riscv_vdso_lookup_end - /* Scale the index */ - slli t0, t0, 3 - add t1, t0, t1 - ld t2, 0(t1) - jr t2 - -GLOBAL(riscv_vdso_lookup_end) \ No newline at end of file diff --git a/criu/arch/riscv64/vdso-pie.c b/criu/arch/riscv64/vdso-pie.c deleted file mode 100644 index aa9272fb5..000000000 --- a/criu/arch/riscv64/vdso-pie.c +++ /dev/null @@ -1,159 +0,0 @@ -#include - -#include "asm/types.h" - -#include -#include -#include -#include -#include "atomic.h" -#include "parasite-vdso.h" -#include "log.h" -#include "common/bug.h" - -#ifdef LOG_PREFIX -#undef LOG_PREFIX -#endif -#define LOG_PREFIX "vdso: " - -/* These symbols are defined in vdso-lookup.S */ -extern char *riscv_vdso_lookup, *riscv_vdso_lookup_end; - -/* - * li t0, INDEX - * jal x0, riscv_vdso_lookup - */ -#define TRAMP_CALL_SIZE (2 * sizeof(uint32_t)) - -static inline void invalidate_caches(void) -{ - // We're supposed to use the VDSO as the officially sanctioned ABI. But oh well. - int ret; - __smp_mb(); - asm volatile("li a0, 0\n" - "li a1, 0\n" - "li a2, 1\n" /* SYS_RISCV_FLUSH_ICACHE_ALL */ - "li a7, 259\n" /* __NR_arch_specific_syscall */ - "ecall\n" - : "=r"(ret) - : - : "a7"); -} - -static inline size_t vdso_trampoline_size(void) -{ - return (size_t)&riscv_vdso_lookup_end - (size_t)&riscv_vdso_lookup; -} - -static uint64_t put_trampoline(uint64_t at, struct vdso_symtable *sym) -{ - int i, j; - uint64_t total_size, trampoline_size; - uint64_t trampoline = 0; - - /* First of all we have to find a place where to put the trampoline - * code. 
- */ - trampoline_size = vdso_trampoline_size(); - total_size = trampoline_size + VDSO_SYMBOL_MAX * sizeof(uint64_t); - - for (i = 0; i < ARRAY_SIZE(sym->symbols); i++) { - if (vdso_symbol_empty(&sym->symbols[i])) - continue; - - pr_debug("Checking '%s' at %lx\n", sym->symbols[i].name, sym->symbols[i].offset); - - /* find the nearest following symbol we are interested in */ - for (j = 0; j < ARRAY_SIZE(sym->symbols); j++) { - if (i == j || vdso_symbol_empty(&sym->symbols[j])) - continue; - - if (sym->symbols[j].offset <= sym->symbols[i].offset) - /* this symbol is above the current one */ - continue; - - if ((sym->symbols[i].offset + TRAMP_CALL_SIZE) > sym->symbols[j].offset) { - /* we have a major issue here since we cannot - * even put the trampoline call for this symbol - */ - pr_err("Can't handle small vDSO symbol %s\n", sym->symbols[i].name); - return 0; - } - - if (trampoline) - /* no need to put it twice */ - continue; - - if ((sym->symbols[j].offset - (sym->symbols[i].offset + TRAMP_CALL_SIZE)) <= total_size) - /* not enough place */ - continue; - - /* We can put the trampoline there */ - trampoline = at + sym->symbols[i].offset; - trampoline += TRAMP_CALL_SIZE; - - pr_debug("Putting vDSO trampoline in %s at %lx\n", sym->symbols[i].name, trampoline); - memcpy((void *)trampoline, &riscv_vdso_lookup, trampoline_size); - invalidate_caches(); - return trampoline; - } - } - - return 0; -} - -static inline void put_trampoline_call(uint64_t from, uint64_t to, uint64_t trampoline, unsigned int idx) -{ - size_t trampoline_size = vdso_trampoline_size(); - uint64_t *lookup_table = NULL; - /* - * li t0, INDEX - * addi t0, x0 INDEX - * jal x0, riscv_vdso_lookup - */ - uint32_t trampoline_call[2] = { - 0x00000293, - 0x0000006f, - }; - const size_t insts_len = ARRAY_SIZE(trampoline_call); - uint32_t *call_addr = (uint32_t *)from; - // Offset from the jal instruction to the lookup trampoline. 
- ssize_t trampoline_offset = trampoline - (from + sizeof(uint32_t)); - - trampoline_call[0] = trampoline_call[0] | (idx << 24); - trampoline_call[1] = trampoline_call[1] | riscv_j_imm(trampoline_offset); - - for (unsigned int i = 0; i < insts_len; i++) { - call_addr[i] = trampoline_call[i]; - } - - // Set the lookup table pointer for this vdso symbol. - lookup_table = (uint64_t *)(trampoline + trampoline_size); - lookup_table[idx] = to; -} - -int vdso_redirect_calls(uint64_t base_to, uint64_t base_from, struct vdso_symtable *to, struct vdso_symtable *from, - bool __always_unused compat_vdso) -{ - unsigned int i, valid_idx = 0; - - uint64_t trampoline = (uint64_t)put_trampoline(base_from, from); - if (!trampoline) - return 1; - - for (i = 0; i < ARRAY_SIZE(to->symbols); i++) { - if (vdso_symbol_empty(&from->symbols[i])) - continue; - - pr_debug("br: %lx/%lx -> %lx/%lx (index %d) '%s'\n", base_from, from->symbols[i].offset, base_to, - to->symbols[i].offset, i, from->symbols[i].name); - - put_trampoline_call(base_from + from->symbols[i].offset, base_to + to->symbols[i].offset, trampoline, - valid_idx); - valid_idx++; - } - - invalidate_caches(); - - return 0; -} \ No newline at end of file diff --git a/criu/arch/s390/cpu.c b/criu/arch/s390/cpu.c index e227fad5e..3f430f455 100644 --- a/criu/arch/s390/cpu.c +++ b/criu/arch/s390/cpu.c @@ -87,12 +87,6 @@ int cpu_validate_cpuinfo(void) if (!img) return -1; - if (empty_image(img)) { - pr_err("No cpuinfo image\n"); - close_image(img); - return -1; - } - ret = 0; if (pb_read_one(img, &cpu_info, PB_CPUINFO) < 0) goto error; diff --git a/criu/arch/s390/crtools.c b/criu/arch/s390/crtools.c index e08c83878..5cf160d82 100644 --- a/criu/arch/s390/crtools.c +++ b/criu/arch/s390/crtools.c @@ -142,29 +142,6 @@ static void print_core_fp_regs(const char *msg, CoreEntry *core) print_core_ri_cb(core); } -/* - * Allocate floating point registers - */ -static UserS390FpregsEntry *allocate_fp_regs(void) -{ - UserS390FpregsEntry *fpregs; - - 
fpregs = xmalloc(sizeof(*fpregs)); - if (!fpregs) - return NULL; - user_s390_fpregs_entry__init(fpregs); - - fpregs->n_fprs = 16; - fpregs->fprs = xzalloc(16 * sizeof(uint64_t)); - if (!fpregs->fprs) - goto fail_free_fpregs; - return fpregs; - -fail_free_fpregs: - xfree(fpregs); - return NULL; -} - /* * Allocate VxrsLow registers */ @@ -305,7 +282,7 @@ static void free_ri_cb(UserS390RiEntry *ri_cb) /* * Copy internal structures into Google Protocol Buffers */ -int save_task_regs(pid_t pid, void *arg, user_regs_struct_t *u, user_fpregs_struct_t *f) +int save_task_regs(void *arg, user_regs_struct_t *u, user_fpregs_struct_t *f) { UserS390VxrsHighEntry *vxrs_high = NULL; UserS390VxrsLowEntry *vxrs_low = NULL; @@ -317,13 +294,7 @@ int save_task_regs(pid_t pid, void *arg, user_regs_struct_t *u, user_fpregs_stru CoreEntry *core = arg; gpregs = CORE_THREAD_ARCH_INFO(core)->gpregs; - /* - * We delay allocating this until now because checkpointing can fail earlier. - * When it fails we need to know if we reached here or not so that the cleanup - * code doesn't restore FPRs that were never saved in the first place. 
- */ - fpregs = allocate_fp_regs(); - CORE_THREAD_ARCH_INFO(core)->fpregs = fpregs; + fpregs = CORE_THREAD_ARCH_INFO(core)->fpregs; /* Vector registers */ if (f->flags & USER_FPREGS_VXRS) { @@ -428,15 +399,36 @@ int restore_fpu(struct rt_sigframe *f, CoreEntry *core) return 0; } +/* + * Allocate floating point registers + */ +static UserS390FpregsEntry *allocate_fp_regs(void) +{ + UserS390FpregsEntry *fpregs; + + fpregs = xmalloc(sizeof(*fpregs)); + if (!fpregs) + return NULL; + user_s390_fpregs_entry__init(fpregs); + + fpregs->n_fprs = 16; + fpregs->fprs = xzalloc(16 * sizeof(uint64_t)); + if (!fpregs->fprs) + goto fail_free_fpregs; + return fpregs; + +fail_free_fpregs: + xfree(fpregs); + return NULL; +} + /* * Free floating point registers */ static void free_fp_regs(UserS390FpregsEntry *fpregs) { - if (fpregs) { - xfree(fpregs->fprs); - xfree(fpregs); - } + xfree(fpregs->fprs); + xfree(fpregs); } /* @@ -495,17 +487,15 @@ int arch_alloc_thread_info(CoreEntry *core) ti_s390->gpregs = allocate_gp_regs(); if (!ti_s390->gpregs) goto fail_free_ti_s390; - - /* - * Delay allocating space until needed. Checkpointing can fail before that - * and the cleanup code needs to be able to tell if FPRs were saved or not - * before trying to restore the register state. - */ - ti_s390->fpregs = NULL; + ti_s390->fpregs = allocate_fp_regs(); + if (!ti_s390->fpregs) + goto fail_free_gp_regs; CORE_THREAD_ARCH_INFO(core) = ti_s390; return 0; +fail_free_gp_regs: + free_gp_regs(ti_s390->gpregs); fail_free_ti_s390: xfree(ti_s390); return -1; @@ -688,18 +678,14 @@ static int set_task_regs(pid_t pid, CoreEntry *core) user_fpregs_struct_t fpregs; memset(&fpregs, 0, sizeof(fpregs)); - /* - * Floating point registers - * Optional on checkpoint; checkpoint may have failed and we may reach here as part of cleanup - * so there's no guarantee that we saved FPRs for this thread. 
- */ + /* Floating point registers */ cfpregs = CORE_THREAD_ARCH_INFO(core)->fpregs; - if (cfpregs) { - fpregs.prfpreg.fpc = cfpregs->fpc; - memcpy(fpregs.prfpreg.fprs, cfpregs->fprs, sizeof(fpregs.prfpreg.fprs)); - if (set_fp_regs(pid, &fpregs) < 0) - return -1; - } + if (!cfpregs) + return -1; + fpregs.prfpreg.fpc = cfpregs->fpc; + memcpy(fpregs.prfpreg.fprs, cfpregs->fprs, sizeof(fpregs.prfpreg.fprs)); + if (set_fp_regs(pid, &fpregs) < 0) + return -1; /* Vector registers (optional) */ cvxrs_low = CORE_THREAD_ARCH_INFO(core)->vxrs_low; if (cvxrs_low != NULL) { diff --git a/criu/arch/s390/include/asm/dump.h b/criu/arch/s390/include/asm/dump.h index 5a24c5b3d..c200724d7 100644 --- a/criu/arch/s390/include/asm/dump.h +++ b/criu/arch/s390/include/asm/dump.h @@ -1,7 +1,7 @@ #ifndef __CR_ASM_DUMP_H__ #define __CR_ASM_DUMP_H__ -int save_task_regs(pid_t pid, void *arg, user_regs_struct_t *u, user_fpregs_struct_t *f); +int save_task_regs(void *arg, user_regs_struct_t *u, user_fpregs_struct_t *f); int arch_alloc_thread_info(CoreEntry *core); void arch_free_thread_info(CoreEntry *core); diff --git a/criu/arch/x86/cpu.c b/criu/arch/x86/cpu.c index 2e1f2de9a..dfa31569f 100644 --- a/criu/arch/x86/cpu.c +++ b/criu/arch/x86/cpu.c @@ -407,12 +407,6 @@ int cpu_validate_cpuinfo(void) if (!img) return -1; - if (empty_image(img)) { - pr_err("No cpuinfo image\n"); - close_image(img); - return -1; - } - if (pb_read_one(img, &img_cpu_info, PB_CPUINFO) < 0) goto err; diff --git a/criu/arch/x86/crtools.c b/criu/arch/x86/crtools.c index 1f4d0736b..e068a9a02 100644 --- a/criu/arch/x86/crtools.c +++ b/criu/arch/x86/crtools.c @@ -15,7 +15,7 @@ #define XSAVE_PB_NELEMS(__s, __obj, __member) (sizeof(__s) / sizeof(*(__obj)->__member)) -int save_task_regs(pid_t pid, void *x, user_regs_struct_t *regs, user_fpregs_struct_t *fpregs) +int save_task_regs(void *x, user_regs_struct_t *regs, user_fpregs_struct_t *fpregs) { CoreEntry *core = x; UserX86RegsEntry *gpregs = core->thread_info->gpregs; diff 
--git a/criu/arch/x86/include/asm/compat.h b/criu/arch/x86/include/asm/compat.h index 4ca704fd7..867357fa2 100644 --- a/criu/arch/x86/include/asm/compat.h +++ b/criu/arch/x86/include/asm/compat.h @@ -11,8 +11,6 @@ #include -#include "log.h" - static inline void *alloc_compat_syscall_stack(void) { void *mem = (void *)sys_mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_32BIT | MAP_ANONYMOUS | MAP_PRIVATE, diff --git a/criu/arch/x86/include/asm/dump.h b/criu/arch/x86/include/asm/dump.h index 925ea91ff..192f6bd02 100644 --- a/criu/arch/x86/include/asm/dump.h +++ b/criu/arch/x86/include/asm/dump.h @@ -1,7 +1,7 @@ #ifndef __CR_ASM_DUMP_H__ #define __CR_ASM_DUMP_H__ -extern int save_task_regs(pid_t pid, void *, user_regs_struct_t *, user_fpregs_struct_t *); +extern int save_task_regs(void *, user_regs_struct_t *, user_fpregs_struct_t *); extern int arch_alloc_thread_info(CoreEntry *core); extern void arch_free_thread_info(CoreEntry *core); extern int get_task_futex_robust_list_compat(pid_t pid, ThreadCoreEntry *info); diff --git a/criu/arch/x86/include/asm/shstk.h b/criu/arch/x86/include/asm/shstk.h index d113fd8ab..7814c351d 100644 --- a/criu/arch/x86/include/asm/shstk.h +++ b/criu/arch/x86/include/asm/shstk.h @@ -73,23 +73,6 @@ int arch_shstk_trampoline(struct pstree_item *item, CoreEntry *core, int (*func)(void *arg), void *arg); #define arch_shstk_trampoline arch_shstk_trampoline -static always_inline long shstk_restorer_stack_size(void) -{ - return PAGE_SIZE; -} -#define shstk_restorer_stack_size shstk_restorer_stack_size -static always_inline void shstk_set_restorer_stack(struct rst_shstk_info *info, void *ptr) -{ - info->tmp_shstk = (unsigned long)ptr; -} -#define shstk_set_restorer_stack shstk_set_restorer_stack - -static always_inline long shstk_min_mmap_addr(struct rst_shstk_info *info, unsigned long __maybe_unused def) -{ - return !(info->cet & ARCH_SHSTK_SHSTK) ? 
def : (4UL << 30); -} -#define shstk_min_mmap_addr shstk_min_mmap_addr - #ifdef CR_NOGLIBC #include @@ -163,53 +146,33 @@ static inline int shstk_finalize(void) return ret; } -/* - * Create shadow stack vma and restore its content from premmapped anonymous (non-shstk) vma - */ -static always_inline int shstk_vma_restore(VmaEntry *vma_entry) -{ - long shstk, i; - unsigned long *shstk_data = (void *)vma_premmaped_start(vma_entry); - unsigned long vma_size = vma_entry_len(vma_entry); - long ret; - - shstk = sys_map_shadow_stack(0, vma_size, SHADOW_STACK_SET_TOKEN); - if (shstk < 0) { - pr_err("Failed to map shadow stack: %ld\n", shstk); - return -1; - } - - /* restore shadow stack contents */ - for (i = 0; i < vma_size / 8; i++) - wrssq(shstk + i * 8, shstk_data[i]); - - ret = sys_munmap(shstk_data, vma_size); - if (ret < 0) { - pr_err("Failed to unmap premmaped shadow stack\n"); - return ret; - } - - /* - * From that point premapped vma is (shstk) and we need - * to mremap() it to the final location. Originally premapped - * (shstk_data) has been unmapped already. 
- */ - vma_premmaped_start(vma_entry) = shstk; - - return 0; -} -#define shstk_vma_restore shstk_vma_restore - /* * Restore contents of the shadow stack and set shadow stack pointer */ static always_inline int shstk_restore(struct rst_shstk_info *cet) { - unsigned long ssp, val; + unsigned long *shstk_data = (unsigned long *)cet->premmaped_addr; + unsigned long ssp = cet->vma_start + cet->vma_size - 8; + unsigned long shstk_top = cet->vma_size / 8 - 1; + unsigned long val; + long ret; if (!(cet->cet & ARCH_SHSTK_SHSTK)) return 0; + if (shstk_map(cet->vma_start, cet->vma_size)) + return -1; + + /* + * Switch shadow stack from temporary location to the actual task's + * shadow stack VMA + */ + shstk_switch_ssp(ssp); + + /* restore shadow stack contents */ + for (; ssp >= cet->ssp; ssp -= 8, shstk_top--) + wrssq(ssp, shstk_data[shstk_top]); + /* * Add tokens for sigreturn frame and for switch of the shadow stack. * The sigreturn token will be checked by the kernel during @@ -219,7 +182,6 @@ static always_inline int shstk_restore(struct rst_shstk_info *cet) */ /* token for sigreturn frame */ - ssp = cet->ssp - 8; val = ALIGN_DOWN(cet->ssp, 8) | SHSTK_DATA_BIT; wrssq(ssp, val); @@ -231,6 +193,12 @@ static always_inline int shstk_restore(struct rst_shstk_info *cet) /* reset shadow stack pointer to the proper location */ shstk_switch_ssp(ssp); + ret = sys_munmap(shstk_data, cet->vma_size + PAGE_SIZE); + if (ret < 0) { + pr_err("Failed to unmap premmaped shadow stack\n"); + return ret; + } + return shstk_finalize(); } #define arch_shstk_restore shstk_restore diff --git a/criu/arch/x86/shstk.c b/criu/arch/x86/shstk.c index 0810efac5..b752f114a 100644 --- a/criu/arch/x86/shstk.c +++ b/criu/arch/x86/shstk.c @@ -45,6 +45,7 @@ static int shstk_prepare_task(struct vm_area_list *vmas, shstk->vma_start = vma->e->start; shstk->vma_size = size; shstk->premmaped_addr = premmaped_addr; + shstk->tmp_shstk = premmaped_addr + size; break; } diff --git a/criu/cgroup.c b/criu/cgroup.c 
index 9246be639..fcaed0708 100644 --- a/criu/cgroup.c +++ b/criu/cgroup.c @@ -248,7 +248,7 @@ static int find_dir(const char *path, struct list_head *dirs, struct cgroup_dir return EXACT_MATCH; } - if (issubpath(path, d->path)) { + if (strstartswith(path, d->path)) { int ret = find_dir(path, &d->children, rdir); if (ret == NO_MATCH) { *rdir = d; diff --git a/criu/config.c b/criu/config.c index d7ef3f8e8..1322a490a 100644 --- a/criu/config.c +++ b/criu/config.c @@ -18,7 +18,6 @@ #include "cr_options.h" #include "filesystems.h" #include "file-lock.h" -#include "image.h" #include "irmap.h" #include "mount.h" #include "mount-v2.h" @@ -704,7 +703,6 @@ int parse_options(int argc, char **argv, bool *usage_error, bool *has_exec_cmd, BOOL_OPT("mntns-compat-mode", &opts.mntns_compat_mode), BOOL_OPT("unprivileged", &opts.unprivileged), BOOL_OPT("ghost-fiemap", &opts.ghost_fiemap), - BOOL_OPT(OPT_ALLOW_UPROBES, &opts.allow_uprobes), {}, }; diff --git a/criu/cr-check.c b/criu/cr-check.c index 7c3dc76dd..0388cbe7f 100644 --- a/criu/cr-check.c +++ b/criu/cr-check.c @@ -1392,14 +1392,6 @@ static int check_pagemap_scan(void) return 0; } -static int check_timer_cr_ids(void) -{ - if (!kdat.has_timer_cr_ids) - return -1; - - return 0; -} - /* musl doesn't have a statx wrapper... */ struct staty { __u32 stx_dev_major; @@ -1589,23 +1581,6 @@ static int check_overlayfs_maps(void) return status == 0 ? 0 : -1; } -static int check_breakpoints(void) -{ - if (!kdat.has_breakpoints) { - pr_warn("Hardware breakpoints don't seem to work\n"); - return -1; - } - - return 0; -} - -static int check_pagemap_scan_guard_pages(void) -{ - kerndat_warn_about_madv_guards(); - - return kdat.has_pagemap_scan_guard_pages ? 
0 : -1; -} - static int (*chk_feature)(void); /* @@ -1633,7 +1608,6 @@ static int (*chk_feature)(void); return ret; \ } \ } while (0) - int cr_check(void) { struct ns_id *ns; @@ -1729,8 +1703,6 @@ int cr_check(void) ret |= check_ipv6_freebind(); ret |= check_pagemap_scan(); ret |= check_overlayfs_maps(); - ret |= check_timer_cr_ids(); - ret |= check_pagemap_scan_guard_pages(); if (kdat.lsm == LSMTYPE__APPARMOR) ret |= check_apparmor_stacking(); @@ -1743,10 +1715,6 @@ int cr_check(void) ret |= check_autofs(); ret |= check_compat_cr(); } - /* - * Category 4 - optional. - */ - check_breakpoints(); pr_msg("%s\n", ret ? CHECK_MAYBE : CHECK_GOOD); return ret; @@ -1857,10 +1825,7 @@ static struct feature_list feature_list[] = { { "get_rseq_conf", check_ptrace_get_rseq_conf }, { "ipv6_freebind", check_ipv6_freebind }, { "pagemap_scan", check_pagemap_scan }, - { "timer_cr_ids", check_timer_cr_ids }, { "overlayfs_maps", check_overlayfs_maps }, - { "breakpoints", check_breakpoints }, - { "pagemap_scan_guard_pages", check_pagemap_scan_guard_pages }, { NULL, NULL }, }; diff --git a/criu/cr-dedup.c b/criu/cr-dedup.c index feeb9ebb0..c0c21f53e 100644 --- a/criu/cr-dedup.c +++ b/criu/cr-dedup.c @@ -87,8 +87,7 @@ static int cr_dedup_one_pagemap(unsigned long img_id, int flags) if (ret <= 0) goto exit; - pr_debug("dedup iovec %" PRIx64 " - %" PRIx64 "\n", - pr.pe->vaddr, pr.pe->vaddr + pagemap_len(pr.pe)); + pr_debug("dedup iovec base=%" PRIx64 ", len=%lu\n", pr.pe->vaddr, pagemap_len(pr.pe)); if (!pagemap_in_parent(pr.pe)) { ret = dedup_one_iovec(prp, pr.pe->vaddr, pagemap_len(pr.pe)); if (ret) diff --git a/criu/cr-dump.c b/criu/cr-dump.c index a58aaf34a..1bc5d934f 100644 --- a/criu/cr-dump.c +++ b/criu/cr-dump.c @@ -130,23 +130,6 @@ int collect_mappings(pid_t pid, struct vm_area_list *vma_area_list, dump_filemap if (ret < 0) goto err; - /* - * In addition to real process VMAs we should keep an info about - * madvise(MADV_GUARD_INSTALL) pages. 
While these are not represented - * as a struct vm_area_struct in the kernel, it is convenient to treat - * them as mappings in CRIU and reuse the same VMA images but with only - * VMA_AREA_GUARD flag set. - * - * Also, we don't need to dump them during pre-dump. - */ - if (dump_file) { - ret = collect_madv_guards(pid, vma_area_list); - if (ret < 0) { - pr_err("Collect MADV_GUARD_INSTALL pages (pid: %d) failed with %d\n", pid, ret); - goto err; - } - } - pr_info("Collected, longest area occupies %lu pages\n", vma_area_list->nr_priv_pages_longest); pr_info_vma_list(&vma_area_list->h); @@ -1413,7 +1396,7 @@ static int dump_zombies(void) item->sid = pps_buf.sid; item->pgid = pps_buf.pgid; - BUG_ON(has_children(item)); + BUG_ON(!list_empty(&item->children)); if (!item->sid) { pr_err("A session leader of zombie process %d(%d) is outside of its pid namespace\n", @@ -2138,10 +2121,8 @@ int cr_dump_tasks(pid_t pid) InventoryEntry he = INVENTORY_ENTRY__INIT; InventoryEntry *parent_ie = NULL; struct pstree_item *item; - int ret; - int exit_code = -1; - - kerndat_warn_about_madv_guards(); + int pre_dump_ret = 0; + int ret = -1; pr_info("========================================\n"); pr_info("Dumping processes (pid: %d comm: %s)\n", pid, __task_comm_info(pid)); @@ -2159,9 +2140,9 @@ int cr_dump_tasks(pid_t pid) goto err; root_item->pid->real = pid; - ret = run_scripts(ACT_PRE_DUMP); - if (ret != 0) { - pr_err("Pre dump script failed with %d!\n", ret); + pre_dump_ret = run_scripts(ACT_PRE_DUMP); + if (pre_dump_ret != 0) { + pr_err("Pre dump script failed with %d!\n", pre_dump_ret); goto err; } if (init_stats(DUMP_STATS)) @@ -2211,9 +2192,6 @@ int cr_dump_tasks(pid_t pid) if (collect_pstree()) goto err; - if (checkpoint_devices()) - goto err; - if (collect_pstree_ids()) goto err; @@ -2247,10 +2225,6 @@ int cr_dump_tasks(pid_t pid) goto err; } - ret = run_plugins(DUMP_DEVICES_LATE, pid); - if (ret && ret != -ENOTSUP) - goto err; - if (parent_ie) { 
inventory_entry__free_unpacked(parent_ie, NULL); parent_ie = NULL; @@ -2287,44 +2261,49 @@ int cr_dump_tasks(pid_t pid) * ipc shared memory, but an ipc namespace is dumped in a child * process. */ - if (cr_dump_shmem()) + ret = cr_dump_shmem(); + if (ret) goto err; if (root_ns_mask) { - if (dump_namespaces(root_item, root_ns_mask)) + ret = dump_namespaces(root_item, root_ns_mask); + if (ret) goto err; } if ((root_ns_mask & CLONE_NEWTIME) == 0) { - if (dump_time_ns(0)) + ret = dump_time_ns(0); + if (ret) goto err; } if (dump_aa_namespaces() < 0) goto err; - if (dump_cgroups()) + ret = dump_cgroups(); + if (ret) goto err; - if (fix_external_unix_sockets()) + ret = fix_external_unix_sockets(); + if (ret) goto err; - if (tty_post_actions()) + ret = tty_post_actions(); + if (ret) goto err; - if (inventory_save_uptime(&he)) + ret = inventory_save_uptime(&he); + if (ret) goto err; he.has_pre_dump_mode = false; - if (found_uprobes_vma()) { - he.has_allow_uprobes = true; - he.allow_uprobes = true; - } - exit_code = write_img_inventory(&he); + ret = write_img_inventory(&he); + if (ret) + goto err; err: if (parent_ie) inventory_entry__free_unpacked(parent_ie, NULL); - return cr_dump_finish(exit_code); + return cr_dump_finish(ret); } diff --git a/criu/cr-restore.c b/criu/cr-restore.c index b92b92715..4d4dfbe6f 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -17,7 +17,6 @@ #include #include #include -#include #include "types.h" #include @@ -80,7 +79,6 @@ #include "timens.h" #include "bpfmap.h" #include "apparmor.h" -#include "pidfd.h" #include "parasite-syscall.h" #include "files-reg.h" @@ -282,7 +280,7 @@ static struct collect_image_info *cinfos_files[] = { &unix_sk_cinfo, &fifo_cinfo, &pipe_cinfo, &nsfile_cinfo, &packet_sk_cinfo, &netlink_sk_cinfo, &eventfd_cinfo, &epoll_cinfo, &epoll_tfd_cinfo, &signalfd_cinfo, &tunfile_cinfo, &timerfd_cinfo, &inotify_cinfo, &inotify_mark_cinfo, &fanotify_cinfo, - &fanotify_mark_cinfo, &ext_file_cinfo, &memfd_cinfo, &pidfd_cinfo + 
&fanotify_mark_cinfo, &ext_file_cinfo, &memfd_cinfo, }; /* These images are required to restore namespaces */ @@ -1238,7 +1236,7 @@ static inline int fork_with_pid(struct pstree_item *item) pr_debug("PID: real %d virt %d\n", item->pid->real, vpid(item)); } - arch_shstk_unlock(item, ca.core, ret); + arch_shstk_unlock(item, ca.core, pid); err_unlock: if (!(ca.clone_flags & CLONE_NEWPID)) @@ -1708,9 +1706,6 @@ static int restore_task_with_children(void *_arg) arg); } -int __attribute((weak)) arch_ptrace_restore(int pid, struct pstree_item *item); -int arch_ptrace_restore(int pid, struct pstree_item *item) { return 0; } - static int attach_to_tasks(bool root_seized) { struct pstree_item *item; @@ -1751,8 +1746,6 @@ static int attach_to_tasks(bool root_seized) pr_perror("Unable to set PTRACE_O_TRACESYSGOOD for %d", pid); return -1; } - if (arch_ptrace_restore(pid, item)) - return -1; /* * Suspend seccomp if necessary. We need to do this because * although seccomp is restored at the very end of the @@ -1820,7 +1813,6 @@ static int restore_rseq_cs(void) static int catch_tasks(bool root_seized) { struct pstree_item *item; - bool nobp = fault_injected(FI_NO_BREAKPOINTS) || !kdat.has_breakpoints; for_each_pstree_item(item) { int status, i, ret; @@ -1848,7 +1840,7 @@ static int catch_tasks(bool root_seized) return -1; } - ret = compel_stop_pie(pid, rsti(item)->breakpoint, nobp); + ret = compel_stop_pie(pid, rsti(item)->breakpoint, fault_injected(FI_NO_BREAKPOINTS)); if (ret < 0) return -1; } @@ -2120,7 +2112,7 @@ static int restore_root_task(struct pstree_item *init) * the '--empty-ns net' mode no iptables C/R is done and we * need to return these rules by hands. 
*/ - ret = network_lock_internal(/* restore = */ true); + ret = network_lock_internal(); if (ret) goto out_kill; } @@ -2132,9 +2124,6 @@ static int restore_root_task(struct pstree_item *init) __restore_switch_stage(CR_STATE_FORKING); skip_ns_bouncing: - ret = run_plugins(POST_FORKING); - if (ret < 0 && ret != -ENOTSUP) - goto out_kill; ret = restore_wait_inprogress_tasks(); if (ret < 0) @@ -2262,7 +2251,7 @@ skip_ns_bouncing: * might actually be a true error code but that would be also * captured in the plugin so no need to print the error here. */ - if (ret < 0 && ret != -ENOTSUP) + if (ret < 0) pr_debug("restore late stage hook for external plugin failed\n"); } @@ -2339,7 +2328,6 @@ int prepare_task_entries(void) task_entries->nr_helpers = 0; futex_set(&task_entries->start, CR_STATE_FAIL); mutex_init(&task_entries->userns_sync_lock); - mutex_init(&task_entries->cgroupd_sync_lock); mutex_init(&task_entries->last_pid_mutex); return 0; @@ -2365,48 +2353,42 @@ int cr_restore_tasks(void) if (init_service_fd()) return 1; - if (check_img_inventory(/* restore = */ true) < 0) + if (cr_plugin_init(CR_PLUGIN_STAGE__RESTORE)) return -1; + if (check_img_inventory(/* restore = */ true) < 0) + goto err; + if (init_stats(RESTORE_STATS)) - return -1; + goto err; if (lsm_check_opts()) - return -1; + goto err; timing_start(TIME_RESTORE); if (cpu_init() < 0) - return -1; + goto err; if (vdso_init_restore()) - return -1; + goto err; if (tty_init_restore()) - return -1; + goto err; if (opts.cpu_cap & CPU_CAP_IMAGE) { if (cpu_validate_cpuinfo()) - return -1; + goto err; } if (prepare_task_entries() < 0) - return -1; + goto err; if (prepare_pstree() < 0) - return -1; + goto err; if (fdstore_init()) - return -1; - - /* - * For the AMDGPU plugin, its parallel restore feature needs to use fdstore to store - * its socket file descriptor. This allows the main process and the target process to - * communicate with each other through this file descriptor. 
Therefore, cr_plugin_init - * must be initialized after fdstore_init. - */ - if (cr_plugin_init(CR_PLUGIN_STAGE__RESTORE)) - return -1; + goto err; if (inherit_fd_move_to_fdstore()) goto err; @@ -2431,24 +2413,23 @@ err: return ret; } -static long restorer_get_vma_hint(struct list_head *tgt_vma_list, struct list_head *self_vma_list, long min_addr, long vma_len) +static long restorer_get_vma_hint(struct list_head *tgt_vma_list, struct list_head *self_vma_list, long vma_len) { struct vma_area *t_vma, *s_vma; - long prev_vma_end = min_addr; + long prev_vma_end = 0; struct vma_area end_vma; VmaEntry end_e; end_vma.e = &end_e; end_e.start = end_e.end = kdat.task_size; - INIT_LIST_HEAD(&end_vma.list); + prev_vma_end = kdat.mmap_min_addr; s_vma = list_first_entry(self_vma_list, struct vma_area, list); t_vma = list_first_entry(tgt_vma_list, struct vma_area, list); while (1) { if (prev_vma_end + vma_len > s_vma->e->start) { - if ((s_vma->list.next == self_vma_list) || - vma_area_is(vma_next(s_vma), VMA_AREA_GUARD)) { + if (s_vma->list.next == self_vma_list) { s_vma = &end_vma; continue; } @@ -2461,8 +2442,7 @@ static long restorer_get_vma_hint(struct list_head *tgt_vma_list, struct list_he } if (prev_vma_end + vma_len > t_vma->e->start) { - if ((t_vma->list.next == tgt_vma_list) || - vma_area_is(vma_next(t_vma), VMA_AREA_GUARD)) { + if (t_vma->list.next == tgt_vma_list) { t_vma = &end_vma; continue; } @@ -2571,17 +2551,6 @@ static int remap_restorer_blob(void *addr) restorer_setup_c_header_desc(&pbd, true); compel_relocs_apply(addr, addr, &pbd); - /* - * Ensure the infected thread sees the updated code. - * - * On architectures like ARM64, the Data Cache (D-cache) and - * Instruction Cache (I-cache) are not automatically coherent. - * Modifications land in the D-cache, so we must flush (clean) the - * D-cache to push changes to RAM to ensure the CPU fetches the updated - * instructions. 
- */ - __builtin___clear_cache(addr, addr + pbd.hdr.bsize); - return 0; } @@ -3022,7 +2991,6 @@ static struct thread_creds_args *rst_prep_creds_args(CredsEntry *ce, unsigned lo args->creds.cap_eff = NULL; args->creds.cap_prm = NULL; args->creds.cap_bnd = NULL; - args->creds.cap_amb = NULL; args->creds.groups = NULL; args->creds.lsm_profile = NULL; @@ -3030,7 +2998,6 @@ static struct thread_creds_args *rst_prep_creds_args(CredsEntry *ce, unsigned lo copy_caps(args->cap_eff, ce->cap_eff, ce->n_cap_eff); copy_caps(args->cap_prm, ce->cap_prm, ce->n_cap_prm); copy_caps(args->cap_bnd, ce->cap_bnd, ce->n_cap_bnd); - copy_caps(args->cap_amb, ce->cap_amb, ce->n_cap_amb); if (ce->n_groups && !groups_match(ce->groups, ce->n_groups)) { unsigned int *groups; @@ -3133,9 +3100,6 @@ static void *restorer_munmap_addr(CoreEntry *core, void *restorer_blob) return restorer_sym(restorer_blob, arch_export_unmap); } -void arch_rsti_init(struct pstree_item *p) __attribute__((weak)); -void arch_rsti_init(struct pstree_item *p) {} - static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, unsigned long alen, CoreEntry *core) { void *mem = MAP_FAILED; @@ -3196,7 +3160,7 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns rst_mem_size = rst_mem_lock(); memzone_size = round_up(sizeof(struct restore_mem_zone) * current->nr_threads, page_size()); - task_args->bootstrap_len = restorer_len + memzone_size + alen + rst_mem_size + shstk_restorer_stack_size(); + task_args->bootstrap_len = restorer_len + memzone_size + alen + rst_mem_size; BUG_ON(task_args->bootstrap_len & (PAGE_SIZE - 1)); pr_info("%d threads require %ldK of memory\n", current->nr_threads, KBYTES(task_args->bootstrap_len)); @@ -3226,9 +3190,7 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns * or inited from scratch). 
*/ - mem = (void *)restorer_get_vma_hint(&vmas->h, &self_vmas.h, - shstk_min_mmap_addr(&task_args->shstk, kdat.mmap_min_addr), - task_args->bootstrap_len); + mem = (void *)restorer_get_vma_hint(&vmas->h, &self_vmas.h, task_args->bootstrap_len); if (mem == (void *)-1) { pr_err("No suitable area for task_restore bootstrap (%ldK)\n", task_args->bootstrap_len); goto err; @@ -3357,7 +3319,6 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns */ creds_pos_next = creds_pos; siginfo_n = task_args->siginfo_n; - arch_rsti_init(current); for (i = 0; i < current->nr_threads; i++) { CoreEntry *tcore; struct rt_sigframe *sigframe; @@ -3467,10 +3428,6 @@ static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns * self-vmas are unmaped. */ mem += rst_mem_size; - - shstk_set_restorer_stack(&task_args->shstk, mem); - mem += shstk_restorer_stack_size(); - task_args->vdso_rt_parked_at = (unsigned long)mem; task_args->vdso_maps_rt = vdso_maps_rt; task_args->vdso_rt_size = vdso_rt_size; diff --git a/criu/cr-service.c b/criu/cr-service.c index dccf4ef38..61a04c5ff 100644 --- a/criu/cr-service.c +++ b/criu/cr-service.c @@ -283,122 +283,15 @@ int exec_rpc_query_external_files(char *name, int sk) return ret; } -static int resolve_images_dir_path(char *images_dir_path, - bool imgs_changed_by_rpc_conf, - const CriuOpts *req, - pid_t peer_pid) -{ - /* - * images_dir_fd is a required RPC parameter with -1 as default value. - * - * This assumes that if opts.imgs_dir is set, we have a value - * from the configuration file parser. The test to see that - * imgs_changed_by_rpc_conf is true is used to make sure the value - * is from the RPC configuration file. 
The idea is that only the - * RPC configuration file is able to overwrite RPC settings: - * * apply_config(global_conf) - * * apply_config(user_conf) - * * apply_config(environment variable) - * * apply_rpc_options() - * * apply_config(rpc_conf) - */ - if (imgs_changed_by_rpc_conf) { - strncpy(images_dir_path, opts.imgs_dir, PATH_MAX - 1); - images_dir_path[PATH_MAX - 1] = '\0'; - } else if (req->images_dir_fd != -1) { - snprintf(images_dir_path, PATH_MAX, "/proc/%d/fd/%d", peer_pid, req->images_dir_fd); - } else if (req->images_dir) { - strncpy(images_dir_path, req->images_dir, PATH_MAX - 1); - images_dir_path[PATH_MAX - 1] = '\0'; - } else { - /* - * Since images dir is not required in CHECK mode, we need to - * check for work_dir_fd in setup_images_and_workdir() - */ - if (opts.mode == CR_CHECK) - return 0; - pr_err("Neither images_dir_fd nor images_dir was passed by RPC client.\n"); - return -1; - } - - return 0; -} - -static int setup_images_and_workdir(const char *images_dir_path, - bool work_changed_by_rpc_conf, - CriuOpts *req, - pid_t peer_pid) -{ - char work_dir_path[PATH_MAX] = ""; - - /* We don't need to open images dir in CHECK mode. */ - if (opts.mode != CR_CHECK) { - /* - * Image streaming is not supported with CRIU's service feature as - * the streamer must be started for each dump/restore operation. - * It is unclear how to do that with RPC, so we punt for now. - * This explains why we provide the argument mode=-1 instead of - * O_RSTR or O_DUMP. 
- */ - if (open_image_dir(images_dir_path, -1) < 0) { - pr_perror("Can't open images directory"); - return -1; - } - } - - if (work_changed_by_rpc_conf) - strncpy(work_dir_path, opts.work_dir, PATH_MAX - 1); - else if (req->has_work_dir_fd) - sprintf(work_dir_path, "/proc/%d/fd/%d", peer_pid, req->work_dir_fd); - else if (opts.work_dir) - strncpy(work_dir_path, opts.work_dir, PATH_MAX - 1); - else if (images_dir_path[0] != '\0') - strcpy(work_dir_path, images_dir_path); - - if (work_dir_path[0] == '\0') { - pr_err("images-dir or work-dir is required when using log file\n"); - return -1; - } - - if (chdir(work_dir_path)) { - pr_perror("Can't chdir to work_dir"); - return -1; - } - - return 0; -} - -static int setup_logging_from_req(CriuOpts *req, bool output_changed_by_rpc_conf) -{ - if (req->log_file && !output_changed_by_rpc_conf) { - if (strchr(req->log_file, '/')) { - pr_perror("No subdirs are allowed in log_file name"); - return -1; - } - SET_CHAR_OPTS(output, req->log_file); - } else if (req->has_log_to_stderr && req->log_to_stderr && !output_changed_by_rpc_conf) { - xfree(opts.output); - opts.output = NULL; /* log_init(NULL) writes to stderr */ - } else if (!opts.output) { - SET_CHAR_OPTS(output, DEFAULT_LOG_FILENAME); - } - - opts.log_level = req->log_level; - log_set_loglevel(opts.log_level); - if (log_init(opts.output)) { - pr_perror("Can't initiate log"); - return -1; - } - - return 0; -} +static char images_dir[PATH_MAX]; static int setup_opts_from_req(int sk, CriuOpts *req) { struct ucred ids; struct stat st; socklen_t ids_len = sizeof(struct ucred); - char images_dir_path[PATH_MAX] = ""; + char images_dir_path[PATH_MAX]; + char work_dir_path[PATH_MAX]; char status_fd[PATH_MAX]; bool output_changed_by_rpc_conf = false; bool work_changed_by_rpc_conf = false; @@ -411,23 +304,6 @@ static int setup_opts_from_req(int sk, CriuOpts *req) goto err; } - /* - * The options relevant in CHECK mode are: log_file, log_to_stderr, and log_level. 
- * When logging to a file, we also need to resolve images_dir and work_dir. - */ - if (opts.mode == CR_CHECK) { - if (!req) - return 0; /* nothing to do */ - - /* - * A log file is needed only if: - * - log_file is explicitly set, or - * - log_to_stderr is NOT requested (i.e., using DEFAULT_LOG_FILENAME) - */ - if (!req->log_file || (req->has_log_to_stderr && req->log_to_stderr)) - return 0; /* no log file, don't require images_dir or work_dir */ - } - if (fstat(sk, &st)) { pr_perror("Can't get socket stat"); goto err; @@ -436,9 +312,165 @@ static int setup_opts_from_req(int sk, CriuOpts *req) BUG_ON(st.st_ino == -1); service_sk_ino = st.st_ino; + /* + * Evaluate an additional configuration file if specified. + * This needs to happen twice, because it is needed early to detect + * things like work_dir, imgs_dir and logfile. The second parsing + * of the optional RPC configuration file happens at the end and + * overwrites all options set via RPC. + */ + if (req->config_file) { + char *tmp_output = opts.output; + char *tmp_work = opts.work_dir; + char *tmp_imgs = opts.imgs_dir; + + opts.output = NULL; + opts.work_dir = NULL; + opts.imgs_dir = NULL; + + rpc_cfg_file = req->config_file; + i = parse_options(0, NULL, &dummy, &dummy, PARSING_RPC_CONF); + if (i) { + xfree(tmp_output); + xfree(tmp_work); + xfree(tmp_imgs); + goto err; + } + /* If this is non-NULL, the RPC configuration file had a value, use it.*/ + if (opts.output) + output_changed_by_rpc_conf = true; + /* If this is NULL, use the old value if it was set. */ + if (!opts.output && tmp_output) { + opts.output = tmp_output; + tmp_output = NULL; + } + + if (opts.work_dir) + work_changed_by_rpc_conf = true; + if (!opts.work_dir && tmp_work) { + opts.work_dir = tmp_work; + tmp_work = NULL; + } + + if (opts.imgs_dir) + imgs_changed_by_rpc_conf = true; + /* + * As the images directory is a required RPC setting, it is not + * necessary to use the value from other configuration files. 
+ * Either it is set in the RPC configuration file or it is set + * via RPC. + */ + xfree(tmp_output); + xfree(tmp_work); + xfree(tmp_imgs); + } + + /* + * open images_dir - images_dir_fd is a required RPC parameter + * + * This assumes that if opts.imgs_dir is set we have a value + * from the configuration file parser. The test to see that + * imgs_changed_by_rpc_conf is true is used to make sure the value + * is from the RPC configuration file. + * The idea is that only the RPC configuration file is able to + * overwrite RPC settings: + * * apply_config(global_conf) + * * apply_config(user_conf) + * * apply_config(environment variable) + * * apply_rpc_options() + * * apply_config(rpc_conf) + */ + if (imgs_changed_by_rpc_conf) + strncpy(images_dir_path, opts.imgs_dir, PATH_MAX - 1); + else if (req->images_dir_fd != -1) + sprintf(images_dir_path, "/proc/%d/fd/%d", ids.pid, req->images_dir_fd); + else if (req->images_dir) + strncpy(images_dir_path, req->images_dir, PATH_MAX - 1); + else { + pr_err("Neither images_dir_fd nor images_dir was passed by RPC client.\n"); + goto err; + } + + if (req->parent_img) + SET_CHAR_OPTS(img_parent, req->parent_img); + + /* + * Image streaming is not supported with CRIU's service feature as + * the streamer must be started for each dump/restore operation. + * It is unclear how to do that with RPC, so we punt for now. + * This explains why we provide the argument mode=-1 instead of + * O_RSTR or O_DUMP. + */ + if (open_image_dir(images_dir_path, -1) < 0) { + pr_perror("Can't open images directory"); + goto err; + } + + /* get full path to images_dir to use in process title */ + if (readlink(images_dir_path, images_dir, PATH_MAX) == -1) { + pr_perror("Can't readlink %s", images_dir_path); + goto err; + } + + /* chdir to work dir */ + if (work_changed_by_rpc_conf) + /* Use the value from the RPC configuration file first. 
*/ + strncpy(work_dir_path, opts.work_dir, PATH_MAX - 1); + else if (req->has_work_dir_fd) + /* Use the value set via RPC. */ + sprintf(work_dir_path, "/proc/%d/fd/%d", ids.pid, req->work_dir_fd); + else if (opts.work_dir) + /* Use the value from one of the other configuration files. */ + strncpy(work_dir_path, opts.work_dir, PATH_MAX - 1); + else + /* Use the images directory a work directory. */ + strcpy(work_dir_path, images_dir_path); + + if (chdir(work_dir_path)) { + pr_perror("Can't chdir to work_dir"); + goto err; + } + + /* initiate log file in work dir */ + if (req->log_file && !output_changed_by_rpc_conf) { + /* + * If RPC sets a log file and if there nothing from the + * RPC configuration file, use the RPC value. + */ + if (strchr(req->log_file, '/')) { + pr_perror("No subdirs are allowed in log_file name"); + goto err; + } + + SET_CHAR_OPTS(output, req->log_file); + } else if (req->has_log_to_stderr && req->log_to_stderr && !output_changed_by_rpc_conf) { + xfree(opts.output); + opts.output = NULL; + } else if (!opts.output) { + SET_CHAR_OPTS(output, DEFAULT_LOG_FILENAME); + } + + /* This is needed later to correctly set the log_level */ + opts.log_level = req->log_level; + log_set_loglevel(req->log_level); + if (log_init(opts.output) == -1) { + pr_perror("Can't initiate log"); + goto err; + } + + if (req->config_file) { + pr_debug("Would overwrite RPC settings with values from %s\n", req->config_file); + } + if (req->has_unprivileged) opts.unprivileged = req->unprivileged; + if (check_caps()) + return 1; + + if (kerndat_init()) + return 1; + if (log_keep_err()) { pr_perror("Can't tune log"); goto err; @@ -721,6 +753,14 @@ static int setup_opts_from_req(int sk, CriuOpts *req) if (req->empty_ns & ~(CLONE_NEWNET)) goto err; } + + if (req->n_irmap_scan_paths) { + for (i = 0; i < req->n_irmap_scan_paths; i++) { + if (irmap_scan_path_add(req->irmap_scan_paths[i])) + goto err; + } + } + if (req->has_status_fd) { pr_warn("status_fd is obsoleted; use 
status-ready notification instead\n"); @@ -732,95 +772,28 @@ static int setup_opts_from_req(int sk, CriuOpts *req) } } + if (req->has_pidfd_store_sk && init_pidfd_store_sk(ids.pid, req->pidfd_store_sk)) + goto err; + if (req->orphan_pts_master) opts.orphan_pts_master = true; if (req->has_display_stats) opts.display_stats = req->display_stats; - /* Evaluate additional configuration file (e.g., runc.conf) to overwrite all RPC settings. */ + /* Evaluate additional configuration file a second time to overwrite + * all RPC settings. */ if (req->config_file) { - char *tmp_output = opts.output; - char *tmp_work = opts.work_dir; - - opts.output = NULL; - opts.work_dir = NULL; - - /* - * As the images directory is a required RPC setting, it is not - * necessary to use the value from other configuration files. - * Either it is set in the RPC configuration file or it is set - * via RPC. - */ - xfree(opts.imgs_dir); - opts.imgs_dir = NULL; - - pr_debug("Would overwrite RPC settings with values from %s\n", req->config_file); - rpc_cfg_file = req->config_file; i = parse_options(0, NULL, &dummy, &dummy, PARSING_RPC_CONF); - if (i) { - xfree(tmp_output); - xfree(tmp_work); + if (i) goto err; - } - - /* If opts.{output,work_dir} is non-NULL, the RPC configuration file had a value, use it.*/ - /* If opts.{output,work_dir} is NULL, use the old value if it was set. 
*/ - if (opts.output) { - output_changed_by_rpc_conf = true; - } else { - opts.output = tmp_output; - tmp_output = NULL; - } - - if (opts.work_dir) { - work_changed_by_rpc_conf = true; - } else { - opts.work_dir = tmp_work; - tmp_work = NULL; - } - - if (opts.imgs_dir) - imgs_changed_by_rpc_conf = true; - - xfree(tmp_output); - xfree(tmp_work); } - if (resolve_images_dir_path(images_dir_path, imgs_changed_by_rpc_conf, req, ids.pid) < 0) - goto err; - - if (req->parent_img) - SET_CHAR_OPTS(img_parent, req->parent_img); - - if (setup_images_and_workdir(images_dir_path, work_changed_by_rpc_conf, req, ids.pid)) - goto err; - - if (req->n_irmap_scan_paths) { - for (i = 0; i < req->n_irmap_scan_paths; i++) { - if (irmap_scan_path_add(req->irmap_scan_paths[i])) - goto err; - } - } - - /* initiate log file in work dir */ - if (setup_logging_from_req(req, output_changed_by_rpc_conf)) - goto err; - - if (check_caps()) - goto err; - - if (kerndat_init()) - goto err; - - /* init_pidfd_store_sk must be called after kerndat_init. 
*/ - if (req->has_pidfd_store_sk && init_pidfd_store_sk(ids.pid, req->pidfd_store_sk)) - goto err; - if (req->mntns_compat_mode) opts.mntns_compat_mode = true; + log_set_loglevel(opts.log_level); if (check_options()) goto err; @@ -840,7 +813,7 @@ static int dump_using_req(int sk, CriuOpts *req) if (setup_opts_from_req(sk, req)) goto exit; - __setproctitle("dump --rpc -t %d", req->pid); + __setproctitle("dump --rpc -t %d -D %s", req->pid, images_dir); if (init_pidfd_store_hash()) goto pidfd_store_err; @@ -883,7 +856,7 @@ static int restore_using_req(int sk, CriuOpts *req) if (setup_opts_from_req(sk, req)) goto exit; - __setproctitle("restore --rpc"); + __setproctitle("restore --rpc -D %s", images_dir); if (cr_restore_tasks()) goto exit; @@ -922,11 +895,6 @@ static int check(int sk, CriuOpts *req) resp.type = CRIU_REQ_TYPE__CHECK; - if (log_keep_err()) { - pr_perror("Can't tune log"); - goto out; - } - pid = fork(); if (pid < 0) { pr_perror("Can't fork"); @@ -951,7 +919,6 @@ static int check(int sk, CriuOpts *req) resp.success = true; out: - set_resp_err(&resp); return send_criu_msg(sk, &resp); } @@ -960,11 +927,6 @@ static int pre_dump_using_req(int sk, CriuOpts *req, bool single) int pid, status; bool success = false; - if (log_keep_err()) { - pr_perror("Can't tune log"); - goto out; - } - pid = fork(); if (pid < 0) { pr_perror("Can't fork"); @@ -978,7 +940,7 @@ static int pre_dump_using_req(int sk, CriuOpts *req, bool single) if (setup_opts_from_req(sk, req)) goto cout; - __setproctitle("pre-dump --rpc -t %d", req->pid); + __setproctitle("pre-dump --rpc -t %d -D %s", req->pid, images_dir); if (init_pidfd_store_hash()) goto pidfd_store_err; @@ -1043,11 +1005,6 @@ static int start_page_server_req(int sk, CriuOpts *req, bool daemon_mode) CriuPageServerInfo ps = CRIU_PAGE_SERVER_INFO__INIT; struct ps_info info; - if (log_keep_err()) { - pr_perror("Can't tune log"); - goto out; - } - if (pipe(start_pipe)) { pr_perror("No start pipe"); goto out; @@ -1121,7 +1078,6 @@ 
static int start_page_server_req(int sk, CriuOpts *req, bool daemon_mode) out: resp.type = CRIU_REQ_TYPE__PAGE_SERVER; resp.success = success; - set_resp_err(&resp); return send_criu_msg(sk, &resp); } @@ -1296,11 +1252,6 @@ static int handle_cpuinfo(int sk, CriuReq *msg) bool success = false; int pid, status; - if (log_keep_err()) { - pr_perror("Can't tune log"); - goto out; - } - pid = fork(); if (pid < 0) { pr_perror("Can't fork"); @@ -1310,11 +1261,12 @@ static int handle_cpuinfo(int sk, CriuReq *msg) if (pid == 0) { int ret = 1; - opts.mode = (msg->type == CRIU_REQ_TYPE__CPUINFO_DUMP) ? CR_CPUINFO_DUMP : CR_CPUINFO_CHECK; + opts.mode = CR_CPUINFO; if (setup_opts_from_req(sk, msg->opts)) goto cout; - __setproctitle("cpuinfo %s --rpc", msg->type == CRIU_REQ_TYPE__CPUINFO_DUMP ? "dump" : "check"); + __setproctitle("cpuinfo %s --rpc -D %s", msg->type == CRIU_REQ_TYPE__CPUINFO_DUMP ? "dump" : "check", + images_dir); if (msg->type == CRIU_REQ_TYPE__CPUINFO_DUMP) ret = cpuinfo_dump(); @@ -1349,7 +1301,7 @@ static int handle_cpuinfo(int sk, CriuReq *msg) out: resp.type = msg->type; resp.success = success; - set_resp_err(&resp); + return send_criu_msg(sk, &resp); } @@ -1358,14 +1310,6 @@ int cr_service_work(int sk) int ret = -1; CriuReq *msg = 0; - /* - * util_init initializes criu_run_id and compel_run_id so that sockets - * are generated with an unique name identifying the specific process - * even in cases where multiple processes with the same pid in - * different pid namespaces are sharing the same network namespace. 
- */ - util_init(); - more: opts.mode = CR_SWRK; diff --git a/criu/crtools.c b/criu/crtools.c index 4dc55a065..94657f418 100644 --- a/criu/crtools.c +++ b/criu/crtools.c @@ -54,17 +54,19 @@ void flush_early_log_to_stderr(void) flush_early_log_buffer(STDERR_FILENO); } -static int image_dir_mode(void) +static int image_dir_mode(char *argv[], int optind) { switch (opts.mode) { case CR_DUMP: /* fallthrough */ - case CR_CPUINFO_DUMP: - /* fallthrough */ case CR_PRE_DUMP: return O_DUMP; case CR_RESTORE: return O_RSTR; + case CR_CPUINFO: + if (!strcmp(argv[optind + 1], "dump")) + return O_DUMP; + /* fallthrough */ default: return -1; } @@ -74,55 +76,36 @@ static int image_dir_mode(void) return -1; } -struct { - char *cmd; - int mode; -} commands[] = { - { "dump", CR_DUMP }, - { "pre-dump", CR_PRE_DUMP }, - { "restore", CR_RESTORE }, - { "lazy-pages", CR_LAZY_PAGES }, - { "check", CR_CHECK }, - { "page-server", CR_PAGE_SERVER }, - { "service", CR_SERVICE }, - { "swrk", CR_SWRK }, - { "dedup", CR_DEDUP }, - { "exec", CR_EXEC_DEPRECATED }, - { "show", CR_SHOW_DEPRECATED }, -}; - -static int parse_criu_mode(int argc, char **argv, int *optind) +static int parse_criu_mode(char *mode) { - char *cmd = argv[*optind]; - bool has_sub_command = (argc - *optind) > 1; - char *subcommand = has_sub_command ? 
argv[*optind + 1] : NULL; - int i; + if (!strcmp(mode, "dump")) + opts.mode = CR_DUMP; + else if (!strcmp(mode, "pre-dump")) + opts.mode = CR_PRE_DUMP; + else if (!strcmp(mode, "restore")) + opts.mode = CR_RESTORE; + else if (!strcmp(mode, "lazy-pages")) + opts.mode = CR_LAZY_PAGES; + else if (!strcmp(mode, "check")) + opts.mode = CR_CHECK; + else if (!strcmp(mode, "page-server")) + opts.mode = CR_PAGE_SERVER; + else if (!strcmp(mode, "service")) + opts.mode = CR_SERVICE; + else if (!strcmp(mode, "swrk")) + opts.mode = CR_SWRK; + else if (!strcmp(mode, "dedup")) + opts.mode = CR_DEDUP; + else if (!strcmp(mode, "cpuinfo")) + opts.mode = CR_CPUINFO; + else if (!strcmp(mode, "exec")) + opts.mode = CR_EXEC_DEPRECATED; + else if (!strcmp(mode, "show")) + opts.mode = CR_SHOW_DEPRECATED; + else + return -1; - for (i = 0; i < ARRAY_SIZE(commands); i++) { - if (strcmp(cmd, commands[i].cmd)) - continue; - opts.mode = commands[i].mode; - return 0; - } - - if (!strcmp(cmd, "cpuinfo")) { - if (subcommand == NULL) { - pr_err("cpuinfo requires an action: dump or check\n"); - return -1; - } - if (!strcmp(subcommand, "dump")) - opts.mode = CR_CPUINFO_DUMP; - else if (!strcmp(subcommand, "check")) - opts.mode = CR_CPUINFO_CHECK; - else { - pr_err("unknown cpuinfo sub-command: %s\n", subcommand); - return -1; - } - (*optind)++; - return 0; - } - pr_err("unknown command: %s\n", argv[*optind]); - return -1; + return 0; } int main(int argc, char *argv[], char *envp[]) @@ -132,7 +115,6 @@ int main(int argc, char *argv[], char *envp[]) bool has_exec_cmd = false; bool has_sub_command; int state = PARSING_GLOBAL_CONF; - char *cmd; BUILD_BUG_ON(CTL_32 != SYSCTL_TYPE__CTL_32); BUILD_BUG_ON(__CTL_STR != SYSCTL_TYPE__CTL_STR); @@ -183,18 +165,11 @@ int main(int argc, char *argv[], char *envp[]) return 1; } - cmd = argv[optind]; - ret = parse_criu_mode(argc, argv, &optind); - if (ret) + if (parse_criu_mode(argv[optind])) { + pr_err("unknown command: %s\n", argv[optind]); goto usage; + } - /* - * 
util_init initializes criu_run_id and compel_run_id so that sockets - * are generated with an unique name identifying the specific process - * even in cases where multiple processes with the same pid in - * different pid namespaces are sharing the same network namespace. - */ - util_init(); if (opts.mode == CR_SWRK) { if (argc != optind + 2) { fprintf(stderr, "Usage: criu swrk \n"); @@ -242,19 +217,25 @@ int main(int argc, char *argv[], char *envp[]) return 1; memcpy(opts.exec_cmd, &argv[optind + 1], (argc - optind - 1) * sizeof(char *)); opts.exec_cmd[argc - optind - 1] = NULL; - } else if (has_sub_command) { - pr_err("excessive parameter%s for command %s\n", (argc - optind) > 2 ? "s" : "", cmd); - goto usage; + } else { + /* No subcommands except for cpuinfo and restore --exec-cmd */ + if (opts.mode != CR_CPUINFO && has_sub_command) { + pr_err("excessive parameter%s for command %s\n", (argc - optind) > 2 ? "s" : "", argv[optind]); + goto usage; + } else if (opts.mode == CR_CPUINFO && !has_sub_command) { + pr_err("cpuinfo requires an action: dump or check\n"); + goto usage; + } } - if (opts.stream && image_dir_mode() == -1) { - pr_err("--stream cannot be used with the %s command\n", cmd); + if (opts.stream && image_dir_mode(argv, optind) == -1) { + pr_err("--stream cannot be used with the %s command\n", argv[optind]); goto usage; } /* We must not open imgs dir, if service is called */ if (opts.mode != CR_SERVICE) { - ret = open_image_dir(opts.imgs_dir, image_dir_mode()); + ret = open_image_dir(opts.imgs_dir, image_dir_mode(argv, optind)); if (ret < 0) { pr_err("Couldn't open image dir %s\n", opts.imgs_dir); return 1; @@ -273,6 +254,8 @@ int main(int argc, char *argv[], char *envp[]) return 1; } + util_init(); + if (log_init(opts.output)) return 1; @@ -299,13 +282,14 @@ int main(int argc, char *argv[], char *envp[]) if (opts.img_parent) pr_info("Will do snapshot from %s\n", opts.img_parent); - switch (opts.mode) { - case CR_DUMP: + if (opts.mode == CR_DUMP) { if 
(!opts.tree_id) goto opt_pid_missing; return cr_dump_tasks(opts.tree_id); - case CR_PRE_DUMP: + } + + if (opts.mode == CR_PRE_DUMP) { if (!opts.tree_id) goto opt_pid_missing; @@ -315,7 +299,9 @@ int main(int argc, char *argv[], char *envp[]) } return cr_pre_dump_tasks(opts.tree_id) != 0; - case CR_RESTORE: + } + + if (opts.mode == CR_RESTORE) { if (opts.tree_id) pr_warn("Using -t with criu restore is obsoleted\n"); @@ -328,41 +314,46 @@ int main(int argc, char *argv[], char *envp[]) } return ret != 0; + } - case CR_LAZY_PAGES: + if (opts.mode == CR_LAZY_PAGES) return cr_lazy_pages(opts.daemon_mode) != 0; - case CR_CHECK: + if (opts.mode == CR_CHECK) return cr_check() != 0; - case CR_PAGE_SERVER: + if (opts.mode == CR_PAGE_SERVER) return cr_page_server(opts.daemon_mode, false, -1) != 0; - case CR_SERVICE: + if (opts.mode == CR_SERVICE) return cr_service(opts.daemon_mode); - case CR_DEDUP: + if (opts.mode == CR_DEDUP) return cr_dedup() != 0; - case CR_CPUINFO_DUMP: - return cpuinfo_dump(); + if (opts.mode == CR_CPUINFO) { + if (!argv[optind + 1]) { + pr_err("cpuinfo requires an action: dump or check\n"); + goto usage; + } + if (!strcmp(argv[optind + 1], "dump")) + return cpuinfo_dump(); + else if (!strcmp(argv[optind + 1], "check")) + return cpuinfo_check(); + } - case CR_CPUINFO_CHECK: - return cpuinfo_check(); - - case CR_EXEC_DEPRECATED: + if (opts.mode == CR_EXEC_DEPRECATED) { pr_err("The \"exec\" action is deprecated by the Compel library.\n"); return -1; + } - case CR_SHOW_DEPRECATED: + if (opts.mode == CR_SHOW_DEPRECATED) { pr_err("The \"show\" action is deprecated by the CRIT utility.\n"); pr_err("To view an image use the \"crit decode -i $name --pretty\" command.\n"); return -1; - - case CR_UNSET: - default: - pr_err("unknown command: %s\n", cmd); } + + pr_err("unknown command: %s\n", argv[optind]); usage: pr_msg("\n" "Usage:\n" @@ -426,7 +417,7 @@ usage: " --network-lock METHOD network locking/unlocking method; argument\n" " can be 'nftables' or 'iptables' 
(default).\n" " --unprivileged accept limitations when running as non-root\n" - " --allow-uprobes allow dump/restore with uprobes vma\n" + " consult documentation for further details\n" "\n" "* External resources support:\n" " --external RES dump objects from this list as external resources:\n" @@ -503,8 +494,8 @@ usage: " Inherit file descriptors, treating fd NUM as being\n" " already opened via an existing RES, which can be:\n" " tty[rdev:dev]\n" - " pipe:[inode]\n" - " socket:[inode]\n" + " pipe[inode]\n" + " socket[inode]\n" " file[mnt_id:inode]\n" " /memfd:name\n" " path/to/file\n" diff --git a/criu/fault-injection.c b/criu/fault-injection.c index 5dd9acf60..2272e6d84 100644 --- a/criu/fault-injection.c +++ b/criu/fault-injection.c @@ -24,8 +24,8 @@ int fault_injection_init(void) fi_strategy = start; switch (fi_strategy) { - case FI_COMPEL_INTERRUPT_ONLY_MODE: - set_compel_interrupt_only_mode(); + case FI_DISABLE_FREEZE_CGROUP: + dont_use_freeze_cgroup(); break; default: break; diff --git a/criu/fdstore.c b/criu/fdstore.c index 6ac639c55..d615ad15d 100644 --- a/criu/fdstore.c +++ b/criu/fdstore.c @@ -58,7 +58,7 @@ int fdstore_init(void) } addr.sun_family = AF_UNIX; - addrlen = snprintf(addr.sun_path, sizeof(addr.sun_path), "X/criu-fdstore-%" PRIx64 "-%s", st.st_ino, + addrlen = snprintf(addr.sun_path, sizeof(addr.sun_path), "X/criu-fdstore-%" PRIx64 "-%" PRIx64, st.st_ino, criu_run_id); addrlen += sizeof(addr.sun_family); diff --git a/criu/files-ext.c b/criu/files-ext.c index 4cc99d921..95ec8e37c 100644 --- a/criu/files-ext.c +++ b/criu/files-ext.c @@ -45,11 +45,10 @@ static int open_fd(struct file_desc *d, int *new_fd) { struct ext_file_info *xfi; int fd; - bool retry_needed; xfi = container_of(d, struct ext_file_info, d); - fd = run_plugins(RESTORE_EXT_FILE, xfi->xfe->id, &retry_needed); + fd = run_plugins(RESTORE_EXT_FILE, xfi->xfe->id); if (fd < 0) { pr_err("Unable to restore %#x\n", xfi->xfe->id); return -1; @@ -58,11 +57,8 @@ static int open_fd(struct 
file_desc *d, int *new_fd) if (restore_fown(fd, xfi->xfe->fown)) return -1; - if (!retry_needed) - *new_fd = fd; - else - *new_fd = -1; - return retry_needed; + *new_fd = fd; + return 0; } static struct file_desc_ops ext_desc_ops = { diff --git a/criu/files-reg.c b/criu/files-reg.c index 66c0e6cda..fc6149350 100644 --- a/criu/files-reg.c +++ b/criu/files-reg.c @@ -1150,7 +1150,7 @@ static int create_link_remap(char *path, int len, int lfd, u32 *idp, struct ns_i rfe.name = link_name + 1; /* Any 'unique' name works here actually. Remap works by reg-file ids. */ - snprintf(tmp + 1, sizeof(link_name) - (size_t)(tmp - link_name) - 1, "link_remap.%d", rfe.id); + snprintf(tmp + 1, sizeof(link_name) - (size_t)(tmp - link_name - 1), "link_remap.%d", rfe.id); mntns_root = mntns_get_root_fd(nsid); diff --git a/criu/files.c b/criu/files.c index af4b8aeac..3b653e24b 100644 --- a/criu/files.c +++ b/criu/files.c @@ -49,7 +49,6 @@ #include "kerndat.h" #include "fdstore.h" #include "bpfmap.h" -#include "pidfd.h" #include "protobuf.h" #include "util.h" @@ -545,8 +544,6 @@ static int dump_one_file(struct pid *pid, int fd, int lfd, struct fd_opts *opts, ops = &signalfd_dump_ops; else if (is_timerfd_link(link)) ops = &timerfd_dump_ops; - else if (is_pidfd_link(link)) - ops = &pidfd_dump_ops; #ifdef CONFIG_HAS_LIBBPF else if (is_bpfmap_link(link)) ops = &bpfmap_dump_ops; @@ -557,11 +554,6 @@ static int dump_one_file(struct pid *pid, int fd, int lfd, struct fd_opts *opts, return do_dump_gen_file(&p, lfd, ops, e); } - if (p.fs_type == PID_FS_MAGIC) { - ops = &pidfd_dump_ops; - return do_dump_gen_file(&p, lfd, ops, e); - } - if (S_ISREG(p.stat.st_mode) || S_ISDIR(p.stat.st_mode) || S_ISLNK(p.stat.st_mode)) { if (fill_fdlink(lfd, &p, &link)) return -1; @@ -978,7 +970,7 @@ static int receive_fd(struct fdinfo_list_entry *fle); static void transport_name_gen(struct sockaddr_un *addr, int *len, int pid) { addr->sun_family = AF_UNIX; - snprintf(addr->sun_path, UNIX_PATH_MAX, 
"x/crtools-fd-%d-%s", pid, criu_run_id); + snprintf(addr->sun_path, UNIX_PATH_MAX, "x/crtools-fd-%d-%" PRIx64, pid, criu_run_id); *len = SUN_LEN(addr); *addr->sun_path = '\0'; } @@ -1329,6 +1321,7 @@ int prepare_fds(struct pstree_item *me) } } + BUG_ON(current->pid->state == TASK_HELPER); ret = open_fdinfos(me); if (rsti(me)->fdt) @@ -1785,9 +1778,6 @@ static int collect_one_file(void *o, ProtobufCMessage *base, struct cr_img *i) case FD_TYPES__MEMFD: ret = collect_one_file_entry(fe, fe->memfd->id, &fe->memfd->base, &memfd_cinfo); break; - case FD_TYPES__PIDFD: - ret = collect_one_file_entry(fe, fe->pidfd->id, &fe->pidfd->base, &pidfd_cinfo); - break; #ifdef CONFIG_HAS_LIBBPF case FD_TYPES__BPFMAP: ret = collect_one_file_entry(fe, fe->bpf->id, &fe->bpf->base, &bpfmap_cinfo); @@ -1810,6 +1800,5 @@ int prepare_files(void) { init_fdesc_hash(); init_sk_info_hash(); - init_dead_pidfd_hash(); return collect_image(&files_cinfo); } diff --git a/criu/image-desc.c b/criu/image-desc.c index 2d87c7381..d65d9c098 100644 --- a/criu/image-desc.c +++ b/criu/image-desc.c @@ -107,7 +107,6 @@ struct cr_fd_desc_tmpl imgset_template[CR_FD_MAX] = { FD_ENTRY_F(BPFMAP_FILE, "bpfmap-file", O_NOBUF), FD_ENTRY_F(BPFMAP_DATA, "bpfmap-data", O_NOBUF), FD_ENTRY(APPARMOR, "apparmor"), - FD_ENTRY(PIDFD, "pidfd"), [CR_FD_STATS] = { .fmt = "stats-%s", diff --git a/criu/image.c b/criu/image.c index 91101c3eb..9fb390ab7 100644 --- a/criu/image.c +++ b/criu/image.c @@ -25,15 +25,6 @@ bool img_common_magic = true; TaskKobjIdsEntry *root_ids; u32 root_cg_set; Lsmtype image_lsm; -char dump_criu_run_id[RUN_ID_HASH_LENGTH]; - -struct inventory_plugin { - struct list_head node; - char *name; -}; - -struct list_head inventory_plugins_list = LIST_HEAD_INIT(inventory_plugins_list); -static int n_inventory_plugins; int check_img_inventory(bool restore) { @@ -95,11 +86,6 @@ int check_img_inventory(bool restore) goto out_err; } - if (restore && he->allow_uprobes && !opts.allow_uprobes) { - pr_err("Dumped with --" 
OPT_ALLOW_UPROBES ". Need to set it on restore as well.\n"); - goto out_err; - } - if (restore) { if (!he->has_network_lock_method) { /* @@ -113,37 +99,6 @@ int check_img_inventory(bool restore) } else { opts.network_lock_method = he->network_lock_method; } - - if (!he->plugins_entry) { - /* backwards compatibility: if the 'plugins_entry' field is missing, - * all plugins should be enabled during restore. - */ - n_inventory_plugins = -1; - } else { - PluginsEntry *pe = he->plugins_entry; - for (int i = 0; i < pe->n_plugins; i++) { - if (add_inventory_plugin(pe->plugins[i])) - goto out_err; - } - } - - /** - * This contains the criu_run_id during dumping of the process. - * For things like removing network locking (nftables) this - * information is needed to identify the name of the network - * locking table. - */ - if (he->dump_criu_run_id) { - strncpy(dump_criu_run_id, he->dump_criu_run_id, sizeof(dump_criu_run_id) - 1); - pr_info("Dump CRIU run id = %s\n", dump_criu_run_id); - } else { - /** - * If restoring from an old image this is a marker - * that no dump_criu_run_id exists. - */ - dump_criu_run_id[0] = NO_DUMP_CRIU_RUN_ID; - } - } ret = 0; @@ -155,92 +110,8 @@ out_close: return ret; } -/** - * Check if the 'plugins' field in the inventory image contains - * the specified plugin name. If found, the plugin is removed - * from the linked list. - */ -bool check_and_remove_inventory_plugin(const char *name, size_t n) -{ - if (n_inventory_plugins == -1) - return true; /* backwards compatibility */ - - if (n_inventory_plugins > 0) { - struct inventory_plugin *p, *tmp; - - list_for_each_entry_safe(p, tmp, &inventory_plugins_list, node) { - if (!strncmp(name, p->name, n)) { - xfree(p->name); - list_del(&p->node); - xfree(p); - n_inventory_plugins--; - return true; - } - } - } - - return false; -} - -/** - * We expect during restore all loaded plugins to be removed from - * the inventory_plugins_list. 
If the list is not empty, show an - * error message for each missing plugin. - */ -int check_inventory_plugins(void) -{ - struct inventory_plugin *p; - - if (n_inventory_plugins <= 0) - return 0; - - list_for_each_entry(p, &inventory_plugins_list, node) { - pr_err("Missing required plugin: %s\n", p->name); - } - - return -1; -} - -/** - * Add plugin name to the inventory image. These values - * can be used to identify required plugins during restore. - */ -int add_inventory_plugin(const char *name) -{ - struct inventory_plugin *p; - - p = xmalloc(sizeof(struct inventory_plugin)); - if (p == NULL) - return -1; - - p->name = xstrdup(name); - if (!p->name) { - xfree(p); - return -1; - } - list_add(&p->node, &inventory_plugins_list); - n_inventory_plugins++; - - return 0; -} - -void free_inventory_plugins_list(void) -{ - struct inventory_plugin *p, *tmp; - - if (!list_empty(&inventory_plugins_list)) { - list_for_each_entry_safe(p, tmp, &inventory_plugins_list, node) { - xfree(p->name); - list_del(&p->node); - xfree(p); - } - } - n_inventory_plugins = 0; -} - int write_img_inventory(InventoryEntry *he) { - PluginsEntry pe = PLUGINS_ENTRY__INIT; struct cr_img *img; int ret; @@ -250,27 +121,8 @@ int write_img_inventory(InventoryEntry *he) if (!img) return -1; - if (!list_empty(&inventory_plugins_list)) { - struct inventory_plugin *p; - int i = 0; - - pe.n_plugins = n_inventory_plugins; - pe.plugins = xmalloc(n_inventory_plugins * sizeof(char *)); - if (!pe.plugins) - return -1; - - list_for_each_entry(p, &inventory_plugins_list, node) { - pe.plugins[i] = p->name; - i++; - } - } - he->plugins_entry = &pe; - ret = pb_write_one(img, he, PB_INVENTORY); - free_inventory_plugins_list(); - xfree(pe.plugins); - xfree(he->root_ids); close_image(img); if (ret < 0) @@ -391,17 +243,6 @@ int prepare_inventory(InventoryEntry *he) he->has_network_lock_method = true; he->network_lock_method = opts.network_lock_method; - /** - * This contains the criu_run_id during dumping of the process. 
- * For things like removing network locking (nftables) this - * information is needed to identify the name of the network - * locking table. - */ - he->dump_criu_run_id = xstrdup(criu_run_id); - - if (!he->dump_criu_run_id) - return -1; - return 0; } @@ -717,7 +558,7 @@ struct cr_img *img_from_fd(int fd) * This is used when opts.stream is enabled for picking the right streamer * socket name. `mode` is ignored when opts.stream is not enabled. */ -int open_image_dir(const char *dir, int mode) +int open_image_dir(char *dir, int mode) { int fd, ret; diff --git a/criu/include/cr_options.h b/criu/include/cr_options.h index 8c5707b41..60cf9437e 100644 --- a/criu/include/cr_options.h +++ b/criu/include/cr_options.h @@ -70,15 +70,7 @@ enum NETWORK_LOCK_METHOD { NETWORK_LOCK_SKIP, }; -/** - * CRIU currently defaults to the iptables locking backend. - * - * It is, however, possible to change this by defining - * NETWORK_LOCK_DEFAULT to a different value on the command-line. - */ -#ifndef NETWORK_LOCK_DEFAULT #define NETWORK_LOCK_DEFAULT NETWORK_LOCK_IPTABLES -#endif /* * Ghost file size we allow to carry by default. 
@@ -125,8 +117,7 @@ enum criu_mode { CR_SERVICE, CR_SWRK, CR_DEDUP, - CR_CPUINFO_DUMP, - CR_CPUINFO_CHECK, + CR_CPUINFO, CR_EXEC_DEPRECATED, CR_SHOW_DEPRECATED, }; @@ -196,7 +187,6 @@ struct cr_options { char *work_dir; int network_lock_method; int skip_file_rwx_check; - int allow_uprobes; /* * When we scheduler for removal some functionality we first diff --git a/criu/include/criu-plugin.h b/criu/include/criu-plugin.h index c3bea1385..392ea9f53 100644 --- a/criu/include/criu-plugin.h +++ b/criu/include/criu-plugin.h @@ -60,12 +60,6 @@ enum { CR_PLUGIN_HOOK__CHECKPOINT_DEVICES = 11, - CR_PLUGIN_HOOK__POST_FORKING = 12, - - CR_PLUGIN_HOOK__RESTORE_INIT = 13, - - CR_PLUGIN_HOOK__DUMP_DEVICES_LATE = 14, - CR_PLUGIN_HOOK__MAX }; @@ -74,7 +68,7 @@ enum { DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__DUMP_UNIX_SK, int fd, int id); DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__RESTORE_UNIX_SK, int id); DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__DUMP_EXT_FILE, int fd, int id); -DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__RESTORE_EXT_FILE, int id, bool *retry_needed); +DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__RESTORE_EXT_FILE, int id); DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__DUMP_EXT_MOUNT, char *mountpoint, int id); DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__RESTORE_EXT_MOUNT, int id, char *mountpoint, char *old_root, int *is_file); DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__DUMP_EXT_LINK, int index, int type, char *kind); @@ -84,9 +78,6 @@ DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__UPDATE_VMA_MAP, const char *path, const DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__RESUME_DEVICES_LATE, int pid); DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__PAUSE_DEVICES, int pid); DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__CHECKPOINT_DEVICES, int pid); -DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__POST_FORKING, void); -DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__RESTORE_INIT, void); -DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__DUMP_DEVICES_LATE, int id); enum { CR_PLUGIN_STAGE__DUMP, @@ -161,6 +152,5 @@ typedef 
int(cr_plugin_handle_device_vma_t)(int fd, const struct stat *stat); typedef int(cr_plugin_update_vma_map_t)(const char *path, const uint64_t addr, const uint64_t old_pgoff, uint64_t *new_pgoff, int *plugin_fd); typedef int(cr_plugin_resume_devices_late_t)(int pid); -typedef int(cr_plugin_post_forking_t)(void); #endif /* __CRIU_PLUGIN_H__ */ diff --git a/criu/include/fault-injection.h b/criu/include/fault-injection.h index e987c18ce..59adf05b9 100644 --- a/criu/include/fault-injection.h +++ b/criu/include/fault-injection.h @@ -21,7 +21,7 @@ enum faults { FI_CORRUPT_EXTREGS = 134, FI_DONT_USE_PAGEMAP_SCAN = 135, FI_DUMP_CRASH = 136, - FI_COMPEL_INTERRUPT_ONLY_MODE = 137, + FI_DISABLE_FREEZE_CGROUP = 137, FI_PLUGIN_CUDA_FORCE_ENABLE = 138, FI_MAX, }; diff --git a/criu/include/fs-magic.h b/criu/include/fs-magic.h index ffc0455d5..ad34f4891 100644 --- a/criu/include/fs-magic.h +++ b/criu/include/fs-magic.h @@ -57,8 +57,4 @@ #define OVERLAYFS_SUPER_MAGIC 0x794c7630 #endif -#ifndef PID_FS_MAGIC -#define PID_FS_MAGIC 0x50494446 -#endif - #endif /* __CR_FS_MAGIC_H__ */ diff --git a/criu/include/image-desc.h b/criu/include/image-desc.h index 79e1ac111..9f369be64 100644 --- a/criu/include/image-desc.h +++ b/criu/include/image-desc.h @@ -113,7 +113,6 @@ enum { CR_FD_PIPES, CR_FD_TTY_FILES, CR_FD_MEMFD_FILE, - CR_FD_PIDFD, CR_FD_AUTOFS, diff --git a/criu/include/image.h b/criu/include/image.h index 30e32323d..a17aae35c 100644 --- a/criu/include/image.h +++ b/criu/include/image.h @@ -68,18 +68,6 @@ * processing exiting with error; while the rest of bits * are part of image ABI, this particular one must never * be used in image. - * - guard - * stands for a fake VMA (not represented in the kernel - * by a struct vm_area_struct). Used to keep an information - * about virtual address space ranges covered by - * MADV_GUARD_INSTALL guards. These ones must be always at - * the end of the vma_area_list and properly skipped a.e. 
- * - uprobes - * stands for a "[uprobes]" vma that's automatically mapped by - * the kernel when an active uprobe is hit. Contents of this vma - * are not dumped and neither are its madvise bits restored, - * because the kernel is in complete control of this vma. This is - * just used to track the existence of the uprobes vma. */ #define VMA_AREA_NONE (0 << 0) #define VMA_AREA_REGULAR (1 << 0) @@ -99,8 +87,6 @@ #define VMA_AREA_AIORING (1 << 13) #define VMA_AREA_MEMFD (1 << 14) #define VMA_AREA_SHSTK (1 << 15) -#define VMA_AREA_GUARD (1 << 16) -#define VMA_AREA_UPROBES (1 << 17) #define VMA_EXT_PLUGIN (1 << 27) #define VMA_CLOSE (1 << 28) @@ -114,8 +100,6 @@ #define CR_PARENT_LINK "parent" -#define OPT_ALLOW_UPROBES "allow-uprobes" - extern bool ns_per_id; extern bool img_common_magic; @@ -165,7 +149,7 @@ static inline int img_raw_fd(struct cr_img *img) extern off_t img_raw_size(struct cr_img *img); -extern int open_image_dir(const char *dir, int mode); +extern int open_image_dir(char *dir, int mode); extern void close_image_dir(void); /* * Return -1 -- parent symlink points to invalid target @@ -193,8 +177,4 @@ extern int read_img_str(struct cr_img *, char **pstr, int size); extern void close_image(struct cr_img *); -extern int add_inventory_plugin(const char *name); -extern int check_inventory_plugins(void); -extern bool check_and_remove_inventory_plugin(const char *name, size_t n); - #endif /* __CR_IMAGE_H__ */ diff --git a/criu/include/kerndat.h b/criu/include/kerndat.h index e4922f401..e03a57341 100644 --- a/criu/include/kerndat.h +++ b/criu/include/kerndat.h @@ -89,10 +89,6 @@ struct kerndat_s { bool has_pagemap_scan; bool has_shstk; bool has_close_range; - bool has_timer_cr_ids; - bool has_breakpoints; - bool has_madv_guard; - bool has_pagemap_scan_guard_pages; }; extern struct kerndat_s kdat; @@ -115,6 +111,4 @@ extern int kerndat_fs_virtualized(unsigned int which, u32 kdev); extern int kerndat_has_nspid(void); -extern void 
kerndat_warn_about_madv_guards(void); - #endif /* __CR_KERNDAT_H__ */ diff --git a/criu/include/magic.h b/criu/include/magic.h index 6f0aff26d..0e8c37234 100644 --- a/criu/include/magic.h +++ b/criu/include/magic.h @@ -100,7 +100,6 @@ #define BPFMAP_FILE_MAGIC 0x57506142 /* Alapayevsk */ #define BPFMAP_DATA_MAGIC 0x64324033 /* Arkhangelsk */ #define APPARMOR_MAGIC 0x59423047 /* Nikolskoye */ -#define PIDFD_MAGIC 0x54435556 /* Ufa */ #define IFADDR_MAGIC RAW_IMAGE_MAGIC #define ROUTE_MAGIC RAW_IMAGE_MAGIC diff --git a/criu/include/mem.h b/criu/include/mem.h index e9ce3518a..3618c9cc3 100644 --- a/criu/include/mem.h +++ b/criu/include/mem.h @@ -31,12 +31,10 @@ extern int do_task_reset_dirty_track(int pid); extern unsigned long dump_pages_args_size(struct vm_area_list *vmas); extern int parasite_dump_pages_seized(struct pstree_item *item, struct vm_area_list *vma_area_list, struct mem_dump_ctl *mdc, struct parasite_ctl *ctl); -extern int collect_madv_guards(pid_t pid, struct vm_area_list *vma_area_list); #define PME_PRESENT (1ULL << 63) #define PME_SWAP (1ULL << 62) #define PME_FILE (1ULL << 61) -#define PME_GUARD_REGION (1ULL << 58) #define PME_SOFT_DIRTY (1ULL << 55) #define PME_PSHIFT_BITS (6) #define PME_STATUS_BITS (3) @@ -51,11 +49,5 @@ int prepare_vmas(struct pstree_item *t, struct task_restore_args *ta); int unmap_guard_pages(struct pstree_item *t); int prepare_mappings(struct pstree_item *t); -struct page_info { - u64 next; - bool softdirty; -}; - -int should_dump_page(pmc_t *pmc, VmaEntry *vmae, u64 vaddr, struct page_info *page_info); - +u64 should_dump_page(pmc_t *pmc, VmaEntry *vmae, u64 vaddr, bool *softdirty); #endif /* __CR_MEM_H__ */ diff --git a/criu/include/mman.h b/criu/include/mman.h index 43e0b6cc7..8ca71fadf 100644 --- a/criu/include/mman.h +++ b/criu/include/mman.h @@ -4,9 +4,6 @@ #ifndef MAP_HUGETLB #define MAP_HUGETLB 0x40000 #endif -#ifndef MAP_DROPPABLE -#define MAP_DROPPABLE 0x08 -#endif #ifndef MADV_HUGEPAGE #define MADV_HUGEPAGE 14 
#endif @@ -16,11 +13,5 @@ #ifndef MADV_DONTDUMP #define MADV_DONTDUMP 16 #endif -#ifndef MADV_WIPEONFORK -#define MADV_WIPEONFORK 18 -#endif -#ifndef MADV_GUARD_INSTALL -#define MADV_GUARD_INSTALL 102 -#endif #endif /* __CR_MMAN_H__ */ diff --git a/criu/include/net.h b/criu/include/net.h index 7c5ede21e..5e8a84862 100644 --- a/criu/include/net.h +++ b/criu/include/net.h @@ -31,7 +31,7 @@ extern int collect_net_namespaces(bool for_dump); extern int network_lock(void); extern void network_unlock(void); -extern int network_lock_internal(bool restore); +extern int network_lock_internal(void); extern struct ns_desc net_ns_desc; diff --git a/criu/include/page-pipe.h b/criu/include/page-pipe.h index 65292b7ab..15178c015 100644 --- a/criu/include/page-pipe.h +++ b/criu/include/page-pipe.h @@ -92,9 +92,9 @@ struct kernel_pipe_buffer { struct page_pipe_buf { int p[2]; /* pipe with pages */ unsigned int pipe_size; /* how many pages can be fit into pipe */ + unsigned int pipe_off; /* where this buf is started in a pipe */ + unsigned int pages_in; /* how many pages are there */ unsigned int nr_segs; /* how many iov-s are busy */ - unsigned long pipe_off; /* where this buf is started in a pipe */ - unsigned long pages_in; /* how many pages are there */ #define PPB_LAZY (1 << 0) unsigned int flags; struct iovec *iov; /* vaddr:len map */ @@ -149,7 +149,7 @@ struct pipe_read_dest { }; extern int pipe_read_dest_init(struct pipe_read_dest *prd); -extern int page_pipe_read(struct page_pipe *pp, struct pipe_read_dest *prd, unsigned long addr, unsigned long *nr_pages, +extern int page_pipe_read(struct page_pipe *pp, struct pipe_read_dest *prd, unsigned long addr, unsigned int *nr_pages, unsigned int ppb_flags); #endif /* __CR_PAGE_PIPE_H__ */ diff --git a/criu/include/page-xfer.h b/criu/include/page-xfer.h index 0d9b35019..36fe67092 100644 --- a/criu/include/page-xfer.h +++ b/criu/include/page-xfer.h @@ -69,9 +69,9 @@ extern int check_parent_page_xfer(int fd_type, unsigned long id); */ 
/* async request/receive of remote pages */ -extern int request_remote_pages(unsigned long img_id, unsigned long addr, unsigned long nr_pages); +extern int request_remote_pages(unsigned long img_id, unsigned long addr, int nr_pages); -typedef int (*ps_async_read_complete)(unsigned long img_id, unsigned long vaddr, unsigned long nr_pages, void *); -extern int page_server_start_read(void *buf, unsigned long nr_pages, ps_async_read_complete complete, void *priv, unsigned flags); +typedef int (*ps_async_read_complete)(unsigned long img_id, unsigned long vaddr, int nr_pages, void *); +extern int page_server_start_read(void *buf, int nr_pages, ps_async_read_complete complete, void *priv, unsigned flags); #endif /* __CR_PAGE_XFER__H__ */ diff --git a/criu/include/pagemap.h b/criu/include/pagemap.h index 4cbc87cc6..8c7180559 100644 --- a/criu/include/pagemap.h +++ b/criu/include/pagemap.h @@ -44,7 +44,7 @@ struct page_read { /* reads page from current pagemap */ - int (*read_pages)(struct page_read *, unsigned long vaddr, unsigned long nr, void *, unsigned flags); + int (*read_pages)(struct page_read *, unsigned long vaddr, int nr, void *, unsigned flags); /* Advance page_read to the next entry */ int (*advance)(struct page_read *pr); void (*close)(struct page_read *); @@ -52,15 +52,12 @@ struct page_read { int (*sync)(struct page_read *pr); int (*seek_pagemap)(struct page_read *pr, unsigned long vaddr); void (*reset)(struct page_read *pr); - int (*io_complete)(struct page_read *, unsigned long vaddr, unsigned long nr); - int (*maybe_read_page)(struct page_read *pr, unsigned long vaddr, unsigned long nr, void *buf, unsigned flags); + int (*io_complete)(struct page_read *, unsigned long vaddr, int nr); + int (*maybe_read_page)(struct page_read *pr, unsigned long vaddr, int nr, void *buf, unsigned flags); /* Whether or not pages can be read in PIE code */ bool pieok; - /* Whether or not disable image deduplication*/ - bool disable_dedup; - /* Private data of reader */ struct 
cr_img *pmi; struct cr_img *pi; @@ -115,8 +112,6 @@ int pagemap_render_iovec(struct list_head *from, struct task_restore_args *ta); */ extern void dup_page_read(struct page_read *src, struct page_read *dst); -extern void page_read_disable_dedup(struct page_read *pr); - extern int dedup_one_iovec(struct page_read *pr, unsigned long base, unsigned long len); static inline unsigned long pagemap_len(PagemapEntry *pe) diff --git a/criu/include/pagemap_scan.h b/criu/include/pagemap_scan.h index 9046e01ed..0ad4c9bc0 100644 --- a/criu/include/pagemap_scan.h +++ b/criu/include/pagemap_scan.h @@ -14,7 +14,6 @@ #define PAGE_IS_PFNZERO (1 << 5) #define PAGE_IS_HUGE (1 << 6) #define PAGE_IS_SOFT_DIRTY (1 << 7) -#define PAGE_IS_GUARD (1 << 8) /* * struct page_region - Page region with flags diff --git a/criu/include/parasite.h b/criu/include/parasite.h index 176357711..1244220f6 100644 --- a/criu/include/parasite.h +++ b/criu/include/parasite.h @@ -63,7 +63,7 @@ struct parasite_dump_pages_args { unsigned int add_prot; unsigned int off; unsigned int nr_segs; - unsigned long nr_pages; + unsigned int nr_pages; }; static inline struct parasite_vma_entry *pargs_vmas(struct parasite_dump_pages_args *a) @@ -148,7 +148,6 @@ struct parasite_dump_creds { u32 cap_prm[CR_CAP_SIZE]; u32 cap_eff[CR_CAP_SIZE]; u32 cap_bnd[CR_CAP_SIZE]; - u32 cap_amb[CR_CAP_SIZE]; int uids[4]; int gids[4]; diff --git a/criu/include/pidfd.h b/criu/include/pidfd.h deleted file mode 100644 index bcc0fb45a..000000000 --- a/criu/include/pidfd.h +++ /dev/null @@ -1,16 +0,0 @@ -#ifndef __CR_PIDFD_H__ -#define __CR_PIDFD_H__ - -#include "files.h" -#include "pidfd.pb-c.h" - -extern const struct fdtype_ops pidfd_dump_ops; -extern struct collect_image_info pidfd_cinfo; -extern int is_pidfd_link(char *link); -extern void init_dead_pidfd_hash(void); -struct pidfd_dump_info { - PidfdEntry pidfe; - pid_t pid; -}; - -#endif /* __CR_PIDFD_H__ */ diff --git a/criu/include/prctl.h b/criu/include/prctl.h index 2966659da..4c2a548b1 
100644 --- a/criu/include/prctl.h +++ b/criu/include/prctl.h @@ -36,15 +36,6 @@ #ifndef PR_SET_NO_NEW_PRIVS #define PR_SET_NO_NEW_PRIVS 38 #endif -#ifndef PR_CAP_AMBIENT -#define PR_CAP_AMBIENT 47 -#endif -#ifndef PR_CAP_AMBIENT_IS_SET -#define PR_CAP_AMBIENT_IS_SET 1 -#endif -#ifndef PR_CAP_AMBIENT_RAISE -#define PR_CAP_AMBIENT_RAISE 2 -#endif #ifndef PR_SET_MM #define PR_SET_MM 35 @@ -97,11 +88,4 @@ struct prctl_mm_map { #define PR_GET_THP_DISABLE 42 #endif -#ifndef PR_TIMER_CREATE_RESTORE_IDS -#define PR_TIMER_CREATE_RESTORE_IDS 77 -# define PR_TIMER_CREATE_RESTORE_IDS_OFF 0 -# define PR_TIMER_CREATE_RESTORE_IDS_ON 1 -# define PR_TIMER_CREATE_RESTORE_IDS_GET 2 -#endif - #endif /* __CR_PRCTL_H__ */ diff --git a/criu/include/proc_parse.h b/criu/include/proc_parse.h index 76d3242d2..0c334a190 100644 --- a/criu/include/proc_parse.h +++ b/criu/include/proc_parse.h @@ -81,7 +81,6 @@ struct proc_status_creds { u32 cap_prm[PROC_CAP_SIZE]; u32 cap_eff[PROC_CAP_SIZE]; u32 cap_bnd[PROC_CAP_SIZE]; - u32 cap_amb[PROC_CAP_SIZE]; }; #define INVALID_UID ((uid_t)-1) @@ -105,6 +104,4 @@ extern int parse_uptime(uint64_t *upt); extern int parse_timens_offsets(struct timespec *boff, struct timespec *moff); -extern bool found_uprobes_vma(void); - #endif /* __CR_PROC_PARSE_H__ */ diff --git a/criu/include/protobuf-desc.h b/criu/include/protobuf-desc.h index c4241be55..3824de101 100644 --- a/criu/include/protobuf-desc.h +++ b/criu/include/protobuf-desc.h @@ -70,7 +70,6 @@ enum { PB_BPFMAP_FILE, PB_BPFMAP_DATA, PB_APPARMOR, - PB_PIDFD, /* PB_AUTOGEN_STOP */ diff --git a/criu/include/pstree.h b/criu/include/pstree.h index b750a919e..1137046d4 100644 --- a/criu/include/pstree.h +++ b/criu/include/pstree.h @@ -104,7 +104,6 @@ extern void pstree_insert_pid(struct pid *pid_node); extern struct pid *pstree_pid_by_virt(pid_t pid); extern struct pstree_item *root_item; -extern bool has_children(struct pstree_item *item); extern struct pstree_item *pstree_item_next(struct pstree_item *item); 
#define for_each_pstree_item(pi) for (pi = root_item; pi != NULL; pi = pstree_item_next(pi)) diff --git a/criu/include/rbtree.h b/criu/include/rbtree.h index 6981aa8f9..ba0a8100e 100644 --- a/criu/include/rbtree.h +++ b/criu/include/rbtree.h @@ -14,7 +14,7 @@ #define RB_MASK 3 struct rb_node { - unsigned long rb_parent_color; /* Keeps both parent and color */ + unsigned long rb_parent_color; /* Keeps both parent anc color */ struct rb_node *rb_right; struct rb_node *rb_left; } __aligned(sizeof(long)); diff --git a/criu/include/restore.h b/criu/include/restore.h index 189051826..04d006505 100644 --- a/criu/include/restore.h +++ b/criu/include/restore.h @@ -9,7 +9,6 @@ extern int arch_set_thread_regs_nosigrt(struct pid *pid); struct task_restore_args; struct pstree_item; -struct rst_shstk_info; #ifndef arch_shstk_prepare static inline int arch_shstk_prepare(struct pstree_item *item, @@ -39,25 +38,4 @@ static inline int arch_shstk_trampoline(struct pstree_item *item, CoreEntry *cor #define arch_shstk_trampoline arch_shstk_trampoline #endif -#ifndef shstk_restorer_stack_size -static always_inline long shstk_restorer_stack_size(void) -{ - return 0; -} -#endif - -#ifndef shstk_set_restorer_stack -static always_inline long shstk_set_restorer_stack(struct rst_shstk_info *info, void *ptr) -{ - return 0; -} -#endif - -#ifndef shstk_min_mmap_addr -static always_inline long shstk_min_mmap_addr(struct rst_shstk_info *info, unsigned long def) -{ - return def; -} -#endif - #endif diff --git a/criu/include/restorer.h b/criu/include/restorer.h index 14c0a3768..3fb5322a4 100644 --- a/criu/include/restorer.h +++ b/criu/include/restorer.h @@ -75,7 +75,6 @@ struct thread_creds_args { u32 cap_prm[CR_CAP_SIZE]; u32 cap_eff[CR_CAP_SIZE]; u32 cap_bnd[CR_CAP_SIZE]; - u32 cap_amb[CR_CAP_SIZE]; char *lsm_profile; unsigned int *groups; @@ -170,7 +169,6 @@ struct task_restore_args { struct restore_posix_timer *posix_timers; unsigned int posix_timers_n; - bool posix_timer_cr_ids; struct 
restore_timerfd *timerfd; unsigned int timerfd_n; @@ -357,11 +355,4 @@ static inline int arch_shstk_restore(struct rst_shstk_info *shstk) #define arch_shstk_restore arch_shstk_restore #endif -#ifndef shstk_vma_restore -static always_inline int shstk_vma_restore(VmaEntry *vma_entry) -{ - return -1; -} -#endif - #endif /* __CR_RESTORER_H__ */ diff --git a/criu/include/rst_info.h b/criu/include/rst_info.h index deb297e5f..59b891fa2 100644 --- a/criu/include/rst_info.h +++ b/criu/include/rst_info.h @@ -1,7 +1,6 @@ #ifndef __CR_RST_INFO_H__ #define __CR_RST_INFO_H__ -#include "asm/restore.h" #include "common/lock.h" #include "common/list.h" #include "vma.h" @@ -15,7 +14,6 @@ struct task_entries { futex_t start; atomic_t cr_err; mutex_t userns_sync_lock; - mutex_t cgroupd_sync_lock; mutex_t last_pid_mutex; }; @@ -24,7 +22,7 @@ struct fdt { pid_t pid; /* Who should restore this fd table */ /* * The fd table is ready for restoing, if fdt_lock is equal to nr - * The fdt table was restored, if fdt_lock is equal to nr + 1 + * The fdt table was restrored, if fdt_lock is equal to nr + 1 */ futex_t fdt_lock; }; @@ -34,11 +32,6 @@ struct rst_rseq { uint64_t rseq_cs_pointer; }; -#ifndef ARCH_RST_INFO -struct rst_arch_info { -}; -#endif - struct rst_info { struct list_head fds; @@ -86,8 +79,6 @@ struct rst_info { futex_t shstk_unlock; void *breakpoint; - - struct rst_arch_info arch_info; }; extern struct task_entries *task_entries; diff --git a/criu/include/seize.h b/criu/include/seize.h index fc7facad3..f5ea76b16 100644 --- a/criu/include/seize.h +++ b/criu/include/seize.h @@ -2,7 +2,6 @@ #define __CR_SEIZE_H__ extern int collect_pstree(void); -extern int checkpoint_devices(void); struct pstree_item; extern void pstree_switch_state(struct pstree_item *root_item, int st); extern const char *get_real_freezer_state(void); @@ -10,6 +9,6 @@ extern bool alarm_timeouted(void); extern char *task_comm_info(pid_t pid, char *comm, size_t size); extern char *__task_comm_info(pid_t pid); 
-extern void set_compel_interrupt_only_mode(void); +extern void dont_use_freeze_cgroup(void); #endif diff --git a/criu/include/sockets.h b/criu/include/sockets.h index 6c81d3edd..c3e7c879a 100644 --- a/criu/include/sockets.h +++ b/criu/include/sockets.h @@ -25,7 +25,7 @@ struct socket_desc { }; extern int dump_socket(struct fd_parms *p, int lfd, FdinfoEntry *); -extern int dump_socket_opts(int sk, int family, SkOptsEntry *soe); +extern int dump_socket_opts(int sk, SkOptsEntry *soe); extern int restore_socket_opts(int sk, SkOptsEntry *soe); extern int sk_setbufs(int sk, uint32_t *bufs); extern void release_skopts(SkOptsEntry *); diff --git a/criu/include/sysctl.h b/criu/include/sysctl.h index 2d689a9a0..cb3eba817 100644 --- a/criu/include/sysctl.h +++ b/criu/include/sysctl.h @@ -37,6 +37,6 @@ enum { #define CTL_FLAGS_OPTIONAL 1 #define CTL_FLAGS_HAS 2 #define CTL_FLAGS_READ_EIO_SKIP 4 -#define CTL_FLAGS_IPC_EACCES_SKIP 8 +#define CTL_FLAGS_IPC_EACCES_SKIP 5 #endif /* __CR_SYSCTL_H__ */ diff --git a/criu/include/util-vdso.h b/criu/include/util-vdso.h index 9fd9a6de4..c4386cf8e 100644 --- a/criu/include/util-vdso.h +++ b/criu/include/util-vdso.h @@ -30,7 +30,6 @@ struct vdso_symbol { struct vdso_symtable { unsigned long vdso_size; unsigned long vvar_size; - unsigned long vvar_vclock_size; struct vdso_symbol symbols[VDSO_SYMBOL_MAX]; bool vdso_before_vvar; /* order of vdso/vvar pair */ }; diff --git a/criu/include/util.h b/criu/include/util.h index 55ad5b63c..ae293a68c 100644 --- a/criu/include/util.h +++ b/criu/include/util.h @@ -21,8 +21,6 @@ #include "log.h" #include "common/err.h" -#include "compel/infect-util.h" - #define PREF_SHIFT_OP(pref, op, size) ((size)op(pref##BYTES_SHIFT)) #define KBYTES_SHIFT 10 #define MBYTES_SHIFT 20 @@ -408,24 +406,14 @@ static inline void cleanup_freep(void *p) free(*pp); } -#define cleanup_file __attribute__((cleanup(cleanup_filep))) -static inline void cleanup_filep(FILE **f) -{ - FILE *file = *f; - if (file) - (void)fclose(file); 
-} - extern int run_command(char *buf, size_t buf_size, int (*child_fn)(void *), void *args); /* * criu_run_id is a unique value of the current run. It can be used to * generate resource ID-s to avoid conflicts with other CRIU processes. */ -extern char criu_run_id[RUN_ID_HASH_LENGTH]; +extern uint64_t criu_run_id; extern void util_init(void); -#define NO_DUMP_CRIU_RUN_ID 0x7f -extern char dump_criu_run_id[RUN_ID_HASH_LENGTH]; extern char *resolve_mountpoint(char *path); diff --git a/criu/kerndat.c b/criu/kerndat.c index 2dc2f77d5..fa1ed21fa 100644 --- a/criu/kerndat.c +++ b/criu/kerndat.c @@ -31,7 +31,6 @@ #include "kerndat.h" #include "fs-magic.h" #include "mem.h" -#include "mman.h" #include "common/compiler.h" #include "sysctl.h" #include "cr_options.h" @@ -87,10 +86,6 @@ static int check_pagemap(void) if (ioctl(fd, PAGEMAP_SCAN, &args) == 0) { pr_debug("PAGEMAP_SCAN is supported\n"); kdat.has_pagemap_scan = true; - - args.return_mask |= PAGE_IS_GUARD; - if (ioctl(fd, PAGEMAP_SCAN, &args) == 0) - kdat.has_pagemap_scan_guard_pages = true; } else { switch (errno) { case EINVAL: @@ -651,7 +646,7 @@ static int kerndat_loginuid(void) static int kerndat_iptables_has_xtlocks(void) { int fd; - char *argv[4] = { "sh", "-c", "iptables -n -w -L", NULL }; + char *argv[4] = { "sh", "-c", "iptables -w -L", NULL }; fd = open("/dev/null", O_RDWR); if (fd < 0) { @@ -1725,134 +1720,6 @@ static int kerndat_has_close_range(void) return 0; } -static int kerndat_has_timer_cr_ids(void) -{ - if (prctl(PR_TIMER_CREATE_RESTORE_IDS, - PR_TIMER_CREATE_RESTORE_IDS_GET, 0, 0, 0) == -1) { - if (errno == EINVAL) { - pr_debug("PR_TIMER_CREATE_RESTORE_IDS isn't supported\n"); - return 0; - } - pr_perror("prctl returned unexpected error code"); - return -1; - } - - kdat.has_timer_cr_ids = true; - return 0; -} - -static void breakpoint_func(void) -{ - if (raise(SIGSTOP)) - pr_perror("Unable to kill itself with SIGSTOP"); - exit(1); -} - -/* - * kerndat_breakpoints checks that hardware breakpoints 
work as they should. - * In some cases, they might not work in virtual machines if the hypervisor - * doesn't virtualize them. For example, they don't work in AMD SEV virtual - * machines if the Debug Virtualization extension isn't supported or isn't - * enabled in SEV_FEATURES. - */ -static int kerndat_breakpoints(void) -{ - int status, ret, exit_code = -1; - pid_t pid; - - pid = fork(); - if (pid == -1) { - pr_perror("fork"); - return -1; - } - if (pid == 0) { - if (ptrace(PTRACE_TRACEME, 0, 0, 0)) { - pr_perror("ptrace(PTRACE_TRACEME)"); - exit(1); - } - raise(SIGSTOP); - breakpoint_func(); - exit(1); - } - if (waitpid(pid, &status, 0) == -1) { - pr_perror("waitpid for initial stop"); - goto err; - } - if (!WIFSTOPPED(status) || WSTOPSIG(status) != SIGSTOP) { - pr_err("Child didn't stop as expected: status=%x\n", status); - goto err; - } - ret = ptrace_set_breakpoint(pid, &breakpoint_func); - if (ret < 0) { - pr_err("Failed to set breakpoint\n"); - goto err; - } - if (ret == 0) { - pr_debug("Hardware breakpoints appear to be disabled\n"); - goto out; - } - if (waitpid(pid, &status, 0) == -1) { - pr_perror("waitpid for breakpoint trigger"); - goto err; - } - if (!WIFSTOPPED(status) || WSTOPSIG(status) != SIGTRAP) { - pr_warn("Hardware breakpoints don't seem to work (status=%x)\n", status); - goto out; - } - kdat.has_breakpoints = true; -out: - exit_code = 0; -err: - if (kill(pid, SIGKILL)) { - pr_perror("Failed to kill the child process"); - exit_code = -1; - } - if (waitpid(pid, &status, 0) == -1) { - pr_perror("Failed to wait for the child process"); - exit_code = -1; - } - if (!WIFSIGNALED(status) || WTERMSIG(status) != SIGKILL) { - pr_err("The child exited with unexpected code: %x\n", status); - exit_code = -1; - } - return exit_code; -} - -static int kerndat_has_madv_guard(void) -{ - void *map; - - map = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0); - if (map == MAP_FAILED) { - pr_perror("Can't mmap a page for 
has_madv_guard feature test"); - return -1; - } - - if (madvise(map, PAGE_SIZE, MADV_GUARD_INSTALL)) { - if (errno != EINVAL) { - pr_perror("madvise failed (has_madv_guard check)"); - goto mmap_cleanup; - } - } else { - kdat.has_madv_guard = true; - } - - munmap(map, PAGE_SIZE); - return 0; - -mmap_cleanup: - munmap(map, PAGE_SIZE); - return -1; -} - -void kerndat_warn_about_madv_guards(void) -{ - if (kdat.has_madv_guard && !kdat.has_pagemap_scan_guard_pages) - pr_warn("ioctl(PAGEMAP_SCAN) doesn't support PAGE_IS_GUARD flag. " - "CRIU dump will fail if dumped processes use madvise(MADV_GUARD_INSTALL). " - "Please, consider updating your kernel.\n"); -} - /* * Some features depend on resource that can be dynamically changed * at the OS runtime. There are cases that we cannot determine the @@ -2114,18 +1981,6 @@ int kerndat_init(void) pr_err("kerndat_has_close_range has failed when initializing kerndat.\n"); ret = -1; } - if (!ret && kerndat_has_timer_cr_ids()) { - pr_err("kerndat_has_timer_cr_ids has failed when initializing kerndat.\n"); - ret = -1; - } - if (!ret && kerndat_breakpoints()) { - pr_err("kerndat_breakpoints has failed when initializing kerndat.\n"); - ret = -1; - } - if (!ret && kerndat_has_madv_guard()) { - pr_err("kerndat_has_madv_guard has failed when initializing kerndat.\n"); - ret = -1; - } kerndat_lsm(); kerndat_mmap_min_addr(); diff --git a/criu/log.c b/criu/log.c index bf6f657f2..89ae8f820 100644 --- a/criu/log.c +++ b/criu/log.c @@ -10,7 +10,6 @@ #include #include #include -#include #include @@ -115,9 +114,6 @@ static struct str_and_lock *first_err; int log_keep_err(void) { - if (first_err) - return 0; - first_err = shmalloc(sizeof(struct str_and_lock)); if (first_err == NULL) return -1; @@ -136,11 +132,10 @@ static void log_note_err(char *msg) * anyway, so it doesn't make much sense to try hard * and optimize this out. 
*/ - if (mutex_trylock(&first_err->l)) { - if (first_err->s[0] == '\0') - __strlcpy(first_err->s, msg, sizeof(first_err->s)); - mutex_unlock(&first_err->l); - } + mutex_lock(&first_err->l); + if (first_err->s[0] == '\0') + __strlcpy(first_err->s, msg, sizeof(first_err->s)); + mutex_unlock(&first_err->l); } } @@ -190,7 +185,7 @@ void flush_early_log_buffer(int fd) * with reading the log_level. */ struct early_log_hdr *hdr = (void *)early_log_buffer + pos; - pos += sizeof(*hdr); + pos += sizeof(hdr); if (hdr->level <= current_loglevel) { size_t size = 0; while (size < hdr->len) { @@ -202,7 +197,7 @@ void flush_early_log_buffer(int fd) } pos += hdr->len; } - if ((early_log_buf_off + sizeof(struct early_log_hdr)) >= EARLY_LOG_BUF_LEN) + if (early_log_buf_off == EARLY_LOG_BUF_LEN) pr_warn("The early log buffer is full, some messages may have been lost\n"); early_log_buf_off = 0; } @@ -320,10 +315,10 @@ unsigned int log_get_loglevel(void) static void early_vprint(const char *format, unsigned int loglevel, va_list params) { - int log_size = 0, log_space; + unsigned int log_size = 0; struct early_log_hdr *hdr; - if ((early_log_buf_off + sizeof(*hdr)) >= EARLY_LOG_BUF_LEN) + if ((early_log_buf_off + sizeof(hdr)) >= EARLY_LOG_BUF_LEN) return; /* Save loglevel */ @@ -331,8 +326,7 @@ static void early_vprint(const char *format, unsigned int loglevel, va_list para hdr = (void *)early_log_buffer + early_log_buf_off; hdr->level = loglevel; /* Skip the log entry size */ - early_log_buf_off += sizeof(*hdr); - log_space = EARLY_LOG_BUF_LEN - early_log_buf_off; + early_log_buf_off += sizeof(hdr); if (loglevel >= LOG_TIMESTAMP) { /* * If logging is not yet setup we just write zeros @@ -340,17 +334,12 @@ static void early_vprint(const char *format, unsigned int loglevel, va_list para * keep the same format as the other messages on * log levels with timestamps (>=LOG_TIMESTAMP). 
*/ - log_size = snprintf(early_log_buffer + early_log_buf_off, log_space, + log_size = snprintf(early_log_buffer + early_log_buf_off, sizeof(early_log_buffer) - early_log_buf_off, "(00.000000) "); } - if (log_size < log_space) - log_size += vsnprintf(early_log_buffer + early_log_buf_off + log_size, - log_space - log_size, format, params); - if (log_size > log_space) { - /* vsnprintf always add the terminating null byte. */ - log_size = log_space - 1; - } + log_size += vsnprintf(early_log_buffer + early_log_buf_off + log_size, + sizeof(early_log_buffer) - early_log_buf_off - log_size, format, params); /* Save log entry size */ hdr->len = log_size; diff --git a/criu/lsm.c b/criu/lsm.c index 5faf3e5b2..d1b73cc79 100644 --- a/criu/lsm.c +++ b/criu/lsm.c @@ -29,9 +29,7 @@ static int apparmor_get_label(pid_t pid, char **profile_name) FILE *f; char *space; - f = fopen_proc(pid, "attr/apparmor/current"); - if (!f) - f = fopen_proc(pid, "attr/current"); + f = fopen_proc(pid, "attr/current"); if (!f) return -1; @@ -372,7 +370,7 @@ int render_lsm_profile(char *profile, char **val) case LSMTYPE__APPARMOR: return render_aa_profile(val, profile); case LSMTYPE__SELINUX: - if (asprintf(val, "%s", opts.lsm_supplied ? opts.lsm_profile : profile) < 0) { + if (asprintf(val, "%s", profile) < 0) { *val = NULL; return -1; } diff --git a/criu/mem.c b/criu/mem.c index 9e8740c07..c9578ef44 100644 --- a/criu/mem.c +++ b/criu/mem.c @@ -10,7 +10,6 @@ #include "cr_options.h" #include "servicefd.h" #include "mem.h" -#include "mman.h" #include "parasite-syscall.h" #include "parasite.h" #include "page-pipe.h" @@ -115,74 +114,44 @@ static bool should_dump_entire_vma(VmaEntry *vmae) } /* - * should_dump_page writes vaddr in page_info->next if an addressed page has to be dumped. - * Otherwise, it writes an address that has to be inspected next. + * should_dump_page returns vaddr if an addressed page has to be dumped. + * Otherwise, it returns an address that has to be inspected next. 
*/ -int should_dump_page(pmc_t *pmc, VmaEntry *vmae, u64 vaddr, struct page_info *page_info) +u64 should_dump_page(pmc_t *pmc, VmaEntry *vmae, u64 vaddr, bool *softdirty) { - if (!page_info) - goto err; - if (vaddr >= pmc->end && pmc_fill(pmc, vaddr, vmae->end)) - goto err; + return -1; if (pmc->regs) { while (1) { - if (pmc->regs_idx == pmc->regs_len) { - page_info->next = pmc->end; - return 0; - } - + if (pmc->regs_idx == pmc->regs_len) + return pmc->end; if (vaddr < pmc->regs[pmc->regs_idx].end) break; pmc->regs_idx++; } - - if (vaddr < pmc->regs[pmc->regs_idx].start) { - page_info->next = pmc->regs[pmc->regs_idx].start; - return 0; - } - - if (pmc->regs[pmc->regs_idx].categories & PAGE_IS_GUARD) - goto skip_guard_page; - - page_info->softdirty = pmc->regs[pmc->regs_idx].categories & PAGE_IS_SOFT_DIRTY; - page_info->next = vaddr; - return 0; + if (vaddr < pmc->regs[pmc->regs_idx].start) + return pmc->regs[pmc->regs_idx].start; + if (softdirty) + *softdirty = pmc->regs[pmc->regs_idx].categories & PAGE_IS_SOFT_DIRTY; + return vaddr; } else { u64 pme = pmc->map[PAGE_PFN(vaddr - pmc->start)]; - if (pme & PME_GUARD_REGION) - goto skip_guard_page; - /* * Optimisation for private mapping pages, that haven't * yet being COW-ed */ - if (vma_entry_is(vmae, VMA_FILE_PRIVATE) && (pme & PME_FILE)) { - page_info->next = vaddr + PAGE_SIZE; - return 0; - } - + if (vma_entry_is(vmae, VMA_FILE_PRIVATE) && (pme & PME_FILE)) + return vaddr + PAGE_SIZE; if ((pme & (PME_PRESENT | PME_SWAP)) && !__page_is_zero(pme)) { - page_info->softdirty = pme & PME_SOFT_DIRTY; - page_info->next = vaddr; - return 0; + if (softdirty) + *softdirty = pme & PME_SOFT_DIRTY; + return vaddr; } - page_info->next = vaddr + PAGE_SIZE; - return 0; + return vaddr + PAGE_SIZE; } - -err: - pr_err("should_dump_page failed on vma " - "%#016" PRIx64 "-%#016" PRIx64 " vaddr=%#016" PRIx64 "\n", - vmae->start, vmae->end, vaddr); - return -1; - -skip_guard_page: - page_info->next = vaddr + PAGE_SIZE; - return 0; } bool 
page_is_zero(u64 pme) @@ -232,15 +201,14 @@ static int generate_iovs(struct pstree_item *item, struct vma_area *vma, struct nr_scanned = 0; for (vaddr = *pvaddr; vaddr < vma->e->end; vaddr += PAGE_SIZE, nr_scanned++) { unsigned int ppb_flags = 0; - struct page_info page_info = {}; + bool softdirty = false; + u64 next; int st; /* If dump_all_pages is true, should_dump_page is called to get pme. */ - if (should_dump_page(pmc, vma->e, vaddr, &page_info)) - return -1; - - if (!dump_all_pages && page_info.next != vaddr) { - vaddr = page_info.next - PAGE_SIZE; + next = should_dump_page(pmc, vma->e, vaddr, &softdirty); + if (!dump_all_pages && next != vaddr) { + vaddr = next - PAGE_SIZE; continue; } @@ -254,7 +222,7 @@ static int generate_iovs(struct pstree_item *item, struct vma_area *vma, struct * page. The latter would be checked in page-xfer. */ - if (has_parent && page_in_parent(page_info.softdirty)) { + if (has_parent && page_in_parent(softdirty)) { ret = page_pipe_add_hole(pp, vaddr, PP_HOLE_PARENT); st = 0; } else { @@ -336,7 +304,7 @@ static int drain_pages(struct page_pipe *pp, struct parasite_ctl *ctl, struct pa list_for_each_entry(ppb, &pp->bufs, l) { args->nr_segs = ppb->nr_segs; args->nr_pages = ppb->pages_in; - pr_debug("PPB: %ld pages %d segs %u pipe %d off\n", args->nr_pages, args->nr_segs, ppb->pipe_size, + pr_debug("PPB: %d pages %d segs %u pipe %d off\n", args->nr_pages, args->nr_segs, ppb->pipe_size, args->off); ret = compel_rpc_call(PARASITE_CMD_DUMPPAGES, ctl); @@ -430,17 +398,6 @@ static int generate_vma_iovs(struct pstree_item *item, struct vma_area *vma, str if (vma_entry_is(vma->e, VMA_AREA_VVAR)) return 0; - /* - * 9651fcedf7b9 ("mm: add MAP_DROPPABLE for designating always lazily freeable mappings") - * tells us that: - * Under memory pressure, mm can just drop the pages (so that they're - * zero when read back again). - * - * Let's just skip MAP_DROPPABLE mappings pages dump logic. 
- */ - if (vma->e->flags & MAP_DROPPABLE) - return 0; - /* * To facilitate any combination of pre-dump modes to run after * one another, we need to take extra care as discussed below. @@ -599,9 +556,6 @@ static int __parasite_dump_pages_seized(struct pstree_item *item, struct parasit parent_predump_mode = mdc->parent_ie->pre_dump_mode; list_for_each_entry(vma_area, &vma_area_list->h, list) { - if (vma_area_is(vma_area, VMA_AREA_GUARD)) - continue; - ret = generate_vma_iovs(item, vma_area, pp, &xfer, args, ctl, &pmc, has_parent, mdc->pre_dump, parent_predump_mode); if (ret < 0) @@ -787,6 +741,8 @@ int prepare_mm_pid(struct pstree_item *i) ri->vmas.rst_priv_size += vma_area_len(vma); if (vma_has_guard_gap_hidden(vma)) ri->vmas.rst_priv_size += PAGE_SIZE; + if (vma_area_is(vma, VMA_AREA_SHSTK)) + ri->vmas.rst_priv_size += PAGE_SIZE; } pr_info("vma 0x%" PRIx64 " 0x%" PRIx64 "\n", vma->e->start, vma->e->end); @@ -862,14 +818,14 @@ static void prepare_cow_vmas_for(struct vm_area_list *vmas, struct vm_area_list /* <= here to shift from matching VMAs and ... */ while (vma->e->start <= pvma->e->start) { vma = vma_next(vma); - if ((&vma->list == &vmas->h) || vma_area_is(vma, VMA_AREA_GUARD)) + if (&vma->list == &vmas->h) return; } /* ... 
no == here since we must stop on matching pair */ while (pvma->e->start < vma->e->start) { pvma = vma_next(pvma); - if ((&pvma->list == &pvmas->h) || vma_area_is(pvma, VMA_AREA_GUARD)) + if (&pvma->list == &pvmas->h) return; } } @@ -929,6 +885,13 @@ static int premap_private_vma(struct pstree_item *t, struct vma_area *vma, void size = vma_entry_len(vma->e); + /* + * map an extra page for shadow stack VMAs, it will be used as a + * temporary shadow stack + */ + if (vma_area_is(vma, VMA_AREA_SHSTK)) + size += PAGE_SIZE; + if (!vma_inherited(vma)) { int flag = 0; /* @@ -1063,9 +1026,6 @@ static int premap_priv_vmas(struct pstree_item *t, struct vm_area_list *vmas, vo filemap_ctx_init(true); list_for_each_entry(vma, &vmas->h, list) { - if (vma_area_is(vma, VMA_AREA_GUARD)) - continue; - if (task_size_check(vpid(t), vma->e)) { ret = -1; break; @@ -1273,9 +1233,6 @@ err_read: unsigned long size, i = 0; void *addr = decode_pointer(vma->premmaped_addr); - if (vma_area_is(vma, VMA_AREA_GUARD)) - continue; - if (!vma_inherited(vma)) continue; @@ -1539,72 +1496,3 @@ int prepare_vmas(struct pstree_item *t, struct task_restore_args *ta) return prepare_vma_ios(t, ta); } - -int collect_madv_guards(pid_t pid, struct vm_area_list *vma_area_list) -{ - int pagemap_fd = -1; - struct page_region *regs = NULL; - long regs_len = 0; - int i, ret = -1; - - struct pm_scan_arg args = { - .size = sizeof(struct pm_scan_arg), - .flags = 0, - .start = 0, - .end = kdat.task_size, - .walk_end = 0, - .vec_len = 1000, /* this should be enough for most cases */ - .max_pages = 0, - .category_mask = PAGE_IS_GUARD, - .return_mask = PAGE_IS_GUARD, - }; - - if (!kdat.has_pagemap_scan_guard_pages) { - ret = 0; - goto out; - } - - pagemap_fd = open_proc(pid, "pagemap"); - if (pagemap_fd < 0) - goto out; - - regs = xmalloc(args.vec_len * sizeof(struct page_region)); - if (!regs) - goto out; - args.vec = (long)regs; - - do { - /* start from where we finished the last time */ - args.start = args.walk_end; - 
regs_len = ioctl(pagemap_fd, PAGEMAP_SCAN, &args); - if (regs_len == -1) { - pr_perror("PAGEMAP_SCAN"); - goto out; - } - - for (i = 0; i < regs_len; i++) { - struct vma_area *vma; - - BUG_ON(!(regs[i].categories & PAGE_IS_GUARD)); - - vma = alloc_vma_area(); - if (!vma) - goto out; - - vma->e->start = regs[i].start; - vma->e->end = regs[i].end; - vma->e->status = VMA_AREA_GUARD; - - list_add_tail(&vma->list, &vma_area_list->h); - vma_area_list->nr++; - } - } while (args.walk_end != kdat.task_size); - - ret = 0; - -out: - xfree(regs); - if (pagemap_fd >= 0) - close(pagemap_fd); - return ret; -} diff --git a/criu/mount-v2.c b/criu/mount-v2.c index 1e33ac12a..5d53e9a22 100644 --- a/criu/mount-v2.c +++ b/criu/mount-v2.c @@ -443,7 +443,6 @@ err: /* Mounts root container mount. */ static int do_mount_root_v2(struct mount_info *mi) { - unsigned long mflags = mi->flags & (~MS_PROPAGATE); unsigned long flags = MS_BIND; int fd; @@ -478,11 +477,6 @@ static int do_mount_root_v2(struct mount_info *mi) return -1; } - if (mflags && mount(NULL, mi->plain_mountpoint, NULL, MS_REMOUNT | MS_BIND | mflags, NULL)) { - pr_perror("Unable to apply root mount options"); - return -1; - } - mi->mounted = true; return 0; @@ -933,12 +927,8 @@ static int move_mount_set_group(int src_id, char *source, int dst_id) static int restore_one_sharing(struct sharing_group *sg, struct mount_info *target) { - int nsfd = -1, orig_nsfd = -1, exit_code = -1; char target_path[PATH_MAX]; - int target_fd = -1; - - if (!sg->master_id && !sg->shared_id) - return 0; + int target_fd; target_fd = fdstore_get(target->mnt_fd_id); BUG_ON(target_fd < 0); @@ -953,7 +943,8 @@ static int restore_one_sharing(struct sharing_group *sg, struct mount_info *targ first = get_first_mount(sg->parent); if (move_mount_set_group(first->mnt_fd_id, NULL, target->mnt_fd_id)) { pr_err("Failed to copy sharing from %d to %d\n", first->mnt_id, target->mnt_id); - goto err; + close(target_fd); + return -1; } } else { /* @@ -965,23 +956,16 @@ 
static int restore_one_sharing(struct sharing_group *sg, struct mount_info *targ */ if (move_mount_set_group(-1, sg->source, target->mnt_fd_id)) { pr_err("Failed to copy sharing from source %s to %d\n", sg->source, target->mnt_id); - goto err; + close(target_fd); + return -1; } } - } - nsfd = fdstore_get(target->nsid->mnt.nsfd_id); - if (nsfd < 0) - goto err; - - if (switch_ns_by_fd(nsfd, &mnt_ns_desc, &orig_nsfd)) - goto err; - - if (sg->master_id) { /* Convert shared_id to master_id */ if (mount(NULL, target_path, NULL, MS_SLAVE, NULL)) { pr_perror("Failed to make mount %d slave", target->mnt_id); - goto err; + close(target_fd); + return -1; } } @@ -989,16 +973,13 @@ static int restore_one_sharing(struct sharing_group *sg, struct mount_info *targ if (sg->shared_id) { if (mount(NULL, target_path, NULL, MS_SHARED, NULL)) { pr_perror("Failed to make mount %d shared", target->mnt_id); - goto err; + close(target_fd); + return -1; } } - exit_code = 0; -err: - close_safe(&target_fd); - close_safe(&nsfd); - if (orig_nsfd >= 0 && restore_ns(orig_nsfd, &mnt_ns_desc)) - exit_code = -1; - return exit_code; + close(target_fd); + + return 0; } static int restore_one_sharing_group(struct sharing_group *sg) diff --git a/criu/mount.c b/criu/mount.c index b643a7f26..82bbd52d6 100644 --- a/criu/mount.c +++ b/criu/mount.c @@ -888,11 +888,7 @@ static int resolve_external_mounts(struct mount_info *info) cut_root = cut_root_for_bind(m->root, match->root); - if (cut_root[0] == '\0') { - p = xstrdup(match->ns_mountpoint + 1); - } else { - p = xsprintf("%s/%s", match->ns_mountpoint + 1, cut_root); - } + p = xsprintf("%s/%s", match->ns_mountpoint + 1, cut_root); if (!p) return -1; @@ -2694,16 +2690,9 @@ shared: static int do_mount_root(struct mount_info *mi) { - unsigned long mflags = mi->flags & (~MS_PROPAGATE); - if (restore_shared_options(mi, !mi->shared_id && !mi->master_id, mi->shared_id, mi->master_id)) return -1; - if (mflags && mount(NULL, service_mountpoint(mi), NULL, MS_REMOUNT | 
MS_BIND | mflags, NULL)) { - pr_perror("Unable to apply root mount options"); - return -1; - } - return fetch_rt_stat(mi, service_mountpoint(mi)); } diff --git a/criu/namespaces.c b/criu/namespaces.c index 0c9b16a87..b7c0ab400 100644 --- a/criu/namespaces.c +++ b/criu/namespaces.c @@ -1009,31 +1009,36 @@ int dump_user_ns(pid_t pid, int ns_id) ret = parse_id_map(pid, "uid_map", &e->uid_map); if (ret < 0) - /* - * The uid_map and gid_map is clean up in free_userns_maps - * later, so we don't need to clean these up in error cases. - */ - return -1; - + goto err; e->n_uid_map = ret; ret = parse_id_map(pid, "gid_map", &e->gid_map); if (ret < 0) - return -1; + goto err; e->n_gid_map = ret; if (check_user_ns(pid)) - return -1; + goto err; img = open_image(CR_FD_USERNS, O_DUMP, ns_id); if (!img) - return -1; + goto err; ret = pb_write_one(img, e, PB_USERNS); close_image(img); if (ret < 0) - return -1; + goto err; return 0; +err: + if (e->uid_map) { + xfree(e->uid_map[0]); + xfree(e->uid_map); + } + if (e->gid_map) { + xfree(e->gid_map[0]); + xfree(e->gid_map); + } + return -1; } void free_userns_maps(void) diff --git a/criu/net.c b/criu/net.c index e5775a328..eee331108 100644 --- a/criu/net.c +++ b/criu/net.c @@ -2128,117 +2128,6 @@ nft_ctx_free_out: } #endif -static const char *ipv4_sysctl_entries[] = { - "ping_group_range", -}; - -#define IPV4_SYSCTL_BASE "net/ipv4" -#define IPV4_SYSCTL_FMT IPV4_SYSCTL_BASE"/%s" -#define MAX_IPV4_SYSCTL_OPT 32 -#define MAX_IPV4_SYSCTL_PATH (sizeof(IPV4_SYSCTL_FMT) + MAX_IPV4_SYSCTL_OPT - 2) -#define MAX_STR_IPV4_SYSCTL_LEN 200 - -static int ipv4_sysctls_op(SysctlEntry ***rsysctl, size_t *pn, int op) -{ - int i, ret = -1, flags = 0; - char path[ARRAY_SIZE(ipv4_sysctl_entries)][MAX_IPV4_SYSCTL_PATH] = {}; - struct sysctl_req req[ARRAY_SIZE(ipv4_sysctl_entries)] = {}; - SysctlEntry **sysctl = *rsysctl; - size_t n = *pn, ri; - - if (n != ARRAY_SIZE(ipv4_sysctl_entries)) { - pr_err("ipv4: Unexpected entries in sysctl (%zu %zu)\n", n, 
ARRAY_SIZE(ipv4_sysctl_entries)); - return -EINVAL; - } - - if (opts.weak_sysctls || op == CTL_READ) - flags = CTL_FLAGS_OPTIONAL; - - for (i = 0, ri = 0; i < n; i++) { - snprintf(path[ri], MAX_IPV4_SYSCTL_PATH, IPV4_SYSCTL_FMT, ipv4_sysctl_entries[i]); - req[ri].name = path[ri]; - req[ri].flags = flags; - - switch (sysctl[i]->type) { - case SYSCTL_TYPE__CTL_STR: - req[ri].type = CTL_STR(MAX_STR_IPV4_SYSCTL_LEN); - - /* skip write if have no value */ - if (op == CTL_WRITE && !sysctl[i]->sarg) - continue; - - req[ri].arg = sysctl[i]->sarg; - break; - default: - pr_err("ipv4: Unknown sysctl type %d\n", sysctl[i]->type); - return -1; - } - ri++; - } - - ret = sysctl_op(req, ri, op, CLONE_NEWNET); - if (ret < 0) { - pr_err("ipv4: Failed to %s %s/\n", (op == CTL_READ) ? "read" : "write", IPV4_SYSCTL_BASE); - return -1; - } - - if (op == CTL_READ) { - bool has_entries = false; - - BUG_ON(ri != n); - for (i = 0; i < n; i++) { - if (req[i].flags & CTL_FLAGS_HAS) { - has_entries = true; - } else { - sysctl[i]->sarg = NULL; - } - } - - if (!has_entries) { - *pn = 0; - *rsysctl = NULL; - } - } - - return 0; -} - -static int ipv4_sysctls_ping_group_range_map_gid(SysctlEntry *ent, size_t size) -{ - int start, end, ustart, uend, ret; - - if (sscanf(ent->sarg, "%d %d", &start, &end) != 2) { - pr_err("Failed to parse ping_group_range: %s\n", ent->sarg); - return -1; - } - - /* - * The default is "1 0", which means no group - * is allowed to create ICMP Echo sockets. 
- */ - if (start == 1 && end == 0) { - pr_debug("The ping_group_range is set to default, skipping it.\n"); - ent->sarg = NULL; - return 0; - } - - if (!(root_ns_mask & CLONE_NEWUSER)) - return 0; - - ustart = userns_gid(start); - uend = userns_gid(end); - pr_debug("Mapping ping_group_range %d %d to userns -> %d %d\n", - start, end, ustart, uend); - - ret = snprintf(ent->sarg, size, "%d\t%d\n", ustart, uend); - if (ret < 0 || ret >= size) { - pr_err("Failed to map ping_group_range: %d\t%d\n", ustart, uend); - return -1; - } - - return 0; -} - static int dump_netns_conf(struct ns_id *ns, struct cr_imgset *fds) { void *buf, *o_buf; @@ -2253,10 +2142,6 @@ static int dump_netns_conf(struct ns_id *ns, struct cr_imgset *fds) int size6 = ARRAY_SIZE(devconfs6); char def_stable_secret[MAX_STR_CONF_LEN + 1] = {}; char all_stable_secret[MAX_STR_CONF_LEN + 1] = {}; - SysctlEntry *ipv4_sysctls = NULL; - size_t ipv4_sysctl_size = ARRAY_SIZE(ipv4_sysctl_entries); - char ping_group_range[MAX_STR_IPV4_SYSCTL_LEN + 1] = {}; - int ping_group_range_id = -1; NetnsId *ids; struct netns_id *p; @@ -2264,16 +2149,10 @@ static int dump_netns_conf(struct ns_id *ns, struct cr_imgset *fds) list_for_each_entry(p, &ns->net.ids, node) i++; - /* - * Here we allocate one single big buffer for storing multiple arrays - * of protobuf entries and pointers to entries in it and we later use - * xptr_pull_s to claim a part of this buffer of proper size for each - * particular array. Next we read data from sysctl files to those - * arrays and then finally save them into images. 
- */ o_buf = buf = xmalloc(i * (sizeof(NetnsId *) + sizeof(NetnsId)) + - (2 * size4 + 2 * size6 + sizex + ipv4_sysctl_size) * - (sizeof(SysctlEntry *) + sizeof(SysctlEntry))); + size4 * (sizeof(SysctlEntry *) + sizeof(SysctlEntry)) * 2 + + size6 * (sizeof(SysctlEntry *) + sizeof(SysctlEntry)) * 2 + + sizex * (sizeof(SysctlEntry *) + sizeof(SysctlEntry))); if (!buf) goto out; @@ -2338,22 +2217,6 @@ static int dump_netns_conf(struct ns_id *ns, struct cr_imgset *fds) netns.unix_conf[i]->type = SYSCTL_TYPE__CTL_32; } - netns.n_ipv4_sysctl = ipv4_sysctl_size; - netns.ipv4_sysctl = xptr_pull_s(&buf, ipv4_sysctl_size * sizeof(SysctlEntry *)); - ipv4_sysctls = xptr_pull_s(&buf, ipv4_sysctl_size * sizeof(SysctlEntry)); - for (i = 0; i < ipv4_sysctl_size; i++) { - sysctl_entry__init(&ipv4_sysctls[i]); - netns.ipv4_sysctl[i] = &ipv4_sysctls[i]; - if (!strcmp(ipv4_sysctl_entries[i], "ping_group_range")) { - netns.ipv4_sysctl[i]->type = SYSCTL_TYPE__CTL_STR; - netns.ipv4_sysctl[i]->sarg = ping_group_range; - ping_group_range_id = i; - } else { - /* Need to handle this case when we have more sysctls */ - BUG(); - } - } - ret = ipv4_conf_op("default", netns.def_conf4, size4, CTL_READ, NULL); if (ret < 0) goto err_free; @@ -2372,16 +2235,6 @@ static int dump_netns_conf(struct ns_id *ns, struct cr_imgset *fds) if (ret < 0) goto err_free; - ret = ipv4_sysctls_op(&netns.ipv4_sysctl, &netns.n_ipv4_sysctl, CTL_READ); - if (ret < 0) - goto err_free; - - BUG_ON(ping_group_range_id == -1); - ret = ipv4_sysctls_ping_group_range_map_gid(netns.ipv4_sysctl[ping_group_range_id], - MAX_STR_IPV4_SYSCTL_LEN + 1); - if (ret < 0) - goto err_free; - ret = pb_write_one(img_from_set(fds, CR_FD_NETNS), &netns, PB_NETNS); err_free: xfree(o_buf); @@ -2734,12 +2587,6 @@ static int restore_netns_conf(struct ns_id *ns) goto out; } - if ((netns)->ipv4_sysctl) { - ret = ipv4_sysctls_op(&(netns)->ipv4_sysctl, &(netns)->n_ipv4_sysctl, CTL_WRITE); - if (ret) - goto out; - } - ns->net.netns = netns; out: return 
ret; @@ -3219,45 +3066,11 @@ err: return ret; } -#if defined(CONFIG_HAS_NFTABLES_LIB_API_0) || defined(CONFIG_HAS_NFTABLES_LIB_API_1) -static inline FILE *redirect_nftables_output(struct nft_ctx *nft) -{ - FILE *fp; - int fd; - - fd = dup(log_get_fd()); - if (fd < 0) { - pr_perror("dup() to redirect nftables output failed"); - return NULL; - } - - fp = fdopen(fd, "w"); - if (!fp) { - pr_perror("fdopen() to redirect nftables output failed"); - return NULL; - } - - /** - * Without setvbuf() the output from libnftables will be - * somewhere in the log file, probably at the end. - * With setvbuf() potential output will be at the correct - * position. - */ - setvbuf(fp, NULL, _IONBF, 0); - - nft_ctx_set_output(nft, fp); - nft_ctx_set_error(nft, fp); - - return fp; -} -#endif - -static inline int nftables_lock_network_internal(bool restore) +static inline int nftables_lock_network_internal(void) { #if defined(CONFIG_HAS_NFTABLES_LIB_API_0) || defined(CONFIG_HAS_NFTABLES_LIB_API_1) - cleanup_file FILE *fp = NULL; struct nft_ctx *nft; - int ret = 0, exit_code = -1; + int ret = 0; char table[32]; char buf[128]; @@ -3268,18 +3081,9 @@ static inline int nftables_lock_network_internal(bool restore) if (!nft) return -1; - fp = redirect_nftables_output(nft); - if (!fp) - goto err2; - snprintf(buf, sizeof(buf), "create table %s", table); - ret = NFT_RUN_CMD(nft, buf); - if (ret) { - /* The network has been locked on dump. 
*/ - if (restore && errno == EEXIST) - return 0; + if (NFT_RUN_CMD(nft, buf)) goto err2; - } snprintf(buf, sizeof(buf), "add chain %s output { type filter hook output priority 0; policy drop; }", table); if (NFT_RUN_CMD(nft, buf)) @@ -3297,16 +3101,17 @@ static inline int nftables_lock_network_internal(bool restore) if (NFT_RUN_CMD(nft, buf)) goto err1; - exit_code = 0; -out: - nft_ctx_free(nft); - return exit_code; + goto out; + err1: snprintf(buf, sizeof(buf), "delete table %s", table); NFT_RUN_CMD(nft, buf); err2: + ret = -1; pr_err("Locking network failed using nftables\n"); - goto out; +out: + nft_ctx_free(nft); + return ret; #else pr_err("CRIU was built without libnftables support\n"); return -1; @@ -3338,7 +3143,7 @@ static int iptables_network_lock_internal(void) return ret; } -int network_lock_internal(bool restore) +int network_lock_internal(void) { int ret = 0, nsret; @@ -3351,7 +3156,7 @@ int network_lock_internal(bool restore) if (opts.network_lock_method == NETWORK_LOCK_IPTABLES) ret = iptables_network_lock_internal(); else if (opts.network_lock_method == NETWORK_LOCK_NFTABLES) - ret = nftables_lock_network_internal(restore); + ret = nftables_lock_network_internal(); if (restore_ns(nsret, &net_ns_desc)) ret = -1; @@ -3363,7 +3168,6 @@ static inline int nftables_network_unlock(void) { #if defined(CONFIG_HAS_NFTABLES_LIB_API_0) || defined(CONFIG_HAS_NFTABLES_LIB_API_1) int ret = 0; - cleanup_file FILE *fp = NULL; struct nft_ctx *nft; char table[32]; char buf[128]; @@ -3375,10 +3179,6 @@ static inline int nftables_network_unlock(void) if (!nft) return -1; - fp = redirect_nftables_output(nft); - if (!fp) - return -1; - snprintf(buf, sizeof(buf), "delete table %s", table); if (NFT_RUN_CMD(nft, buf)) ret = -1; @@ -3477,7 +3277,7 @@ int network_lock(void) if (run_scripts(ACT_NET_LOCK)) return -1; - return network_lock_internal(false); + return network_lock_internal(); } void network_unlock(void) diff --git a/criu/netfilter.c b/criu/netfilter.c index 
e2c82764f..9e78dc4b0 100644 --- a/criu/netfilter.c +++ b/criu/netfilter.c @@ -299,25 +299,7 @@ int nftables_lock_connection(struct inet_sk_desc *sk) int nftables_get_table(char *table, int n) { - int ret; - - switch(dump_criu_run_id[0]) { - case 0: - /* This is not a restore.*/ - ret = snprintf(table, n, "inet CRIU-%s", criu_run_id); - break; - case NO_DUMP_CRIU_RUN_ID: - /** - * This is a restore from an older image with no - * dump_criu_run_id available. Let's use the old ID. - */ - ret = snprintf(table, n, "inet CRIU-%d", root_item->pid->real); - break; - default: - ret = snprintf(table, n, "inet CRIU-%s", dump_criu_run_id); - } - - if (ret < 0) { + if (snprintf(table, n, "inet CRIU-%d", root_item->pid->real) < 0) { pr_err("Cannot generate CRIU's nftables table name\n"); return -1; } diff --git a/criu/page-pipe.c b/criu/page-pipe.c index 4601d8f9c..aab6742be 100644 --- a/criu/page-pipe.c +++ b/criu/page-pipe.c @@ -381,7 +381,7 @@ int pipe_read_dest_init(struct pipe_read_dest *prd) return 0; } -int page_pipe_read(struct page_pipe *pp, struct pipe_read_dest *prd, unsigned long addr, unsigned long int *nr_pages, +int page_pipe_read(struct page_pipe *pp, struct pipe_read_dest *prd, unsigned long addr, unsigned int *nr_pages, unsigned int ppb_flags) { struct page_pipe_buf *ppb; @@ -406,7 +406,7 @@ int page_pipe_read(struct page_pipe *pp, struct pipe_read_dest *prd, unsigned lo } /* clamp the request if it passes the end of iovec */ - len = min((unsigned long)iov->iov_base + iov->iov_len - addr, *nr_pages * PAGE_SIZE); + len = min((unsigned long)iov->iov_base + iov->iov_len - addr, (unsigned long)(*nr_pages) * PAGE_SIZE); *nr_pages = len / PAGE_SIZE; skip += ppb->pipe_off * PAGE_SIZE; @@ -446,17 +446,17 @@ void debug_show_page_pipe(struct page_pipe *pp) pr_debug("Page pipe:\n"); pr_debug("* %u pipes %u/%u iovs:\n", pp->nr_pipes, pp->free_iov, pp->nr_iovs); list_for_each_entry(ppb, &pp->bufs, l) { - pr_debug("\tbuf %lx pages, %u iovs, flags: %x pipe_off: %lx :\n", 
ppb->pages_in, ppb->nr_segs, ppb->flags, + pr_debug("\tbuf %u pages, %u iovs, flags: %x pipe_off: %x :\n", ppb->pages_in, ppb->nr_segs, ppb->flags, ppb->pipe_off); for (i = 0; i < ppb->nr_segs; i++) { iov = &ppb->iov[i]; - pr_debug("\t\t%p - %p\n", iov->iov_base, iov->iov_base + iov->iov_len); + pr_debug("\t\t%p %lu\n", iov->iov_base, iov->iov_len / PAGE_SIZE); } } pr_debug("* %u holes:\n", pp->free_hole); for (i = 0; i < pp->free_hole; i++) { iov = &pp->holes[i]; - pr_debug("\t%p - %p\n", iov->iov_base, iov->iov_base + iov->iov_len); + pr_debug("\t%p %lu\n", iov->iov_base, iov->iov_len / PAGE_SIZE); } } diff --git a/criu/page-xfer.c b/criu/page-xfer.c index 463d4c506..94f477414 100644 --- a/criu/page-xfer.c +++ b/criu/page-xfer.c @@ -32,7 +32,7 @@ static int page_server_sk = -1; struct page_server_iov { u32 cmd; - u64 nr_pages; + u32 nr_pages; u64 vaddr; u64 dst_id; }; @@ -178,12 +178,12 @@ static int write_pages_to_server(struct page_xfer *xfer, int p, unsigned long le ssize_t ret, left = len; if (opts.tls) { - pr_debug("Sending %lx bytes\n", len); + pr_debug("Sending %lu bytes / %lu pages\n", len, len / PAGE_SIZE); if (tls_send_data_from_fd(p, len)) return -1; } else { - pr_debug("Splicing %lx bytes into socket\n", len); + pr_debug("Splicing %lu bytes / %lu pages into socket\n", len, len / PAGE_SIZE); while (left > 0) { ret = splice(p, NULL, xfer->sk, NULL, left, SPLICE_F_MOVE); @@ -192,7 +192,7 @@ static int write_pages_to_server(struct page_xfer *xfer, int p, unsigned long le return -1; } - pr_debug("\tSpliced: %lx bytes sent\n", (unsigned long)ret); + pr_debug("\tSpliced: %lu bytes sent\n", (unsigned long)ret); left -= ret; } } @@ -288,7 +288,7 @@ static int check_pagehole_in_parent(struct page_read *p, struct iovec *iov) * read_pagemap_page routine. 
*/ - pr_debug("Checking %p - %p hole\n", iov->iov_base, iov->iov_base + iov->iov_len); + pr_debug("Checking %p/%zu hole\n", iov->iov_base, iov->iov_len); off = (unsigned long)iov->iov_base; end = off + iov->iov_len; while (1) { @@ -300,8 +300,7 @@ static int check_pagehole_in_parent(struct page_read *p, struct iovec *iov) return -1; } - pr_debug("\tFound %" PRIx64 " - %" PRIx64 "\n", - p->pe->vaddr, p->pe->vaddr + pagemap_len(p->pe)); + pr_debug("\tFound %" PRIx64 "/%lu\n", p->pe->vaddr, pagemap_len(p->pe)); /* * The pagemap entry in parent may happen to be @@ -327,7 +326,6 @@ static int write_pagemap_loc(struct page_xfer *xfer, struct iovec *iov, u32 flag pe.nr_pages = iov->iov_len / PAGE_SIZE; pe.has_flags = true; pe.flags = flags; - pe.has_nr_pages = true; if (flags & PE_PRESENT) { if (opts.auto_dedup && xfer->parent != NULL) { @@ -341,8 +339,7 @@ static int write_pagemap_loc(struct page_xfer *xfer, struct iovec *iov, u32 flag if (xfer->parent != NULL) { ret = check_pagehole_in_parent(xfer->parent, iov); if (ret) { - pr_err("Hole %p - %p not found in parent\n", - iov->iov_base, iov->iov_base + iov->iov_len); + pr_err("Hole %p/%zu not found in parent\n", iov->iov_base, iov->iov_len); return -1; } } @@ -852,7 +849,7 @@ int page_xfer_predump_pages(int pid, struct page_xfer *xfer, struct page_pipe *p BUG_ON(iov.iov_base < (void *)xfer->offset); iov.iov_base -= xfer->offset; - pr_debug("\t p %p - %p\n", iov.iov_base, iov.iov_base + iov.iov_len); + pr_debug("\t p %p [%u]\n", iov.iov_base, (unsigned int)(iov.iov_len / PAGE_SIZE)); flags = ppb_xfer_flags(xfer, ppb); @@ -888,7 +885,7 @@ int page_xfer_dump_pages(struct page_xfer *xfer, struct page_pipe *pp) list_for_each_entry(ppb, &pp->bufs, l) { unsigned int i; - pr_debug("\tbuf %lx/%d\n", ppb->pages_in, ppb->nr_segs); + pr_debug("\tbuf %d/%d\n", ppb->pages_in, ppb->nr_segs); for (i = 0; i < ppb->nr_segs; i++) { struct iovec iov = ppb->iov[i]; @@ -900,7 +897,7 @@ int page_xfer_dump_pages(struct page_xfer *xfer, struct 
page_pipe *pp) BUG_ON(iov.iov_base < (void *)xfer->offset); iov.iov_base -= xfer->offset; - pr_debug("\tp %p - %p\n", iov.iov_base, iov.iov_base + iov.iov_len); + pr_debug("\tp %p [%u]\n", iov.iov_base, (unsigned int)(iov.iov_len / PAGE_SIZE)); flags = ppb_xfer_flags(xfer, ppb); @@ -1073,8 +1070,7 @@ static int page_server_add(int sk, struct page_server_iov *pi, u32 flags) struct page_xfer *lxfer = &cxfer.loc_xfer; struct iovec iov; - pr_debug("Adding %" PRIx64 " - %" PRIx64 "\n", - pi->vaddr, pi->vaddr + pi->nr_pages * PAGE_SIZE); + pr_debug("Adding %" PRIx64 "/%u\n", pi->vaddr, pi->nr_pages); if (prep_loc_xfer(pi)) return -1; @@ -1139,17 +1135,13 @@ static int page_server_get_pages(int sk, struct page_server_iov *pi) { struct pstree_item *item; struct page_pipe *pp; - unsigned long len, nr_pages; + unsigned long len; int ret; item = pstree_item_by_virt(pi->dst_id); pp = dmpi(item)->mem_pp; - /* page_pipe_read() uses 'unsigned long *' but pi->nr_pages is u64. - * Use a temporary variable to fix the incompatible pointer type - * on 32-bit platforms (e.g. armv7). */ - nr_pages = pi->nr_pages; - ret = page_pipe_read(pp, &pipe_read_dest, pi->vaddr, &nr_pages, PPB_LAZY); + ret = page_pipe_read(pp, &pipe_read_dest, pi->vaddr, &pi->nr_pages, PPB_LAZY); if (ret) return ret; @@ -1158,7 +1150,6 @@ static int page_server_get_pages(int sk, struct page_server_iov *pi) * .dst_id all remain intact. 
*/ - pi->nr_pages = nr_pages; if (pi->nr_pages == 0) { pr_debug("no iovs found, zero pages\n"); return -1; @@ -1356,7 +1347,7 @@ static int fill_page_pipe(struct page_read *pr, struct page_pipe *pp) static int page_pipe_from_pagemap(struct page_pipe **pp, int pid) { struct page_read pr; - unsigned long nr_pages = 0; + int nr_pages = 0; if (open_page_read(pid, &pr, PR_TASK) <= 0) { pr_err("Failed to open page read for %d\n", pid); @@ -1430,7 +1421,7 @@ int cr_page_server(bool daemon_mode, bool lazy_dump, int cfd) if (opts.ps_socket != -1) { ask = opts.ps_socket; - pr_info("Reusing ps socket %d\n", ask); + pr_info("Re-using ps socket %d\n", ask); goto no_server; } @@ -1476,7 +1467,7 @@ static int connect_to_page_server(void) if (opts.ps_socket != -1) { page_server_sk = opts.ps_socket; - pr_info("Reusing ps socket %d\n", page_server_sk); + pr_info("Re-using ps socket %d\n", page_server_sk); goto out; } @@ -1559,13 +1550,13 @@ struct ps_async_read { static LIST_HEAD(async_reads); -static inline void async_read_set_goal(struct ps_async_read *ar, unsigned long nr_pages) +static inline void async_read_set_goal(struct ps_async_read *ar, int nr_pages) { ar->goal = sizeof(ar->pi) + nr_pages * PAGE_SIZE; ar->nr_pages = nr_pages; } -static void init_ps_async_read(struct ps_async_read *ar, void *buf, unsigned long nr_pages, ps_async_read_complete complete, +static void init_ps_async_read(struct ps_async_read *ar, void *buf, int nr_pages, ps_async_read_complete complete, void *priv) { ar->pages = buf; @@ -1575,7 +1566,7 @@ static void init_ps_async_read(struct ps_async_read *ar, void *buf, unsigned lon async_read_set_goal(ar, nr_pages); } -static int page_server_start_async_read(void *buf, unsigned long nr_pages, ps_async_read_complete complete, void *priv) +static int page_server_start_async_read(void *buf, int nr_pages, ps_async_read_complete complete, void *priv) { struct ps_async_read *ar; @@ -1675,7 +1666,7 @@ int connect_to_page_server_to_recv(int epfd) return 
epoll_add_rfd(epfd, &ps_rfd); } -int request_remote_pages(unsigned long img_id, unsigned long addr, unsigned long nr_pages) +int request_remote_pages(unsigned long img_id, unsigned long addr, int nr_pages) { struct page_server_iov pi = { .cmd = PS_IOV_GET, @@ -1692,7 +1683,7 @@ int request_remote_pages(unsigned long img_id, unsigned long addr, unsigned long return 0; } -static int page_server_start_sync_read(void *buf, unsigned long nr, ps_async_read_complete complete, void *priv) +static int page_server_start_sync_read(void *buf, int nr, ps_async_read_complete complete, void *priv) { struct ps_async_read ar; int ret = 1; @@ -1703,7 +1694,7 @@ static int page_server_start_sync_read(void *buf, unsigned long nr, ps_async_rea return ret; } -int page_server_start_read(void *buf, unsigned long nr, ps_async_read_complete complete, void *priv, unsigned flags) +int page_server_start_read(void *buf, int nr, ps_async_read_complete complete, void *priv, unsigned flags) { if (flags & PR_ASYNC) return page_server_start_async_read(buf, nr, complete, priv); diff --git a/criu/pagemap-cache.c b/criu/pagemap-cache.c index 457c0d649..f04a517de 100644 --- a/criu/pagemap-cache.c +++ b/criu/pagemap-cache.c @@ -194,9 +194,6 @@ int pmc_fill(pmc_t *pmc, u64 start, u64 end) }; long ret; - if (kdat.has_pagemap_scan_guard_pages) - args.return_mask |= PAGE_IS_GUARD; - ret = ioctl(pmc->fd, PAGEMAP_SCAN, &args); if (ret == -1) { pr_perror("PAGEMAP_SCAN"); diff --git a/criu/pagemap.c b/criu/pagemap.c index 6c9c4f7fe..83f69bba3 100644 --- a/criu/pagemap.c +++ b/criu/pagemap.c @@ -168,15 +168,15 @@ static int seek_pagemap(struct page_read *pr, unsigned long vaddr) return 0; } -static inline void pagemap_bound_check(PagemapEntry *pe, unsigned long vaddr, unsigned long int nr) +static inline void pagemap_bound_check(PagemapEntry *pe, unsigned long vaddr, int nr) { if (vaddr < pe->vaddr || (vaddr - pe->vaddr) / PAGE_SIZE + nr > pe->nr_pages) { - pr_err("Page read err %" PRIx64 ":%" PRIx64 " vs 
%lx:%lx\n", pe->vaddr, pe->nr_pages, vaddr, nr); + pr_err("Page read err %" PRIx64 ":%u vs %lx:%u\n", pe->vaddr, pe->nr_pages, vaddr, nr); BUG(); } } -static int read_parent_page(struct page_read *pr, unsigned long vaddr, unsigned long int nr, void *buf, unsigned flags) +static int read_parent_page(struct page_read *pr, unsigned long vaddr, int nr, void *buf, unsigned flags) { struct page_read *ppr = pr->parent; int ret; @@ -195,7 +195,7 @@ static int read_parent_page(struct page_read *pr, unsigned long vaddr, unsigned */ do { - unsigned long int p_nr; + int p_nr; pr_debug("\tpr%lu-%u Read from parent\n", pr->img_id, pr->id); ret = ppr->seek_pagemap(ppr, vaddr); @@ -210,7 +210,7 @@ static int read_parent_page(struct page_read *pr, unsigned long vaddr, unsigned * read as much as we can. */ p_nr = ppr->pe->nr_pages - (vaddr - ppr->pe->vaddr) / PAGE_SIZE; - pr_info("\tparent has %lu pages in\n", p_nr); + pr_info("\tparent has %u pages in\n", p_nr); if (p_nr > nr) p_nr = nr; @@ -261,7 +261,7 @@ static int read_local_page(struct page_read *pr, unsigned long vaddr, unsigned l break; } - if (opts.auto_dedup && !pr->disable_dedup) { + if (opts.auto_dedup) { ret = punch_hole(pr, pr->pi_off, len, false); if (ret == -1) return -1; @@ -374,7 +374,7 @@ int pagemap_enqueue_iovec(struct page_read *pr, void *buf, unsigned long len, st return 0; } -static int maybe_read_page_local(struct page_read *pr, unsigned long vaddr, unsigned long nr, void *buf, unsigned flags) +static int maybe_read_page_local(struct page_read *pr, unsigned long vaddr, int nr, void *buf, unsigned flags) { int ret; unsigned long len = nr * PAGE_SIZE; @@ -402,7 +402,7 @@ static int maybe_read_page_local(struct page_read *pr, unsigned long vaddr, unsi * We cannot use maybe_read_page_local() for streaming images as it uses * pread(), seeking in the file. Instead, we use this custom page reader. 
*/ -static int maybe_read_page_img_streamer(struct page_read *pr, unsigned long vaddr, unsigned long nr, void *buf, unsigned flags) +static int maybe_read_page_img_streamer(struct page_read *pr, unsigned long vaddr, int nr, void *buf, unsigned flags) { unsigned long len = nr * PAGE_SIZE; int fd; @@ -445,7 +445,7 @@ static int maybe_read_page_img_streamer(struct page_read *pr, unsigned long vadd return ret; } -static int read_page_complete(unsigned long img_id, unsigned long vaddr, unsigned long int nr_pages, void *priv) +static int read_page_complete(unsigned long img_id, unsigned long vaddr, int nr_pages, void *priv) { int ret = 0; struct page_read *pr = priv; @@ -463,7 +463,7 @@ static int read_page_complete(unsigned long img_id, unsigned long vaddr, unsigne return ret; } -static int maybe_read_page_remote(struct page_read *pr, unsigned long vaddr, unsigned long nr, void *buf, unsigned flags) +static int maybe_read_page_remote(struct page_read *pr, unsigned long vaddr, int nr, void *buf, unsigned flags) { int ret; @@ -474,9 +474,9 @@ static int maybe_read_page_remote(struct page_read *pr, unsigned long vaddr, uns return ret; } -static int read_pagemap_page(struct page_read *pr, unsigned long vaddr, unsigned long nr, void *buf, unsigned flags) +static int read_pagemap_page(struct page_read *pr, unsigned long vaddr, int nr, void *buf, unsigned flags) { - pr_info("pr%lu-%u Read %lx %lu pages\n", pr->img_id, pr->id, vaddr, nr); + pr_info("pr%lu-%u Read %lx %u pages\n", pr->img_id, pr->id, vaddr, nr); pagemap_bound_check(pr->pe, vaddr, nr); if (pagemap_in_parent(pr->pe)) { @@ -682,9 +682,6 @@ static void init_compat_pagemap_entry(PagemapEntry *pe) pe->flags |= PE_PARENT; else if (!pe->has_flags) pe->flags = PE_PRESENT; - - if (!pe->has_nr_pages) - pe->nr_pages = pe->compat_nr_pages; } /* @@ -795,7 +792,6 @@ int open_page_read_at(int dfd, unsigned long img_id, struct page_read *pr, int p pr->bunch.iov_base = NULL; pr->pmes = NULL; pr->pieok = false; - pr->disable_dedup 
= false; pr->pmi = open_image_at(dfd, i_typ, O_RSTR, img_id); if (!pr->pmi) @@ -856,14 +852,6 @@ int open_page_read(unsigned long img_id, struct page_read *pr, int pr_flags) #define DUP_IDS_BASE 1000 -void page_read_disable_dedup(struct page_read *pr) -{ - pr_debug("disable dedup, id: %d\n", pr->id); - pr->disable_dedup = true; - if (pr->parent) - page_read_disable_dedup(pr->parent); -} - void dup_page_read(struct page_read *src, struct page_read *dst) { static int dup_ids = 1; diff --git a/criu/parasite-syscall.c b/criu/parasite-syscall.c index e19847b37..a88f8a66f 100644 --- a/criu/parasite-syscall.c +++ b/criu/parasite-syscall.c @@ -103,19 +103,16 @@ static int alloc_groups_copy_creds(CredsEntry *ce, struct parasite_dump_creds *c BUILD_BUG_ON(sizeof(ce->cap_prm[0]) != sizeof(c->cap_prm[0])); BUILD_BUG_ON(sizeof(ce->cap_eff[0]) != sizeof(c->cap_eff[0])); BUILD_BUG_ON(sizeof(ce->cap_bnd[0]) != sizeof(c->cap_bnd[0])); - BUILD_BUG_ON(sizeof(ce->cap_amb[0]) != sizeof(c->cap_amb[0])); BUG_ON(ce->n_cap_inh != CR_CAP_SIZE); BUG_ON(ce->n_cap_prm != CR_CAP_SIZE); BUG_ON(ce->n_cap_eff != CR_CAP_SIZE); BUG_ON(ce->n_cap_bnd != CR_CAP_SIZE); - BUG_ON(ce->n_cap_amb != CR_CAP_SIZE); memcpy(ce->cap_inh, c->cap_inh, sizeof(c->cap_inh[0]) * CR_CAP_SIZE); memcpy(ce->cap_prm, c->cap_prm, sizeof(c->cap_prm[0]) * CR_CAP_SIZE); memcpy(ce->cap_eff, c->cap_eff, sizeof(c->cap_eff[0]) * CR_CAP_SIZE); memcpy(ce->cap_bnd, c->cap_bnd, sizeof(c->cap_bnd[0]) * CR_CAP_SIZE); - memcpy(ce->cap_amb, c->cap_amb, sizeof(c->cap_amb[0]) * CR_CAP_SIZE); if (c->no_new_privs > 0) { ce->no_new_privs = c->no_new_privs; @@ -421,7 +418,7 @@ struct parasite_ctl *parasite_infect_seized(pid_t pid, struct pstree_item *item, ictx->flags |= INFECT_NO_MEMFD; if (fault_injected(FI_PARASITE_CONNECT)) ictx->flags |= INFECT_FAIL_CONNECT; - if (fault_injected(FI_NO_BREAKPOINTS) || !kdat.has_breakpoints) + if (fault_injected(FI_NO_BREAKPOINTS)) ictx->flags |= INFECT_NO_BREAKPOINTS; if (kdat.compat_cr) ictx->flags |= 
INFECT_COMPATIBLE; diff --git a/criu/pidfd-store.c b/criu/pidfd-store.c index 110f7802a..9fdc74cb7 100644 --- a/criu/pidfd-store.c +++ b/criu/pidfd-store.c @@ -99,7 +99,7 @@ int init_pidfd_store_sk(pid_t pid, int sk) goto err; } - addrlen = snprintf(addr.sun_path, sizeof(addr.sun_path), "X/criu-pidfd-store-%d-%d-%s", pid, sk, + addrlen = snprintf(addr.sun_path, sizeof(addr.sun_path), "X/criu-pidfd-store-%d-%d-%" PRIx64, pid, sk, criu_run_id); addrlen += sizeof(addr.sun_family); diff --git a/criu/pidfd.c b/criu/pidfd.c deleted file mode 100644 index ae32025b0..000000000 --- a/criu/pidfd.c +++ /dev/null @@ -1,305 +0,0 @@ -#include "common/lock.h" -#include "imgset.h" -#include "pidfd.h" -#include "fdinfo.h" -#include "pidfd.pb-c.h" -#include "protobuf.h" -#include "pstree.h" -#include -#include -#include -#include "common/bug.h" -#include "rst-malloc.h" - -#include "compel/plugins/std/syscall-codes.h" - -#undef LOG_PREFIX -#define LOG_PREFIX "pidfd: " - -#ifndef PIDFD_THREAD -#define PIDFD_THREAD O_EXCL -#endif - -struct pidfd_info { - PidfdEntry *pidfe; - struct file_desc d; - - struct dead_pidfd *dead; - struct pidfd_info *next; -}; - -struct dead_pidfd { - unsigned int ino; - int creator_id; - - struct hlist_node hash; - struct pidfd_info *list; -}; - -#define DEAD_PIDFD_HASH_SIZE 32 -static struct hlist_head dead_pidfd_hash[DEAD_PIDFD_HASH_SIZE]; - -void init_dead_pidfd_hash(void) -{ - for (int i = 0; i < DEAD_PIDFD_HASH_SIZE; i++) - INIT_HLIST_HEAD(&dead_pidfd_hash[i]); -} - -static struct dead_pidfd *lookup_dead_pidfd(unsigned int ino) -{ - struct dead_pidfd *dead; - struct hlist_head *chain; - - chain = &dead_pidfd_hash[ino % DEAD_PIDFD_HASH_SIZE]; - hlist_for_each_entry(dead, chain, hash) { - if (dead->ino == ino) { - return dead; - } - } - - return NULL; -} - -int is_pidfd_link(char *link) -{ - /* - * pidfs was introduced in Linux 6.9 - * before which anonymous-inodes were used - */ - return is_anon_link_type(link, "[pidfd]"); -} - -static void 
pr_info_pidfd(char *action, PidfdEntry *pidfe) -{ - pr_info("%s: id %#08x flags %u NSpid %d ino %u\n", - action, pidfe->id, pidfe->flags, pidfe->nspid, pidfe->ino - ); -} - -static int dump_one_pidfd(int pidfd, u32 id, const struct fd_parms *p) -{ - struct pidfd_dump_info pidfd_info = {.pidfe = PIDFD_ENTRY__INIT}; - FileEntry fe = FILE_ENTRY__INIT; - - if (parse_fdinfo(pidfd, FD_TYPES__PIDFD, &pidfd_info)) - return -1; - - if (p->flags & PIDFD_THREAD) { - pr_err("PIDFD_THREAD flag is currently not supported\n"); - return -1; - } - - /* - * Check if the pid pidfd refers to is part of process tree - * This ensures the process will exist on restore. - */ - if (pidfd_info.pid != -1 && !pstree_item_by_real(pidfd_info.pid)) { - pr_err("pidfd pid %d is not a part of process tree..\n", - pidfd_info.pid); - return -1; - } - - pidfd_info.pidfe.id = id; - pidfd_info.pidfe.flags = (p->flags & ~O_RDWR); - pidfd_info.pidfe.fown = (FownEntry *)&p->fown; - - fe.type = FD_TYPES__PIDFD; - fe.id = pidfd_info.pidfe.id; - fe.pidfd = &pidfd_info.pidfe; - - pr_info_pidfd("Dumping", &pidfd_info.pidfe); - return pb_write_one(img_from_set(glob_imgset, CR_FD_FILES), &fe, PB_FILE); -} - -const struct fdtype_ops pidfd_dump_ops = { - .type = FD_TYPES__PIDFD, - .dump = dump_one_pidfd, -}; - -static int pidfd_open(pid_t pid, int flags) -{ - return syscall(__NR_pidfd_open, pid, flags); -} - -static int create_tmp_process(void) -{ - int tmp_process; - tmp_process = fork(); - if (tmp_process < 0) { - pr_perror("Could not fork"); - return -1; - } else if (tmp_process == 0) { - while(1) - sleep(1); - } - return tmp_process; -} - -static int kill_helper(pid_t pid) -{ - int status; - sigset_t blockmask, oldmask; - - /* - * Block SIGCHLD to prevent interfering from sigchld_handler() - * and to properly handle the tmp process termination without - * a race condition. A similar approach is used in cr_system(). 
- */ - sigemptyset(&oldmask); - sigemptyset(&blockmask); - sigaddset(&blockmask, SIGCHLD); - if (sigprocmask(SIG_BLOCK, &blockmask, &oldmask) == -1) { - pr_perror("Cannot set mask of blocked signals"); - goto err; - } - - if (kill(pid, SIGKILL) < 0) { - pr_perror("Could not kill temporary process with pid: %d", pid); - goto err; - } - - if (waitpid(pid, &status, 0) != pid) { - pr_perror("Could not wait on temporary process with pid: %d", pid); - goto err; - } - - /* Restore the original signal mask after tmp process has terminated */ - if (sigprocmask(SIG_SETMASK, &oldmask, NULL) == -1) { - pr_perror("Cannot clear blocked signals"); - goto err; - } - - if (!WIFSIGNALED(status)) { - pr_err("Expected temporary process to be terminated by a signal\n"); - goto err; - } - - if (WTERMSIG(status) != SIGKILL) { - pr_err("Expected temporary process to be terminated by SIGKILL\n"); - goto err; - } - - return 0; -err: - return -1; -} - -static int open_one_pidfd(struct file_desc *d, int *new_fd) -{ - struct pidfd_info *info, *child; - struct dead_pidfd *dead = NULL; - pid_t pid; - int pidfd; - - info = container_of(d, struct pidfd_info, d); - if (info->pidfe->nspid != -1) { - pidfd = pidfd_open(info->pidfe->nspid, info->pidfe->flags); - if (pidfd < 0) { - pr_perror("Could not open pidfd for %d", info->pidfe->nspid); - goto err_close; - } - goto out; - } - - dead = lookup_dead_pidfd(info->pidfe->ino); - BUG_ON(!dead); - - if (info->dead && info->dead->creator_id != info->pidfe->id) { - int ret = recv_desc_from_peer(&info->d, &pidfd); - if (ret != 0) { - if (ret != 1) - pr_err("Can't get fd\n"); - return ret; - } - goto out; - } - - pid = create_tmp_process(); - if (pid < 0) - goto err_close; - - for (child = dead->list; child; child = child->next) { - if (child == info) - continue; - pidfd = pidfd_open(pid, child->pidfe->flags); - if (pidfd < 0) { - pr_perror("Could not open pidfd for %d", child->pidfe->nspid); - goto err_close; - } - - if (send_desc_to_peer(pidfd, &child->d)) 
{ - pr_perror("Can't send file descriptor"); - close(pidfd); - return -1; - } - close(pidfd); - } - - pidfd = pidfd_open(pid, info->pidfe->flags); - if (pidfd < 0) { - pr_perror("Could not open pidfd for %d", info->pidfe->nspid); - goto err_close; - } - if (kill_helper(pid)) - goto err_close; -out: - if (rst_file_params(pidfd, info->pidfe->fown, info->pidfe->flags)) { - goto err_close; - } - - *new_fd = pidfd; - return 0; -err_close: - pr_err("Can't create pidfd %#08x NSpid: %d flags: %u\n", - info->pidfe->id, info->pidfe->nspid, info->pidfe->flags); - return -1; -} - -static struct file_desc_ops pidfd_desc_ops = { - .type = FD_TYPES__PIDFD, - .open = open_one_pidfd -}; - -static int collect_one_pidfd(void *obj, ProtobufCMessage *msg, struct cr_img *i) -{ - struct dead_pidfd *dead; - struct pidfd_info *info = obj; - - info->pidfe = pb_msg(msg, PidfdEntry); - pr_info_pidfd("Collected ", info->pidfe); - - info->dead = NULL; - if (info->pidfe->nspid != -1) - goto out; - - dead = lookup_dead_pidfd(info->pidfe->ino); - if (!dead) { - dead = xmalloc(sizeof(*dead)); - if (!dead) { - pr_err("Could not allocate memory..\n"); - return -1; - } - - INIT_HLIST_NODE(&dead->hash); - dead->list = NULL; - dead->ino = info->pidfe->ino; - dead->creator_id = info->pidfe->id; - hlist_add_head(&dead->hash, &dead_pidfd_hash[dead->ino % DEAD_PIDFD_HASH_SIZE]); - } - - info->dead = dead; - info->next = dead->list; - dead->list = info; - if (dead->creator_id > info->pidfe->id) - dead->creator_id = info->pidfe->id; - -out: - return file_desc_add(&info->d, info->pidfe->id, &pidfd_desc_ops); -} - -struct collect_image_info pidfd_cinfo = { - .fd_type = CR_FD_PIDFD, - .pb_type = PB_PIDFD, - .priv_size = sizeof(struct pidfd_info), - .collect = collect_one_pidfd, -}; diff --git a/criu/pie/Makefile b/criu/pie/Makefile index 60c7f1e94..912fab24b 100644 --- a/criu/pie/Makefile +++ b/criu/pie/Makefile @@ -23,10 +23,6 @@ ifeq ($(ARCH),x86) ccflags-y += -mshstk endif -ifeq ($(ARCH),riscv64) - ccflags-y 
+= -fno-stack-protector -endif - LDS := compel/arch/$(ARCH)/scripts/compel-pack.lds.S restorer-obj-y += parasite-vdso.o ./$(ARCH_DIR)/vdso-pie.o @@ -47,10 +43,6 @@ ifeq ($(ARCH),ppc64) restorer-obj-y += ./$(ARCH_DIR)/vdso-trampoline.o endif -ifeq ($(ARCH),riscv64) - restorer-obj-y += ./$(ARCH_DIR)/vdso-lookup.o -endif - define gen-pie-rules $(1)-obj-y += $(1).o $(1)-obj-e += pie.lib.a diff --git a/criu/pie/Makefile.library b/criu/pie/Makefile.library index d96a7ac32..da2a2fab3 100644 --- a/criu/pie/Makefile.library +++ b/criu/pie/Makefile.library @@ -27,7 +27,3 @@ CFLAGS += $(CFLAGS_PIE) ifeq ($(ARCH),mips) CFLAGS += -fno-stack-protector -DCR_NOGLIBC -mno-abicalls -fno-pic endif - -ifeq ($(ARCH),riscv64) - ccflags-y += -fno-stack-protector -endif \ No newline at end of file diff --git a/criu/pie/parasite-vdso.c b/criu/pie/parasite-vdso.c index f3ad3107f..355007fa9 100644 --- a/criu/pie/parasite-vdso.c +++ b/criu/pie/parasite-vdso.c @@ -45,7 +45,6 @@ static int remap_one(char *who, unsigned long *from, unsigned long to, size_t si static int park_at(struct vdso_maps *rt, unsigned long vdso, unsigned long vvar) { unsigned long vvar_size = rt->sym.vvar_size; - unsigned long vvar_vclock_size = rt->sym.vvar_vclock_size; unsigned long vdso_size = rt->sym.vdso_size; int ret; @@ -55,24 +54,8 @@ static int park_at(struct vdso_maps *rt, unsigned long vdso, unsigned long vvar) std_log_set_gettimeofday(NULL); /* stop using vdso for timings */ - if (vvar) { - /* - * v6.13-rc1~172^2~9 splits the vvar vma in two parts vvar and - * vvar_clock. The last one is mapped right after the first - * one. 
- */ - if (vvar_vclock_size) { - unsigned long from; - - vvar_size -= vvar_vclock_size; - from = rt->vvar_start + vvar_size; - - ret = remap_one("rt-vvar", &from, vvar + vvar_size, vvar_vclock_size); - if (ret) - return ret; - } + if (vvar) ret = remap_one("rt-vvar", &rt->vvar_start, vvar, vvar_size); - } if (!ret) vdso_update_gtod_addr(rt); diff --git a/criu/pie/parasite.c b/criu/pie/parasite.c index c966e9e62..e151ed656 100644 --- a/criu/pie/parasite.c +++ b/criu/pie/parasite.c @@ -101,7 +101,7 @@ static int dump_pages(struct parasite_dump_pages_args *args) } if (spliced_bytes != args->nr_pages * PAGE_SIZE) { sys_close(p); - pr_err("Can't splice all pages to pipe (%ld/%ld)\n", spliced_bytes, args->nr_pages); + pr_err("Can't splice all pages to pipe (%ld/%d)\n", spliced_bytes, args->nr_pages); return -1; } @@ -324,7 +324,6 @@ static int dump_creds(struct parasite_dump_creds *args) args->cap_prm[i] = data[i].prm; args->cap_inh[i] = data[i].inh; args->cap_bnd[i] = 0; - args->cap_amb[i] = 0; for (j = 0; j < 32; j++) { if (j + i * 32 > args->cap_last_cap) @@ -337,18 +336,6 @@ static int dump_creds(struct parasite_dump_creds *args) if (ret) args->cap_bnd[i] |= (1 << j); } - - for (j = 0; j < 32; j++) { - if (j + i * 32 > args->cap_last_cap) - break; - ret = sys_prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, j + i * 32, 0, 0); - if (ret < 0) { - pr_err("Unable to read ambient capability %d: %d\n", j + i * 32, ret); - return -1; - } - if (ret) - args->cap_amb[i] |= (1 << j); - } } args->no_new_privs = sys_prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0); diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c index 0a8aba41b..51ed6ed4c 100644 --- a/criu/pie/restorer.c +++ b/criu/pie/restorer.c @@ -28,7 +28,6 @@ #include #include #include -#include "mman.h" #include "signal.h" #include "prctl.h" #include "criu-log.h" @@ -348,22 +347,6 @@ skip_xids: return -1; } - for (b = 0; b < CR_CAP_SIZE; b++) { - for (i = 0; i < 32; i++) { - if (b * 32 + i > args->cap_last_cap) - break; - if 
((args->cap_amb[b] & (1 << i)) == 0) - /* don't set */ - continue; - ret = sys_prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, i + b * 32, 0, 0); - if (!ret) - continue; - pr_err("Unable to raise ambient capability %d: %d\n", i + b * 32, ret); - return -1; - } - } - - if (lsm_type != LSMTYPE__SELINUX) { /* * SELinux does not support setting the process context for @@ -705,8 +688,9 @@ static int send_cg_set(int sk, int cg_set) } /* - * As the cgroupd socket is shared among threads and processes, this - * should be called with task_entries->cgroupd_sync_lock held. + * As this socket is shared among threads, recvmsg(MSG_PEEK) + * from the socket until getting its own thread id as an + * acknowledge of successful threaded cgroup fixup */ static int recv_cg_set_restore_ack(int sk) { @@ -719,22 +703,33 @@ static int recv_cg_set_restore_ack(int sk) h.msg_control = cmsg; h.msg_controllen = sizeof(cmsg); - ret = sys_recvmsg(sk, &h, 0); - if (ret < 0) { - pr_err("Unable to receive from cgroupd %d\n", ret); - return -1; - } + while (1) { + ret = sys_recvmsg(sk, &h, MSG_PEEK); + if (ret < 0) { + pr_err("Unable to peek from cgroupd %d\n", ret); + return -1; + } - if (h.msg_controllen != sizeof(cmsg)) { - pr_err("The message from cgroupd is truncated\n"); - return -1; - } + if (h.msg_controllen != sizeof(cmsg)) { + pr_err("The message from cgroupd is truncated\n"); + return -1; + } - ch = CMSG_FIRSTHDR(&h); - cred = (struct ucred *)CMSG_DATA(ch); - if (cred->pid != sys_gettid()) { - pr_err("cred pid %d != gettid\n", cred->pid); - return -1; + ch = CMSG_FIRSTHDR(&h); + cred = (struct ucred *)CMSG_DATA(ch); + if (cred->pid != sys_gettid()) + continue; + + /* + * Actual remove message from recv queue of socket + */ + ret = sys_recvmsg(sk, &h, 0); + if (ret < 0) { + pr_err("Unable to receive from cgroupd %d\n", ret); + return -1; + } + + break; } return 0; } @@ -771,21 +766,12 @@ __visible long __export_restore_thread(struct thread_restore_args *args) rt_sigframe = (void 
*)&args->mz->rt_sigframe; if (args->cg_set != -1) { - int err = 0; - - mutex_lock(&task_entries_local->cgroupd_sync_lock); - pr_info("Restore cg_set in thread cg_set: %d\n", args->cg_set); - - err = send_cg_set(args->cgroupd_sk, args->cg_set); - if (!err) - err = recv_cg_set_restore_ack(args->cgroupd_sk); - - mutex_unlock(&task_entries_local->cgroupd_sync_lock); - sys_close(args->cgroupd_sk); - - if (err) + if (send_cg_set(args->cgroupd_sk, args->cg_set)) goto core_restore_end; + if (recv_cg_set_restore_ack(args->cgroupd_sk)) + goto core_restore_end; + sys_close(args->cgroupd_sk); } if (restore_thread_common(args)) @@ -1112,23 +1098,6 @@ static int vma_remap(VmaEntry *vma_entry, int uffd) pr_info("Remap %lx->%lx len %lx\n", src, dst, len); - /* - * SHSTK VMAs are a bit special, in fact we create shstk vma right in the - * shstk_vma_restore() and populate it with contents from a premapped VMA - * (which in turns is just a normal anonymous VMA!). Then, we munmap() this - * premapped VMA. After, we need to adjust vma_premmaped_start(vma_entry) - * to point to a created shstk vma and treat it as a premmaped one in vma_remap(). - */ - if (vma_entry_is(vma_entry, VMA_AREA_SHSTK)) { - if (shstk_vma_restore(vma_entry)) { - pr_err("Unable to prepare shadow stack vma for remap %lx -> %lx\n", src, dst); - return -1; - } - - /* shstk_vma_restore() modifies vma premapped address */ - src = vma_premmaped_start(vma_entry); - } - if (src - dst < len) guard = dst; else if (dst - src < len) @@ -1253,23 +1222,9 @@ static int timerfd_arm(struct task_restore_args *args) static int create_posix_timers(struct task_restore_args *args) { - int ret, i, exit_code = -1; + int ret, i; kernel_timer_t next_id = 0, timer_id; struct sigevent sev; - bool create_restore_ids = false; - - if (!args->posix_timers_n) - return 0; - - /* prctl returns EINVAL if PR_TIMER_CREATE_RESTORE_IDS isn't supported. 
*/ - ret = sys_prctl(PR_TIMER_CREATE_RESTORE_IDS, - PR_TIMER_CREATE_RESTORE_IDS_ON, 0, 0, 0); - if (ret == 0) { - create_restore_ids = true; - } else if (ret != -EINVAL) { - pr_err("Can't enabled PR_TIMER_CREATE_RESTORE_IDS: %d\n", ret); - return -1; - } for (i = 0; i < args->posix_timers_n; i++) { sev.sigev_notify = args->posix_timers[i].spt.it_sigev_notify; @@ -1281,36 +1236,16 @@ static int create_posix_timers(struct task_restore_args *args) #endif sev.sigev_value.sival_ptr = args->posix_timers[i].spt.sival_ptr; - if (create_restore_ids) { - /* - * With enabled PR_TIMER_CREATE_RESTORE_IDS, the - * timer_create syscall creates a new timer with the - * specified ID. - */ - timer_id = args->posix_timers[i].spt.it_id; - ret = sys_timer_create(args->posix_timers[i].spt.clock_id, &sev, &timer_id); - if (ret < 0) { - pr_err("Can't create posix timer - %d: %d\n", i, ret); - goto out; - } - if (timer_id != args->posix_timers[i].spt.it_id) { - pr_err("Unexpected timer id %u (expected %lu)\n", - timer_id, args->posix_timers[i].spt.it_id); - goto out; - } - continue; - } - while (1) { ret = sys_timer_create(args->posix_timers[i].spt.clock_id, &sev, &timer_id); if (ret < 0) { pr_err("Can't create posix timer - %d\n", i); - goto out; + return ret; } if (timer_id != next_id) { pr_err("Can't create timers, kernel don't give them consequently\n"); - goto out; + return -1; } next_id++; @@ -1320,22 +1255,12 @@ static int create_posix_timers(struct task_restore_args *args) ret = sys_timer_delete(timer_id); if (ret < 0) { pr_err("Can't remove temporaty posix timer 0x%x\n", timer_id); - goto out; + return ret; } } } - exit_code = 0; -out: - if (create_restore_ids) { - ret = sys_prctl(PR_TIMER_CREATE_RESTORE_IDS, - PR_TIMER_CREATE_RESTORE_IDS_OFF, 0, 0, 0); - if (ret != 0) { - pr_err("Can't disable PR_TIMER_CREATE_RESTORE_IDS: %d\n", ret); - exit_code = -1; - } - } - return exit_code; + return 0; } static void restore_posix_timers(struct task_restore_args *args) @@ -1363,19 +1288,13 
@@ __visible void __export_unmap(void) sys_munmap(bootstrap_start, bootstrap_len - vdso_rt_size); } -static int unregister_libc_rseq(struct rst_rseq_param *rseq) +static void unregister_libc_rseq(struct rst_rseq_param *rseq) { - long ret; - if (!rseq->rseq_abi_pointer) - return 0; + return; - ret = sys_rseq(decode_pointer(rseq->rseq_abi_pointer), rseq->rseq_abi_size, 1, rseq->signature); - if (ret) { - pr_err("Failed to unregister libc rseq %ld\n", ret); - return -1; - } - return 0; + /* can't fail if rseq is registered */ + sys_rseq(decode_pointer(rseq->rseq_abi_pointer), rseq->rseq_abi_size, 1, rseq->signature); } /* @@ -1689,30 +1608,6 @@ static int restore_membarrier_registrations(int mask) return ret; } -static int restore_madv_guard_regions(struct task_restore_args *args) -{ - int i, ret; - - for (i = 0; i < args->vmas_n; i++) { - VmaEntry *vma_entry = args->vmas + i; - size_t len; - - if (!vma_entry_is(vma_entry, VMA_AREA_GUARD)) - continue; - - len = vma_entry->end - vma_entry->start; - ret = sys_madvise(vma_entry->start, len, MADV_GUARD_INSTALL); - if (ret) { - pr_err("madvise(%" PRIx64 ", %zu, MADV_GUARD_INSTALL) " - "failed with %d\n", - vma_entry->start, len, ret); - return -1; - } - } - - return 0; -} - /* * The main routine to restore task via sigreturn. 
* This one is very special, we never return there @@ -1809,8 +1704,7 @@ __visible long __export_restore_task(struct task_restore_args *args) * for instance once the kernel will want to update (struct rseq).cpu_id field: * https://github.com/torvalds/linux/blob/ce522ba9ef7e/kernel/rseq.c#L89 */ - if (unregister_libc_rseq(&args->libc_rseq)) - goto core_restore_end; + unregister_libc_rseq(&args->libc_rseq); if (unmap_old_vmas((void *)args->premmapped_addr, args->premmapped_len, bootstrap_start, bootstrap_len, args->task_size)) @@ -1835,6 +1729,13 @@ __visible long __export_restore_task(struct task_restore_args *args) if (vma_entry->start > vma_entry->shmid) break; + /* + * shadow stack VMAs cannot be remapped, they must be + * recreated with map_shadow_stack system call + */ + if (vma_entry_is(vma_entry, VMA_AREA_SHSTK)) + continue; + if (vma_remap(vma_entry, args->uffd)) goto core_restore_end; } @@ -1852,6 +1753,13 @@ __visible long __export_restore_task(struct task_restore_args *args) if (vma_entry->start < vma_entry->shmid) break; + /* + * shadow stack VMAs cannot be remapped, they must be + * recreated with map_shadow_stack system call + */ + if (vma_entry_is(vma_entry, VMA_AREA_SHSTK)) + continue; + if (vma_remap(vma_entry, args->uffd)) goto core_restore_end; } @@ -1996,9 +1904,6 @@ __visible long __export_restore_task(struct task_restore_args *args) for (m = 0; m < sizeof(vma_entry->madv) * 8; m++) { if (vma_entry->madv & (1ul << m)) { - if (!(vma_entry_is(vma_entry, VMA_AREA_REGULAR))) - continue; - ret = sys_madvise(vma_entry->start, vma_entry_len(vma_entry), m); if (ret) { pr_err("madvise(%" PRIx64 ", %" PRIu64 ", %ld) " @@ -2010,13 +1915,6 @@ __visible long __export_restore_task(struct task_restore_args *args) } } - /* - * Restore madvise(MADV_GUARD_INSTALL) - */ - ret = restore_madv_guard_regions(args); - if (ret) - goto core_restore_end; - /* * Tune up the task fields. 
*/ @@ -2312,7 +2210,7 @@ __visible long __export_restore_task(struct task_restore_args *args) * code below doesn't fail due to bad timing values. */ -#define itimer_armed(args, i) (args->itimers[i].it_value.tv_sec || args->itimers[i].it_value.tv_usec) +#define itimer_armed(args, i) (args->itimers[i].it_interval.tv_sec || args->itimers[i].it_interval.tv_usec) if (itimer_armed(args, 0)) sys_setitimer(ITIMER_REAL, &args->itimers[0], NULL); diff --git a/criu/pie/util-vdso.c b/criu/pie/util-vdso.c index 45fb6a648..f1e3239ff 100644 --- a/criu/pie/util-vdso.c +++ b/criu/pie/util-vdso.c @@ -5,7 +5,6 @@ #include #include #include -#include #include #include @@ -49,25 +48,10 @@ static bool __ptr_struct_oob(uintptr_t ptr, size_t struct_size, uintptr_t start, return __ptr_oob(ptr, start, size) || __ptr_struct_end_oob(ptr, struct_size, start, size); } -/* Local strlen implementation */ -static size_t __strlen(const char *str) -{ - const char *ptr; - - if (!str) - return 0; - - ptr = str; - while (*ptr != '\0') - ptr++; - - return ptr - str; -} - /* * Elf hash, see format specification. */ -static unsigned long elf_sysv_hash(const unsigned char *name) +static unsigned long elf_hash(const unsigned char *name) { unsigned long h = 0, g; @@ -81,15 +65,6 @@ static unsigned long elf_sysv_hash(const unsigned char *name) return h; } -/* * The GNU hash format. Taken from glibc. 
*/ -static unsigned long elf_gnu_hash(const unsigned char *name) -{ - unsigned long h = 5381; - for (unsigned char c = *name; c != '\0'; c = *++name) - h = h * 33 + c; - return h; -} - #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ #define BORD ELFDATA2MSB /* 0x02 */ #else @@ -98,51 +73,30 @@ static unsigned long elf_gnu_hash(const unsigned char *name) static int has_elf_identity(Ehdr_t *ehdr) { - /* check ELF magic */ - - if (ehdr->e_ident[EI_MAG0] != ELFMAG0 || - ehdr->e_ident[EI_MAG1] != ELFMAG1 || - ehdr->e_ident[EI_MAG2] != ELFMAG2 || - ehdr->e_ident[EI_MAG3] != ELFMAG3) { - pr_err("Invalid ELF magic\n"); - return false; - }; - - /* check ELF class */ + /* + * See Elf specification for this magic values. + */ #if defined(CONFIG_VDSO_32) - if (ehdr->e_ident[EI_CLASS] != ELFCLASS32) { - pr_err("Unsupported ELF class: %d\n", ehdr->e_ident[EI_CLASS]); - return false; + static const char elf_ident[] = { + 0x7f, 0x45, 0x4c, 0x46, 0x01, BORD, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, }; #else - if (ehdr->e_ident[EI_CLASS] != ELFCLASS64) { - pr_err("Unsupported ELF class: %d\n", ehdr->e_ident[EI_CLASS]); - return false; + static const char elf_ident[] = { + 0x7f, 0x45, 0x4c, 0x46, 0x02, BORD, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, }; #endif - /* check ELF data encoding */ - if (ehdr->e_ident[EI_DATA] != ELFDATA2LSB) { - pr_err("Unsupported ELF data encoding: %d\n", ehdr->e_ident[EI_DATA]); + BUILD_BUG_ON(sizeof(elf_ident) != sizeof(ehdr->e_ident)); + + if (memcmp(ehdr->e_ident, elf_ident, sizeof(elf_ident))) { + pr_err("ELF header magic mismatch\n"); return false; - }; - /* check ELF version */ - if (ehdr->e_ident[EI_VERSION] != EV_CURRENT) { - pr_err("Unsupported ELF version: %d\n", ehdr->e_ident[EI_VERSION]); - return false; - }; - /* check ELF OSABI */ - if (ehdr->e_ident[EI_OSABI] != ELFOSABI_NONE && - ehdr->e_ident[EI_OSABI] != ELFOSABI_LINUX) { - pr_err("Unsupported OSABI version: %d\n", ehdr->e_ident[EI_OSABI]); - return 
false; - }; + } return true; } -static int parse_elf_phdr(uintptr_t mem, size_t size, - Phdr_t **dynamic, Phdr_t **load, bool *is_32bit) +static int parse_elf_phdr(uintptr_t mem, size_t size, Phdr_t **dynamic, Phdr_t **load) { Ehdr_t *ehdr = (void *)mem; uintptr_t addr; @@ -157,8 +111,6 @@ static int parse_elf_phdr(uintptr_t mem, size_t size, if (!has_elf_identity(ehdr)) return -EINVAL; - *is_32bit = ehdr->e_ident[EI_CLASS] != ELFCLASS64; - addr = mem + ehdr->e_phoff; if (__ptr_oob(addr, mem, size)) goto err_oob; @@ -197,14 +149,11 @@ err_oob: * Output parameters are: * @dyn_strtab - address of the symbol table * @dyn_symtab - address of the string table section - * @dyn_hash - address of the symbol hash table - * @use_gnu_hash - the format of hash DT_HASH or DT_GNU_HASH + * @dyn_hash - address of the symbol hash table */ -static int parse_elf_dynamic(uintptr_t mem, size_t size, Phdr_t *dynamic, - Dyn_t **dyn_strtab, Dyn_t **dyn_symtab, - Dyn_t **dyn_hash, bool *use_gnu_hash) +static int parse_elf_dynamic(uintptr_t mem, size_t size, Phdr_t *dynamic, Dyn_t **dyn_strtab, Dyn_t **dyn_symtab, + Dyn_t **dyn_hash) { - Dyn_t *dyn_gnu_hash = NULL, *dyn_sysv_hash = NULL; Dyn_t *dyn_syment = NULL; Dyn_t *dyn_strsz = NULL; uintptr_t addr; @@ -235,52 +184,16 @@ static int parse_elf_dynamic(uintptr_t mem, size_t size, Phdr_t *dynamic, dyn_syment = d; pr_debug("DT_SYMENT: %lx\n", (unsigned long)d->d_un.d_val); } else if (d->d_tag == DT_HASH) { - dyn_sysv_hash = d; + *dyn_hash = d; pr_debug("DT_HASH: %lx\n", (unsigned long)d->d_un.d_ptr); - } else if (d->d_tag == DT_GNU_HASH) { - /* - * This is complicated. 
- * - * Looking at the Linux kernel source, the following can be seen - * regarding which hashing style the VDSO uses on each arch: - * - * aarch64: not specified (depends on linker, can be - * only GNU hash style) - * arm: --hash-style=sysv - * loongarch: --hash-style=sysv - * mips: --hash-style=sysv - * powerpc: --hash-style=both - * riscv: --hash-style=both - * s390: --hash-style=both - * x86: --hash-style=both - * - * Some architectures are using both hash-styles, that - * is the easiest for CRIU. Some architectures are only - * using the old style (sysv), that is what CRIU supports. - * - * Starting with Linux 6.11, aarch64 unfortunately decided - * to switch from '--hash-style=sysv' to ''. Specifying - * nothing unfortunately may mean GNU hash style only and not - * 'both' (depending on the linker). - */ - dyn_gnu_hash = d; - pr_debug("DT_GNU_HASH: %lx\n", (unsigned long)d->d_un.d_ptr); } } - if (!*dyn_strtab || !*dyn_symtab || !dyn_strsz || !dyn_syment || - (!dyn_gnu_hash && !dyn_sysv_hash)) { + if (!*dyn_strtab || !*dyn_symtab || !dyn_strsz || !dyn_syment || !*dyn_hash) { pr_err("Not all dynamic entries are present\n"); return -EINVAL; } - /* - * Prefer DT_HASH over DT_GNU_HASH as it's been more tested and - * as a result more stable. 
- */ - *use_gnu_hash = !dyn_sysv_hash; - *dyn_hash = dyn_sysv_hash ?: dyn_gnu_hash; - return 0; err_oob: @@ -295,156 +208,60 @@ typedef unsigned long Hash_t; typedef Word_t Hash_t; #endif -typedef uint32_t Hash32_t; - -static bool elf_symbol_match(uintptr_t mem, size_t size, - uintptr_t dynsymbol_names, Sym_t *sym, - const char *symbol, const size_t vdso_symbol_length) -{ - uintptr_t addr = (uintptr_t)sym; - char *name; - - if (__ptr_struct_oob(addr, sizeof(Sym_t), mem, size)) - return false; - - if (ELF_ST_TYPE(sym->st_info) != STT_FUNC && ELF_ST_BIND(sym->st_info) != STB_GLOBAL) - return false; - - addr = dynsymbol_names + sym->st_name; - if (__ptr_struct_oob(addr, vdso_symbol_length, mem, size)) - return false; - name = (void *)addr; - - return !std_strncmp(name, symbol, vdso_symbol_length); -} - - -static unsigned long elf_symbol_lookup(uintptr_t mem, size_t size, - const char *symbol, uint32_t symbol_hash, unsigned int sym_off, - uintptr_t dynsymbol_names, Dyn_t *dyn_symtab, Phdr_t *load, - uint32_t nbucket, uint32_t nchain, void *_bucket, Hash_t *chain, - const size_t vdso_symbol_length, bool use_gnu_hash) -{ - unsigned int j; - uintptr_t addr; - - addr = mem + dyn_symtab->d_un.d_ptr - load->p_vaddr; - - if (use_gnu_hash) { - Hash32_t *h, hash_val, *bucket = _bucket; - - j = bucket[symbol_hash % nbucket]; - if (j == STN_UNDEF) - return 0; - - h = bucket + nbucket + (j - sym_off); - - symbol_hash |= 1; - do { - Sym_t *sym = (void *)addr + sizeof(Sym_t) * j; - - hash_val = *h++; - if ((hash_val | 1) == symbol_hash && - elf_symbol_match(mem, size, dynsymbol_names, sym, - symbol, vdso_symbol_length)) - return sym->st_value; - j++; - } while (!(hash_val & 1)); - } else { - Hash_t *bucket = _bucket; - - j = bucket[symbol_hash % nbucket]; - if (j == STN_UNDEF) - return 0; - - for (; j < nchain && j != STN_UNDEF; j = chain[j]) { - Sym_t *sym = (void *)addr + sizeof(Sym_t) * j; - - if (elf_symbol_match(mem, size, dynsymbol_names, sym, - symbol, vdso_symbol_length)) - 
return sym->st_value; - } - } - return 0; -} - -static int parse_elf_symbols(uintptr_t mem, size_t size, Phdr_t *load, - struct vdso_symtable *t, uintptr_t dynsymbol_names, - Hash_t *hash, Dyn_t *dyn_symtab, bool use_gnu_hash, - bool is_32bit) +static void parse_elf_symbols(uintptr_t mem, size_t size, Phdr_t *load, struct vdso_symtable *t, + uintptr_t dynsymbol_names, Hash_t *hash, Dyn_t *dyn_symtab) { ARCH_VDSO_SYMBOLS_LIST const char *vdso_symbols[VDSO_SYMBOL_MAX] = { ARCH_VDSO_SYMBOLS }; const size_t vdso_symbol_length = sizeof(t->symbols[0].name) - 1; - void *bucket = NULL; - Hash_t *chain = NULL; - uint32_t nbucket, nchain = 0; + Hash_t nbucket, nchain; + Hash_t *bucket, *chain; - unsigned int sym_off = 0; - unsigned int i = 0; + unsigned int i, j, k; + uintptr_t addr; - unsigned long (*elf_hash)(const unsigned char *); - - if (use_gnu_hash) { - uint32_t *gnu_hash = (uint32_t *)hash; - uint32_t bloom_sz; - - nbucket = gnu_hash[0]; - sym_off = gnu_hash[1]; - bloom_sz = gnu_hash[2]; - if (is_32bit) { - uint32_t *bloom; - bloom = (uint32_t *)&gnu_hash[4]; - bucket = (Hash_t *)(&bloom[bloom_sz]); - } else { - uint64_t *bloom; - bloom = (uint64_t *)&gnu_hash[4]; - bucket = (Hash_t *)(&bloom[bloom_sz]); - } - elf_hash = &elf_gnu_hash; - pr_debug("nbucket %lx sym_off %lx bloom_sz %lx bucket %lx\n", - (unsigned long)nbucket, (unsigned long)sym_off, - (unsigned long)bloom_sz, - (unsigned long)bucket); - } else { - nbucket = hash[0]; - nchain = hash[1]; - bucket = &hash[2]; - chain = &hash[nbucket + 2]; - elf_hash = &elf_sysv_hash; - pr_debug("nbucket %lx nchain %lx bucket %lx chain %lx\n", - (unsigned long)nbucket, (unsigned long)nchain, - (unsigned long)bucket, (unsigned long)chain); - } + nbucket = hash[0]; + nchain = hash[1]; + bucket = &hash[2]; + chain = &hash[nbucket + 2]; + pr_debug("nbucket %lx nchain %lx bucket %lx chain %lx\n", (long)nbucket, (long)nchain, (unsigned long)bucket, + (unsigned long)chain); for (i = 0; i < VDSO_SYMBOL_MAX; i++) { const char 
*symbol = vdso_symbols[i]; - unsigned long addr, symbol_hash; - const size_t symbol_length = __strlen(symbol); + k = elf_hash((const unsigned char *)symbol); - symbol_hash = elf_hash((const unsigned char *)symbol); - addr = elf_symbol_lookup(mem, size, symbol, symbol_hash, - sym_off, dynsymbol_names, dyn_symtab, load, - nbucket, nchain, bucket, chain, - vdso_symbol_length, use_gnu_hash); - pr_debug("symbol %s at address %lx\n", symbol, addr); - if (!addr) - continue; + for (j = bucket[k % nbucket]; j < nchain && j != STN_UNDEF; j = chain[j]) { + Sym_t *sym; + char *name; - /* XXX: provide strncpy() implementation for PIE */ - if (symbol_length > vdso_symbol_length) { - pr_err("strlen(%s) %zd, only %zd bytes available\n", - symbol, symbol_length, vdso_symbol_length); - return -EINVAL; + addr = mem + dyn_symtab->d_un.d_ptr - load->p_vaddr; + + addr += sizeof(Sym_t) * j; + if (__ptr_struct_oob(addr, sizeof(Sym_t), mem, size)) + continue; + sym = (void *)addr; + + if (ELF_ST_TYPE(sym->st_info) != STT_FUNC && ELF_ST_BIND(sym->st_info) != STB_GLOBAL) + continue; + + addr = dynsymbol_names + sym->st_name; + if (__ptr_struct_oob(addr, vdso_symbol_length, mem, size)) + continue; + name = (void *)addr; + + if (std_strncmp(name, symbol, vdso_symbol_length)) + continue; + + /* XXX: provide strncpy() implementation for PIE */ + memcpy(t->symbols[i].name, name, vdso_symbol_length); + t->symbols[i].offset = (unsigned long)sym->st_value - load->p_vaddr; + break; } - memcpy(t->symbols[i].name, symbol, symbol_length); - t->symbols[i].offset = addr - load->p_vaddr; } - - return 0; } int vdso_fill_symtable(uintptr_t mem, size_t size, struct vdso_symtable *t) @@ -454,8 +271,6 @@ int vdso_fill_symtable(uintptr_t mem, size_t size, struct vdso_symtable *t) Dyn_t *dyn_symtab = NULL; Dyn_t *dyn_hash = NULL; Hash_t *hash = NULL; - bool use_gnu_hash; - bool is_32bit; uintptr_t dynsymbol_names; uintptr_t addr; @@ -466,7 +281,7 @@ int vdso_fill_symtable(uintptr_t mem, size_t size, struct 
vdso_symtable *t) /* * We need PT_LOAD and PT_DYNAMIC here. Each once. */ - ret = parse_elf_phdr(mem, size, &dynamic, &load, &is_32bit); + ret = parse_elf_phdr(mem, size, &dynamic, &load); if (ret < 0) return ret; if (!load || !dynamic) { @@ -481,8 +296,7 @@ int vdso_fill_symtable(uintptr_t mem, size_t size, struct vdso_symtable *t) * needed. Note that we're interested in a small set of tags. */ - ret = parse_elf_dynamic(mem, size, dynamic, &dyn_strtab, &dyn_symtab, - &dyn_hash, &use_gnu_hash); + ret = parse_elf_dynamic(mem, size, dynamic, &dyn_strtab, &dyn_symtab, &dyn_hash); if (ret < 0) return ret; @@ -496,11 +310,7 @@ int vdso_fill_symtable(uintptr_t mem, size_t size, struct vdso_symtable *t) goto err_oob; hash = (void *)addr; - ret = parse_elf_symbols(mem, size, load, t, dynsymbol_names, hash, dyn_symtab, - use_gnu_hash, is_32bit); - - if (ret <0) - return ret; + parse_elf_symbols(mem, size, load, t, dynsymbol_names, hash, dyn_symtab); return 0; diff --git a/criu/plugin.c b/criu/plugin.c index f9322a3c2..58b5ea5bf 100644 --- a/criu/plugin.c +++ b/criu/plugin.c @@ -59,9 +59,6 @@ static cr_plugin_desc_t *cr_gen_plugin_desc(void *h, char *path) __assign_hook(RESUME_DEVICES_LATE, "cr_plugin_resume_devices_late"); __assign_hook(PAUSE_DEVICES, "cr_plugin_pause_devices"); __assign_hook(CHECKPOINT_DEVICES, "cr_plugin_checkpoint_devices"); - __assign_hook(POST_FORKING, "cr_plugin_post_forking"); - __assign_hook(RESTORE_INIT, "cr_plugin_restore_init"); - __assign_hook(DUMP_DEVICES_LATE, "cr_plugin_dump_devices_late"); #undef __assign_hook @@ -259,17 +256,6 @@ int cr_plugin_init(int stage) goto err; } - if (stage == CR_PLUGIN_STAGE__RESTORE) { - int ret; - - if (check_inventory_plugins()) - goto err; - - ret = run_plugins(RESTORE_INIT); - if (ret < 0 && ret != -ENOTSUP) - goto err; - } - exit_code = 0; err: closedir(d); diff --git a/criu/proc_parse.c b/criu/proc_parse.c index f51f2e801..92655a484 100644 --- a/criu/proc_parse.c +++ b/criu/proc_parse.c @@ -42,12 +42,10 @@ 
#include "fault-injection.h" #include "memfd.h" #include "hugetlb.h" -#include "pidfd.h" #include "protobuf.h" #include "images/fdinfo.pb-c.h" #include "images/mnt.pb-c.h" -#include "pidfd.pb-c.h" #include "plugin.h" #include @@ -74,8 +72,6 @@ struct buffer { static struct buffer __buf; static char *buf = __buf.buf; -/* only ever goes from false to true, if at all */ -static bool uprobes_vma_exists = false; /* * This is how AIO ring buffers look like in proc @@ -146,8 +142,6 @@ static void __parse_vmflags(char *buf, u32 *flags, u64 *madv, int *io_pf, *flags |= MAP_NORESERVE; else if (_vmflag_match(tok, "ht")) *flags |= MAP_HUGETLB; - else if (_vmflag_match(tok, "dp")) - *flags |= MAP_DROPPABLE; /* madvise() block */ if (_vmflag_match(tok, "sr")) @@ -164,8 +158,6 @@ static void __parse_vmflags(char *buf, u32 *flags, u64 *madv, int *io_pf, *madv |= (1ul << MADV_HUGEPAGE); else if (_vmflag_match(tok, "nh")) *madv |= (1ul << MADV_NOHUGEPAGE); - else if (_vmflag_match(tok, "wf")) - *madv |= (1ul << MADV_WIPEONFORK); /* vmsplice doesn't work for VM_IO and VM_PFNMAP mappings. 
*/ if (_vmflag_match(tok, "io") || _vmflag_match(tok, "pf")) @@ -204,29 +196,12 @@ static void parse_vma_vmflags(char *buf, struct vma_area *vma_area) * vmsplice doesn't work for VM_IO and VM_PFNMAP mappings, the * only exception is VVAR area that mapped by the kernel as * VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP - * - * The uprobes vma is also mapped by the kernel with VM_IO, among other flags */ - if (io_pf && !vma_area_is(vma_area, VMA_AREA_VVAR) && !vma_entry_is(vma_area->e, VMA_FILE_SHARED) - && !vma_area_is(vma_area, VMA_AREA_UPROBES)) + if (io_pf && !vma_area_is(vma_area, VMA_AREA_VVAR) && !vma_entry_is(vma_area->e, VMA_FILE_SHARED)) vma_area->e->status |= VMA_UNSUPP; if (vma_area->e->madv) vma_area->e->has_madv = true; - - /* - * We set MAP_PRIVATE flag on vma_area->e->flags right after parsing - * a first line of VMA entry in /proc//smaps file: - * 7fa84fa70000-7fa84fa95000 rw-p 00000000 00:00 0 - * but it's too early and we can't distinguish between MAP_DROPPABLE - * and MAP_PRIVATE mappings yet, as they both private mappings in nature - * and at this point we haven't yet read "VmFlags:" line in smaps. - * - * Let's detect this situation and drop MAP_PRIVATE flag while keep - * MAP_DROPPABLE, otherwise restorer's restore_mapping() helper will fail. 
- */ - if ((vma_area->e->flags & MAP_PRIVATE) && (vma_area->e->flags & MAP_DROPPABLE)) - vma_area->e->flags &= ~MAP_PRIVATE; } static inline int is_anon_shmem_map(dev_t dev) @@ -602,20 +577,11 @@ static int handle_vma(pid_t pid, struct vma_area *vma_area, const char *file_pat } else if (!strcmp(file_path, "[vdso]")) { if (handle_vdso_vma(vma_area)) goto err; - } else if (!strcmp(file_path, "[vvar]") || - !strcmp(file_path, "[vvar_vclock]")) { + } else if (!strcmp(file_path, "[vvar]")) { if (handle_vvar_vma(vma_area)) goto err; } else if (!strcmp(file_path, "[heap]")) { vma_area->e->status |= VMA_AREA_REGULAR | VMA_AREA_HEAP; - } else if (!strcmp(file_path, "[uprobes]")) { - uprobes_vma_exists = true; - if (!opts.allow_uprobes) { - pr_err("PID %d has uprobes vma. Consider using --" OPT_ALLOW_UPROBES ".\n", - pid); - goto err; - } - vma_area->e->status |= VMA_AREA_UPROBES; } else { vma_area->e->status = VMA_AREA_REGULAR; } @@ -752,10 +718,6 @@ static int vma_list_add(struct vma_area *vma_area, struct vm_area_list *vma_area */ pr_debug("Device file mapping %016" PRIx64 "-%016" PRIx64 " supported via device plugins\n", vma_area->e->start, vma_area->e->end); - } else if (vma_area->e->status & VMA_AREA_UPROBES) { - pr_debug("Skipping uprobes vma %016" PRIx64 "-%016" PRIx64 "\n", vma_area->e->start, - vma_area->e->end); - return 0; } else if (vma_area->e->status & VMA_UNSUPP) { pr_err("Unsupported mapping found %016" PRIx64 "-%016" PRIx64 "\n", vma_area->e->start, vma_area->e->end); @@ -807,7 +769,7 @@ static int task_size_check(pid_t pid, VmaEntry *entry) int parse_smaps(pid_t pid, struct vm_area_list *vma_area_list, dump_filemap_t dump_filemap) { - struct vma_area *vma_area = NULL, *prev_vma_area = NULL; + struct vma_area *vma_area = NULL; unsigned long start, end, pgoff, prev_end = 0; char r, w, x, s; int ret = -1, vm_file_fd = -1; @@ -849,22 +811,8 @@ int parse_smaps(pid_t pid, struct vm_area_list *vma_area_list, dump_filemap_t du continue; } - if (vma_area && 
vma_area_is(vma_area, VMA_AREA_VVAR) && - prev_vma_area && vma_area_is(prev_vma_area, VMA_AREA_VVAR)) { - if (prev_vma_area->e->end != vma_area->e->start) { - pr_err("two nonconsecutive vvar vma-s: " - "%" PRIx64 "-%" PRIx64 " %" PRIx64 "-%" PRIx64 "\n", - prev_vma_area->e->start, prev_vma_area->e->end, - vma_area->e->start, vma_area->e->end); - goto err; - } - /* Merge all vvar vma-s into one. */ - prev_vma_area->e->end = vma_area->e->end; - } else { - if (vma_area && vma_list_add(vma_area, vma_area_list, &prev_end, &vfi, &prev_vfi)) - goto err; - prev_vma_area = vma_area; - } + if (vma_area && vma_list_add(vma_area, vma_area_list, &prev_end, &vfi, &prev_vfi)) + goto err; if (eof) break; @@ -1106,7 +1054,7 @@ int parse_pid_status(pid_t pid, struct seize_task_status *ss, void *data) if (bfdopenr(&f)) return -1; - while (done < 14) { + while (done < 13) { str = breadline(&f); if (str == NULL) break; @@ -1190,13 +1138,6 @@ int parse_pid_status(pid_t pid, struct seize_task_status *ss, void *data) continue; } - if (!strncmp(str, "CapAmb:", 7)) { - if (cap_parse(str + 8, cr->cap_amb)) - goto err_parse; - done++; - continue; - } - if (!strncmp(str, "Seccomp:", 8)) { if (sscanf(str + 9, "%d", &cr->s.seccomp_mode) != 1) { goto err_parse; @@ -1240,7 +1181,7 @@ int parse_pid_status(pid_t pid, struct seize_task_status *ss, void *data) } /* seccomp and nspids are optional */ - expected_done = (parsed_seccomp ? 13 : 12); + expected_done = (parsed_seccomp ? 
12 : 11); if (kdat.has_nspid) expected_done++; if (done == expected_done) @@ -1477,7 +1418,7 @@ static int parse_mountinfo_ent(char *str, struct mount_info *new, char **fsname) goto err; new->mountpoint[0] = '.'; - ret = sscanf(str, "%i %i %u:%u %ms %4094s %ms %n", &new->mnt_id, &new->parent_mnt_id, &kmaj, &kmin, &new->root, + ret = sscanf(str, "%i %i %u:%u %ms %s %ms %n", &new->mnt_id, &new->parent_mnt_id, &kmaj, &kmin, &new->root, new->mountpoint + 1, &opt, &n); if (ret != 7) goto err; @@ -2224,33 +2165,6 @@ static int parse_fdinfo_pid_s(int pid, int fd, int type, void *arg) if (ret) goto parse_err; - entry_met = true; - continue; - } - if (fdinfo_field(str, "ino") || fdinfo_field(str, "NSpid") || fdinfo_field(str, "Pid")) { - struct pidfd_dump_info *pidfd_info = arg; - - if (type != FD_TYPES__PIDFD) - continue; - - if (fdinfo_field(str, "ino")) { - ret = sscanf(str, "%*s %u", &pidfd_info->pidfe.ino); - if (ret != 1) - goto parse_err; - } else if (fdinfo_field(str, "Pid")) { - ret = sscanf(str, "%*s %d", &pidfd_info->pid); - if (ret != 1) - goto parse_err; - } else if (fdinfo_field(str, "NSpid")) { - char *last; - - last = strrchr(str, '\t'); - if (!last || sscanf(last, "%d", &pidfd_info->pidfe.nspid) != 1) { - pr_err("Unable to parse: %s\n", str); - goto parse_err; - } - } - entry_met = true; continue; } @@ -2302,10 +2216,10 @@ static int parse_file_lock_buf(char *buf, struct file_lock *fl, bool is_blocked) char fl_flag[10], fl_type[15], fl_option[10]; if (is_blocked) { - num = sscanf(buf, "%lld: -> %9s %14s %9s %d %x:%x:%ld %lld %31s", &fl->fl_id, fl_flag, fl_type, fl_option, + num = sscanf(buf, "%lld: -> %s %s %s %d %x:%x:%ld %lld %s", &fl->fl_id, fl_flag, fl_type, fl_option, &fl->fl_owner, &fl->maj, &fl->min, &fl->i_no, &fl->start, fl->end); } else { - num = sscanf(buf, "%lld:%9s %14s %9s %d %x:%x:%ld %lld %31s", &fl->fl_id, fl_flag, fl_type, fl_option, + num = sscanf(buf, "%lld:%s %s %s %d %x:%x:%ld %lld %s", &fl->fl_id, fl_flag, fl_type, fl_option, 
&fl->fl_owner, &fl->maj, &fl->min, &fl->i_no, &fl->start, fl->end); } @@ -2946,8 +2860,3 @@ int parse_uptime(uint64_t *upt) fclose(f); return 0; } - -bool found_uprobes_vma(void) -{ - return uprobes_vma_exists; -} diff --git a/criu/protobuf-desc.c b/criu/protobuf-desc.c index e0dbfccc2..ff16b9f5b 100644 --- a/criu/protobuf-desc.c +++ b/criu/protobuf-desc.c @@ -68,7 +68,6 @@ #include "images/bpfmap-file.pb-c.h" #include "images/bpfmap-data.pb-c.h" #include "images/apparmor.pb-c.h" -#include "images/pidfd.pb-c.h" struct cr_pb_message_desc cr_pb_descs[PB_MAX]; diff --git a/criu/pstree.c b/criu/pstree.c index cee8b5741..8c44e7134 100644 --- a/criu/pstree.c +++ b/criu/pstree.c @@ -63,7 +63,6 @@ CoreEntry *core_entry_alloc(int th, int tsk) sz += CR_CAP_SIZE * sizeof(ce->cap_prm[0]); sz += CR_CAP_SIZE * sizeof(ce->cap_eff[0]); sz += CR_CAP_SIZE * sizeof(ce->cap_bnd[0]); - sz += CR_CAP_SIZE * sizeof(ce->cap_amb[0]); /* * @groups are dynamic and allocated * on demand. @@ -123,12 +122,10 @@ CoreEntry *core_entry_alloc(int th, int tsk) ce->n_cap_prm = CR_CAP_SIZE; ce->n_cap_eff = CR_CAP_SIZE; ce->n_cap_bnd = CR_CAP_SIZE; - ce->n_cap_amb = CR_CAP_SIZE; ce->cap_inh = xptr_pull_s(&m, CR_CAP_SIZE * sizeof(ce->cap_inh[0])); ce->cap_prm = xptr_pull_s(&m, CR_CAP_SIZE * sizeof(ce->cap_prm[0])); ce->cap_eff = xptr_pull_s(&m, CR_CAP_SIZE * sizeof(ce->cap_eff[0])); ce->cap_bnd = xptr_pull_s(&m, CR_CAP_SIZE * sizeof(ce->cap_bnd[0])); - ce->cap_amb = xptr_pull_s(&m, CR_CAP_SIZE * sizeof(ce->cap_amb[0])); if (arch_alloc_thread_info(core)) { xfree(core); @@ -182,7 +179,7 @@ void free_pstree(struct pstree_item *root_item) struct pstree_item *item = root_item, *parent; while (item) { - if (has_children(item)) { + if (!list_empty(&item->children)) { item = list_first_entry(&item->children, struct pstree_item, sibling); continue; } @@ -237,21 +234,17 @@ int init_pstree_helper(struct pstree_item *ret) { BUG_ON(!ret->parent); ret->pid->state = TASK_HELPER; - rsti(ret)->clone_flags = 0; - 
INIT_LIST_HEAD(&rsti(ret)->fds); + rsti(ret)->clone_flags = CLONE_FILES | CLONE_FS; + if (shared_fdt_prepare(ret) < 0) + return -1; task_entries->nr_helpers++; return 0; } -bool has_children(struct pstree_item *item) -{ - return !list_empty(&item->children); -} - /* Deep first search on children */ struct pstree_item *pstree_item_next(struct pstree_item *item) { - if (has_children(item)) + if (!list_empty(&item->children)) return list_first_entry(&item->children, struct pstree_item, sibling); while (item->parent) { @@ -966,7 +959,7 @@ static int prepare_pstree_kobj_ids(void) * this namespace is either inherited from the * criu or is created for the init task (only) */ - pr_err("Can't restore sub-task in NS (cflags %lx)\n", cflags); + pr_err("Can't restore sub-task in NS\n"); return -1; } } diff --git a/criu/seize.c b/criu/seize.c index d0cf7b36c..edeb57cc8 100644 --- a/criu/seize.c +++ b/criu/seize.c @@ -25,17 +25,17 @@ #include "xmalloc.h" #include "util.h" -static bool compel_interrupt_only_mode; +static bool freeze_cgroup_disabled; /* * Disables the use of freeze cgroups for process seizing, even if explicitly - * requested via the --freeze-cgroup option or already set in a frozen state. - * This is necessary for plugins (e.g., CUDA) that do not function correctly - * when processes are frozen using cgroups. + * requested via the --freeze-cgroup option. This is necessary for plugins + * (e.g., CUDA) that do not function correctly when processes are frozen using + * cgroups. 
*/ -void __attribute__((used)) set_compel_interrupt_only_mode(void) +void __attribute__((used)) dont_use_freeze_cgroup(void) { - compel_interrupt_only_mode = true; + freeze_cgroup_disabled = true; } char *task_comm_info(pid_t pid, char *comm, size_t size) @@ -87,10 +87,7 @@ static const char frozen[] = "FROZEN"; static const char freezing[] = "FREEZING"; static const char thawed[] = "THAWED"; -enum freezer_state { FREEZER_ERROR = -1, - THAWED, - FROZEN, - FREEZING }; +enum freezer_state { FREEZER_ERROR = -1, THAWED, FROZEN, FREEZING }; /* Track if we are running on cgroup v2 system. */ static bool cgroup_v2 = false; @@ -413,7 +410,7 @@ static int freezer_detach(void) { int i; - if (!opts.freeze_cgroup || compel_interrupt_only_mode) + if (!opts.freeze_cgroup || freeze_cgroup_disabled) return 0; for (i = 0; i < processes_to_wait && processes_to_wait_pids; i++) { @@ -508,63 +505,29 @@ static int log_unfrozen_stacks(char *root) return 0; } -static int prepare_freezer_for_interrupt_only_mode(void) +static int check_freezer_cgroup(void) { enum freezer_state state = THAWED; int fd; - int exit_code = -1; - BUG_ON(!compel_interrupt_only_mode); + BUG_ON(!freeze_cgroup_disabled); fd = freezer_open(); if (fd < 0) return -1; state = get_freezer_state(fd); + close(fd); if (state == FREEZER_ERROR) { - goto err; + return -1; } - origin_freezer_state = state == FREEZING ? FROZEN : state; - if (state != THAWED) { - pr_warn("unfreezing cgroup for plugin compatibility\n"); - if (freezer_write_state(fd, THAWED)) - goto err; + pr_err("One or more plugins are incompatible with the freezer cgroup in the FROZEN state.\n"); + return -1; } - exit_code = 0; -err: - close(fd); - return exit_code; -} - -static void cgroupv1_freezer_kludges(int fd, int iter, const struct timespec *req) { - /* As per older kernel docs (freezer-subsystem.txt before - * the kernel commit ef9fe980c6fcc1821), if FREEZING is seen, - * userspace should either retry or thaw. 
While current - * kernel cgroup v1 docs no longer mention a need to retry, - * even recent kernels can't reliably freeze a cgroup v1. - * - * Let's keep asking the kernel to freeze from time to time. - * In addition, do occasional thaw/sleep/freeze. - * - * This is still a game of chances (the real fix belongs to the kernel) - * but these kludges might improve the probability of success. - * - * Cgroup v2 does not have this problem. - */ - switch (iter % 32) { - case 9: - case 20: - freezer_write_state(fd, FROZEN); - break; - case 31: - freezer_write_state(fd, THAWED); - nanosleep(req, NULL); - freezer_write_state(fd, FROZEN); - break; - } + return 0; } static int freeze_processes(void) @@ -573,8 +536,7 @@ static int freeze_processes(void) enum freezer_state state = THAWED; static const unsigned long step_ms = 100; - /* Since opts.timeout is in seconds, multiply it by 1000 to convert to milliseconds. */ - unsigned long nr_attempts = (opts.timeout * 1000) / step_ms; + unsigned long nr_attempts = (opts.timeout * 1000000) / step_ms; unsigned long i = 0; const struct timespec req = { @@ -583,12 +545,14 @@ static int freeze_processes(void) }; if (unlikely(!nr_attempts)) { - /* If the timeout is 0, wait for at least 10 seconds. */ - nr_attempts = (10 * 1000) / step_ms; + /* + * If timeout is turned off, lets + * wait for at least 10 seconds. + */ + nr_attempts = (10 * 1000000) / step_ms; } - pr_debug("freezing cgroup %s: %lu x %lums attempts, timeout: %us\n", - opts.freeze_cgroup, nr_attempts, step_ms, opts.timeout); + pr_debug("freezing processes: %lu attempts with %lu ms steps\n", nr_attempts, step_ms); fd = freezer_open(); if (fd < 0) @@ -615,25 +579,22 @@ static int freeze_processes(void) * not read @tasks pids while freezer in * transition stage. 
*/ - while (1) { + for (; i <= nr_attempts; i++) { state = get_freezer_state(fd); if (state == FREEZER_ERROR) { close(fd); return -1; } - if (state == FROZEN || i++ == nr_attempts || alarm_timeouted()) + if (state == FROZEN) break; - - if (!cgroup_v2) - cgroupv1_freezer_kludges(fd, i, &req); - + if (alarm_timeouted()) + goto err; nanosleep(&req, NULL); } - if (state != FROZEN) { - pr_err("Unable to freeze cgroup %s (%lu x %lums attempts, timeout: %us)\n", - opts.freeze_cgroup, i, step_ms, opts.timeout); + if (i > nr_attempts) { + pr_err("Unable to freeze cgroup %s\n", opts.freeze_cgroup); if (!pr_quelled(LOG_DEBUG)) log_unfrozen_stacks(opts.freeze_cgroup); goto err; @@ -707,6 +668,8 @@ static int collect_children(struct pstree_item *item) goto free; } + pr_info("Seized task %d, state %d\n", pid, ret); + c = alloc_pstree_item(); if (c == NULL) { ret = -1; @@ -718,7 +681,7 @@ static int collect_children(struct pstree_item *item) goto free; } - if (!opts.freeze_cgroup || compel_interrupt_only_mode) + if (!opts.freeze_cgroup || freeze_cgroup_disabled) /* fails when meets a zombie */ __ignore_value(compel_interrupt_task(pid)); @@ -744,8 +707,6 @@ static int collect_children(struct pstree_item *item) if (ret == TASK_STOPPED) c->pid->stop_signo = compel_parse_stop_signo(pid); - pr_info("Seized task %d, state %d\n", pid, ret); - c->pid->real = pid; c->parent = item; c->pid->state = ret; @@ -908,7 +869,7 @@ static int collect_threads(struct pstree_item *item) pr_info("\tSeizing %d's %d thread\n", item->pid->real, pid); - if ((!opts.freeze_cgroup || compel_interrupt_only_mode) && + if ((!opts.freeze_cgroup || freeze_cgroup_disabled) && compel_interrupt_task(pid)) continue; @@ -965,7 +926,7 @@ static int collect_loop(struct pstree_item *item, int (*collect)(struct pstree_i { int attempts = NR_ATTEMPTS, nr_inprogress = 1; - if (opts.freeze_cgroup && !compel_interrupt_only_mode) + if (opts.freeze_cgroup && !freeze_cgroup_disabled) attempts = 1; /* @@ -1008,7 +969,7 @@ static 
int collect_task(struct pstree_item *item) if (ret < 0) goto err_close; - if ((item->pid->state == TASK_DEAD) && has_children(item)) { + if ((item->pid->state == TASK_DEAD) && !list_empty(&item->children)) { pr_err("Zombie with children?! O_o Run, run, run!\n"); goto err_close; } @@ -1048,8 +1009,9 @@ static int cgroup_version(void) int collect_pstree(void) { pid_t pid = root_item->pid->real; - int ret, exit_code = -1; + int ret = -1; struct proc_status_creds creds; + struct pstree_item *iter; timing_start(TIME_FREEZING); @@ -1060,32 +1022,22 @@ int collect_pstree(void) */ alarm(opts.timeout); + ret = run_plugins(PAUSE_DEVICES, pid); + if (ret < 0 && ret != -ENOTSUP) { + goto err; + } + if (opts.freeze_cgroup && cgroup_version()) goto err; pr_debug("Detected cgroup V%d freezer\n", cgroup_v2 ? 2 : 1); - if (opts.freeze_cgroup && !compel_interrupt_only_mode) { - ret = run_plugins(PAUSE_DEVICES, pid); - if (ret < 0 && ret != -ENOTSUP) { - goto err; - } - + if (opts.freeze_cgroup && !freeze_cgroup_disabled) { if (freeze_processes()) goto err; } else { - if (opts.freeze_cgroup && prepare_freezer_for_interrupt_only_mode()) + if (opts.freeze_cgroup && check_freezer_cgroup()) goto err; - - /* - * Call PAUSE_DEVICES after prepare_freezer_for_interrupt_only_mode() - * to be able to checkpoint containers in a frozen state. - */ - ret = run_plugins(PAUSE_DEVICES, pid); - if (ret < 0 && ret != -ENOTSUP) { - goto err; - } - if (compel_interrupt_task(pid)) { set_cr_errno(ESRCH); goto err; @@ -1115,26 +1067,12 @@ int collect_pstree(void) if (ret < 0) goto err; - if (opts.freeze_cgroup && !compel_interrupt_only_mode && + if (opts.freeze_cgroup && !freeze_cgroup_disabled && freezer_wait_processes()) { + ret = -1; goto err; } - exit_code = 0; - timing_stop(TIME_FREEZING); - timing_start(TIME_FROZEN); - -err: - /* Freezing stage finished in time - disable timer. 
*/ - alarm(0); - return exit_code; -} - -int checkpoint_devices(void) -{ - struct pstree_item *iter; - int ret, exit_code = -1; - for_each_pstree_item(iter) { if (!task_alive(iter)) continue; @@ -1143,7 +1081,12 @@ int checkpoint_devices(void) goto err; } - exit_code = 0; + ret = 0; + timing_stop(TIME_FREEZING); + timing_start(TIME_FROZEN); + err: - return exit_code; + /* Freezing stage finished in time - disable timer. */ + alarm(0); + return ret; } diff --git a/criu/servicefd.c b/criu/servicefd.c index dfb019066..06a8d3eba 100644 --- a/criu/servicefd.c +++ b/criu/servicefd.c @@ -313,4 +313,4 @@ int clone_service_fd(struct pstree_item *me) ret = 0; return ret; -} \ No newline at end of file +} diff --git a/criu/shmem.c b/criu/shmem.c index bc7aa3669..9e3178352 100644 --- a/criu/shmem.c +++ b/criu/shmem.c @@ -206,34 +206,31 @@ static int expand_shmem(struct shmem_info *si, unsigned long new_size) return 0; } -static int update_shmem_pmaps(struct shmem_info *si, pmc_t *pmc, VmaEntry *vma) +static void update_shmem_pmaps(struct shmem_info *si, pmc_t *pmc, VmaEntry *vma) { unsigned long shmem_pfn, vma_pfn, vma_pgcnt; u64 vaddr; if (!is_shmem_tracking_en()) - return 0; + return; vma_pgcnt = DIV_ROUND_UP(si->size - vma->pgoff, PAGE_SIZE); for (vma_pfn = 0, vaddr = vma->start; vma_pfn < vma_pgcnt; ++vma_pfn, vaddr += PAGE_SIZE) { - struct page_info page_info = {}; + bool softdirty = false; + u64 next; - if (should_dump_page(pmc, vma, vaddr, &page_info)) - return -1; - - if (page_info.next != vaddr) { - vaddr = page_info.next - PAGE_SIZE; + next = should_dump_page(pmc, vma, vaddr, &softdirty); + if (next != vaddr) { + vaddr = next - PAGE_SIZE; continue; } shmem_pfn = vma_pfn + DIV_ROUND_UP(vma->pgoff, PAGE_SIZE); - if (page_info.softdirty) + if (softdirty) set_pstate(si->pstate_map, shmem_pfn, PST_DIRTY); else set_pstate(si->pstate_map, shmem_pfn, PST_DUMP); } - - return 0; } int collect_sysv_shmem(unsigned long shmid, unsigned long size) @@ -670,9 +667,7 @@ int 
add_shmem_area(pid_t pid, VmaEntry *vma, pmc_t *pmc) if (expand_shmem(si, size)) return -1; } - - if (update_shmem_pmaps(si, pmc, vma)) - return -1; + update_shmem_pmaps(si, pmc, vma); return 0; } @@ -689,9 +684,7 @@ int add_shmem_area(pid_t pid, VmaEntry *vma, pmc_t *pmc) if (expand_shmem(si, size)) return -1; - - if (update_shmem_pmaps(si, pmc, vma)) - return -1; + update_shmem_pmaps(si, pmc, vma); return 0; } diff --git a/criu/sk-inet.c b/criu/sk-inet.c index 422edc656..92f53e569 100644 --- a/criu/sk-inet.c +++ b/criu/sk-inet.c @@ -44,11 +44,6 @@ #define PB_ALEN_INET 1 #define PB_ALEN_INET6 4 -/* Definition for older kernels without MPTCP support (e.g. Ubuntu 20.04) */ -#ifndef IPPROTO_MPTCP -#define IPPROTO_MPTCP 262 -#endif - static LIST_HEAD(inet_ports); struct inet_port { @@ -130,13 +125,9 @@ static int can_dump_ipproto(unsigned int ino, int proto, int type) case IPPROTO_TCP: case IPPROTO_UDP: case IPPROTO_UDPLITE: - case IPPROTO_ICMP: - case IPPROTO_ICMPV6: break; default: pr_err("Unsupported proto %d for socket %x\n", proto, ino); - if (proto == IPPROTO_MPTCP) - pr_err("For Go programs, consider using \"GODEBUG=multipathtcp=0\" to disable MPTCP\n"); return 0; } @@ -581,7 +572,7 @@ static int do_dump_one_inet_fd(int lfd, u32 id, const struct fd_parms *p, int fa if (dump_ip_opts(lfd, family, type, proto, &ipopts)) goto err; - if (dump_socket_opts(lfd, family, &skopts)) + if (dump_socket_opts(lfd, &skopts)) goto err; pr_info("Dumping inet socket at %d\n", p->fd); @@ -924,9 +915,8 @@ static int open_inet_sk(struct file_desc *d, int *new_fd) } if (ie->src_port) { - if (ie->proto != IPPROTO_ICMP && ie->proto != IPPROTO_ICMPV6) - if (inet_bind(sk, ii)) - goto err; + if (inet_bind(sk, ii)) + goto err; } /* diff --git a/criu/sk-netlink.c b/criu/sk-netlink.c index dc2baa1b8..a219b69be 100644 --- a/criu/sk-netlink.c +++ b/criu/sk-netlink.c @@ -165,7 +165,7 @@ static int dump_one_netlink_fd(int lfd, u32 id, const struct fd_parms *p) ne.fown = (FownEntry *)&p->fown; 
ne.opts = &skopts; - if (dump_socket_opts(lfd, AF_NETLINK, &skopts)) + if (dump_socket_opts(lfd, &skopts)) goto err; fe.type = FD_TYPES__NETLINKSK; diff --git a/criu/sk-packet.c b/criu/sk-packet.c index 6530bff58..1d2e23522 100644 --- a/criu/sk-packet.c +++ b/criu/sk-packet.c @@ -173,7 +173,7 @@ static int dump_one_packet_fd(int lfd, u32 id, const struct fd_parms *p) psk.fown = (FownEntry *)&p->fown; psk.opts = &skopts; - if (dump_socket_opts(lfd, AF_PACKET, &skopts)) + if (dump_socket_opts(lfd, &skopts)) return -1; psk.protocol = sd->proto; diff --git a/criu/sk-unix.c b/criu/sk-unix.c index 6145fe734..70ca16be4 100644 --- a/criu/sk-unix.c +++ b/criu/sk-unix.c @@ -527,7 +527,7 @@ static int dump_one_unix_fd(int lfd, uint32_t id, const struct fd_parms *p) } } dump: - if (dump_socket_opts(lfd, AF_UNIX, skopts)) + if (dump_socket_opts(lfd, skopts)) goto err; pr_info("Dumping unix socket at %d\n", p->fd); diff --git a/criu/sockets.c b/criu/sockets.c index e4adae03c..f9ce999be 100644 --- a/criu/sockets.c +++ b/criu/sockets.c @@ -65,7 +65,7 @@ const char *socket_proto_name(unsigned int proto, char *nm, size_t size) [IPPROTO_IPV6] = __stringify_1(IPPROTO_IPV6), [IPPROTO_RSVP] = __stringify_1(IPPROTO_RSVP), [IPPROTO_GRE] = __stringify_1(IPPROTO_GRE), [IPPROTO_ESP] = __stringify_1(IPPROTO_ESP), [IPPROTO_AH] = __stringify_1(IPPROTO_AH), [IPPROTO_UDPLITE] = __stringify_1(IPPROTO_UDPLITE), - [IPPROTO_RAW] = __stringify_1(IPPROTO_RAW), [IPPROTO_ICMPV6] = __stringify_1(IPPROTO_ICMPV6), + [IPPROTO_RAW] = __stringify_1(IPPROTO_RAW), }; return __socket_const_name(nm, size, protos, ARRAY_SIZE(protos), proto); } @@ -131,12 +131,10 @@ enum socket_cl_bits { INET_UDP_CL_BIT, INET_UDPLITE_CL_BIT, INET_RAW_CL_BIT, - INET_ICMP_CL_BIT, INET6_TCP_CL_BIT, INET6_UDP_CL_BIT, INET6_UDPLITE_CL_BIT, INET6_RAW_CL_BIT, - INET6_ICMP_CL_BIT, UNIX_CL_BIT, PACKET_CL_BIT, _MAX_CL_BIT, @@ -163,8 +161,6 @@ static inline enum socket_cl_bits get_collect_bit_nr(unsigned int family, unsign return 
INET_UDPLITE_CL_BIT; if (proto == IPPROTO_RAW) return INET_RAW_CL_BIT; - if (proto == IPPROTO_ICMP) - return INET_ICMP_CL_BIT; } if (family == AF_INET6) { if (proto == IPPROTO_TCP) @@ -175,8 +171,6 @@ static inline enum socket_cl_bits get_collect_bit_nr(unsigned int family, unsign return INET6_UDPLITE_CL_BIT; if (proto == IPPROTO_RAW) return INET6_RAW_CL_BIT; - if (proto == IPPROTO_ICMPV6) - return INET6_ICMP_CL_BIT; } pr_err("Unknown pair family %d proto %d\n", family, proto); @@ -288,12 +282,6 @@ void preload_socket_modules(void) req.r.i.sdiag_protocol = IPPROTO_RAW; probe_diag(nl, &req, -ENOENT); - req.r.i.sdiag_protocol = IPPROTO_ICMP; - probe_diag(nl, &req, -ENOENT); - - req.r.i.sdiag_protocol = IPPROTO_ICMPV6; - probe_diag(nl, &req, -ENOENT); - close(nl); pr_info("Done probing\n"); } @@ -649,7 +637,7 @@ int do_dump_opt(int sk, int level, int name, void *val, int len) return 0; } -int dump_socket_opts(int sk, int family, SkOptsEntry *soe) +int dump_socket_opts(int sk, SkOptsEntry *soe) { int ret = 0, val; struct timeval tv; @@ -688,15 +676,13 @@ int dump_socket_opts(int sk, int family, SkOptsEntry *soe) soe->so_reuseport = val ? true : false; soe->has_so_reuseport = true; - if (family == AF_UNIX || family == AF_NETLINK) { - ret |= dump_opt(sk, SOL_SOCKET, SO_PASSCRED, &val); - soe->has_so_passcred = true; - soe->so_passcred = val ? true : false; + ret |= dump_opt(sk, SOL_SOCKET, SO_PASSCRED, &val); + soe->has_so_passcred = true; + soe->so_passcred = val ? true : false; - ret |= dump_opt(sk, SOL_SOCKET, SO_PASSSEC, &val); - soe->has_so_passsec = true; - soe->so_passsec = val ? true : false; - } + ret |= dump_opt(sk, SOL_SOCKET, SO_PASSSEC, &val); + soe->has_so_passsec = true; + soe->so_passsec = val ? 
true : false; ret |= dump_opt(sk, SOL_SOCKET, SO_DONTROUTE, &val); soe->has_so_dontroute = true; @@ -787,10 +773,6 @@ static int inet_receive_one(struct nlmsghdr *h, struct ns_id *ns, void *arg) case IPPROTO_RAW: type = SOCK_RAW; break; - case IPPROTO_ICMP: - case IPPROTO_ICMPV6: - type = SOCK_DGRAM; - break; default: BUG_ON(1); return -1; @@ -815,7 +797,7 @@ static int collect_err(int err, struct ns_id *ns, void *arg) char family[32], proto[32]; char msg[256]; - snprintf(msg, sizeof(msg), "Sockets collect procedure family %s proto %s", + snprintf(msg, sizeof(msg), "Sockects collect procedure family %s proto %s", socket_family_name(gr->family, family, sizeof(family)), socket_proto_name(gr->protocol, proto, sizeof(proto))); @@ -923,13 +905,6 @@ int collect_sockets(struct ns_id *ns) if (tmp) err = tmp; - /* Collect IPv4 ICMP sockets */ - req.r.i.sdiag_family = AF_INET; - req.r.i.sdiag_protocol = IPPROTO_ICMP; - req.r.i.idiag_ext = 0; - req.r.i.idiag_states = -1; /* All */ - set_collect_bit(req.r.n.sdiag_family, req.r.n.sdiag_protocol); - /* Collect IPv6 TCP sockets */ req.r.i.sdiag_family = AF_INET6; req.r.i.sdiag_protocol = IPPROTO_TCP; @@ -969,13 +944,6 @@ int collect_sockets(struct ns_id *ns) if (tmp) err = tmp; - /* Collect IPv6 ICMP sockets */ - req.r.i.sdiag_family = AF_INET6; - req.r.i.sdiag_protocol = IPPROTO_ICMPV6; - req.r.i.idiag_ext = 0; - req.r.i.idiag_states = -1; /* All */ - set_collect_bit(req.r.n.sdiag_family, req.r.n.sdiag_protocol); - req.r.p.sdiag_family = AF_PACKET; req.r.p.sdiag_protocol = 0; req.r.p.pdiag_show = PACKET_SHOW_INFO | PACKET_SHOW_MCLIST | PACKET_SHOW_FANOUT | PACKET_SHOW_RING_CFG; diff --git a/criu/timer.c b/criu/timer.c index 856501be6..e94cf0280 100644 --- a/criu/timer.c +++ b/criu/timer.c @@ -16,7 +16,7 @@ static inline int timeval_valid(struct timeval *tv) static inline int decode_itimer(char *n, ItimerEntry *ie, struct itimerval *val) { - if (ie->isec == 0 && ie->iusec == 0 && ie->vsec == 0 && ie->vusec == 0) { + if (ie->isec 
== 0 && ie->iusec == 0) { memzero_p(val); return 0; } @@ -195,7 +195,6 @@ int prepare_posix_timers_from_fd(int pid, struct task_restore_args *ta) if (!img) return -1; - ta->posix_timer_cr_ids = kdat.has_timer_cr_ids; ta->posix_timers_n = 0; while (1) { PosixTimerEntry *pte; @@ -235,7 +234,6 @@ int prepare_posix_timers(int pid, struct task_restore_args *ta, CoreEntry *core) return prepare_posix_timers_from_fd(pid, ta); ta->posix_timers_n = tte->n_posix; - ta->posix_timer_cr_ids = kdat.has_timer_cr_ids; for (i = 0; i < ta->posix_timers_n; i++) { t = rst_mem_alloc(sizeof(struct restore_posix_timer), RM_PRIVATE); if (!t) diff --git a/criu/tty.c b/criu/tty.c index 9a4520d53..ae23094b7 100644 --- a/criu/tty.c +++ b/criu/tty.c @@ -259,7 +259,7 @@ static int pts_fd_get_index(int fd, const struct fd_parms *p) { int index; const struct fd_link *link = p->link; - const char *pos = strrchr(link->name, '/'); + char *pos = strrchr(link->name, '/'); if (!pos || pos == (link->name + link->len - 1)) { pr_err("Unexpected format on path %s\n", link->name + 1); diff --git a/criu/uffd.c b/criu/uffd.c index 8e12dcd63..e07b21b69 100644 --- a/criu/uffd.c +++ b/criu/uffd.c @@ -668,11 +668,12 @@ static int remap_iovs(struct lazy_pages_info *lpi, unsigned long from, unsigned */ static int collect_iovs(struct lazy_pages_info *lpi) { - unsigned long start, end, len, nr_pages = 0; - int n_vma = 0, max_iov_len = 0, ret = -1; struct page_read *pr = &lpi->pr; struct lazy_iov *iov; MmEntry *mm; + int nr_pages = 0, n_vma = 0, max_iov_len = 0; + int ret = -1; + unsigned long start, end, len; mm = init_mm_entry(lpi); if (!mm) @@ -727,7 +728,7 @@ free_mm: return ret; } -static int uffd_io_complete(struct page_read *pr, unsigned long vaddr, unsigned long nr); +static int uffd_io_complete(struct page_read *pr, unsigned long vaddr, int nr); static int ud_open(int client, struct lazy_pages_info **_lpi) { @@ -821,7 +822,7 @@ static bool uffd_recoverable_error(int mcopy_rc) return false; } -static int 
uffd_check_op_error(struct lazy_pages_info *lpi, const char *op, unsigned long *nr_pages, long mcopy_rc) +static int uffd_check_op_error(struct lazy_pages_info *lpi, const char *op, int *nr_pages, long mcopy_rc) { if (errno == ENOSPC || errno == ESRCH) { handle_exit(lpi); @@ -843,7 +844,7 @@ static int uffd_check_op_error(struct lazy_pages_info *lpi, const char *op, unsi return 0; } -static int uffd_copy(struct lazy_pages_info *lpi, __u64 address, unsigned long *nr_pages) +static int uffd_copy(struct lazy_pages_info *lpi, __u64 address, int *nr_pages) { struct uffdio_copy uffdio_copy; unsigned long len = *nr_pages * page_size(); @@ -864,12 +865,12 @@ static int uffd_copy(struct lazy_pages_info *lpi, __u64 address, unsigned long * return 0; } -static int uffd_io_complete(struct page_read *pr, unsigned long img_addr, unsigned long nr) +static int uffd_io_complete(struct page_read *pr, unsigned long img_addr, int nr) { struct lazy_pages_info *lpi; - unsigned long addr = 0, req_pages; + unsigned long addr = 0; + int req_pages, ret; struct lazy_iov *req; - int ret; lpi = container_of(pr, struct lazy_pages_info, pr); @@ -919,7 +920,7 @@ static int uffd_io_complete(struct page_read *pr, unsigned long img_addr, unsign return drop_iovs(lpi, addr, nr * PAGE_SIZE); } -static int uffd_zero(struct lazy_pages_info *lpi, __u64 address, unsigned long nr_pages) +static int uffd_zero(struct lazy_pages_info *lpi, __u64 address, int nr_pages) { struct uffdio_zeropage uffdio_zeropage; unsigned long len = page_size() * nr_pages; @@ -945,7 +946,7 @@ static int uffd_zero(struct lazy_pages_info *lpi, __u64 address, unsigned long n * Returns 0 for zero pages, 1 for "real" pages and negative value on * error */ -static int uffd_seek_pages(struct lazy_pages_info *lpi, __u64 address, unsigned long nr) +static int uffd_seek_pages(struct lazy_pages_info *lpi, __u64 address, int nr) { int ret; @@ -960,7 +961,7 @@ static int uffd_seek_pages(struct lazy_pages_info *lpi, __u64 address, unsigned 
return 0; } -static int uffd_handle_pages(struct lazy_pages_info *lpi, __u64 address, unsigned long nr, unsigned flags) +static int uffd_handle_pages(struct lazy_pages_info *lpi, __u64 address, int nr, unsigned flags) { int ret; @@ -1002,7 +1003,7 @@ static void update_xfer_len(struct lazy_pages_info *lpi, bool pf) static int xfer_pages(struct lazy_pages_info *lpi) { struct lazy_iov *iov; - unsigned long nr_pages; + unsigned int nr_pages; unsigned long len; int err; @@ -1097,8 +1098,6 @@ static int handle_fork(struct lazy_pages_info *parent_lpi, struct uffd_msg *msg) lpi_get(lpi->parent); - page_read_disable_dedup(&parent_lpi->pr); - page_read_disable_dedup(&lpi->pr); return 1; out: diff --git a/criu/unittest/mock.c b/criu/unittest/mock.c index b2d507278..e517720e4 100644 --- a/criu/unittest/mock.c +++ b/criu/unittest/mock.c @@ -5,8 +5,6 @@ #include #include -#include "compel/infect-util.h" - int add_external(char *key) { return 0; @@ -143,4 +141,4 @@ int check_mount_v2(void) return 0; } -char compel_run_id[RUN_ID_HASH_LENGTH]; +uint64_t compel_run_id; diff --git a/criu/util.c b/criu/util.c index 2eaad35bb..d2bc9a865 100644 --- a/criu/util.c +++ b/criu/util.c @@ -28,7 +28,6 @@ #include #include #include -#include #include "linux/mount.h" @@ -195,7 +194,6 @@ static void vma_opt_str(const struct vma_area *v, char *opt) opt2s(VMA_ANON_PRIVATE, "ap"); opt2s(VMA_AREA_SYSVIPC, "sysv"); opt2s(VMA_AREA_SOCKET, "sk"); - opt2s(VMA_AREA_UPROBES, "uprobes"); #undef opt2s } @@ -222,9 +220,10 @@ int close_safe(int *fd) if (*fd > -1) { ret = close(*fd); - if (ret) - pr_perror("Failed closing fd %d", *fd); - *fd = -1; + if (!ret) + *fd = -1; + else + pr_perror("Unable to close fd %d", *fd); } return ret; @@ -2027,16 +2026,20 @@ int run_command(char *buf, size_t buf_size, int (*child_fn)(void *), void *args) return fret; } -char criu_run_id[RUN_ID_HASH_LENGTH]; +uint64_t criu_run_id; void util_init(void) { - uuid_t uuid; + struct stat statbuf; - uuid_generate(uuid); - 
uuid_unparse(uuid, criu_run_id); - pr_info("CRIU run id = %s\n", criu_run_id); - memcpy(compel_run_id, criu_run_id, sizeof(criu_run_id)); + criu_run_id = getpid(); + if (!stat("/proc/self/ns/pid", &statbuf)) + criu_run_id |= (uint64_t)statbuf.st_ino << 32; + else if (errno != ENOENT) + pr_perror("Can't stat /proc/self/ns/pid - CRIU run id might not be unique"); + + compel_run_id = criu_run_id; + pr_info("CRIU run id = %#" PRIx64 "\n", criu_run_id); } /* diff --git a/criu/vdso.c b/criu/vdso.c index 2d9e57c4d..7de2fae78 100644 --- a/criu/vdso.c +++ b/criu/vdso.c @@ -145,9 +145,6 @@ static void drop_rt_vdso(struct vm_area_list *vma_area_list, struct vdso_quarter * Also BTW search for rt-vvar to remove it later. */ list_for_each_entry(vma, &vma_area_list->h, list) { - if (vma_area_is(vma, VMA_AREA_GUARD)) - continue; - if (vma->e->start == addr->orig_vdso) { vma->e->status |= VMA_AREA_REGULAR | VMA_AREA_VDSO; pr_debug("vdso: Restore orig vDSO status at %lx\n", (long)vma->e->start); @@ -279,9 +276,6 @@ int parasite_fixup_vdso(struct parasite_ctl *ctl, pid_t pid, struct vm_area_list } list_for_each_entry(vma, &vma_area_list->h, list) { - if (vma_area_is(vma, VMA_AREA_GUARD)) - continue; - /* * Defer handling marked vdso until we walked over * all vmas and restore potentially remapped vDSO @@ -316,7 +310,7 @@ static int vdso_parse_maps(pid_t pid, struct vdso_maps *s) while (1) { unsigned long start, end; - char *has_vdso, *has_vvar, *has_vvar_vclock; + char *has_vdso, *has_vvar; buf = breadline(&f); if (buf == NULL) @@ -324,19 +318,13 @@ static int vdso_parse_maps(pid_t pid, struct vdso_maps *s) if (IS_ERR(buf)) goto err; - has_vvar = NULL; - has_vvar_vclock = NULL; - do { - has_vdso = strstr(buf, "[vdso]"); - if (has_vdso) - break; + has_vdso = strstr(buf, "[vdso]"); + if (!has_vdso) has_vvar = strstr(buf, "[vvar]"); - if (has_vvar) - break; - has_vvar_vclock = strstr(buf, "[vvar_vclock]"); - } while (0); + else + has_vvar = NULL; - if (!has_vdso && !has_vvar && 
!has_vvar_vclock) + if (!has_vdso && !has_vvar) continue; if (sscanf(buf, "%lx-%lx", &start, &end) != 2) { @@ -351,21 +339,13 @@ static int vdso_parse_maps(pid_t pid, struct vdso_maps *s) } s->vdso_start = start; s->sym.vdso_size = end - start; - } else if (has_vvar) { + } else { if (s->vvar_start != VVAR_BAD_ADDR) { pr_err("Got second VVAR entry\n"); goto err; } s->vvar_start = start; s->sym.vvar_size = end - start; - } else { - if (s->vvar_start == VDSO_BAD_ADDR || - s->vvar_start + s->sym.vvar_size != start) { - pr_err("VVAR and VVAR_VCLOCK entries are not subsequent\n"); - goto err; - } - s->sym.vvar_vclock_size = end - start; - s->sym.vvar_size += s->sym.vvar_vclock_size; } } diff --git a/flake.lock b/flake.lock deleted file mode 100644 index 90c914452..000000000 --- a/flake.lock +++ /dev/null @@ -1,61 +0,0 @@ -{ - "nodes": { - "flake-utils": { - "inputs": { - "systems": "systems" - }, - "locked": { - "lastModified": 1731533236, - "narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=", - "owner": "numtide", - "repo": "flake-utils", - "rev": "11707dc2f618dd54ca8739b309ec4fc024de578b", - "type": "github" - }, - "original": { - "owner": "numtide", - "repo": "flake-utils", - "type": "github" - } - }, - "nixpkgs": { - "locked": { - "lastModified": 1744463964, - "narHash": "sha256-LWqduOgLHCFxiTNYi3Uj5Lgz0SR+Xhw3kr/3Xd0GPTM=", - "owner": "NixOS", - "repo": "nixpkgs", - "rev": "2631b0b7abcea6e640ce31cd78ea58910d31e650", - "type": "github" - }, - "original": { - "owner": "NixOS", - "ref": "nixos-unstable", - "repo": "nixpkgs", - "type": "github" - } - }, - "root": { - "inputs": { - "flake-utils": "flake-utils", - "nixpkgs": "nixpkgs" - } - }, - "systems": { - "locked": { - "lastModified": 1681028828, - "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", - "owner": "nix-systems", - "repo": "default", - "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", - "type": "github" - }, - "original": { - "owner": "nix-systems", - "repo": "default", - 
"type": "github" - } - } - }, - "root": "root", - "version": 7 -} diff --git a/flake.nix b/flake.nix deleted file mode 100644 index dc2429ffc..000000000 --- a/flake.nix +++ /dev/null @@ -1,77 +0,0 @@ -{ - description = "CRIU development environment"; - - inputs = { - nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable"; - flake-utils.url = "github:numtide/flake-utils"; - }; - - outputs = { self, nixpkgs, flake-utils }: - flake-utils.lib.eachDefaultSystem (system: - let - pkgs = nixpkgs.legacyPackages.${system}; - - # Dependencies for CRIU - criuDeps = with pkgs; [ - # Compiler and build essentials - gcc - gnumake - pkg-config - - # Protocol Buffers - protobuf - protobufc - python3Packages.protobuf - - # Other required libraries - libuuid - libbsd - iproute2 - nftables - libcap - libnet - libnl - libaio - gnutls - libdrm - - # ZDTM - python3Packages.pyyaml - ]; - - # Multilib support for 32-bit compatibility - # criuDeps32bit = with pkgs; [ - # glibc.dev - # glibc - # gcc-unwrapped - # ]; - - devShell = pkgs.mkShell { - buildInputs = criuDeps; # ++ (if pkgs.stdenv.isx86_64 then criuDeps32bit else []); - - shellHook = '' - echo "CRIU development environment" - echo "==============================" - echo "" - echo "Useful commands:" - echo " make - Build CRIU" - echo " make test - Run tests (requires ZDTM dependencies)" - echo "" - ''; - - # Add proper flags for multilib support - # NIX_CFLAGS_COMPILE = pkgs.lib.optional pkgs.stdenv.isx86_64 "-m32"; - - # Make sure the shell can find headers for multilib - # PKG_CONFIG_PATH = pkgs.lib.makeSearchPath "lib/pkgconfig" criuDeps; - }; - in - { - # Export the development shell - devShells.default = devShell; - - # Build CRIU package as well - packages.default = pkgs.criu; - } - ); -} diff --git a/images/Makefile b/images/Makefile index 2c33152e9..ca85b1a21 100644 --- a/images/Makefile +++ b/images/Makefile @@ -7,7 +7,6 @@ proto-obj-y += core-arm.o proto-obj-y += core-aarch64.o proto-obj-y += core-ppc64.o proto-obj-y += 
core-s390.o -proto-obj-y += core-riscv64.o proto-obj-y += cpuinfo.o proto-obj-y += inventory.o proto-obj-y += fdinfo.o @@ -58,6 +57,7 @@ proto-obj-y += ext-file.o proto-obj-y += cgroup.o proto-obj-y += userns.o proto-obj-y += pidns.o +proto-obj-y += google/protobuf/descriptor.o # To make protoc-c happy and compile opts.proto proto-obj-y += opts.o proto-obj-y += seccomp.o proto-obj-y += binfmt-misc.o @@ -73,7 +73,6 @@ proto-obj-y += bpfmap-file.o proto-obj-y += bpfmap-data.o proto-obj-y += apparmor.o proto-obj-y += rseq.o -proto-obj-y += pidfd.o CFLAGS += -iquote $(obj)/ @@ -90,27 +89,12 @@ endef makefile-deps := Makefile $(obj)/Makefile -# -# Generate descriptor.pb-c.c and descriptor.pb-c.h to compile opts.proto. -DESCRIPTOR_DIR := images/google/protobuf -GOOGLE_INCLUDE=$(shell pkg-config protobuf --variable=includedir)/google/protobuf -$(DESCRIPTOR_DIR)/descriptor.pb-c.c: $(GOOGLE_INCLUDE)/descriptor.proto - $(call msg-gen, $@) - $(Q) protoc --proto_path=/usr/include --proto_path=$(obj)/ --c_out=$(obj)/ $< - -cleanup-y += $(DESCRIPTOR_DIR)/descriptor.pb-c.d - -submrproper: - $(Q) rm -f $(DESCRIPTOR_DIR)/* -.PHONY: submrproper -mrproper: submrproper - # # Generates rules needed to compile protobuf files. 
define gen-proto-rules $(obj)/$(1).pb-c.c $(obj)/$(1).pb-c.h: $(obj)/$(1).proto $(addsuffix .pb-c.c,$(addprefix $(obj)/,$(2))) $(makefile-deps) $$(E) " PBCC " $$@ - $$(Q) protoc --proto_path=$(obj)/ --c_out=$(obj)/ $$< + $$(Q) protoc-c --proto_path=$(obj)/ --c_out=$(obj)/ $$< ifeq ($(PROTOUFIX),y) $$(Q) sed -i -e 's/4294967295/0xFFFFFFFF/g' $$@ $$(Q) sed -i -e 's/4294967295/0xFFFFFFFF/g' $$(patsubst %.c,%.h,$$@) diff --git a/images/core-aarch64.proto b/images/core-aarch64.proto index a94911c0b..3356e6b75 100644 --- a/images/core-aarch64.proto +++ b/images/core-aarch64.proto @@ -17,38 +17,9 @@ message user_aarch64_fpsimd_context_entry { required uint32 fpcr = 3; } -message user_aarch64_gcs_entry { - required uint64 gcspr_el0 = 1 [(criu).hex = true]; - required uint64 features_enabled = 2 [(criu).hex = true]; -} - -message pac_address_keys { - required uint64 apiakey_lo = 1; - required uint64 apiakey_hi = 2; - required uint64 apibkey_lo = 3; - required uint64 apibkey_hi = 4; - required uint64 apdakey_lo = 5; - required uint64 apdakey_hi = 6; - required uint64 apdbkey_lo = 7; - required uint64 apdbkey_hi = 8; - required uint64 pac_enabled_key = 9; -} - -message pac_generic_keys { - required uint64 apgakey_lo = 1; - required uint64 apgakey_hi = 2; -} - -message pac_keys { - optional pac_address_keys pac_address_keys = 6; - optional pac_generic_keys pac_generic_keys = 7; -} - message thread_info_aarch64 { required uint64 clear_tid_addr = 1[(criu).hex = true]; required uint64 tls = 2; required user_aarch64_regs_entry gpregs = 3[(criu).hex = true]; required user_aarch64_fpsimd_context_entry fpsimd = 4; - optional pac_keys pac_keys = 5; - optional user_aarch64_gcs_entry gcs = 6; } diff --git a/images/core-riscv64.proto b/images/core-riscv64.proto deleted file mode 100644 index 1ddfdd8bd..000000000 --- a/images/core-riscv64.proto +++ /dev/null @@ -1,53 +0,0 @@ -// SPDX-License-Identifier: MIT - -syntax = "proto2"; - -import "opts.proto"; - -// Refer to 
riscv-gnu-toolchain/linux-headers/include/asm/ptrace.h -message user_riscv64_regs_entry { - required uint64 pc = 1; - required uint64 ra = 2; - required uint64 sp = 3; - required uint64 gp = 4; - required uint64 tp = 5; - required uint64 t0 = 6; - required uint64 t1 = 7; - required uint64 t2 = 8; - required uint64 s0 = 9; - required uint64 s1 = 10; - required uint64 a0 = 11; - required uint64 a1 = 12; - required uint64 a2 = 13; - required uint64 a3 = 14; - required uint64 a4 = 15; - required uint64 a5 = 16; - required uint64 a6 = 17; - required uint64 a7 = 18; - required uint64 s2 = 19; - required uint64 s3 = 20; - required uint64 s4 = 21; - required uint64 s5 = 22; - required uint64 s6 = 23; - required uint64 s7 = 24; - required uint64 s8 = 25; - required uint64 s9 = 26; - required uint64 s10 = 27; - required uint64 s11 = 28; - required uint64 t3 = 29; - required uint64 t4 = 30; - required uint64 t5 = 31; - required uint64 t6 = 32; -} - -message user_riscv64_d_ext_entry { - repeated uint64 f = 1; - required uint32 fcsr = 2; -} - -message thread_info_riscv64 { - required uint64 clear_tid_addr = 1[(criu).hex = true]; - required uint64 tls = 2; - required user_riscv64_regs_entry gpregs = 3[(criu).hex = true]; - required user_riscv64_d_ext_entry fpsimd = 4; -} diff --git a/images/core.proto b/images/core.proto index 1fa23868b..5b07b5c44 100644 --- a/images/core.proto +++ b/images/core.proto @@ -9,7 +9,6 @@ import "core-ppc64.proto"; import "core-s390.proto"; import "core-mips.proto"; import "core-loongarch64.proto"; -import "core-riscv64.proto"; import "rlimit.proto"; import "timer.proto"; @@ -127,7 +126,6 @@ message core_entry { S390 = 5; MIPS = 6; LOONGARCH64 = 7; - RISCV64 = 8; } required march mtype = 1; @@ -138,7 +136,6 @@ message core_entry { optional thread_info_s390 ti_s390 = 10; optional thread_info_mips ti_mips = 11; optional thread_info_loongarch64 ti_loongarch64 = 12; - optional thread_info_riscv64 ti_riscv64 = 13; optional task_core_entry tc = 3; optional 
task_kobj_ids_entry ids = 4; diff --git a/images/creds.proto b/images/creds.proto index 932a40ccf..220ed3858 100644 --- a/images/creds.proto +++ b/images/creds.proto @@ -25,6 +25,4 @@ message creds_entry { optional string lsm_sockcreate = 16; optional bytes apparmor_data = 17; optional uint32 no_new_privs = 18; - - repeated uint32 cap_amb = 19; } diff --git a/images/fdinfo.proto b/images/fdinfo.proto index 32ec13cf4..88f1c1186 100644 --- a/images/fdinfo.proto +++ b/images/fdinfo.proto @@ -17,7 +17,6 @@ import "ext-file.proto"; import "sk-unix.proto"; import "fifo.proto"; import "pipe.proto"; -import "pidfd.proto"; import "tty.proto"; import "memfd.proto"; import "bpfmap-file.proto"; @@ -43,7 +42,6 @@ enum fd_types { TIMERFD = 17; MEMFD = 18; BPFMAP = 19; - PIDFD = 20; /* Any number above the real used. Not stored to image */ CTL_TTY = 65534; @@ -80,5 +78,4 @@ message file_entry { optional tty_file_entry tty = 19; optional memfd_file_entry memfd = 20; optional bpfmap_file_entry bpf = 21; - optional pidfd_entry pidfd = 22; } diff --git a/images/google/protobuf/.gitignore b/images/google/protobuf/.gitignore deleted file mode 100644 index 68359a786..000000000 --- a/images/google/protobuf/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -*.c -*.h diff --git a/images/google/protobuf/descriptor.proto b/images/google/protobuf/descriptor.proto new file mode 120000 index 000000000..07a4c9add --- /dev/null +++ b/images/google/protobuf/descriptor.proto @@ -0,0 +1 @@ +/usr/include/google/protobuf/descriptor.proto \ No newline at end of file diff --git a/images/inventory.proto b/images/inventory.proto index feed5b850..a735bad1d 100644 --- a/images/inventory.proto +++ b/images/inventory.proto @@ -10,13 +10,6 @@ enum lsmtype { APPARMOR = 2; } -// It is not possible to distinguish between an empty repeated field -// and unset repeated field. To solve this problem and provide backwards -// compabibility, we use the 'plugins_entry' message. 
-message plugins_entry { - repeated string plugins = 12; -}; - message inventory_entry { required uint32 img_version = 1; optional bool fdinfo_per_id = 2; @@ -28,10 +21,4 @@ message inventory_entry { optional uint32 pre_dump_mode = 9; optional bool tcp_close = 10; optional uint32 network_lock_method = 11; - optional plugins_entry plugins_entry = 12; - // Remember the criu_run_id when CRIU dumped the process. - // This is currently used to delete the correct nftables - // network locking rule. - optional string dump_criu_run_id = 13; - optional bool allow_uprobes = 14; } diff --git a/images/netdev.proto b/images/netdev.proto index 42e2bc7d7..748fd0200 100644 --- a/images/netdev.proto +++ b/images/netdev.proto @@ -74,5 +74,4 @@ message netns_entry { repeated netns_id nsids = 7; optional string ext_key = 8; repeated sysctl_entry unix_conf = 9; - repeated sysctl_entry ipv4_sysctl = 10; } diff --git a/images/pagemap.proto b/images/pagemap.proto index f2436a51a..e6d341b0f 100644 --- a/images/pagemap.proto +++ b/images/pagemap.proto @@ -10,8 +10,7 @@ message pagemap_head { message pagemap_entry { required uint64 vaddr = 1 [(criu).hex = true]; - required uint32 compat_nr_pages = 2; + required uint32 nr_pages = 2; optional bool in_parent = 3; optional uint32 flags = 4 [(criu).flags = "pmap.flags" ]; - optional uint64 nr_pages = 5; } diff --git a/images/pidfd.proto b/images/pidfd.proto deleted file mode 100644 index a9da3e454..000000000 --- a/images/pidfd.proto +++ /dev/null @@ -1,13 +0,0 @@ -// SPDX-License-Identifier: MIT - -syntax = "proto2"; - -import "fown.proto"; - -message pidfd_entry { - required uint32 id = 1; - required uint32 ino = 2; - required uint32 flags = 3; - required int32 nspid = 4; - required fown_entry fown = 5; -} diff --git a/include/common/arch/riscv64/asm/atomic.h b/include/common/arch/riscv64/asm/atomic.h deleted file mode 100644 index 4b08bd9fd..000000000 --- a/include/common/arch/riscv64/asm/atomic.h +++ /dev/null @@ -1,109 +0,0 @@ -#ifndef 
__CR_ATOMIC_H__ -#define __CR_ATOMIC_H__ - -typedef struct { - int counter; -} atomic_t; - -/* Copied from the Linux header arch/riscv/include/asm/barrier.h */ - -#define nop() __asm__ __volatile__("nop") - -#define RISCV_FENCE(p, s) __asm__ __volatile__("fence " #p "," #s : : : "memory") - -/* These barriers need to enforce ordering on both devices or memory. */ -#define mb() RISCV_FENCE(iorw, iorw) -#define rmb() RISCV_FENCE(ir, ir) -#define wmb() RISCV_FENCE(ow, ow) - -/* These barriers do not need to enforce ordering on devices, just memory. */ -#define __smp_mb() RISCV_FENCE(rw, rw) -#define __smp_rmb() RISCV_FENCE(r, r) -#define __smp_wmb() RISCV_FENCE(w, w) - -#define __smp_store_release(p, v) \ - do { \ - compiletime_assert_atomic_type(*p); \ - RISCV_FENCE(rw, w); \ - WRITE_ONCE(*p, v); \ - } while (0) - -#define __smp_load_acquire(p) \ - ({ \ - typeof(*p) ___p1 = READ_ONCE(*p); \ - compiletime_assert_atomic_type(*p); \ - RISCV_FENCE(r, rw); \ - ___p1; \ - }) - -/* Copied from the Linux kernel header arch/riscv/include/asm/atomic.h */ - -static inline int atomic_read(const atomic_t *v) -{ - return (*(volatile int *)&(v)->counter); -} - -static inline void atomic_set(atomic_t *v, int i) -{ - v->counter = i; -} - -#define atomic_get atomic_read - -static inline int atomic_add_return(int i, atomic_t *v) -{ - int result; - - asm volatile("amoadd.w.aqrl %1, %2, %0" : "+A"(v->counter), "=r"(result) : "r"(i) : "memory"); - __smp_mb(); - return result + i; -} - -static inline int atomic_sub_return(int i, atomic_t *v) -{ - return atomic_add_return(-i, v); -} - -static inline int atomic_inc(atomic_t *v) -{ - return atomic_add_return(1, v) - 1; -} - -static inline int atomic_add(int val, atomic_t *v) -{ - return atomic_add_return(val, v) - val; -} - -static inline int atomic_dec(atomic_t *v) -{ - return atomic_sub_return(1, v) + 1; -} - -/* true if the result is 0, or false for all other cases. 
*/ -#define atomic_dec_and_test(v) (atomic_sub_return(1, v) == 0) -#define atomic_dec_return(v) (atomic_sub_return(1, v)) - -#define atomic_inc_return(v) (atomic_add_return(1, v)) - -static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new) -{ - unsigned long tmp; - int oldval; - - __smp_mb(); - - asm volatile("1:\n" - " lr.w %1, %2\n" - " bne %1, %3, 2f\n" - " sc.w %0, %4, %2\n" - " bnez %0, 1b\n" - "2:" - : "=&r"(tmp), "=&r"(oldval), "+A"(ptr->counter) - : "r"(old), "r"(new) - : "memory"); - - __smp_mb(); - return oldval; -} - -#endif /* __CR_ATOMIC_H__ */ diff --git a/include/common/arch/riscv64/asm/bitops.h b/include/common/arch/riscv64/asm/bitops.h deleted file mode 100644 index eabab27c7..000000000 --- a/include/common/arch/riscv64/asm/bitops.h +++ /dev/null @@ -1,159 +0,0 @@ -#ifndef __CR_ASM_BITOPS_H__ -#define __CR_ASM_BITOPS_H__ - -#include "common/compiler.h" -#include "common/asm/bitsperlong.h" - -#define DIV_ROUND_UP(n, d) (((n) + (d)-1) / (d)) -#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_LONG) - -#define DECLARE_BITMAP(name, bits) unsigned long name[BITS_TO_LONGS(bits)] -#define BITMAP_SIZE(name) (sizeof(name) * CHAR_BIT) - -#if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 1) -/* Technically wrong, but this avoids compilation errors on some gcc - versions. */ -#define BITOP_ADDR(x) "=m"(*(volatile long *)(x)) -#else -#define BITOP_ADDR(x) "+m"(*(volatile long *)(x)) -#endif - -#define ADDR BITOP_ADDR(addr) - -static inline void set_bit(int nr, volatile unsigned long *addr) -{ - addr += nr / BITS_PER_LONG; - *addr |= (1UL << (nr % BITS_PER_LONG)); -} - -static inline void change_bit(int nr, volatile unsigned long *addr) -{ - addr += nr / BITS_PER_LONG; - *addr ^= (1UL << (nr % BITS_PER_LONG)); -} - -static inline int test_bit(int nr, volatile const unsigned long *addr) -{ - addr += nr / BITS_PER_LONG; - return (*addr & (1UL << (nr % BITS_PER_LONG))) ? 
-1 : 0; -} - -static inline void clear_bit(int nr, volatile unsigned long *addr) -{ - addr += nr / BITS_PER_LONG; - *addr &= ~(1UL << (nr % BITS_PER_LONG)); -} - -/** - * __ffs - find first set bit in word - * @word: The word to search - * - * Undefined if no bit exists, so code should check against 0 first. - */ -static inline unsigned long __ffs(unsigned long word) -{ - int p = 0; - - for (; p < 8*sizeof(word); ++p) { - if (word & 1) { - break; - } - - word >>= 1; - } - - return p; -} - -#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG) - -/* - * Find the next set bit in a memory region. - */ -static inline unsigned long find_next_bit(const unsigned long *addr, unsigned long size, unsigned long offset) -{ - const unsigned long *p = addr + BITOP_WORD(offset); - unsigned long result = offset & ~(BITS_PER_LONG - 1); - unsigned long tmp; - - if (offset >= size) - return size; - size -= result; - offset %= BITS_PER_LONG; - if (offset) { - tmp = *(p++); - tmp &= (~0UL << offset); - if (size < BITS_PER_LONG) - goto found_first; - if (tmp) - goto found_middle; - size -= BITS_PER_LONG; - result += BITS_PER_LONG; - } - while (size & ~(BITS_PER_LONG - 1)) { - if ((tmp = *(p++))) - goto found_middle; - result += BITS_PER_LONG; - size -= BITS_PER_LONG; - } - if (!size) - return result; - tmp = *p; - -found_first: - tmp &= (~0UL >> (BITS_PER_LONG - size)); - if (tmp == 0UL) /* Are any bits set? */ - return result + size; /* Nope. 
*/ -found_middle: - return result + __ffs(tmp); -} - -#define for_each_bit(i, bitmask) \ - for (i = find_next_bit(bitmask, BITMAP_SIZE(bitmask), 0); i < BITMAP_SIZE(bitmask); \ - i = find_next_bit(bitmask, BITMAP_SIZE(bitmask), i + 1)) - - -#define BITS_PER_LONG 64 - -#define BIT_MASK(nr) ((1##UL) << ((nr) % BITS_PER_LONG)) -#define BIT_WORD(nr) ((nr) / BITS_PER_LONG) - -#define __AMO(op) "amo" #op ".d" - -#define __test_and_op_bit_ord(op, mod, nr, addr, ord) \ - ({ \ - unsigned long __res, __mask; \ - __mask = BIT_MASK(nr); \ - __asm__ __volatile__(__AMO(op) #ord " %0, %2, %1" \ - : "=r"(__res), "+A"(addr[BIT_WORD(nr)]) \ - : "r"(mod(__mask)) \ - : "memory"); \ - ((__res & __mask) != 0); \ - }) - -#define __op_bit_ord(op, mod, nr, addr, ord) \ - __asm__ __volatile__(__AMO(op) #ord " zero, %1, %0" \ - : "+A"(addr[BIT_WORD(nr)]) \ - : "r"(mod(BIT_MASK(nr))) \ - : "memory"); - -#define __test_and_op_bit(op, mod, nr, addr) __test_and_op_bit_ord(op, mod, nr, addr, .aqrl) -#define __op_bit(op, mod, nr, addr) __op_bit_ord(op, mod, nr, addr, ) - -/* Bitmask modifiers */ -#define __NOP(x) (x) -#define __NOT(x) (~(x)) - -/** - * test_and_set_bit - Set a bit and return its old value - * @nr: Bit to set - * @addr: Address to count from - * - * This operation may be reordered on other architectures than x86. 
- */ -static inline int test_and_set_bit(int nr, volatile unsigned long *addr) -{ - return __test_and_op_bit(or, __NOP, nr, addr); -} - -#endif /* __CR_ASM_BITOPS_H__ */ diff --git a/include/common/arch/riscv64/asm/bitsperlong.h b/include/common/arch/riscv64/asm/bitsperlong.h deleted file mode 100644 index d95727d19..000000000 --- a/include/common/arch/riscv64/asm/bitsperlong.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __CR_BITSPERLONG_H__ -#define __CR_BITSPERLONG_H__ - -#define BITS_PER_LONG 64 - -#endif /* __CR_BITSPERLONG_H__ */ diff --git a/include/common/arch/riscv64/asm/linkage.h b/include/common/arch/riscv64/asm/linkage.h deleted file mode 100644 index c6d40f750..000000000 --- a/include/common/arch/riscv64/asm/linkage.h +++ /dev/null @@ -1,23 +0,0 @@ -#ifndef __CR_LINKAGE_H__ -#define __CR_LINKAGE_H__ - -#ifdef __ASSEMBLY__ - -#define __ALIGN .align 4, 0x00 -#define __ALIGN_STR ".align 4, 0x00" - -#define GLOBAL(name) \ - .globl name; \ -name: - -#define ENTRY(name) \ - .globl name; \ - .type name, @function; \ - __ALIGN; \ -name: - -#define END(sym) .size sym, .- sym - -#endif /* __ASSEMBLY__ */ - -#endif /* __CR_LINKAGE_H__ */ diff --git a/include/common/arch/riscv64/asm/page.h b/include/common/arch/riscv64/asm/page.h deleted file mode 100644 index 5113cb6db..000000000 --- a/include/common/arch/riscv64/asm/page.h +++ /dev/null @@ -1,44 +0,0 @@ -#ifndef __CR_ASM_PAGE_H__ -#define __CR_ASM_PAGE_H__ - -#define ARCH_HAS_LONG_PAGES - -#ifndef CR_NOGLIBC -#include /* ffsl() */ -#include /* _SC_PAGESIZE */ - -extern unsigned __page_size; -extern unsigned __page_shift; - -static inline unsigned page_size(void) -{ - if (!__page_size) - __page_size = sysconf(_SC_PAGESIZE); - return __page_size; -} - -static inline unsigned page_shift(void) -{ - if (!__page_shift) - __page_shift = (ffsl(page_size()) - 1); - return __page_shift; -} - -/* - * Don't add ifdefs for PAGE_SIZE: if any header defines it as a constant - * on aarch64, then we need refrain using PAGE_SIZE in 
criu and use - * page_size() across sources (as it may differ on aarch64). - */ -#define PAGE_SIZE page_size() -#define PAGE_MASK (~(PAGE_SIZE - 1)) -#define PAGE_SHIFT page_shift() - -#define PAGE_PFN(addr) ((addr) / PAGE_SIZE) - -#else /* CR_NOGLIBC */ - -extern unsigned long page_size(void); -#define PAGE_SIZE page_size() - -#endif /* CR_NOGLIBC */ -#endif /* __CR_ASM_PAGE_H__ */ diff --git a/include/common/lock.h b/include/common/lock.h index 4733d7287..ccfa468b8 100644 --- a/include/common/lock.h +++ b/include/common/lock.h @@ -2,7 +2,6 @@ #define __CR_COMMON_LOCK_H__ #include -#include #include #include #include @@ -163,11 +162,6 @@ static inline void mutex_lock(mutex_t *m) } } -static inline bool mutex_trylock(mutex_t *m) -{ - return atomic_inc_return(&m->raw) == 1; -} - static inline void mutex_unlock(mutex_t *m) { uint32_t c = 0; diff --git a/lib/c/criu.c b/lib/c/criu.c index 485c8b178..7f766db85 100644 --- a/lib/c/criu.c +++ b/lib/c/criu.c @@ -352,8 +352,8 @@ int criu_set_parent_images(const char *path) int criu_local_set_pre_dump_mode(criu_opts *opts, enum criu_pre_dump_mode mode) { + opts->rpc->has_pre_dump_mode = true; if (mode == CRIU_PRE_DUMP_SPLICE || mode == CRIU_PRE_DUMP_READ) { - opts->rpc->has_pre_dump_mode = true; opts->rpc->pre_dump_mode = (CriuPreDumpMode)mode; return 0; } @@ -1867,8 +1867,8 @@ void criu_set_pidfd_store_sk(int sk) int criu_local_set_network_lock(criu_opts *opts, enum criu_network_lock_method method) { + opts->rpc->has_network_lock = true; if (method == CRIU_NETWORK_LOCK_IPTABLES || method == CRIU_NETWORK_LOCK_NFTABLES || method == CRIU_NETWORK_LOCK_SKIP) { - opts->rpc->has_network_lock = true; opts->rpc->network_lock = (CriuNetworkLockMethod)method; return 0; } @@ -2041,22 +2041,3 @@ void criu_set_empty_ns(int namespaces) { criu_local_set_empty_ns(global_opts, namespaces); } - -int criu_local_set_config_file(criu_opts *opts, const char *path) -{ - char *new; - - new = strdup(path); - if (!new) - return -ENOMEM; - - 
free(opts->rpc->config_file); - opts->rpc->config_file = new; - - return 0; -} - -int criu_set_config_file(const char *path) -{ - return criu_local_set_config_file(global_opts, path); -} diff --git a/lib/c/criu.h b/lib/c/criu.h index 44446f664..c1c607869 100644 --- a/lib/c/criu.h +++ b/lib/c/criu.h @@ -116,7 +116,6 @@ void criu_set_pidfd_store_sk(int sk); int criu_set_network_lock(enum criu_network_lock_method method); int criu_join_ns_add(const char *ns, const char *ns_file, const char *extra_opt); void criu_set_mntns_compat_mode(bool val); -int criu_set_config_file(const char *path); /* * The criu_notify_arg_t na argument is an opaque @@ -282,7 +281,6 @@ void criu_local_set_pidfd_store_sk(criu_opts *opts, int sk); int criu_local_set_network_lock(criu_opts *opts, enum criu_network_lock_method method); int criu_local_join_ns_add(criu_opts *opts, const char *ns, const char *ns_file, const char *extra_opt); void criu_local_set_mntns_compat_mode(criu_opts *opts, bool val); -int criu_local_set_config_file(criu_opts *opts, const char *path); void criu_local_set_notify_cb(criu_opts *opts, int (*cb)(char *action, criu_notify_arg_t na)); diff --git a/lib/pycriu/__init__.py b/lib/pycriu/__init__.py index 28f1e9424..2abcf029d 100644 --- a/lib/pycriu/__init__.py +++ b/lib/pycriu/__init__.py @@ -1,15 +1,4 @@ from . import rpc_pb2 as rpc from . 
import images -from .criu import criu, CRIUExceptionExternal, CRIUException -from .criu import CR_DEFAULT_SERVICE_ADDRESS -from .version import __version__ - -__all__ = ( - "rpc", - "images", - "criu", - "CRIUExceptionExternal", - "CRIUException", - "CR_DEFAULT_SERVICE_ADDRESS", - "__version__", -) \ No newline at end of file +from .criu import * +from .version import __version__ \ No newline at end of file diff --git a/lib/pycriu/criu.py b/lib/pycriu/criu.py index 51a5c2902..f3e018095 100644 --- a/lib/pycriu/criu.py +++ b/lib/pycriu/criu.py @@ -8,7 +8,6 @@ import struct import pycriu.rpc_pb2 as rpc -CR_DEFAULT_SERVICE_ADDRESS = "./criu_service.socket" class _criu_comm: """ @@ -46,14 +45,7 @@ class _criu_comm_sk(_criu_comm): def connect(self, daemon): self.sk = socket.socket(socket.AF_UNIX, socket.SOCK_SEQPACKET) - try: - self.sk.connect(self.comm) - - except FileNotFoundError: - raise FileNotFoundError("Socket file not found.") - - except ConnectionRefusedError: - raise ConnectionRefusedError("Service not running.") + self.sk.connect(self.comm) return self.sk @@ -111,7 +103,7 @@ class _criu_comm_bin(_criu_comm): os.close(2) css[0].send(struct.pack('i', os.getpid())) - os.execvp(self.comm, + os.execv(self.comm, [self.comm, 'swrk', "%d" % css[0].fileno()]) os._exit(1) @@ -189,14 +181,15 @@ class CRIUExceptionExternal(CRIUException): if self.errno == errno.EBADRQC: s += "Bad options" - elif self.typ == rpc.DUMP and self.errno == errno.ESRCH: - s += "No process with such pid" + if self.typ == rpc.DUMP: + if self.errno == errno.ESRCH: + s += "No process with such pid" - elif self.typ == rpc.RESTORE and self.errno == errno.EEXIST: - s += "Process with requested pid already exists" + if self.typ == rpc.RESTORE: + if self.errno == errno.EEXIST: + s += "Process with requested pid already exists" - else: - s += "Unknown" + s += "Unknown" return s @@ -211,11 +204,10 @@ class criu: def __init__(self): self.use_binary('criu') - # images_dir_fd is required field with default 
value of -1 - self.opts = rpc.criu_opts(images_dir_fd=-1) + self.opts = rpc.criu_opts() self.sk = None - def use_sk(self, sk_name=CR_DEFAULT_SERVICE_ADDRESS): + def use_sk(self, sk_name): """ Access criu using unix socket which that belongs to criu service daemon. """ @@ -242,7 +234,7 @@ class criu: # process resources from its own if criu is located in a same # process tree it is trying to dump. daemon = False - if req.type == rpc.DUMP and (not req.opts.HasField('pid') or req.opts.pid == os.getpid()): + if req.type == rpc.DUMP and not req.opts.HasField('pid'): daemon = True try: @@ -274,7 +266,6 @@ class criu: """ req = rpc.criu_req() req.type = rpc.CHECK - req.opts.MergeFrom(self.opts) resp = self._send_req_and_recv_resp(req) diff --git a/lib/pycriu/images/pb2dict.py b/lib/pycriu/images/pb2dict.py index f22887a52..0d1a24692 100644 --- a/lib/pycriu/images/pb2dict.py +++ b/lib/pycriu/images/pb2dict.py @@ -83,7 +83,6 @@ mmap_prot_map = [ mmap_flags_map = [ ('MAP_SHARED', 0x1), ('MAP_PRIVATE', 0x2), - ('MAP_DROPPABLE', 0x08), ('MAP_ANON', 0x20), ('MAP_GROWSDOWN', 0x0100), ] @@ -105,7 +104,6 @@ mmap_status_map = [ ('VMA_AREA_AIORING', 1 << 13), ('VMA_AREA_MEMFD', 1 << 14), ('VMA_AREA_SHSTK', 1 << 15), - ('VMA_AREA_UPROBES', 1 << 17), ('VMA_UNSUPP', 1 << 31), ] @@ -154,9 +152,8 @@ flags_maps = { gen_maps = { 'task_state': { 1: 'Alive', - 2: 'Dead', - 3: 'Stopped', - 6: 'Zombie', + 3: 'Zombie', + 6: 'Stopped' }, } @@ -307,7 +304,7 @@ def _pb2dict_cast(field, value, pretty=False, is_hex=False): return field.enum_type.values_by_number.get(value, None).name elif field.type in _basic_cast: cast = _basic_cast[field.type] - if pretty and cast is int: + if pretty and (cast == int): if is_hex: # Fields that have (criu).hex = true option set # should be stored in hex string format. 
@@ -379,7 +376,7 @@ def _dict2pb_cast(field, value): return field.enum_type.values_by_name.get(value, None).number elif field.type in _basic_cast: cast = _basic_cast[field.type] - if cast is int and is_string(value): + if (cast == int) and is_string(value): if _marked_as_dev(field): return encode_dev(field, value) diff --git a/lib/pyproject.toml b/lib/pyproject.toml index ea9f88dcc..8eb4b7084 100644 --- a/lib/pyproject.toml +++ b/lib/pyproject.toml @@ -6,12 +6,11 @@ build-backend = "setuptools.build_meta" name = "pycriu" description = "Python bindings for CRIU" authors = [ - {name = "CRIU team", email = "criu@lists.linux.dev"}, + {name = "CRIU team", email = "criu@openvz.org"}, ] -license = {text = "LGPLv2.1"} +license = {text = "GPLv2"} dynamic = ["version"] requires-python = ">=3.6" -dependencies = ["protobuf"] [tool.setuptools] packages = ["pycriu", "pycriu.images"] diff --git a/lib/setup.cfg b/lib/setup.cfg index 28c9e49c3..23ee48dd5 100644 --- a/lib/setup.cfg +++ b/lib/setup.cfg @@ -7,12 +7,10 @@ name = pycriu description = Python bindings for CRIU author = CRIU team -author_email = criu@lists.linux.dev -license = LGPLv2.1 +author_email = criu@openvz.org +license = GPLv2 version = attr: pycriu.__version__ [options] packages = find: python_requires = >=3.6 -install_requires = - protobuf diff --git a/plugins/amdgpu/Makefile b/plugins/amdgpu/Makefile index 250e7b0e7..7d3388b80 100644 --- a/plugins/amdgpu/Makefile +++ b/plugins/amdgpu/Makefile @@ -15,7 +15,7 @@ DEPS_NOK := ; __nmk_dir ?= ../../scripts/nmk/scripts/ include $(__nmk_dir)msg.mk -PLUGIN_CFLAGS := -g -Wall -Werror -D _GNU_SOURCE -shared -nostartfiles -fPIC +PLUGIN_CFLAGS := -g -Wall -Werror -D _GNU_SOURCE -shared -nostartfiles -fPIC -DCR_PLUGIN_DEFAULT="$(PLUGINDIR)" PLUGIN_LDFLAGS := -lpthread -lrt -ldrm -ldrm_amdgpu ifeq ($(CONFIG_AMDGPU),y) @@ -25,10 +25,10 @@ else endif criu-amdgpu.pb-c.c: criu-amdgpu.proto - protoc --proto_path=. --c_out=. criu-amdgpu.proto + protoc-c --proto_path=. --c_out=. 
criu-amdgpu.proto -amdgpu_plugin.so: amdgpu_plugin.c amdgpu_plugin_drm.c amdgpu_plugin_dmabuf.c amdgpu_plugin_topology.c amdgpu_plugin_util.c criu-amdgpu.pb-c.c amdgpu_socket_utils.c - $(CC) $(PLUGIN_CFLAGS) $(DEFINES) $(shell $(COMPEL) includes) $^ -o $@ $(PLUGIN_INCLUDE) $(PLUGIN_LDFLAGS) $(LIBDRM_INC) +amdgpu_plugin.so: amdgpu_plugin.c amdgpu_plugin_drm.c amdgpu_plugin_topology.c amdgpu_plugin_util.c criu-amdgpu.pb-c.c + $(CC) $(PLUGIN_CFLAGS) $(shell $(COMPEL) includes) $^ -o $@ $(PLUGIN_INCLUDE) $(PLUGIN_LDFLAGS) $(LIBDRM_INC) amdgpu_plugin_clean: $(call msg-clean, $@) diff --git a/plugins/amdgpu/README.md b/plugins/amdgpu/README.md index b808fbc4f..1078eafe6 100644 --- a/plugins/amdgpu/README.md +++ b/plugins/amdgpu/README.md @@ -3,8 +3,7 @@ Supporting ROCm with CRIU _Felix Kuehling _
_Rajneesh Bardwaj _
-_David Yat Sin _
-_Yanning Yang _ +_David Yat Sin _ # Introduction @@ -225,26 +224,6 @@ to resume execution on the GPUs. *This new plugin is enabled by the new hook `__RESUME_DEVICES_LATE` in our RFC patch series.* -## Restoring BO content in parallel - -Restoring the BO content is an important part in the restore of GPU state and -usually takes a significant amount of time. A possible location for this -procedure is the `cr_plugin_restore_file` hook. However, restoring in this hook -blocks the target process from performing other restore operations, which -hinders further optimization of the restore process. - -Therefore, a new plugin hook that runs in the master restore process is -introduced, and it interacts with the `cr_plugin_restore_file` hook to complete -the restore of BO content. Specifically, the target process only needs to send -the relevant BOs to the master restore process, while this new hook handles all -the restore of buffer objects. Through this method, during the restore of the BO -content, the target process can perform other restore operations, thus -accelerating the restore procedure. This is an implementation of the gCROP -method proposed in the ACM SoCC'24 paper: [On-demand and Parallel -Checkpoint/Restore for GPU Applications](https://dl.acm.org/doi/10.1145/3698038.3698510). - -*This optimization technique is enabled by the `__POST_FORKING` hook.* - ## Other CRIU changes In addition to the new plugins, we need to make some changes to CRIU itself to diff --git a/plugins/amdgpu/amdgpu_drm.h b/plugins/amdgpu/amdgpu_drm.h deleted file mode 100644 index 69227a12b..000000000 --- a/plugins/amdgpu/amdgpu_drm.h +++ /dev/null @@ -1,1801 +0,0 @@ -/* amdgpu_drm.h -- Public header for the amdgpu driver -*- linux-c -*- - * - * Copyright 2000 Precision Insight, Inc., Cedar Park, Texas. - * Copyright 2000 VA Linux Systems, Inc., Fremont, California. - * Copyright 2002 Tungsten Graphics, Inc., Cedar Park, Texas. - * Copyright 2014 Advanced Micro Devices, Inc. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Kevin E. 
Martin - * Gareth Hughes - * Keith Whitwell - */ - -#ifndef __AMDGPU_DRM_H__ -#define __AMDGPU_DRM_H__ - -#include "drm.h" - -#if defined(__cplusplus) -extern "C" { -#endif - -#define DRM_AMDGPU_GEM_CREATE 0x00 -#define DRM_AMDGPU_GEM_MMAP 0x01 -#define DRM_AMDGPU_CTX 0x02 -#define DRM_AMDGPU_BO_LIST 0x03 -#define DRM_AMDGPU_CS 0x04 -#define DRM_AMDGPU_INFO 0x05 -#define DRM_AMDGPU_GEM_METADATA 0x06 -#define DRM_AMDGPU_GEM_WAIT_IDLE 0x07 -#define DRM_AMDGPU_GEM_VA 0x08 -#define DRM_AMDGPU_WAIT_CS 0x09 -#define DRM_AMDGPU_GEM_OP 0x10 -#define DRM_AMDGPU_GEM_USERPTR 0x11 -#define DRM_AMDGPU_WAIT_FENCES 0x12 -#define DRM_AMDGPU_VM 0x13 -#define DRM_AMDGPU_FENCE_TO_HANDLE 0x14 -#define DRM_AMDGPU_SCHED 0x15 -#define DRM_AMDGPU_USERQ 0x16 -#define DRM_AMDGPU_USERQ_SIGNAL 0x17 -#define DRM_AMDGPU_USERQ_WAIT 0x18 -#define DRM_AMDGPU_GEM_LIST_HANDLES 0x19 -/* not upstream */ -#define DRM_AMDGPU_GEM_DGMA 0x5c - -/* hybrid specific ioctls */ -#define DRM_AMDGPU_SEM 0x5b - -#define DRM_IOCTL_AMDGPU_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create) -#define DRM_IOCTL_AMDGPU_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap) -#define DRM_IOCTL_AMDGPU_CTX DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_CTX, union drm_amdgpu_ctx) -#define DRM_IOCTL_AMDGPU_BO_LIST DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_BO_LIST, union drm_amdgpu_bo_list) -#define DRM_IOCTL_AMDGPU_CS DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_CS, union drm_amdgpu_cs) -#define DRM_IOCTL_AMDGPU_INFO DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_INFO, struct drm_amdgpu_info) -#define DRM_IOCTL_AMDGPU_GEM_METADATA DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_METADATA, struct drm_amdgpu_gem_metadata) -#define DRM_IOCTL_AMDGPU_GEM_WAIT_IDLE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_WAIT_IDLE, union drm_amdgpu_gem_wait_idle) -#define DRM_IOCTL_AMDGPU_GEM_VA DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_VA, struct drm_amdgpu_gem_va) -#define DRM_IOCTL_AMDGPU_WAIT_CS 
DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_WAIT_CS, union drm_amdgpu_wait_cs) -#define DRM_IOCTL_AMDGPU_GEM_OP DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_OP, struct drm_amdgpu_gem_op) -#define DRM_IOCTL_AMDGPU_GEM_USERPTR DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_USERPTR, struct drm_amdgpu_gem_userptr) -#define DRM_IOCTL_AMDGPU_WAIT_FENCES DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_WAIT_FENCES, union drm_amdgpu_wait_fences) -#define DRM_IOCTL_AMDGPU_VM DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_VM, union drm_amdgpu_vm) -#define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_FENCE_TO_HANDLE, union drm_amdgpu_fence_to_handle) -#define DRM_IOCTL_AMDGPU_SCHED DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_SCHED, union drm_amdgpu_sched) -#define DRM_IOCTL_AMDGPU_USERQ DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ, union drm_amdgpu_userq) -#define DRM_IOCTL_AMDGPU_USERQ_SIGNAL DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_SIGNAL, struct drm_amdgpu_userq_signal) -#define DRM_IOCTL_AMDGPU_USERQ_WAIT DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_USERQ_WAIT, struct drm_amdgpu_userq_wait) -#define DRM_IOCTL_AMDGPU_GEM_LIST_HANDLES DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_LIST_HANDLES, struct drm_amdgpu_gem_list_handles) - -#define DRM_IOCTL_AMDGPU_GEM_DGMA DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_DGMA, struct drm_amdgpu_gem_dgma) - -/** - * DOC: memory domains - * - * %AMDGPU_GEM_DOMAIN_CPU System memory that is not GPU accessible. - * Memory in this pool could be swapped out to disk if there is pressure. - * - * %AMDGPU_GEM_DOMAIN_GTT GPU accessible system memory, mapped into the - * GPU's virtual address space via gart. Gart memory linearizes non-contiguous - * pages of system memory, allows GPU access system memory in a linearized - * fashion. - * - * %AMDGPU_GEM_DOMAIN_VRAM Local video memory. For APUs, it is memory - * carved out by the BIOS. - * - * %AMDGPU_GEM_DOMAIN_GDS Global on-chip data storage used to share data - * across shader threads. 
- * - * %AMDGPU_GEM_DOMAIN_GWS Global wave sync, used to synchronize the - * execution of all the waves on a device. - * - * %AMDGPU_GEM_DOMAIN_OA Ordered append, used by 3D or Compute engines - * for appending data. - * - * %AMDGPU_GEM_DOMAIN_DOORBELL Doorbell. It is an MMIO region for - * signalling user mode queues. - * - * %AMDGPU_GEM_DOMAIN_MMIO_REMAP MMIO remap page (special mapping for HDP flushing). - */ -/* hybrid specific ioctls */ -#define DRM_IOCTL_AMDGPU_SEM DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_SEM, union drm_amdgpu_sem) - -#define AMDGPU_GEM_DOMAIN_CPU 0x1 -#define AMDGPU_GEM_DOMAIN_GTT 0x2 -#define AMDGPU_GEM_DOMAIN_VRAM 0x4 -#define AMDGPU_GEM_DOMAIN_GDS 0x8 -#define AMDGPU_GEM_DOMAIN_GWS 0x10 -#define AMDGPU_GEM_DOMAIN_OA 0x20 -#define AMDGPU_GEM_DOMAIN_DOORBELL 0x40 -#define AMDGPU_GEM_DOMAIN_MMIO_REMAP 0x80 -#define AMDGPU_GEM_DOMAIN_DGMA 0x400 -#define AMDGPU_GEM_DOMAIN_DGMA_IMPORT 0x800 - -#define AMDGPU_GEM_DOMAIN_MASK (AMDGPU_GEM_DOMAIN_CPU | \ - AMDGPU_GEM_DOMAIN_GTT | \ - AMDGPU_GEM_DOMAIN_VRAM | \ - AMDGPU_GEM_DOMAIN_GDS | \ - AMDGPU_GEM_DOMAIN_GWS | \ - AMDGPU_GEM_DOMAIN_OA |\ - AMDGPU_GEM_DOMAIN_DOORBELL |\ - AMDGPU_GEM_DOMAIN_MMIO_REMAP |\ - AMDGPU_GEM_DOMAIN_DGMA |\ - AMDGPU_GEM_DOMAIN_DGMA_IMPORT) - -/* Flag that CPU access will be required for the case of VRAM domain */ -#define AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED (1 << 0) -/* Flag that CPU access will not work, this VRAM domain is invisible */ -#define AMDGPU_GEM_CREATE_NO_CPU_ACCESS (1 << 1) -/* Flag that USWC attributes should be used for GTT */ -#define AMDGPU_GEM_CREATE_CPU_GTT_USWC (1 << 2) -/* Flag that the memory should be in VRAM and cleared */ -#define AMDGPU_GEM_CREATE_VRAM_CLEARED (1 << 3) -/* Flag that allocating the BO should use linear VRAM */ -#define AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS (1 << 5) -/* Flag that BO is always valid in this VM */ -#define AMDGPU_GEM_CREATE_VM_ALWAYS_VALID (1 << 6) -/* Flag that BO sharing will be explicitly synchronized */ -#define 
AMDGPU_GEM_CREATE_EXPLICIT_SYNC (1 << 7) -/* Flag that indicates allocating MQD gart on GFX9, where the mtype - * for the second page onward should be set to NC. It should never - * be used by user space applications. - */ -#define AMDGPU_GEM_CREATE_CP_MQD_GFX9 (1 << 8) -/* Flag that BO may contain sensitive data that must be wiped before - * releasing the memory - */ -#define AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE (1 << 9) -/* Flag that BO will be encrypted and that the TMZ bit should be - * set in the PTEs when mapping this buffer via GPUVM or - * accessing it with various hw blocks - */ -#define AMDGPU_GEM_CREATE_ENCRYPTED (1 << 10) -/* Flag that BO will be used only in preemptible context, which does - * not require GTT memory accounting - */ -#define AMDGPU_GEM_CREATE_PREEMPTIBLE (1 << 11) -/* Flag that BO can be discarded under memory pressure without keeping the - * content. - */ -#define AMDGPU_GEM_CREATE_DISCARDABLE (1 << 12) -/* Flag that BO is shared coherently between multiple devices or CPU threads. - * May depend on GPU instructions to flush caches to system scope explicitly. - * - * This influences the choice of MTYPE in the PTEs on GFXv9 and later GPUs and - * may override the MTYPE selected in AMDGPU_VA_OP_MAP. - */ -#define AMDGPU_GEM_CREATE_COHERENT (1 << 13) -/* Flag that BO should not be cached by GPU. Coherent without having to flush - * GPU caches explicitly - * - * This influences the choice of MTYPE in the PTEs on GFXv9 and later GPUs and - * may override the MTYPE selected in AMDGPU_VA_OP_MAP. - */ -#define AMDGPU_GEM_CREATE_UNCACHED (1 << 14) -/* Flag that BO should be coherent across devices when using device-level - * atomics. May depend on GPU instructions to flush caches to device scope - * explicitly, promoting them to system scope automatically. - * - * This influences the choice of MTYPE in the PTEs on GFXv9 and later GPUs and - * may override the MTYPE selected in AMDGPU_VA_OP_MAP. 
- */ -#define AMDGPU_GEM_CREATE_EXT_COHERENT (1 << 15) -/* Set PTE.D and recompress during GTT->VRAM moves according to TILING flags. */ -#define AMDGPU_GEM_CREATE_GFX12_DCC (1 << 16) - -/* hybrid specific */ -/* Flag that the memory should be in SPARSE resource */ -#define AMDGPU_GEM_CREATE_SPARSE (1ULL << 29) -/* Flag that the memory allocation should be from top of domain */ -#define AMDGPU_GEM_CREATE_TOP_DOWN (1ULL << 30) -/* Flag that the memory allocation should be pinned */ -#define AMDGPU_GEM_CREATE_NO_EVICT (1ULL << 31) - -struct drm_amdgpu_gem_create_in { - /** the requested memory size */ - __u64 bo_size; - /** physical start_addr alignment in bytes for some HW requirements */ - __u64 alignment; - /** the requested memory domains */ - __u64 domains; - /** allocation flags */ - __u64 domain_flags; -}; - -struct drm_amdgpu_gem_create_out { - /** returned GEM object handle */ - __u32 handle; - __u32 _pad; -}; - -union drm_amdgpu_gem_create { - struct drm_amdgpu_gem_create_in in; - struct drm_amdgpu_gem_create_out out; -}; - -/** Opcode to create new residency list. 
*/ -#define AMDGPU_BO_LIST_OP_CREATE 0 -/** Opcode to destroy previously created residency list */ -#define AMDGPU_BO_LIST_OP_DESTROY 1 -/** Opcode to update resource information in the list */ -#define AMDGPU_BO_LIST_OP_UPDATE 2 - -struct drm_amdgpu_bo_list_in { - /** Type of operation */ - __u32 operation; - /** Handle of list or 0 if we want to create one */ - __u32 list_handle; - /** Number of BOs in list */ - __u32 bo_number; - /** Size of each element describing BO */ - __u32 bo_info_size; - /** Pointer to array describing BOs */ - __u64 bo_info_ptr; -}; - -struct drm_amdgpu_bo_list_entry { - /** Handle of BO */ - __u32 bo_handle; - /** New (if specified) BO priority to be used during migration */ - __u32 bo_priority; -}; - -struct drm_amdgpu_bo_list_out { - /** Handle of resource list */ - __u32 list_handle; - __u32 _pad; -}; - -union drm_amdgpu_bo_list { - struct drm_amdgpu_bo_list_in in; - struct drm_amdgpu_bo_list_out out; -}; - -/* context related */ -#define AMDGPU_CTX_OP_ALLOC_CTX 1 -#define AMDGPU_CTX_OP_FREE_CTX 2 -#define AMDGPU_CTX_OP_QUERY_STATE 3 -#define AMDGPU_CTX_OP_QUERY_STATE2 4 -#define AMDGPU_CTX_OP_GET_STABLE_PSTATE 5 -#define AMDGPU_CTX_OP_SET_STABLE_PSTATE 6 - -/* GPU reset status */ -#define AMDGPU_CTX_NO_RESET 0 -/* this the context caused it */ -#define AMDGPU_CTX_GUILTY_RESET 1 -/* some other context caused it */ -#define AMDGPU_CTX_INNOCENT_RESET 2 -/* unknown cause */ -#define AMDGPU_CTX_UNKNOWN_RESET 3 - -/* indicate gpu reset occurred after ctx created */ -#define AMDGPU_CTX_QUERY2_FLAGS_RESET (1<<0) -/* indicate vram lost occurred after ctx created */ -#define AMDGPU_CTX_QUERY2_FLAGS_VRAMLOST (1<<1) -/* indicate some job from this context once cause gpu hang */ -#define AMDGPU_CTX_QUERY2_FLAGS_GUILTY (1<<2) -/* indicate some errors are detected by RAS */ -#define AMDGPU_CTX_QUERY2_FLAGS_RAS_CE (1<<3) -#define AMDGPU_CTX_QUERY2_FLAGS_RAS_UE (1<<4) -/* indicate that the reset hasn't completed yet */ -#define 
AMDGPU_CTX_QUERY2_FLAGS_RESET_IN_PROGRESS (1<<5) - -/* Context priority level */ -#define AMDGPU_CTX_PRIORITY_UNSET -2048 -#define AMDGPU_CTX_PRIORITY_VERY_LOW -1023 -#define AMDGPU_CTX_PRIORITY_LOW -512 -#define AMDGPU_CTX_PRIORITY_NORMAL 0 -/* - * When used in struct drm_amdgpu_ctx_in, a priority above NORMAL requires - * CAP_SYS_NICE or DRM_MASTER -*/ -#define AMDGPU_CTX_PRIORITY_HIGH 512 -#define AMDGPU_CTX_PRIORITY_VERY_HIGH 1023 - -/* select a stable profiling pstate for perfmon tools */ -#define AMDGPU_CTX_STABLE_PSTATE_FLAGS_MASK 0xf -#define AMDGPU_CTX_STABLE_PSTATE_NONE 0 -#define AMDGPU_CTX_STABLE_PSTATE_STANDARD 1 -#define AMDGPU_CTX_STABLE_PSTATE_MIN_SCLK 2 -#define AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK 3 -#define AMDGPU_CTX_STABLE_PSTATE_PEAK 4 - -struct drm_amdgpu_ctx_in { - /** AMDGPU_CTX_OP_* */ - __u32 op; - /** Flags */ - __u32 flags; - __u32 ctx_id; - /** AMDGPU_CTX_PRIORITY_* */ - __s32 priority; -}; - -union drm_amdgpu_ctx_out { - struct { - __u32 ctx_id; - __u32 _pad; - } alloc; - - struct { - /** For future use, no flags defined so far */ - __u64 flags; - /** Number of resets caused by this context so far. */ - __u32 hangs; - /** Reset status since the last call of the ioctl. 
*/ - __u32 reset_status; - } state; - - struct { - __u32 flags; - __u32 _pad; - } pstate; -}; - -union drm_amdgpu_ctx { - struct drm_amdgpu_ctx_in in; - union drm_amdgpu_ctx_out out; -}; - -/* user queue IOCTL operations */ -#define AMDGPU_USERQ_OP_CREATE 1 -#define AMDGPU_USERQ_OP_FREE 2 - -/* queue priority levels */ -/* low < normal low < normal high < high */ -#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_MASK 0x3 -#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_SHIFT 0 -#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_NORMAL_LOW 0 -#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_LOW 1 -#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_NORMAL_HIGH 2 -#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_PRIORITY_HIGH 3 /* admin only */ -/* for queues that need access to protected content */ -#define AMDGPU_USERQ_CREATE_FLAGS_QUEUE_SECURE (1 << 2) - -/* - * This structure is a container to pass input configuration - * info for all supported userqueue related operations. - * For operation AMDGPU_USERQ_OP_CREATE: user is expected - * to set all fields, excep the parameter 'queue_id'. - * For operation AMDGPU_USERQ_OP_FREE: the only input parameter expected - * to be set is 'queue_id', eveything else is ignored. - */ -struct drm_amdgpu_userq_in { - /** AMDGPU_USERQ_OP_* */ - __u32 op; - /** Queue id passed for operation USERQ_OP_FREE */ - __u32 queue_id; - /** the target GPU engine to execute workload (AMDGPU_HW_IP_*) */ - __u32 ip_type; - /** - * @doorbell_handle: the handle of doorbell GEM object - * associated with this userqueue client. - */ - __u32 doorbell_handle; - /** - * @doorbell_offset: 32-bit offset of the doorbell in the doorbell bo. - * Kernel will generate absolute doorbell offset using doorbell_handle - * and doorbell_offset in the doorbell bo. - */ - __u32 doorbell_offset; - /** - * @flags: flags used for queue parameters - */ - __u32 flags; - /** - * @queue_va: Virtual address of the GPU memory which holds the queue - * object. 
The queue holds the workload packets. - */ - __u64 queue_va; - /** - * @queue_size: Size of the queue in bytes, this needs to be 256-byte - * aligned. - */ - __u64 queue_size; - /** - * @rptr_va : Virtual address of the GPU memory which holds the ring RPTR. - * This object must be at least 8 byte in size and aligned to 8-byte offset. - */ - __u64 rptr_va; - /** - * @wptr_va : Virtual address of the GPU memory which holds the ring WPTR. - * This object must be at least 8 byte in size and aligned to 8-byte offset. - * - * Queue, RPTR and WPTR can come from the same object, as long as the size - * and alignment related requirements are met. - */ - __u64 wptr_va; - /** - * @mqd: MQD (memory queue descriptor) is a set of parameters which allow - * the GPU to uniquely define and identify a usermode queue. - * - * MQD data can be of different size for different GPU IP/engine and - * their respective versions/revisions, so this points to a __u64 * - * which holds IP specific MQD of this usermode queue. - */ - __u64 mqd; - /** - * @size: size of MQD data in bytes, it must match the MQD structure - * size of the respective engine/revision defined in UAPI for ex, for - * gfx11 workloads, size = sizeof(drm_amdgpu_userq_mqd_gfx11). - */ - __u64 mqd_size; -}; - -/* The structure to carry output of userqueue ops */ -struct drm_amdgpu_userq_out { - /** - * For operation AMDGPU_USERQ_OP_CREATE: This field contains a unique - * queue ID to represent the newly created userqueue in the system, otherwise - * it should be ignored. - */ - __u32 queue_id; - __u32 _pad; -}; - -union drm_amdgpu_userq { - struct drm_amdgpu_userq_in in; - struct drm_amdgpu_userq_out out; -}; - -/* GFX V11 IP specific MQD parameters */ -struct drm_amdgpu_userq_mqd_gfx11 { - /** - * @shadow_va: Virtual address of the GPU memory to hold the shadow buffer. - * Use AMDGPU_INFO_IOCTL to find the exact size of the object. 
- */ - __u64 shadow_va; - /** - * @csa_va: Virtual address of the GPU memory to hold the CSA buffer. - * Use AMDGPU_INFO_IOCTL to find the exact size of the object. - */ - __u64 csa_va; -}; - -/* GFX V11 SDMA IP specific MQD parameters */ -struct drm_amdgpu_userq_mqd_sdma_gfx11 { - /** - * @csa_va: Virtual address of the GPU memory to hold the CSA buffer. - * This must be a from a separate GPU object, and use AMDGPU_INFO IOCTL - * to get the size. - */ - __u64 csa_va; -}; - -/* GFX V11 Compute IP specific MQD parameters */ -struct drm_amdgpu_userq_mqd_compute_gfx11 { - /** - * @eop_va: Virtual address of the GPU memory to hold the EOP buffer. - * This must be a from a separate GPU object, and use AMDGPU_INFO IOCTL - * to get the size. - */ - __u64 eop_va; -}; - -/* userq signal/wait ioctl */ -struct drm_amdgpu_userq_signal { - /** - * @queue_id: Queue handle used by the userq fence creation function - * to retrieve the WPTR. - */ - __u32 queue_id; - __u32 pad; - /** - * @syncobj_handles: The list of syncobj handles submitted by the user queue - * job to be signaled. - */ - __u64 syncobj_handles; - /** - * @num_syncobj_handles: A count that represents the number of syncobj handles in - * @syncobj_handles. - */ - __u64 num_syncobj_handles; - /** - * @bo_read_handles: The list of BO handles that the submitted user queue job - * is using for read only. This will update BO fences in the kernel. - */ - __u64 bo_read_handles; - /** - * @bo_write_handles: The list of BO handles that the submitted user queue job - * is using for write only. This will update BO fences in the kernel. - */ - __u64 bo_write_handles; - /** - * @num_bo_read_handles: A count that represents the number of read BO handles in - * @bo_read_handles. - */ - __u32 num_bo_read_handles; - /** - * @num_bo_write_handles: A count that represents the number of write BO handles in - * @bo_write_handles. 
- */ - __u32 num_bo_write_handles; -}; - -struct drm_amdgpu_userq_fence_info { - /** - * @va: A gpu address allocated for each queue which stores the - * read pointer (RPTR) value. - */ - __u64 va; - /** - * @value: A 64 bit value represents the write pointer (WPTR) of the - * queue commands which compared with the RPTR value to signal the - * fences. - */ - __u64 value; -}; - -struct drm_amdgpu_userq_wait { - /** - * @waitq_id: Queue handle used by the userq wait IOCTL to retrieve the - * wait queue and maintain the fence driver references in it. - */ - __u32 waitq_id; - __u32 pad; - /** - * @syncobj_handles: The list of syncobj handles submitted by the user queue - * job to get the va/value pairs. - */ - __u64 syncobj_handles; - /** - * @syncobj_timeline_handles: The list of timeline syncobj handles submitted by - * the user queue job to get the va/value pairs at given @syncobj_timeline_points. - */ - __u64 syncobj_timeline_handles; - /** - * @syncobj_timeline_points: The list of timeline syncobj points submitted by the - * user queue job for the corresponding @syncobj_timeline_handles. - */ - __u64 syncobj_timeline_points; - /** - * @bo_read_handles: The list of read BO handles submitted by the user queue - * job to get the va/value pairs. - */ - __u64 bo_read_handles; - /** - * @bo_write_handles: The list of write BO handles submitted by the user queue - * job to get the va/value pairs. - */ - __u64 bo_write_handles; - /** - * @num_syncobj_timeline_handles: A count that represents the number of timeline - * syncobj handles in @syncobj_timeline_handles. - */ - __u16 num_syncobj_timeline_handles; - /** - * @num_fences: This field can be used both as input and output. As input it defines - * the maximum number of fences that can be returned and as output it will specify - * how many fences were actually returned from the ioctl. - */ - __u16 num_fences; - /** - * @num_syncobj_handles: A count that represents the number of syncobj handles in - * @syncobj_handles. 
- */ - __u32 num_syncobj_handles; - /** - * @num_bo_read_handles: A count that represents the number of read BO handles in - * @bo_read_handles. - */ - __u32 num_bo_read_handles; - /** - * @num_bo_write_handles: A count that represents the number of write BO handles in - * @bo_write_handles. - */ - __u32 num_bo_write_handles; - /** - * @out_fences: The field is a return value from the ioctl containing the list of - * address/value pairs to wait for. - */ - __u64 out_fences; -}; - -/* sem related */ -#define AMDGPU_SEM_OP_CREATE_SEM 1 -#define AMDGPU_SEM_OP_WAIT_SEM 2 -#define AMDGPU_SEM_OP_SIGNAL_SEM 3 -#define AMDGPU_SEM_OP_DESTROY_SEM 4 -#define AMDGPU_SEM_OP_IMPORT_SEM 5 -#define AMDGPU_SEM_OP_EXPORT_SEM 6 - -struct drm_amdgpu_sem_in { - /** AMDGPU_SEM_OP_* */ - uint32_t op; - uint32_t handle; - uint32_t ctx_id; - uint32_t ip_type; - uint32_t ip_instance; - uint32_t ring; - uint64_t seq; -}; - -union drm_amdgpu_sem_out { - int32_t fd; - uint32_t handle; -}; - -union drm_amdgpu_sem { - struct drm_amdgpu_sem_in in; - union drm_amdgpu_sem_out out; -}; - -/* vm ioctl */ -#define AMDGPU_VM_OP_RESERVE_VMID 1 -#define AMDGPU_VM_OP_UNRESERVE_VMID 2 - -struct drm_amdgpu_vm_in { - /** AMDGPU_VM_OP_* */ - __u32 op; - __u32 flags; -}; - -struct drm_amdgpu_vm_out { - /** For future use, no flags defined so far */ - __u64 flags; -}; - -union drm_amdgpu_vm { - struct drm_amdgpu_vm_in in; - struct drm_amdgpu_vm_out out; -}; - -/* sched ioctl */ -#define AMDGPU_SCHED_OP_PROCESS_PRIORITY_OVERRIDE 1 -#define AMDGPU_SCHED_OP_CONTEXT_PRIORITY_OVERRIDE 2 - -struct drm_amdgpu_sched_in { - /* AMDGPU_SCHED_OP_* */ - __u32 op; - __u32 fd; - /** AMDGPU_CTX_PRIORITY_* */ - __s32 priority; - __u32 ctx_id; -}; - -union drm_amdgpu_sched { - struct drm_amdgpu_sched_in in; -}; - -/* - * This is not a reliable API and you should expect it to fail for any - * number of reasons and have fallback path that do not use userptr to - * perform any operation. 
- */ -#define AMDGPU_GEM_USERPTR_READONLY (1 << 0) -#define AMDGPU_GEM_USERPTR_ANONONLY (1 << 1) -#define AMDGPU_GEM_USERPTR_VALIDATE (1 << 2) -#define AMDGPU_GEM_USERPTR_REGISTER (1 << 3) - -struct drm_amdgpu_gem_userptr { - __u64 addr; - __u64 size; - /* AMDGPU_GEM_USERPTR_* */ - __u32 flags; - /* Resulting GEM handle */ - __u32 handle; -}; - -#define AMDGPU_GEM_DGMA_IMPORT 0 -#define AMDGPU_GEM_DGMA_QUERY_PHYS_ADDR 1 -struct drm_amdgpu_gem_dgma { - __u64 addr; - __u64 size; - __u32 op; - __u32 handle; -}; - -/* SI-CI-VI: */ -/* same meaning as the GB_TILE_MODE and GL_MACRO_TILE_MODE fields */ -#define AMDGPU_TILING_ARRAY_MODE_SHIFT 0 -#define AMDGPU_TILING_ARRAY_MODE_MASK 0xf -#define AMDGPU_TILING_PIPE_CONFIG_SHIFT 4 -#define AMDGPU_TILING_PIPE_CONFIG_MASK 0x1f -#define AMDGPU_TILING_TILE_SPLIT_SHIFT 9 -#define AMDGPU_TILING_TILE_SPLIT_MASK 0x7 -#define AMDGPU_TILING_MICRO_TILE_MODE_SHIFT 12 -#define AMDGPU_TILING_MICRO_TILE_MODE_MASK 0x7 -#define AMDGPU_TILING_BANK_WIDTH_SHIFT 15 -#define AMDGPU_TILING_BANK_WIDTH_MASK 0x3 -#define AMDGPU_TILING_BANK_HEIGHT_SHIFT 17 -#define AMDGPU_TILING_BANK_HEIGHT_MASK 0x3 -#define AMDGPU_TILING_MACRO_TILE_ASPECT_SHIFT 19 -#define AMDGPU_TILING_MACRO_TILE_ASPECT_MASK 0x3 -#define AMDGPU_TILING_NUM_BANKS_SHIFT 21 -#define AMDGPU_TILING_NUM_BANKS_MASK 0x3 - -/* GFX9 - GFX11: */ -#define AMDGPU_TILING_SWIZZLE_MODE_SHIFT 0 -#define AMDGPU_TILING_SWIZZLE_MODE_MASK 0x1f -#define AMDGPU_TILING_DCC_OFFSET_256B_SHIFT 5 -#define AMDGPU_TILING_DCC_OFFSET_256B_MASK 0xFFFFFF -#define AMDGPU_TILING_DCC_PITCH_MAX_SHIFT 29 -#define AMDGPU_TILING_DCC_PITCH_MAX_MASK 0x3FFF -#define AMDGPU_TILING_DCC_INDEPENDENT_64B_SHIFT 43 -#define AMDGPU_TILING_DCC_INDEPENDENT_64B_MASK 0x1 -#define AMDGPU_TILING_DCC_INDEPENDENT_128B_SHIFT 44 -#define AMDGPU_TILING_DCC_INDEPENDENT_128B_MASK 0x1 -#define AMDGPU_TILING_SCANOUT_SHIFT 63 -#define AMDGPU_TILING_SCANOUT_MASK 0x1 - -/* GFX12 and later: */ -#define AMDGPU_TILING_GFX12_SWIZZLE_MODE_SHIFT 0 -#define 
AMDGPU_TILING_GFX12_SWIZZLE_MODE_MASK 0x7 -/* These are DCC recompression settings for memory management: */ -#define AMDGPU_TILING_GFX12_DCC_MAX_COMPRESSED_BLOCK_SHIFT 3 -#define AMDGPU_TILING_GFX12_DCC_MAX_COMPRESSED_BLOCK_MASK 0x3 /* 0:64B, 1:128B, 2:256B */ -#define AMDGPU_TILING_GFX12_DCC_NUMBER_TYPE_SHIFT 5 -#define AMDGPU_TILING_GFX12_DCC_NUMBER_TYPE_MASK 0x7 /* CB_COLOR0_INFO.NUMBER_TYPE */ -#define AMDGPU_TILING_GFX12_DCC_DATA_FORMAT_SHIFT 8 -#define AMDGPU_TILING_GFX12_DCC_DATA_FORMAT_MASK 0x3f /* [0:4]:CB_COLOR0_INFO.FORMAT, [5]:MM */ -/* When clearing the buffer or moving it from VRAM to GTT, don't compress and set DCC metadata - * to uncompressed. Set when parts of an allocation bypass DCC and read raw data. */ -#define AMDGPU_TILING_GFX12_DCC_WRITE_COMPRESS_DISABLE_SHIFT 14 -#define AMDGPU_TILING_GFX12_DCC_WRITE_COMPRESS_DISABLE_MASK 0x1 -/* bit gap */ -#define AMDGPU_TILING_GFX12_SCANOUT_SHIFT 63 -#define AMDGPU_TILING_GFX12_SCANOUT_MASK 0x1 - -/* Set/Get helpers for tiling flags. 
*/ -#define AMDGPU_TILING_SET(field, value) \ - (((__u64)(value) & AMDGPU_TILING_##field##_MASK) << AMDGPU_TILING_##field##_SHIFT) -#define AMDGPU_TILING_GET(value, field) \ - (((__u64)(value) >> AMDGPU_TILING_##field##_SHIFT) & AMDGPU_TILING_##field##_MASK) - -#define AMDGPU_GEM_METADATA_OP_SET_METADATA 1 -#define AMDGPU_GEM_METADATA_OP_GET_METADATA 2 - -/** The same structure is shared for input/output */ -struct drm_amdgpu_gem_metadata { - /** GEM Object handle */ - __u32 handle; - /** Do we want get or set metadata */ - __u32 op; - struct { - /** For future use, no flags defined so far */ - __u64 flags; - /** family specific tiling info */ - __u64 tiling_info; - __u32 data_size_bytes; - __u32 data[64]; - } data; -}; - -struct drm_amdgpu_gem_mmap_in { - /** the GEM object handle */ - __u32 handle; - __u32 _pad; -}; - -struct drm_amdgpu_gem_mmap_out { - /** mmap offset from the vma offset manager */ - __u64 addr_ptr; -}; - -union drm_amdgpu_gem_mmap { - struct drm_amdgpu_gem_mmap_in in; - struct drm_amdgpu_gem_mmap_out out; -}; - -struct drm_amdgpu_gem_wait_idle_in { - /** GEM object handle */ - __u32 handle; - /** For future use, no flags defined so far */ - __u32 flags; - /** Absolute timeout to wait */ - __u64 timeout; -}; - -struct drm_amdgpu_gem_wait_idle_out { - /** BO status: 0 - BO is idle, 1 - BO is busy */ - __u32 status; - /** Returned current memory domain */ - __u32 domain; -}; - -union drm_amdgpu_gem_wait_idle { - struct drm_amdgpu_gem_wait_idle_in in; - struct drm_amdgpu_gem_wait_idle_out out; -}; - -struct drm_amdgpu_wait_cs_in { - /* Command submission handle - * handle equals 0 means none to wait for - * handle equals ~0ull means wait for the latest sequence number - */ - __u64 handle; - /** Absolute timeout to wait */ - __u64 timeout; - __u32 ip_type; - __u32 ip_instance; - __u32 ring; - __u32 ctx_id; -}; - -struct drm_amdgpu_wait_cs_out { - /** CS status: 0 - CS completed, 1 - CS still busy */ - __u64 status; -}; - -union drm_amdgpu_wait_cs { 
- struct drm_amdgpu_wait_cs_in in; - struct drm_amdgpu_wait_cs_out out; -}; - -struct drm_amdgpu_fence { - __u32 ctx_id; - __u32 ip_type; - __u32 ip_instance; - __u32 ring; - __u64 seq_no; -}; - -struct drm_amdgpu_wait_fences_in { - /** This points to uint64_t * which points to fences */ - __u64 fences; - __u32 fence_count; - __u32 wait_all; - __u64 timeout_ns; -}; - -struct drm_amdgpu_wait_fences_out { - __u32 status; - __u32 first_signaled; -}; - -union drm_amdgpu_wait_fences { - struct drm_amdgpu_wait_fences_in in; - struct drm_amdgpu_wait_fences_out out; -}; - -#define AMDGPU_GEM_OP_GET_GEM_CREATE_INFO 0 -#define AMDGPU_GEM_OP_SET_PLACEMENT 1 -#define AMDGPU_GEM_OP_GET_MAPPING_INFO 2 - -struct drm_amdgpu_gem_vm_entry { - /* Start of mapping (in bytes) */ - __u64 addr; - - /* Size of mapping (in bytes) */ - __u64 size; - - /* Mapping offset */ - __u64 offset; - - /* flags needed to recreate mapping */ - __u64 flags; -}; - -/* Sets or returns a value associated with a buffer. */ -struct drm_amdgpu_gem_op { - /** GEM object handle */ - __u32 handle; - /** AMDGPU_GEM_OP_* */ - __u32 op; - /** Input or return value. 
For MAPPING_INFO op: pointer to array of struct drm_amdgpu_gem_vm_entry */ - __u64 value; - /** For MAPPING_INFO op: number of mappings (in/out) */ - __u32 num_entries; - - __u32 padding; -}; - -#define AMDGPU_GEM_LIST_HANDLES_FLAG_IS_IMPORT (1 << 0) - -struct drm_amdgpu_gem_list_handles { - /* User pointer to array of drm_amdgpu_gem_bo_info_entry */ - __u64 entries; - - /* Size of entries buffer / Number of handles in process (if larger than size of buffer, must retry) */ - __u32 num_entries; - - __u32 padding; -}; - -struct drm_amdgpu_gem_list_handles_entry { - /* gem handle of buffer object */ - __u32 gem_handle; - - /* Currently just one flag: IS_IMPORT */ - __u32 flags; - - /* Size of bo */ - __u64 size; - - /* Preferred domains for GEM_CREATE */ - __u64 preferred_domains; - - /* GEM_CREATE flags for re-creation of buffer */ - __u64 alloc_flags; - - /* physical start_addr alignment in bytes for some HW requirements */ - __u64 alignment; -}; - -#define AMDGPU_VA_OP_MAP 1 -#define AMDGPU_VA_OP_UNMAP 2 -#define AMDGPU_VA_OP_CLEAR 3 -#define AMDGPU_VA_OP_REPLACE 4 - -/* Delay the page table update till the next CS */ -#define AMDGPU_VM_DELAY_UPDATE (1 << 0) - -/* Mapping flags */ -/* readable mapping */ -#define AMDGPU_VM_PAGE_READABLE (1 << 1) -/* writable mapping */ -#define AMDGPU_VM_PAGE_WRITEABLE (1 << 2) -/* executable mapping, new for VI */ -#define AMDGPU_VM_PAGE_EXECUTABLE (1 << 3) -/* partially resident texture */ -#define AMDGPU_VM_PAGE_PRT (1 << 4) -/* MTYPE flags use bit 5 to 8 */ -#define AMDGPU_VM_MTYPE_MASK (0xf << 5) -/* Default MTYPE. Pre-AI must use this. Recommended for newer ASICs. 
*/ -#define AMDGPU_VM_MTYPE_DEFAULT (0 << 5) -/* Use Non Coherent MTYPE instead of default MTYPE */ -#define AMDGPU_VM_MTYPE_NC (1 << 5) -/* Use Write Combine MTYPE instead of default MTYPE */ -#define AMDGPU_VM_MTYPE_WC (2 << 5) -/* Use Cache Coherent MTYPE instead of default MTYPE */ -#define AMDGPU_VM_MTYPE_CC (3 << 5) -/* Use UnCached MTYPE instead of default MTYPE */ -#define AMDGPU_VM_MTYPE_UC (4 << 5) -/* Use Read Write MTYPE instead of default MTYPE */ -#define AMDGPU_VM_MTYPE_RW (5 << 5) -/* don't allocate MALL */ -#define AMDGPU_VM_PAGE_NOALLOC (1 << 9) - -struct drm_amdgpu_gem_va { - /** GEM object handle */ - __u32 handle; - __u32 _pad; - /** AMDGPU_VA_OP_* */ - __u32 operation; - /** AMDGPU_VM_PAGE_* */ - __u32 flags; - /** va address to assign . Must be correctly aligned.*/ - __u64 va_address; - /** Specify offset inside of BO to assign. Must be correctly aligned.*/ - __u64 offset_in_bo; - /** Specify mapping size. Must be correctly aligned. */ - __u64 map_size; - /** - * vm_timeline_point is a sequence number used to add new timeline point. - */ - __u64 vm_timeline_point; - /** - * The vm page table update fence is installed in given vm_timeline_syncobj_out - * at vm_timeline_point. - */ - __u32 vm_timeline_syncobj_out; - /** the number of syncobj handles in @input_fence_syncobj_handles */ - __u32 num_syncobj_handles; - /** Array of sync object handle to wait for given input fences */ - __u64 input_fence_syncobj_handles; -}; - -#define AMDGPU_HW_IP_GFX 0 -#define AMDGPU_HW_IP_COMPUTE 1 -#define AMDGPU_HW_IP_DMA 2 -#define AMDGPU_HW_IP_UVD 3 -#define AMDGPU_HW_IP_VCE 4 -#define AMDGPU_HW_IP_UVD_ENC 5 -#define AMDGPU_HW_IP_VCN_DEC 6 -/* - * From VCN4, AMDGPU_HW_IP_VCN_ENC is re-used to support - * both encoding and decoding jobs. 
- */ -#define AMDGPU_HW_IP_VCN_ENC 7 -#define AMDGPU_HW_IP_VCN_JPEG 8 -#define AMDGPU_HW_IP_VPE 9 -#define AMDGPU_HW_IP_NUM 10 - -#define AMDGPU_HW_IP_INSTANCE_MAX_COUNT 1 - -#define AMDGPU_CHUNK_ID_IB 0x01 -#define AMDGPU_CHUNK_ID_FENCE 0x02 -#define AMDGPU_CHUNK_ID_DEPENDENCIES 0x03 -#define AMDGPU_CHUNK_ID_SYNCOBJ_IN 0x04 -#define AMDGPU_CHUNK_ID_SYNCOBJ_OUT 0x05 -#define AMDGPU_CHUNK_ID_BO_HANDLES 0x06 -#define AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES 0x07 -#define AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT 0x08 -#define AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL 0x09 -#define AMDGPU_CHUNK_ID_CP_GFX_SHADOW 0x0a - -struct drm_amdgpu_cs_chunk { - __u32 chunk_id; - __u32 length_dw; - __u64 chunk_data; -}; - -struct drm_amdgpu_cs_in { - /** Rendering context id */ - __u32 ctx_id; - /** Handle of resource list associated with CS */ - __u32 bo_list_handle; - __u32 num_chunks; - __u32 flags; - /** this points to __u64 * which point to cs chunks */ - __u64 chunks; -}; - -struct drm_amdgpu_cs_out { - __u64 handle; -}; - -union drm_amdgpu_cs { - struct drm_amdgpu_cs_in in; - struct drm_amdgpu_cs_out out; -}; - -/* Specify flags to be used for IB */ - -/* This IB should be submitted to CE */ -#define AMDGPU_IB_FLAG_CE (1<<0) - -/* Preamble flag, which means the IB could be dropped if no context switch */ -#define AMDGPU_IB_FLAG_PREAMBLE (1<<1) - -/* Preempt flag, IB should set Pre_enb bit if PREEMPT flag detected */ -#define AMDGPU_IB_FLAG_PREEMPT (1<<2) - -/* The IB fence should do the L2 writeback but not invalidate any shader - * caches (L2/vL1/sL1/I$). */ -#define AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE (1 << 3) - -/* Set GDS_COMPUTE_MAX_WAVE_ID = DEFAULT before PACKET3_INDIRECT_BUFFER. - * This will reset wave ID counters for the IB. 
- */ -#define AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID (1 << 4) - -/* Flag the IB as secure (TMZ) - */ -#define AMDGPU_IB_FLAGS_SECURE (1 << 5) - -/* Tell KMD to flush and invalidate caches - */ -#define AMDGPU_IB_FLAG_EMIT_MEM_SYNC (1 << 6) - -struct drm_amdgpu_cs_chunk_ib { - __u32 _pad; - /** AMDGPU_IB_FLAG_* */ - __u32 flags; - /** Virtual address to begin IB execution */ - __u64 va_start; - /** Size of submission */ - __u32 ib_bytes; - /** HW IP to submit to */ - __u32 ip_type; - /** HW IP index of the same type to submit to */ - __u32 ip_instance; - /** Ring index to submit to */ - __u32 ring; -}; - -struct drm_amdgpu_cs_chunk_dep { - __u32 ip_type; - __u32 ip_instance; - __u32 ring; - __u32 ctx_id; - __u64 handle; -}; - -struct drm_amdgpu_cs_chunk_fence { - __u32 handle; - __u32 offset; -}; - -struct drm_amdgpu_cs_chunk_sem { - __u32 handle; -}; - -struct drm_amdgpu_cs_chunk_syncobj { - __u32 handle; - __u32 flags; - __u64 point; -}; - -#define AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ 0 -#define AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ_FD 1 -#define AMDGPU_FENCE_TO_HANDLE_GET_SYNC_FILE_FD 2 - -union drm_amdgpu_fence_to_handle { - struct { - struct drm_amdgpu_fence fence; - __u32 what; - __u32 pad; - } in; - struct { - __u32 handle; - } out; -}; - -struct drm_amdgpu_cs_chunk_data { - union { - struct drm_amdgpu_cs_chunk_ib ib_data; - struct drm_amdgpu_cs_chunk_fence fence_data; - }; -}; - -#define AMDGPU_CS_CHUNK_CP_GFX_SHADOW_FLAGS_INIT_SHADOW 0x1 - -struct drm_amdgpu_cs_chunk_cp_gfx_shadow { - __u64 shadow_va; - __u64 csa_va; - __u64 gds_va; - __u64 flags; -}; - -/* - * Query h/w info: Flag that this is integrated (a.h.a. 
fusion) GPU - * - */ -#define AMDGPU_IDS_FLAGS_FUSION 0x01 -#define AMDGPU_IDS_FLAGS_PREEMPTION 0x02 -#define AMDGPU_IDS_FLAGS_TMZ 0x04 -#define AMDGPU_IDS_FLAGS_CONFORMANT_TRUNC_COORD 0x08 -#define AMDGPU_IDS_FLAGS_GANG_SUBMIT 0x10 - -/* - * Query h/w info: Flag identifying VF/PF/PT mode - * - */ -#define AMDGPU_IDS_FLAGS_MODE_MASK 0x300 -#define AMDGPU_IDS_FLAGS_MODE_SHIFT 0x8 -#define AMDGPU_IDS_FLAGS_MODE_PF 0x0 -#define AMDGPU_IDS_FLAGS_MODE_VF 0x1 -#define AMDGPU_IDS_FLAGS_MODE_PT 0x2 - -/* indicate if acceleration can be working */ -#define AMDGPU_INFO_ACCEL_WORKING 0x00 -/* get the crtc_id from the mode object id? */ -#define AMDGPU_INFO_CRTC_FROM_ID 0x01 -/* query hw IP info */ -#define AMDGPU_INFO_HW_IP_INFO 0x02 -/* query hw IP instance count for the specified type */ -#define AMDGPU_INFO_HW_IP_COUNT 0x03 -/* timestamp for GL_ARB_timer_query */ -#define AMDGPU_INFO_TIMESTAMP 0x05 -/* Query the firmware version */ -#define AMDGPU_INFO_FW_VERSION 0x0e - /* Subquery id: Query VCE firmware version */ - #define AMDGPU_INFO_FW_VCE 0x1 - /* Subquery id: Query UVD firmware version */ - #define AMDGPU_INFO_FW_UVD 0x2 - /* Subquery id: Query GMC firmware version */ - #define AMDGPU_INFO_FW_GMC 0x03 - /* Subquery id: Query GFX ME firmware version */ - #define AMDGPU_INFO_FW_GFX_ME 0x04 - /* Subquery id: Query GFX PFP firmware version */ - #define AMDGPU_INFO_FW_GFX_PFP 0x05 - /* Subquery id: Query GFX CE firmware version */ - #define AMDGPU_INFO_FW_GFX_CE 0x06 - /* Subquery id: Query GFX RLC firmware version */ - #define AMDGPU_INFO_FW_GFX_RLC 0x07 - /* Subquery id: Query GFX MEC firmware version */ - #define AMDGPU_INFO_FW_GFX_MEC 0x08 - /* Subquery id: Query SMC firmware version */ - #define AMDGPU_INFO_FW_SMC 0x0a - /* Subquery id: Query SDMA firmware version */ - #define AMDGPU_INFO_FW_SDMA 0x0b - /* Subquery id: Query PSP SOS firmware version */ - #define AMDGPU_INFO_FW_SOS 0x0c - /* Subquery id: Query PSP ASD firmware version */ - #define AMDGPU_INFO_FW_ASD 
0x0d - /* Subquery id: Query VCN firmware version */ - #define AMDGPU_INFO_FW_VCN 0x0e - /* Subquery id: Query GFX RLC SRLC firmware version */ - #define AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_CNTL 0x0f - /* Subquery id: Query GFX RLC SRLG firmware version */ - #define AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_GPM_MEM 0x10 - /* Subquery id: Query GFX RLC SRLS firmware version */ - #define AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_SRM_MEM 0x11 - /* Subquery id: Query DMCU firmware version */ - #define AMDGPU_INFO_FW_DMCU 0x12 - #define AMDGPU_INFO_FW_TA 0x13 - /* Subquery id: Query DMCUB firmware version */ - #define AMDGPU_INFO_FW_DMCUB 0x14 - /* Subquery id: Query TOC firmware version */ - #define AMDGPU_INFO_FW_TOC 0x15 - /* Subquery id: Query CAP firmware version */ - #define AMDGPU_INFO_FW_CAP 0x16 - /* Subquery id: Query GFX RLCP firmware version */ - #define AMDGPU_INFO_FW_GFX_RLCP 0x17 - /* Subquery id: Query GFX RLCV firmware version */ - #define AMDGPU_INFO_FW_GFX_RLCV 0x18 - /* Subquery id: Query MES_KIQ firmware version */ - #define AMDGPU_INFO_FW_MES_KIQ 0x19 - /* Subquery id: Query MES firmware version */ - #define AMDGPU_INFO_FW_MES 0x1a - /* Subquery id: Query IMU firmware version */ - #define AMDGPU_INFO_FW_IMU 0x1b - /* Subquery id: Query VPE firmware version */ - #define AMDGPU_INFO_FW_VPE 0x1c - -/* number of bytes moved for TTM migration */ -#define AMDGPU_INFO_NUM_BYTES_MOVED 0x0f -/* the used VRAM size */ -#define AMDGPU_INFO_VRAM_USAGE 0x10 -/* the used GTT size */ -#define AMDGPU_INFO_GTT_USAGE 0x11 -/* Information about GDS, etc. resource configuration */ -#define AMDGPU_INFO_GDS_CONFIG 0x13 -/* Query information about VRAM and GTT domains */ -#define AMDGPU_INFO_VRAM_GTT 0x14 -/* Query information about register in MMR address space*/ -#define AMDGPU_INFO_READ_MMR_REG 0x15 -/* Query information about device: rev id, family, etc. 
*/ -#define AMDGPU_INFO_DEV_INFO 0x16 -/* visible vram usage */ -#define AMDGPU_INFO_VIS_VRAM_USAGE 0x17 -/* number of TTM buffer evictions */ -#define AMDGPU_INFO_NUM_EVICTIONS 0x18 -/* Query memory about VRAM and GTT domains */ -#define AMDGPU_INFO_MEMORY 0x19 -/* Query vce clock table */ -#define AMDGPU_INFO_VCE_CLOCK_TABLE 0x1A -/* Query vbios related information */ -#define AMDGPU_INFO_VBIOS 0x1B - /* Subquery id: Query vbios size */ - #define AMDGPU_INFO_VBIOS_SIZE 0x1 - /* Subquery id: Query vbios image */ - #define AMDGPU_INFO_VBIOS_IMAGE 0x2 - /* Subquery id: Query vbios info */ - #define AMDGPU_INFO_VBIOS_INFO 0x3 -/* Query UVD handles */ -#define AMDGPU_INFO_NUM_HANDLES 0x1C -/* Query sensor related information */ -#define AMDGPU_INFO_SENSOR 0x1D - /* Subquery id: Query GPU shader clock */ - #define AMDGPU_INFO_SENSOR_GFX_SCLK 0x1 - /* Subquery id: Query GPU memory clock */ - #define AMDGPU_INFO_SENSOR_GFX_MCLK 0x2 - /* Subquery id: Query GPU temperature */ - #define AMDGPU_INFO_SENSOR_GPU_TEMP 0x3 - /* Subquery id: Query GPU load */ - #define AMDGPU_INFO_SENSOR_GPU_LOAD 0x4 - /* Subquery id: Query average GPU power */ - #define AMDGPU_INFO_SENSOR_GPU_AVG_POWER 0x5 - /* Subquery id: Query northbridge voltage */ - #define AMDGPU_INFO_SENSOR_VDDNB 0x6 - /* Subquery id: Query graphics voltage */ - #define AMDGPU_INFO_SENSOR_VDDGFX 0x7 - /* Subquery id: Query GPU stable pstate shader clock */ - #define AMDGPU_INFO_SENSOR_STABLE_PSTATE_GFX_SCLK 0x8 - /* Subquery id: Query GPU stable pstate memory clock */ - #define AMDGPU_INFO_SENSOR_STABLE_PSTATE_GFX_MCLK 0x9 - /* Subquery id: Query GPU peak pstate shader clock */ - #define AMDGPU_INFO_SENSOR_PEAK_PSTATE_GFX_SCLK 0xa - /* Subquery id: Query GPU peak pstate memory clock */ - #define AMDGPU_INFO_SENSOR_PEAK_PSTATE_GFX_MCLK 0xb - /* Subquery id: Query input GPU power */ - #define AMDGPU_INFO_SENSOR_GPU_INPUT_POWER 0xc -/* Number of VRAM page faults on CPU access. 
*/ -#define AMDGPU_INFO_NUM_VRAM_CPU_PAGE_FAULTS 0x1E -#define AMDGPU_INFO_VRAM_LOST_COUNTER 0x1F -/* query ras mask of enabled features*/ -#define AMDGPU_INFO_RAS_ENABLED_FEATURES 0x20 -/* RAS MASK: UMC (VRAM) */ -#define AMDGPU_INFO_RAS_ENABLED_UMC (1 << 0) -/* RAS MASK: SDMA */ -#define AMDGPU_INFO_RAS_ENABLED_SDMA (1 << 1) -/* RAS MASK: GFX */ -#define AMDGPU_INFO_RAS_ENABLED_GFX (1 << 2) -/* RAS MASK: MMHUB */ -#define AMDGPU_INFO_RAS_ENABLED_MMHUB (1 << 3) -/* RAS MASK: ATHUB */ -#define AMDGPU_INFO_RAS_ENABLED_ATHUB (1 << 4) -/* RAS MASK: PCIE */ -#define AMDGPU_INFO_RAS_ENABLED_PCIE (1 << 5) -/* RAS MASK: HDP */ -#define AMDGPU_INFO_RAS_ENABLED_HDP (1 << 6) -/* RAS MASK: XGMI */ -#define AMDGPU_INFO_RAS_ENABLED_XGMI (1 << 7) -/* RAS MASK: DF */ -#define AMDGPU_INFO_RAS_ENABLED_DF (1 << 8) -/* RAS MASK: SMN */ -#define AMDGPU_INFO_RAS_ENABLED_SMN (1 << 9) -/* RAS MASK: SEM */ -#define AMDGPU_INFO_RAS_ENABLED_SEM (1 << 10) -/* RAS MASK: MP0 */ -#define AMDGPU_INFO_RAS_ENABLED_MP0 (1 << 11) -/* RAS MASK: MP1 */ -#define AMDGPU_INFO_RAS_ENABLED_MP1 (1 << 12) -/* RAS MASK: FUSE */ -#define AMDGPU_INFO_RAS_ENABLED_FUSE (1 << 13) -/* query video encode/decode caps */ -#define AMDGPU_INFO_VIDEO_CAPS 0x21 - /* Subquery id: Decode */ - #define AMDGPU_INFO_VIDEO_CAPS_DECODE 0 - /* Subquery id: Encode */ - #define AMDGPU_INFO_VIDEO_CAPS_ENCODE 1 -/* Query the max number of IBs per gang per submission */ -#define AMDGPU_INFO_MAX_IBS 0x22 -/* query last page fault info */ -#define AMDGPU_INFO_GPUVM_FAULT 0x23 -/* query FW object size and alignment */ -#define AMDGPU_INFO_UQ_FW_AREAS 0x24 - -/* Hybrid Stack Specific Defs*/ -/* gpu capability */ -#define AMDGPU_INFO_CAPABILITY 0x50 -/* virtual range */ -#define AMDGPU_INFO_VIRTUAL_RANGE 0x51 -/* query pin memory capability */ -#define AMDGPU_CAPABILITY_PIN_MEM_FLAG (1 << 0) -/* query direct gma capability */ -#define AMDGPU_CAPABILITY_DIRECT_GMA_FLAG (1 << 1) - -#define AMDGPU_INFO_MMR_SE_INDEX_SHIFT 0 -#define 
AMDGPU_INFO_MMR_SE_INDEX_MASK 0xff -#define AMDGPU_INFO_MMR_SH_INDEX_SHIFT 8 -#define AMDGPU_INFO_MMR_SH_INDEX_MASK 0xff - -struct drm_amdgpu_query_fw { - /** AMDGPU_INFO_FW_* */ - __u32 fw_type; - /** - * Index of the IP if there are more IPs of - * the same type. - */ - __u32 ip_instance; - /** - * Index of the engine. Whether this is used depends - * on the firmware type. (e.g. MEC, SDMA) - */ - __u32 index; - __u32 _pad; -}; - -/* Input structure for the INFO ioctl */ -struct drm_amdgpu_info { - /* Where the return value will be stored */ - __u64 return_pointer; - /* The size of the return value. Just like "size" in "snprintf", - * it limits how many bytes the kernel can write. */ - __u32 return_size; - /* The query request id. */ - __u32 query; - - union { - struct { - __u32 id; - __u32 _pad; - } mode_crtc; - - struct { - /** AMDGPU_HW_IP_* */ - __u32 type; - /** - * Index of the IP if there are more IPs of the same - * type. Ignored by AMDGPU_INFO_HW_IP_COUNT. - */ - __u32 ip_instance; - } query_hw_ip; - - struct { - __u32 dword_offset; - /** number of registers to read */ - __u32 count; - __u32 instance; - /** For future use, no flags defined so far */ - __u32 flags; - } read_mmr_reg; - - struct { - uint32_t aperture; - uint32_t _pad; - } virtual_range; - - struct drm_amdgpu_query_fw query_fw; - - struct { - __u32 type; - __u32 offset; - } vbios_info; - - struct { - __u32 type; - } sensor_info; - - struct { - __u32 type; - } video_cap; - }; -}; - -struct drm_amdgpu_info_gds { - /** GDS GFX partition size */ - __u32 gds_gfx_partition_size; - /** GDS compute partition size */ - __u32 compute_partition_size; - /** total GDS memory size */ - __u32 gds_total_size; - /** GWS size per GFX partition */ - __u32 gws_per_gfx_partition; - /** GSW size per compute partition */ - __u32 gws_per_compute_partition; - /** OA size per GFX partition */ - __u32 oa_per_gfx_partition; - /** OA size per compute partition */ - __u32 oa_per_compute_partition; - __u32 _pad; -}; - 
-struct drm_amdgpu_info_vram_gtt { - __u64 vram_size; - __u64 vram_cpu_accessible_size; - __u64 gtt_size; -}; - -struct drm_amdgpu_heap_info { - /** max. physical memory */ - __u64 total_heap_size; - - /** Theoretical max. available memory in the given heap */ - __u64 usable_heap_size; - - /** - * Number of bytes allocated in the heap. This includes all processes - * and private allocations in the kernel. It changes when new buffers - * are allocated, freed, and moved. It cannot be larger than - * heap_size. - */ - __u64 heap_usage; - - /** - * Theoretical possible max. size of buffer which - * could be allocated in the given heap - */ - __u64 max_allocation; -}; - -struct drm_amdgpu_memory_info { - struct drm_amdgpu_heap_info vram; - struct drm_amdgpu_heap_info cpu_accessible_vram; - struct drm_amdgpu_heap_info gtt; -}; - -struct drm_amdgpu_info_firmware { - __u32 ver; - __u32 feature; -}; - -struct drm_amdgpu_info_vbios { - __u8 name[64]; - __u8 vbios_pn[64]; - __u32 version; - __u32 pad; - __u8 vbios_ver_str[32]; - __u8 date[32]; -}; - -#define AMDGPU_VRAM_TYPE_UNKNOWN 0 -#define AMDGPU_VRAM_TYPE_GDDR1 1 -#define AMDGPU_VRAM_TYPE_DDR2 2 -#define AMDGPU_VRAM_TYPE_GDDR3 3 -#define AMDGPU_VRAM_TYPE_GDDR4 4 -#define AMDGPU_VRAM_TYPE_GDDR5 5 -#define AMDGPU_VRAM_TYPE_HBM 6 -#define AMDGPU_VRAM_TYPE_DDR3 7 -#define AMDGPU_VRAM_TYPE_DDR4 8 -#define AMDGPU_VRAM_TYPE_GDDR6 9 -#define AMDGPU_VRAM_TYPE_DDR5 10 -#define AMDGPU_VRAM_TYPE_LPDDR4 11 -#define AMDGPU_VRAM_TYPE_LPDDR5 12 -#define AMDGPU_VRAM_TYPE_HBM3E 13 - -#define AMDGPU_VRAM_TYPE_HBM_WIDTH 4096 - -struct drm_amdgpu_info_device { - /** PCI Device ID */ - __u32 device_id; - /** Internal chip revision: A0, A1, etc.) 
*/ - __u32 chip_rev; - __u32 external_rev; - /** Revision id in PCI Config space */ - __u32 pci_rev; - __u32 family; - __u32 num_shader_engines; - __u32 num_shader_arrays_per_engine; - /* in KHz */ - __u32 gpu_counter_freq; - __u64 max_engine_clock; - __u64 max_memory_clock; - /* cu information */ - __u32 cu_active_number; - /* NOTE: cu_ao_mask is INVALID, DON'T use it */ - __u32 cu_ao_mask; - __u32 cu_bitmap[4][4]; - /** Render backend pipe mask. One render backend is CB+DB. */ - __u32 enabled_rb_pipes_mask; - __u32 num_rb_pipes; - __u32 num_hw_gfx_contexts; - /* PCIe version (the smaller of the GPU and the CPU/motherboard) */ - __u32 pcie_gen; - __u64 ids_flags; - /** Starting virtual address for UMDs. */ - __u64 virtual_address_offset; - /** The maximum virtual address */ - __u64 virtual_address_max; - /** Required alignment of virtual addresses. */ - __u32 virtual_address_alignment; - /** Page table entry - fragment size */ - __u32 pte_fragment_size; - __u32 gart_page_size; - /** constant engine ram size*/ - __u32 ce_ram_size; - /** video memory type info*/ - __u32 vram_type; - /** video memory bit width*/ - __u32 vram_bit_width; - /* vce harvesting instance */ - __u32 vce_harvest_config; - /* gfx double offchip LDS buffers */ - __u32 gc_double_offchip_lds_buf; - /* NGG Primitive Buffer */ - __u64 prim_buf_gpu_addr; - /* NGG Position Buffer */ - __u64 pos_buf_gpu_addr; - /* NGG Control Sideband */ - __u64 cntl_sb_buf_gpu_addr; - /* NGG Parameter Cache */ - __u64 param_buf_gpu_addr; - __u32 prim_buf_size; - __u32 pos_buf_size; - __u32 cntl_sb_buf_size; - __u32 param_buf_size; - /* wavefront size*/ - __u32 wave_front_size; - /* shader visible vgprs*/ - __u32 num_shader_visible_vgprs; - /* CU per shader array*/ - __u32 num_cu_per_sh; - /* number of tcc blocks*/ - __u32 num_tcc_blocks; - /* gs vgt table depth*/ - __u32 gs_vgt_table_depth; - /* gs primitive buffer depth*/ - __u32 gs_prim_buffer_depth; - /* max gs wavefront per vgt*/ - __u32 max_gs_waves_per_vgt; - 
/* PCIe number of lanes (the smaller of the GPU and the CPU/motherboard) */ - __u32 pcie_num_lanes; - /* always on cu bitmap */ - __u32 cu_ao_bitmap[4][4]; - /** Starting high virtual address for UMDs. */ - __u64 high_va_offset; - /** The maximum high virtual address */ - __u64 high_va_max; - /* gfx10 pa_sc_tile_steering_override */ - __u32 pa_sc_tile_steering_override; - /* disabled TCCs */ - __u64 tcc_disabled_mask; - __u64 min_engine_clock; - __u64 min_memory_clock; - /* The following fields are only set on gfx11+, older chips set 0. */ - __u32 tcp_cache_size; /* AKA GL0, VMEM cache */ - __u32 num_sqc_per_wgp; - __u32 sqc_data_cache_size; /* AKA SMEM cache */ - __u32 sqc_inst_cache_size; - __u32 gl1c_cache_size; - __u32 gl2c_cache_size; - __u64 mall_size; /* AKA infinity cache */ - /* high 32 bits of the rb pipes mask */ - __u32 enabled_rb_pipes_mask_hi; - /* shadow area size for gfx11 */ - __u32 shadow_size; - /* shadow area base virtual alignment for gfx11 */ - __u32 shadow_alignment; - /* context save area size for gfx11 */ - __u32 csa_size; - /* context save area base virtual alignment for gfx11 */ - __u32 csa_alignment; - /* Userq IP mask (1 << AMDGPU_HW_IP_*) */ - __u32 userq_ip_mask; - __u32 pad; -}; - -struct drm_amdgpu_info_hw_ip { - /** Version of h/w IP */ - __u32 hw_ip_version_major; - __u32 hw_ip_version_minor; - /** Capabilities */ - __u64 capabilities_flags; - /** command buffer address start alignment*/ - __u32 ib_start_alignment; - /** command buffer size alignment*/ - __u32 ib_size_alignment; - /** Bitmask of available rings. Bit 0 means ring 0, etc. 
*/ - __u32 available_rings; - /** version info: bits 23:16 major, 15:8 minor, 7:0 revision */ - __u32 ip_discovery_version; - /* Userq available slots */ - __u32 userq_num_slots; -}; - -/* GFX metadata BO sizes and alignment info (in bytes) */ -struct drm_amdgpu_info_uq_fw_areas_gfx { - /* shadow area size */ - __u32 shadow_size; - /* shadow area base virtual mem alignment */ - __u32 shadow_alignment; - /* context save area size */ - __u32 csa_size; - /* context save area base virtual mem alignment */ - __u32 csa_alignment; -}; - -/* IP specific fw related information used in the - * subquery AMDGPU_INFO_UQ_FW_AREAS - */ -struct drm_amdgpu_info_uq_fw_areas { - union { - struct drm_amdgpu_info_uq_fw_areas_gfx gfx; - }; -}; - -struct drm_amdgpu_info_num_handles { - /** Max handles as supported by firmware for UVD */ - __u32 uvd_max_handles; - /** Handles currently in use for UVD */ - __u32 uvd_used_handles; -}; - -#define AMDGPU_VCE_CLOCK_TABLE_ENTRIES 6 - -struct drm_amdgpu_info_vce_clock_table_entry { - /** System clock */ - __u32 sclk; - /** Memory clock */ - __u32 mclk; - /** VCE clock */ - __u32 eclk; - __u32 pad; -}; - -struct drm_amdgpu_info_vce_clock_table { - struct drm_amdgpu_info_vce_clock_table_entry entries[AMDGPU_VCE_CLOCK_TABLE_ENTRIES]; - __u32 num_valid_entries; - __u32 pad; -}; - -/* query video encode/decode caps */ -#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2 0 -#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4 1 -#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1 2 -#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC 3 -#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC 4 -#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG 5 -#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9 6 -#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1 7 -#define AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_COUNT 8 - -struct drm_amdgpu_info_video_codec_info { - __u32 valid; - __u32 max_width; - __u32 max_height; - __u32 max_pixels_per_frame; - __u32 max_level; - __u32 pad; -}; - -struct 
drm_amdgpu_info_video_caps { - struct drm_amdgpu_info_video_codec_info codec_info[AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_COUNT]; -}; - -#define AMDGPU_VMHUB_TYPE_MASK 0xff -#define AMDGPU_VMHUB_TYPE_SHIFT 0 -#define AMDGPU_VMHUB_TYPE_GFX 0 -#define AMDGPU_VMHUB_TYPE_MM0 1 -#define AMDGPU_VMHUB_TYPE_MM1 2 -#define AMDGPU_VMHUB_IDX_MASK 0xff00 -#define AMDGPU_VMHUB_IDX_SHIFT 8 - -struct drm_amdgpu_info_gpuvm_fault { - __u64 addr; - __u32 status; - __u32 vmhub; -}; - -struct drm_amdgpu_info_uq_metadata_gfx { - /* shadow area size for gfx11 */ - __u32 shadow_size; - /* shadow area base virtual alignment for gfx11 */ - __u32 shadow_alignment; - /* context save area size for gfx11 */ - __u32 csa_size; - /* context save area base virtual alignment for gfx11 */ - __u32 csa_alignment; -}; - -struct drm_amdgpu_info_uq_metadata { - union { - struct drm_amdgpu_info_uq_metadata_gfx gfx; - }; -}; - -/* - * Supported GPU families - */ -#define AMDGPU_FAMILY_UNKNOWN 0 -#define AMDGPU_FAMILY_SI 110 /* Hainan, Oland, Verde, Pitcairn, Tahiti */ -#define AMDGPU_FAMILY_CI 120 /* Bonaire, Hawaii */ -#define AMDGPU_FAMILY_KV 125 /* Kaveri, Kabini, Mullins */ -#define AMDGPU_FAMILY_VI 130 /* Iceland, Tonga */ -#define AMDGPU_FAMILY_CZ 135 /* Carrizo, Stoney */ -#define AMDGPU_FAMILY_AI 141 /* Vega10 */ -#define AMDGPU_FAMILY_RV 142 /* Raven */ -#define AMDGPU_FAMILY_NV 143 /* Navi10 */ -#define AMDGPU_FAMILY_VGH 144 /* Van Gogh */ -#define AMDGPU_FAMILY_GC_11_0_0 145 /* GC 11.0.0 */ -#define AMDGPU_FAMILY_YC 146 /* Yellow Carp */ -#define AMDGPU_FAMILY_GC_11_0_1 148 /* GC 11.0.1 */ -#define AMDGPU_FAMILY_GC_10_3_6 149 /* GC 10.3.6 */ -#define AMDGPU_FAMILY_GC_10_3_7 151 /* GC 10.3.7 */ -#define AMDGPU_FAMILY_GC_11_5_0 150 /* GC 11.5.0 */ -#define AMDGPU_FAMILY_GC_12_0_0 152 /* GC 12.0.0 */ - -#ifndef HAVE_DRM_COLOR_CTM_3X4 -/* FIXME wrong namespace! */ -struct drm_color_ctm_3x4 { - /* - * Conversion matrix with 3x4 dimensions in S31.32 sign-magnitude - * (not two's complement!) format. 
- */ - __u64 matrix[12]; -}; -#endif - -/** - * Definition of System Unified Address (SUA) apertures - */ -#define AMDGPU_SUA_APERTURE_PRIVATE 1 -#define AMDGPU_SUA_APERTURE_SHARED 2 -struct drm_amdgpu_virtual_range { - uint64_t start; - uint64_t end; -}; - -struct drm_amdgpu_capability { - __u32 flag; - __u32 direct_gma_size; -}; - -/* - * Definition of free sync enter and exit signals - * We may have more options in the future - */ -#define AMDGPU_FREESYNC_FULLSCREEN_ENTER 1 -#define AMDGPU_FREESYNC_FULLSCREEN_EXIT 2 - -struct drm_amdgpu_freesync { - __u32 op; /* AMDGPU_FREESYNC_FULLSCREEN_ENTER or */ - /* AMDGPU_FREESYNC_FULLSCREEN_ENTER */ - __u32 spare[7]; -}; - -#if defined(__cplusplus) -} -#endif - -#endif diff --git a/plugins/amdgpu/amdgpu_plugin.c b/plugins/amdgpu/amdgpu_plugin.c index ee55bde0a..b56ba6d14 100644 --- a/plugins/amdgpu/amdgpu_plugin.c +++ b/plugins/amdgpu/amdgpu_plugin.c @@ -12,42 +12,31 @@ #include #include #include -#include -#include #include #include #include #include #include +#include #include "criu-plugin.h" #include "plugin.h" #include "criu-amdgpu.pb-c.h" -#include "util.h" -#include "util-pie.h" -#include "fdstore.h" #include "kfd_ioctl.h" #include "xmalloc.h" #include "criu-log.h" #include "files.h" -#include "pstree.h" -#include "sockets.h" -#include "rst-malloc.h" #include "common/list.h" -#include "amdgpu_drm.h" -#include "amdgpu_plugin_dmabuf.h" #include "amdgpu_plugin_drm.h" #include "amdgpu_plugin_util.h" #include "amdgpu_plugin_topology.h" -#include "amdgpu_socket_utils.h" #include "img-streamer.h" #include "image.h" #include "cr_options.h" -#include "util.h" struct vma_metadata { struct list_head list; @@ -60,39 +49,17 @@ struct vma_metadata { /************************************ Global Variables ********************************************/ +/** + * FD of KFD device used to checkpoint. 
On a multi-process + * tree the order of checkpointing goes from parent to child + * and so on - so saving the FD will not be overwritten + */ +static int kfd_checkpoint_fd; + static LIST_HEAD(update_vma_info_list); size_t kfd_max_buffer_size; -bool plugin_added_to_inventory = false; - -bool plugin_disabled = false; - -struct handle_id { - int handle; - int fdstore_id; -}; -struct shared_handle_ids { - int num_handles; - struct handle_id *handles; -}; -struct shared_handle_ids *shared_memory = NULL; - -static mutex_t *shared_memory_mutex; - -int current_pid; -/* - * In the case of a single process (common case), this optimization can effectively - * reduce the restore latency with parallel restore. In the case of multiple processes, - * states are already restored in parallel within different processes. Therefore, this - * optimization does not introduce further improvement and will be disabled by default - * in this case. The flag, parallel_disabled, is used to control whether the - * optimization is enabled or disabled. 
- */ -bool parallel_disabled = false; - -pthread_t parallel_thread = 0; -int parallel_thread_result = 0; /**************************************************************************************************/ /* Call ioctl, restarting if it is interrupted */ @@ -328,6 +295,8 @@ void getenv_size_t(const char *var, size_t *value) int sh = 0; size_t size; + pr_info("Value str: %s\n", value_str); + if (value_str) { size = (size_t)strtoul(value_str, &endp, 0); if (errno || value_str == endp) { @@ -363,13 +332,6 @@ void getenv_size_t(const char *var, size_t *value) int amdgpu_plugin_init(int stage) { - if (stage == CR_PLUGIN_STAGE__RESTORE) { - if (!check_and_remove_inventory_plugin(CR_PLUGIN_DESC.name, strlen(CR_PLUGIN_DESC.name))) { - plugin_disabled = true; - return 0; - } - } - pr_info("initialized: %s (AMDGPU/KFD)\n", CR_PLUGIN_DESC.name); topology_init(&src_topology); @@ -378,15 +340,6 @@ int amdgpu_plugin_init(int stage) maps_init(&restore_maps); if (stage == CR_PLUGIN_STAGE__RESTORE) { - if (has_children(root_item)) { - pr_info("Parallel restore disabled\n"); - parallel_disabled = true; - } else { - if (install_parallel_sock() < 0) { - pr_err("Failed to install parallel socket\n"); - return -1; - } - } /* Default Values */ kfd_fw_version_check = true; kfd_sdma_fw_version_check = true; @@ -412,9 +365,6 @@ int amdgpu_plugin_init(int stage) void amdgpu_plugin_fini(int stage, int ret) { - if (plugin_disabled) - return; - pr_info("finished %s (AMDGPU/KFD)\n", CR_PLUGIN_DESC.name); if (stage == CR_PLUGIN_STAGE__RESTORE) @@ -464,14 +414,6 @@ int amdgpu_plugin_handle_device_vma(int fd, const struct stat *st_buf) if (ret) pr_perror("%s(), Can't handle VMAs of input device", __func__); - if (!ret && !plugin_added_to_inventory) { - ret = add_inventory_plugin(CR_PLUGIN_DESC.name); - if (ret) - pr_err("Failed to add AMDGPU plugin to inventory image\n"); - else - plugin_added_to_inventory = true; - } - return ret; } CR_PLUGIN_REGISTER_HOOK(CR_PLUGIN_HOOK__HANDLE_DEVICE_VMA, 
amdgpu_plugin_handle_device_vma) @@ -539,11 +481,11 @@ void free_and_unmap(uint64_t size, amdgpu_bo_handle h_bo, amdgpu_va_handle h_va, amdgpu_bo_free(h_bo); } -int sdma_copy_bo(int shared_fd, uint64_t size, FILE *storage_fp, - void *buffer, size_t buffer_size, amdgpu_device_handle h_dev, - uint64_t max_copy_size, enum sdma_op_type type, bool do_not_free) +static int sdma_copy_bo(struct kfd_criu_bo_bucket bo_bucket, FILE *storage_fp, + void *buffer, size_t buffer_size, amdgpu_device_handle h_dev, + uint64_t max_copy_size, enum sdma_op_type type) { - uint64_t src_bo_size, dst_bo_size, buffer_bo_size, bytes_remain, buffer_space_remain; + uint64_t size, src_bo_size, dst_bo_size, buffer_bo_size, bytes_remain, buffer_space_remain; uint64_t gpu_addr_src, gpu_addr_dst, gpu_addr_ib, copy_src, copy_dst, copy_size; amdgpu_va_handle h_va_src, h_va_dst, h_va_ib; amdgpu_bo_handle h_bo_src, h_bo_dst, h_bo_ib; @@ -556,8 +498,10 @@ int sdma_copy_bo(int shared_fd, uint64_t size, FILE *storage_fp, uint32_t expired; amdgpu_context_handle h_ctx; uint32_t *ib = NULL; - int j, err, packets_per_buffer; + int j, err, shared_fd, packets_per_buffer; + shared_fd = bo_bucket.dmabuf_fd; + size = bo_bucket.size; buffer_bo_size = min(size, buffer_size); packets_per_buffer = ((buffer_bo_size - 1) / max_copy_size) + 1; src_bo_size = (type == SDMA_OP_VRAM_WRITE) ? 
buffer_bo_size : size; @@ -768,8 +712,7 @@ err_dst_bo_map: if (err) pr_perror("dest range free failed"); err_dst_va: - if (!do_not_free) - err = amdgpu_bo_free(h_bo_dst); + err = amdgpu_bo_free(h_bo_dst); if (err) pr_perror("dest bo free failed"); err_dst_bo_prep: @@ -857,9 +800,8 @@ void *dump_bo_contents(void *_thread_data) num_bos++; /* perform sDMA based vram copy */ - ret = sdma_copy_bo(bo_buckets[i].dmabuf_fd, bo_buckets[i].size, bo_contents_fp, buffer, buffer_size, h_dev, max_copy_size, - SDMA_OP_VRAM_READ, false); - + ret = sdma_copy_bo(bo_buckets[i], bo_contents_fp, buffer, buffer_size, h_dev, max_copy_size, + SDMA_OP_VRAM_READ); if (ret) { pr_err("Failed to drain the BO using sDMA: bo_buckets[%d]\n", i); break; @@ -956,8 +898,8 @@ void *restore_bo_contents(void *_thread_data) num_bos++; - ret = sdma_copy_bo(bo_buckets[i].dmabuf_fd, bo_buckets[i].size, bo_contents_fp, buffer, buffer_size, h_dev, max_copy_size, - SDMA_OP_VRAM_WRITE, false); + ret = sdma_copy_bo(bo_buckets[i], bo_contents_fp, buffer, buffer_size, h_dev, max_copy_size, + SDMA_OP_VRAM_WRITE); if (ret) { pr_err("Failed to fill the BO using sDMA: bo_buckets[%d]\n", i); break; @@ -1043,163 +985,28 @@ int restore_hsakmt_shared_mem(const uint64_t shared_mem_size, const uint32_t sha return 0; } -int amdgpu_unpause_processes(int pid) +static int unpause_process(int fd) { int ret = 0; struct kfd_ioctl_criu_args args = { 0 }; - struct list_head *l = get_dumped_fds(); - struct dumped_fd *st; - list_for_each_entry(st, l, l) { - if (st->is_drm) { - close(st->fd); - } else { - args.op = KFD_CRIU_OP_UNPAUSE; + args.op = KFD_CRIU_OP_UNPAUSE; - ret = kmtIoctl(st->fd, AMDKFD_IOC_CRIU_OP, &args); - if (ret) { - pr_perror("Failed to unpause process"); - goto exit; - } - } + ret = kmtIoctl(fd, AMDKFD_IOC_CRIU_OP, &args); + if (ret) { + pr_perror("Failed to unpause process"); + goto exit; } - if (post_dump_dmabuf_check() < 0) - ret = -1; + // Reset the KFD FD + kfd_checkpoint_fd = -1; + 
sys_close_drm_render_devices(&src_topology); exit: pr_info("Process unpaused %s (ret:%d)\n", ret ? "Failed" : "Ok", ret); - clear_dumped_fds(); return ret; } -CR_PLUGIN_REGISTER_HOOK(CR_PLUGIN_HOOK__DUMP_DEVICES_LATE, amdgpu_unpause_processes) - -int store_dmabuf_fd(int handle, int fd) -{ - int id; - - id = fdstore_add(fd); - mutex_lock(shared_memory_mutex); - for (int i = 0; i < shared_memory->num_handles; i++) { - if (shared_memory->handles[i].handle == handle) { - mutex_unlock(shared_memory_mutex); - return 0; - } - if (shared_memory->handles[i].handle == -1) { - shared_memory->handles[i].handle = handle; - shared_memory->handles[i].fdstore_id = id; - mutex_unlock(shared_memory_mutex); - return 0; - } - } - mutex_unlock(shared_memory_mutex); - - return -1; -} - -int amdgpu_id_for_handle(int handle) -{ - mutex_lock(shared_memory_mutex); - for (int i = 0; i < shared_memory->num_handles; i++) { - if (shared_memory->handles[i].handle == handle) { - mutex_unlock(shared_memory_mutex); - return shared_memory->handles[i].fdstore_id; - } - } - mutex_unlock(shared_memory_mutex); - return -1; -} - -int amdgpu_restore_init(void) -{ - if (!shared_memory) { - int protection = PROT_READ | PROT_WRITE; - int visibility = MAP_SHARED | MAP_ANONYMOUS; - size_t img_size; - FILE *img_fp = NULL; - int ret; - unsigned char *buf; - int num_handles = 0; - char img_path[PATH_MAX]; - CriuRenderNode *rd = NULL; - CriuKfd *e = NULL; - - DIR *d; - struct dirent *dir; - d = opendir("."); - if (d) { - while ((dir = readdir(d)) != NULL) { - if (strncmp("amdgpu-kfd-", dir->d_name, strlen("amdgpu-kfd-")) == 0) { - img_fp = open_img_file(dir->d_name, false, &img_size); - buf = xmalloc(img_size); - if (!buf) { - fclose(img_fp); - return -ENOMEM; - } - - ret = read_fp(img_fp, buf, img_size); - if (ret) { - pr_perror("Unable to read from %s", img_path); - fclose(img_fp); - xfree(buf); - return ret; - } - - fclose(img_fp); - e = criu_kfd__unpack(NULL, img_size, buf); - num_handles += e->num_of_bos; - 
criu_kfd__free_unpacked(e, NULL); - xfree(buf); - } - if (strncmp("amdgpu-renderD-", dir->d_name, strlen("amdgpu-renderD-")) == 0) { - img_fp = open_img_file(dir->d_name, false, &img_size); - buf = xmalloc(img_size); - if (!buf) { - fclose(img_fp); - return -ENOMEM; - } - - ret = read_fp(img_fp, buf, img_size); - if (ret) { - pr_perror("Unable to read from %s", img_path); - fclose(img_fp); - xfree(buf); - return ret; - } - - fclose(img_fp); - rd = criu_render_node__unpack(NULL, img_size, buf); - num_handles += rd->num_of_bos; - criu_render_node__free_unpacked(rd, NULL); - xfree(buf); - } - } - closedir(d); - } - - if (num_handles > 0) { - shared_memory = mmap(NULL, sizeof(shared_memory), protection, visibility, -1, 0); - shared_memory->num_handles = num_handles; - shared_memory->handles = mmap(NULL, sizeof(struct handle_id) * num_handles, protection, visibility, -1, 0); - - for (int i = 0; i < num_handles; i++) { - shared_memory->handles[i].handle = -1; - shared_memory->handles[i].fdstore_id = -1; - } - - shared_memory_mutex = shmalloc(sizeof(*shared_memory_mutex)); - if (!shared_memory_mutex) { - pr_err("Can't create amdgpu mutex\n"); - return -1; - } - mutex_init(shared_memory_mutex); - } - } - - return 0; -} -CR_PLUGIN_REGISTER_HOOK(CR_PLUGIN_HOOK__RESTORE_INIT, amdgpu_restore_init) static int save_devices(int fd, struct kfd_ioctl_criu_args *args, struct kfd_criu_device_bucket *device_buckets, CriuKfd *e) @@ -1243,8 +1050,6 @@ static int save_bos(int id, int fd, struct kfd_ioctl_criu_args *args, struct kfd { struct thread_data *thread_datas; int ret = 0, i; - amdgpu_device_handle h_dev; - uint32_t major, minor; pr_debug("Dumping %d BOs\n", args->num_bos); @@ -1268,19 +1073,6 @@ static int save_bos(int id, int fd, struct kfd_ioctl_criu_args *args, struct kfd boinfo->size = bo_bucket->size; boinfo->offset = bo_bucket->offset; boinfo->alloc_flags = bo_bucket->alloc_flags; - - ret = 
amdgpu_device_initialize(node_get_drm_render_device(sys_get_node_by_gpu_id(&src_topology, bo_bucket->gpu_id)), &major, &minor, &h_dev); - - boinfo->handle = get_gem_handle(h_dev, bo_bucket->dmabuf_fd); - - amdgpu_device_deinitialize(h_dev); - } - for (i = 0; i < e->num_of_bos; i++) { - KfdBoEntry *boinfo = e->bo_entries[i]; - - ret = record_shared_bo(boinfo->handle, false); - if (ret) - goto exit; } for (int i = 0; i < e->num_of_gpus; i++) { @@ -1401,17 +1193,10 @@ int amdgpu_plugin_dump_file(int fd, int id) return -1; } - /* Check whether this plugin was called for kfd, dmabuf or render nodes */ - ret = get_dmabuf_info(fd, &st); - if (ret < 0) { - pr_perror("Failed to get dmabuf info"); - return -1; - } - if (ret == 0) { - pr_info("Dumping dmabuf fd = %d\n", fd); - return amdgpu_plugin_dmabuf_dump(fd, id); - } + /* Initialize number of device files that will be checkpointed */ + init_gpu_count(&src_topology); + /* Check whether this plugin was called for kfd or render nodes */ if (major(st.st_rdev) != major(st_kfd.st_rdev) || minor(st.st_rdev) != 0) { /* This is RenderD dumper plugin, for now just save renderD @@ -1422,12 +1207,14 @@ int amdgpu_plugin_dump_file(int fd, int id) if (ret) return ret; - ret = record_dumped_fd(fd, true); - if (ret) - return ret; + /* Invoke unpause process if needed */ + decrement_checkpoint_count(); + if (checkpoint_is_complete()) { + ret = unpause_process(kfd_checkpoint_fd); + } /* Need to return success here so that criu can call plugins for renderD nodes */ - return try_dump_dmabuf_list(); + return ret; } pr_info("%s() called for fd = %d\n", __func__, major(st.st_rdev)); @@ -1522,11 +1309,14 @@ int amdgpu_plugin_dump_file(int fd, int id) xfree(buf); - ret = record_dumped_fd(fd, false); - if (ret) - goto exit; - exit: + /* Restore all queues if conditions permit */ + kfd_checkpoint_fd = fd; + decrement_checkpoint_count(); + if (checkpoint_is_complete()) { + ret = unpause_process(fd); + } + xfree((void *)args.devices); xfree((void 
*)args.bos); xfree((void *)args.priv_data); @@ -1549,6 +1339,7 @@ static int restore_devices(struct kfd_ioctl_criu_args *args, CriuKfd *e) int ret = 0, bucket_index = 0; pr_debug("Restoring %d devices\n", e->num_of_gpus); + args->num_devices = e->num_of_gpus; device_buckets = xzalloc(sizeof(*device_buckets) * args->num_devices); if (!device_buckets) @@ -1621,37 +1412,19 @@ static int restore_bos(struct kfd_ioctl_criu_args *args, CriuKfd *e) } pr_info("Restore BOs Ok\n"); - - return 0; -} - -int save_vma_updates(uint64_t offset, uint64_t addr, uint64_t restored_offset, int fd) -{ - struct vma_metadata *vma_md; - - vma_md = xmalloc(sizeof(*vma_md)); - if (!vma_md) { - return -ENOMEM; - } - - memset(vma_md, 0, sizeof(*vma_md)); - - vma_md->old_pgoff = offset; - vma_md->vma_entry = addr; - - vma_md->new_pgoff = restored_offset; - vma_md->fd = fd; - - list_add_tail(&vma_md->list, &update_vma_info_list); - return 0; } static int restore_bo_data(int id, struct kfd_criu_bo_bucket *bo_buckets, CriuKfd *e) { - struct thread_data *thread_datas = NULL; + struct thread_data *thread_datas; int thread_i, ret = 0; - uint64_t offset = 0; + + thread_datas = xzalloc(sizeof(*thread_datas) * e->num_of_gpus); + if (!thread_datas) { + ret = -ENOMEM; + goto exit; + } for (int i = 0; i < e->num_of_bos; i++) { struct kfd_criu_bo_bucket *bo_bucket = &bo_buckets[i]; @@ -1694,101 +1467,56 @@ static int restore_bo_data(int id, struct kfd_criu_bo_bucket *bo_buckets, CriuKf } } - if (!parallel_disabled) { - parallel_restore_cmd restore_cmd; - pr_info("Begin to send parallel restore cmd\n"); - ret = init_parallel_restore_cmd(e->num_of_bos, id, e->num_of_gpus, &restore_cmd); - if (ret) - goto exit_parallel; + thread_i = 0; + for (int i = 0; i < e->num_of_gpus + e->num_of_cpus; i++) { + struct tp_node *dev; + int ret_thread = 0; + uint32_t target_gpu_id; - for (int i = 0; i < e->num_of_gpus + e->num_of_cpus; i++) { - uint32_t target_gpu_id; - struct tp_node *dev; + if (!e->device_entries[i]->gpu_id) 
+ continue; - if (!e->device_entries[i]->gpu_id) - continue; + /* e->device_entries[i]->gpu_id is user_gpu_id, target_gpu_id is actual_gpu_id */ + target_gpu_id = maps_get_dest_gpu(&restore_maps, e->device_entries[i]->gpu_id); - target_gpu_id = maps_get_dest_gpu(&restore_maps, e->device_entries[i]->gpu_id); - dev = sys_get_node_by_gpu_id(&dest_topology, target_gpu_id); - if (!dev) { - pr_err("Failed to find node with gpu_id:0x%04x\n", target_gpu_id); - ret = -ENODEV; - goto exit_parallel; - } - parallel_restore_gpu_id_add(e->device_entries[i]->gpu_id, dev->drm_render_minor, &restore_cmd); - - for (int j = 0; j < e->num_of_bos; j++) { - if (bo_buckets[j].gpu_id != e->device_entries[i]->gpu_id) - continue; - if (bo_buckets[j].alloc_flags & - (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT)) { - parallel_restore_bo_add(bo_buckets[j].dmabuf_fd, bo_buckets[j].gpu_id, - bo_buckets[j].size, offset, &restore_cmd); - offset += bo_buckets[j].size; - } - } - } - ret = send_parallel_restore_cmd(&restore_cmd); -exit_parallel: - free_parallel_restore_cmd(&restore_cmd); - } else { - thread_datas = xzalloc(sizeof(*thread_datas) * e->num_of_gpus); - if (!thread_datas) { - ret = -ENOMEM; + /* We need the fd for actual_gpu_id */ + dev = sys_get_node_by_gpu_id(&dest_topology, target_gpu_id); + if (!dev) { + pr_err("Failed to find node with gpu_id:0x%04x\n", target_gpu_id); + ret = -ENODEV; goto exit; } - thread_i = 0; - for (int i = 0; i < e->num_of_gpus + e->num_of_cpus; i++) { - struct tp_node *dev; - int ret_thread = 0; - uint32_t target_gpu_id; + thread_datas[thread_i].id = id; + thread_datas[thread_i].gpu_id = e->device_entries[i]->gpu_id; + thread_datas[thread_i].bo_buckets = bo_buckets; + thread_datas[thread_i].bo_entries = e->bo_entries; + thread_datas[thread_i].pid = e->pid; + thread_datas[thread_i].num_of_bos = e->num_of_bos; - if (!e->device_entries[i]->gpu_id) - continue; - - /* e->device_entries[i]->gpu_id is user_gpu_id, target_gpu_id is actual_gpu_id */ - 
target_gpu_id = maps_get_dest_gpu(&restore_maps, e->device_entries[i]->gpu_id); - - /* We need the fd for actual_gpu_id */ - dev = sys_get_node_by_gpu_id(&dest_topology, target_gpu_id); - if (!dev) { - pr_err("Failed to find node with gpu_id:0x%04x\n", target_gpu_id); - ret = -ENODEV; - goto exit; - } - - thread_datas[thread_i].id = id; - thread_datas[thread_i].gpu_id = e->device_entries[i]->gpu_id; - thread_datas[thread_i].bo_buckets = bo_buckets; - thread_datas[thread_i].bo_entries = e->bo_entries; - thread_datas[thread_i].pid = e->pid; - thread_datas[thread_i].num_of_bos = e->num_of_bos; - - thread_datas[thread_i].drm_fd = node_get_drm_render_device(dev); - if (thread_datas[thread_i].drm_fd < 0) { - ret = -thread_datas[thread_i].drm_fd; - goto exit; - } - - ret_thread = pthread_create(&thread_datas[thread_i].thread, NULL, restore_bo_contents, - (void *)&thread_datas[thread_i]); - if (ret_thread) { - pr_err("Failed to create thread[%i] ret:%d\n", thread_i, ret_thread); - ret = -ret_thread; - goto exit; - } - thread_i++; + thread_datas[thread_i].drm_fd = node_get_drm_render_device(dev); + if (thread_datas[thread_i].drm_fd < 0) { + ret = -thread_datas[thread_i].drm_fd; + goto exit; } - for (int i = 0; i < e->num_of_gpus; i++) { - pthread_join(thread_datas[i].thread, NULL); - pr_info("Thread[0x%x] finished ret:%d\n", thread_datas[i].gpu_id, thread_datas[i].ret); + ret_thread = pthread_create(&thread_datas[thread_i].thread, NULL, restore_bo_contents, + (void *)&thread_datas[thread_i]); + if (ret_thread) { + pr_err("Failed to create thread[%i] ret:%d\n", thread_i, ret_thread); + ret = -ret_thread; + goto exit; + } + thread_i++; + } - if (thread_datas[i].ret) { - ret = thread_datas[i].ret; - goto exit; - } + for (int i = 0; i < e->num_of_gpus; i++) { + pthread_join(thread_datas[i].thread, NULL); + pr_info("Thread[0x%x] finished ret:%d\n", thread_datas[i].gpu_id, thread_datas[i].ret); + + if (thread_datas[i].ret) { + ret = thread_datas[i].ret; + goto exit; } } exit: @@ 
-1796,12 +1524,12 @@ exit: if (bo_buckets[i].dmabuf_fd != KFD_INVALID_FD) close(bo_buckets[i].dmabuf_fd); } - if (thread_datas) - xfree(thread_datas); + + xfree(thread_datas); return ret; } -int amdgpu_plugin_restore_file(int id, bool *retry_needed) +int amdgpu_plugin_restore_file(int id) { int ret = 0, fd; char img_path[PATH_MAX]; @@ -1812,11 +1540,6 @@ int amdgpu_plugin_restore_file(int id, bool *retry_needed) size_t img_size; FILE *img_fp = NULL; - *retry_needed = false; - - if (plugin_disabled) - return -ENOTSUP; - pr_info("Initialized kfd plugin restorer with ID = %d\n", id); snprintf(img_path, sizeof(img_path), IMG_KFD_FILE, id); @@ -1832,21 +1555,12 @@ int amdgpu_plugin_restore_file(int id, bool *retry_needed) * first as we assume restore_maps is already filled. Need to fix this later. */ snprintf(img_path, sizeof(img_path), IMG_DRM_FILE, id); + pr_info("Restoring RenderD %s\n", img_path); img_fp = open_img_file(img_path, false, &img_size); - if (!img_fp) { - ret = amdgpu_plugin_dmabuf_restore(id); - if (ret == 1) { - /* This is a dmabuf fd, but the corresponding buffer object that was - * exported to make it has not yet been restored. Need to try again - * later when the buffer object exists, so it can be re-exported. 
- */ - *retry_needed = true; - return 0; - } - return ret; - } - pr_info("Restoring RenderD %s\n", img_path); + if (!img_fp) + return -EINVAL; + pr_debug("RenderD Image file size:%ld\n", img_size); buf = xmalloc(img_size); if (!buf) { @@ -1887,18 +1601,8 @@ int amdgpu_plugin_restore_file(int id, bool *retry_needed) pr_info("render node destination gpu_id = 0x%04x\n", tp_node->gpu_id); fd = node_get_drm_render_device(tp_node); - if (fd < 0) { + if (fd < 0) pr_err("Failed to open render device (minor:%d)\n", tp_node->drm_render_minor); - return -1; - } - - ret = amdgpu_plugin_drm_restore_file(fd, rd); - if (ret == 1) - *retry_needed = true; - if (ret < 0) { - fd = ret; - goto fail; - } fail: criu_render_node__free_unpacked(rd, NULL); xfree(buf); @@ -1910,20 +1614,12 @@ int amdgpu_plugin_restore_file(int id, bool *retry_needed) * copy of the fd. CRIU core owns the duplicated returned fd, and amdgpu_plugin owns the fd stored in * tp_node. */ - - if (fd < 0) - return fd; - - if (!(*retry_needed)) { - fd = dup(fd); - if (fd == -1) { - pr_perror("unable to duplicate the render fd"); - return -1; - } - return fd; + fd = dup(fd); + if (fd == -1) { + pr_perror("unable to duplicate the render fd"); + return -1; } - - return 0; + return fd; } fd = open(AMDGPU_KFD_DEVICE, O_RDWR | O_CLOEXEC); @@ -1967,13 +1663,11 @@ int amdgpu_plugin_restore_file(int id, bool *retry_needed) * This way, we know that the file descriptors we store will not conflict with file descriptors inside core * CRIU. 
*/ - if (fd_next == -1) { - fd_next = find_unused_fd_pid(e->pid); - if (fd_next <= 0) { - pr_err("Failed to find unused fd (fd:%d)\n", fd_next); - ret = -EINVAL; - goto exit; - } + fd_next = find_unused_fd_pid(e->pid); + if (fd_next <= 0) { + pr_err("Failed to find unused fd (fd:%d)\n", fd_next); + ret = -EINVAL; + goto exit; } ret = devinfo_to_topology(e->device_entries, e->num_of_gpus + e->num_of_cpus, &src_topology); @@ -2006,26 +1700,14 @@ int amdgpu_plugin_restore_file(int id, bool *retry_needed) args.num_objects = e->num_of_objects; args.priv_data_size = e->priv_data.len; args.priv_data = (uintptr_t)e->priv_data.data; - args.op = KFD_CRIU_OP_RESTORE; + args.op = KFD_CRIU_OP_RESTORE; if (kmtIoctl(fd, AMDKFD_IOC_CRIU_OP, &args) == -1) { pr_perror("Restore ioctl failed"); ret = -1; goto exit; } - if (ret < 0) - goto exit; - - for (int i = 0; i < args.num_bos; i++) { - struct kfd_criu_bo_bucket *bo_bucket = &((struct kfd_criu_bo_bucket *)args.bos)[i]; - KfdBoEntry *bo_entry = e->bo_entries[i]; - - if (bo_entry->handle != -1) { - store_dmabuf_fd(bo_entry->handle, bo_bucket->dmabuf_fd); - } - } - ret = restore_bo_data(id, (struct kfd_criu_bo_bucket *)args.bos, e); if (ret) goto exit; @@ -2064,9 +1746,6 @@ int amdgpu_plugin_update_vmamap(const char *in_path, const uint64_t addr, const char *p_end; bool is_kfd = false, is_renderD = false; - if (plugin_disabled) - return -ENOTSUP; - plugin_log_msg("Enter %s\n", __func__); strncpy(path, in_path, sizeof(path)); @@ -2126,27 +1805,6 @@ int amdgpu_plugin_resume_devices_late(int target_pid) struct kfd_ioctl_criu_args args = { 0 }; int fd, exit_code = 0; - if (plugin_disabled) - return -ENOTSUP; - - if (!parallel_disabled) { - pr_info("Close parallel restore server\n"); - if (close_parallel_restore_server()) { - pr_err("Close parallel restore server fail\n"); - return -1; - } - - exit_code = pthread_join(parallel_thread, NULL); - if (exit_code) { - pr_err("Failed to join parallel thread ret:%d\n", exit_code); - return -1; - 
} - if (parallel_thread_result) { - pr_err("Parallel restore fail\n"); - return parallel_thread_result; - } - } - pr_info("Inside %s for target pid = %d\n", __func__, target_pid); fd = open(AMDGPU_KFD_DEVICE, O_RDWR | O_CLOEXEC); @@ -2168,246 +1826,8 @@ int amdgpu_plugin_resume_devices_late(int target_pid) } } - clear_restore_state(); - close(fd); return exit_code; } CR_PLUGIN_REGISTER_HOOK(CR_PLUGIN_HOOK__RESUME_DEVICES_LATE, amdgpu_plugin_resume_devices_late) - -int init_dev(int dev_minor, amdgpu_device_handle *h_dev, uint64_t *max_copy_size) -{ - int ret = 0; - int drm_fd = -1; - uint32_t major, minor; - - struct amdgpu_gpu_info gpu_info = { 0 }; - - drm_fd = open_drm_render_device(dev_minor); - if (drm_fd < 0) { - return drm_fd; - } - - ret = amdgpu_device_initialize(drm_fd, &major, &minor, h_dev); - if (ret) { - pr_perror("Failed to initialize device"); - goto err; - } - - ret = amdgpu_query_gpu_info(*h_dev, &gpu_info); - if (ret) { - pr_perror("failed to query gpuinfo via libdrm"); - goto err; - } - *max_copy_size = (gpu_info.family_id >= AMDGPU_FAMILY_AI) ? 
SDMA_LINEAR_COPY_MAX_SIZE : - SDMA_LINEAR_COPY_MAX_SIZE - 1; - return 0; -err: - amdgpu_device_deinitialize(*h_dev); - return ret; -} - -FILE *get_bo_contents_fp(int id, int gpu_id, size_t tot_size) -{ - char img_path[PATH_MAX]; - size_t image_size = 0; - FILE *bo_contents_fp = NULL; - - snprintf(img_path, sizeof(img_path), IMG_KFD_PAGES_FILE, id, gpu_id); - bo_contents_fp = open_img_file(img_path, false, &image_size); - if (!bo_contents_fp) { - pr_perror("Cannot fopen %s", img_path); - return NULL; - } - - if (tot_size != image_size) { - pr_err("%s size mismatch (current:%ld:expected:%ld)\n", img_path, image_size, tot_size); - fclose(bo_contents_fp); - return NULL; - } - return bo_contents_fp; -} - -struct parallel_thread_data { - pthread_t thread; - uint32_t gpu_id; - int minor; - parallel_restore_cmd *restore_cmd; - int ret; -}; - -void *parallel_restore_bo_contents(void *_thread_data) -{ - struct parallel_thread_data *thread_data = (struct parallel_thread_data *)_thread_data; - amdgpu_device_handle h_dev; - uint64_t max_copy_size; - size_t total_bo_size = 0, max_bo_size = 0, buffer_size = 0; - FILE *bo_contents_fp = NULL; - parallel_restore_entry *entry; - parallel_restore_cmd *restore_cmd = thread_data->restore_cmd; - int ret = 0; - int offset = 0; - void *buffer = NULL; - - ret = init_dev(thread_data->minor, &h_dev, &max_copy_size); - if (ret) { - goto err; - } - - for (int i = 0; i < restore_cmd->cmd_head.entry_num; i++) { - if (restore_cmd->entries[i].gpu_id == thread_data->gpu_id) { - total_bo_size += restore_cmd->entries[i].size; - max_bo_size = max(restore_cmd->entries[i].size, max_bo_size); - } - } - - buffer_size = kfd_max_buffer_size > 0 ? 
min(kfd_max_buffer_size, max_bo_size) : max_bo_size; - - bo_contents_fp = get_bo_contents_fp(restore_cmd->cmd_head.id, thread_data->gpu_id, total_bo_size); - if (bo_contents_fp == NULL) { - ret = -1; - goto err_sdma; - } - offset = ftell(bo_contents_fp); - - posix_memalign(&buffer, sysconf(_SC_PAGE_SIZE), buffer_size); - if (!buffer) { - pr_perror("Failed to alloc aligned memory. Consider setting KFD_MAX_BUFFER_SIZE."); - ret = -ENOMEM; - goto err_sdma; - } - - for (int i = 0; i < restore_cmd->cmd_head.entry_num; i++) { - if (restore_cmd->entries[i].gpu_id != thread_data->gpu_id) - continue; - - entry = &restore_cmd->entries[i]; - fseeko(bo_contents_fp, entry->read_offset + offset, SEEK_SET); - ret = sdma_copy_bo(restore_cmd->fds_write[entry->write_id], entry->size, bo_contents_fp, - buffer, buffer_size, h_dev, - max_copy_size, SDMA_OP_VRAM_WRITE, false); - - if (ret) { - pr_err("Failed to fill the BO using sDMA: bo_buckets[%d]\n", i); - goto err_sdma; - } - } - -err_sdma: - if (bo_contents_fp) - fclose(bo_contents_fp); - if (buffer) - xfree(buffer); - amdgpu_device_deinitialize(h_dev); -err: - thread_data->ret = ret; - return NULL; -} - -void *restore_device_parallel_worker(void *arg) -{ - while (1) { - parallel_restore_cmd restore_cmd = { 0 }; - struct parallel_thread_data *thread_datas = NULL; - int ret; - int error_occurred = 0, join_ret = 0, created_threads = 0; - - ret = recv_parallel_restore_cmd(&restore_cmd); - if (ret) { - if (ret == 1) { - *(int *)arg = 0; - goto exit; - } - goto err; - } - - thread_datas = xzalloc(sizeof(*thread_datas) * restore_cmd.cmd_head.gpu_num); - if (!thread_datas) { - ret = -ENOMEM; - goto err; - } - - for (; created_threads < restore_cmd.cmd_head.gpu_num; created_threads++) { - thread_datas[created_threads].gpu_id = restore_cmd.gpu_ids[created_threads].gpu_id; - thread_datas[created_threads].minor = restore_cmd.gpu_ids[created_threads].minor; - thread_datas[created_threads].restore_cmd = &restore_cmd; - - ret = 
pthread_create(&thread_datas[created_threads].thread, NULL, parallel_restore_bo_contents, - (void *)&thread_datas[created_threads]); - if (ret) { - pr_err("Failed to create thread[0x%x] ret:%d\n", thread_datas[created_threads].gpu_id, ret); - error_occurred = 1; - break; - } - } - - for (int i = 0; i < created_threads; i++) { - join_ret = pthread_join(thread_datas[i].thread, NULL); - if (join_ret != 0) { - pr_err("pthread_join failed for Thread[0x%x] ret:%d\n", - thread_datas[i].gpu_id, join_ret); - if (!error_occurred) { - ret = join_ret; - error_occurred = 1; - } - } - - pr_info("Thread[0x%x] finished ret:%d\n", thread_datas[i].gpu_id, thread_datas[i].ret); - - /* Check thread return value */ - if (thread_datas[i].ret && !error_occurred) { - ret = thread_datas[i].ret; - error_occurred = 1; - } - } - - if (thread_datas) - xfree(thread_datas); -err: - free_parallel_restore_cmd(&restore_cmd); - - if (ret) { - *(int *)arg = ret; - return NULL; - } - } -exit: - return NULL; -} - -/* - * While the background thread is running, some processing functions (e.g., stop_cgroupd) - * in the main thread need to block SIGCHLD. To prevent interference from this background - * thread, SIGCHLD is blocked in this thread. 
- */ -static int back_thread_create(pthread_t *newthread, void *(*f)(void *), void *arg) -{ - int ret = 0; - sigset_t blockmask, oldmask; - - sigemptyset(&blockmask); - sigaddset(&blockmask, SIGCHLD); - sigprocmask(SIG_BLOCK, &blockmask, &oldmask); - - ret = pthread_create(newthread, NULL, f, arg); - if (ret) { - pr_err("Create worker thread fail: %d\n", ret); - return -1; - } - - sigprocmask(SIG_SETMASK, &oldmask, NULL); - return 0; -} - -int amdgpu_plugin_post_forking(void) -{ - if (plugin_disabled) - return -ENOTSUP; - - if (parallel_disabled) - return 0; - - return back_thread_create(¶llel_thread, restore_device_parallel_worker, ¶llel_thread_result); -} -CR_PLUGIN_REGISTER_HOOK(CR_PLUGIN_HOOK__POST_FORKING, amdgpu_plugin_post_forking) diff --git a/plugins/amdgpu/amdgpu_plugin_dmabuf.c b/plugins/amdgpu/amdgpu_plugin_dmabuf.c deleted file mode 100644 index 11c9792e3..000000000 --- a/plugins/amdgpu/amdgpu_plugin_dmabuf.c +++ /dev/null @@ -1,197 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include "common/list.h" -#include "criu-amdgpu.pb-c.h" - -#include "xmalloc.h" -#include "criu-log.h" -#include "amdgpu_plugin_drm.h" -#include "amdgpu_plugin_util.h" -#include "amdgpu_plugin_dmabuf.h" -#include "fdstore.h" - -#include "util.h" -#include "common/scm.h" - -struct dmabuf { - int id; - int dmabuf_fd; - struct list_head node; -}; - -static LIST_HEAD(dmabuf_list); - -/* Return < 0 for error, > 0 for "not a dmabuf" and 0 "is a dmabuf" */ -int get_dmabuf_info(int fd, struct stat *st) -{ - char path[PATH_MAX]; - - if (read_fd_link(fd, path, sizeof(path)) < 0) - return -1; - - if (strncmp(path, DMABUF_LINK, strlen(DMABUF_LINK)) != 0) - return 1; - - return 0; -} - -int __amdgpu_plugin_dmabuf_dump(int dmabuf_fd, int id) -{ - int ret = 0; - char path[PATH_MAX]; - size_t len = 0; - unsigned char *buf = NULL; - int gem_handle; - - gem_handle = handle_for_shared_bo_fd(dmabuf_fd); - if 
(gem_handle < 0) { - pr_err("Failed to get handle for dmabuf_fd = %d\n", dmabuf_fd); - return -EAGAIN; /* Retry needed */ - } - - CriuDmabufNode *node = xmalloc(sizeof(*node)); - if (!node) { - pr_err("Failed to allocate memory for dmabuf node\n"); - return -ENOMEM; - } - criu_dmabuf_node__init(node); - - node->gem_handle = gem_handle; - - if (node->gem_handle < 0) { - pr_err("Failed to get handle for dmabuf_fd\n"); - xfree(node); - return -EINVAL; - } - - /* Serialize metadata to a file */ - snprintf(path, sizeof(path), IMG_DMABUF_FILE, id); - len = criu_dmabuf_node__get_packed_size(node); - buf = xmalloc(len); - if (!buf) { - pr_err("Failed to allocate buffer for dmabuf metadata\n"); - xfree(node); - return -ENOMEM; - } - criu_dmabuf_node__pack(node, buf); - ret = write_img_file(path, buf, len); - - xfree(buf); - xfree(node); - return ret; -} - -int amdgpu_plugin_dmabuf_restore(int id) -{ - char path[PATH_MAX]; - size_t img_size; - FILE *img_fp = NULL; - int ret = 0; - CriuDmabufNode *rd = NULL; - unsigned char *buf = NULL; - int fd_id; - - snprintf(path, sizeof(path), IMG_DMABUF_FILE, id); - - /* Read serialized metadata */ - img_fp = open_img_file(path, false, &img_size); - if (!img_fp) { - pr_err("Failed to open dmabuf metadata file: %s\n", path); - return -EINVAL; - } - - pr_debug("dmabuf Image file size:%ld\n", img_size); - buf = xmalloc(img_size); - if (!buf) { - pr_perror("Failed to allocate memory"); - return -ENOMEM; - } - - ret = read_fp(img_fp, buf, img_size); - if (ret) { - pr_perror("Unable to read from %s", path); - xfree(buf); - return ret; - } - - rd = criu_dmabuf_node__unpack(NULL, img_size, buf); - if (rd == NULL) { - pr_perror("Unable to parse the dmabuf message %d", id); - xfree(buf); - fclose(img_fp); - return -1; - } - fclose(img_fp); - - /* Match GEM handle with shared_dmabuf list */ - fd_id = amdgpu_id_for_handle(rd->gem_handle); - if (fd_id == -1) { - pr_err("Failed to find dmabuf_fd for GEM handle = %d\n", rd->gem_handle); - return 1; - 
} - - int dmabuf_fd = fdstore_get(fd_id); - if (dmabuf_fd == -1) { - pr_err("Failed to find dmabuf_fd for GEM handle = %d\n", rd->gem_handle); - return 1; /* Retry needed */ - } - - pr_info("Restored dmabuf_fd = %d for GEM handle = %d\n", dmabuf_fd, rd->gem_handle); - ret = dmabuf_fd; - - pr_info("Successfully restored dmabuf_fd %d\n", dmabuf_fd); - criu_dmabuf_node__free_unpacked(rd, NULL); - xfree(buf); - return ret; -} - -int amdgpu_plugin_dmabuf_dump(int dmabuf_fd, int id) -{ - int ret; - - ret = __amdgpu_plugin_dmabuf_dump(dmabuf_fd, id); - if (ret == -EAGAIN) { - struct dmabuf *b = xmalloc(sizeof(*b)); - b->id = id; - b->dmabuf_fd = dmabuf_fd; - list_add(&b->node, &dmabuf_list); - return 0; - } - return ret; -} - -int try_dump_dmabuf_list() -{ - struct dmabuf *b, *t; - list_for_each_entry_safe(b, t, &dmabuf_list, node) { - int ret = __amdgpu_plugin_dmabuf_dump(b->dmabuf_fd, b->id); - if (ret == -EAGAIN) - continue; - if (ret) - return ret; - list_del(&b->node); - xfree(b); - } - return 0; -} - -int post_dump_dmabuf_check() -{ - if (!list_empty(&dmabuf_list)) { - pr_err("Not all dma buffers have been dumped\n"); - return -1; - } - return 0; -} diff --git a/plugins/amdgpu/amdgpu_plugin_dmabuf.h b/plugins/amdgpu/amdgpu_plugin_dmabuf.h deleted file mode 100644 index f07af7ee0..000000000 --- a/plugins/amdgpu/amdgpu_plugin_dmabuf.h +++ /dev/null @@ -1,16 +0,0 @@ - -#ifndef __AMDGPU_PLUGIN_DMABUF_H__ -#define __AMDGPU_PLUGIN_DMABUF_H__ - -#include "amdgpu_plugin_util.h" -#include "criu-amdgpu.pb-c.h" - -int amdgpu_plugin_dmabuf_dump(int fd, int id); -int amdgpu_plugin_dmabuf_restore(int id); - -int try_dump_dmabuf_list(); -int post_dump_dmabuf_check(); - -int get_dmabuf_info(int fd, struct stat *st); - -#endif /* __AMDGPU_PLUGIN_DMABUF_H__ */ \ No newline at end of file diff --git a/plugins/amdgpu/amdgpu_plugin_drm.c b/plugins/amdgpu/amdgpu_plugin_drm.c index 3520bca7a..d54cd937d 100644 --- a/plugins/amdgpu/amdgpu_plugin_drm.c +++ 
b/plugins/amdgpu/amdgpu_plugin_drm.c @@ -19,115 +19,19 @@ #include #include "common/list.h" -#include "files.h" -#include "fdstore.h" #include "criu-amdgpu.pb-c.h" -/* Define __user as empty for kernel headers in user-space */ -#define __user -#include "drm.h" - #include #include #include "xmalloc.h" -#include "amdgpu_drm.h" +#include "criu-log.h" +#include "kfd_ioctl.h" #include "amdgpu_plugin_drm.h" #include "amdgpu_plugin_util.h" #include "amdgpu_plugin_topology.h" -#include "util.h" -#include "common/scm.h" - -int get_gem_handle(amdgpu_device_handle h_dev, int dmabuf_fd) -{ - uint32_t handle; - int fd = amdgpu_device_get_fd(h_dev); - - if (dmabuf_fd == -1) { - return -1; - } - - if (drmPrimeFDToHandle(fd, dmabuf_fd, &handle)) - return -1; - - return handle; -} - -int drmIoctl(int fd, unsigned long request, void *arg) -{ - int ret, max_retries = 200; - - do { - ret = ioctl(fd, request, arg); - } while (ret == -1 && max_retries-- > 0 && (errno == EINTR || errno == EAGAIN)); - - if (ret == -1 && errno == EBADF) - /* In case pthread_atfork didn't catch it, this will - * make any subsequent hsaKmt calls fail in CHECK_KFD_OPEN. 
- */ - pr_perror("KFD file descriptor not valid in this process"); - return ret; -} - -static int allocate_bo_entries(CriuRenderNode *e, int num_bos) -{ - e->bo_entries = xmalloc(sizeof(DrmBoEntry *) * num_bos); - if (!e->bo_entries) { - pr_err("Failed to allocate bo_info\n"); - return -ENOMEM; - } - - for (int i = 0; i < num_bos; i++) { - DrmBoEntry *entry = xzalloc(sizeof(*entry)); - - if (!entry) { - pr_err("Failed to allocate botest\n"); - return -ENOMEM; - } - - drm_bo_entry__init(entry); - - e->bo_entries[i] = entry; - e->n_bo_entries++; - } - return 0; -} - -static int allocate_vm_entries(DrmBoEntry *e, int num_vms) -{ - e->vm_entries = xmalloc(sizeof(DrmVmEntry *) * num_vms); - if (!e->vm_entries) { - pr_err("Failed to allocate bo_info\n"); - return -ENOMEM; - } - - for (int i = 0; i < num_vms; i++) { - DrmVmEntry *entry = xzalloc(sizeof(*entry)); - - if (!entry) { - pr_err("Failed to allocate botest\n"); - return -ENOMEM; - } - - drm_vm_entry__init(entry); - - e->vm_entries[i] = entry; - e->n_vm_entries++; - } - return 0; -} - -static void free_e(CriuRenderNode *e) -{ - for (int i = 0; i < e->n_bo_entries; i++) { - if (e->bo_entries[i]) - xfree(e->bo_entries[i]); - } - - xfree(e); -} int amdgpu_plugin_drm_handle_device_vma(int fd, const struct stat *st) { @@ -156,257 +60,19 @@ int amdgpu_plugin_drm_handle_device_vma(int fd, const struct stat *st) return 0; } -static int restore_bo_contents_drm(int drm_render_minor, CriuRenderNode *rd, int drm_fd, int *dmabufs) -{ - size_t image_size = 0, max_bo_size = 0, buffer_size; - struct amdgpu_gpu_info gpu_info = { 0 }; - amdgpu_device_handle h_dev; - uint64_t max_copy_size; - uint32_t major, minor; - FILE *bo_contents_fp = NULL; - void *buffer = NULL; - char img_path[40]; - int i, ret = 0; - - ret = amdgpu_device_initialize(drm_fd, &major, &minor, &h_dev); - if (ret) { - pr_perror("failed to initialize device"); - goto exit; - } - plugin_log_msg("libdrm initialized successfully\n"); - - ret = 
amdgpu_query_gpu_info(h_dev, &gpu_info); - if (ret) { - pr_perror("failed to query gpuinfo via libdrm"); - goto exit; - } - - max_copy_size = (gpu_info.family_id >= AMDGPU_FAMILY_AI) ? SDMA_LINEAR_COPY_MAX_SIZE : - SDMA_LINEAR_COPY_MAX_SIZE - 1; - - for (i = 0; i < rd->num_of_bos; i++) { - if (rd->bo_entries[i]->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT)) { - if (rd->bo_entries[i]->size > max_bo_size) - max_bo_size = rd->bo_entries[i]->size; - } - } - - buffer_size = max_bo_size; - - posix_memalign(&buffer, sysconf(_SC_PAGE_SIZE), buffer_size); - if (!buffer) { - pr_perror("Failed to alloc aligned memory. Consider setting KFD_MAX_BUFFER_SIZE."); - ret = -ENOMEM; - goto exit; - } - - for (i = 0; i < rd->num_of_bos; i++) { - if (!(rd->bo_entries[i]->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT))) - continue; - - if (rd->bo_entries[i]->num_of_vms == 0) - continue; - - snprintf(img_path, sizeof(img_path), IMG_DRM_PAGES_FILE, rd->id, drm_render_minor, i); - - bo_contents_fp = open_img_file(img_path, false, &image_size); - - ret = sdma_copy_bo(dmabufs[i], rd->bo_entries[i]->size, bo_contents_fp, buffer, buffer_size, h_dev, max_copy_size, - SDMA_OP_VRAM_WRITE, true); - if (ret) { - pr_err("Failed to fill the BO using sDMA: bo_buckets[%d]\n", i); - break; - } - plugin_log_msg("** Successfully filled the BO using sDMA: bo_buckets[%d] **\n", i); - - if (bo_contents_fp) - fclose(bo_contents_fp); - } - -exit: - for (int i = 0; i < rd->num_of_bos; i++) { - if (dmabufs[i] != KFD_INVALID_FD) - close(dmabufs[i]); - } - - xfree(buffer); - - amdgpu_device_deinitialize(h_dev); - return ret; -} int amdgpu_plugin_drm_dump_file(int fd, int id, struct stat *drm) { - CriuRenderNode *rd = NULL; + CriuRenderNode rd = CRIU_RENDER_NODE__INIT; + struct tp_node *tp_node; char path[PATH_MAX]; unsigned char *buf; int minor; int len; int ret; - size_t image_size; - struct tp_node *tp_node; - struct drm_amdgpu_gem_list_handles list_handles_args = { 0 
}; - struct drm_amdgpu_gem_list_handles_entry *list_handles_entries; - int num_bos; - - rd = xmalloc(sizeof(*rd)); - if (!rd) { - ret = -ENOMEM; - goto exit; - } - criu_render_node__init(rd); /* Get the topology node of the DRM device */ minor = minor(drm->st_rdev); - rd->drm_render_minor = minor; - rd->id = id; - - num_bos = 8; - list_handles_entries = xzalloc(sizeof(struct drm_amdgpu_gem_list_handles_entry) * num_bos); - list_handles_args.num_entries = num_bos; - list_handles_args.entries = (uintptr_t)list_handles_entries; - - ret = drmIoctl(fd, DRM_IOCTL_AMDGPU_GEM_LIST_HANDLES, &list_handles_args); - if (ret && errno == EINVAL) { - pr_info("This kernel appears not to have AMDGPU_GEM_LIST_HANDLES ioctl. Consider disabling Dmabuf IPC or updating your kernel.\n"); - list_handles_args.num_entries = 0; - } else if (ret) { - pr_perror("Failed to call bo info ioctl"); - goto exit; - } - - if (list_handles_args.num_entries > num_bos) { - num_bos = list_handles_args.num_entries; - xfree(list_handles_entries); - list_handles_entries = xzalloc(sizeof(struct drm_amdgpu_gem_list_handles_entry) * num_bos); - list_handles_args.num_entries = num_bos; - list_handles_args.entries = (uintptr_t)list_handles_entries; - ret = drmIoctl(fd, DRM_IOCTL_AMDGPU_GEM_LIST_HANDLES, &list_handles_args); - if (ret) { - pr_perror("Failed to call bo info ioctl"); - goto exit; - } - } else { - num_bos = list_handles_args.num_entries; - } - - rd->num_of_bos = num_bos; - ret = allocate_bo_entries(rd, num_bos); - if (ret) - goto exit; - - for (int i = 0; i < num_bos; i++) { - int num_vm_entries = 8; - struct drm_amdgpu_gem_vm_entry *vm_info_entries; - struct drm_amdgpu_gem_op vm_info_args = { 0 }; - DrmBoEntry *boinfo = rd->bo_entries[i]; - struct drm_amdgpu_gem_list_handles_entry handle_entry = list_handles_entries[i]; - union drm_amdgpu_gem_mmap mmap_args = { 0 }; - int dmabuf_fd; - uint32_t major, minor; - amdgpu_device_handle h_dev; - void *buffer = NULL; - char img_path[40]; - FILE 
*bo_contents_fp = NULL; - int device_fd; - - boinfo->size = handle_entry.size; - - boinfo->alloc_flags = handle_entry.alloc_flags; - boinfo->preferred_domains = handle_entry.preferred_domains; - boinfo->alignment = handle_entry.alignment; - boinfo->handle = handle_entry.gem_handle; - boinfo->is_import = (handle_entry.flags & AMDGPU_GEM_LIST_HANDLES_FLAG_IS_IMPORT) || shared_bo_has_exporter(boinfo->handle); - - mmap_args.in.handle = boinfo->handle; - - if (drmIoctl(fd, DRM_IOCTL_AMDGPU_GEM_MMAP, &mmap_args) == -1) { - pr_perror("Error Failed to call mmap ioctl"); - ret = -1; - goto exit; - } - - boinfo->offset = mmap_args.out.addr_ptr; - - vm_info_entries = xzalloc(sizeof(struct drm_amdgpu_gem_vm_entry) * num_vm_entries); - vm_info_args.handle = handle_entry.gem_handle; - vm_info_args.num_entries = num_vm_entries; - vm_info_args.value = (uintptr_t)vm_info_entries; - vm_info_args.op = AMDGPU_GEM_OP_GET_MAPPING_INFO; - ret = drmIoctl(fd, DRM_IOCTL_AMDGPU_GEM_OP, &vm_info_args); - if (ret) { - pr_perror("Failed to call vm info ioctl"); - goto exit; - } - - if (vm_info_args.num_entries > num_vm_entries) { - num_vm_entries = vm_info_args.num_entries; - xfree(vm_info_entries); - vm_info_entries = xzalloc(sizeof(struct drm_amdgpu_gem_vm_entry) * num_vm_entries); - vm_info_args.handle = handle_entry.gem_handle; - vm_info_args.num_entries = num_vm_entries; - vm_info_args.value = (uintptr_t)vm_info_entries; - vm_info_args.op = AMDGPU_GEM_OP_GET_MAPPING_INFO; - ret = drmIoctl(fd, DRM_IOCTL_AMDGPU_GEM_OP, &vm_info_args); - if (ret) { - pr_perror("Failed to call vm info ioctl"); - goto exit; - } - } else { - num_vm_entries = vm_info_args.num_entries; - } - - boinfo->num_of_vms = num_vm_entries; - ret = allocate_vm_entries(boinfo, num_vm_entries); - if (ret) - goto exit; - - for (int j = 0; j < num_vm_entries; j++) { - DrmVmEntry *vminfo = boinfo->vm_entries[j]; - - boinfo->addr = vm_info_entries[j].addr; - vminfo->addr = vm_info_entries[j].addr; - vminfo->size = 
vm_info_entries[j].size; - vminfo->offset = vm_info_entries[j].offset; - vminfo->flags = vm_info_entries[j].flags; - } - - ret = amdgpu_device_initialize(fd, &major, &minor, &h_dev); - - device_fd = amdgpu_device_get_fd(h_dev); - - drmPrimeHandleToFD(device_fd, boinfo->handle, 0, &dmabuf_fd); - - snprintf(img_path, sizeof(img_path), IMG_DRM_PAGES_FILE, rd->id, rd->drm_render_minor, i); - bo_contents_fp = open_img_file(img_path, true, &image_size); - - posix_memalign(&buffer, sysconf(_SC_PAGE_SIZE), handle_entry.size); - - ret = sdma_copy_bo(dmabuf_fd, handle_entry.size, bo_contents_fp, buffer, handle_entry.size, h_dev, 0x1000, - SDMA_OP_VRAM_READ, false); - - if (dmabuf_fd != KFD_INVALID_FD) - close(dmabuf_fd); - - if (bo_contents_fp) - fclose(bo_contents_fp); - - ret = amdgpu_device_deinitialize(h_dev); - if (ret) - goto exit; - - xfree(vm_info_entries); - } - xfree(list_handles_entries); - - for (int i = 0; i < num_bos; i++) { - DrmBoEntry *boinfo = rd->bo_entries[i]; - - ret = record_shared_bo(boinfo->handle, boinfo->is_import); - if (ret) - goto exit; - } - tp_node = sys_get_node_by_render_minor(&src_topology, minor); if (!tp_node) { pr_err("Failed to find a device with minor number = %d\n", minor); @@ -414,156 +80,21 @@ int amdgpu_plugin_drm_dump_file(int fd, int id, struct stat *drm) } /* Get the GPU_ID of the DRM device */ - rd->gpu_id = maps_get_dest_gpu(&checkpoint_maps, tp_node->gpu_id); - if (!rd->gpu_id) { - pr_err("Failed to find valid gpu_id for the device = %d\n", rd->gpu_id); + rd.gpu_id = maps_get_dest_gpu(&checkpoint_maps, tp_node->gpu_id); + if (!rd.gpu_id) { + pr_err("Failed to find valid gpu_id for the device = %d\n", rd.gpu_id); return -ENODEV; } - len = criu_render_node__get_packed_size(rd); + len = criu_render_node__get_packed_size(&rd); buf = xmalloc(len); if (!buf) return -ENOMEM; - criu_render_node__pack(rd, buf); + criu_render_node__pack(&rd, buf); snprintf(path, sizeof(path), IMG_DRM_FILE, id); ret = write_img_file(path, buf, len); - 
xfree(buf); -exit: - free_e(rd); return ret; } - -int amdgpu_plugin_drm_restore_file(int fd, CriuRenderNode *rd) -{ - int ret = 0; - bool retry_needed = false; - uint32_t major, minor; - amdgpu_device_handle h_dev; - int device_fd; - int *dmabufs = xzalloc(sizeof(int) * rd->num_of_bos); - - ret = amdgpu_device_initialize(fd, &major, &minor, &h_dev); - if (ret) { - pr_info("Error in init amdgpu device\n"); - goto exit; - } - - device_fd = amdgpu_device_get_fd(h_dev); - - for (int i = 0; i < rd->num_of_bos; i++) { - DrmBoEntry *boinfo = rd->bo_entries[i]; - int dmabuf_fd = -1; - uint32_t handle; - struct drm_gem_change_handle change_args = { 0 }; - union drm_amdgpu_gem_mmap mmap_args = { 0 }; - struct drm_amdgpu_gem_va va_args = { 0 }; - int fd_id; - - if (work_already_completed(boinfo->handle, rd->drm_render_minor)) { - continue; - } else if (boinfo->handle != -1) { - if (boinfo->is_import) { - fd_id = amdgpu_id_for_handle(boinfo->handle); - if (fd_id == -1) { - retry_needed = true; - continue; - } - dmabuf_fd = fdstore_get(fd_id); - } - } - - if (boinfo->is_import) { - drmPrimeFDToHandle(device_fd, dmabuf_fd, &handle); - } else { - union drm_amdgpu_gem_create create_args = { 0 }; - - create_args.in.bo_size = boinfo->size; - create_args.in.alignment = boinfo->alignment; - create_args.in.domains = boinfo->preferred_domains; - create_args.in.domain_flags = boinfo->alloc_flags; - - if (drmIoctl(fd, DRM_IOCTL_AMDGPU_GEM_CREATE, &create_args) == -1) { - pr_perror("Error Failed to call create ioctl"); - ret = -1; - goto exit; - } - handle = create_args.out.handle; - - drmPrimeHandleToFD(device_fd, handle, 0, &dmabuf_fd); - } - - change_args.handle = handle; - change_args.new_handle = boinfo->handle; - - if (drmIoctl(fd, DRM_IOCTL_GEM_CHANGE_HANDLE, &change_args) == -1) { - pr_perror("Error Failed to call change ioctl; check if the kernel has DRM_IOCTL_GEM_CHANGE_HANDLE support"); - ret = -1; - goto exit; - } - - if (!boinfo->is_import) - store_dmabuf_fd(boinfo->handle, 
dmabuf_fd); - - dmabufs[i] = dmabuf_fd; - - ret = record_completed_work(boinfo->handle, rd->drm_render_minor); - if (ret) - goto exit; - - mmap_args.in.handle = boinfo->handle; - - if (drmIoctl(fd, DRM_IOCTL_AMDGPU_GEM_MMAP, &mmap_args) == -1) { - pr_perror("Error Failed to call mmap ioctl"); - ret = -1; - goto exit; - } - - for (int j = 0; j < boinfo->num_of_vms; j++) { - DrmVmEntry *vminfo = boinfo->vm_entries[j]; - - va_args.handle = boinfo->handle; - va_args.operation = AMDGPU_VA_OP_MAP; - va_args.flags = vminfo->flags; - va_args.va_address = vminfo->addr; - va_args.offset_in_bo = vminfo->offset; - va_args.map_size = vminfo->size; - - if (drmIoctl(fd, DRM_IOCTL_AMDGPU_GEM_VA, &va_args) == -1) { - pr_perror("Error Failed to call gem va ioctl"); - ret = -1; - goto exit; - } - } - - ret = save_vma_updates(boinfo->offset, boinfo->addr, mmap_args.out.addr_ptr, fd); - if (ret < 0) - goto exit; - } - - if (ret) { - pr_info("Error in deinit amdgpu device\n"); - goto exit; - } - - ret = record_completed_work(-1, rd->drm_render_minor); - if (ret) - goto exit; - - ret = amdgpu_device_deinitialize(h_dev); - - if (rd->num_of_bos > 0) { - ret = restore_bo_contents_drm(rd->drm_render_minor, rd, fd, dmabufs); - if (ret) - goto exit; - } - -exit: - if (ret < 0) - return ret; - xfree(dmabufs); - - return retry_needed; -} diff --git a/plugins/amdgpu/amdgpu_plugin_drm.h b/plugins/amdgpu/amdgpu_plugin_drm.h index c766def56..6f0c1a9a6 100644 --- a/plugins/amdgpu/amdgpu_plugin_drm.h +++ b/plugins/amdgpu/amdgpu_plugin_drm.h @@ -24,17 +24,5 @@ int amdgpu_plugin_drm_handle_device_vma(int fd, const struct stat *drm); */ int amdgpu_plugin_drm_dump_file(int fd, int id, struct stat *drm); -int amdgpu_plugin_drm_restore_file(int fd, CriuRenderNode *rd); - -int amdgpu_plugin_drm_unpause_file(int fd); - -int amdgpu_id_for_handle(int handle); - -int store_dmabuf_fd(int handle, int fd); - -int get_gem_handle(amdgpu_device_handle h_dev, int dmabuf_fd); - -int save_vma_updates(uint64_t offset, 
uint64_t addr, uint64_t restored_offset, int gpu_id); - #endif /* __AMDGPU_PLUGIN_DRM_H__ */ diff --git a/plugins/amdgpu/amdgpu_plugin_topology.c b/plugins/amdgpu/amdgpu_plugin_topology.c index 730f2e028..5b4396a0c 100644 --- a/plugins/amdgpu/amdgpu_plugin_topology.c +++ b/plugins/amdgpu/amdgpu_plugin_topology.c @@ -45,7 +45,7 @@ bool kfd_capability_check = true; */ int fd_next = -1; -int open_drm_render_device(int minor) +static int open_drm_render_device(int minor) { char path[128]; int fd, ret_fd; diff --git a/plugins/amdgpu/amdgpu_plugin_topology.h b/plugins/amdgpu/amdgpu_plugin_topology.h index e19f8e7ce..c890e3dda 100644 --- a/plugins/amdgpu/amdgpu_plugin_topology.h +++ b/plugins/amdgpu/amdgpu_plugin_topology.h @@ -118,7 +118,6 @@ struct tp_node *sys_get_node_by_gpu_id(const struct tp_system *sys, const uint32 struct tp_node *sys_get_node_by_render_minor(const struct tp_system *sys, const int drm_render_minor); struct tp_node *sys_get_node_by_index(const struct tp_system *sys, uint32_t index); -int open_drm_render_device(int minor); int node_get_drm_render_device(struct tp_node *node); void sys_close_drm_render_devices(struct tp_system *sys); diff --git a/plugins/amdgpu/amdgpu_plugin_util.c b/plugins/amdgpu/amdgpu_plugin_util.c old mode 100644 new mode 100755 index 592562474..a165fc9cd --- a/plugins/amdgpu/amdgpu_plugin_util.c +++ b/plugins/amdgpu/amdgpu_plugin_util.c @@ -37,11 +37,9 @@ #include "amdgpu_drm.h" #include "amdgpu_plugin_util.h" #include "amdgpu_plugin_topology.h" -#include "amdgpu_plugin_drm.h" -static LIST_HEAD(dumped_fds); -static LIST_HEAD(shared_bos); -static LIST_HEAD(completed_work); +/* Tracks number of device files that need to be checkpointed */ +static int dev_file_cnt = 0; /* Helper structures to encode device topology of SRC and DEST platforms */ struct tp_system src_topology; @@ -51,145 +49,23 @@ struct tp_system dest_topology; struct device_maps checkpoint_maps; struct device_maps restore_maps; -int record_dumped_fd(int fd, bool 
is_drm) +bool checkpoint_is_complete() { - int newfd = dup(fd); - - if (newfd < 0) - return newfd; - struct dumped_fd *st = malloc(sizeof(struct dumped_fd)); - if (!st) - return -1; - st->fd = newfd; - st->is_drm = is_drm; - list_add(&st->l, &dumped_fds); - - return 0; + return (dev_file_cnt == 0); } -struct list_head *get_dumped_fds() +void decrement_checkpoint_count() { - return &dumped_fds; + dev_file_cnt--; } -bool shared_bo_has_exporter(int handle) +void init_gpu_count(struct tp_system *topo) { - struct shared_bo *bo; + if (dev_file_cnt != 0) + return; - if (handle == -1) - return false; - - list_for_each_entry(bo, &shared_bos, l) { - if (bo->handle == handle) { - return bo->has_exporter; - } - } - - return false; -} - -int record_shared_bo(int handle, bool is_imported) -{ - struct shared_bo *bo; - - if (handle == -1) - return 0; - - list_for_each_entry(bo, &shared_bos, l) { - if (bo->handle == handle) { - return 0; - } - } - bo = malloc(sizeof(struct shared_bo)); - if (!bo) - return -1; - bo->handle = handle; - bo->has_exporter = !is_imported; - list_add(&bo->l, &shared_bos); - - return 0; -} - -int handle_for_shared_bo_fd(int fd) -{ - struct dumped_fd *df; - int trial_handle; - amdgpu_device_handle h_dev; - uint32_t major, minor; - struct shared_bo *bo; - - list_for_each_entry(df, &dumped_fds, l) { - /* see if the gem handle for fd using the hdev for df->fd is the - same as bo->handle. 
*/ - - if (!df->is_drm) { - continue; - } - - if (amdgpu_device_initialize(df->fd, &major, &minor, &h_dev)) { - pr_err("Failed to initialize amdgpu device\n"); - continue; - } - - trial_handle = get_gem_handle(h_dev, fd); - if (trial_handle < 0) - continue; - - list_for_each_entry(bo, &shared_bos, l) { - if (bo->handle == trial_handle) - return trial_handle; - } - - amdgpu_device_deinitialize(h_dev); - } - - return -1; -} - -int record_completed_work(int handle, int id) -{ - struct restore_completed_work *work; - - work = malloc(sizeof(struct restore_completed_work)); - if (!work) - return -1; - work->handle = handle; - work->id = id; - list_add(&work->l, &completed_work); - - return 0; -} - -bool work_already_completed(int handle, int id) -{ - struct restore_completed_work *work; - - list_for_each_entry(work, &completed_work, l) { - if (work->handle == handle && work->id == id) { - return true; - } - } - - return false; -} - -void clear_restore_state() -{ - while (!list_empty(&completed_work)) { - struct restore_completed_work *st = list_first_entry(&completed_work, struct restore_completed_work, l); - list_del(&st->l); - free(st); - } -} - -void clear_dumped_fds() -{ - while (!list_empty(&dumped_fds)) { - struct dumped_fd *st = list_first_entry(&dumped_fds, struct dumped_fd, l); - list_del(&st->l); - close(st->fd); - free(st); - } + /* We add ONE to include checkpointing of KFD device */ + dev_file_cnt = 1 + topology_gpu_count(topo); } int read_fp(FILE *fp, void *buf, const size_t buf_len) diff --git a/plugins/amdgpu/amdgpu_plugin_util.h b/plugins/amdgpu/amdgpu_plugin_util.h old mode 100644 new mode 100755 index f5f752d0b..aacca3a28 --- a/plugins/amdgpu/amdgpu_plugin_util.h +++ b/plugins/amdgpu/amdgpu_plugin_util.h @@ -1,8 +1,6 @@ #ifndef __AMDGPU_PLUGIN_UTIL_H__ #define __AMDGPU_PLUGIN_UTIL_H__ -#include - #ifndef _GNU_SOURCE #define _GNU_SOURCE 1 #endif @@ -53,18 +51,14 @@ /* Name of file having serialized data of DRM device */ #define IMG_DRM_FILE 
"amdgpu-renderD-%d.img" -/* Name of file having serialized data of dmabuf meta */ -#define IMG_DMABUF_FILE "amdgpu-dmabuf_%d.img" - /* Name of file having serialized data of DRM device buffer objects (BOs) */ -#define IMG_DRM_PAGES_FILE "amdgpu-drm-pages-%d-%d-%04x.img" +#define IMG_DRM_PAGES_FILE "amdgpu-drm-pages-%d-%04x.img" /* Helper macros to Checkpoint and Restore a ROCm file */ #define HSAKMT_SHM_PATH "/dev/shm/hsakmt_shared_mem" #define HSAKMT_SHM "/hsakmt_shared_mem" #define HSAKMT_SEM_PATH "/dev/shm/sem.hsakmt_semaphore" #define HSAKMT_SEM "hsakmt_semaphore" -#define DMABUF_LINK "/dmabuf" /* Help macros to build sDMA command packets */ #define SDMA_PACKET(op, sub_op, e) ((((e)&0xFFFF) << 16) | (((sub_op)&0xFF) << 8) | (((op)&0xFF) << 0)) @@ -79,24 +73,6 @@ enum sdma_op_type { SDMA_OP_VRAM_WRITE, }; -struct dumped_fd { - struct list_head l; - int fd; - bool is_drm; -}; - -struct shared_bo { - struct list_head l; - int handle; - bool has_exporter; -}; - -struct restore_completed_work { - struct list_head l; - int handle; - int id; -}; - /* Helper structures to encode device topology of SRC and DEST platforms */ extern struct tp_system src_topology; extern struct tp_system dest_topology; @@ -121,25 +97,10 @@ int read_file(const char *file_path, void *buf, const size_t buf_len); int write_img_file(char *path, const void *buf, const size_t buf_len); FILE *open_img_file(char *path, bool write, size_t *size); -int record_dumped_fd(int fd, bool is_drm); -struct list_head *get_dumped_fds(); -void clear_dumped_fds(); - -bool shared_bo_has_exporter(int handle); -int record_shared_bo(int handle, bool is_imported); -int handle_for_shared_bo_fd(int dmabuf_fd); - -int record_completed_work(int handle, int id); -bool work_already_completed(int handle, int id); - -void clear_restore_state(); +bool checkpoint_is_complete(); +void decrement_checkpoint_count(); +void init_gpu_count(struct tp_system *topology); void print_kfd_bo_stat(int bo_cnt, struct kfd_criu_bo_bucket 
*bo_list); -int sdma_copy_bo(int shared_fd, uint64_t size, FILE *storage_fp, - void *buffer, size_t buffer_size, amdgpu_device_handle h_dev, - uint64_t max_copy_size, enum sdma_op_type type, bool do_not_free); - -int serve_out_dmabuf_fd(int handle, int fd); - #endif /* __AMDGPU_PLUGIN_UTIL_H__ */ diff --git a/plugins/amdgpu/amdgpu_socket_utils.c b/plugins/amdgpu/amdgpu_socket_utils.c deleted file mode 100644 index c8bf6d1ba..000000000 --- a/plugins/amdgpu/amdgpu_socket_utils.c +++ /dev/null @@ -1,320 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include - -#include "amdgpu_socket_utils.h" -#include "criu-log.h" -#include "common/scm.h" -#include "fdstore.h" -#include "util-pie.h" -#include "util.h" - -int parallel_socket_addr_len; -struct sockaddr_un parallel_socket_addr; -int parallel_socket_id = 0; - -static void amdgpu_socket_name_gen(struct sockaddr_un *addr, int *len) -{ - addr->sun_family = AF_UNIX; - snprintf(addr->sun_path, UNIX_PATH_MAX, "x/criu-amdgpu-parallel-%s", criu_run_id); - *len = SUN_LEN(addr); - *addr->sun_path = '\0'; -} - -int install_parallel_sock(void) -{ - int ret = 0; - int sock_fd; - - sock_fd = socket(PF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0); - if (sock_fd < 0) { - pr_perror("socket creation failed"); - return -1; - } - - amdgpu_socket_name_gen(¶llel_socket_addr, ¶llel_socket_addr_len); - ret = bind(sock_fd, (struct sockaddr *)¶llel_socket_addr, parallel_socket_addr_len); - if (ret < 0) { - pr_perror("bind failed"); - goto err; - } - - ret = listen(sock_fd, SOMAXCONN); - if (ret < 0) { - pr_perror("listen failed"); - goto err; - } - - parallel_socket_id = fdstore_add(sock_fd); - if (parallel_socket_id < 0) { - ret = -1; - goto err; - } -err: - close(sock_fd); - return ret; -} - -void parallel_restore_bo_add(int dmabuf_fd, int gpu_id, uint64_t size, uint64_t offset, - parallel_restore_cmd *restore_cmd) -{ - parallel_restore_entry *restore_entry = &restore_cmd->entries[restore_cmd->cmd_head.entry_num]; - 
restore_entry->gpu_id = gpu_id; - restore_entry->write_id = restore_cmd->cmd_head.fd_write_num; - restore_entry->write_offset = 0; - restore_entry->read_offset = offset; - restore_entry->size = size; - - restore_cmd->fds_write[restore_cmd->cmd_head.fd_write_num] = dmabuf_fd; - - restore_cmd->cmd_head.entry_num += 1; - restore_cmd->cmd_head.fd_write_num += 1; -} - -void parallel_restore_gpu_id_add(int gpu_id, int minor, parallel_restore_cmd *restore_cmd) -{ - restore_cmd->gpu_ids[restore_cmd->cmd_head.gpu_num] = (parallel_gpu_info){ gpu_id, minor }; - restore_cmd->cmd_head.gpu_num += 1; -} - -static int send_metadata(int sock_fd, parallel_restore_cmd *restore_cmd) -{ - if (send(sock_fd, &restore_cmd->cmd_head, sizeof(parallel_restore_cmd_head), 0) < 0) { - pr_perror("Send parallel restore command head fail"); - return -1; - } - return 0; -} - -static int send_gpu_ids(int sock_fd, parallel_restore_cmd *restore_cmd) -{ - if (send(sock_fd, restore_cmd->gpu_ids, restore_cmd->cmd_head.gpu_num * sizeof(parallel_gpu_info), 0) < 0) { - pr_perror("Send GPU ids of parallel restore command fail"); - return -1; - } - return 0; -} - -static int send_cmds(int sock_fd, parallel_restore_cmd *restore_cmd) -{ - if (send(sock_fd, restore_cmd->entries, restore_cmd->cmd_head.entry_num * sizeof(parallel_restore_entry), 0) < 0) { - pr_perror("Send parallel restore command fail"); - return -1; - } - return 0; -} - -static int send_dmabuf_fds(int sock_fd, parallel_restore_cmd *restore_cmd) -{ - if (send_fds(sock_fd, NULL, 0, restore_cmd->fds_write, restore_cmd->cmd_head.fd_write_num, 0, 0) < 0) { - pr_perror("Send dmabuf fds fail"); - return -1; - } - return 0; -} - -int send_parallel_restore_cmd(parallel_restore_cmd *restore_cmd) -{ - int sock_fd; - int ret = 0; - - sock_fd = socket(PF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0); - if (sock_fd < 0) { - pr_perror("Socket creation failed"); - return -1; - } - - ret = connect(sock_fd, (struct sockaddr *)¶llel_socket_addr, parallel_socket_addr_len); 
- if (ret < 0) { - pr_perror("Connect failed"); - goto err; - } - - ret = send_metadata(sock_fd, restore_cmd); - if (ret) { - goto err; - } - - ret = send_gpu_ids(sock_fd, restore_cmd); - if (ret) { - goto err; - } - - ret = send_cmds(sock_fd, restore_cmd); - if (ret) { - goto err; - } - - ret = send_dmabuf_fds(sock_fd, restore_cmd); - -err: - close(sock_fd); - return ret; -} - -int init_parallel_restore_cmd(int num, int id, int gpu_num, parallel_restore_cmd *restore_cmd) -{ - restore_cmd->cmd_head.id = id; - restore_cmd->cmd_head.fd_write_num = 0; - restore_cmd->cmd_head.entry_num = 0; - restore_cmd->cmd_head.gpu_num = 0; - - restore_cmd->gpu_ids = xzalloc(gpu_num * sizeof(parallel_gpu_info)); - if (!restore_cmd->gpu_ids) - return -ENOMEM; - restore_cmd->fds_write = xzalloc(num * sizeof(int)); - if (!restore_cmd->fds_write) - return -ENOMEM; - restore_cmd->entries = xzalloc(num * sizeof(parallel_restore_entry)); - if (!restore_cmd->entries) - return -ENOMEM; - return 0; -} - -void free_parallel_restore_cmd(parallel_restore_cmd *restore_cmd) -{ - if (restore_cmd->gpu_ids) - xfree(restore_cmd->gpu_ids); - if (restore_cmd->fds_write) - xfree(restore_cmd->fds_write); - if (restore_cmd->entries) - xfree(restore_cmd->entries); -} - -static int init_parallel_restore_cmd_by_head(parallel_restore_cmd *restore_cmd) -{ - restore_cmd->gpu_ids = xzalloc(restore_cmd->cmd_head.gpu_num * sizeof(parallel_gpu_info)); - if (!restore_cmd->gpu_ids) - return -ENOMEM; - restore_cmd->fds_write = xzalloc(restore_cmd->cmd_head.fd_write_num * sizeof(int)); - if (!restore_cmd->fds_write) - return -ENOMEM; - restore_cmd->entries = xzalloc(restore_cmd->cmd_head.entry_num * sizeof(parallel_restore_entry)); - if (!restore_cmd->entries) - return -ENOMEM; - return 0; -} - -static int check_quit_cmd(parallel_restore_cmd *restore_cmd) -{ - return restore_cmd->cmd_head.fd_write_num == 0; -} - -static int recv_metadata(int client_fd, parallel_restore_cmd *restore_cmd) -{ - if (recv(client_fd, 
&restore_cmd->cmd_head, sizeof(parallel_restore_cmd_head), 0) < 0) { - pr_perror("Recv parallel restore command head fail"); - return -1; - } - return 0; -} - -static int recv_cmds(int client_fd, parallel_restore_cmd *restore_cmd) -{ - if (recv(client_fd, restore_cmd->entries, restore_cmd->cmd_head.entry_num * sizeof(parallel_restore_entry), 0) < 0) { - pr_perror("Recv parallel restore command fail"); - return -1; - } - return 0; -} - -static int recv_gpu_ids(int sock_fd, parallel_restore_cmd *restore_cmd) -{ - if (recv(sock_fd, restore_cmd->gpu_ids, restore_cmd->cmd_head.gpu_num * sizeof(parallel_gpu_info), 0) < 0) { - pr_perror("Send GPU ids of parallel restore command fail"); - return -1; - } - return 0; -} - -static int recv_dmabuf_fds(int client_fd, parallel_restore_cmd *restore_cmd) -{ - if (recv_fds(client_fd, restore_cmd->fds_write, restore_cmd->cmd_head.fd_write_num, 0, 0) < 0) { - pr_perror("Recv dmabuf fds fail"); - return -1; - } - return 0; -} - -int recv_parallel_restore_cmd(parallel_restore_cmd *restore_cmd) -{ - int sock_fd, client_fd; - int ret = 0; - - sock_fd = fdstore_get(parallel_socket_id); - if (sock_fd < 0) - return -1; - - client_fd = accept(sock_fd, NULL, NULL); - if (client_fd < 0) { - ret = client_fd; - goto err_accept; - } - - ret = recv_metadata(client_fd, restore_cmd); - if (ret) { - goto err; - } - - // Return 1 to quit - if (check_quit_cmd(restore_cmd)) { - ret = 1; - goto err; - } - - ret = init_parallel_restore_cmd_by_head(restore_cmd); - if (ret) { - goto err; - } - - ret = recv_gpu_ids(client_fd, restore_cmd); - if (ret) { - goto err; - } - - ret = recv_cmds(client_fd, restore_cmd); - if (ret) { - goto err; - } - - ret = recv_dmabuf_fds(client_fd, restore_cmd); - -err: - close(client_fd); -err_accept: - close(sock_fd); - return ret; -} - -int close_parallel_restore_server(void) -{ - int sock_fd; - int ret = 0; - parallel_restore_cmd_head cmd_head; - - sock_fd = socket(PF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0); - if (sock_fd < 0) { 
- pr_perror("Socket creation failed"); - return -1; - } - - ret = connect(sock_fd, (struct sockaddr *)¶llel_socket_addr, parallel_socket_addr_len); - if (ret < 0) { - pr_perror("Connect failed"); - goto err; - } - - memset(&cmd_head, 0, sizeof(parallel_restore_cmd_head)); - if (send(sock_fd, &cmd_head, sizeof(parallel_restore_cmd_head), 0) < 0) { - pr_perror("Send parallel restore command head fail"); - return -1; - } - -err: - close(sock_fd); - return ret; -} \ No newline at end of file diff --git a/plugins/amdgpu/amdgpu_socket_utils.h b/plugins/amdgpu/amdgpu_socket_utils.h deleted file mode 100644 index d7200c6bd..000000000 --- a/plugins/amdgpu/amdgpu_socket_utils.h +++ /dev/null @@ -1,54 +0,0 @@ -#ifndef __KFD_PLUGIN_AMDGPU_SOCKET_UTILS_H__ -#define __KFD_PLUGIN_AMDGPU_SOCKET_UTILS_H__ - -typedef struct { - int id; - int fd_write_num; /* The number of buffer objects to be restored. */ - int entry_num; /* The number of restore commands.*/ - int gpu_num; -} parallel_restore_cmd_head; - -typedef struct { - int gpu_id; - int minor; -} parallel_gpu_info; - -typedef struct { - int gpu_id; - int write_id; - uint64_t read_offset; - uint64_t write_offset; - uint64_t size; -} parallel_restore_entry; - -typedef struct { - parallel_restore_cmd_head cmd_head; - int *fds_write; - parallel_gpu_info *gpu_ids; - parallel_restore_entry *entries; -} parallel_restore_cmd; - -/* - * For parallel_restore, a background thread in the main CRIU process is used to restore the GPU - * buffer object. However, initially, the ownership of these buffer objects and the metadata for - * restoration are all with the target process. Therefore, we introduce a series of functions to - * help the target process send these tasks to the main CRIU process. 
- */ -int init_parallel_restore_cmd(int num, int id, int gpu_num, parallel_restore_cmd *restore_cmd); - -void free_parallel_restore_cmd(parallel_restore_cmd *restore_cmd); - -int install_parallel_sock(void); - -int send_parallel_restore_cmd(parallel_restore_cmd *restore_cmd); - -int recv_parallel_restore_cmd(parallel_restore_cmd *restore_cmd); - -void parallel_restore_bo_add(int dmabuf_fd, int gpu_id, uint64_t size, uint64_t offset, - parallel_restore_cmd *restore_cmd); - -void parallel_restore_gpu_id_add(int gpu_id, int minor, parallel_restore_cmd *restore_cmd); - -int close_parallel_restore_server(void); - -#endif \ No newline at end of file diff --git a/plugins/amdgpu/criu-amdgpu.proto b/plugins/amdgpu/criu-amdgpu.proto index 7682a8f21..078b67650 100644 --- a/plugins/amdgpu/criu-amdgpu.proto +++ b/plugins/amdgpu/criu-amdgpu.proto @@ -46,7 +46,6 @@ message kfd_bo_entry { required uint64 offset = 3; required uint32 alloc_flags = 4; required uint32 gpu_id = 5; - required uint32 handle = 6; } message criu_kfd { @@ -62,34 +61,6 @@ message criu_kfd { required bytes priv_data = 10; } -message drm_bo_entry { - required uint64 addr = 1; - required uint64 size = 2; - required uint64 offset = 3; - required uint64 alloc_flags = 4; - required uint64 alignment = 5; - required uint32 preferred_domains = 6; - required uint32 handle = 7; - required uint32 is_import = 8; - required uint32 num_of_vms = 9; - repeated drm_vm_entry vm_entries = 10; -} - -message drm_vm_entry { - required uint64 addr = 1; - required uint64 size = 2; - required uint64 offset = 3; - required uint64 flags = 4; -} - message criu_render_node { required uint32 gpu_id = 1; - required uint32 id = 2; - required uint32 drm_render_minor = 3; - required uint64 num_of_bos = 4; - repeated drm_bo_entry bo_entries = 5; -} - -message criu_dmabuf_node { - required uint32 gem_handle = 1; } diff --git a/plugins/amdgpu/drm.h b/plugins/amdgpu/drm.h deleted file mode 100644 index 3cd5cf15e..000000000 --- 
a/plugins/amdgpu/drm.h +++ /dev/null @@ -1,1476 +0,0 @@ -/* - * Header for the Direct Rendering Manager - * - * Author: Rickard E. (Rik) Faith - * - * Acknowledgments: - * Dec 1999, Richard Henderson , move to generic cmpxchg. - */ - -/* - * Copyright 1999 Precision Insight, Inc., Cedar Park, Texas. - * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California. - * All rights reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- */ - -#ifndef _DRM_H_ -#define _DRM_H_ - -#if defined(__KERNEL__) - -#include -#include -typedef unsigned int drm_handle_t; - -#elif defined(__linux__) - -#include -#include -typedef unsigned int drm_handle_t; - -#else /* One of the BSDs */ - -#include -#include -#include -typedef int8_t __s8; -typedef uint8_t __u8; -typedef int16_t __s16; -typedef uint16_t __u16; -typedef int32_t __s32; -typedef uint32_t __u32; -typedef int64_t __s64; -typedef uint64_t __u64; -typedef size_t __kernel_size_t; -typedef unsigned long drm_handle_t; - -#endif - -#if defined(__cplusplus) -extern "C" { -#endif - -#define DRM_NAME "drm" /**< Name in kernel, /dev, and /proc */ -#define DRM_MIN_ORDER 5 /**< At least 2^5 bytes = 32 bytes */ -#define DRM_MAX_ORDER 22 /**< Up to 2^22 bytes = 4MB */ -#define DRM_RAM_PERCENT 10 /**< How much system ram can we lock? */ - -#define _DRM_LOCK_HELD 0x80000000U /**< Hardware lock is held */ -#define _DRM_LOCK_CONT 0x40000000U /**< Hardware lock is contended */ -#define _DRM_LOCK_IS_HELD(lock) ((lock) & _DRM_LOCK_HELD) -#define _DRM_LOCK_IS_CONT(lock) ((lock) & _DRM_LOCK_CONT) -#define _DRM_LOCKING_CONTEXT(lock) ((lock) & ~(_DRM_LOCK_HELD|_DRM_LOCK_CONT)) - -typedef unsigned int drm_context_t; -typedef unsigned int drm_drawable_t; -typedef unsigned int drm_magic_t; - -/* - * Cliprect. - * - * \warning: If you change this structure, make sure you change - * XF86DRIClipRectRec in the server as well - * - * \note KW: Actually it's illegal to change either for - * backwards-compatibility reasons. - */ -struct drm_clip_rect { - unsigned short x1; - unsigned short y1; - unsigned short x2; - unsigned short y2; -}; - -/* - * Drawable information. - */ -struct drm_drawable_info { - unsigned int num_rects; - struct drm_clip_rect *rects; -}; - -/* - * Texture region, - */ -struct drm_tex_region { - unsigned char next; - unsigned char prev; - unsigned char in_use; - unsigned char padding; - unsigned int age; -}; - -/* - * Hardware lock. 
- * - * The lock structure is a simple cache-line aligned integer. To avoid - * processor bus contention on a multiprocessor system, there should not be any - * other data stored in the same cache line. - */ -struct drm_hw_lock { - __volatile__ unsigned int lock; /**< lock variable */ - char padding[60]; /**< Pad to cache line */ -}; - -/* - * DRM_IOCTL_VERSION ioctl argument type. - * - * \sa drmGetVersion(). - */ -struct drm_version { - int version_major; /**< Major version */ - int version_minor; /**< Minor version */ - int version_patchlevel; /**< Patch level */ - __kernel_size_t name_len; /**< Length of name buffer */ - char __user *name; /**< Name of driver */ - __kernel_size_t date_len; /**< Length of date buffer */ - char __user *date; /**< User-space buffer to hold date */ - __kernel_size_t desc_len; /**< Length of desc buffer */ - char __user *desc; /**< User-space buffer to hold desc */ -}; - -/* - * DRM_IOCTL_GET_UNIQUE ioctl argument type. - * - * \sa drmGetBusid() and drmSetBusId(). - */ -struct drm_unique { - __kernel_size_t unique_len; /**< Length of unique */ - char __user *unique; /**< Unique name for driver instantiation */ -}; - -struct drm_list { - int count; /**< Length of user-space structures */ - struct drm_version __user *version; -}; - -struct drm_block { - int unused; -}; - -/* - * DRM_IOCTL_CONTROL ioctl argument type. - * - * \sa drmCtlInstHandler() and drmCtlUninstHandler(). - */ -struct drm_control { - enum { - DRM_ADD_COMMAND, - DRM_RM_COMMAND, - DRM_INST_HANDLER, - DRM_UNINST_HANDLER - } func; - int irq; -}; - -/* - * Type of memory to map. - */ -enum drm_map_type { - _DRM_FRAME_BUFFER = 0, /**< WC (no caching), no core dump */ - _DRM_REGISTERS = 1, /**< no caching, no core dump */ - _DRM_SHM = 2, /**< shared, cached */ - _DRM_AGP = 3, /**< AGP/GART */ - _DRM_SCATTER_GATHER = 4, /**< Scatter/gather memory for PCI DMA */ - _DRM_CONSISTENT = 5 /**< Consistent memory for PCI DMA */ -}; - -/* - * Memory mapping flags. 
- */ -enum drm_map_flags { - _DRM_RESTRICTED = 0x01, /**< Cannot be mapped to user-virtual */ - _DRM_READ_ONLY = 0x02, - _DRM_LOCKED = 0x04, /**< shared, cached, locked */ - _DRM_KERNEL = 0x08, /**< kernel requires access */ - _DRM_WRITE_COMBINING = 0x10, /**< use write-combining if available */ - _DRM_CONTAINS_LOCK = 0x20, /**< SHM page that contains lock */ - _DRM_REMOVABLE = 0x40, /**< Removable mapping */ - _DRM_DRIVER = 0x80 /**< Managed by driver */ -}; - -struct drm_ctx_priv_map { - unsigned int ctx_id; /**< Context requesting private mapping */ - void *handle; /**< Handle of map */ -}; - -/* - * DRM_IOCTL_GET_MAP, DRM_IOCTL_ADD_MAP and DRM_IOCTL_RM_MAP ioctls - * argument type. - * - * \sa drmAddMap(). - */ -struct drm_map { - unsigned long offset; /**< Requested physical address (0 for SAREA)*/ - unsigned long size; /**< Requested physical size (bytes) */ - enum drm_map_type type; /**< Type of memory to map */ - enum drm_map_flags flags; /**< Flags */ - void *handle; /**< User-space: "Handle" to pass to mmap() */ - /**< Kernel-space: kernel-virtual address */ - int mtrr; /**< MTRR slot used */ - /* Private data */ -}; - -/* - * DRM_IOCTL_GET_CLIENT ioctl argument type. - */ -struct drm_client { - int idx; /**< Which client desired? */ - int auth; /**< Is client authenticated? 
*/ - unsigned long pid; /**< Process ID */ - unsigned long uid; /**< User ID */ - unsigned long magic; /**< Magic */ - unsigned long iocs; /**< Ioctl count */ -}; - -enum drm_stat_type { - _DRM_STAT_LOCK, - _DRM_STAT_OPENS, - _DRM_STAT_CLOSES, - _DRM_STAT_IOCTLS, - _DRM_STAT_LOCKS, - _DRM_STAT_UNLOCKS, - _DRM_STAT_VALUE, /**< Generic value */ - _DRM_STAT_BYTE, /**< Generic byte counter (1024bytes/K) */ - _DRM_STAT_COUNT, /**< Generic non-byte counter (1000/k) */ - - _DRM_STAT_IRQ, /**< IRQ */ - _DRM_STAT_PRIMARY, /**< Primary DMA bytes */ - _DRM_STAT_SECONDARY, /**< Secondary DMA bytes */ - _DRM_STAT_DMA, /**< DMA */ - _DRM_STAT_SPECIAL, /**< Special DMA (e.g., priority or polled) */ - _DRM_STAT_MISSED /**< Missed DMA opportunity */ - /* Add to the *END* of the list */ -}; - -/* - * DRM_IOCTL_GET_STATS ioctl argument type. - */ -struct drm_stats { - unsigned long count; - struct { - unsigned long value; - enum drm_stat_type type; - } data[15]; -}; - -/* - * Hardware locking flags. - */ -enum drm_lock_flags { - _DRM_LOCK_READY = 0x01, /**< Wait until hardware is ready for DMA */ - _DRM_LOCK_QUIESCENT = 0x02, /**< Wait until hardware quiescent */ - _DRM_LOCK_FLUSH = 0x04, /**< Flush this context's DMA queue first */ - _DRM_LOCK_FLUSH_ALL = 0x08, /**< Flush all DMA queues first */ - /* These *HALT* flags aren't supported yet - -- they will be used to support the - full-screen DGA-like mode. */ - _DRM_HALT_ALL_QUEUES = 0x10, /**< Halt all current and future queues */ - _DRM_HALT_CUR_QUEUES = 0x20 /**< Halt all current queues */ -}; - -/* - * DRM_IOCTL_LOCK, DRM_IOCTL_UNLOCK and DRM_IOCTL_FINISH ioctl argument type. - * - * \sa drmGetLock() and drmUnlock(). - */ -struct drm_lock { - int context; - enum drm_lock_flags flags; -}; - -/* - * DMA flags - * - * \warning - * These values \e must match xf86drm.h. - * - * \sa drm_dma. - */ -enum drm_dma_flags { - /* Flags for DMA buffer dispatch */ - _DRM_DMA_BLOCK = 0x01, /**< - * Block until buffer dispatched. 
- * - * \note The buffer may not yet have - * been processed by the hardware -- - * getting a hardware lock with the - * hardware quiescent will ensure - * that the buffer has been - * processed. - */ - _DRM_DMA_WHILE_LOCKED = 0x02, /**< Dispatch while lock held */ - _DRM_DMA_PRIORITY = 0x04, /**< High priority dispatch */ - - /* Flags for DMA buffer request */ - _DRM_DMA_WAIT = 0x10, /**< Wait for free buffers */ - _DRM_DMA_SMALLER_OK = 0x20, /**< Smaller-than-requested buffers OK */ - _DRM_DMA_LARGER_OK = 0x40 /**< Larger-than-requested buffers OK */ -}; - -/* - * DRM_IOCTL_ADD_BUFS and DRM_IOCTL_MARK_BUFS ioctl argument type. - * - * \sa drmAddBufs(). - */ -struct drm_buf_desc { - int count; /**< Number of buffers of this size */ - int size; /**< Size in bytes */ - int low_mark; /**< Low water mark */ - int high_mark; /**< High water mark */ - enum { - _DRM_PAGE_ALIGN = 0x01, /**< Align on page boundaries for DMA */ - _DRM_AGP_BUFFER = 0x02, /**< Buffer is in AGP space */ - _DRM_SG_BUFFER = 0x04, /**< Scatter/gather memory buffer */ - _DRM_FB_BUFFER = 0x08, /**< Buffer is in frame buffer */ - _DRM_PCI_BUFFER_RO = 0x10 /**< Map PCI DMA buffer read-only */ - } flags; - unsigned long agp_start; /**< - * Start address of where the AGP buffers are - * in the AGP aperture - */ -}; - -/* - * DRM_IOCTL_INFO_BUFS ioctl argument type. - */ -struct drm_buf_info { - int count; /**< Entries in list */ - struct drm_buf_desc __user *list; -}; - -/* - * DRM_IOCTL_FREE_BUFS ioctl argument type. - */ -struct drm_buf_free { - int count; - int __user *list; -}; - -/* - * Buffer information - * - * \sa drm_buf_map. - */ -struct drm_buf_pub { - int idx; /**< Index into the master buffer list */ - int total; /**< Buffer size */ - int used; /**< Amount of buffer in use (for DMA) */ - void __user *address; /**< Address of buffer */ -}; - -/* - * DRM_IOCTL_MAP_BUFS ioctl argument type. 
- */ -struct drm_buf_map { - int count; /**< Length of the buffer list */ -#ifdef __cplusplus - void __user *virt; -#else - void __user *virtual; /**< Mmap'd area in user-virtual */ -#endif - struct drm_buf_pub __user *list; /**< Buffer information */ -}; - -/* - * DRM_IOCTL_DMA ioctl argument type. - * - * Indices here refer to the offset into the buffer list in drm_buf_get. - * - * \sa drmDMA(). - */ -struct drm_dma { - int context; /**< Context handle */ - int send_count; /**< Number of buffers to send */ - int __user *send_indices; /**< List of handles to buffers */ - int __user *send_sizes; /**< Lengths of data to send */ - enum drm_dma_flags flags; /**< Flags */ - int request_count; /**< Number of buffers requested */ - int request_size; /**< Desired size for buffers */ - int __user *request_indices; /**< Buffer information */ - int __user *request_sizes; - int granted_count; /**< Number of buffers granted */ -}; - -enum drm_ctx_flags { - _DRM_CONTEXT_PRESERVED = 0x01, - _DRM_CONTEXT_2DONLY = 0x02 -}; - -/* - * DRM_IOCTL_ADD_CTX ioctl argument type. - * - * \sa drmCreateContext() and drmDestroyContext(). - */ -struct drm_ctx { - drm_context_t handle; - enum drm_ctx_flags flags; -}; - -/* - * DRM_IOCTL_RES_CTX ioctl argument type. - */ -struct drm_ctx_res { - int count; - struct drm_ctx __user *contexts; -}; - -/* - * DRM_IOCTL_ADD_DRAW and DRM_IOCTL_RM_DRAW ioctl argument type. - */ -struct drm_draw { - drm_drawable_t handle; -}; - -/* - * DRM_IOCTL_UPDATE_DRAW ioctl argument type. - */ -typedef enum { - DRM_DRAWABLE_CLIPRECTS -} drm_drawable_info_type_t; - -struct drm_update_draw { - drm_drawable_t handle; - unsigned int type; - unsigned int num; - unsigned long long data; -}; - -/* - * DRM_IOCTL_GET_MAGIC and DRM_IOCTL_AUTH_MAGIC ioctl argument type. - */ -struct drm_auth { - drm_magic_t magic; -}; - -/* - * DRM_IOCTL_IRQ_BUSID ioctl argument type. - * - * \sa drmGetInterruptFromBusID(). 
- */ -struct drm_irq_busid { - int irq; /**< IRQ number */ - int busnum; /**< bus number */ - int devnum; /**< device number */ - int funcnum; /**< function number */ -}; - -enum drm_vblank_seq_type { - _DRM_VBLANK_ABSOLUTE = 0x0, /**< Wait for specific vblank sequence number */ - _DRM_VBLANK_RELATIVE = 0x1, /**< Wait for given number of vblanks */ - /* bits 1-6 are reserved for high crtcs */ - _DRM_VBLANK_HIGH_CRTC_MASK = 0x0000003e, - _DRM_VBLANK_EVENT = 0x4000000, /**< Send event instead of blocking */ - _DRM_VBLANK_FLIP = 0x8000000, /**< Scheduled buffer swap should flip */ - _DRM_VBLANK_NEXTONMISS = 0x10000000, /**< If missed, wait for next vblank */ - _DRM_VBLANK_SECONDARY = 0x20000000, /**< Secondary display controller */ - _DRM_VBLANK_SIGNAL = 0x40000000 /**< Send signal instead of blocking, unsupported */ -}; -#define _DRM_VBLANK_HIGH_CRTC_SHIFT 1 - -#define _DRM_VBLANK_TYPES_MASK (_DRM_VBLANK_ABSOLUTE | _DRM_VBLANK_RELATIVE) -#define _DRM_VBLANK_FLAGS_MASK (_DRM_VBLANK_EVENT | _DRM_VBLANK_SIGNAL | \ - _DRM_VBLANK_SECONDARY | _DRM_VBLANK_NEXTONMISS) - -struct drm_wait_vblank_request { - enum drm_vblank_seq_type type; - unsigned int sequence; - unsigned long signal; -}; - -struct drm_wait_vblank_reply { - enum drm_vblank_seq_type type; - unsigned int sequence; - long tval_sec; - long tval_usec; -}; - -/* - * DRM_IOCTL_WAIT_VBLANK ioctl argument type. - * - * \sa drmWaitVBlank(). - */ -union drm_wait_vblank { - struct drm_wait_vblank_request request; - struct drm_wait_vblank_reply reply; -}; - -#define _DRM_PRE_MODESET 1 -#define _DRM_POST_MODESET 2 - -/* - * DRM_IOCTL_MODESET_CTL ioctl argument type - * - * \sa drmModesetCtl(). - */ -struct drm_modeset_ctl { - __u32 crtc; - __u32 cmd; -}; - -/* - * DRM_IOCTL_AGP_ENABLE ioctl argument type. - * - * \sa drmAgpEnable(). - */ -struct drm_agp_mode { - unsigned long mode; /**< AGP mode */ -}; - -/* - * DRM_IOCTL_AGP_ALLOC and DRM_IOCTL_AGP_FREE ioctls argument type. - * - * \sa drmAgpAlloc() and drmAgpFree(). 
- */ -struct drm_agp_buffer { - unsigned long size; /**< In bytes -- will round to page boundary */ - unsigned long handle; /**< Used for binding / unbinding */ - unsigned long type; /**< Type of memory to allocate */ - unsigned long physical; /**< Physical used by i810 */ -}; - -/* - * DRM_IOCTL_AGP_BIND and DRM_IOCTL_AGP_UNBIND ioctls argument type. - * - * \sa drmAgpBind() and drmAgpUnbind(). - */ -struct drm_agp_binding { - unsigned long handle; /**< From drm_agp_buffer */ - unsigned long offset; /**< In bytes -- will round to page boundary */ -}; - -/* - * DRM_IOCTL_AGP_INFO ioctl argument type. - * - * \sa drmAgpVersionMajor(), drmAgpVersionMinor(), drmAgpGetMode(), - * drmAgpBase(), drmAgpSize(), drmAgpMemoryUsed(), drmAgpMemoryAvail(), - * drmAgpVendorId() and drmAgpDeviceId(). - */ -struct drm_agp_info { - int agp_version_major; - int agp_version_minor; - unsigned long mode; - unsigned long aperture_base; /* physical address */ - unsigned long aperture_size; /* bytes */ - unsigned long memory_allowed; /* bytes */ - unsigned long memory_used; - - /* PCI information */ - unsigned short id_vendor; - unsigned short id_device; -}; - -/* - * DRM_IOCTL_SG_ALLOC ioctl argument type. - */ -struct drm_scatter_gather { - unsigned long size; /**< In bytes -- will round to page boundary */ - unsigned long handle; /**< Used for mapping / unmapping */ -}; - -/* - * DRM_IOCTL_SET_VERSION ioctl argument type. - */ -struct drm_set_version { - int drm_di_major; - int drm_di_minor; - int drm_dd_major; - int drm_dd_minor; -}; - -/** - * struct drm_gem_close - Argument for &DRM_IOCTL_GEM_CLOSE ioctl. - * @handle: Handle of the object to be closed. - * @pad: Padding. - * - * Releases the handle to an mm object. - */ -struct drm_gem_close { - __u32 handle; - __u32 pad; -}; - -/** - * struct drm_gem_flink - Argument for &DRM_IOCTL_GEM_FLINK ioctl. - * @handle: Handle for the object being named. - * @name: Returned global name. 
- * - * Create a global name for an object, returning the name. - * - * Note that the name does not hold a reference; when the object - * is freed, the name goes away. - */ -struct drm_gem_flink { - __u32 handle; - __u32 name; -}; - -/** - * struct drm_gem_open - Argument for &DRM_IOCTL_GEM_OPEN ioctl. - * @name: Name of object being opened. - * @handle: Returned handle for the object. - * @size: Returned size of the object - * - * Open an object using the global name, returning a handle and the size. - * - * This handle (of course) holds a reference to the object, so the object - * will not go away until the handle is deleted. - */ -struct drm_gem_open { - __u32 name; - __u32 handle; - __u64 size; -}; - -/** - * struct drm_gem_change_handle - Argument for &DRM_IOCTL_GEM_CHANGE_HANDLE ioctl. - * @handle: The handle of a gem object. - * @new_handle: An available gem handle. - * - * This ioctl changes the handle of a GEM object to the specified one. - * The new handle must be unused. On success the old handle is closed - * and all further IOCTL should refer to the new handle only. - * Calls to DRM_IOCTL_PRIME_FD_TO_HANDLE will return the new handle. - */ -struct drm_gem_change_handle { - __u32 handle; - __u32 new_handle; -}; - -/** - * DRM_CAP_DUMB_BUFFER - * - * If set to 1, the driver supports creating dumb buffers via the - * &DRM_IOCTL_MODE_CREATE_DUMB ioctl. - */ -#define DRM_CAP_DUMB_BUFFER 0x1 -/** - * DRM_CAP_VBLANK_HIGH_CRTC - * - * If set to 1, the kernel supports specifying a :ref:`CRTC index` - * in the high bits of &drm_wait_vblank_request.type. - * - * Starting kernel version 2.6.39, this capability is always set to 1. - */ -#define DRM_CAP_VBLANK_HIGH_CRTC 0x2 -/** - * DRM_CAP_DUMB_PREFERRED_DEPTH - * - * The preferred bit depth for dumb buffers. - * - * The bit depth is the number of bits used to indicate the color of a single - * pixel excluding any padding. This is different from the number of bits per - * pixel. 
For instance, XRGB8888 has a bit depth of 24 but has 32 bits per - * pixel. - * - * Note that this preference only applies to dumb buffers, it's irrelevant for - * other types of buffers. - */ -#define DRM_CAP_DUMB_PREFERRED_DEPTH 0x3 -/** - * DRM_CAP_DUMB_PREFER_SHADOW - * - * If set to 1, the driver prefers userspace to render to a shadow buffer - * instead of directly rendering to a dumb buffer. For best speed, userspace - * should do streaming ordered memory copies into the dumb buffer and never - * read from it. - * - * Note that this preference only applies to dumb buffers, it's irrelevant for - * other types of buffers. - */ -#define DRM_CAP_DUMB_PREFER_SHADOW 0x4 -/** - * DRM_CAP_PRIME - * - * Bitfield of supported PRIME sharing capabilities. See &DRM_PRIME_CAP_IMPORT - * and &DRM_PRIME_CAP_EXPORT. - * - * Starting from kernel version 6.6, both &DRM_PRIME_CAP_IMPORT and - * &DRM_PRIME_CAP_EXPORT are always advertised. - * - * PRIME buffers are exposed as dma-buf file descriptors. - * See :ref:`prime_buffer_sharing`. - */ -#define DRM_CAP_PRIME 0x5 -/** - * DRM_PRIME_CAP_IMPORT - * - * If this bit is set in &DRM_CAP_PRIME, the driver supports importing PRIME - * buffers via the &DRM_IOCTL_PRIME_FD_TO_HANDLE ioctl. - * - * Starting from kernel version 6.6, this bit is always set in &DRM_CAP_PRIME. - */ -#define DRM_PRIME_CAP_IMPORT 0x1 -/** - * DRM_PRIME_CAP_EXPORT - * - * If this bit is set in &DRM_CAP_PRIME, the driver supports exporting PRIME - * buffers via the &DRM_IOCTL_PRIME_HANDLE_TO_FD ioctl. - * - * Starting from kernel version 6.6, this bit is always set in &DRM_CAP_PRIME. - */ -#define DRM_PRIME_CAP_EXPORT 0x2 -/** - * DRM_CAP_TIMESTAMP_MONOTONIC - * - * If set to 0, the kernel will report timestamps with ``CLOCK_REALTIME`` in - * struct drm_event_vblank. If set to 1, the kernel will report timestamps with - * ``CLOCK_MONOTONIC``. See ``clock_gettime(2)`` for the definition of these - * clocks. 
- * - * Starting from kernel version 2.6.39, the default value for this capability - * is 1. Starting kernel version 4.15, this capability is always set to 1. - */ -#define DRM_CAP_TIMESTAMP_MONOTONIC 0x6 -/** - * DRM_CAP_ASYNC_PAGE_FLIP - * - * If set to 1, the driver supports &DRM_MODE_PAGE_FLIP_ASYNC for legacy - * page-flips. - */ -#define DRM_CAP_ASYNC_PAGE_FLIP 0x7 -/** - * DRM_CAP_CURSOR_WIDTH - * - * The ``CURSOR_WIDTH`` and ``CURSOR_HEIGHT`` capabilities return a valid - * width x height combination for the hardware cursor. The intention is that a - * hardware agnostic userspace can query a cursor plane size to use. - * - * Note that the cross-driver contract is to merely return a valid size; - * drivers are free to attach another meaning on top, eg. i915 returns the - * maximum plane size. - */ -#define DRM_CAP_CURSOR_WIDTH 0x8 -/** - * DRM_CAP_CURSOR_HEIGHT - * - * See &DRM_CAP_CURSOR_WIDTH. - */ -#define DRM_CAP_CURSOR_HEIGHT 0x9 -/** - * DRM_CAP_ADDFB2_MODIFIERS - * - * If set to 1, the driver supports supplying modifiers in the - * &DRM_IOCTL_MODE_ADDFB2 ioctl. - */ -#define DRM_CAP_ADDFB2_MODIFIERS 0x10 -/** - * DRM_CAP_PAGE_FLIP_TARGET - * - * If set to 1, the driver supports the &DRM_MODE_PAGE_FLIP_TARGET_ABSOLUTE and - * &DRM_MODE_PAGE_FLIP_TARGET_RELATIVE flags in - * &drm_mode_crtc_page_flip_target.flags for the &DRM_IOCTL_MODE_PAGE_FLIP - * ioctl. - */ -#define DRM_CAP_PAGE_FLIP_TARGET 0x11 -/** - * DRM_CAP_CRTC_IN_VBLANK_EVENT - * - * If set to 1, the kernel supports reporting the CRTC ID in - * &drm_event_vblank.crtc_id for the &DRM_EVENT_VBLANK and - * &DRM_EVENT_FLIP_COMPLETE events. - * - * Starting kernel version 4.12, this capability is always set to 1. - */ -#define DRM_CAP_CRTC_IN_VBLANK_EVENT 0x12 -/** - * DRM_CAP_SYNCOBJ - * - * If set to 1, the driver supports sync objects. See :ref:`drm_sync_objects`. 
- */ -#define DRM_CAP_SYNCOBJ 0x13 -/** - * DRM_CAP_SYNCOBJ_TIMELINE - * - * If set to 1, the driver supports timeline operations on sync objects. See - * :ref:`drm_sync_objects`. - */ -#define DRM_CAP_SYNCOBJ_TIMELINE 0x14 -/** - * DRM_CAP_ATOMIC_ASYNC_PAGE_FLIP - * - * If set to 1, the driver supports &DRM_MODE_PAGE_FLIP_ASYNC for atomic - * commits. - */ -#define DRM_CAP_ATOMIC_ASYNC_PAGE_FLIP 0x15 - -/* DRM_IOCTL_GET_CAP ioctl argument type */ -struct drm_get_cap { - __u64 capability; - __u64 value; -}; - -/** - * DRM_CLIENT_CAP_STEREO_3D - * - * If set to 1, the DRM core will expose the stereo 3D capabilities of the - * monitor by advertising the supported 3D layouts in the flags of struct - * drm_mode_modeinfo. See ``DRM_MODE_FLAG_3D_*``. - * - * This capability is always supported for all drivers starting from kernel - * version 3.13. - */ -#define DRM_CLIENT_CAP_STEREO_3D 1 - -/** - * DRM_CLIENT_CAP_UNIVERSAL_PLANES - * - * If set to 1, the DRM core will expose all planes (overlay, primary, and - * cursor) to userspace. - * - * This capability has been introduced in kernel version 3.15. Starting from - * kernel version 3.17, this capability is always supported for all drivers. - */ -#define DRM_CLIENT_CAP_UNIVERSAL_PLANES 2 - -/** - * DRM_CLIENT_CAP_ATOMIC - * - * If set to 1, the DRM core will expose atomic properties to userspace. This - * implicitly enables &DRM_CLIENT_CAP_UNIVERSAL_PLANES and - * &DRM_CLIENT_CAP_ASPECT_RATIO. - * - * If the driver doesn't support atomic mode-setting, enabling this capability - * will fail with -EOPNOTSUPP. - * - * This capability has been introduced in kernel version 4.0. Starting from - * kernel version 4.2, this capability is always supported for atomic-capable - * drivers. - */ -#define DRM_CLIENT_CAP_ATOMIC 3 - -/** - * DRM_CLIENT_CAP_ASPECT_RATIO - * - * If set to 1, the DRM core will provide aspect ratio information in modes. - * See ``DRM_MODE_FLAG_PIC_AR_*``. 
- * - * This capability is always supported for all drivers starting from kernel - * version 4.18. - */ -#define DRM_CLIENT_CAP_ASPECT_RATIO 4 - -/** - * DRM_CLIENT_CAP_WRITEBACK_CONNECTORS - * - * If set to 1, the DRM core will expose special connectors to be used for - * writing back to memory the scene setup in the commit. The client must enable - * &DRM_CLIENT_CAP_ATOMIC first. - * - * This capability is always supported for atomic-capable drivers starting from - * kernel version 4.19. - */ -#define DRM_CLIENT_CAP_WRITEBACK_CONNECTORS 5 - -/** - * DRM_CLIENT_CAP_CURSOR_PLANE_HOTSPOT - * - * Drivers for para-virtualized hardware (e.g. vmwgfx, qxl, virtio and - * virtualbox) have additional restrictions for cursor planes (thus - * making cursor planes on those drivers not truly universal,) e.g. - * they need cursor planes to act like one would expect from a mouse - * cursor and have correctly set hotspot properties. - * If this client cap is not set the DRM core will hide cursor plane on - * those virtualized drivers because not setting it implies that the - * client is not capable of dealing with those extra restictions. - * Clients which do set cursor hotspot and treat the cursor plane - * like a mouse cursor should set this property. - * The client must enable &DRM_CLIENT_CAP_ATOMIC first. - * - * Setting this property on drivers which do not special case - * cursor planes (i.e. non-virtualized drivers) will return - * EOPNOTSUPP, which can be used by userspace to gauge - * requirements of the hardware/drivers they're running on. - * - * This capability is always supported for atomic-capable virtualized - * drivers starting from kernel version 6.6. - */ -#define DRM_CLIENT_CAP_CURSOR_PLANE_HOTSPOT 6 - -/* DRM_IOCTL_SET_CLIENT_CAP ioctl argument type */ -struct drm_set_client_cap { - __u64 capability; - __u64 value; -}; - -#define DRM_RDWR O_RDWR -#define DRM_CLOEXEC O_CLOEXEC -struct drm_prime_handle { - __u32 handle; - - /** Flags.. 
only applicable for handle->fd */ - __u32 flags; - - /** Returned dmabuf file descriptor */ - __s32 fd; -}; - -struct drm_syncobj_create { - __u32 handle; -#define DRM_SYNCOBJ_CREATE_SIGNALED (1 << 0) - __u32 flags; -}; - -struct drm_syncobj_destroy { - __u32 handle; - __u32 pad; -}; - -#define DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_IMPORT_SYNC_FILE (1 << 0) -#define DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_TIMELINE (1 << 1) -#define DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_SYNC_FILE (1 << 0) -#define DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_TIMELINE (1 << 1) -struct drm_syncobj_handle { - __u32 handle; - __u32 flags; - - __s32 fd; - __u32 pad; - - __u64 point; -}; - -struct drm_syncobj_transfer { - __u32 src_handle; - __u32 dst_handle; - __u64 src_point; - __u64 dst_point; - __u32 flags; - __u32 pad; -}; - -#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0) -#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1) -#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE (1 << 2) /* wait for time point to become available */ -#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_DEADLINE (1 << 3) /* set fence deadline to deadline_nsec */ -struct drm_syncobj_wait { - __u64 handles; - /* absolute timeout */ - __s64 timeout_nsec; - __u32 count_handles; - __u32 flags; - __u32 first_signaled; /* only valid when not waiting all */ - __u32 pad; - /** - * @deadline_nsec - fence deadline hint - * - * Deadline hint, in absolute CLOCK_MONOTONIC, to set on backing - * fence(s) if the DRM_SYNCOBJ_WAIT_FLAGS_WAIT_DEADLINE flag is - * set. - */ - __u64 deadline_nsec; -}; - -struct drm_syncobj_timeline_wait { - __u64 handles; - /* wait on specific timeline point for every handles*/ - __u64 points; - /* absolute timeout */ - __s64 timeout_nsec; - __u32 count_handles; - __u32 flags; - __u32 first_signaled; /* only valid when not waiting all */ - __u32 pad; - /** - * @deadline_nsec - fence deadline hint - * - * Deadline hint, in absolute CLOCK_MONOTONIC, to set on backing - * fence(s) if the DRM_SYNCOBJ_WAIT_FLAGS_WAIT_DEADLINE flag is - * set. 
- */ - __u64 deadline_nsec; -}; - -/** - * struct drm_syncobj_eventfd - * @handle: syncobj handle. - * @flags: Zero to wait for the point to be signalled, or - * &DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE to wait for a fence to be - * available for the point. - * @point: syncobj timeline point (set to zero for binary syncobjs). - * @fd: Existing eventfd to sent events to. - * @pad: Must be zero. - * - * Register an eventfd to be signalled by a syncobj. The eventfd counter will - * be incremented by one. - */ -struct drm_syncobj_eventfd { - __u32 handle; - __u32 flags; - __u64 point; - __s32 fd; - __u32 pad; -}; - - -struct drm_syncobj_array { - __u64 handles; - __u32 count_handles; - __u32 pad; -}; - -#define DRM_SYNCOBJ_QUERY_FLAGS_LAST_SUBMITTED (1 << 0) /* last available point on timeline syncobj */ -struct drm_syncobj_timeline_array { - __u64 handles; - __u64 points; - __u32 count_handles; - __u32 flags; -}; - - -/* Query current scanout sequence number */ -struct drm_crtc_get_sequence { - __u32 crtc_id; /* requested crtc_id */ - __u32 active; /* return: crtc output is active */ - __u64 sequence; /* return: most recent vblank sequence */ - __s64 sequence_ns; /* return: most recent time of first pixel out */ -}; - -/* Queue event to be delivered at specified sequence. Time stamp marks - * when the first pixel of the refresh cycle leaves the display engine - * for the display - */ -#define DRM_CRTC_SEQUENCE_RELATIVE 0x00000001 /* sequence is relative to current */ -#define DRM_CRTC_SEQUENCE_NEXT_ON_MISS 0x00000002 /* Use next sequence if we've missed */ - -struct drm_crtc_queue_sequence { - __u32 crtc_id; - __u32 flags; - __u64 sequence; /* on input, target sequence. 
on output, actual sequence */ - __u64 user_data; /* user data passed to event */ -}; - -#define DRM_CLIENT_NAME_MAX_LEN 64 -struct drm_set_client_name { - __u64 name_len; - __u64 name; -}; - - -#if defined(__cplusplus) -} -#endif - -#include "drm_mode.h" - -#if defined(__cplusplus) -extern "C" { -#endif - -#define DRM_IOCTL_BASE 'd' -#define DRM_IO(nr) _IO(DRM_IOCTL_BASE,nr) -#define DRM_IOR(nr,type) _IOR(DRM_IOCTL_BASE,nr,type) -#define DRM_IOW(nr,type) _IOW(DRM_IOCTL_BASE,nr,type) -#define DRM_IOWR(nr,type) _IOWR(DRM_IOCTL_BASE,nr,type) - -#define DRM_IOCTL_VERSION DRM_IOWR(0x00, struct drm_version) -#define DRM_IOCTL_GET_UNIQUE DRM_IOWR(0x01, struct drm_unique) -#define DRM_IOCTL_GET_MAGIC DRM_IOR( 0x02, struct drm_auth) -#define DRM_IOCTL_IRQ_BUSID DRM_IOWR(0x03, struct drm_irq_busid) -#define DRM_IOCTL_GET_MAP DRM_IOWR(0x04, struct drm_map) -#define DRM_IOCTL_GET_CLIENT DRM_IOWR(0x05, struct drm_client) -#define DRM_IOCTL_GET_STATS DRM_IOR( 0x06, struct drm_stats) -#define DRM_IOCTL_SET_VERSION DRM_IOWR(0x07, struct drm_set_version) -#define DRM_IOCTL_MODESET_CTL DRM_IOW(0x08, struct drm_modeset_ctl) -/** - * DRM_IOCTL_GEM_CLOSE - Close a GEM handle. - * - * GEM handles are not reference-counted by the kernel. User-space is - * responsible for managing their lifetime. For example, if user-space imports - * the same memory object twice on the same DRM file description, the same GEM - * handle is returned by both imports, and user-space needs to ensure - * &DRM_IOCTL_GEM_CLOSE is performed once only. The same situation can happen - * when a memory object is allocated, then exported and imported again on the - * same DRM file description. The &DRM_IOCTL_MODE_GETFB2 IOCTL is an exception - * and always returns fresh new GEM handles even if an existing GEM handle - * already refers to the same memory object before the IOCTL is performed. 
- */ -#define DRM_IOCTL_GEM_CLOSE DRM_IOW (0x09, struct drm_gem_close) -#define DRM_IOCTL_GEM_FLINK DRM_IOWR(0x0a, struct drm_gem_flink) -#define DRM_IOCTL_GEM_OPEN DRM_IOWR(0x0b, struct drm_gem_open) -#define DRM_IOCTL_GET_CAP DRM_IOWR(0x0c, struct drm_get_cap) -#define DRM_IOCTL_SET_CLIENT_CAP DRM_IOW( 0x0d, struct drm_set_client_cap) - -#define DRM_IOCTL_SET_UNIQUE DRM_IOW( 0x10, struct drm_unique) -#define DRM_IOCTL_AUTH_MAGIC DRM_IOW( 0x11, struct drm_auth) -#define DRM_IOCTL_BLOCK DRM_IOWR(0x12, struct drm_block) -#define DRM_IOCTL_UNBLOCK DRM_IOWR(0x13, struct drm_block) -#define DRM_IOCTL_CONTROL DRM_IOW( 0x14, struct drm_control) -#define DRM_IOCTL_ADD_MAP DRM_IOWR(0x15, struct drm_map) -#define DRM_IOCTL_ADD_BUFS DRM_IOWR(0x16, struct drm_buf_desc) -#define DRM_IOCTL_MARK_BUFS DRM_IOW( 0x17, struct drm_buf_desc) -#define DRM_IOCTL_INFO_BUFS DRM_IOWR(0x18, struct drm_buf_info) -#define DRM_IOCTL_MAP_BUFS DRM_IOWR(0x19, struct drm_buf_map) -#define DRM_IOCTL_FREE_BUFS DRM_IOW( 0x1a, struct drm_buf_free) - -#define DRM_IOCTL_RM_MAP DRM_IOW( 0x1b, struct drm_map) - -#define DRM_IOCTL_SET_SAREA_CTX DRM_IOW( 0x1c, struct drm_ctx_priv_map) -#define DRM_IOCTL_GET_SAREA_CTX DRM_IOWR(0x1d, struct drm_ctx_priv_map) - -#define DRM_IOCTL_SET_MASTER DRM_IO(0x1e) -#define DRM_IOCTL_DROP_MASTER DRM_IO(0x1f) - -#define DRM_IOCTL_ADD_CTX DRM_IOWR(0x20, struct drm_ctx) -#define DRM_IOCTL_RM_CTX DRM_IOWR(0x21, struct drm_ctx) -#define DRM_IOCTL_MOD_CTX DRM_IOW( 0x22, struct drm_ctx) -#define DRM_IOCTL_GET_CTX DRM_IOWR(0x23, struct drm_ctx) -#define DRM_IOCTL_SWITCH_CTX DRM_IOW( 0x24, struct drm_ctx) -#define DRM_IOCTL_NEW_CTX DRM_IOW( 0x25, struct drm_ctx) -#define DRM_IOCTL_RES_CTX DRM_IOWR(0x26, struct drm_ctx_res) -#define DRM_IOCTL_ADD_DRAW DRM_IOWR(0x27, struct drm_draw) -#define DRM_IOCTL_RM_DRAW DRM_IOWR(0x28, struct drm_draw) -#define DRM_IOCTL_DMA DRM_IOWR(0x29, struct drm_dma) -#define DRM_IOCTL_LOCK DRM_IOW( 0x2a, struct drm_lock) -#define DRM_IOCTL_UNLOCK 
DRM_IOW( 0x2b, struct drm_lock) -#define DRM_IOCTL_FINISH DRM_IOW( 0x2c, struct drm_lock) - -/** - * DRM_IOCTL_PRIME_HANDLE_TO_FD - Convert a GEM handle to a DMA-BUF FD. - * - * User-space sets &drm_prime_handle.handle with the GEM handle to export and - * &drm_prime_handle.flags, and gets back a DMA-BUF file descriptor in - * &drm_prime_handle.fd. - * - * The export can fail for any driver-specific reason, e.g. because export is - * not supported for this specific GEM handle (but might be for others). - * - * Support for exporting DMA-BUFs is advertised via &DRM_PRIME_CAP_EXPORT. - */ -#define DRM_IOCTL_PRIME_HANDLE_TO_FD DRM_IOWR(0x2d, struct drm_prime_handle) -/** - * DRM_IOCTL_PRIME_FD_TO_HANDLE - Convert a DMA-BUF FD to a GEM handle. - * - * User-space sets &drm_prime_handle.fd with a DMA-BUF file descriptor to - * import, and gets back a GEM handle in &drm_prime_handle.handle. - * &drm_prime_handle.flags is unused. - * - * If an existing GEM handle refers to the memory object backing the DMA-BUF, - * that GEM handle is returned. Therefore user-space which needs to handle - * arbitrary DMA-BUFs must have a user-space lookup data structure to manually - * reference-count duplicated GEM handles. For more information see - * &DRM_IOCTL_GEM_CLOSE. - * - * The import can fail for any driver-specific reason, e.g. because import is - * only supported for DMA-BUFs allocated on this DRM device. - * - * Support for importing DMA-BUFs is advertised via &DRM_PRIME_CAP_IMPORT. 
- */ -#define DRM_IOCTL_PRIME_FD_TO_HANDLE DRM_IOWR(0x2e, struct drm_prime_handle) - -#define DRM_IOCTL_AGP_ACQUIRE DRM_IO( 0x30) -#define DRM_IOCTL_AGP_RELEASE DRM_IO( 0x31) -#define DRM_IOCTL_AGP_ENABLE DRM_IOW( 0x32, struct drm_agp_mode) -#define DRM_IOCTL_AGP_INFO DRM_IOR( 0x33, struct drm_agp_info) -#define DRM_IOCTL_AGP_ALLOC DRM_IOWR(0x34, struct drm_agp_buffer) -#define DRM_IOCTL_AGP_FREE DRM_IOW( 0x35, struct drm_agp_buffer) -#define DRM_IOCTL_AGP_BIND DRM_IOW( 0x36, struct drm_agp_binding) -#define DRM_IOCTL_AGP_UNBIND DRM_IOW( 0x37, struct drm_agp_binding) - -#define DRM_IOCTL_SG_ALLOC DRM_IOWR(0x38, struct drm_scatter_gather) -#define DRM_IOCTL_SG_FREE DRM_IOW( 0x39, struct drm_scatter_gather) - -#define DRM_IOCTL_WAIT_VBLANK DRM_IOWR(0x3a, union drm_wait_vblank) - -#define DRM_IOCTL_CRTC_GET_SEQUENCE DRM_IOWR(0x3b, struct drm_crtc_get_sequence) -#define DRM_IOCTL_CRTC_QUEUE_SEQUENCE DRM_IOWR(0x3c, struct drm_crtc_queue_sequence) - -#define DRM_IOCTL_UPDATE_DRAW DRM_IOW(0x3f, struct drm_update_draw) - -#define DRM_IOCTL_MODE_GETRESOURCES DRM_IOWR(0xA0, struct drm_mode_card_res) -#define DRM_IOCTL_MODE_GETCRTC DRM_IOWR(0xA1, struct drm_mode_crtc) -#define DRM_IOCTL_MODE_SETCRTC DRM_IOWR(0xA2, struct drm_mode_crtc) -#define DRM_IOCTL_MODE_CURSOR DRM_IOWR(0xA3, struct drm_mode_cursor) -#define DRM_IOCTL_MODE_GETGAMMA DRM_IOWR(0xA4, struct drm_mode_crtc_lut) -#define DRM_IOCTL_MODE_SETGAMMA DRM_IOWR(0xA5, struct drm_mode_crtc_lut) -#define DRM_IOCTL_MODE_GETENCODER DRM_IOWR(0xA6, struct drm_mode_get_encoder) -#define DRM_IOCTL_MODE_GETCONNECTOR DRM_IOWR(0xA7, struct drm_mode_get_connector) -#define DRM_IOCTL_MODE_ATTACHMODE DRM_IOWR(0xA8, struct drm_mode_mode_cmd) /* deprecated (never worked) */ -#define DRM_IOCTL_MODE_DETACHMODE DRM_IOWR(0xA9, struct drm_mode_mode_cmd) /* deprecated (never worked) */ - -#define DRM_IOCTL_MODE_GETPROPERTY DRM_IOWR(0xAA, struct drm_mode_get_property) -#define DRM_IOCTL_MODE_SETPROPERTY DRM_IOWR(0xAB, struct 
drm_mode_connector_set_property) -#define DRM_IOCTL_MODE_GETPROPBLOB DRM_IOWR(0xAC, struct drm_mode_get_blob) -#define DRM_IOCTL_MODE_GETFB DRM_IOWR(0xAD, struct drm_mode_fb_cmd) -#define DRM_IOCTL_MODE_ADDFB DRM_IOWR(0xAE, struct drm_mode_fb_cmd) -/** - * DRM_IOCTL_MODE_RMFB - Remove a framebuffer. - * - * This removes a framebuffer previously added via ADDFB/ADDFB2. The IOCTL - * argument is a framebuffer object ID. - * - * Warning: removing a framebuffer currently in-use on an enabled plane will - * disable that plane. The CRTC the plane is linked to may also be disabled - * (depending on driver capabilities). - */ -#define DRM_IOCTL_MODE_RMFB DRM_IOWR(0xAF, unsigned int) -#define DRM_IOCTL_MODE_PAGE_FLIP DRM_IOWR(0xB0, struct drm_mode_crtc_page_flip) -#define DRM_IOCTL_MODE_DIRTYFB DRM_IOWR(0xB1, struct drm_mode_fb_dirty_cmd) - -/** - * DRM_IOCTL_MODE_CREATE_DUMB - Create a new dumb buffer object. - * - * KMS dumb buffers provide a very primitive way to allocate a buffer object - * suitable for scanout and map it for software rendering. KMS dumb buffers are - * not suitable for hardware-accelerated rendering nor video decoding. KMS dumb - * buffers are not suitable to be displayed on any other device than the KMS - * device where they were allocated from. Also see - * :ref:`kms_dumb_buffer_objects`. - * - * The IOCTL argument is a struct drm_mode_create_dumb. - * - * User-space is expected to create a KMS dumb buffer via this IOCTL, then add - * it as a KMS framebuffer via &DRM_IOCTL_MODE_ADDFB and map it via - * &DRM_IOCTL_MODE_MAP_DUMB. - * - * &DRM_CAP_DUMB_BUFFER indicates whether this IOCTL is supported. - * &DRM_CAP_DUMB_PREFERRED_DEPTH and &DRM_CAP_DUMB_PREFER_SHADOW indicate - * driver preferences for dumb buffers. 
- */ -#define DRM_IOCTL_MODE_CREATE_DUMB DRM_IOWR(0xB2, struct drm_mode_create_dumb) -#define DRM_IOCTL_MODE_MAP_DUMB DRM_IOWR(0xB3, struct drm_mode_map_dumb) -#define DRM_IOCTL_MODE_DESTROY_DUMB DRM_IOWR(0xB4, struct drm_mode_destroy_dumb) -#define DRM_IOCTL_MODE_GETPLANERESOURCES DRM_IOWR(0xB5, struct drm_mode_get_plane_res) -#define DRM_IOCTL_MODE_GETPLANE DRM_IOWR(0xB6, struct drm_mode_get_plane) -#define DRM_IOCTL_MODE_SETPLANE DRM_IOWR(0xB7, struct drm_mode_set_plane) -#define DRM_IOCTL_MODE_ADDFB2 DRM_IOWR(0xB8, struct drm_mode_fb_cmd2) -#define DRM_IOCTL_MODE_OBJ_GETPROPERTIES DRM_IOWR(0xB9, struct drm_mode_obj_get_properties) -#define DRM_IOCTL_MODE_OBJ_SETPROPERTY DRM_IOWR(0xBA, struct drm_mode_obj_set_property) -#define DRM_IOCTL_MODE_CURSOR2 DRM_IOWR(0xBB, struct drm_mode_cursor2) -#define DRM_IOCTL_MODE_ATOMIC DRM_IOWR(0xBC, struct drm_mode_atomic) -#define DRM_IOCTL_MODE_CREATEPROPBLOB DRM_IOWR(0xBD, struct drm_mode_create_blob) -#define DRM_IOCTL_MODE_DESTROYPROPBLOB DRM_IOWR(0xBE, struct drm_mode_destroy_blob) - -#define DRM_IOCTL_SYNCOBJ_CREATE DRM_IOWR(0xBF, struct drm_syncobj_create) -#define DRM_IOCTL_SYNCOBJ_DESTROY DRM_IOWR(0xC0, struct drm_syncobj_destroy) -#define DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD DRM_IOWR(0xC1, struct drm_syncobj_handle) -#define DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE DRM_IOWR(0xC2, struct drm_syncobj_handle) -#define DRM_IOCTL_SYNCOBJ_WAIT DRM_IOWR(0xC3, struct drm_syncobj_wait) -#define DRM_IOCTL_SYNCOBJ_RESET DRM_IOWR(0xC4, struct drm_syncobj_array) -#define DRM_IOCTL_SYNCOBJ_SIGNAL DRM_IOWR(0xC5, struct drm_syncobj_array) - -#define DRM_IOCTL_MODE_CREATE_LEASE DRM_IOWR(0xC6, struct drm_mode_create_lease) -#define DRM_IOCTL_MODE_LIST_LESSEES DRM_IOWR(0xC7, struct drm_mode_list_lessees) -#define DRM_IOCTL_MODE_GET_LEASE DRM_IOWR(0xC8, struct drm_mode_get_lease) -#define DRM_IOCTL_MODE_REVOKE_LEASE DRM_IOWR(0xC9, struct drm_mode_revoke_lease) - -#define DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT DRM_IOWR(0xCA, struct drm_syncobj_timeline_wait) 
-#define DRM_IOCTL_SYNCOBJ_QUERY DRM_IOWR(0xCB, struct drm_syncobj_timeline_array) -#define DRM_IOCTL_SYNCOBJ_TRANSFER DRM_IOWR(0xCC, struct drm_syncobj_transfer) -#define DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL DRM_IOWR(0xCD, struct drm_syncobj_timeline_array) - -/** - * DRM_IOCTL_MODE_GETFB2 - Get framebuffer metadata. - * - * This queries metadata about a framebuffer. User-space fills - * &drm_mode_fb_cmd2.fb_id as the input, and the kernels fills the rest of the - * struct as the output. - * - * If the client is DRM master or has &CAP_SYS_ADMIN, &drm_mode_fb_cmd2.handles - * will be filled with GEM buffer handles. Fresh new GEM handles are always - * returned, even if another GEM handle referring to the same memory object - * already exists on the DRM file description. The caller is responsible for - * removing the new handles, e.g. via the &DRM_IOCTL_GEM_CLOSE IOCTL. The same - * new handle will be returned for multiple planes in case they use the same - * memory object. Planes are valid until one has a zero handle -- this can be - * used to compute the number of planes. - * - * Otherwise, &drm_mode_fb_cmd2.handles will be zeroed and planes are valid - * until one has a zero &drm_mode_fb_cmd2.pitches. - * - * If the framebuffer has a format modifier, &DRM_MODE_FB_MODIFIERS will be set - * in &drm_mode_fb_cmd2.flags and &drm_mode_fb_cmd2.modifier will contain the - * modifier. Otherwise, user-space must ignore &drm_mode_fb_cmd2.modifier. - * - * To obtain DMA-BUF FDs for each plane without leaking GEM handles, user-space - * can export each handle via &DRM_IOCTL_PRIME_HANDLE_TO_FD, then immediately - * close each unique handle via &DRM_IOCTL_GEM_CLOSE, making sure to not - * double-close handles which are specified multiple times in the array. - */ -#define DRM_IOCTL_MODE_GETFB2 DRM_IOWR(0xCE, struct drm_mode_fb_cmd2) - -#define DRM_IOCTL_SYNCOBJ_EVENTFD DRM_IOWR(0xCF, struct drm_syncobj_eventfd) - -/** - * DRM_IOCTL_MODE_CLOSEFB - Close a framebuffer. 
- * - * This closes a framebuffer previously added via ADDFB/ADDFB2. The IOCTL - * argument is a framebuffer object ID. - * - * This IOCTL is similar to &DRM_IOCTL_MODE_RMFB, except it doesn't disable - * planes and CRTCs. As long as the framebuffer is used by a plane, it's kept - * alive. When the plane no longer uses the framebuffer (because the - * framebuffer is replaced with another one, or the plane is disabled), the - * framebuffer is cleaned up. - * - * This is useful to implement flicker-free transitions between two processes. - * - * Depending on the threat model, user-space may want to ensure that the - * framebuffer doesn't expose any sensitive user information: closed - * framebuffers attached to a plane can be read back by the next DRM master. - */ -#define DRM_IOCTL_MODE_CLOSEFB DRM_IOWR(0xD0, struct drm_mode_closefb) - -/** - * DRM_IOCTL_SET_CLIENT_NAME - Attach a name to a drm_file - * - * Having a name allows for easier tracking and debugging. - * The length of the name (without null ending char) must be - * <= DRM_CLIENT_NAME_MAX_LEN. - * The call will fail if the name contains whitespaces or non-printable chars. - */ -#define DRM_IOCTL_SET_CLIENT_NAME DRM_IOWR(0xD1, struct drm_set_client_name) - -/** - * DRM_IOCTL_GEM_CHANGE_HANDLE - Move an object to a different handle - * - * Some applications (notably CRIU) need objects to have specific gem handles. - * This ioctl changes the object at one gem handle to use a new gem handle. - */ -#define DRM_IOCTL_GEM_CHANGE_HANDLE DRM_IOWR(0xD2, struct drm_gem_change_handle) - -/* - * Device specific ioctls should only be in their respective headers - * The device specific ioctl range is from 0x40 to 0x9f. - * Generic IOCTLS restart at 0xA0. - * - * \sa drmCommandNone(), drmCommandRead(), drmCommandWrite(), and - * drmCommandReadWrite(). - */ -#define DRM_COMMAND_BASE 0x40 -#define DRM_COMMAND_END 0xA0 - -/** - * struct drm_event - Header for DRM events - * @type: event type. 
- * @length: total number of payload bytes (including header). - * - * This struct is a header for events written back to user-space on the DRM FD. - * A read on the DRM FD will always only return complete events: e.g. if the - * read buffer is 100 bytes large and there are two 64 byte events pending, - * only one will be returned. - * - * Event types 0 - 0x7fffffff are generic DRM events, 0x80000000 and - * up are chipset specific. Generic DRM events include &DRM_EVENT_VBLANK, - * &DRM_EVENT_FLIP_COMPLETE and &DRM_EVENT_CRTC_SEQUENCE. - */ -struct drm_event { - __u32 type; - __u32 length; -}; - -/** - * DRM_EVENT_VBLANK - vertical blanking event - * - * This event is sent in response to &DRM_IOCTL_WAIT_VBLANK with the - * &_DRM_VBLANK_EVENT flag set. - * - * The event payload is a struct drm_event_vblank. - */ -#define DRM_EVENT_VBLANK 0x01 -/** - * DRM_EVENT_FLIP_COMPLETE - page-flip completion event - * - * This event is sent in response to an atomic commit or legacy page-flip with - * the &DRM_MODE_PAGE_FLIP_EVENT flag set. - * - * The event payload is a struct drm_event_vblank. - */ -#define DRM_EVENT_FLIP_COMPLETE 0x02 -/** - * DRM_EVENT_CRTC_SEQUENCE - CRTC sequence event - * - * This event is sent in response to &DRM_IOCTL_CRTC_QUEUE_SEQUENCE. - * - * The event payload is a struct drm_event_crtc_sequence. - */ -#define DRM_EVENT_CRTC_SEQUENCE 0x03 - -struct drm_event_vblank { - struct drm_event base; - __u64 user_data; - __u32 tv_sec; - __u32 tv_usec; - __u32 sequence; - __u32 crtc_id; /* 0 on older kernels that do not support this */ -}; - -/* Event delivered at sequence. 
Time stamp marks when the first pixel - * of the refresh cycle leaves the display engine for the display - */ -struct drm_event_crtc_sequence { - struct drm_event base; - __u64 user_data; - __s64 time_ns; - __u64 sequence; -}; - -/* typedef area */ -#ifndef __KERNEL__ -typedef struct drm_clip_rect drm_clip_rect_t; -typedef struct drm_drawable_info drm_drawable_info_t; -typedef struct drm_tex_region drm_tex_region_t; -typedef struct drm_hw_lock drm_hw_lock_t; -typedef struct drm_version drm_version_t; -typedef struct drm_unique drm_unique_t; -typedef struct drm_list drm_list_t; -typedef struct drm_block drm_block_t; -typedef struct drm_control drm_control_t; -typedef enum drm_map_type drm_map_type_t; -typedef enum drm_map_flags drm_map_flags_t; -typedef struct drm_ctx_priv_map drm_ctx_priv_map_t; -typedef struct drm_map drm_map_t; -typedef struct drm_client drm_client_t; -typedef enum drm_stat_type drm_stat_type_t; -typedef struct drm_stats drm_stats_t; -typedef enum drm_lock_flags drm_lock_flags_t; -typedef struct drm_lock drm_lock_t; -typedef enum drm_dma_flags drm_dma_flags_t; -typedef struct drm_buf_desc drm_buf_desc_t; -typedef struct drm_buf_info drm_buf_info_t; -typedef struct drm_buf_free drm_buf_free_t; -typedef struct drm_buf_pub drm_buf_pub_t; -typedef struct drm_buf_map drm_buf_map_t; -typedef struct drm_dma drm_dma_t; -typedef union drm_wait_vblank drm_wait_vblank_t; -typedef struct drm_agp_mode drm_agp_mode_t; -typedef enum drm_ctx_flags drm_ctx_flags_t; -typedef struct drm_ctx drm_ctx_t; -typedef struct drm_ctx_res drm_ctx_res_t; -typedef struct drm_draw drm_draw_t; -typedef struct drm_update_draw drm_update_draw_t; -typedef struct drm_auth drm_auth_t; -typedef struct drm_irq_busid drm_irq_busid_t; -typedef enum drm_vblank_seq_type drm_vblank_seq_type_t; - -typedef struct drm_agp_buffer drm_agp_buffer_t; -typedef struct drm_agp_binding drm_agp_binding_t; -typedef struct drm_agp_info drm_agp_info_t; -typedef struct drm_scatter_gather 
drm_scatter_gather_t; -typedef struct drm_set_version drm_set_version_t; -#endif - -#if defined(__cplusplus) -} -#endif - -#endif diff --git a/plugins/amdgpu/drm_mode.h b/plugins/amdgpu/drm_mode.h deleted file mode 100644 index c082810c0..000000000 --- a/plugins/amdgpu/drm_mode.h +++ /dev/null @@ -1,1362 +0,0 @@ -/* - * Copyright (c) 2007 Dave Airlie - * Copyright (c) 2007 Jakob Bornecrantz - * Copyright (c) 2008 Red Hat Inc. - * Copyright (c) 2007-2008 Tungsten Graphics, Inc., Cedar Park, TX., USA - * Copyright (c) 2007-2008 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#ifndef _DRM_MODE_H -#define _DRM_MODE_H - -#include "drm.h" - -#if defined(__cplusplus) -extern "C" { -#endif - -/** - * DOC: overview - * - * DRM exposes many UAPI and structure definitions to have a consistent - * and standardized interface with users. 
- * Userspace can refer to these structure definitions and UAPI formats - * to communicate to drivers. - */ - -#define DRM_CONNECTOR_NAME_LEN 32 -#define DRM_DISPLAY_MODE_LEN 32 -#define DRM_PROP_NAME_LEN 32 - -#define DRM_MODE_TYPE_BUILTIN (1<<0) /* deprecated */ -#define DRM_MODE_TYPE_CLOCK_C ((1<<1) | DRM_MODE_TYPE_BUILTIN) /* deprecated */ -#define DRM_MODE_TYPE_CRTC_C ((1<<2) | DRM_MODE_TYPE_BUILTIN) /* deprecated */ -#define DRM_MODE_TYPE_PREFERRED (1<<3) -#define DRM_MODE_TYPE_DEFAULT (1<<4) /* deprecated */ -#define DRM_MODE_TYPE_USERDEF (1<<5) -#define DRM_MODE_TYPE_DRIVER (1<<6) - -#define DRM_MODE_TYPE_ALL (DRM_MODE_TYPE_PREFERRED | \ - DRM_MODE_TYPE_USERDEF | \ - DRM_MODE_TYPE_DRIVER) - -/* Video mode flags */ -/* bit compatible with the xrandr RR_ definitions (bits 0-13) - * - * ABI warning: Existing userspace really expects - * the mode flags to match the xrandr definitions. Any - * changes that don't match the xrandr definitions will - * likely need a new client cap or some other mechanism - * to avoid breaking existing userspace. This includes - * allocating new flags in the previously unused bits! - */ -#define DRM_MODE_FLAG_PHSYNC (1<<0) -#define DRM_MODE_FLAG_NHSYNC (1<<1) -#define DRM_MODE_FLAG_PVSYNC (1<<2) -#define DRM_MODE_FLAG_NVSYNC (1<<3) -#define DRM_MODE_FLAG_INTERLACE (1<<4) -#define DRM_MODE_FLAG_DBLSCAN (1<<5) -#define DRM_MODE_FLAG_CSYNC (1<<6) -#define DRM_MODE_FLAG_PCSYNC (1<<7) -#define DRM_MODE_FLAG_NCSYNC (1<<8) -#define DRM_MODE_FLAG_HSKEW (1<<9) /* hskew provided */ -#define DRM_MODE_FLAG_BCAST (1<<10) /* deprecated */ -#define DRM_MODE_FLAG_PIXMUX (1<<11) /* deprecated */ -#define DRM_MODE_FLAG_DBLCLK (1<<12) -#define DRM_MODE_FLAG_CLKDIV2 (1<<13) - /* - * When adding a new stereo mode don't forget to adjust DRM_MODE_FLAGS_3D_MAX - * (define not exposed to user space). 
- */ -#define DRM_MODE_FLAG_3D_MASK (0x1f<<14) -#define DRM_MODE_FLAG_3D_NONE (0<<14) -#define DRM_MODE_FLAG_3D_FRAME_PACKING (1<<14) -#define DRM_MODE_FLAG_3D_FIELD_ALTERNATIVE (2<<14) -#define DRM_MODE_FLAG_3D_LINE_ALTERNATIVE (3<<14) -#define DRM_MODE_FLAG_3D_SIDE_BY_SIDE_FULL (4<<14) -#define DRM_MODE_FLAG_3D_L_DEPTH (5<<14) -#define DRM_MODE_FLAG_3D_L_DEPTH_GFX_GFX_DEPTH (6<<14) -#define DRM_MODE_FLAG_3D_TOP_AND_BOTTOM (7<<14) -#define DRM_MODE_FLAG_3D_SIDE_BY_SIDE_HALF (8<<14) - -/* Picture aspect ratio options */ -#define DRM_MODE_PICTURE_ASPECT_NONE 0 -#define DRM_MODE_PICTURE_ASPECT_4_3 1 -#define DRM_MODE_PICTURE_ASPECT_16_9 2 -#define DRM_MODE_PICTURE_ASPECT_64_27 3 -#define DRM_MODE_PICTURE_ASPECT_256_135 4 - -/* Content type options */ -#define DRM_MODE_CONTENT_TYPE_NO_DATA 0 -#define DRM_MODE_CONTENT_TYPE_GRAPHICS 1 -#define DRM_MODE_CONTENT_TYPE_PHOTO 2 -#define DRM_MODE_CONTENT_TYPE_CINEMA 3 -#define DRM_MODE_CONTENT_TYPE_GAME 4 - -/* Aspect ratio flag bitmask (4 bits 22:19) */ -#define DRM_MODE_FLAG_PIC_AR_MASK (0x0F<<19) -#define DRM_MODE_FLAG_PIC_AR_NONE \ - (DRM_MODE_PICTURE_ASPECT_NONE<<19) -#define DRM_MODE_FLAG_PIC_AR_4_3 \ - (DRM_MODE_PICTURE_ASPECT_4_3<<19) -#define DRM_MODE_FLAG_PIC_AR_16_9 \ - (DRM_MODE_PICTURE_ASPECT_16_9<<19) -#define DRM_MODE_FLAG_PIC_AR_64_27 \ - (DRM_MODE_PICTURE_ASPECT_64_27<<19) -#define DRM_MODE_FLAG_PIC_AR_256_135 \ - (DRM_MODE_PICTURE_ASPECT_256_135<<19) - -#define DRM_MODE_FLAG_ALL (DRM_MODE_FLAG_PHSYNC | \ - DRM_MODE_FLAG_NHSYNC | \ - DRM_MODE_FLAG_PVSYNC | \ - DRM_MODE_FLAG_NVSYNC | \ - DRM_MODE_FLAG_INTERLACE | \ - DRM_MODE_FLAG_DBLSCAN | \ - DRM_MODE_FLAG_CSYNC | \ - DRM_MODE_FLAG_PCSYNC | \ - DRM_MODE_FLAG_NCSYNC | \ - DRM_MODE_FLAG_HSKEW | \ - DRM_MODE_FLAG_DBLCLK | \ - DRM_MODE_FLAG_CLKDIV2 | \ - DRM_MODE_FLAG_3D_MASK) - -/* DPMS flags */ -/* bit compatible with the xorg definitions. 
*/ -#define DRM_MODE_DPMS_ON 0 -#define DRM_MODE_DPMS_STANDBY 1 -#define DRM_MODE_DPMS_SUSPEND 2 -#define DRM_MODE_DPMS_OFF 3 - -/* Scaling mode options */ -#define DRM_MODE_SCALE_NONE 0 /* Unmodified timing (display or - software can still scale) */ -#define DRM_MODE_SCALE_FULLSCREEN 1 /* Full screen, ignore aspect */ -#define DRM_MODE_SCALE_CENTER 2 /* Centered, no scaling */ -#define DRM_MODE_SCALE_ASPECT 3 /* Full screen, preserve aspect */ - -/* Dithering mode options */ -#define DRM_MODE_DITHERING_OFF 0 -#define DRM_MODE_DITHERING_ON 1 -#define DRM_MODE_DITHERING_AUTO 2 - -/* Dirty info options */ -#define DRM_MODE_DIRTY_OFF 0 -#define DRM_MODE_DIRTY_ON 1 -#define DRM_MODE_DIRTY_ANNOTATE 2 - -/* Link Status options */ -#define DRM_MODE_LINK_STATUS_GOOD 0 -#define DRM_MODE_LINK_STATUS_BAD 1 - -/* - * DRM_MODE_ROTATE_ - * - * Signals that a drm plane is been rotated degrees in counter - * clockwise direction. - * - * This define is provided as a convenience, looking up the property id - * using the name->prop id lookup is the preferred method. - */ -#define DRM_MODE_ROTATE_0 (1<<0) -#define DRM_MODE_ROTATE_90 (1<<1) -#define DRM_MODE_ROTATE_180 (1<<2) -#define DRM_MODE_ROTATE_270 (1<<3) - -/* - * DRM_MODE_ROTATE_MASK - * - * Bitmask used to look for drm plane rotations. - */ -#define DRM_MODE_ROTATE_MASK (\ - DRM_MODE_ROTATE_0 | \ - DRM_MODE_ROTATE_90 | \ - DRM_MODE_ROTATE_180 | \ - DRM_MODE_ROTATE_270) - -/* - * DRM_MODE_REFLECT_ - * - * Signals that the contents of a drm plane is reflected along the axis, - * in the same way as mirroring. - * See kerneldoc chapter "Plane Composition Properties" for more details. - * - * This define is provided as a convenience, looking up the property id - * using the name->prop id lookup is the preferred method. - */ -#define DRM_MODE_REFLECT_X (1<<4) -#define DRM_MODE_REFLECT_Y (1<<5) - -/* - * DRM_MODE_REFLECT_MASK - * - * Bitmask used to look for drm plane reflections. 
- */ -#define DRM_MODE_REFLECT_MASK (\ - DRM_MODE_REFLECT_X | \ - DRM_MODE_REFLECT_Y) - -/* Content Protection Flags */ -#define DRM_MODE_CONTENT_PROTECTION_UNDESIRED 0 -#define DRM_MODE_CONTENT_PROTECTION_DESIRED 1 -#define DRM_MODE_CONTENT_PROTECTION_ENABLED 2 - -/** - * struct drm_mode_modeinfo - Display mode information. - * @clock: pixel clock in kHz - * @hdisplay: horizontal display size - * @hsync_start: horizontal sync start - * @hsync_end: horizontal sync end - * @htotal: horizontal total size - * @hskew: horizontal skew - * @vdisplay: vertical display size - * @vsync_start: vertical sync start - * @vsync_end: vertical sync end - * @vtotal: vertical total size - * @vscan: vertical scan - * @vrefresh: approximate vertical refresh rate in Hz - * @flags: bitmask of misc. flags, see DRM_MODE_FLAG_* defines - * @type: bitmask of type flags, see DRM_MODE_TYPE_* defines - * @name: string describing the mode resolution - * - * This is the user-space API display mode information structure. For the - * kernel version see struct drm_display_mode. 
- */ -struct drm_mode_modeinfo { - __u32 clock; - __u16 hdisplay; - __u16 hsync_start; - __u16 hsync_end; - __u16 htotal; - __u16 hskew; - __u16 vdisplay; - __u16 vsync_start; - __u16 vsync_end; - __u16 vtotal; - __u16 vscan; - - __u32 vrefresh; - - __u32 flags; - __u32 type; - char name[DRM_DISPLAY_MODE_LEN]; -}; - -struct drm_mode_card_res { - __u64 fb_id_ptr; - __u64 crtc_id_ptr; - __u64 connector_id_ptr; - __u64 encoder_id_ptr; - __u32 count_fbs; - __u32 count_crtcs; - __u32 count_connectors; - __u32 count_encoders; - __u32 min_width; - __u32 max_width; - __u32 min_height; - __u32 max_height; -}; - -struct drm_mode_crtc { - __u64 set_connectors_ptr; - __u32 count_connectors; - - __u32 crtc_id; /**< Id */ - __u32 fb_id; /**< Id of framebuffer */ - - __u32 x; /**< x Position on the framebuffer */ - __u32 y; /**< y Position on the framebuffer */ - - __u32 gamma_size; - __u32 mode_valid; - struct drm_mode_modeinfo mode; -}; - -#define DRM_MODE_PRESENT_TOP_FIELD (1<<0) -#define DRM_MODE_PRESENT_BOTTOM_FIELD (1<<1) - -/* Planes blend with or override other bits on the CRTC */ -struct drm_mode_set_plane { - __u32 plane_id; - __u32 crtc_id; - __u32 fb_id; /* fb object contains surface format type */ - __u32 flags; /* see above flags */ - - /* Signed dest location allows it to be partially off screen */ - __s32 crtc_x; - __s32 crtc_y; - __u32 crtc_w; - __u32 crtc_h; - - /* Source values are 16.16 fixed point */ - __u32 src_x; - __u32 src_y; - __u32 src_h; - __u32 src_w; -}; - -/** - * struct drm_mode_get_plane - Get plane metadata. - * - * Userspace can perform a GETPLANE ioctl to retrieve information about a - * plane. - * - * To retrieve the number of formats supported, set @count_format_types to zero - * and call the ioctl. @count_format_types will be updated with the value. - * - * To retrieve these formats, allocate an array with the memory needed to store - * @count_format_types formats. 
Point @format_type_ptr to this array and call - * the ioctl again (with @count_format_types still set to the value returned in - * the first ioctl call). - */ -struct drm_mode_get_plane { - /** - * @plane_id: Object ID of the plane whose information should be - * retrieved. Set by caller. - */ - __u32 plane_id; - - /** @crtc_id: Object ID of the current CRTC. */ - __u32 crtc_id; - /** @fb_id: Object ID of the current fb. */ - __u32 fb_id; - - /** - * @possible_crtcs: Bitmask of CRTC's compatible with the plane. CRTC's - * are created and they receive an index, which corresponds to their - * position in the bitmask. Bit N corresponds to - * :ref:`CRTC index` N. - */ - __u32 possible_crtcs; - /** @gamma_size: Never used. */ - __u32 gamma_size; - - /** @count_format_types: Number of formats. */ - __u32 count_format_types; - /** - * @format_type_ptr: Pointer to ``__u32`` array of formats that are - * supported by the plane. These formats do not require modifiers. - */ - __u64 format_type_ptr; -}; - -struct drm_mode_get_plane_res { - __u64 plane_id_ptr; - __u32 count_planes; -}; - -#define DRM_MODE_ENCODER_NONE 0 -#define DRM_MODE_ENCODER_DAC 1 -#define DRM_MODE_ENCODER_TMDS 2 -#define DRM_MODE_ENCODER_LVDS 3 -#define DRM_MODE_ENCODER_TVDAC 4 -#define DRM_MODE_ENCODER_VIRTUAL 5 -#define DRM_MODE_ENCODER_DSI 6 -#define DRM_MODE_ENCODER_DPMST 7 -#define DRM_MODE_ENCODER_DPI 8 - -struct drm_mode_get_encoder { - __u32 encoder_id; - __u32 encoder_type; - - __u32 crtc_id; /**< Id of crtc */ - - __u32 possible_crtcs; - __u32 possible_clones; -}; - -/* This is for connectors with multiple signal types. */ -/* Try to match DRM_MODE_CONNECTOR_X as closely as possible. 
*/ -enum drm_mode_subconnector { - DRM_MODE_SUBCONNECTOR_Automatic = 0, /* DVI-I, TV */ - DRM_MODE_SUBCONNECTOR_Unknown = 0, /* DVI-I, TV, DP */ - DRM_MODE_SUBCONNECTOR_VGA = 1, /* DP */ - DRM_MODE_SUBCONNECTOR_DVID = 3, /* DVI-I DP */ - DRM_MODE_SUBCONNECTOR_DVIA = 4, /* DVI-I */ - DRM_MODE_SUBCONNECTOR_Composite = 5, /* TV */ - DRM_MODE_SUBCONNECTOR_SVIDEO = 6, /* TV */ - DRM_MODE_SUBCONNECTOR_Component = 8, /* TV */ - DRM_MODE_SUBCONNECTOR_SCART = 9, /* TV */ - DRM_MODE_SUBCONNECTOR_DisplayPort = 10, /* DP */ - DRM_MODE_SUBCONNECTOR_HDMIA = 11, /* DP */ - DRM_MODE_SUBCONNECTOR_Native = 15, /* DP */ - DRM_MODE_SUBCONNECTOR_Wireless = 18, /* DP */ -}; - -#define DRM_MODE_CONNECTOR_Unknown 0 -#define DRM_MODE_CONNECTOR_VGA 1 -#define DRM_MODE_CONNECTOR_DVII 2 -#define DRM_MODE_CONNECTOR_DVID 3 -#define DRM_MODE_CONNECTOR_DVIA 4 -#define DRM_MODE_CONNECTOR_Composite 5 -#define DRM_MODE_CONNECTOR_SVIDEO 6 -#define DRM_MODE_CONNECTOR_LVDS 7 -#define DRM_MODE_CONNECTOR_Component 8 -#define DRM_MODE_CONNECTOR_9PinDIN 9 -#define DRM_MODE_CONNECTOR_DisplayPort 10 -#define DRM_MODE_CONNECTOR_HDMIA 11 -#define DRM_MODE_CONNECTOR_HDMIB 12 -#define DRM_MODE_CONNECTOR_TV 13 -#define DRM_MODE_CONNECTOR_eDP 14 -#define DRM_MODE_CONNECTOR_VIRTUAL 15 -#define DRM_MODE_CONNECTOR_DSI 16 -#define DRM_MODE_CONNECTOR_DPI 17 -#define DRM_MODE_CONNECTOR_WRITEBACK 18 -#define DRM_MODE_CONNECTOR_SPI 19 -#define DRM_MODE_CONNECTOR_USB 20 - -/** - * struct drm_mode_get_connector - Get connector metadata. - * - * User-space can perform a GETCONNECTOR ioctl to retrieve information about a - * connector. User-space is expected to retrieve encoders, modes and properties - * by performing this ioctl at least twice: the first time to retrieve the - * number of elements, the second time to retrieve the elements themselves. 
- * - * To retrieve the number of elements, set @count_props and @count_encoders to - * zero, set @count_modes to 1, and set @modes_ptr to a temporary struct - * drm_mode_modeinfo element. - * - * To retrieve the elements, allocate arrays for @encoders_ptr, @modes_ptr, - * @props_ptr and @prop_values_ptr, then set @count_modes, @count_props and - * @count_encoders to their capacity. - * - * Performing the ioctl only twice may be racy: the number of elements may have - * changed with a hotplug event in-between the two ioctls. User-space is - * expected to retry the last ioctl until the number of elements stabilizes. - * The kernel won't fill any array which doesn't have the expected length. - * - * **Force-probing a connector** - * - * If the @count_modes field is set to zero and the DRM client is the current - * DRM master, the kernel will perform a forced probe on the connector to - * refresh the connector status, modes and EDID. A forced-probe can be slow, - * might cause flickering and the ioctl will block. - * - * User-space needs to force-probe connectors to ensure their metadata is - * up-to-date at startup and after receiving a hot-plug event. User-space - * may perform a forced-probe when the user explicitly requests it. User-space - * shouldn't perform a forced-probe in other situations. - */ -struct drm_mode_get_connector { - /** @encoders_ptr: Pointer to ``__u32`` array of object IDs. */ - __u64 encoders_ptr; - /** @modes_ptr: Pointer to struct drm_mode_modeinfo array. */ - __u64 modes_ptr; - /** @props_ptr: Pointer to ``__u32`` array of property IDs. */ - __u64 props_ptr; - /** @prop_values_ptr: Pointer to ``__u64`` array of property values. */ - __u64 prop_values_ptr; - - /** @count_modes: Number of modes. */ - __u32 count_modes; - /** @count_props: Number of properties. */ - __u32 count_props; - /** @count_encoders: Number of encoders. */ - __u32 count_encoders; - - /** @encoder_id: Object ID of the current encoder. 
*/ - __u32 encoder_id; - /** @connector_id: Object ID of the connector. */ - __u32 connector_id; - /** - * @connector_type: Type of the connector. - * - * See DRM_MODE_CONNECTOR_* defines. - */ - __u32 connector_type; - /** - * @connector_type_id: Type-specific connector number. - * - * This is not an object ID. This is a per-type connector number. Each - * (type, type_id) combination is unique across all connectors of a DRM - * device. - * - * The (type, type_id) combination is not a stable identifier: the - * type_id can change depending on the driver probe order. - */ - __u32 connector_type_id; - - /** - * @connection: Status of the connector. - * - * See enum drm_connector_status. - */ - __u32 connection; - /** @mm_width: Width of the connected sink in millimeters. */ - __u32 mm_width; - /** @mm_height: Height of the connected sink in millimeters. */ - __u32 mm_height; - /** - * @subpixel: Subpixel order of the connected sink. - * - * See enum subpixel_order. - */ - __u32 subpixel; - - /** @pad: Padding, must be zero. */ - __u32 pad; -}; - -#define DRM_MODE_PROP_PENDING (1<<0) /* deprecated, do not use */ -#define DRM_MODE_PROP_RANGE (1<<1) -#define DRM_MODE_PROP_IMMUTABLE (1<<2) -#define DRM_MODE_PROP_ENUM (1<<3) /* enumerated type with text strings */ -#define DRM_MODE_PROP_BLOB (1<<4) -#define DRM_MODE_PROP_BITMASK (1<<5) /* bitmask of enumerated types */ - -/* non-extended types: legacy bitmask, one bit per type: */ -#define DRM_MODE_PROP_LEGACY_TYPE ( \ - DRM_MODE_PROP_RANGE | \ - DRM_MODE_PROP_ENUM | \ - DRM_MODE_PROP_BLOB | \ - DRM_MODE_PROP_BITMASK) - -/* extended-types: rather than continue to consume a bit per type, - * grab a chunk of the bits to use as integer type id. 
- */ -#define DRM_MODE_PROP_EXTENDED_TYPE 0x0000ffc0 -#define DRM_MODE_PROP_TYPE(n) ((n) << 6) -#define DRM_MODE_PROP_OBJECT DRM_MODE_PROP_TYPE(1) -#define DRM_MODE_PROP_SIGNED_RANGE DRM_MODE_PROP_TYPE(2) - -/* the PROP_ATOMIC flag is used to hide properties from userspace that - * is not aware of atomic properties. This is mostly to work around - * older userspace (DDX drivers) that read/write each prop they find, - * without being aware that this could be triggering a lengthy modeset. - */ -#define DRM_MODE_PROP_ATOMIC 0x80000000 - -/** - * struct drm_mode_property_enum - Description for an enum/bitfield entry. - * @value: numeric value for this enum entry. - * @name: symbolic name for this enum entry. - * - * See struct drm_property_enum for details. - */ -struct drm_mode_property_enum { - __u64 value; - char name[DRM_PROP_NAME_LEN]; -}; - -/** - * struct drm_mode_get_property - Get property metadata. - * - * User-space can perform a GETPROPERTY ioctl to retrieve information about a - * property. The same property may be attached to multiple objects, see - * "Modeset Base Object Abstraction". - * - * The meaning of the @values_ptr field changes depending on the property type. - * See &drm_property.flags for more details. - * - * The @enum_blob_ptr and @count_enum_blobs fields are only meaningful when the - * property has the type &DRM_MODE_PROP_ENUM or &DRM_MODE_PROP_BITMASK. For - * backwards compatibility, the kernel will always set @count_enum_blobs to - * zero when the property has the type &DRM_MODE_PROP_BLOB. User-space must - * ignore these two fields if the property has a different type. - * - * User-space is expected to retrieve values and enums by performing this ioctl - * at least twice: the first time to retrieve the number of elements, the - * second time to retrieve the elements themselves. - * - * To retrieve the number of elements, set @count_values and @count_enum_blobs - * to zero, then call the ioctl. 
@count_values will be updated with the number - * of elements. If the property has the type &DRM_MODE_PROP_ENUM or - * &DRM_MODE_PROP_BITMASK, @count_enum_blobs will be updated as well. - * - * To retrieve the elements themselves, allocate an array for @values_ptr and - * set @count_values to its capacity. If the property has the type - * &DRM_MODE_PROP_ENUM or &DRM_MODE_PROP_BITMASK, allocate an array for - * @enum_blob_ptr and set @count_enum_blobs to its capacity. Calling the ioctl - * again will fill the arrays. - */ -struct drm_mode_get_property { - /** @values_ptr: Pointer to a ``__u64`` array. */ - __u64 values_ptr; - /** @enum_blob_ptr: Pointer to a struct drm_mode_property_enum array. */ - __u64 enum_blob_ptr; - - /** - * @prop_id: Object ID of the property which should be retrieved. Set - * by the caller. - */ - __u32 prop_id; - /** - * @flags: ``DRM_MODE_PROP_*`` bitfield. See &drm_property.flags for - * a definition of the flags. - */ - __u32 flags; - /** - * @name: Symbolic property name. User-space should use this field to - * recognize properties. - */ - char name[DRM_PROP_NAME_LEN]; - - /** @count_values: Number of elements in @values_ptr. */ - __u32 count_values; - /** @count_enum_blobs: Number of elements in @enum_blob_ptr. 
*/ - __u32 count_enum_blobs; -}; - -struct drm_mode_connector_set_property { - __u64 value; - __u32 prop_id; - __u32 connector_id; -}; - -#define DRM_MODE_OBJECT_CRTC 0xcccccccc -#define DRM_MODE_OBJECT_CONNECTOR 0xc0c0c0c0 -#define DRM_MODE_OBJECT_ENCODER 0xe0e0e0e0 -#define DRM_MODE_OBJECT_MODE 0xdededede -#define DRM_MODE_OBJECT_PROPERTY 0xb0b0b0b0 -#define DRM_MODE_OBJECT_FB 0xfbfbfbfb -#define DRM_MODE_OBJECT_BLOB 0xbbbbbbbb -#define DRM_MODE_OBJECT_PLANE 0xeeeeeeee -#define DRM_MODE_OBJECT_ANY 0 - -struct drm_mode_obj_get_properties { - __u64 props_ptr; - __u64 prop_values_ptr; - __u32 count_props; - __u32 obj_id; - __u32 obj_type; -}; - -struct drm_mode_obj_set_property { - __u64 value; - __u32 prop_id; - __u32 obj_id; - __u32 obj_type; -}; - -struct drm_mode_get_blob { - __u32 blob_id; - __u32 length; - __u64 data; -}; - -struct drm_mode_fb_cmd { - __u32 fb_id; - __u32 width; - __u32 height; - __u32 pitch; - __u32 bpp; - __u32 depth; - /* driver specific handle */ - __u32 handle; -}; - -#define DRM_MODE_FB_INTERLACED (1<<0) /* for interlaced framebuffers */ -#define DRM_MODE_FB_MODIFIERS (1<<1) /* enables ->modifier[] */ - -/** - * struct drm_mode_fb_cmd2 - Frame-buffer metadata. - * - * This struct holds frame-buffer metadata. There are two ways to use it: - * - * - User-space can fill this struct and perform a &DRM_IOCTL_MODE_ADDFB2 - * ioctl to register a new frame-buffer. The new frame-buffer object ID will - * be set by the kernel in @fb_id. - * - User-space can set @fb_id and perform a &DRM_IOCTL_MODE_GETFB2 ioctl to - * fetch metadata about an existing frame-buffer. - * - * In case of planar formats, this struct allows up to 4 buffer objects with - * offsets and pitches per plane. The pitch and offset order are dictated by - * the format FourCC as defined by ``drm_fourcc.h``, e.g. 
NV12 is described as: - * - * YUV 4:2:0 image with a plane of 8-bit Y samples followed by an - * interleaved U/V plane containing 8-bit 2x2 subsampled colour difference - * samples. - * - * So it would consist of a Y plane at ``offsets[0]`` and a UV plane at - * ``offsets[1]``. - * - * To accommodate tiled, compressed, etc formats, a modifier can be specified. - * For more information see the "Format Modifiers" section. Note that even - * though it looks like we have a modifier per-plane, we in fact do not. The - * modifier for each plane must be identical. Thus all combinations of - * different data layouts for multi-plane formats must be enumerated as - * separate modifiers. - * - * All of the entries in @handles, @pitches, @offsets and @modifier must be - * zero when unused. Warning, for @offsets and @modifier zero can't be used to - * figure out whether the entry is used or not since it's a valid value (a zero - * offset is common, and a zero modifier is &DRM_FORMAT_MOD_LINEAR). - */ -struct drm_mode_fb_cmd2 { - /** @fb_id: Object ID of the frame-buffer. */ - __u32 fb_id; - /** @width: Width of the frame-buffer. */ - __u32 width; - /** @height: Height of the frame-buffer. */ - __u32 height; - /** - * @pixel_format: FourCC format code, see ``DRM_FORMAT_*`` constants in - * ``drm_fourcc.h``. - */ - __u32 pixel_format; - /** - * @flags: Frame-buffer flags (see &DRM_MODE_FB_INTERLACED and - * &DRM_MODE_FB_MODIFIERS). - */ - __u32 flags; - - /** - * @handles: GEM buffer handle, one per plane. Set to 0 if the plane is - * unused. The same handle can be used for multiple planes. - */ - __u32 handles[4]; - /** @pitches: Pitch (aka. stride) in bytes, one per plane. */ - __u32 pitches[4]; - /** @offsets: Offset into the buffer in bytes, one per plane. */ - __u32 offsets[4]; - /** - * @modifier: Format modifier, one per plane. See ``DRM_FORMAT_MOD_*`` - * constants in ``drm_fourcc.h``. All planes must use the same - * modifier. 
Ignored unless &DRM_MODE_FB_MODIFIERS is set in @flags. - */ - __u64 modifier[4]; -}; - -#define DRM_MODE_FB_DIRTY_ANNOTATE_COPY 0x01 -#define DRM_MODE_FB_DIRTY_ANNOTATE_FILL 0x02 -#define DRM_MODE_FB_DIRTY_FLAGS 0x03 - -#define DRM_MODE_FB_DIRTY_MAX_CLIPS 256 - -/* - * Mark a region of a framebuffer as dirty. - * - * Some hardware does not automatically update display contents - * as a hardware or software draw to a framebuffer. This ioctl - * allows userspace to tell the kernel and the hardware what - * regions of the framebuffer have changed. - * - * The kernel or hardware is free to update more then just the - * region specified by the clip rects. The kernel or hardware - * may also delay and/or coalesce several calls to dirty into a - * single update. - * - * Userspace may annotate the updates, the annotates are a - * promise made by the caller that the change is either a copy - * of pixels or a fill of a single color in the region specified. - * - * If the DRM_MODE_FB_DIRTY_ANNOTATE_COPY flag is given then - * the number of updated regions are half of num_clips given, - * where the clip rects are paired in src and dst. The width and - * height of each one of the pairs must match. - * - * If the DRM_MODE_FB_DIRTY_ANNOTATE_FILL flag is given the caller - * promises that the region specified of the clip rects is filled - * completely with a single color as given in the color argument. - */ - -struct drm_mode_fb_dirty_cmd { - __u32 fb_id; - __u32 flags; - __u32 color; - __u32 num_clips; - __u64 clips_ptr; -}; - -struct drm_mode_mode_cmd { - __u32 connector_id; - struct drm_mode_modeinfo mode; -}; - -#define DRM_MODE_CURSOR_BO 0x01 -#define DRM_MODE_CURSOR_MOVE 0x02 -#define DRM_MODE_CURSOR_FLAGS 0x03 - -/* - * depending on the value in flags different members are used. 
- * - * CURSOR_BO uses - * crtc_id - * width - * height - * handle - if 0 turns the cursor off - * - * CURSOR_MOVE uses - * crtc_id - * x - * y - */ -struct drm_mode_cursor { - __u32 flags; - __u32 crtc_id; - __s32 x; - __s32 y; - __u32 width; - __u32 height; - /* driver specific handle */ - __u32 handle; -}; - -struct drm_mode_cursor2 { - __u32 flags; - __u32 crtc_id; - __s32 x; - __s32 y; - __u32 width; - __u32 height; - /* driver specific handle */ - __u32 handle; - __s32 hot_x; - __s32 hot_y; -}; - -struct drm_mode_crtc_lut { - __u32 crtc_id; - __u32 gamma_size; - - /* pointers to arrays */ - __u64 red; - __u64 green; - __u64 blue; -}; - -struct drm_color_ctm { - /* - * Conversion matrix in S31.32 sign-magnitude - * (not two's complement!) format. - * - * out matrix in - * |R| |0 1 2| |R| - * |G| = |3 4 5| x |G| - * |B| |6 7 8| |B| - */ - __u64 matrix[9]; -}; - -struct drm_color_lut { - /* - * Values are mapped linearly to 0.0 - 1.0 range, with 0x0 == 0.0 and - * 0xffff == 1.0. - */ - __u16 red; - __u16 green; - __u16 blue; - __u16 reserved; -}; - -/** - * struct drm_plane_size_hint - Plane size hints - * @width: The width of the plane in pixel - * @height: The height of the plane in pixel - * - * The plane SIZE_HINTS property blob contains an - * array of struct drm_plane_size_hint. - */ -struct drm_plane_size_hint { - __u16 width; - __u16 height; -}; - -/** - * struct hdr_metadata_infoframe - HDR Metadata Infoframe Data. - * - * HDR Metadata Infoframe as per CTA 861.G spec. This is expected - * to match exactly with the spec. - * - * Userspace is expected to pass the metadata information as per - * the format described in this structure. - */ -struct hdr_metadata_infoframe { - /** - * @eotf: Electro-Optical Transfer Function (EOTF) - * used in the stream. - */ - __u8 eotf; - /** - * @metadata_type: Static_Metadata_Descriptor_ID. - */ - __u8 metadata_type; - /** - * @display_primaries: Color Primaries of the Data. 
- * These are coded as unsigned 16-bit values in units of - * 0.00002, where 0x0000 represents zero and 0xC350 - * represents 1.0000. - * @display_primaries.x: X coordinate of color primary. - * @display_primaries.y: Y coordinate of color primary. - */ - struct { - __u16 x, y; - } display_primaries[3]; - /** - * @white_point: White Point of Colorspace Data. - * These are coded as unsigned 16-bit values in units of - * 0.00002, where 0x0000 represents zero and 0xC350 - * represents 1.0000. - * @white_point.x: X coordinate of whitepoint of color primary. - * @white_point.y: Y coordinate of whitepoint of color primary. - */ - struct { - __u16 x, y; - } white_point; - /** - * @max_display_mastering_luminance: Max Mastering Display Luminance. - * This value is coded as an unsigned 16-bit value in units of 1 cd/m2, - * where 0x0001 represents 1 cd/m2 and 0xFFFF represents 65535 cd/m2. - */ - __u16 max_display_mastering_luminance; - /** - * @min_display_mastering_luminance: Min Mastering Display Luminance. - * This value is coded as an unsigned 16-bit value in units of - * 0.0001 cd/m2, where 0x0001 represents 0.0001 cd/m2 and 0xFFFF - * represents 6.5535 cd/m2. - */ - __u16 min_display_mastering_luminance; - /** - * @max_cll: Max Content Light Level. - * This value is coded as an unsigned 16-bit value in units of 1 cd/m2, - * where 0x0001 represents 1 cd/m2 and 0xFFFF represents 65535 cd/m2. - */ - __u16 max_cll; - /** - * @max_fall: Max Frame Average Light Level. - * This value is coded as an unsigned 16-bit value in units of 1 cd/m2, - * where 0x0001 represents 1 cd/m2 and 0xFFFF represents 65535 cd/m2. - */ - __u16 max_fall; -}; - -/** - * struct hdr_output_metadata - HDR output metadata - * - * Metadata Information to be passed from userspace - */ -struct hdr_output_metadata { - /** - * @metadata_type: Static_Metadata_Descriptor_ID. - */ - __u32 metadata_type; - /** - * @hdmi_metadata_type1: HDR Metadata Infoframe. 
- */ - union { - struct hdr_metadata_infoframe hdmi_metadata_type1; - }; -}; - -/** - * DRM_MODE_PAGE_FLIP_EVENT - * - * Request that the kernel sends back a vblank event (see - * struct drm_event_vblank) with the &DRM_EVENT_FLIP_COMPLETE type when the - * page-flip is done. - */ -#define DRM_MODE_PAGE_FLIP_EVENT 0x01 -/** - * DRM_MODE_PAGE_FLIP_ASYNC - * - * Request that the page-flip is performed as soon as possible, ie. with no - * delay due to waiting for vblank. This may cause tearing to be visible on - * the screen. - * - * When used with atomic uAPI, the driver will return an error if the hardware - * doesn't support performing an asynchronous page-flip for this update. - * User-space should handle this, e.g. by falling back to a regular page-flip. - * - * Note, some hardware might need to perform one last synchronous page-flip - * before being able to switch to asynchronous page-flips. As an exception, - * the driver will return success even though that first page-flip is not - * asynchronous. - */ -#define DRM_MODE_PAGE_FLIP_ASYNC 0x02 -#define DRM_MODE_PAGE_FLIP_TARGET_ABSOLUTE 0x4 -#define DRM_MODE_PAGE_FLIP_TARGET_RELATIVE 0x8 -#define DRM_MODE_PAGE_FLIP_TARGET (DRM_MODE_PAGE_FLIP_TARGET_ABSOLUTE | \ - DRM_MODE_PAGE_FLIP_TARGET_RELATIVE) -/** - * DRM_MODE_PAGE_FLIP_FLAGS - * - * Bitmask of flags suitable for &drm_mode_crtc_page_flip_target.flags. - */ -#define DRM_MODE_PAGE_FLIP_FLAGS (DRM_MODE_PAGE_FLIP_EVENT | \ - DRM_MODE_PAGE_FLIP_ASYNC | \ - DRM_MODE_PAGE_FLIP_TARGET) - -/* - * Request a page flip on the specified crtc. - * - * This ioctl will ask KMS to schedule a page flip for the specified - * crtc. Once any pending rendering targeting the specified fb (as of - * ioctl time) has completed, the crtc will be reprogrammed to display - * that fb after the next vertical refresh. The ioctl returns - * immediately, but subsequent rendering to the current fb will block - * in the execbuffer ioctl until the page flip happens. 
If a page - * flip is already pending as the ioctl is called, EBUSY will be - * returned. - * - * Flag DRM_MODE_PAGE_FLIP_EVENT requests that drm sends back a vblank - * event (see drm.h: struct drm_event_vblank) when the page flip is - * done. The user_data field passed in with this ioctl will be - * returned as the user_data field in the vblank event struct. - * - * Flag DRM_MODE_PAGE_FLIP_ASYNC requests that the flip happen - * 'as soon as possible', meaning that it not delay waiting for vblank. - * This may cause tearing on the screen. - * - * The reserved field must be zero. - */ - -struct drm_mode_crtc_page_flip { - __u32 crtc_id; - __u32 fb_id; - __u32 flags; - __u32 reserved; - __u64 user_data; -}; - -/* - * Request a page flip on the specified crtc. - * - * Same as struct drm_mode_crtc_page_flip, but supports new flags and - * re-purposes the reserved field: - * - * The sequence field must be zero unless either of the - * DRM_MODE_PAGE_FLIP_TARGET_ABSOLUTE/RELATIVE flags is specified. When - * the ABSOLUTE flag is specified, the sequence field denotes the absolute - * vblank sequence when the flip should take effect. When the RELATIVE - * flag is specified, the sequence field denotes the relative (to the - * current one when the ioctl is called) vblank sequence when the flip - * should take effect. NOTE: DRM_IOCTL_WAIT_VBLANK must still be used to - * make sure the vblank sequence before the target one has passed before - * calling this ioctl. The purpose of the - * DRM_MODE_PAGE_FLIP_TARGET_ABSOLUTE/RELATIVE flags is merely to clarify - * the target for when code dealing with a page flip runs during a - * vertical blank period. - */ - -struct drm_mode_crtc_page_flip_target { - __u32 crtc_id; - __u32 fb_id; - __u32 flags; - __u32 sequence; - __u64 user_data; -}; - -/** - * struct drm_mode_create_dumb - Create a KMS dumb buffer for scanout. 
- * @height: buffer height in pixels - * @width: buffer width in pixels - * @bpp: bits per pixel - * @flags: must be zero - * @handle: buffer object handle - * @pitch: number of bytes between two consecutive lines - * @size: size of the whole buffer in bytes - * - * User-space fills @height, @width, @bpp and @flags. If the IOCTL succeeds, - * the kernel fills @handle, @pitch and @size. - */ -struct drm_mode_create_dumb { - __u32 height; - __u32 width; - __u32 bpp; - __u32 flags; - - __u32 handle; - __u32 pitch; - __u64 size; -}; - -/* set up for mmap of a dumb scanout buffer */ -struct drm_mode_map_dumb { - /** Handle for the object being mapped. */ - __u32 handle; - __u32 pad; - /** - * Fake offset to use for subsequent mmap call - * - * This is a fixed-size type for 32/64 compatibility. - */ - __u64 offset; -}; - -struct drm_mode_destroy_dumb { - __u32 handle; -}; - -/** - * DRM_MODE_ATOMIC_TEST_ONLY - * - * Do not apply the atomic commit, instead check whether the hardware supports - * this configuration. - * - * See &drm_mode_config_funcs.atomic_check for more details on test-only - * commits. - */ -#define DRM_MODE_ATOMIC_TEST_ONLY 0x0100 -/** - * DRM_MODE_ATOMIC_NONBLOCK - * - * Do not block while applying the atomic commit. The &DRM_IOCTL_MODE_ATOMIC - * IOCTL returns immediately instead of waiting for the changes to be applied - * in hardware. Note, the driver will still check that the update can be - * applied before retuning. - */ -#define DRM_MODE_ATOMIC_NONBLOCK 0x0200 -/** - * DRM_MODE_ATOMIC_ALLOW_MODESET - * - * Allow the update to result in temporary or transient visible artifacts while - * the update is being applied. Applying the update may also take significantly - * more time than a page flip. All visual artifacts will disappear by the time - * the update is completed, as signalled through the vblank event's timestamp - * (see struct drm_event_vblank). - * - * This flag must be set when the KMS update might cause visible artifacts. 
- * Without this flag such KMS update will return a EINVAL error. What kind of - * update may cause visible artifacts depends on the driver and the hardware. - * User-space that needs to know beforehand if an update might cause visible - * artifacts can use &DRM_MODE_ATOMIC_TEST_ONLY without - * &DRM_MODE_ATOMIC_ALLOW_MODESET to see if it fails. - * - * To the best of the driver's knowledge, visual artifacts are guaranteed to - * not appear when this flag is not set. Some sinks might display visual - * artifacts outside of the driver's control. - */ -#define DRM_MODE_ATOMIC_ALLOW_MODESET 0x0400 - -/** - * DRM_MODE_ATOMIC_FLAGS - * - * Bitfield of flags accepted by the &DRM_IOCTL_MODE_ATOMIC IOCTL in - * &drm_mode_atomic.flags. - */ -#define DRM_MODE_ATOMIC_FLAGS (\ - DRM_MODE_PAGE_FLIP_EVENT |\ - DRM_MODE_PAGE_FLIP_ASYNC |\ - DRM_MODE_ATOMIC_TEST_ONLY |\ - DRM_MODE_ATOMIC_NONBLOCK |\ - DRM_MODE_ATOMIC_ALLOW_MODESET) - -struct drm_mode_atomic { - __u32 flags; - __u32 count_objs; - __u64 objs_ptr; - __u64 count_props_ptr; - __u64 props_ptr; - __u64 prop_values_ptr; - __u64 reserved; - __u64 user_data; -}; - -struct drm_format_modifier_blob { -#define FORMAT_BLOB_CURRENT 1 - /* Version of this blob format */ - __u32 version; - - /* Flags */ - __u32 flags; - - /* Number of fourcc formats supported */ - __u32 count_formats; - - /* Where in this blob the formats exist (in bytes) */ - __u32 formats_offset; - - /* Number of drm_format_modifiers */ - __u32 count_modifiers; - - /* Where in this blob the modifiers exist (in bytes) */ - __u32 modifiers_offset; - - /* __u32 formats[] */ - /* struct drm_format_modifier modifiers[] */ -}; - -struct drm_format_modifier { - /* Bitmask of formats in get_plane format list this info applies to. The - * offset allows a sliding window of which 64 formats (bits). 
- * - * Some examples: - * In today's world with < 65 formats, and formats 0, and 2 are - * supported - * 0x0000000000000005 - * ^-offset = 0, formats = 5 - * - * If the number formats grew to 128, and formats 98-102 are - * supported with the modifier: - * - * 0x0000007c00000000 0000000000000000 - * ^ - * |__offset = 64, formats = 0x7c00000000 - * - */ - __u64 formats; - __u32 offset; - __u32 pad; - - /* The modifier that applies to the >get_plane format list bitmask. */ - __u64 modifier; -}; - -/** - * struct drm_mode_create_blob - Create New blob property - * - * Create a new 'blob' data property, copying length bytes from data pointer, - * and returning new blob ID. - */ -struct drm_mode_create_blob { - /** @data: Pointer to data to copy. */ - __u64 data; - /** @length: Length of data to copy. */ - __u32 length; - /** @blob_id: Return: new property ID. */ - __u32 blob_id; -}; - -/** - * struct drm_mode_destroy_blob - Destroy user blob - * @blob_id: blob_id to destroy - * - * Destroy a user-created blob property. - * - * User-space can release blobs as soon as they do not need to refer to them by - * their blob object ID. For instance, if you are using a MODE_ID blob in an - * atomic commit and you will not make another commit re-using the same ID, you - * can destroy the blob as soon as the commit has been issued, without waiting - * for it to complete. - */ -struct drm_mode_destroy_blob { - __u32 blob_id; -}; - -/** - * struct drm_mode_create_lease - Create lease - * - * Lease mode resources, creating another drm_master. - * - * The @object_ids array must reference at least one CRTC, one connector and - * one plane if &DRM_CLIENT_CAP_UNIVERSAL_PLANES is enabled. Alternatively, - * the lease can be completely empty. 
- */ -struct drm_mode_create_lease { - /** @object_ids: Pointer to array of object ids (__u32) */ - __u64 object_ids; - /** @object_count: Number of object ids */ - __u32 object_count; - /** @flags: flags for new FD (O_CLOEXEC, etc) */ - __u32 flags; - - /** @lessee_id: Return: unique identifier for lessee. */ - __u32 lessee_id; - /** @fd: Return: file descriptor to new drm_master file */ - __u32 fd; -}; - -/** - * struct drm_mode_list_lessees - List lessees - * - * List lesses from a drm_master. - */ -struct drm_mode_list_lessees { - /** - * @count_lessees: Number of lessees. - * - * On input, provides length of the array. - * On output, provides total number. No - * more than the input number will be written - * back, so two calls can be used to get - * the size and then the data. - */ - __u32 count_lessees; - /** @pad: Padding. */ - __u32 pad; - - /** - * @lessees_ptr: Pointer to lessees. - * - * Pointer to __u64 array of lessee ids - */ - __u64 lessees_ptr; -}; - -/** - * struct drm_mode_get_lease - Get Lease - * - * Get leased objects. - */ -struct drm_mode_get_lease { - /** - * @count_objects: Number of leased objects. - * - * On input, provides length of the array. - * On output, provides total number. No - * more than the input number will be written - * back, so two calls can be used to get - * the size and then the data. - */ - __u32 count_objects; - /** @pad: Padding. */ - __u32 pad; - - /** - * @objects_ptr: Pointer to objects. - * - * Pointer to __u32 array of object ids. - */ - __u64 objects_ptr; -}; - -/** - * struct drm_mode_revoke_lease - Revoke lease - */ -struct drm_mode_revoke_lease { - /** @lessee_id: Unique ID of lessee */ - __u32 lessee_id; -}; - -/** - * struct drm_mode_rect - Two dimensional rectangle. - * @x1: Horizontal starting coordinate (inclusive). - * @y1: Vertical starting coordinate (inclusive). - * @x2: Horizontal ending coordinate (exclusive). - * @y2: Vertical ending coordinate (exclusive). 
- * - * With drm subsystem using struct drm_rect to manage rectangular area this - * export it to user-space. - * - * Currently used by drm_mode_atomic blob property FB_DAMAGE_CLIPS. - */ -struct drm_mode_rect { - __s32 x1; - __s32 y1; - __s32 x2; - __s32 y2; -}; - -/** - * struct drm_mode_closefb - * @fb_id: Framebuffer ID. - * @pad: Must be zero. - */ -struct drm_mode_closefb { - __u32 fb_id; - __u32 pad; -}; - -#if defined(__cplusplus) -} -#endif - -#endif diff --git a/plugins/amdgpu/kfd_ioctl.h b/plugins/amdgpu/kfd_ioctl.h index a63d453f0..1a3bcea95 100644 --- a/plugins/amdgpu/kfd_ioctl.h +++ b/plugins/amdgpu/kfd_ioctl.h @@ -23,12 +23,9 @@ #ifndef KFD_IOCTL_H_INCLUDED #define KFD_IOCTL_H_INCLUDED +#include #include -/* Define __user as empty for kernel headers in user-space */ -#define __user -#include "drm.h" - /* * - 1.1 - initial version * - 1.3 - Add SMI events support diff --git a/plugins/cuda/Makefile b/plugins/cuda/Makefile index 2c1944a34..cc3d98ac9 100644 --- a/plugins/cuda/Makefile +++ b/plugins/cuda/Makefile @@ -19,7 +19,7 @@ all: $(DEPS_CUDA) cuda_plugin.so: cuda_plugin.c $(call msg-gen, $@) - $(Q) $(CC) $(PLUGIN_CFLAGS) $(DEFINES) $(shell $(COMPEL) includes) $^ -o $@ $(PLUGIN_INCLUDE) $(PLUGIN_LDFLAGS) + $(Q) $(CC) $(PLUGIN_CFLAGS) $(shell $(COMPEL) includes) $^ -o $@ $(PLUGIN_INCLUDE) $(PLUGIN_LDFLAGS) clean: $(call msg-clean, $@) diff --git a/plugins/cuda/cuda_plugin.c b/plugins/cuda/cuda_plugin.c index 9ccb04224..23c3f4b1a 100644 --- a/plugins/cuda/cuda_plugin.c +++ b/plugins/cuda/cuda_plugin.c @@ -26,13 +26,6 @@ #define ACTION_RESTORE "restore" #define ACTION_UNLOCK "unlock" -typedef enum { - CUDA_TASK_RUNNING = 0, - CUDA_TASK_LOCKED, - CUDA_TASK_CHECKPOINTED, - CUDA_TASK_UNKNOWN = -1 -} cuda_task_state_t; - #define CUDA_CKPT_BUF_SIZE (128) #ifdef LOG_PREFIX @@ -45,12 +38,9 @@ typedef enum { */ bool plugin_disabled = false; -bool plugin_added_to_inventory = false; - struct pid_info { int pid; char checkpointed; - cuda_task_state_t 
initial_task_state; struct list_head list; }; @@ -70,7 +60,7 @@ static void dealloc_pid_buffer(struct list_head *pid_buf) } } -static int add_pid_to_buf(struct list_head *pid_buf, int pid, cuda_task_state_t state) +static int add_pid_to_buf(struct list_head *pid_buf, int pid) { struct pid_info *new = xmalloc(sizeof(*new)); @@ -80,12 +70,25 @@ static int add_pid_to_buf(struct list_head *pid_buf, int pid, cuda_task_state_t new->pid = pid; new->checkpointed = 0; - new->initial_task_state = state; list_add_tail(&new->list, pid_buf); return 0; } +static int update_checkpointed_pid(struct list_head *pid_buf, int pid) +{ + struct pid_info *info; + + list_for_each_entry(info, pid_buf, list) { + if (info->pid == pid) { + info->checkpointed = 1; + return 0; + } + } + + return -1; +} + static int launch_cuda_checkpoint(const char **args, char *buf, int buf_size) { #define READ 0 @@ -93,7 +96,7 @@ static int launch_cuda_checkpoint(const char **args, char *buf, int buf_size) int fd[2], buf_off; if (pipe(fd) != 0) { - pr_perror("Couldn't create pipes for reading cuda-checkpoint output"); + pr_err("Couldn't create pipes for reading cuda-checkpoint output\n"); return -1; } @@ -101,7 +104,7 @@ static int launch_cuda_checkpoint(const char **args, char *buf, int buf_size) int child_pid = fork(); if (child_pid == -1) { - pr_perror("Failed to fork to exec cuda-checkpoint"); + pr_err("Failed to fork to exec cuda-checkpoint\n"); close(fd[READ]); close(fd[WRITE]); return -1; @@ -166,6 +169,7 @@ static int launch_cuda_checkpoint(const char **args, char *buf, int buf_size) } if (WIFSIGNALED(status)) { int sig = WTERMSIG(status); + pr_err("cuda-checkpoint unexpectedly signaled with %d: %s\n", sig, strsignal(sig)); } else if (WIFEXITED(status)) { exit_code = WEXITSTATUS(status); @@ -225,37 +229,6 @@ static int get_cuda_restore_tid(int root_pid) return atoi(pid_out); } -static cuda_task_state_t get_task_state_enum(const char *state_str) -{ - if (strncmp(state_str, "running", 7) == 0) - return 
CUDA_TASK_RUNNING; - - if (strncmp(state_str, "locked", 6) == 0) - return CUDA_TASK_LOCKED; - - if (strncmp(state_str, "checkpointed", 12) == 0) - return CUDA_TASK_CHECKPOINTED; - - pr_err("Unknown CUDA state: %s\n", state_str); - return CUDA_TASK_UNKNOWN; -} - -static cuda_task_state_t get_cuda_state(pid_t pid) -{ - char pid_buf[16]; - char state_str[CUDA_CKPT_BUF_SIZE]; - const char *args[] = { CUDA_CHECKPOINT, "--get-state", "--pid", pid_buf, NULL }; - - snprintf(pid_buf, sizeof(pid_buf), "%d", pid); - - if (launch_cuda_checkpoint(args, state_str, sizeof(state_str))) { - pr_err("Failed to launch cuda-checkpoint to retrieve state: %s\n", state_str); - return CUDA_TASK_UNKNOWN; - } - - return get_task_state_enum(state_str); -} - static int cuda_process_checkpoint_action(int pid, const char *action, unsigned int timeout, char *msg_buf, int buf_size) { @@ -282,8 +255,8 @@ static int interrupt_restore_thread(int restore_tid, k_rtsigset_t *restore_sigse * a compel_interrupt_task() */ if (ptrace(PTRACE_INTERRUPT, restore_tid, NULL, 0)) { - pr_perror("Could not interrupt cuda restore tid %d after checkpoint, process may be in strange state", - restore_tid); + pr_err("Could not interrupt cuda restore tid %d after checkpoint, process may be in strange state\n", + restore_tid); return -1; } @@ -294,12 +267,12 @@ static int interrupt_restore_thread(int restore_tid, k_rtsigset_t *restore_sigse } if (ptrace(PTRACE_SETOPTIONS, restore_tid, NULL, PTRACE_O_SUSPEND_SECCOMP | PTRACE_O_TRACESYSGOOD)) { - pr_perror("Failed to set ptrace options on interrupt for restore tid %d", restore_tid); + pr_err("Failed to set ptrace options on interrupt for restore tid %d\n", restore_tid); return -1; } if (ptrace(PTRACE_SETSIGMASK, restore_tid, sizeof(*restore_sigset), restore_sigset)) { - pr_perror("Unable to restore original sigmask to restore tid %d", restore_tid); + pr_err("Unable to restore original sigmask to restore tid %d\n", restore_tid); return -1; } @@ -311,7 +284,7 @@ static int 
resume_restore_thread(int restore_tid, k_rtsigset_t *save_sigset) k_rtsigset_t block; if (ptrace(PTRACE_GETSIGMASK, restore_tid, sizeof(*save_sigset), save_sigset)) { - pr_perror("Failed to get current sigmask for restore tid %d", restore_tid); + pr_err("Failed to get current sigmask for restore tid %d\n", restore_tid); return -1; } @@ -319,18 +292,18 @@ static int resume_restore_thread(int restore_tid, k_rtsigset_t *save_sigset) ksigdelset(&block, SIGTRAP); if (ptrace(PTRACE_SETSIGMASK, restore_tid, sizeof(block), &block)) { - pr_perror("Failed to block signals on restore tid %d", restore_tid); + pr_err("Failed to block signals on restore tid %d\n", restore_tid); return -1; } // Clear out PTRACE_O_SUSPEND_SECCOMP when we resume the restore thread if (ptrace(PTRACE_SETOPTIONS, restore_tid, NULL, 0)) { - pr_perror("Could not clear ptrace options on restore tid %d", restore_tid); + pr_err("Could not clear ptrace options on restore tid %d\n", restore_tid); return -1; } if (ptrace(PTRACE_CONT, restore_tid, NULL, 0)) { - pr_perror("Could not resume cuda restore tid %d", restore_tid); + pr_err("Could not resume cuda restore tid %d\n", restore_tid); return -1; } @@ -344,11 +317,9 @@ int cuda_plugin_checkpoint_devices(int pid) int int_ret; int status; k_rtsigset_t save_sigset; - struct pid_info *task_info; - bool pid_found = false; if (plugin_disabled) { - return -ENOTSUP; + return 0; } restore_tid = get_cuda_restore_tid(pid); @@ -363,26 +334,6 @@ int cuda_plugin_checkpoint_devices(int pid) return 0; } - /* Check if the process is already in a checkpointed state */ - list_for_each_entry(task_info, &cuda_pids, list) { - if (task_info->pid == pid) { - if (task_info->initial_task_state == CUDA_TASK_CHECKPOINTED) { - pr_info("pid %d already in a checkpointed state\n", pid); - return 0; - } - pid_found = true; - break; - } - } - - if (pid_found == false) { - /* We return an error here. The task should be restored - * to its original state at cuda_plugin_fini(). 
- */ - pr_err("Failed to track pid %d\n", pid); - return -1; - } - pr_info("Checkpointing CUDA devices on pid %d restore_tid %d\n", pid, restore_tid); /* We need to resume the checkpoint thread to prepare the mappings for * checkpointing @@ -390,15 +341,23 @@ int cuda_plugin_checkpoint_devices(int pid) if (resume_restore_thread(restore_tid, &save_sigset)) { return -1; } - - task_info->checkpointed = 1; status = cuda_process_checkpoint_action(pid, ACTION_CHECKPOINT, 0, msg_buf, sizeof(msg_buf)); if (status) { pr_err("CHECKPOINT_DEVICES failed with %s\n", msg_buf); + goto interrupt; } - + status = update_checkpointed_pid(&cuda_pids, pid); + if (status) { + pr_err("Failed to track checkpointed pid %d\n", pid); + status = cuda_process_checkpoint_action(pid, ACTION_RESTORE, 0, msg_buf, sizeof(msg_buf)); + if (status) { + pr_err("Failed to restore process after error %s on pid %d\n", msg_buf, pid); + } + } +interrupt: int_ret = interrupt_restore_thread(restore_tid, &save_sigset); - return status != 0 ? -1 : int_ret; + + return status != 0 ? 
status : int_ret; } CR_PLUGIN_REGISTER_HOOK(CR_PLUGIN_HOOK__CHECKPOINT_DEVICES, cuda_plugin_checkpoint_devices); @@ -406,10 +365,9 @@ int cuda_plugin_pause_devices(int pid) { int restore_tid; char msg_buf[CUDA_CKPT_BUF_SIZE]; - cuda_task_state_t task_state; if (plugin_disabled) { - return -ENOTSUP; + return 0; } restore_tid = get_cuda_restore_tid(pid); @@ -419,34 +377,6 @@ int cuda_plugin_pause_devices(int pid) return 0; } - task_state = get_cuda_state(restore_tid); - if (task_state == CUDA_TASK_UNKNOWN) { - pr_err("Failed to get CUDA state for PID %d\n", restore_tid); - return -1; - } - - if (!plugin_added_to_inventory) { - if (add_inventory_plugin(CR_PLUGIN_DESC.name)) { - pr_err("Failed to add CUDA plugin to inventory image\n"); - return -1; - } - plugin_added_to_inventory = true; - } - - if (task_state == CUDA_TASK_LOCKED) { - pr_info("pid %d already in a locked state\n", pid); - /* Leave this PID in a "locked" state at resume_device() */ - add_pid_to_buf(&cuda_pids, pid, CUDA_TASK_LOCKED); - return 0; - } - - if (task_state == CUDA_TASK_CHECKPOINTED) { - /* We need to skip this PID in cuda_plugin_checkpoint_devices(), - * and leave it in a "checkpoined" state at resume_device(). 
*/ - add_pid_to_buf(&cuda_pids, pid, CUDA_TASK_CHECKPOINTED); - return 0; - } - pr_info("pausing devices on pid %d\n", pid); int status = cuda_process_checkpoint_action(pid, ACTION_LOCK, opts.timeout * 1000, msg_buf, sizeof(msg_buf)); if (status) { @@ -456,7 +386,7 @@ int cuda_plugin_pause_devices(int pid) return -1; } - if (add_pid_to_buf(&cuda_pids, pid, CUDA_TASK_RUNNING)) { + if (add_pid_to_buf(&cuda_pids, pid)) { pr_err("unable to track paused pid %d\n", pid); goto unlock; } @@ -471,7 +401,7 @@ unlock: } CR_PLUGIN_REGISTER_HOOK(CR_PLUGIN_HOOK__PAUSE_DEVICES, cuda_plugin_pause_devices) -int resume_device(int pid, int checkpointed, cuda_task_state_t initial_task_state) +int resume_device(int pid, int checkpointed) { char msg_buf[CUDA_CKPT_BUF_SIZE]; int status; @@ -479,11 +409,6 @@ int resume_device(int pid, int checkpointed, cuda_task_state_t initial_task_stat int int_ret; k_rtsigset_t save_sigset; - if (initial_task_state == CUDA_TASK_UNKNOWN) { - pr_info("skip resume for PID %d (unknown state)\n", pid); - return 0; - } - int restore_tid = get_cuda_restore_tid(pid); if (restore_tid == -1) { pr_info("No need to resume devices on pid %d\n", pid); @@ -503,8 +428,7 @@ int resume_device(int pid, int checkpointed, cuda_task_state_t initial_task_stat return -1; } - if (checkpointed && (initial_task_state == CUDA_TASK_RUNNING || initial_task_state == CUDA_TASK_LOCKED)) { - /* If the process was "locked" or "running" before checkpointing it, we need to restore it */ + if (checkpointed) { status = cuda_process_checkpoint_action(pid, ACTION_RESTORE, 0, msg_buf, sizeof(msg_buf)); if (status) { pr_err("RESUME_DEVICES RESTORE failed with %s\n", msg_buf); @@ -513,13 +437,10 @@ int resume_device(int pid, int checkpointed, cuda_task_state_t initial_task_stat } } - if (initial_task_state == CUDA_TASK_RUNNING) { - /* If the process was "running" before we paused it, we need to unlock it */ - status = cuda_process_checkpoint_action(pid, ACTION_UNLOCK, 0, msg_buf, 
sizeof(msg_buf)); - if (status) { - pr_err("RESUME_DEVICES UNLOCK failed with %s\n", msg_buf); - ret = -1; - } + status = cuda_process_checkpoint_action(pid, ACTION_UNLOCK, 0, msg_buf, sizeof(msg_buf)); + if (status) { + pr_err("RESUME_DEVICES UNLOCK failed with %s\n", msg_buf); + ret = -1; } interrupt: @@ -534,48 +455,16 @@ int cuda_plugin_resume_devices_late(int pid) return -ENOTSUP; } - /* RESUME_DEVICES_LATE is used during `criu restore`. - * Here, we assume that users expect the target process - * to be in a "running" state after restore, even if it was - * in a "locked" or "checkpointed" state during `criu dump`. - */ - return resume_device(pid, 1, CUDA_TASK_RUNNING); + return resume_device(pid, 1); } CR_PLUGIN_REGISTER_HOOK(CR_PLUGIN_HOOK__RESUME_DEVICES_LATE, cuda_plugin_resume_devices_late) -/** - * Check if a CUDA device is available on the system - */ -static bool is_cuda_device_available(void) -{ - const char *gpu_path = "/proc/driver/nvidia/gpus/"; - struct stat sb; - - if (stat(gpu_path, &sb) != 0) - return false; - - return S_ISDIR(sb.st_mode); -} - int cuda_plugin_init(int stage) { int ret; - /* Disable CUDA checkpointing with pre-dump */ - if (stage == CR_PLUGIN_STAGE__PRE_DUMP) { - plugin_disabled = true; - return 0; - } - - if (stage == CR_PLUGIN_STAGE__RESTORE) { - if (!check_and_remove_inventory_plugin(CR_PLUGIN_DESC.name, strlen(CR_PLUGIN_DESC.name))) { - plugin_disabled = true; - return 0; - } - } - - if (!fault_injected(FI_PLUGIN_CUDA_FORCE_ENABLE) && !is_cuda_device_available()) { - pr_info("No GPU device found; CUDA plugin is disabled\n"); + if (!fault_injected(FI_PLUGIN_CUDA_FORCE_ENABLE) && access("/dev/nvidiactl", F_OK)) { + pr_info("/dev/nvidiactl doesn't exist. 
The CUDA plugin is disabled.\n"); plugin_disabled = true; return 0; } @@ -602,7 +491,7 @@ int cuda_plugin_init(int stage) INIT_LIST_HEAD(&cuda_pids); } - set_compel_interrupt_only_mode(); + dont_use_freeze_cgroup(); return 0; } @@ -621,7 +510,7 @@ void cuda_plugin_fini(int stage, int ret) if (stage == CR_PLUGIN_STAGE__DUMP && (opts.final_state == TASK_ALIVE || ret != 0)) { struct pid_info *info; list_for_each_entry(info, &cuda_pids, list) { - resume_device(info->pid, info->checkpointed, info->initial_task_state); + resume_device(info->pid, info->checkpointed); } } if (stage == CR_PLUGIN_STAGE__DUMP) { diff --git a/scripts/build/Dockerfile.alpine b/scripts/build/Dockerfile.alpine index ed883f300..329d7791d 100644 --- a/scripts/build/Dockerfile.alpine +++ b/scripts/build/Dockerfile.alpine @@ -1,14 +1,52 @@ FROM alpine ARG CC=gcc +RUN apk update && apk add \ + $CC \ + bash \ + build-base \ + coreutils \ + procps \ + git \ + gnutls-dev \ + libaio-dev \ + libcap-dev \ + libnet-dev \ + libnl3-dev \ + nftables \ + nftables-dev \ + pkgconfig \ + protobuf-c-dev \ + protobuf-dev \ + py3-pip \ + py3-protobuf \ + python3 \ + sudo \ + libcap-utils \ + libdrm-dev \ + util-linux + COPY . 
/criu WORKDIR /criu - -RUN apk add --no-cache "$CC" && /criu/contrib/dependencies/apk-packages.sh - RUN make mrproper && date && make -j $(nproc) CC="$CC" && date +RUN apk add \ + ip6tables \ + iptables \ + iptables-legacy \ + nftables \ + iproute2 \ + tar \ + bash \ + go \ + e2fsprogs \ + py-yaml \ + py3-importlib-metadata \ + asciidoctor + # The rpc test cases are running as user #1000, let's add the user RUN adduser -u 1000 -D test +RUN pip3 install junit_xml --break-system-packages + RUN make -C test/zdtm diff --git a/scripts/build/Dockerfile.amd-rocm b/scripts/build/Dockerfile.amd-rocm index ed66ae4fe..c466a73d2 100644 --- a/scripts/build/Dockerfile.amd-rocm +++ b/scripts/build/Dockerfile.amd-rocm @@ -56,7 +56,6 @@ RUN apt-get clean -qqy && apt-get update -qqy && apt-get install -qqy --no-insta python-protobuf \ python3-minimal \ python-ipaddress \ - uuid-dev \ curl \ wget \ vim \ diff --git a/scripts/build/Dockerfile.archlinux b/scripts/build/Dockerfile.archlinux index 261bd2d79..405651489 100644 --- a/scripts/build/Dockerfile.archlinux +++ b/scripts/build/Dockerfile.archlinux @@ -5,11 +5,40 @@ ARG CC=gcc # Initialize machine ID RUN systemd-machine-id-setup +RUN pacman -Syu --noconfirm \ + $CC \ + bash \ + make \ + coreutils \ + git \ + gnutls \ + libaio \ + libcap \ + libnet \ + libnl \ + nftables \ + pkgconfig \ + protobuf-c \ + protobuf \ + python-pip \ + python-protobuf \ + which \ + sudo \ + iptables \ + nftables \ + iproute2 \ + tar \ + bash \ + go \ + python-yaml \ + asciidoctor \ + python-junit-xml \ + python-importlib-metadata \ + libdrm \ + diffutils + COPY . 
/criu WORKDIR /criu - -RUN pacman -Syu --noconfirm "$CC" && contrib/dependencies/pacman-packages.sh - RUN make mrproper && date && make -j $(nproc) CC="$CC" && date # The rpc test cases are running as user #1000, let's add the user diff --git a/scripts/build/Dockerfile.centos8 b/scripts/build/Dockerfile.centos8 new file mode 100644 index 000000000..a67212344 --- /dev/null +++ b/scripts/build/Dockerfile.centos8 @@ -0,0 +1,50 @@ +FROM registry.centos.org/centos/centos:8 + +ARG CC=gcc + +RUN yum install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-8.noarch.rpm dnf-plugins-core +RUN yum config-manager --set-enabled powertools +RUN yum install -y --allowerasing \ + asciidoc \ + coreutils \ + chkconfig \ + diffutils \ + findutils \ + gcc \ + git \ + gnutls-devel \ + iproute \ + iptables \ + libaio-devel \ + libasan \ + libcap-devel \ + libnet-devel \ + libnl3-devel \ + libselinux-devel \ + make \ + procps-ng \ + protobuf-c-devel \ + protobuf-devel \ + python3-devel \ + python3-PyYAML \ + python3-protobuf \ + python3-pip \ + sudo \ + tar \ + which \ + xmlto + +RUN alternatives --set python /usr/bin/python3 +ENV PYTHON=python3 + +COPY . /criu +WORKDIR /criu + +RUN make mrproper && date && make -j $(nproc) CC="$CC" && date + +# The rpc test cases are running as user #1000, let's add the user +RUN adduser -u 1000 test + +RUN pip3 install junit_xml + +RUN make -C test/zdtm -j $(nproc) diff --git a/scripts/build/Dockerfile.fedora.tmpl b/scripts/build/Dockerfile.fedora.tmpl index c26a5fd57..9d3bb0f87 100644 --- a/scripts/build/Dockerfile.fedora.tmpl +++ b/scripts/build/Dockerfile.fedora.tmpl @@ -1,10 +1,11 @@ ARG CC=gcc +COPY scripts/ci/prepare-for-fedora-rawhide.sh /bin/prepare-for-fedora-rawhide.sh +RUN /bin/prepare-for-fedora-rawhide.sh + COPY . 
/criu WORKDIR /criu -RUN dnf install -y "$CC" && scripts/ci/prepare-for-fedora-rawhide.sh - RUN make mrproper && date && make -j $(nproc) CC="$CC" && date # The rpc test cases are running as user #1000, let's add the user diff --git a/scripts/build/Dockerfile.hotspot-alpine b/scripts/build/Dockerfile.hotspot-alpine index cd632dddf..cb9332fd0 100644 --- a/scripts/build/Dockerfile.hotspot-alpine +++ b/scripts/build/Dockerfile.hotspot-alpine @@ -1,11 +1,29 @@ FROM docker.io/library/eclipse-temurin:11-alpine ARG CC=gcc +RUN apk update && apk add \ + bash \ + build-base \ + coreutils \ + git \ + gnutls-dev \ + libaio-dev \ + libcap-dev \ + libnet-dev \ + libnl3-dev \ + pkgconfig \ + protobuf-c-dev \ + protobuf-dev \ + python3 \ + sudo \ + maven \ + ip6tables \ + iptables \ + bash + COPY . /criu WORKDIR /criu -RUN apk add --no-cache maven "$CC" && contrib/dependencies/apk-packages.sh - RUN make mrproper && make -j $(nproc) CC="$CC" -ENTRYPOINT ["mvn", "-q", "-f", "test/javaTests/pom.xml", "test"] +ENTRYPOINT mvn -q -f test/javaTests/pom.xml test diff --git a/scripts/build/Dockerfile.hotspot-ubuntu b/scripts/build/Dockerfile.hotspot-ubuntu index a459e1ec7..0318f650f 100644 --- a/scripts/build/Dockerfile.hotspot-ubuntu +++ b/scripts/build/Dockerfile.hotspot-ubuntu @@ -1,11 +1,32 @@ -FROM docker.io/library/eclipse-temurin:11-jammy +FROM docker.io/library/eclipse-temurin:11-focal ARG CC=gcc +COPY scripts/ci/apt-install /bin/apt-install + +RUN apt-install protobuf-c-compiler \ + libprotobuf-c-dev \ + libaio-dev \ + libprotobuf-dev \ + protobuf-compiler \ + libcap-dev \ + libnl-3-dev \ + gdb \ + bash \ + python3-protobuf \ + python3-yaml \ + libnet-dev \ + libnl-route-3-dev \ + libbsd-dev \ + make \ + git \ + pkg-config \ + iptables \ + gcc \ + maven + COPY . 
/criu WORKDIR /criu -RUN contrib/apt-install maven "$CC" && contrib/dependencies/apt-packages.sh - RUN make mrproper && make -j $(nproc) CC="$CC" -ENTRYPOINT ["mvn", "-q", "-f", "test/javaTests/pom.xml", "test"] +ENTRYPOINT mvn -q -f test/javaTests/pom.xml test diff --git a/scripts/build/Dockerfile.linux32.tmpl b/scripts/build/Dockerfile.linux32.tmpl index a37f16e49..13e992642 100644 --- a/scripts/build/Dockerfile.linux32.tmpl +++ b/scripts/build/Dockerfile.linux32.tmpl @@ -1,10 +1,31 @@ ARG CC=gcc +COPY scripts/ci/apt-install /bin/apt-install + +RUN apt-install \ + libnet-dev \ + libnl-route-3-dev \ + $CC \ + bsdmainutils \ + build-essential \ + git-core \ + iptables \ + libaio-dev \ + libcap-dev \ + libgnutls28-dev \ + libgnutls30 \ + libnl-3-dev \ + libprotobuf-c-dev \ + libprotobuf-dev \ + libselinux-dev \ + pkg-config \ + protobuf-c-compiler \ + protobuf-compiler \ + python3-minimal + COPY . /criu WORKDIR /criu -RUN contrib/apt-install "$CC" && contrib/dependencies/apt-packages.sh - RUN uname -m && setarch linux32 uname -m && setarch --list RUN make mrproper && date && \ diff --git a/scripts/build/Dockerfile.openj9-ubuntu b/scripts/build/Dockerfile.openj9-ubuntu index 18664f100..c2cf20a36 100644 --- a/scripts/build/Dockerfile.openj9-ubuntu +++ b/scripts/build/Dockerfile.openj9-ubuntu @@ -1,12 +1,32 @@ -FROM docker.io/library/ibm-semeru-runtimes:open-11-jdk-jammy +FROM docker.io/library/ibm-semeru-runtimes:open-11-jdk-focal ARG CC=gcc -RUN mkdir -p /etc/criu && echo 'ghost-limit 16777216' > /etc/criu/default.conf +COPY scripts/ci/apt-install /bin/apt-install + +RUN apt-install protobuf-c-compiler \ + libprotobuf-c-dev \ + libaio-dev \ + libprotobuf-dev \ + protobuf-compiler \ + libcap-dev \ + libnl-3-dev \ + gdb \ + bash \ + python3-protobuf \ + python3-yaml \ + libnet-dev \ + libnl-route-3-dev \ + libbsd-dev \ + make \ + git \ + pkg-config \ + iptables \ + gcc \ + maven + COPY . 
/criu WORKDIR /criu -RUN contrib/apt-install maven "$CC" && contrib/dependencies/apt-packages.sh - RUN make mrproper && make -j $(nproc) CC="$CC" -ENTRYPOINT ["mvn", "-f", "test/javaTests/pom.xml", "test"] +ENTRYPOINT mvn -q -f test/javaTests/pom.xml test diff --git a/scripts/build/Dockerfile.riscv64-stable-cross.hdr b/scripts/build/Dockerfile.riscv64-stable-cross.hdr deleted file mode 100644 index d4c414023..000000000 --- a/scripts/build/Dockerfile.riscv64-stable-cross.hdr +++ /dev/null @@ -1,5 +0,0 @@ -FROM ubuntu:jammy - -ENV ARCH=riscv64 -ENV DEBIAN_ARCH=riscv64 -ENV CROSS_TRIPLET=riscv64-linux-gnu diff --git a/scripts/build/Dockerfile.riscv64-stable-cross.tmpl b/scripts/build/Dockerfile.riscv64-stable-cross.tmpl deleted file mode 100644 index 8933a6c82..000000000 --- a/scripts/build/Dockerfile.riscv64-stable-cross.tmpl +++ /dev/null @@ -1,31 +0,0 @@ -# Add the cross compiler sources -RUN apt-get clean -y && apt-get update -y && apt-get install -y --no-install-recommends gnupg2 - -RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 871920D1991BC93C 8D69674688B6CB36 B523E5F3FC4E5F2C - -COPY scripts/ci/riscv64-cross/amd64-sources.list /etc/apt/sources.list - -COPY scripts/ci/riscv64-cross/riscv64-sources.list /etc/apt/sources.list.d/ - -RUN dpkg --add-architecture ${DEBIAN_ARCH} && \ - apt-get update -y - -ENV CROSS_COMPILE=${CROSS_TRIPLET}- \ - CROSS_ROOT=/usr/${CROSS_TRIPLET} \ - AS=/usr/bin/${CROSS_TRIPLET}-as \ - AR=/usr/bin/${CROSS_TRIPLET}-ar \ - CC=/usr/bin/${CROSS_TRIPLET}-gcc \ - CPP=/usr/bin/${CROSS_TRIPLET}-cpp \ - CXX=/usr/bin/${CROSS_TRIPLET}-g++ \ - LD=/usr/bin/${CROSS_TRIPLET}-ld \ - FC=/usr/bin/${CROSS_TRIPLET}-gfortran - -ENV PATH="${PATH}:${CROSS_ROOT}/bin" \ - PKG_CONFIG_PATH=/usr/lib/${CROSS_TRIPLET}/pkgconfig - -COPY . 
/criu -WORKDIR /criu - -RUN contrib/dependencies/apt-cross-packages.sh - -RUN make mrproper && date && make -j $(nproc) zdtm && date diff --git a/scripts/build/Dockerfile.stable-cross.tmpl b/scripts/build/Dockerfile.stable-cross.tmpl index 56104081f..078372c38 100644 --- a/scripts/build/Dockerfile.stable-cross.tmpl +++ b/scripts/build/Dockerfile.stable-cross.tmpl @@ -1,7 +1,29 @@ +COPY scripts/ci/apt-install /bin/apt-install + # Add the cross compiler sources RUN echo "deb http://deb.debian.org/debian/ stable main" >> /etc/apt/sources.list && \ dpkg --add-architecture ${DEBIAN_ARCH} +RUN apt-install \ + crossbuild-essential-${DEBIAN_ARCH} \ + libc6-dev-${DEBIAN_ARCH}-cross \ + libc6-${DEBIAN_ARCH}-cross \ + libbz2-dev:${DEBIAN_ARCH} \ + libexpat1-dev:${DEBIAN_ARCH} \ + ncurses-dev:${DEBIAN_ARCH} \ + libssl-dev:${DEBIAN_ARCH} \ + protobuf-c-compiler \ + protobuf-compiler \ + python3-protobuf \ + libnl-3-dev:${DEBIAN_ARCH} \ + libprotobuf-dev:${DEBIAN_ARCH} \ + libnet-dev:${DEBIAN_ARCH} \ + libprotobuf-c-dev:${DEBIAN_ARCH} \ + libcap-dev:${DEBIAN_ARCH} \ + libaio-dev:${DEBIAN_ARCH} \ + libnl-route-3-dev:${DEBIAN_ARCH} \ + libdrm-dev:${DEBIAN_ARCH} + ENV CROSS_COMPILE=${CROSS_TRIPLET}- \ CROSS_ROOT=/usr/${CROSS_TRIPLET} \ AS=/usr/bin/${CROSS_TRIPLET}-as \ @@ -18,8 +40,6 @@ ENV PATH="${PATH}:${CROSS_ROOT}/bin" \ COPY . /criu WORKDIR /criu -RUN contrib/dependencies/apt-cross-packages.sh - # amdgpu_plugin with armv7 is not supported RUN make mrproper && date && \ make -j $(nproc) && \ diff --git a/scripts/build/Dockerfile.tmpl b/scripts/build/Dockerfile.tmpl index 498b99be9..9b53a76aa 100644 --- a/scripts/build/Dockerfile.tmpl +++ b/scripts/build/Dockerfile.tmpl @@ -1,11 +1,38 @@ ARG CC=gcc -COPY . /criu -WORKDIR /criu +COPY scripts/ci/apt-install /bin/apt-install # On Ubuntu, kernel modules such as ip_tables and xt_mark may not be loaded by default # We need to install kmod to enable iptables to load these modules for us. 
-RUN contrib/apt-install "$CC" && contrib/dependencies/apt-packages.sh +RUN apt-install \ + libnet-dev \ + libnl-route-3-dev \ + $CC \ + bsdmainutils \ + build-essential \ + git-core \ + iptables \ + libaio-dev \ + libbsd-dev \ + libcap-dev \ + libgnutls28-dev \ + libgnutls30 \ + libnftables-dev \ + libnl-3-dev \ + libprotobuf-c-dev \ + libprotobuf-dev \ + libselinux-dev \ + iproute2 \ + kmod \ + pkg-config \ + protobuf-c-compiler \ + protobuf-compiler \ + python3-minimal \ + python3-protobuf \ + python3-yaml + +COPY . /criu +WORKDIR /criu RUN git clean -dfx && date && \ # Check single object build diff --git a/scripts/build/Dockerfile.unstable-cross.tmpl b/scripts/build/Dockerfile.unstable-cross.tmpl index 7edb289b6..dacfd96ef 100644 --- a/scripts/build/Dockerfile.unstable-cross.tmpl +++ b/scripts/build/Dockerfile.unstable-cross.tmpl @@ -1,7 +1,28 @@ +COPY scripts/ci/apt-install /bin/apt-install + # Add the cross compiler sources RUN echo "deb http://deb.debian.org/debian/ unstable main" >> /etc/apt/sources.list && \ dpkg --add-architecture ${DEBIAN_ARCH} +RUN apt-install \ + crossbuild-essential-${DEBIAN_ARCH} \ + libc6-dev-${DEBIAN_ARCH}-cross \ + libc6-${DEBIAN_ARCH}-cross \ + libbz2-dev:${DEBIAN_ARCH} \ + libexpat1-dev:${DEBIAN_ARCH} \ + ncurses-dev:${DEBIAN_ARCH} \ + libssl-dev:${DEBIAN_ARCH} \ + protobuf-c-compiler \ + protobuf-compiler \ + python3-protobuf \ + libnl-3-dev:${DEBIAN_ARCH} \ + libprotobuf-dev:${DEBIAN_ARCH} \ + libnet-dev:${DEBIAN_ARCH} \ + libprotobuf-c-dev:${DEBIAN_ARCH} \ + libcap-dev:${DEBIAN_ARCH} \ + libaio-dev:${DEBIAN_ARCH} \ + libnl-route-3-dev:${DEBIAN_ARCH} + ENV CROSS_COMPILE=${CROSS_TRIPLET}- \ CROSS_ROOT=/usr/${CROSS_TRIPLET} \ AS=/usr/bin/${CROSS_TRIPLET}-as \ @@ -18,6 +39,4 @@ ENV PATH="${PATH}:${CROSS_ROOT}/bin" \ COPY . 
/criu WORKDIR /criu -RUN contrib/dependencies/apt-cross-packages.sh - -RUN make mrproper && date && make -j $(nproc) zdtm && date +RUN make mrproper && date && make -j $(nproc) zdtm && date diff --git a/scripts/build/Dockerfile.x86_64.hdr b/scripts/build/Dockerfile.x86_64.hdr index a666f6c26..566b4c916 100644 --- a/scripts/build/Dockerfile.x86_64.hdr +++ b/scripts/build/Dockerfile.x86_64.hdr @@ -1,5 +1,5 @@ FROM ubuntu:24.04 -COPY contrib/apt-install /bin/apt-install +COPY scripts/ci/apt-install /bin/apt-install RUN apt-install gcc-multilib diff --git a/scripts/build/Makefile b/scripts/build/Makefile index a420cea94..bc4a59db1 100644 --- a/scripts/build/Makefile +++ b/scripts/build/Makefile @@ -1,5 +1,5 @@ -ARCHES := x86_64 fedora-asan fedora-rawhide armv7hf -STABLE_CROSS_ARCHES := armv7-stable-cross aarch64-stable-cross ppc64-stable-cross mips64el-stable-cross riscv64-stable-cross +ARCHES := x86_64 fedora-asan fedora-rawhide armv7hf centos8 +STABLE_CROSS_ARCHES := armv7-stable-cross aarch64-stable-cross ppc64-stable-cross mips64el-stable-cross UNSTABLE_CROSS_ARCHES := armv7-unstable-cross aarch64-unstable-cross ppc64-unstable-cross mips64el-unstable-cross NON_CLANG := $(UNSTABLE_CROSS_ARCHES) $(STABLE_CROSS_ARCHES) CREATE_DOCKERFILES := $(ARCHES) $(NON_CLANG) diff --git a/scripts/ci/Makefile b/scripts/ci/Makefile index bad8065f2..9dc0190b3 100644 --- a/scripts/ci/Makefile +++ b/scripts/ci/Makefile @@ -11,7 +11,7 @@ ifdef CLANG target-suffix = -clang endif -TARGETS := alpine fedora-rawhide archlinux +TARGETS := alpine fedora-rawhide centos8 archlinux ZDTM_OPTS := UNAME := $(shell uname -m) export UNAME @@ -30,9 +30,9 @@ endif export CONTAINER_TERMINAL -# Here we assume that any CPU architecture besides x86_64 is running in containers -# that may not support running docker with '--privileged'. ifeq ($(UNAME),x86_64) + # On anything besides x86_64 Travis is running unprivileged LXD + # containers which do not support running docker with '--privileged'. 
CONTAINER_OPTS := --rm $(CONTAINER_TERMINAL) --privileged --userns=host --cgroupns=host -v /lib/modules:/lib/modules --tmpfs /run else CONTAINER_OPTS := --rm -v /lib/modules:/lib/modules --tmpfs /run diff --git a/contrib/apt-install b/scripts/ci/apt-install similarity index 100% rename from contrib/apt-install rename to scripts/ci/apt-install diff --git a/scripts/ci/docker-test.sh b/scripts/ci/docker-test.sh index c1c745544..aaf443afd 100755 --- a/scripts/ci/docker-test.sh +++ b/scripts/ci/docker-test.sh @@ -2,24 +2,6 @@ set -x -e -o pipefail -# Workaround: Docker 28.x and 29.x has a known regression that breaks the checkpoint and -# restore (C/R) feature. Let's install previous, or next major version. See -# https://github.com/moby/moby/issues/50750 for details on the bug. -export DEBIAN_FRONTEND=noninteractive -apt remove -y docker-ce docker-ce-cli -../../contrib/apt-install -y ca-certificates curl -install -m 0755 -d /etc/apt/keyrings -curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc -chmod a+r /etc/apt/keyrings/docker.asc -# shellcheck disable=SC1091 -echo \ - "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu \ - $(. /etc/os-release && echo "${UBUNTU_CODENAME:-$VERSION_CODENAME}") stable" > /etc/apt/sources.list.d/docker.list -apt update -y -apt-cache madison docker-ce | awk '{ print $3 }' -verstr="$(apt-cache madison docker-ce | awk '{ print $3 }' | sort | grep -Ev ':(28|29)\.'| tail -n 1)" -../../contrib/apt-install -y "docker-ce=$verstr" "docker-ce-cli=$verstr" - # docker checkpoint and restore is an experimental feature echo '{ "experimental": true }' > /etc/docker/daemon.json service docker restart diff --git a/scripts/ci/java-test.sh b/scripts/ci/java-test.sh index a5b13a107..7cf704f07 100755 --- a/scripts/ci/java-test.sh +++ b/scripts/ci/java-test.sh @@ -2,8 +2,6 @@ cd ../.. 
|| exit 1 -sudo modprobe iptable_filter - failures="" docker build -t criu-openj9-ubuntu-test:latest -f scripts/build/Dockerfile.openj9-ubuntu . diff --git a/scripts/ci/loongarch64-qemu-test.sh b/scripts/ci/loongarch64-qemu-test.sh index 7e00ab65a..d5646468e 100755 --- a/scripts/ci/loongarch64-qemu-test.sh +++ b/scripts/ci/loongarch64-qemu-test.sh @@ -4,7 +4,7 @@ set -o nounset set -o errexit set -x -../../contrib/apt-install \ +./apt-install \ apt-transport-https \ ca-certificates \ curl \ @@ -19,7 +19,7 @@ add-apt-repository \ $(lsb_release -cs) \ stable test" -../../contrib/apt-install docker-ce +./apt-install docker-ce # shellcheck source=/dev/null . /etc/lsb-release diff --git a/scripts/ci/prepare-for-fedora-rawhide.sh b/scripts/ci/prepare-for-fedora-rawhide.sh index b0b45fcc3..09085c403 100755 --- a/scripts/ci/prepare-for-fedora-rawhide.sh +++ b/scripts/ci/prepare-for-fedora-rawhide.sh @@ -1,21 +1,42 @@ #!/bin/bash set -e -x -contrib/dependencies/dnf-packages.sh dnf install -y \ diffutils \ - e2fsprogs \ findutils \ - gawk \ + gcc \ + git \ + gnutls-devel \ gzip \ - kmod \ + iproute \ + iptables \ + nftables \ + nftables-devel \ + libaio-devel \ + libasan \ + libcap-devel \ + libnet-devel \ + libnl3-devel \ + libbsd-devel \ libselinux-utils \ + make \ procps-ng \ + protobuf-c-devel \ + protobuf-devel \ + python3-PyYAML \ + python3-protobuf \ + python3-junit_xml \ python3-pip \ + python3-importlib-metadata \ python-unversioned-command \ redhat-rpm-config \ sudo \ - tar + tar \ + which \ + e2fsprogs \ + rubygem-asciidoctor \ + libdrm-devel \ + kmod # /tmp is no longer 755 in the rawhide container image and breaks CI - fix it chmod 1777 /tmp diff --git a/scripts/ci/riscv64-cross/amd64-sources.list b/scripts/ci/riscv64-cross/amd64-sources.list deleted file mode 100644 index 72dad920c..000000000 --- a/scripts/ci/riscv64-cross/amd64-sources.list +++ /dev/null @@ -1,10 +0,0 @@ -deb [arch=amd64] http://archive.ubuntu.com/ubuntu/ jammy main restricted -deb 
[arch=amd64] http://archive.ubuntu.com/ubuntu/ jammy-updates main restricted -deb [arch=amd64] http://archive.ubuntu.com/ubuntu/ jammy universe -deb [arch=amd64] http://archive.ubuntu.com/ubuntu/ jammy-updates universe -deb [arch=amd64] http://archive.ubuntu.com/ubuntu/ jammy multiverse -deb [arch=amd64] http://archive.ubuntu.com/ubuntu/ jammy-updates multiverse -deb [arch=amd64] http://archive.ubuntu.com/ubuntu/ jammy-backports main restricted universe multiverse -deb [arch=amd64] http://security.ubuntu.com/ubuntu/ jammy-security main restricted -deb [arch=amd64] http://security.ubuntu.com/ubuntu/ jammy-security universe -deb [arch=amd64] http://security.ubuntu.com/ubuntu/ jammy-security multiverse \ No newline at end of file diff --git a/scripts/ci/riscv64-cross/riscv64-sources.list b/scripts/ci/riscv64-cross/riscv64-sources.list deleted file mode 100644 index 67b8067b6..000000000 --- a/scripts/ci/riscv64-cross/riscv64-sources.list +++ /dev/null @@ -1,42 +0,0 @@ -# See http://help.ubuntu.com/community/UpgradeNotes for how to upgrade to -# newer versions of the distribution. -deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ jammy main restricted -# deb-src http://ports.ubuntu.com/ubuntu-ports/ jammy main restricted - -## Major bug fix updates produced after the final release of the -## distribution. -deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ jammy-updates main restricted -# deb-src http://ports.ubuntu.com/ubuntu-ports/ jammy-updates main restricted - -## N.B. software from this repository is ENTIRELY UNSUPPORTED by the Ubuntu -## team. Also, please note that software in universe WILL NOT receive any -## review or updates from the Ubuntu security team. 
-deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ jammy universe -# deb-src http://ports.ubuntu.com/ubuntu-ports/ jammy universe -deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ jammy-updates universe -# deb-src http://ports.ubuntu.com/ubuntu-ports/ jammy-updates universe - -## N.B. software from this repository is ENTIRELY UNSUPPORTED by the Ubuntu -## team, and may not be under a free licence. Please satisfy yourself as to -## your rights to use the software. Also, please note that software in -## multiverse WILL NOT receive any review or updates from the Ubuntu -## security team. -deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ jammy multiverse -# deb-src http://ports.ubuntu.com/ubuntu-ports/ jammy multiverse -deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ jammy-updates multiverse -# deb-src http://ports.ubuntu.com/ubuntu-ports/ jammy-updates multiverse - -## N.B. software from this repository may not have been tested as -## extensively as that contained in the main release, although it includes -## newer versions of some applications which may provide useful features. -## Also, please note that software in backports WILL NOT receive any review -## or updates from the Ubuntu security team. 
-deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ jammy-backports main restricted universe multiverse -# deb-src http://ports.ubuntu.com/ubuntu-ports/ jammy-backports main restricted universe multiverse - -deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ jammy-security main restricted -# deb-src http://ports.ubuntu.com/ubuntu-ports/ jammy-security main restricted -deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ jammy-security universe -# deb-src http://ports.ubuntu.com/ubuntu-ports/ jammy-security universe -deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ jammy-security multiverse -# deb-src http://ports.ubuntu.com/ubuntu-ports/ jammy-security multiverse \ No newline at end of file diff --git a/scripts/ci/run-ci-tests.sh b/scripts/ci/run-ci-tests.sh index 05a3b71e8..38b7b5097 100755 --- a/scripts/ci/run-ci-tests.sh +++ b/scripts/ci/run-ci-tests.sh @@ -1,7 +1,12 @@ #!/bin/bash set -x -e -CI_PKGS=() +CI_PKGS=(protobuf-c-compiler libprotobuf-c-dev libaio-dev libgnutls28-dev + libgnutls30 libprotobuf-dev protobuf-compiler libcap-dev + libnl-3-dev gdb bash libnet-dev util-linux asciidoctor + libnl-route-3-dev time libbsd-dev python3-yaml + libperl-dev pkg-config python3-protobuf python3-pip + python3-importlib-metadata python3-junit.xml libdrm-dev) X86_64_PKGS=(gcc-multilib) @@ -11,10 +16,13 @@ IFS=" " read -r -a ZDTM_OPTS <<< "$ZDTM_OPTS" UNAME_M=$(uname -m) if [ "$UNAME_M" != "x86_64" ]; then - # Some tests rely on kernel features that may not be available - # when running in a container. Here we assume that x86_64 systems - # are baremetal, and skip the tests for all other CPU architectures. - # The RUN_TESTS environment variable can override this, e.g., for aarch64. + # For Travis only x86_64 seems to be baremetal. Other + # architectures are running in unprivileged LXD containers. + # That seems to block most of CRIU's interfaces. 
+ + # But with the introduction of baremetal aarch64 systems in + # Travis (arch: arm64-graviton2) we can override this using + # an environment variable [ -n "$RUN_TESTS" ] || SKIP_CI_TEST=1 fi @@ -28,13 +36,9 @@ ci_prep () { # not run anymore with 'sudo -u \#1000' if the UID does not exist. adduser -u 1000 --disabled-password --gecos "criutest" criutest || : - # This can fail on aarch64 + # This can fail on aarch64 travis service apport stop || : - # Ubuntu has set up AppArmor in 24.04 so that it blocks use of user - # namespaces by unprivileged users. We need this for some of our tests. - sysctl kernel.apparmor_restrict_unprivileged_userns=0 || : - if [ "$CLANG" = "1" ]; then # clang support CC=clang @@ -52,8 +56,7 @@ ci_prep () { CI_PKGS+=("${X86_64_PKGS[@]}") fi - contrib/dependencies/apt-packages.sh - contrib/apt-install "${CI_PKGS[@]}" + scripts/ci/apt-install "${CI_PKGS[@]}" chmod a+x "$HOME" } @@ -118,14 +121,8 @@ if [ "${CD_TO_TOP}" = "1" ]; then fi export GCOV CC -if [ -z "$COMPILE_FLAGS" ]; then - LOCAL_COMPILE_FLAGS=("V=1") -else - IFS=" " read -r -a LOCAL_COMPILE_FLAGS <<< "$COMPILE_FLAGS" - LOCAL_COMPILE_FLAGS=("V=1" "${LOCAL_COMPILE_FLAGS[@]}") -fi $CC --version -time make CC="$CC" -j4 "${LOCAL_COMPILE_FLAGS[@]}" +time make CC="$CC" -j4 V=1 ./criu/criu -v4 cpuinfo dump || : ./criu/criu -v4 cpuinfo check || : @@ -153,7 +150,6 @@ ulimit -c unlimited cgid=$$ cleanup_cgroup() { ./test/zdtm_umount_cgroups $cgid - dmesg } trap cleanup_cgroup EXIT ./test/zdtm_mount_cgroups $cgid @@ -180,7 +176,7 @@ if [ "${COMPAT_TEST}x" = "yx" ] ; then done apt-get remove "${INCOMPATIBLE_LIBS[@]}" dpkg --add-architecture i386 - contrib/apt-install "${IA32_PKGS[@]}" + scripts/ci/apt-install "${IA32_PKGS[@]}" mkdir -p /usr/lib/x86_64-linux-gnu/ mv "$REFUGE"/* /usr/lib/x86_64-linux-gnu/ fi @@ -255,7 +251,7 @@ if [ -z "$SKIP_EXT_DEV_TEST" ]; then fi make -C test/others/make/ run CC="$CC" -if [ -n "$CIRCLECI" ]; then +if [ -n "$TRAVIS" ] || [ -n "$CIRCLECI" ]; then # GitHub 
Actions (and Cirrus CI) does not provide a real TTY and CRIU will fail with: # Error (criu/tty.c:1014): tty: Don't have tty to inherit session from, aborting make -C test/others/shell-job/ run @@ -366,6 +362,5 @@ make -C plugins/amdgpu/ test_topology_remap ./test/zdtm.py run -t zdtm/static/maps00 -t zdtm/static/maps02 --criu-plugin cuda ./test/zdtm.py run -t zdtm/static/maps00 -t zdtm/static/maps02 --criu-plugin amdgpu ./test/zdtm.py run -t zdtm/static/maps00 -t zdtm/static/maps02 --criu-plugin amdgpu cuda -./test/zdtm.py run -t zdtm/static/busyloop00 --criu-plugin inventory_test_enabled inventory_test_disabled ./test/zdtm.py run -t zdtm/static/sigpending -t zdtm/static/pthread00 --mocked-cuda-checkpoint --fault 138 diff --git a/scripts/ci/vagrant.sh b/scripts/ci/vagrant.sh index 5f2de32b8..3904c51d2 100755 --- a/scripts/ci/vagrant.sh +++ b/scripts/ci/vagrant.sh @@ -6,42 +6,42 @@ set -e set -x -VAGRANT_VERSION=2.4.7 -FEDORA_VERSION=42 -FEDORA_BOX_VERSION=1.1.0 +VAGRANT_VERSION=2.4.1 +FEDORA_VERSION=40 +FEDORA_BOX_VERSION=40.20240414.0 setup() { + if [ -n "$TRAVIS" ]; then + # Load the kvm modules for vagrant to use qemu + modprobe kvm kvm_intel + fi + # Tar up the git checkout to have vagrant rsync it to the VM - tar cf /tmp/criu.tar -C ../../../ criu + tar cf criu.tar ../../../criu # Cirrus has problems with the following certificate. 
wget --no-check-certificate https://releases.hashicorp.com/vagrant/${VAGRANT_VERSION}/vagrant_${VAGRANT_VERSION}-1_"$(dpkg --print-architecture)".deb -O /tmp/vagrant.deb && \ dpkg -i /tmp/vagrant.deb - ../../contrib/apt-install libvirt-clients libvirt-daemon-system libvirt-dev qemu-utils qemu-system \ - ruby build-essential libxml2-dev qemu-kvm rsync ebtables dnsmasq-base openssh-client + ./apt-install libvirt-clients libvirt-daemon-system libvirt-dev qemu-utils qemu \ + ruby build-essential libxml2-dev qemu-kvm rsync ebtables dnsmasq-base \ + openssh-client systemctl restart libvirtd vagrant plugin install vagrant-libvirt - vagrant init cloud-image/fedora-${FEDORA_VERSION} --box-version ${FEDORA_BOX_VERSION} - + vagrant init fedora/${FEDORA_VERSION}-cloud-base --box-version ${FEDORA_BOX_VERSION} # The default libvirt Vagrant VM uses 512MB. - # VMs in our CI typically have around 16GB. + # Travis VMs should have around 7.5GB. # Increasing it to 4GB should work. sed -i Vagrantfile -e 's,^end$, config.vm.provider :libvirt do |libvirt|'"\n"' libvirt.memory = 4096;end'"\n"'end,g' - # Sync /tmp/criu.tar into the VM - # We want to use $HOME without expansion - # shellcheck disable=SC2016 - sed -i Vagrantfile -e 's|^end$| config.vm.provision "file", source: "/tmp/criu.tar", destination: "$HOME/criu.tar"'"\n"'end|g' - vagrant up --provider=libvirt --no-tty mkdir -p /root/.ssh vagrant ssh-config >> /root/.ssh/config - + ssh default sudo dnf upgrade -y + ssh default sudo dnf install -y gcc git gnutls-devel nftables-devel libaio-devel \ + libasan libcap-devel libnet-devel libnl3-devel libbsd-devel make protobuf-c-devel \ + protobuf-devel python3-protobuf python3-importlib-metadata python3-junit_xml \ + rubygem-asciidoctor iptables libselinux-devel libbpf-devel python3-yaml # Disable sssd to avoid zdtm test failures in pty04 due to sssd socket ssh default sudo systemctl mask sssd - - ssh default 'sudo mkdir -p --mode=777 /vagrant && mv $HOME/criu.tar /vagrant && cd /vagrant && 
tar xf criu.tar' - ssh default sudo dnf upgrade -y - ssh default sudo /vagrant/criu/contrib/dependencies/dnf-packages.sh ssh default cat /proc/cmdline } @@ -49,7 +49,7 @@ fedora-no-vdso() { ssh default sudo grubby --update-kernel ALL --args="vdso=0" vagrant reload ssh default cat /proc/cmdline - ssh default 'cd /vagrant/criu; make -j' + ssh default 'cd /vagrant; tar xf criu.tar; cd criu; make -j 4' ssh default 'cd /vagrant/criu/test; sudo ./zdtm.py run -a --keep-going' # This test (pidfd_store_sk) requires pidfd_getfd syscall which is guaranteed in Fedora 33. # It is also skipped from -a because it runs in RPC mode only @@ -57,10 +57,6 @@ fedora-no-vdso() { } fedora-rawhide() { - # Upgrade the kernel to the latest vanilla one - ssh default sudo dnf -y copr enable @kernel-vanilla/stable - ssh default sudo dnf upgrade -y - # The 6.2 kernel of Fedora 38 in combination with rawhide userspace breaks # zdtm/static/socket-tcp-nfconntrack. To activate the new kernel previously # installed this reboots the VM. @@ -78,12 +74,12 @@ fedora-rawhide() { # In the container it is not possible to change the state of selinux. # Let's just disable it for this test run completely. 
ssh default 'sudo setenforce Permissive' - ssh default 'cd /vagrant/criu; sudo -E make -C scripts/ci fedora-rawhide CONTAINER_RUNTIME=podman BUILD_OPTIONS="--security-opt seccomp=unconfined"' + ssh default 'cd /vagrant; tar xf criu.tar; cd criu; sudo -E make -C scripts/ci fedora-rawhide CONTAINER_RUNTIME=podman BUILD_OPTIONS="--security-opt seccomp=unconfined"' } fedora-non-root() { ssh default uname -a - ssh default 'cd /vagrant/criu; make -j' + ssh default 'cd /vagrant; tar xf criu.tar; cd criu; make -j 4' # Setting the capability should be the only line needed to run as non-root on Fedora # In other environments either set /proc/sys/kernel/yama/ptrace_scope to 0 or grant cap_sys_ptrace to criu ssh default 'sudo setcap cap_checkpoint_restore+eip /vagrant/criu/criu/criu' diff --git a/scripts/feature-tests.mak b/scripts/feature-tests.mak index 727e9689e..fb5d2ef7a 100644 --- a/scripts/feature-tests.mak +++ b/scripts/feature-tests.mak @@ -35,6 +35,34 @@ int main(void) } endef +define FEATURE_TEST_STRLCPY + +#include <string.h> + +#ifdef CONFIG_HAS_LIBBSD +# include <bsd/string.h> +#endif + +int main(void) +{ + return strlcpy(NULL, NULL, 0); +} +endef + +define FEATURE_TEST_STRLCAT + +#include <string.h> + +#ifdef CONFIG_HAS_LIBBSD +# include <bsd/string.h> +#endif + +int main(void) +{ + return strlcat(NULL, NULL, 0); +} +endef + define FEATURE_TEST_PTRACE_PEEKSIGINFO #include <sys/ptrace.h> diff --git a/scripts/install-debian-pkgs.sh b/scripts/install-debian-pkgs.sh new file mode 100755 index 000000000..8be49c787 --- /dev/null +++ b/scripts/install-debian-pkgs.sh @@ -0,0 +1,25 @@ +#!/bin/bash +# Install required packages for development environment in Debian Distro + +REQ_PKGS=${REQ_PKGS:=contrib/debian/dev-packages.lst} + +help_msg="Install required packages for development environment in Debian Distro +Usage: + scripts/install-debian-pkgs.sh" + +function print_help() +{ + exec echo -e "$help_msg" +} + +function process() +{ + sudo apt-get update + sudo apt-get install -yq "$( sed 's/\#.*$//' "${REQ_PKGS}" )" +} + +if [ "$1" = 
"--help" ] || [ "$1" = "-h" ]; then + print_help +else + process +fi diff --git a/scripts/nmk/scripts/include.mk b/scripts/nmk/scripts/include.mk index 603c322cf..55c5be307 100644 --- a/scripts/nmk/scripts/include.mk +++ b/scripts/nmk/scripts/include.mk @@ -21,7 +21,6 @@ ARCH ?= $(shell echo $(SUBARCH) | sed \ -e s/mips.*/mips/ \ -e s/sh[234].*/sh/ \ -e s/aarch64.*/aarch64/ \ - -e s/riscv64.*/riscv64/ \ -e s/loongarch64.*/loongarch64/) export SUBARCH ARCH diff --git a/scripts/nmk/scripts/tools.mk b/scripts/nmk/scripts/tools.mk index de5782c13..724204a03 100644 --- a/scripts/nmk/scripts/tools.mk +++ b/scripts/nmk/scripts/tools.mk @@ -23,7 +23,7 @@ MAKE := make MKDIR := mkdir -p AWK := awk PERL := perl -FULL_PYTHON := $(shell command -v python3 2>/dev/null) +FULL_PYTHON := $(shell which python3 2>/dev/null) PYTHON ?= $(shell basename $(FULL_PYTHON)) FIND := find SH := $(shell if [ -x "$$BASH" ]; then echo $$BASH; \ @@ -36,7 +36,7 @@ CTAGS := ctags export RM HOSTLD LD HOSTCC CC CPP AS AR STRIP OBJCOPY OBJDUMP export NM SH MAKE MKDIR AWK PERL PYTHON SH CSCOPE -export USE_ASCIIDOCTOR ?= $(shell command -v asciidoctor 2>/dev/null) +export USE_ASCIIDOCTOR ?= $(shell which asciidoctor 2>/dev/null) # # Footer. diff --git a/scripts/uninstall_module.py b/scripts/uninstall_module.py index 2da63c800..8a9b70892 100755 --- a/scripts/uninstall_module.py +++ b/scripts/uninstall_module.py @@ -10,16 +10,6 @@ import site import subprocess import sys -# With Python 3.13 the subprocess module now uses the `posix_spawn()` -# function which requires loading the `signal` module: -# https://docs.python.org/3/whatsnew/3.13.html#subprocess -# -# We need to load this module here, before PYTHONPATH and sys.path -# have been modified to use the path specified with `--prefix`. 
-# -# flake8: noqa: F401 -import signal - import importlib_metadata diff --git a/test/check_actions.py b/test/check_actions.py new file mode 100755 index 000000000..84d738dbb --- /dev/null +++ b/test/check_actions.py @@ -0,0 +1,41 @@ +#!/usr/bin/env python3 + +import sys +import os + +actions = set(['pre-dump', 'pre-restore', 'post-dump', 'setup-namespaces', \ + 'post-setup-namespaces', 'post-restore', 'post-resume', \ + 'network-lock', 'network-unlock' ]) +errors = [] +af = os.path.dirname(os.path.abspath(__file__)) + '/actions_called.txt' + +for act in open(af): + act = act.strip().split() + act.append('EMPTY') + act.append('EMPTY') + + if act[0] == 'EMPTY': + raise Exception("Error in test, bogus actions line") + + if act[1] == 'EMPTY': + errors.append('Action %s misses CRTOOLS_IMAGE_DIR' % act[0]) + + if act[0] in ('post-dump', 'setup-namespaces', 'post-setup-namespaces', \ + 'post-restore', 'post-resume', 'network-lock', 'network-unlock'): + if act[2] == 'EMPTY': + errors.append('Action %s misses CRTOOLS_INIT_PID' % act[0]) + elif not act[2].isdigit() or int(act[2]) == 0: + errors.append('Action %s PID is not number (%s)' % + (act[0], act[2])) + + actions -= set([act[0]]) + +if actions: + errors.append('Not all actions called: %r' % actions) + +if errors: + for x in errors: + print(x) + sys.exit(1) + +print('PASS') diff --git a/test/cuda-checkpoint/cuda-checkpoint.c b/test/cuda-checkpoint/cuda-checkpoint.c index 3b7ce8b9f..f35a4b41d 100644 --- a/test/cuda-checkpoint/cuda-checkpoint.c +++ b/test/cuda-checkpoint/cuda-checkpoint.c @@ -11,7 +11,6 @@ int main(int argc, char *argv[]) int option_index = 0; static struct option long_options[] = { { "pid", required_argument, 0, 'p' }, - { "get-state", no_argument, 0, 's' }, { "get-restore-tid", no_argument, 0, 'g' }, { "action", required_argument, 0, 'a' }, { "timeout", required_argument, 0, 't' }, @@ -32,9 +31,6 @@ int main(int argc, char *argv[]) case 'a': case 't': break; - case 's': - printf("running\n"); - break; 
case 'h': printf("--action - execute an action"); break; diff --git a/test/inhfd/memfd.py.checkskip b/test/inhfd/memfd.py.checkskip index 32c57d929..27e2b7b15 100755 --- a/test/inhfd/memfd.py.checkskip +++ b/test/inhfd/memfd.py.checkskip @@ -3,5 +3,5 @@ import ctypes libc = ctypes.CDLL(None) -# libc may not have memfd_create (e.g., centos) +# libc may not have memfd_create (e.g., centos on travis) libc.memfd_create("test".encode('utf8'), 0) diff --git a/test/jenkins/actions.sh b/test/jenkins/actions.sh new file mode 100755 index 000000000..801904500 --- /dev/null +++ b/test/jenkins/actions.sh @@ -0,0 +1,8 @@ +# Check that CRIU calls all expected action-script hooks +set -e +source `dirname $0`/criu-lib.sh +# prep +rm -f actions_called.txt +./test/zdtm.py run -t zdtm/static/env00 --script "$(pwd)/test/show_action.sh" || fail +./test/check_actions.py || fail +exit 0 diff --git a/test/jenkins/criu-fault.sh b/test/jenkins/criu-fault.sh index 6ee7ce33a..fc0eddc2b 100755 --- a/test/jenkins/criu-fault.sh +++ b/test/jenkins/criu-fault.sh @@ -40,11 +40,9 @@ fi # also check for the main thread corruption ./test/zdtm.py run -t zdtm/static/fpu00 --fault 134 -f h --norst || fail -# check set_compel_interrupt_only_mode +# check dont_use_freeze_cgroup ./test/zdtm.py run -t zdtm/static/env00 --freezecg zdtm:t --fault 137 ./test/zdtm.py run -t zdtm/static/env00 --freezecg zdtm:t --fault 137 --norst -# check set_compel_interrupt_only_mode when test cgroup is frozen -./test/zdtm.py run -t zdtm/static/env00 --freezecg zdtm:f --fault 137 if ./test/zdtm.py run -t zdtm/static/vfork00 --fault 136 --report report -f h ; then fail diff --git a/test/jenkins/criu-lazy-migration.pipeline b/test/jenkins/criu-lazy-migration.pipeline index 45dc2c776..2c863f170 100644 --- a/test/jenkins/criu-lazy-migration.pipeline +++ b/test/jenkins/criu-lazy-migration.pipeline @@ -21,6 +21,7 @@ pipeline { stage('Test'){ steps { sh './test/jenkins/run_ct sh -c "mount --make-rprivate / && mount --rbind . 
/mnt && cd /mnt && ./test/jenkins/criu-lazy-migration.sh"' + junit 'test/report/criu-testreport*.xml' } } } diff --git a/test/others/action-script/.gitignore b/test/others/action-script/.gitignore index ca9a0b541..c0b6a2490 100644 --- a/test/others/action-script/.gitignore +++ b/test/others/action-script/.gitignore @@ -1 +1 @@ -actions_called.txt +img-dir-* diff --git a/test/others/action-script/Makefile b/test/others/action-script/Makefile index 594edc070..f1ce191db 100644 --- a/test/others/action-script/Makefile +++ b/test/others/action-script/Makefile @@ -1,3 +1,5 @@ run: + @make -C .. loop ./run.sh + .PHONY: run diff --git a/test/others/action-script/action-script.sh b/test/others/action-script/action-script.sh new file mode 100755 index 000000000..aba8292c0 --- /dev/null +++ b/test/others/action-script/action-script.sh @@ -0,0 +1,2 @@ +#!/bin/bash +touch action-hook-"$CRTOOLS_SCRIPT_ACTION" diff --git a/test/others/action-script/check_actions.py b/test/others/action-script/check_actions.py deleted file mode 100755 index 0140d8762..000000000 --- a/test/others/action-script/check_actions.py +++ /dev/null @@ -1,54 +0,0 @@ -#!/usr/bin/env python3 - -import os -import sys - -EXPECTED_ACTIONS = [ - 'pre-dump', - 'network-lock', - 'post-dump', - 'pre-restore', - 'setup-namespaces', - 'post-setup-namespaces', - 'post-restore', - 'network-unlock', - 'pre-resume', - 'post-resume', -] - -errors = [] -actions_called = [] -actions_called_file = os.path.join(os.path.dirname(__file__), 'actions_called.txt') - -with open(actions_called_file) as f: - for index, line in enumerate(f): - parts = line.strip().split() - parts += ['EMPTY'] * (3 - len(parts)) - action_hook, image_dir, pid = parts - - if action_hook == 'EMPTY': - raise ValueError("Error in test: bogus actions line") - - expected_action = EXPECTED_ACTIONS[index] if index < len(EXPECTED_ACTIONS) else None - if action_hook != expected_action: - raise ValueError(f"Invalid action: {action_hook} != {expected_action}") - - 
if image_dir == 'EMPTY': - errors.append(f'Action {action_hook} misses CRTOOLS_IMAGE_DIR') - - if action_hook != 'pre-restore': - if pid == 'EMPTY': - errors.append(f'Action {action_hook} misses CRTOOLS_INIT_PID') - elif not pid.isdigit() or int(pid) == 0: - errors.append(f'Action {action_hook} PID is not a valid number ({pid})') - - actions_called.append(action_hook) - -if actions_called != EXPECTED_ACTIONS: - errors.append(f'Not all actions called: {actions_called!r}') - -if errors: - print('\n'.join(errors)) - sys.exit(1) - -print('Check Actions PASS') diff --git a/test/others/action-script/run.sh b/test/others/action-script/run.sh index 574f6fc86..a82fccf35 100755 --- a/test/others/action-script/run.sh +++ b/test/others/action-script/run.sh @@ -1,11 +1,60 @@ #!/bin/bash -set -e +set -ebm -SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +# shellcheck source=test/others/env.sh +source ../env.sh || exit 1 -rm -f "${SCRIPT_DIR}"/actions_called.txt -"${SCRIPT_DIR}"/../../zdtm.py run -t zdtm/static/env00 -f ns --script "$SCRIPT_DIR/show_action.sh" || exit 1 -"${SCRIPT_DIR}"/check_actions.py || exit 1 +SELFDIR="$(dirname "$(readlink -f "$0")")" +SCRIPT="$SELFDIR/action-script.sh" +IMGDIR="$SELFDIR/img-dir-$$" +rm -rf "$IMGDIR" +mkdir "$IMGDIR" + +trap "cleanup" QUIT TERM INT HUP EXIT + +# shellcheck disable=SC2317 +# https://github.com/koalaman/shellcheck/issues/2660 +function cleanup() +{ + if [[ -n "$PID" ]]; then + kill -9 "$PID" + fi +} + +PID=$(../loop) +if ! $CRIU dump -v4 -o dump.log -t "$PID" -D "$IMGDIR" --action-script "$SCRIPT"; then + echo "Failed to checkpoint process $PID" + cat dump.log + kill -9 "$PID" + exit 1 +fi + +if ! 
$CRIU restore -v4 -o restore.log -D "$IMGDIR" -d --pidfile test.pidfile --action-script "$SCRIPT"; then + echo "CRIU restore failed" + echo FAIL + exit 1 +fi + +PID=$(cat "$IMGDIR"/test.pidfile) + +found_missing_file=false +hooks=("pre-dump" "post-dump" "pre-restore" "pre-resume" "post-restore" "post-resume") + +for hook in "${hooks[@]}" +do + if [ ! -e "$IMGDIR/action-hook-$hook" ]; then + echo "ERROR: action-hook-$hook does not exist" + found_missing_file=true + fi +done + +if [ "$found_missing_file" = true ]; then + exit 1 +fi + +echo PASS + +rm -rf "$IMGDIR" exit 0 diff --git a/test/others/criu-coredump/test.sh b/test/others/criu-coredump/test.sh index 2be82e64c..4399044d7 100755 --- a/test/others/criu-coredump/test.sh +++ b/test/others/criu-coredump/test.sh @@ -45,8 +45,9 @@ function run_test { UNAME_M=$(uname -m) -if [[ "$UNAME_M" != "aarch64" && "$UNAME_M" != "armv7l" &&"$UNAME_M" != "x86_64" ]]; then - echo "criu-coredump only supports aarch64 armv7l, and x86_64. skipping." +if [ "$UNAME_M" != "x86_64" ]; then + # the criu-coredump script is only x86_64 aware + echo "criu-coredump only support x86_64. skipping." 
exit 0 fi diff --git a/test/others/libcriu/.gitignore b/test/others/libcriu/.gitignore index 30a56999c..0f6e52bb4 100644 --- a/test/others/libcriu/.gitignore +++ b/test/others/libcriu/.gitignore @@ -8,4 +8,3 @@ test_pre_dump test_feature_check output/ libcriu.so.* -test_rpc_config diff --git a/test/others/libcriu/Makefile b/test/others/libcriu/Makefile index 927f17c23..ae7330533 100644 --- a/test/others/libcriu/Makefile +++ b/test/others/libcriu/Makefile @@ -3,12 +3,10 @@ include ../../../../criu/Makefile.versions TESTS += test_sub TESTS += test_self TESTS += test_notify -TESTS += test_rpc_config TESTS += test_iters TESTS += test_errno TESTS += test_join_ns TESTS += test_pre_dump -TESTS += test_check TESTS += test_feature_check all: $(TESTS) diff --git a/test/others/libcriu/run.sh b/test/others/libcriu/run.sh index 6b36d4496..f7d363aab 100755 --- a/test/others/libcriu/run.sh +++ b/test/others/libcriu/run.sh @@ -55,7 +55,6 @@ run_test() { run_test test_sub run_test test_self run_test test_notify -run_test test_rpc_config if [ "$(uname -m)" = "x86_64" ]; then # Skip this on aarch64 as aarch64 has no dirty page tracking run_test test_iters @@ -63,7 +62,6 @@ if [ "$(uname -m)" = "x86_64" ]; then fi run_test test_errno run_test test_join_ns -run_test test_check if criu check --feature mem_dirty_track > /dev/null; then export CRIU_FEATURE_MEM_TRACK=1 fi diff --git a/test/others/libcriu/test_check.c b/test/others/libcriu/test_check.c deleted file mode 100644 index 4af3b3630..000000000 --- a/test/others/libcriu/test_check.c +++ /dev/null @@ -1,17 +0,0 @@ -#include -#include "criu.h" -#include "lib.h" - -int main(int argc, char **argv) -{ - int ret; - - printf("--- Start check ---\n"); - criu_init_opts(); - criu_set_service_binary(argv[1]); - - if (criu_check()) - return -1; - - return 0; -} diff --git a/test/others/libcriu/test_rpc_config.c b/test/others/libcriu/test_rpc_config.c deleted file mode 100644 index 529f13637..000000000 --- 
a/test/others/libcriu/test_rpc_config.c +++ /dev/null @@ -1,223 +0,0 @@ -#include "criu.h" -#include "lib.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define RANDOM_NAME_LEN 6 -#define PATH_BUF_SIZE 128 - -static volatile sig_atomic_t stop = 0; -static char base_name[RANDOM_NAME_LEN + 1]; -static char log_file[PATH_BUF_SIZE]; -static char conf_file[PATH_BUF_SIZE]; - -static void handle_signal(int sig) -{ - (void)sig; - stop = 1; -} - -static void generate_random_base_name(void) -{ - const char charset[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; - size_t charset_len; - int i; - - charset_len = sizeof(charset) - 1; - - for (i = 0; i < RANDOM_NAME_LEN; i++) { - base_name[i] = charset[rand() % charset_len]; - } - base_name[i] = '\0'; - - snprintf(log_file, sizeof(log_file), "/tmp/criu-%s.log", base_name); - snprintf(conf_file, sizeof(conf_file), "/tmp/criu-%s.conf", base_name); -} - -static int create_criu_config_file(void) -{ - int fd; - FILE *fp; - - srand(time(NULL)); - generate_random_base_name(); - - fd = open(conf_file, O_CREAT | O_EXCL | O_WRONLY, 0600); - if (fd < 0) { - perror("Failed to create config file"); - return -1; - } - - fp = fdopen(fd, "w"); - if (!fp) { - perror("fdopen failed"); - close(fd); - unlink(conf_file); - return -1; - } - - fprintf(fp, "log-file=%s\n", log_file); - fflush(fp); - fclose(fp); - - return 0; -} - -static int check_log_file(void) -{ - struct stat st; - - if (stat(log_file, &st) < 0) { - perror("Config file does not exist"); - return -1; - } - - if (st.st_size == 0) { - fprintf(stderr, "Config file is empty\n"); - return -1; - } - - unlink(log_file); - return 0; -} - -int main(int argc, char **argv) -{ - int pipe_fd[2]; - pid_t pid; - int ret; - int child_ret; - - int img_fd = open(argv[2], O_DIRECTORY); - if (img_fd < 0) { - perror("Failed to open images directory"); - goto cleanup; - } - - if (create_criu_config_file() < 0) { - 
printf("Failed to create config file\n"); - return EXIT_FAILURE; - } - - if (pipe(pipe_fd) < 0) { - perror("pipe"); - return EXIT_FAILURE; - } - - pid = fork(); - if (pid < 0) { - perror("fork failed"); - return EXIT_FAILURE; - } - - if (pid == 0) { - /** child process **/ - printf(" `- loop: initializing\n"); - - if (setsid() < 0 || signal(SIGUSR1, handle_signal) == SIG_ERR) { - _exit(EXIT_FAILURE); - } - - close(STDIN_FILENO); - close(STDOUT_FILENO); - close(STDERR_FILENO); - close(pipe_fd[0]); - - child_ret = SUCC_ECODE; - write(pipe_fd[1], &child_ret, sizeof(child_ret)); - close(pipe_fd[1]); - - while (!stop) { - sleep(1); - } - - _exit(SUCC_ECODE); - } - - /** parent process **/ - close(pipe_fd[1]); - - ret = -1; - if (read(pipe_fd[0], &ret, sizeof(ret)) != sizeof(ret) || ret != SUCC_ECODE) { - printf("Error starting loop\n"); - goto cleanup; - } - - read(pipe_fd[0], &ret, 1); - close(pipe_fd[0]); - - printf("--- Loop process started (pid: %d) ---\n", pid); - - printf("--- Checkpoint ---\n"); - criu_init_opts(); - criu_set_service_binary(argv[1]); - criu_set_images_dir_fd(img_fd); - criu_set_pid(pid); - criu_set_log_level(CRIU_LOG_DEBUG); - - /* The RPC config file should overwrite the log-file set below */ - printf("Setting dump RPC config file: %s\n", conf_file); - criu_set_config_file(conf_file); - criu_set_log_file("dump.log"); - - ret = criu_dump(); - if (ret < 0) { - what_err_ret_mean(ret); - kill(pid, SIGKILL); - printf("criu dump failed\n"); - goto cleanup; - } - - printf(" `- Dump succeeded\n"); - waitpid(pid, NULL, 0); - - if (check_log_file()) { - printf("Error: log file not overwritten by RPC config file\n"); - goto cleanup; - } - - printf("--- Restore loop ---\n"); - criu_init_opts(); - criu_set_images_dir_fd(img_fd); - criu_set_log_level(CRIU_LOG_DEBUG); - - /* The RPC config file should overwrite the log-file set below */ - printf("Setting restore RPC config file: %s\n", conf_file); - criu_set_config_file(conf_file); - 
criu_set_log_file("restore.log"); - - pid = criu_restore_child(); - if (pid <= 0) { - what_err_ret_mean(pid); - ret = EXIT_FAILURE; - goto cleanup; - } - - printf(" `- Restore returned pid %d\n", pid); - kill(pid, SIGUSR1); - - if (check_log_file()) { - printf("Error: log file not overwritten by RPC config file\n"); - goto cleanup; - } - -cleanup: - if (waitpid(pid, &ret, 0) < 0) { - perror("waitpid failed"); - return EXIT_FAILURE; - } - - printf("Remove RPC config file: %s\n", conf_file); - unlink(conf_file); - return chk_exit(ret, SUCC_ECODE); -} diff --git a/test/others/pycriu/.gitignore b/test/others/pycriu/.gitignore deleted file mode 100644 index 567609b12..000000000 --- a/test/others/pycriu/.gitignore +++ /dev/null @@ -1 +0,0 @@ -build/ diff --git a/test/others/pycriu/Makefile b/test/others/pycriu/Makefile deleted file mode 100644 index b6e3b4814..000000000 --- a/test/others/pycriu/Makefile +++ /dev/null @@ -1,63 +0,0 @@ -.SHELLFLAGS := -eu -o pipefail -c -.ONESHELL: - -CRIU ?= ../../../criu/criu -BUILD_DIR ?= build -SOCKET_NAME ?= criu_service.socket -PIDFILE_NAME ?= pidfile -SERVICE_LOG ?= service.log -PYTHON ?= python3 - -PIDFILE := $(BUILD_DIR)/$(PIDFILE_NAME) -CRIU_SOCKET := $(BUILD_DIR)/$(SOCKET_NAME) -STATUS_FIFO := $(BUILD_DIR)/startup.status -STATUS_FD := 200 - -run: start - cleanup() { $(MAKE) --no-print-directory stop || true; } - trap cleanup EXIT INT TERM - "$(PYTHON)" test_check.py - "$(PYTHON)" test_check_fail.py - "$(PYTHON)" test_check_images_dir.py - "$(PYTHON)" test_check_work_dir_fd.py - -start: - mkdir -p "$(BUILD_DIR)" - if [ -s "$(PIDFILE)" ] && kill -0 "$$(cat "$(PIDFILE)")" 2>/dev/null; then - echo "Service running (PID $$(cat "$(PIDFILE)"))." - exit 0 - fi - if ! 
command -v "$(CRIU)" >/dev/null 2>&1; then - echo "CRIU not found at $(CRIU)" - exit 1 - fi - mkfifo "$(STATUS_FIFO)" - exec $(STATUS_FD)<>"$(STATUS_FIFO)" - "$(CRIU)" service \ - -v4 \ - -W "$(BUILD_DIR)" \ - --address "$(SOCKET_NAME)" \ - -d \ - --pidfile "$(PIDFILE_NAME)" \ - -o "$(SERVICE_LOG)" \ - --status-fd "$(STATUS_FD)" - "$(PYTHON)" read.py "$(STATUS_FIFO)" - -stop: - if [ ! -s "$(PIDFILE)" ]; then - echo "pidfile missing or empty" - exit 1 - fi - pid=$$(cat "$(PIDFILE)") - if kill -0 "$$pid" 2>/dev/null; then - kill -9 "$$pid" || true - fi - rm -f "$(PIDFILE)" "$(CRIU_SOCKET)" "$(STATUS_FIFO)" - -clean: - if [ -s "$(PIDFILE)" ] && kill -0 "$$(cat "$(PIDFILE)")" 2>/dev/null; then - kill -9 "$$(cat "$(PIDFILE)")" || true - fi - rm -rf "$(BUILD_DIR)" - -.PHONY: start stop clean run \ No newline at end of file diff --git a/test/others/pycriu/read.py b/test/others/pycriu/read.py deleted file mode 120000 index c2c1e1365..000000000 --- a/test/others/pycriu/read.py +++ /dev/null @@ -1 +0,0 @@ -../rpc/read.py \ No newline at end of file diff --git a/test/others/pycriu/test_check.py b/test/others/pycriu/test_check.py deleted file mode 100755 index 9888158db..000000000 --- a/test/others/pycriu/test_check.py +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/env python3 -import os -import sys - -# Add ../../../lib so we can import pycriu -SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) -LIB_DIR = os.path.normpath(os.path.join(SCRIPT_DIR, "../../../lib")) -if LIB_DIR not in sys.path: - sys.path.insert(0, LIB_DIR) - -import pycriu # noqa: E402 - -def main(): - socket_path = os.path.join(SCRIPT_DIR, "build", "criu_service.socket") - - criu = pycriu.criu() - criu.use_sk(socket_path) - - try: - criu.check() - except Exception as e: - print(f"FAIL: {e}") - return 1 - - print("PASS") - return 0 - -if __name__ == "__main__": - sys.exit(main()) diff --git a/test/others/pycriu/test_check_fail.py b/test/others/pycriu/test_check_fail.py deleted file mode 100755 index 
b5634c60b..000000000 --- a/test/others/pycriu/test_check_fail.py +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/env python3 -import os -import sys - -# Add ../../../lib so we can import pycriu -SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) -LIB_DIR = os.path.normpath(os.path.join(SCRIPT_DIR, "../../../lib")) -if LIB_DIR not in sys.path: - sys.path.insert(0, LIB_DIR) - -import pycriu # noqa: E402 - -def main(): - socket_path = os.path.join(SCRIPT_DIR, "build", "criu_service.socket") - - criu = pycriu.criu() - criu.use_sk(socket_path) - - # Intentionally set only log_file (no images/work dir) to ensure check() fails - criu.opts.log_file = "check.log" - - try: - criu.check() - except Exception: - print("PASS") - return 0 - - print("FAIL: check() did not fail when log_file is set without images/work dir") - return 1 - -if __name__ == "__main__": - sys.exit(main()) diff --git a/test/others/pycriu/test_check_images_dir.py b/test/others/pycriu/test_check_images_dir.py deleted file mode 100755 index f479c2a88..000000000 --- a/test/others/pycriu/test_check_images_dir.py +++ /dev/null @@ -1,44 +0,0 @@ -#!/usr/bin/env python3 -import os -import sys - -# Add ../../../lib so we can import pycriu -SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) -LIB_DIR = os.path.normpath(os.path.join(SCRIPT_DIR, "../../../lib")) -if LIB_DIR not in sys.path: - sys.path.insert(0, LIB_DIR) - -import pycriu # noqa: E402 - -def _log_path(images_dir, log_file): - return log_file if os.path.isabs(log_file) else os.path.join(images_dir, log_file) - -def main(): - build_dir = os.path.join(SCRIPT_DIR, "build") - socket_path = os.path.join(build_dir, "criu_service.socket") - - criu = pycriu.criu() - criu.use_sk(socket_path) - - criu.opts.images_dir = build_dir - criu.opts.log_file = "check.log" - criu.opts.log_level = 4 - - try: - criu.check() - except Exception as e: - lp = _log_path(build_dir, criu.opts.log_file) - msg = f"FAIL: {e} ({'see log: ' + lp if os.path.exists(lp) else 'no log 
found'})" - print(msg) - return 1 - - lp = _log_path(build_dir, criu.opts.log_file) - if not (os.path.isfile(lp) and os.path.getsize(lp) > 0): - print(f"FAIL: log file missing or empty: {lp}") - return 1 - - print("PASS") - return 0 - -if __name__ == "__main__": - sys.exit(main()) diff --git a/test/others/pycriu/test_check_work_dir_fd.py b/test/others/pycriu/test_check_work_dir_fd.py deleted file mode 100755 index e20a83097..000000000 --- a/test/others/pycriu/test_check_work_dir_fd.py +++ /dev/null @@ -1,44 +0,0 @@ -#!/usr/bin/env python3 -import os -import sys - -# Add ../../../lib so we can import pycriu -SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) -LIB_DIR = os.path.normpath(os.path.join(SCRIPT_DIR, "../../../lib")) -if LIB_DIR not in sys.path: - sys.path.insert(0, LIB_DIR) - -import pycriu # noqa: E402 - -def main(): - build_dir = os.path.join(SCRIPT_DIR, "build") - socket_path = os.path.join(build_dir, "criu_service.socket") - os.makedirs(build_dir, exist_ok=True) - - # Open a directory FD to use as work_dir_fd (prefer O_PATH if available) - flags = getattr(os, "O_PATH", 0) or os.O_RDONLY - fd = os.open(build_dir, flags) - - criu = pycriu.criu() - criu.use_sk(socket_path) - - criu.opts.work_dir_fd = fd - criu.opts.log_file = "check.log" - criu.opts.log_level = 4 - - try: - criu.check() - except Exception as e: - print(f"FAIL: {e}") - return 1 - finally: - try: - os.close(fd) - except Exception: - pass - - print("PASS") - return 0 - -if __name__ == "__main__": - sys.exit(main()) diff --git a/test/others/rpc/Makefile b/test/others/rpc/Makefile index c0e56d528..69537bb0d 100644 --- a/test/others/rpc/Makefile +++ b/test/others/rpc/Makefile @@ -8,18 +8,9 @@ PYTHON ?= python3 run: all @make -C .. 
loop - mkdir -p build/{imgs_errno,imgs_ps,imgs_c,imgs_loop,imgs_py} + mkdir -p build chmod a+rwx build - chmod a+rwx build/{imgs_errno,imgs_ps,imgs_c,imgs_loop,imgs_py} rm -f build/status - rm -f build/_marker_* - @# Create all log files to be accessible for anybody - @# so that they can be displayed by any user. - for i in imgs_errno/criu.log imgs_ps/page-server.log imgs_ps/dump.log \ - imgs_c/restore-c.log imgs_loop/criu.log imgs_loop/dump-loop.log \ - imgs_py/criu.log imgs_py/restore-py.log imgs_c/criu.log service.log; do \ - touch build/$$i; chmod 666 build/$$i; \ - done sudo -g '#1000' -u '#1000' mkfifo build/status @# Need to start the criu daemon here to access the pidfile. @# The script read.py is used to wait until 'criu service' @@ -48,7 +39,7 @@ rpc_pb2.py: rpc.proto protoc --proto_path=. --python_out=. rpc.proto rpc.pb-c.c: rpc.proto - protoc --proto_path=. --c_out=. rpc.proto + protoc-c --proto_path=. --c_out=. rpc.proto clean: rm -rf build rpc.pb-c.o test-c.o test-c rpc.pb-c.c rpc.pb-c.h rpc_pb2.py rpc_pb2.pyc criu diff --git a/test/others/rpc/action-script.sh b/test/others/rpc/action-script.sh deleted file mode 100755 index 991e315de..000000000 --- a/test/others/rpc/action-script.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/bash - -MARKER_FILE="_marker_${CRTOOLS_SCRIPT_ACTION}" - -if [ -z "$CRTOOLS_SCRIPT_ACTION" ]; then - echo "Error: CRTOOLS_SCRIPT_ACTION is not set." - exit 2 -fi - -if [ ! 
-f "$MARKER_FILE" ]; then - touch "$MARKER_FILE" -else - echo "Error: Running the same action hook for the second time" - exit 1 -fi - -exit 0 diff --git a/test/others/rpc/config_file.py b/test/others/rpc/config_file.py index c1a8276d8..6cffe270d 100755 --- a/test/others/rpc/config_file.py +++ b/test/others/rpc/config_file.py @@ -13,9 +13,6 @@ from setup_swrk import setup_swrk log_file = 'config_file_test.log' does_not_exist = 'does-not.exist' -script_path = os.path.dirname(os.path.abspath(__file__)) -action_script_file = os.path.join(script_path, 'action-script.sh') - def setup_config_file(content): # Creating a temporary file which will be used as configuration file. @@ -92,37 +89,29 @@ def test_broken_configuration_file(): sys.exit(-1) -def search_in_log_file(log_path, message): - with open(log_path) as f: +def search_in_log_file(log, message): + with open(os.path.join(args['dir'], log)) as f: if message not in f.read(): - print('FAIL: Missing the expected error message (%s) in the log file' % message) + print( + 'FAIL: Missing the expected error message (%s) in the log file' + % message) sys.exit(-1) -def print_log_file(log_path): - print("\n--- Begin log file: %s ---" % log_path) - with open(log_path, 'r') as f: - print(f.read()) - print("--- End log file ---\n") - - def check_results(resp, log): # Check if the specified log file exists - log_path = os.path.join(args['dir'], log) - if not os.path.isfile(log_path): + if not os.path.isfile(os.path.join(args['dir'], log)): print('FAIL: Expected log file %s does not exist' % log) sys.exit(-1) # Dump should have failed with: 'The criu itself is within dumped tree' if resp.type != rpc.DUMP: print('FAIL: Unexpected msg type %r' % resp.type) - print_log_file(log_path) sys.exit(-1) if 'The criu itself is within dumped tree' not in resp.cr_errmsg: print('FAIL: Missing the expected error message in RPC response') - print_log_file(log_path) sys.exit(-1) # Look into the log file for the same message - 
search_in_log_file(log_path, 'The criu itself is within dumped tree') + search_in_log_file(log, 'The criu itself is within dumped tree') def test_rpc_without_configuration_file(): @@ -167,7 +156,6 @@ def test_rpc_with_configuration_file_overwriting_rpc(): # file settings in the default configuration. log = does_not_exist content = 'log-file ' + log + '\n' - content += 'action-script ' + action_script_file + '\n' content += 'no-tcp-established\nno-shell-job' path = setup_config_file(content) # Only set the configuration file via RPC; @@ -192,18 +180,11 @@ args = vars(parser.parse_args()) cleanup_output(args['dir']) -print("*** Test broken config file ***") test_broken_configuration_file() cleanup_output(args['dir']) - -print("*** Test RPC without config file ***") test_rpc_without_configuration_file() cleanup_output(args['dir']) - -print("*** Test RPC with config file ***") test_rpc_with_configuration_file() cleanup_output(args['dir']) - -print("*** Test configuration file overwriting RPC ***") test_rpc_with_configuration_file_overwriting_rpc() cleanup_output(args['dir']) diff --git a/test/others/rpc/errno.py b/test/others/rpc/errno.py index ea841199f..b600b6d1c 100755 --- a/test/others/rpc/errno.py +++ b/test/others/rpc/errno.py @@ -40,7 +40,7 @@ class test: resp.ParseFromString(self.s.recv(self._MAX_MSG_SIZE)) return resp - def check_resp(self, resp, typ, err, errmsg = None): + def check_resp(self, resp, typ, err): if resp.type != typ: raise Exception('Unexpected response type ' + str(resp.type)) @@ -50,9 +50,6 @@ class test: if err and resp.cr_errno != err: raise Exception('Unexpected cr_errno ' + str(resp.cr_errno)) - if errmsg and errmsg not in str(resp.cr_errmsg): - raise Exception('Unexpected cr_msg \'' + str(resp.cr_errmsg) + '\'') - def no_process(self): print('Try to dump unexisting process') # Get pid of non-existing process. 
@@ -70,7 +67,6 @@ class test: req = self.get_base_req() req.type = rpc.DUMP req.opts.pid = pid - req.opts.network_lock = rpc.SKIP self.send_req(req) resp = self.recv_resp() @@ -88,7 +84,6 @@ class test: req = self.get_base_req() req.type = rpc.DUMP req.opts.leave_running = True - req.opts.network_lock = rpc.SKIP self.send_req(req) resp = self.recv_resp() @@ -135,27 +130,11 @@ class test: print('Success') - def child_first_err(self): - print('Receive correct first error message') - - req = self.get_base_req() - req.type = rpc.CHECK - # Log file must not have subdirectory - req.opts.log_file = "/foo/bar.log" - - self.send_req(req) - resp = self.recv_resp() - - self.check_resp(resp, rpc.CHECK, None, "No subdirs are allowed in log_file name") - - print('Success') - def run(self): self.no_process() self.process_exists() self.bad_options() self.bad_request() - self.child_first_err() t = test() diff --git a/test/others/rpc/ps_test.py b/test/others/rpc/ps_test.py index 259f22e77..daeda49bc 100755 --- a/test/others/rpc/ps_test.py +++ b/test/others/rpc/ps_test.py @@ -23,7 +23,6 @@ req.type = rpc.PAGE_SERVER req.opts.log_file = 'page-server.log' req.opts.log_level = 4 req.opts.images_dir_fd = os.open(args['dir'], os.O_DIRECTORY) -req.opts.network_lock = rpc.SKIP s.send(req.SerializeToString()) diff --git a/test/others/rpc/read.py b/test/others/rpc/read.py old mode 100755 new mode 100644 diff --git a/test/others/rpc/run.sh b/test/others/rpc/run.sh index b6158dfea..afd4fb5e3 100755 --- a/test/others/rpc/run.sh +++ b/test/others/rpc/run.sh @@ -3,7 +3,6 @@ set -e CRIU=./criu -FAIL=1 export PROTODIR=`readlink -f "${PWD}/../../protobuf"` @@ -20,13 +19,6 @@ function stop_server { title_print "Shutdown service server" kill -SIGTERM $(cat build/pidfile) unlink build/pidfile - if [ "${FAIL}" == "1" ]; then - for i in build/output*; do - echo "File: $i" - cat $i - done - find . 
-name "*.log" -print -exec cat {} \; || true - fi } function test_c { @@ -59,7 +51,7 @@ function test_restore_loop { title_print "Dump loop process" # So theoretically '-j' (--shell-job) should not be necessary, but on alpine # this test fails without it. - ${CRIU} dump -j -v4 -o dump-loop.log --network-lock skip -D build/imgs_loop -t ${P} + ${CRIU} dump -j -v4 -o dump-loop.log -D build/imgs_loop -t ${P} title_print "Run restore-loop" ./restore-loop.py build/criu_service.socket build/imgs_loop @@ -88,8 +80,6 @@ test_restore_loop test_ps test_errno -FAIL=0 - stop_server trap 'echo "Success"' EXIT diff --git a/test/others/rpc/test-c.c b/test/others/rpc/test-c.c index b3507975f..792dbbf9c 100644 --- a/test/others/rpc/test-c.c +++ b/test/others/rpc/test-c.c @@ -99,8 +99,6 @@ int main(int argc, char *argv[]) req.opts->images_dir_fd = dir_fd; req.opts->has_log_level = true; req.opts->log_level = 4; - req.opts->has_network_lock = true; - req.opts->network_lock = CRIU_NETWORK_LOCK_METHOD__SKIP; /* * Connect to service socket diff --git a/test/others/rpc/test.py b/test/others/rpc/test.py index 6f692f755..ce8411bc6 100755 --- a/test/others/rpc/test.py +++ b/test/others/rpc/test.py @@ -24,7 +24,6 @@ req.type = rpc.DUMP req.opts.leave_running = True req.opts.log_level = 4 req.opts.images_dir_fd = os.open(args['dir'], os.O_DIRECTORY) -req.opts.network_lock = rpc.SKIP # Send request s.send(req.SerializeToString()) diff --git a/test/others/unix-callback/Makefile b/test/others/unix-callback/Makefile index 984044077..25bcf228b 100644 --- a/test/others/unix-callback/Makefile +++ b/test/others/unix-callback/Makefile @@ -4,7 +4,7 @@ run: all ./run.sh unix.pb-c.c: unix.proto - protoc --proto_path=. --c_out=. unix.proto + protoc-c --proto_path=. --c_out=. 
unix.proto unix-lib.so: unix-lib.c unix.pb-c.c gcc -g -Werror -Wall -shared -nostartfiles unix-lib.c unix.pb-c.c -o unix-lib.so -iquote ../../../criu/include -fPIC diff --git a/test/plugins/Makefile b/test/plugins/Makefile index 4f620ad50..7827b655c 100644 --- a/test/plugins/Makefile +++ b/test/plugins/Makefile @@ -1,13 +1,5 @@ SRC_DIR := ../../plugins -PLUGIN_TARGETS := inventory_test_enabled_plugin.so inventory_test_disabled_plugin.so amdgpu_plugin.so cuda_plugin.so - -ARCH := x86 - -PLUGIN_INCLUDE := -iquote../../include -PLUGIN_INCLUDE += -iquote../../criu/include -PLUGIN_INCLUDE += -iquote../../criu/arch/$(ARCH)/include/ -PLUGIN_INCLUDE += -iquote../../ -PLUGIN_CFLAGS := -g -Wall -Werror -shared -nostartfiles -fPIC +PLUGIN_TARGETS := amdgpu_plugin.so cuda_plugin.so # Silent make rules. Q := @ @@ -20,12 +12,6 @@ amdgpu_plugin.so: $(SRC_DIR)/amdgpu/amdgpu_plugin.so cuda_plugin.so: $(SRC_DIR)/cuda/cuda_plugin.so $(Q) cp $< $@ -inventory_test_enabled_plugin.so: inventory_test_enabled_plugin.c - $(Q) $(CC) $(PLUGIN_CFLAGS) $< -o $@ $(PLUGIN_INCLUDE) - -inventory_test_disabled_plugin.so: inventory_test_disabled_plugin.c - $(Q) $(CC) $(PLUGIN_CFLAGS) $< -o $@ $(PLUGIN_INCLUDE) - clean: $(Q) $(RM) $(PLUGIN_TARGETS) diff --git a/test/plugins/inventory_test_disabled_plugin.c b/test/plugins/inventory_test_disabled_plugin.c deleted file mode 100644 index 468fe924b..000000000 --- a/test/plugins/inventory_test_disabled_plugin.c +++ /dev/null @@ -1,17 +0,0 @@ -#include "criu-plugin.h" -#include "image.h" - -int inventory_test_disabled_plugin_init(int stage) -{ - if (stage == CR_PLUGIN_STAGE__RESTORE) - return check_and_remove_inventory_plugin(CR_PLUGIN_DESC.name, strlen(CR_PLUGIN_DESC.name)); - - return 0; -} - -void inventory_test_disabled_plugin_fini(int stage, int ret) -{ - return; -} - -CR_PLUGIN_REGISTER("inventory_test_disabled_plugin", inventory_test_disabled_plugin_init, inventory_test_disabled_plugin_fini) \ No newline at end of file diff --git 
a/test/plugins/inventory_test_enabled_plugin.c b/test/plugins/inventory_test_enabled_plugin.c deleted file mode 100644 index 89e684e2a..000000000 --- a/test/plugins/inventory_test_enabled_plugin.c +++ /dev/null @@ -1,17 +0,0 @@ -#include "criu-plugin.h" -#include "image.h" - -int inventory_test_enabled_plugin_init(int stage) -{ - if (stage == CR_PLUGIN_STAGE__RESTORE) - return !check_and_remove_inventory_plugin(CR_PLUGIN_DESC.name, strlen(CR_PLUGIN_DESC.name)); - - return add_inventory_plugin(CR_PLUGIN_DESC.name); -} - -void inventory_test_enabled_plugin_fini(int stage, int ret) -{ - return; -} - -CR_PLUGIN_REGISTER("inventory_test_enabled_plugin", inventory_test_enabled_plugin_init, inventory_test_enabled_plugin_fini) \ No newline at end of file diff --git a/test/others/action-script/show_action.sh b/test/show_action.sh similarity index 66% rename from test/others/action-script/show_action.sh rename to test/show_action.sh index afbfc3f27..86468b67a 100755 --- a/test/others/action-script/show_action.sh +++ b/test/show_action.sh @@ -1,4 +1,3 @@ #!/bin/bash - echo "${CRTOOLS_SCRIPT_ACTION} ${CRTOOLS_IMAGE_DIR} ${CRTOOLS_INIT_PID}" \ - >> "$(dirname "$0")/actions_called.txt" + >> "$(dirname $0)/actions_called.txt" diff --git a/test/zdtm.py b/test/zdtm.py index e21356c30..6b2132cc3 100755 --- a/test/zdtm.py +++ b/test/zdtm.py @@ -22,11 +22,11 @@ import sys import tempfile import time import uuid -import site from builtins import input, int, open, range, str, zip import yaml +import pycriu as crpc from zdtm.criu_config import criu_config # File to store content of streamed images @@ -443,7 +443,6 @@ class zdtm_test: self._bins = [name] self._env = {'TMPDIR': os.environ.get('TMPDIR', '/tmp')} self._deps = desc.get('deps', []) - self._bind = desc.get('bind') self.auto_reap = True def __make_action(self, act, env=None, root=None): @@ -514,8 +513,6 @@ class zdtm_test: if self.__flavor.ns: env['ZDTM_NEWNS'] = "1" env['ZDTM_ROOT'] = self.__flavor.root - if self._bind: - 
env['ZDTM_BIND'] = self._bind env['ZDTM_DEV'] = self.__flavor.devpath env['PATH'] = "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin" @@ -1142,24 +1139,6 @@ class criu: self.__img_streamer_process = None self.__tls = self.__tls_options() if opts['tls'] else [] self.__criu_bin = opts['criu_bin'] - - global crpc - pycriu_search_path = opts.get('pycriu_search_path') - if pycriu_search_path: - sys.path.insert(0, pycriu_search_path) - - try: - import pycriu as crpc - if pycriu_search_path: - print(f"pycriu loaded from: {crpc.__file__}") - except ImportError: - if not pycriu_search_path: - print("Consider building CRIU or using '--pycriu-search-path' option.") - raise - finally: - if pycriu_search_path: - sys.path.pop(0) - self.__crit_bin = opts['crit_bin'] self.__pre_dump_mode = opts['pre_dump_mode'] self.__preload_libfault = bool(opts['preload_libfault']) @@ -1611,7 +1590,6 @@ class criu: def available(): if not os.access(opts['criu_bin'], os.X_OK): print("CRIU binary not found at %s" % opts['criu_bin']) - print("Consider building CRIU or using '--criu-bin' option.") sys.exit(1) def kill(self): @@ -2078,6 +2056,8 @@ class Launcher: self.__subs = {} self.__fail = False self.__file_report = None + self.__junit_file = None + self.__junit_test_cases = None self.__failed = [] self.__nr_skip = 0 if self.__max > 1 and self.__total > 1: @@ -2089,14 +2069,22 @@ class Launcher: if opts['report'] and (opts['keep_going'] or self.__total == 1): global TestSuite, TestCase + from junit_xml import TestCase, TestSuite now = datetime.datetime.now() att = 0 reportname = os.path.join(report_dir, "criu-testreport.tap") - while os.access(reportname, os.F_OK): + junitreport = os.path.join(report_dir, "criu-testreport.xml") + while os.access(reportname, os.F_OK) or os.access( + junitreport, os.F_OK): reportname = os.path.join(report_dir, "criu-testreport" + ".%d.tap" % att) + junitreport = os.path.join(report_dir, + "criu-testreport" + ".%d.xml" % att) att += 1 + 
self.__junit_file = open(junitreport, 'a') + self.__junit_test_cases = [] + self.__file_report = open(reportname, 'a') print(u"TAP version 13", file=self.__file_report) print(u"# Hardware architecture: " + arch, file=self.__file_report) @@ -2131,6 +2119,10 @@ class Launcher: self.__runtest += 1 self.__nr_skip += 1 + if self.__junit_test_cases is not None: + tc = TestCase(name) + tc.add_skipped_info(reason) + self.__junit_test_cases.append(tc) if self.__file_report: testline = u"ok %d - %s # SKIP %s" % (self.__runtest, name, reason) print(testline, file=self.__file_report) @@ -2174,8 +2166,7 @@ class Launcher: 'dedup', 'sbs', 'freezecg', 'user', 'dry_run', 'noauto_dedup', 'remote_lazy_pages', 'show_stats', 'lazy_migrate', 'stream', 'tls', 'criu_bin', 'crit_bin', 'pre_dump_mode', 'mntns_compat_mode', - 'rootless', 'preload_libfault', 'mocked_cuda_checkpoint', - 'pycriu_search_path') + 'rootless', 'preload_libfault', 'mocked_cuda_checkpoint') arg = repr((name, desc, flavor, {d: self.__opts[d] for d in nd})) if self.__use_log: @@ -2232,6 +2223,11 @@ class Launcher: # The following wait() is not useful for our domain logic. 
# It's useful for taming warnings in subprocess.Popen.__del__() sub['sub'].wait() + tc = None + if self.__junit_test_cases is not None: + tc = TestCase(sub['name'], + elapsed_sec=time.time() - sub['start']) + self.__junit_test_cases.append(tc) if status != 0: self.__fail = True failed_flavor = decode_flav(os.WEXITSTATUS(status)) @@ -2242,6 +2238,7 @@ class Launcher: with open(sub['log']) as sublog: output = sublog.read() details = {'output': output} + tc.add_error_info(output=output) print(testline, file=self.__file_report) print("%s" % yaml.safe_dump(details, explicit_start=True, @@ -2287,6 +2284,10 @@ class Launcher: if not opts['fault'] and check_core_files(): self.__fail = True if self.__file_report: + ts = TestSuite(opts['title'], self.__junit_test_cases, + os.getenv("NODE_NAME")) + self.__junit_file.write(TestSuite.to_xml_string([ts])) + self.__junit_file.close() self.__file_report.close() if opts['keep_going']: @@ -2856,9 +2857,6 @@ def get_cli_args(): rp.add_argument("--criu-bin", help="Path to criu binary", default='../criu/criu') - rp.add_argument("--pycriu-search-path", - help=f"Path to search for pycriu module first (e.g., {site.getsitepackages()[0]})", - default=None) rp.add_argument("--crit-bin", help="Path to crit binary", default='../crit/crit') @@ -2879,7 +2877,7 @@ def get_cli_args(): rp.add_argument("--preload-libfault", action="store_true", help="Run criu with library preload to simulate special cases") rp.add_argument("--criu-plugin", help="Run tests with CRIU plugin", - choices=['amdgpu', 'cuda', 'inventory_test_enabled', 'inventory_test_disabled'], + choices=['amdgpu', 'cuda'], nargs='+', default=None) rp.add_argument("--mocked-cuda-checkpoint", @@ -2949,7 +2947,7 @@ if __name__ == '__main__': if opts['debug']: sys.settrace(traceit) - if opts['action'] == run_tests: + if opts['action'] == 'run': criu.available() for tst in test_classes.values(): tst.available() diff --git a/test/zdtm/Makefile.inc b/test/zdtm/Makefile.inc index 
c95b4ef6a..24f32c606 100644 --- a/test/zdtm/Makefile.inc +++ b/test/zdtm/Makefile.inc @@ -27,7 +27,7 @@ ifeq ($(ARCH),arm) else ifeq ($(ARMV),7) ARCHCFLAGS += -march=armv7-a+fp else ifeq ($(ARMV),8) - # To build aarch32 on armv8 (see criu Makefile) + # To build aarch32 on armv8 Travis-CI (see criu Makefile) ARCHCFLAGS += -march=armv7-a ARMV := 7 endif @@ -66,11 +66,6 @@ endif export PKG_CONFIG_PATH endif -ifeq ($(SHSTK_ENABLE),1) - CFLAGS += -mshstk - LDFLAGS += -Wl,-z,shstk -endif - define pkg-libs $(shell PKG_CONFIG_PATH="$(PKG_CONFIG_PATH)" $(PKG_CONFIG) --libs $(1)) endef @@ -79,17 +74,9 @@ define pkg-cflags $(shell PKG_CONFIG_PATH="$(PKG_CONFIG_PATH)" $(PKG_CONFIG) --cflags $(1)) endef -ifeq ($(GCS_ENABLE),1) - CFLAGS += -mbranch-protection=standard - LDFLAGS += -z experimental-gcs=check - TEST_ENV = GLIBC_TUNABLES=glibc.cpu.aarch64_gcs=1:glibc.cpu.aarch64_gcs_policy=2 -else - TEST_ENV = -endif - %.d: %.c $(E) " DEP " $@ - $(Q)$(CC) $(CFLAGS) $(CPPFLAGS) -MM -MP $< -o $@ + $(Q)$(CC) $(CFLAGS) $(CPPFLAGS) -MM -MP -c $< -o $@ %.o: %.c | %.d $(E) " CC " $@ diff --git a/test/zdtm/lib/arch/riscv64/include/asm/atomic.h b/test/zdtm/lib/arch/riscv64/include/asm/atomic.h deleted file mode 100644 index a4faf1322..000000000 --- a/test/zdtm/lib/arch/riscv64/include/asm/atomic.h +++ /dev/null @@ -1,107 +0,0 @@ -#ifndef __CR_ATOMIC_H__ -#define __CR_ATOMIC_H__ - -typedef uint32_t atomic_t; - -/* Copied from the Linux header arch/riscv/include/asm/barrier.h */ - -#define nop() __asm__ __volatile__("nop") - -#define RISCV_FENCE(p, s) __asm__ __volatile__("fence " #p "," #s : : : "memory") - -/* These barriers need to enforce ordering on both devices or memory. */ -#define mb() RISCV_FENCE(iorw, iorw) -#define rmb() RISCV_FENCE(ir, ir) -#define wmb() RISCV_FENCE(ow, ow) - -/* These barriers do not need to enforce ordering on devices, just memory. 
*/ -#define __smp_mb() RISCV_FENCE(rw, rw) -#define __smp_rmb() RISCV_FENCE(r, r) -#define __smp_wmb() RISCV_FENCE(w, w) - -#define __smp_store_release(p, v) \ - do { \ - compiletime_assert_atomic_type(*p); \ - RISCV_FENCE(rw, w); \ - WRITE_ONCE(*p, v); \ - } while (0) - -#define __smp_load_acquire(p) \ - ({ \ - typeof(*p) ___p1 = READ_ONCE(*p); \ - compiletime_assert_atomic_type(*p); \ - RISCV_FENCE(r, rw); \ - ___p1; \ - }) - -/* Copied from the Linux kernel header arch/riscv/include/asm/atomic.h */ - -static inline int atomic_read(const atomic_t *v) -{ - return (*(volatile int *)v); -} - -static inline void atomic_set(atomic_t *v, int i) -{ - *v = i; -} - -#define atomic_get atomic_read - -static inline int atomic_add_return(int i, atomic_t *v) -{ - int result; - - asm volatile("amoadd.w.aqrl %1, %2, %0" : "+A"(*v), "=r"(result) : "r"(i) : "memory"); - __smp_mb(); - return result + i; -} - -static inline int atomic_sub_return(int i, atomic_t *v) -{ - return atomic_add_return(-i, v); -} - -static inline int atomic_inc(atomic_t *v) -{ - return atomic_add_return(1, v) - 1; -} - -static inline int atomic_add(int val, atomic_t *v) -{ - return atomic_add_return(val, v) - val; -} - -static inline int atomic_dec(atomic_t *v) -{ - return atomic_sub_return(1, v) + 1; -} - -/* true if the result is 0, or false for all other cases. 
*/ -#define atomic_dec_and_test(v) (atomic_sub_return(1, v) == 0) -#define atomic_dec_return(v) (atomic_sub_return(1, v)) - -#define atomic_inc_return(v) (atomic_add_return(1, v)) - -static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new) -{ - unsigned long tmp; - int oldval; - - __smp_mb(); - - asm volatile("1:\n" - " lr.w %1, %2\n" - " bne %1, %3, 2f\n" - " sc.w %0, %4, %2\n" - " bnez %0, 1b\n" - "2:" - : "=&r"(tmp), "=&r"(oldval), "+A"(*ptr) - : "r"(old), "r"(new) - : "memory"); - - __smp_mb(); - return oldval; -} - -#endif /* __CR_ATOMIC_H__ */ diff --git a/test/zdtm/lib/fs.c b/test/zdtm/lib/fs.c index efcc7a1d0..bf8cd9cd3 100644 --- a/test/zdtm/lib/fs.c +++ b/test/zdtm/lib/fs.c @@ -54,7 +54,7 @@ mnt_info_t *get_cwd_mnt_info(void) while (fgets(str, sizeof(str), f)) { char *hyphen = strchr(str, '-'); - ret = sscanf(str, "%i %i %u:%u %4095s %4095s", &mnt_id, &parent_mnt_id, &kmaj, &kmin, root, mountpoint); + ret = sscanf(str, "%i %i %u:%u %s %s", &mnt_id, &parent_mnt_id, &kmaj, &kmin, root, mountpoint); if (ret != 6 || !hyphen) goto err; ret = sscanf(hyphen + 1, " %ms", &fsname); diff --git a/test/zdtm/lib/ns.c b/test/zdtm/lib/ns.c index 822e09c92..3c0dbdeb8 100644 --- a/test/zdtm/lib/ns.c +++ b/test/zdtm/lib/ns.c @@ -28,9 +28,8 @@ extern int pivot_root(const char *new_root, const char *put_old); static int prepare_mntns(void) { int dfd, ret; - char *root, *criu_path, *dev_path, *zdtm_bind; + char *root, *criu_path, *dev_path; char path[PATH_MAX]; - char bind_path[PATH_MAX]; root = getenv("ZDTM_ROOT"); if (!root) { @@ -53,21 +52,6 @@ static int prepare_mntns(void) return -1; } - zdtm_bind = getenv("ZDTM_BIND"); - if (zdtm_bind) { - /* - * Bindmount the directory to itself. - * e.g.: The mnt_ro_root test makes "/" mount readonly, but we - * still want to write logs to /zdtm/static/ so let's make it - * separate writable bind mount. 
- */ - snprintf(bind_path, sizeof(bind_path), "%s/%s", root, zdtm_bind); - if (mount(bind_path, bind_path, NULL, MS_BIND, NULL)) { - fprintf(stderr, "Can't bind-mount ZDTM_BIND: %m\n"); - return -1; - } - } - dev_path = getenv("ZDTM_DEV"); if (dev_path) { snprintf(path, sizeof(path), "%s/dev", root); diff --git a/test/zdtm/lib/sysctl.c b/test/zdtm/lib/sysctl.c index 3b1ebc168..9583ec3df 100644 --- a/test/zdtm/lib/sysctl.c +++ b/test/zdtm/lib/sysctl.c @@ -3,49 +3,6 @@ #include "zdtmtst.h" #include "sysctl.h" -int sysctl_read_str(const char *name, char *data, size_t size) -{ - int fd, ret; - - fd = open(name, O_RDONLY); - if (fd < 0) { - pr_perror("Can't open %s", name); - return -1; - } - - ret = read(fd, data, size - 1); - if (ret < 0) { - pr_perror("Can't read %s", name); - close(fd); - return -1; - } - data[ret] = '\0'; - close(fd); - - return 0; -} - -int sysctl_write_str(const char *name, char *data) -{ - int fd, ret; - - fd = open(name, O_WRONLY); - if (fd < 0) { - pr_perror("Can't open %s", name); - return -1; - } - - ret = write(fd, data, strlen(data)); - if (ret < 0) { - pr_perror("Can't write %s into %s", data, name); - close(fd); - return -1; - } - close(fd); - - return 0; -} - int sysctl_read_int(const char *name, int *data) { int fd; diff --git a/test/zdtm/lib/sysctl.h b/test/zdtm/lib/sysctl.h index d435bd7e9..67129102f 100644 --- a/test/zdtm/lib/sysctl.h +++ b/test/zdtm/lib/sysctl.h @@ -3,7 +3,5 @@ extern int sysctl_read_int(const char *name, int *data); extern int sysctl_write_int(const char *name, int val); -extern int sysctl_read_str(const char *name, char *data, size_t size); -extern int sysctl_write_str(const char *name, char *data); #endif diff --git a/test/zdtm/lib/test.c b/test/zdtm/lib/test.c index 95017e42e..a5ba38b2d 100644 --- a/test/zdtm/lib/test.c +++ b/test/zdtm/lib/test.c @@ -406,7 +406,7 @@ pid_t sys_clone_unified(unsigned long flags, void *child_stack, void *parent_tid { #ifdef __x86_64__ return (pid_t)syscall(__NR_clone, flags, 
child_stack, parent_tid, child_tid, newtls); -#elif (__i386__ || __arm__ || __aarch64__ || __powerpc64__ || __mips__ || __loongarch64 || __riscv) +#elif (__i386__ || __arm__ || __aarch64__ || __powerpc64__ || __mips__ || __loongarch64) return (pid_t)syscall(__NR_clone, flags, child_stack, parent_tid, newtls, child_tid); #elif __s390x__ return (pid_t)syscall(__NR_clone, child_stack, flags, parent_tid, child_tid, newtls); diff --git a/test/zdtm/static/Makefile b/test/zdtm/static/Makefile index e1df2e5fa..1e891f0ba 100644 --- a/test/zdtm/static/Makefile +++ b/test/zdtm/static/Makefile @@ -8,7 +8,6 @@ TST_NOFILE := \ sleeping00 \ pid00 \ caps00 \ - caps01 \ wait00 \ zombie00 \ zombie01 \ @@ -24,7 +23,6 @@ TST_NOFILE := \ sse20 \ mprotect00 \ timers \ - timers01 \ timerfd \ unbound_sock \ sched_prio00 \ @@ -37,8 +35,6 @@ TST_NOFILE := \ socket_udp-corked \ socket6_udp \ socket_udp_shutdown \ - socket_icmp \ - socket6_icmp \ sk-freebind \ sk-freebind-false \ socket_udplite \ @@ -57,18 +53,10 @@ TST_NOFILE := \ shm \ shm-mp \ ptrace_sig \ - pidfd_self \ - pidfd_of_thread \ - pidfd_dead \ - pidfd_diffdead \ - pidfd_child \ - pidfd_kill \ - fd_from_pidfd \ pipe00 \ pipe01 \ pipe02 \ pthread00 \ - pthread00-pac \ pthread01 \ pthread02 \ pthread_timers \ @@ -152,7 +140,6 @@ TST_NOFILE := \ maps05 \ maps09 \ maps10 \ - maps11 \ mlock_setuid \ xids00 \ groups \ @@ -290,7 +277,6 @@ TST_NOFILE := \ PKG_CONFIG ?= pkg-config pkg-config-check = $(shell sh -c '$(PKG_CONFIG) $(1) && echo y') -pkg-config-atleast-version = $(shell sh -c '$(PKG_CONFIG) --atleast-version=$(2) $(1) && echo y') ifeq ($(call pkg-config-check,libbpf),y) TST_NOFILE += \ bpf_hash \ @@ -299,10 +285,7 @@ endif ifneq ($(ARCH),arm) ifneq ($(COMPAT_TEST),y) - TST_NOFILE += maps03 -ifeq ($(call pkg-config-atleast-version,libtracefs,1.7),y) - TST_NOFILE += uprobes -endif + TST_NOFILE += maps03 endif endif @@ -319,7 +302,6 @@ TST_FILE = \ write_read02 \ write_read10 \ maps00 \ - maps12 \ link10 \ file_attr \ 
deleted_unix_sock \ @@ -386,8 +368,6 @@ TST_FILE = \ sk-unix-listen02 \ sk-unix-listen03 \ sk-unix-listen04 \ - sk-unix-restore-fs-share \ - mnt_ext_file_bind_auto \ TST_DIR = \ cwd00 \ @@ -433,7 +413,6 @@ TST_DIR = \ mntns_ghost \ mntns_ghost01 \ mntns_ro_root \ - mnt_ro_root \ mntns_link_ghost \ mntns_shared_bind \ mntns_shared_bind02 \ @@ -509,41 +488,35 @@ STATE_OUT = $(TST_STATE:%=%.out) include ../Makefile.inc -ifeq ($(ARCH),aarch64) - PAC_CFLAGS := -mbranch-protection=standard -else - PAC_CFLAGS := -endif - all: $(TST) criu-rtc.so install: all .PHONY: all install $(TST_NOFILE:%=%.pid): %.pid: % - $(TEST_ENV) $(> .gitignore $(Q)echo $(@:%.c=%.h) >> .gitignore $(E) " PBCC " $@ - $(Q)protoc --proto_path=. --c_out=. criu-rtc.proto + $(Q)protoc-c --proto_path=. --c_out=. criu-rtc.proto criu-rtc.so: criu-rtc.c criu-rtc.pb-c.c $(E) " LD " $@ diff --git a/test/zdtm/static/apparmor.c b/test/zdtm/static/apparmor.c index dc1636821..713ffaa46 100644 --- a/test/zdtm/static/apparmor.c +++ b/test/zdtm/static/apparmor.c @@ -59,7 +59,7 @@ int checkprofile(void) return -1; } - len = fscanf(f, "%1023[^ \n]s", profile); + len = fscanf(f, "%[^ \n]s", profile); fclose(f); if (len != 1) { fail("wrong number of items scanned %d", len); diff --git a/test/zdtm/static/apparmor_stacking.c b/test/zdtm/static/apparmor_stacking.c index 0bc36048c..76de8b8b4 100644 --- a/test/zdtm/static/apparmor_stacking.c +++ b/test/zdtm/static/apparmor_stacking.c @@ -56,7 +56,7 @@ static int checkprofile(pid_t pid, char *expected) return -1; } - len = fscanf(f, "%1023[^ \n]s", profile); + len = fscanf(f, "%[^ \n]s", profile); fclose(f); if (len != 1) { fail("wrong number of items scanned %d", len); diff --git a/test/zdtm/static/caps01.c b/test/zdtm/static/caps01.c deleted file mode 100644 index 0f8a7101e..000000000 --- a/test/zdtm/static/caps01.c +++ /dev/null @@ -1,168 +0,0 @@ -#include -#include -#include -#include -#include - -#include "zdtmtst.h" - -const char *test_doc = "Check that CapAmb are 
preserved"; -const char *test_author = "Liu Chao "; - -struct cap_hdr { - unsigned int version; - int pid; -}; - -struct cap_data { - unsigned int eff; - unsigned int prm; - unsigned int inh; -}; - -#define _LINUX_CAPABILITY_VERSION_3 0x20080522 -#define _LINUX_CAPABILITY_U32S_3 2 -#define CAP_DAC_OVERRIDE 1 -#define PR_CAP_AMBIENT 47 -#define PR_CAP_AMBIENT_IS_SET 1 -#define PR_CAP_AMBIENT_RAISE 2 -#define PR_CAP_AMBIENT_LOWER 3 - -int capget(struct cap_hdr *hdrp, struct cap_data *datap); -int capset(struct cap_hdr *hdrp, const struct cap_data *datap); - -static int cap_last_cap = 63; - -int main(int argc, char **argv) -{ - task_waiter_t t; - int pid, result_pipe[2]; - unsigned int amb[_LINUX_CAPABILITY_U32S_3]; - unsigned int amb_2[_LINUX_CAPABILITY_U32S_3]; - char res = 'x'; - FILE *f; - - test_init(argc, argv); - task_waiter_init(&t); - - f = fopen("/proc/sys/kernel/cap_last_cap", "r"); - if (f) { - if (fscanf(f, "%d", &cap_last_cap) != 1) { - pr_perror("Unable to read cal_last_cap"); - fclose(f); - return 1; - } - fclose(f); - } else - test_msg("/proc/sys/kernel/cap_last_cap is not available\n"); - - if (pipe(result_pipe)) { - pr_perror("Can't create pipe"); - return 1; - } - - pid = test_fork(); - if (pid == 0) { - int b, i, ret; - struct cap_hdr hdr; - struct cap_data data[_LINUX_CAPABILITY_U32S_3]; - - hdr.version = _LINUX_CAPABILITY_VERSION_3; - hdr.pid = 0; - - if (capget(&hdr, data) < 0) { - pr_perror("capget"); - return -1; - } - - hdr.version = _LINUX_CAPABILITY_VERSION_3; - hdr.pid = 0; - - data[0].eff &= ~((1 << CAP_CHOWN) | (1 << CAP_DAC_OVERRIDE)); - data[0].prm &= ~(1 << CAP_DAC_OVERRIDE); - data[0].inh = data[0].prm; - data[1].inh = data[1].prm; - - if (capset(&hdr, data) < 0) { - pr_perror("capset"); - return -1; - } - - for (b = 0; b < _LINUX_CAPABILITY_U32S_3; b++) { - amb[b] = data[b].prm; - for (i = 0; i < 32; i++) { - if (b * 32 + i > cap_last_cap) - break; - if ((amb[b] & (1 << i)) > 0) - ret = prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, i 
+ b * 32, 0, 0); - else - ret = prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_LOWER, i + b * 32, 0, 0); - if (ret) { - pr_perror("Unable to set ambient capability %d to %d: %d", i + b * 32, amb[b] & (1 << i), ret); - return -1; - } - } - } - - task_waiter_complete_current(&t); - task_waiter_wait4(&t, getppid()); - - for (b = 0; b < _LINUX_CAPABILITY_U32S_3; b++) { - amb_2[b] = 0; - for (i = 0; i < 32; i++) { - if (b * 32 + i > cap_last_cap) - break; - ret = prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, i + b * 32, 0, 0); - if (ret < 0) { - pr_perror("Unable to read ambient capability %d: %d", i + b * 32, ret); - goto bad; - } - - amb_2[b] |= (ret << i); - } - } - - for (b = 0; b < _LINUX_CAPABILITY_U32S_3; b++) { - if (amb[b] != amb_2[b]) { - res = '1'; - goto bad; - } - } - - res = '0'; - bad: - write(result_pipe[1], &res, 1); - - if (res != '0') { - write(result_pipe[1], amb, sizeof(amb)); - write(result_pipe[1], amb_2, sizeof(amb_2)); - } - - close(result_pipe[0]); - close(result_pipe[1]); - _exit(0); - } - - task_waiter_wait4(&t, pid); - - test_daemon(); - test_waitsig(); - - task_waiter_complete_current(&t); - - read(result_pipe[0], &res, 1); - - if (res == '0') - pass(); - else { - read(result_pipe[0], amb, sizeof(amb)); - read(result_pipe[0], amb_2, sizeof(amb_2)); - test_msg("amb[]=%08x, %08x\n", amb[0], amb[1]); - test_msg("amb[]=%08x, %08x\n", amb_2[0], amb_2[1]); - fail("Fail: %c", res); - } - close(result_pipe[0]); - close(result_pipe[1]); - - return 0; -} diff --git a/test/zdtm/static/caps01.desc b/test/zdtm/static/caps01.desc deleted file mode 100644 index 2eac7e654..000000000 --- a/test/zdtm/static/caps01.desc +++ /dev/null @@ -1 +0,0 @@ -{'flags': 'suid'} diff --git a/test/zdtm/static/cgroup01.c b/test/zdtm/static/cgroup01.c index 7bfb67762..bc8515264 100644 --- a/test/zdtm/static/cgroup01.c +++ b/test/zdtm/static/cgroup01.c @@ -79,7 +79,7 @@ int main(int argc, char **argv) if (!s) continue; - sscanf(paux, "%*d %*d %*d:%*d %*s %1023s", aux); + sscanf(paux, 
"%*d %*d %*d:%*d %*s %s", aux); test_msg("found cgroup at %s\n", aux); for (i = 0; i < 2; i++) { diff --git a/test/zdtm/static/cgroup02.c b/test/zdtm/static/cgroup02.c index 8a925c0a4..6229a8a08 100644 --- a/test/zdtm/static/cgroup02.c +++ b/test/zdtm/static/cgroup02.c @@ -75,7 +75,7 @@ bool test_exists(char *mountinfo_line, char *path) char aux[1024], paux[1024]; struct stat st; - sscanf(mountinfo_line, "%*d %*d %*d:%*d %*s %1023s", aux); + sscanf(mountinfo_line, "%*d %*d %*d:%*d %*s %s", aux); test_msg("found cgroup at %s\n", aux); ssprintf(paux, "%s/%s", aux, path); diff --git a/test/zdtm/static/cgroup_stray.c b/test/zdtm/static/cgroup_stray.c index f5754410f..0c0ed93cf 100644 --- a/test/zdtm/static/cgroup_stray.c +++ b/test/zdtm/static/cgroup_stray.c @@ -135,7 +135,7 @@ out: int main(int argc, char **argv) { int ret = -1, sk_pair[2], sk, status; - char path[PATH_MAX], c = 0; + char path[PATH_MAX], c; pid_t pid = 0; test_init(argc, argv); diff --git a/test/zdtm/static/change_mnt_context.c b/test/zdtm/static/change_mnt_context.c index 8787ae5cf..6d436014b 100644 --- a/test/zdtm/static/change_mnt_context.c +++ b/test/zdtm/static/change_mnt_context.c @@ -46,7 +46,7 @@ int main(int argc, char **argv) if (!pos) continue; - result = sscanf(pos, " - %*s %*s %1023s", opts); + result = sscanf(pos, " - %*s %*s %s", opts); if (result != 1) { fail("Not able to sscanf line from mountinfo"); goto out; diff --git a/test/zdtm/static/fanotify00.c b/test/zdtm/static/fanotify00.c index 0400cc74b..69ead43e7 100644 --- a/test/zdtm/static/fanotify00.c +++ b/test/zdtm/static/fanotify00.c @@ -22,7 +22,7 @@ #elif defined(__PPC64__) #define __NR_fanotify_init 323 #define __NR_fanotify_mark 324 -#elif (__aarch64__ || __riscv) +#elif __aarch64__ #define __NR_fanotify_init 262 #define __NR_fanotify_mark 263 #elif __s390x__ diff --git a/test/zdtm/static/fd_from_pidfd.c b/test/zdtm/static/fd_from_pidfd.c deleted file mode 100644 index 1f863d6c0..000000000 --- 
a/test/zdtm/static/fd_from_pidfd.c +++ /dev/null @@ -1,108 +0,0 @@ -#include -#include -#include -#include - -#include "zdtmtst.h" - -const char *test_doc = "Check if fd obtained from pidfd_get_fd is C/R correctly\n"; -const char *test_author = "Bhavik Sachdev "; - -static int pidfd_open(pid_t pid, unsigned int flags) -{ - return syscall(__NR_pidfd_open, pid, flags); -} - -static int pidfd_getfd(int pidfd, int targetfd, unsigned int flags) -{ - return syscall(__NR_pidfd_getfd, pidfd, targetfd, flags); -} - -static int pidfd_send_signal(int pidfd, int sig, siginfo_t* info, unsigned int flags) -{ - return syscall(__NR_pidfd_send_signal, pidfd, sig, info, flags); -} - -int main(int argc, char* argv[]) -{ - #define READ 0 - #define WRITE 1 - - int pidfd, child, p[2], child_read, read_data, status; - int data = 42; - - test_init(argc, argv); - - if (pipe(p)) { - pr_perror("pipe"); - return 1; - } - - child = fork(); - if (child < 0) { - pr_perror("fork"); - return 1; - } - - if (child == 0) { - close(p[WRITE]); - test_waitsig(); - return 0; - } - - pidfd = pidfd_open(child, 0); - if (pidfd < 0) { - pr_perror("pidfd_open failed"); - return 1; - } - - close(p[READ]); - if (write(p[WRITE], &data, sizeof(data)) != sizeof(data)) { - pr_perror("write"); - return 1; - } - close(p[WRITE]); - - child_read = pidfd_getfd(pidfd, p[READ], 0); - if (child_read < 0) { - pr_perror("pidfd_getfd"); - return 1; - } - - test_daemon(); - test_waitsig(); - - if (read(child_read, &read_data, sizeof(read_data)) != sizeof(read_data)) { - pr_perror("read"); - goto err_close; - } - - if (read_data != data) { - fail("data from fd obtained using pidfd_getfd incorrect"); - goto err_close; - } - - if (pidfd_send_signal(pidfd, SIGTERM, NULL, 0)) { - pr_perror("Could not send signal"); - goto err_close; - } - - if (waitpid(child, &status, 0) != child) { - pr_perror("waitpid()"); - return 1; - } - - if (status != 0) { - fail("%d:%d:%d:%d", WIFEXITED(status), WEXITSTATUS(status), WIFSIGNALED(status), 
WTERMSIG(status)); - return 1; - } - - pass(); - close(child_read); - close(pidfd); - return 0; -err_close: - close(child_read); - close(pidfd); - return 1; -} diff --git a/test/zdtm/static/file_locks01.c b/test/zdtm/static/file_locks01.c index bfdca51d9..beea171f5 100644 --- a/test/zdtm/static/file_locks01.c +++ b/test/zdtm/static/file_locks01.c @@ -107,7 +107,7 @@ static int check_file_lock(int fd, char *expected_type, char *expected_option, u memset(fl_type, 0, sizeof(fl_type)); memset(fl_option, 0, sizeof(fl_option)); - num = sscanf(buf, "%*s %*d:%15s %15s %15s %d %x:%x:%ld %*d %*s", fl_flag, fl_type, fl_option, &fl_owner, &maj, + num = sscanf(buf, "%*s %*d:%s %s %s %d %x:%x:%ld %*d %*s", fl_flag, fl_type, fl_option, &fl_owner, &maj, &min, &i_no); if (num < 7) { pr_err("Invalid lock info\n"); diff --git a/test/zdtm/static/file_locks02.c b/test/zdtm/static/file_locks02.c index ae4827de9..d2049ebaa 100644 --- a/test/zdtm/static/file_locks02.c +++ b/test/zdtm/static/file_locks02.c @@ -41,7 +41,7 @@ static int check_file_lock(pid_t pid, pid_t child, int fd, char *expected_type, memset(fl_type, 0, sizeof(fl_type)); memset(fl_option, 0, sizeof(fl_option)); - num = sscanf(buf, "%*s %*d:%15s %15s %15s %d", fl_flag, fl_type, fl_option, &fl_owner); + num = sscanf(buf, "%*s %*d:%s %s %s %d", fl_flag, fl_type, fl_option, &fl_owner); if (num < 4) { pr_perror("Invalid lock info."); break; diff --git a/test/zdtm/static/file_locks03.c b/test/zdtm/static/file_locks03.c index 228e66892..35ef41a21 100644 --- a/test/zdtm/static/file_locks03.c +++ b/test/zdtm/static/file_locks03.c @@ -41,7 +41,7 @@ static int check_file_lock(pid_t pid, pid_t child, int fd, char *expected_type, memset(fl_type, 0, sizeof(fl_type)); memset(fl_option, 0, sizeof(fl_option)); - num = sscanf(buf, "%*s %*d:%15s %15s %15s %d", fl_flag, fl_type, fl_option, &fl_owner); + num = sscanf(buf, "%*s %*d:%s %s %s %d", fl_flag, fl_type, fl_option, &fl_owner); if (num < 4) { pr_perror("Invalid lock info."); break; 
diff --git a/test/zdtm/static/file_locks04.c b/test/zdtm/static/file_locks04.c index 7e0d2654e..11d224fa7 100644 --- a/test/zdtm/static/file_locks04.c +++ b/test/zdtm/static/file_locks04.c @@ -34,7 +34,7 @@ static int check_file_locks(pid_t child_pid, int fd, int child_fd) continue; test_msg("c: %s", buf); - num = sscanf(buf, "%*s %*d:%15s %15s %15s %d %*02x:%*02x:%*d %*d %*s", fl_flag, fl_type, fl_option, + num = sscanf(buf, "%*s %*d:%s %s %s %d %*02x:%*02x:%*d %*d %*s", fl_flag, fl_type, fl_option, &fl_owner); if (num < 4) { diff --git a/test/zdtm/static/get_smaps_bits.c b/test/zdtm/static/get_smaps_bits.c index 3d952ac95..31d0d92b2 100644 --- a/test/zdtm/static/get_smaps_bits.c +++ b/test/zdtm/static/get_smaps_bits.c @@ -6,10 +6,6 @@ #define MAP_HUGETLB 0x40000 #endif -#ifndef MAP_DROPPABLE -#define MAP_DROPPABLE 0x08 -#endif - #ifndef MADV_HUGEPAGE #define MADV_HUGEPAGE 14 #endif @@ -22,10 +18,6 @@ #define MADV_DONTDUMP 16 #endif -#ifndef MADV_WIPEONFORK -#define MADV_WIPEONFORK 18 -#endif - static void parse_vmflags(char *buf, unsigned long *flags, unsigned long *madv) { char *tok; @@ -49,8 +41,6 @@ static void parse_vmflags(char *buf, unsigned long *flags, unsigned long *madv) *flags |= MAP_NORESERVE; else if (_vmflag_match(tok, "ht")) *flags |= MAP_HUGETLB; - else if (_vmflag_match(tok, "dp")) - *flags |= MAP_DROPPABLE; /* madvise() block */ if (_vmflag_match(tok, "sr")) @@ -67,8 +57,6 @@ static void parse_vmflags(char *buf, unsigned long *flags, unsigned long *madv) *madv |= (1ul << MADV_HUGEPAGE); else if (_vmflag_match(tok, "nh")) *madv |= (1ul << MADV_NOHUGEPAGE); - else if (_vmflag_match(tok, "wf")) - *madv |= (1ul << MADV_WIPEONFORK); /* * Anything else is just ignored. 
diff --git a/test/zdtm/static/maps02.c b/test/zdtm/static/maps02.c index 38244f020..29f1372c9 100644 --- a/test/zdtm/static/maps02.c +++ b/test/zdtm/static/maps02.c @@ -2,19 +2,11 @@ #include "zdtmtst.h" #include "get_smaps_bits.h" -#ifndef MAP_DROPPABLE -#define MAP_DROPPABLE 0x08 -#endif - #ifndef MADV_DONTDUMP #define MADV_DONTDUMP 16 #endif -#ifndef MADV_WIPEONFORK -#define MADV_WIPEONFORK 18 -#endif - -const char *test_doc = "Test private memory with advises"; +const char *test_doc = "Test shared memory with advises"; const char *test_author = "Cyrill Gorcunov "; struct mmap_data { @@ -31,14 +23,8 @@ static int alloc_anon_mmap(struct mmap_data *m, int flags, int adv) { m->start = mmap(NULL, MEM_SIZE, PROT_READ | PROT_WRITE, flags, -1, 0); if (m->start == MAP_FAILED) { - if (errno == EINVAL) { - test_msg("mmap failed, no kernel support\n"); - *m = (struct mmap_data){}; - return 0; - } else { - pr_perror("mmap failed"); - return -1; - } + pr_perror("mmap failed"); + return -1; } if (madvise(m->start, MEM_SIZE, adv)) { @@ -57,12 +43,12 @@ static int alloc_anon_mmap(struct mmap_data *m, int flags, int adv) int main(int argc, char **argv) { - struct mmap_data m[7] = {}; + struct mmap_data m[5] = {}; size_t i; test_init(argc, argv); - test_msg("Alloc dontfork\n"); + test_msg("Alloc growsdown\n"); if (alloc_anon_mmap(&m[0], MAP_PRIVATE | MAP_ANONYMOUS, MADV_DONTFORK)) return -1; @@ -78,18 +64,10 @@ int main(int argc, char **argv) if (alloc_anon_mmap(&m[3], MAP_PRIVATE | MAP_ANONYMOUS, MADV_HUGEPAGE)) return -1; - test_msg("Alloc mergeable\n"); + test_msg("Alloc dontfork/random|mergeable\n"); if (alloc_anon_mmap(&m[4], MAP_PRIVATE | MAP_ANONYMOUS, MADV_MERGEABLE)) return -1; - test_msg("Alloc wipeonfork\n"); - if (alloc_anon_mmap(&m[5], MAP_PRIVATE | MAP_ANONYMOUS, MADV_WIPEONFORK)) - return -1; - - test_msg("Alloc droppable\n"); - if (alloc_anon_mmap(&m[6], MAP_DROPPABLE | MAP_ANONYMOUS, MADV_NORMAL)) - return -1; - test_msg("Fetch existing flags/adv\n"); for (i = 0; 
i < sizeof(m) / sizeof(m[0]); i++) { if (get_smaps_bits((unsigned long)m[i].start, &m[i].orig_flags, &m[i].orig_madv)) diff --git a/test/zdtm/static/maps11.c b/test/zdtm/static/maps11.c deleted file mode 100644 index df309714b..000000000 --- a/test/zdtm/static/maps11.c +++ /dev/null @@ -1,205 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include "zdtmtst.h" - -#ifndef MAP_DROPPABLE -#define MAP_DROPPABLE 0x08 -#endif - -#ifndef MADV_WIPEONFORK -#define MADV_WIPEONFORK 18 -#endif - -const char *test_doc = "Test MAP_DROPPABLE/MADV_WIPEONFORK mappings with 2 processes"; -const char *test_author = "Alexander Mikhalitsyn "; - -bool mem_is_zero(const uint8_t *buffer, size_t length) -{ - size_t i; - - for (i = 0; i < length; i++) - if (buffer[i] != 0) - return false; - - return true; -} - -int main(int argc, char **argv) -{ - uint8_t *p1, *p2; - pid_t pid; - int status; - const char data[] = "MADV_WIPEONFORK vma data"; - bool criu_was_there = false; - struct stat st1, st2; - - test_init(argc, argv); - - p1 = mmap(NULL, sizeof(data), PROT_READ | PROT_WRITE, - MAP_DROPPABLE | MAP_ANONYMOUS, 0, 0); - if (p1 == MAP_FAILED) { - if (errno == EINVAL) { - skip("mmap failed, no kernel support for MAP_DROPPABLE\n"); - goto skip; - } else { - pr_perror("mmap failed"); - return -1; - } - } - - p2 = mmap(NULL, sizeof(data), PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, 0, 0); - if (p2 == MAP_FAILED) { - pr_perror("mmap failed"); - return 1; - } - - if (madvise(p2, sizeof(data), MADV_WIPEONFORK)) { - pr_perror("madvise failed"); - return -1; - } - - /* contents of this mapping is supposed to be dropped after C/R */ - memcpy(p1, data, sizeof(data)); - - /* contents of this mapping is supposed to be dropped after fork() */ - memcpy(p2, data, sizeof(data)); - - /* - * Let's spawn a process before C/R so our mappings get inherited - * then, after C/R we need to ensure that CRIU memory premapping - * machinery works properly. 
- * - * It is important, because we restore MADV_WIPEONFORK on a later - * stages (after vma premapping happens) and we need to ensure that - * CRIU handles everything in a right way. - */ - pid = test_fork(); - if (pid < 0) { - pr_perror("fork failed"); - return 1; - } - - if (pid == 0) { - test_waitsig(); - - /* - * Both mappings have VM_WIPEONFORK flag set, - * so we expect to have it null-ified after fork(). - */ - if (!mem_is_zero(p1, sizeof(data)) || - !mem_is_zero(p2, sizeof(data))) { - pr_err("1st child: memory check failed\n"); - return 1; - } - - return 0; - } - - /* - * A simple way to detect if C/R happened is to compare st_ino - * fields of stat() on the procfs files of the current task. - * - * Hopefully, this terrible hack is never used in real-world - * applications ;-) Here, we only need this to make test - * to pass with/without --nocr option. - */ - if (stat("/proc/self/status", &st1)) { - pr_perror("stat"); - return 1; - } - - test_daemon(); - test_waitsig(); - - /* signal a child process to continue */ - if (kill(pid, SIGTERM)) { - pr_perror("kill"); - goto err; - } - - if (waitpid(pid, &status, 0) != pid) { - pr_perror("1st waitpid"); - goto err; - } - - if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) { - fail("1st process didn't exit cleanly: status=%d", status); - goto err; - } - - if (stat("/proc/self/status", &st2)) { - pr_perror("stat"); - return 1; - } - - /* detect CRIU */ - criu_was_there = st1.st_ino != st2.st_ino; - - /* - * We should mark failure if one of the following happens: - * 1. MAP_DROPPABLE memory is not zero after C/R - * 2. MAP_DROPPABLE memory somehow changed without C/R - * (kernel issue? memory pressure?) - * 3. MADV_WIPEONFORK memory is not preserved - * - * We care about 2nd case only because we would like test - * to pass even with --nocr zdtm.py option. 
- */ - if ((criu_was_there && !mem_is_zero(p1, sizeof(data))) || - (!criu_was_there && memcmp(p1, data, sizeof(data))) || - memcmp(p2, data, sizeof(data))) { - fail("Data mismatch"); - return 1; - } - - /* contents of these mappings is supposed to be dropped after fork() */ - memcpy(p1, data, sizeof(data)); - memcpy(p2, data, sizeof(data)); - - pid = test_fork(); - if (pid < 0) { - pr_perror("fork failed"); - return 1; - } - - if (pid == 0) { - if (!mem_is_zero(p1, sizeof(data)) || - !mem_is_zero(p2, sizeof(data))) { - pr_err("2nd child: memory check failed\n"); - return 1; - } - - return 0; - } - - if (waitpid(pid, &status, 0) != pid) { - pr_perror("2nd waitpid"); - goto err; - } - - if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) { - fail("2nd process didn't exit cleanly: status=%d", status); - goto err; - } - - pass(); - - return 0; -err: - if (waitpid(-1, NULL, WNOHANG) == 0) { - kill(pid, SIGTERM); - wait(NULL); - } - return 1; - -skip: - test_daemon(); - test_waitsig(); - pass(); - return 0; -} diff --git a/test/zdtm/static/maps12.c b/test/zdtm/static/maps12.c deleted file mode 100644 index f0d6c2381..000000000 --- a/test/zdtm/static/maps12.c +++ /dev/null @@ -1,351 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "zdtmtst.h" - -const char *test_doc = "Test madvise(MADV_GUARD_INSTALL)"; -const char *test_author = "Alexander Mikhalitsyn "; -/* some parts of code were taken from Linux kernel's kselftest guard-pages.c - written by Lorenzo Stoakes */ - -char *filename; -int fd; -TEST_OPTION(filename, string, "file name", 1); - -#ifndef MADV_GUARD_INSTALL -#define MADV_GUARD_INSTALL 102 -#endif - -uint8_t *map_base; - -struct { - unsigned int pages_num; - bool filemap; -} vmas[] = { - { 2, false }, - { 2, false }, - { 2, false }, - { 2, true }, - { 2, true }, - { 2, true }, -}; - -struct { - bool guarded; - bool wipeonfork; -} pages[] = { - { false, false }, /* vmas[0] */ - { true, false }, - 
{ true, false }, /* vmas[1] */ - { false, false }, - { false, false }, /* vmas[2] */ - { true, true }, - { true, false }, /* vmas[3] */ - { false, false }, - { true, false }, /* vmas[4] */ - { true, false }, - { false, false }, /* vmas[5] */ - { true, false }, -}; - -static volatile sig_atomic_t signal_jump_set; -static sigjmp_buf signal_jmp_buf; - -static void handle_sigsegv(int signo) -{ - if (!signal_jump_set) - return; - - siglongjmp(signal_jmp_buf, 1); -} - -static bool try_write_to_addr(uint8_t *ptr) -{ - bool failed; - - /* Tell signal handler to jump back here on fatal signal. */ - signal_jump_set = true; - /* If a fatal signal arose, we will jump back here and failed is set. */ - failed = sigsetjmp(signal_jmp_buf, 1) != 0; - - if (!failed) - *ptr = 'x'; - - signal_jump_set = false; - return !failed; -} - -static int setup_sigsegv_handler(void) -{ - uint8_t write_me; - - if (signal(SIGSEGV, handle_sigsegv) == SIG_ERR) { - pr_perror("setting SIGSEGV handler failed"); - return 1; - } - - /* ensure that try_write_to_addr() works properly */ - if (!try_write_to_addr(&write_me)) { - pr_err("Failed to write at valid addr. Buggy try_write_to_addr()?\n"); - return 1; - } - - if (try_write_to_addr(NULL)) { - pr_err("Failed to detect an invalid write. Buggy try_write_to_addr()?\n"); - return 1; - } - - return 0; -} - -static inline void *mmap_pages(void *addr_hint, unsigned int count, bool filemap) -{ - char *map; - - map = mmap(addr_hint, count * PAGE_SIZE, PROT_WRITE | PROT_READ, - MAP_PRIVATE | (filemap ? 0 : MAP_ANONYMOUS) | (addr_hint ? MAP_FIXED : 0), - filemap ? fd : -1, - filemap ? 
(off_t)((intptr_t)addr_hint - (intptr_t)map_base) : 0); - if (map == MAP_FAILED || (addr_hint && (map != addr_hint))) - return MAP_FAILED; - - return map; -} - -static int __check_guards(const char *when, bool in_child) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(pages); i++) { - /* - * Skip pages that were never guarded, and also those - * that were, but have MADV_WIPEONFORK which means that - * guards were removed on fork. - */ - if (!pages[i].guarded || (in_child && pages[i].wipeonfork)) - continue; - - if (try_write_to_addr(&map_base[i * PAGE_SIZE])) { - pr_err("successful write to a guarded area %d %s C/R\n", - i, when); - return 1; - } - } - - return 0; -} - -static int check_guards(const char *when) -{ - int status; - pid_t pid; - - /* - * First of all, check that guards are on their places - * in a main test process. - */ - if (__check_guards(when, false)) { - return 1; - } - - /* - * Now, check that guards are on their places - * after fork(). This allows to ensure that - * combo MADV_WIPEONFORK + MADV_GUARD_INSTALL - * is restored properly too. 
- */ - - pid = test_fork(); - if (pid < 0) { - pr_perror("check_guards: fork failed"); - return 1; - } - - if (pid == 0) { - if (__check_guards(when, true)) { - pr_err("check_guards(\"%s\") failed in child\n", when); - exit(1); - } - - exit(0); - } - - if (waitpid(pid, &status, 0) != pid) { - pr_perror("check_guards: waitpid"); - return 1; - } - - if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) { - pr_err("check_guards: process didn't exit cleanly: status=%d\n", status); - return 1; - } - - return 0; -} - -static void gen_pages_data(void) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(pages); i++) { - uint32_t crc; - - if (pages[i].guarded) - continue; - - crc = ~0; - datagen(&map_base[i * PAGE_SIZE], PAGE_SIZE, &crc); - } -} - -static int set_pages_madvs(void) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(pages); i++) { - if (pages[i].guarded) { - if (madvise(&map_base[i * PAGE_SIZE], PAGE_SIZE, - MADV_GUARD_INSTALL)) { - pr_perror("MADV_GUARD_INSTALL failed on page %d", i); - return 1; - } - } - - if (pages[i].wipeonfork) { - if (madvise(&map_base[i * PAGE_SIZE], PAGE_SIZE, - MADV_WIPEONFORK)) { - pr_perror("MADV_WIPEONFORK failed on page %d", i); - return 1; - } - } - } - - return 0; -} - -static int check_pages_data(void) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(pages); i++) { - uint32_t crc; - - if (pages[i].guarded) - continue; - - crc = ~0; - if (datachk(&map_base[i * PAGE_SIZE], PAGE_SIZE, &crc)) { - pr_err("Page %d is corrupted\n", i); - return 1; - } - } - - return 0; -} - -static int prepare_vmas(void) -{ - char *map; - int i, shift; - - shift = 0; - for (i = 0; i < ARRAY_SIZE(vmas); i++) { - map = mmap_pages(&map_base[shift * PAGE_SIZE], - vmas[i].pages_num, vmas[i].filemap); - if (map == MAP_FAILED) { - pr_err("mmap of [%d,%d] pages failed\n", - shift, shift + vmas[i].pages_num); - return 1; - } - - shift += vmas[i].pages_num; - } - - if (shift != ARRAY_SIZE(pages)) { - pr_err("Different number of pages in vmas and pages arrays.\n"); - return 1; - } - - 
return 0; -} - -int main(int argc, char **argv) -{ - unsigned int pages_num = ARRAY_SIZE(pages); - - test_init(argc, argv); - - fd = open(filename, O_TRUNC | O_CREAT | O_RDWR, 0600); - if (fd < 0) { - pr_perror("Unable to create a test file"); - return -1; - } - - if (ftruncate(fd, pages_num * PAGE_SIZE)) { - pr_perror("Unable to ftruncate a test file"); - return -1; - } - - if (setup_sigsegv_handler()) { - pr_err("setup_sigsegv_handler() failed\n"); - return 1; - } - - /* let's find a large enough area in address space */ - map_base = mmap_pages(NULL, pages_num, false); - if (map_base == MAP_FAILED) { - pr_err("mmap of %d pages failed\n", pages_num); - return 1; - } - - /* - * Now we know that we have a free vm address space area - * [map_base, map_base + pages_num * PAGE_SIZE). - * We can use (map_base) as a hint for our further mmaps. - */ - if (prepare_vmas()) { - pr_err("prepare_vmas() failed\n"); - return 1; - } - - /* fill non-guarded pages with data and preserve checksums */ - gen_pages_data(); - - if (set_pages_madvs()) { - pr_err("set_pages_madvs() failed\n"); - return 1; - } - - /* ensure that madvise(MADV_GUARD_INSTALL) works like expected */ - if (check_guards("before")) { - pr_err("check_guards(\"before\") failed\n"); - return 1; - } - - test_daemon(); - test_waitsig(); - - /* ensure that guards are at their places */ - if (check_guards("after")) { - fail("check_guards(\"after\") failed"); - return 1; - } - - /* check that non-guarded pages still contain original data */ - if (check_pages_data()) { - fail("check_pages_data() failed"); - return 1; - } - - pass(); - munmap(map_base, pages_num * PAGE_SIZE); - close(fd); - return 0; -} diff --git a/test/zdtm/static/maps12.desc b/test/zdtm/static/maps12.desc deleted file mode 100644 index 3f7627ff3..000000000 --- a/test/zdtm/static/maps12.desc +++ /dev/null @@ -1 +0,0 @@ -{'flavor': 'h', 'feature': 'pagemap_scan_guard_pages'} diff --git a/test/zdtm/static/mnt_ext_file_bind_auto.c 
b/test/zdtm/static/mnt_ext_file_bind_auto.c deleted file mode 100644 index 0c3b9f5fb..000000000 --- a/test/zdtm/static/mnt_ext_file_bind_auto.c +++ /dev/null @@ -1,104 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include - -#include "zdtmtst.h" - -const char *test_doc = "Check if external file mount works"; -const char *test_author = "Pavel Tikhomirov "; - -char *filename = "mnt_ext_file_bind_auto_bind_auto.file"; -TEST_OPTION(filename, string, "file name", 1); - -char *source = "mnt_ext_file_bind_auto_bind_auto.source"; - -int create_file(const char *path) -{ - int fd; - - fd = open(path, O_CREAT | O_RDWR, 0644); - if (fd < 0) { - pr_perror("open"); - return -1; - } - - close(fd); - return 0; -} - -int main(int argc, char **argv) -{ - char *zdtm_newns = getenv("ZDTM_NEWNS"); - char *tmp = "/tmp/zdtm_ext_file_bind_auto.tmp"; - char *sourcefile = "/tmp/zdtm_ext_file_bind_auto.file"; - char *root, tmpfile[PATH_MAX], testfile[PATH_MAX]; - - root = getenv("ZDTM_ROOT"); - if (root == NULL) { - pr_perror("root"); - return 1; - } - - if (!zdtm_newns) { - pr_perror("ZDTM_NEWNS is not set"); - return 1; - } else if (strcmp(zdtm_newns, "1")) { - goto test; - } - - /* Prepare file bindmount in criu root (source for external file bindmount) */ - mkdir(tmp, 0755); - if (mount(source, tmp, "tmpfs", 0, NULL)) { - pr_perror("mount tmpfs"); - return 1; - } - if (mount(NULL, tmp, NULL, MS_PRIVATE, NULL)) { - pr_perror("make private"); - return 1; - } - - sprintf(tmpfile, "%s/%s", tmp, filename); - if (create_file(tmpfile)) - return 1; - - if (create_file(sourcefile)) - return 1; - - if (mount(tmpfile, sourcefile, NULL, MS_BIND, NULL)) { - pr_perror("bind"); - return 1; - } - - umount2(tmp, MNT_DETACH); - - /* Prepare file in test root (mount point for external file bindmount) */ - sprintf(testfile, "%s/%s", root, filename); - if (create_file(testfile)) - return 1; - - /* - * Create temporary mntns, next mounts will not show up in criu mntns - * and will be 
inherited into test mntns - */ - if (unshare(CLONE_NEWNS)) { - pr_perror("unshare"); - return 1; - } - - if (mount(sourcefile, testfile, NULL, MS_BIND, NULL)) { - pr_perror("bind"); - return 1; - } -test: - test_init(argc, argv); - - test_daemon(); - test_waitsig(); - - pass(); - return 0; -} diff --git a/test/zdtm/static/mnt_ext_file_bind_auto.desc b/test/zdtm/static/mnt_ext_file_bind_auto.desc deleted file mode 100644 index 825b08127..000000000 --- a/test/zdtm/static/mnt_ext_file_bind_auto.desc +++ /dev/null @@ -1,4 +0,0 @@ -{ 'opts': '--external mnt[]', - 'feature': 'mnt_id', - 'flavor': 'ns uns', - 'flags': 'suid'} diff --git a/test/zdtm/static/mnt_ro_root.c b/test/zdtm/static/mnt_ro_root.c deleted file mode 100644 index 2d8370150..000000000 --- a/test/zdtm/static/mnt_ro_root.c +++ /dev/null @@ -1,32 +0,0 @@ -#include - -#include "zdtmtst.h" - -const char *test_doc = "Check if root mount remains read-only after c/r"; -const char *test_author = "Pavel Tikhomirov "; - -char *dirname; -TEST_OPTION(dirname, string, "directory name", 1); - -int main(int argc, char **argv) -{ - test_init(argc, argv); - - if (mount(NULL, "/", NULL, MS_REMOUNT | MS_RDONLY | MS_BIND, NULL)) { - pr_perror("mount"); - return 1; - } - - test_daemon(); - test_waitsig(); - - /* - * Note: In zdtm.py:check_visible_state() we already check for all - * tests, that all mounts in the test's mount namespace remain the - * same, by comparing mountinfo before and after c/r. So rw/ro mount - * option inconsistency will be detected there and we don't need to - * check it in the test itself. 
- */ - pass(); - return 0; -} diff --git a/test/zdtm/static/mnt_ro_root.desc b/test/zdtm/static/mnt_ro_root.desc deleted file mode 100644 index c9a8e4f18..000000000 --- a/test/zdtm/static/mnt_ro_root.desc +++ /dev/null @@ -1,6 +0,0 @@ -{ - 'flavor': 'ns uns', - 'flags': 'suid', - 'feature': 'mnt_id', - 'bind': 'zdtm/static', -} diff --git a/test/zdtm/static/net_lock_socket_iptables.desc b/test/zdtm/static/net_lock_socket_iptables.desc index cb622536f..936ff8702 100644 --- a/test/zdtm/static/net_lock_socket_iptables.desc +++ b/test/zdtm/static/net_lock_socket_iptables.desc @@ -1,6 +1,5 @@ { 'flavor': 'h', - 'feature': 'has_ipt_legacy', 'flags': 'suid excl reqrst', 'dopts': '--tcp-established --network-lock iptables', 'ropts': '--tcp-established', diff --git a/test/zdtm/static/net_lock_socket_iptables6.desc b/test/zdtm/static/net_lock_socket_iptables6.desc index cb622536f..936ff8702 100644 --- a/test/zdtm/static/net_lock_socket_iptables6.desc +++ b/test/zdtm/static/net_lock_socket_iptables6.desc @@ -1,6 +1,5 @@ { 'flavor': 'h', - 'feature': 'has_ipt_legacy', 'flags': 'suid excl reqrst', 'dopts': '--tcp-established --network-lock iptables', 'ropts': '--tcp-established', diff --git a/test/zdtm/static/netns-dev.c b/test/zdtm/static/netns-dev.c index f268f2fec..1e6ee1dea 100644 --- a/test/zdtm/static/netns-dev.c +++ b/test/zdtm/static/netns-dev.c @@ -414,7 +414,7 @@ static int check_stable_secret(struct test_conf *tc) return -1; } - ret = fscanf(fp, "%200s", val); + ret = fscanf(fp, "%s", val); if (ret != 1) { pr_perror("fscanf"); fclose(fp); diff --git a/test/zdtm/static/netns-nf.desc b/test/zdtm/static/netns-nf.desc index 58c23e8ba..e7e73b1ae 100644 --- a/test/zdtm/static/netns-nf.desc +++ b/test/zdtm/static/netns-nf.desc @@ -1,7 +1,6 @@ { 'deps': [ '/bin/sh', '/sbin/iptables|/usr/sbin/iptables', - 
'/lib/xtables/libxt_standard.so|/usr/lib64/xtables/libxt_standard.so|/usr/lib/iptables/libxt_standard.so|/usr/lib/powerpc64le-linux-gnu/xtables/libxt_standard.so|/usr/lib/x86_64-linux-gnu/xtables/libxt_standard.so|/usr/lib/s390x-linux-gnu/xtables/libxt_standard.so|/usr/lib/xtables/libxt_standard.so|/usr/lib/aarch64-linux-gnu/xtables/libxt_standard.so|/usr/lib/riscv64-linux-gnu/xtables/libxt_standard.so', + '/lib/xtables/libxt_standard.so|/usr/lib64/xtables/libxt_standard.so|/usr/lib/iptables/libxt_standard.so|/usr/lib/powerpc64le-linux-gnu/xtables/libxt_standard.so|/usr/lib/x86_64-linux-gnu/xtables/libxt_standard.so|/usr/lib/s390x-linux-gnu/xtables/libxt_standard.so|/usr/lib/xtables/libxt_standard.so|/usr/lib/aarch64-linux-gnu/xtables/libxt_standard.so', '/usr/bin/diff'], 'flags': 'suid', - 'feature': 'has_ipt_legacy', 'flavor': 'ns uns'} diff --git a/test/zdtm/static/netns-nft-ipt.desc b/test/zdtm/static/netns-nft-ipt.desc index 6d04589b3..4120f74d6 100644 --- a/test/zdtm/static/netns-nft-ipt.desc +++ b/test/zdtm/static/netns-nft-ipt.desc @@ -2,7 +2,7 @@ 'deps': [ '/bin/sh', '/usr/sbin/nft', '/sbin/iptables|/usr/sbin/iptables', - '/lib/xtables/libxt_standard.so|/usr/lib64/xtables/libxt_standard.so|/usr/lib/iptables/libxt_standard.so|/usr/lib/powerpc64le-linux-gnu/xtables/libxt_standard.so|/usr/lib/x86_64-linux-gnu/xtables/libxt_standard.so|/usr/lib/s390x-linux-gnu/xtables/libxt_standard.so|/usr/lib/xtables/libxt_standard.so|/usr/lib/aarch64-linux-gnu/xtables/libxt_standard.so|/usr/lib/riscv64-linux-gnu/xtables/libxt_standard.so', + '/lib/xtables/libxt_standard.so|/usr/lib64/xtables/libxt_standard.so|/usr/lib/iptables/libxt_standard.so|/usr/lib/powerpc64le-linux-gnu/xtables/libxt_standard.so|/usr/lib/x86_64-linux-gnu/xtables/libxt_standard.so|/usr/lib/s390x-linux-gnu/xtables/libxt_standard.so|/usr/lib/xtables/libxt_standard.so|/usr/lib/aarch64-linux-gnu/xtables/libxt_standard.so', '/usr/bin/diff'], 'flags': 'suid', 'flavor': 'ns uns'} diff --git 
a/test/zdtm/static/netns_lock_iptables.desc b/test/zdtm/static/netns_lock_iptables.desc index b465706b8..69020f34e 100644 --- a/test/zdtm/static/netns_lock_iptables.desc +++ b/test/zdtm/static/netns_lock_iptables.desc @@ -1,7 +1,6 @@ { 'flavor': 'h', 'flags': 'suid excl reqrst', - 'feature': 'has_ipt_legacy', 'opts': '--tcp-established', 'dopts': '--network-lock iptables', 'ropts': '--join-ns net:/var/run/netns/criu-net-lock-test' diff --git a/test/zdtm/static/netns_sub_sysctl.c b/test/zdtm/static/netns_sub_sysctl.c index 03b478b7d..545a17308 100644 --- a/test/zdtm/static/netns_sub_sysctl.c +++ b/test/zdtm/static/netns_sub_sysctl.c @@ -1,38 +1,20 @@ #include -#include -#include #include "zdtmtst.h" #include "sysctl.h" -const char *test_doc = "Check dump and restore of sysctls in subns"; +const char *test_doc = "Check dump and restore a net.unix.max_dgram_qlen sysctl parameter in subns"; const char *test_author = "Alexander Mikhalitsyn "; -#define MAX_STR_SYSCTL_LEN 200 - -enum { - SYSCTL_INT, - SYSCTL_STR, -}; - typedef struct { const char *path; - int type; int old; int new; - char s_old[MAX_STR_SYSCTL_LEN]; - char s_new[MAX_STR_SYSCTL_LEN]; - bool set; } sysctl_opt_t; #define CONF_UNIX_BASE "/proc/sys/net/unix" -#define IPV4_SYSCTL_BASE "/proc/sys/net/ipv4" -static sysctl_opt_t net_unix_params[] = { - {CONF_UNIX_BASE "/max_dgram_qlen", SYSCTL_INT}, - {IPV4_SYSCTL_BASE "/ping_group_range", SYSCTL_STR, 0, 0, "40000\t50000\n"}, - {NULL, 0, 0} -}; +static sysctl_opt_t net_unix_params[] = { { CONF_UNIX_BASE "/max_dgram_qlen", 0, 0 }, { NULL, 0, 0 } }; int main(int argc, char **argv) { @@ -41,22 +23,10 @@ int main(int argc, char **argv) test_init(argc, argv); for (p = net_unix_params; p->path != NULL; p++) { - if (access(p->path, W_OK) != 0) { - test_msg("%s doesn't exist\n", p->path); - continue; - } - p->set = true; - if (p->type == SYSCTL_INT) { - p->old = (((unsigned)lrand48()) % 1023) + 1; - if (sysctl_write_int(p->path, p->old)) { - pr_perror("Can't change %s", 
p->path); - return -1; - } - } else if (p->type == SYSCTL_STR) { - if (sysctl_write_str(p->path, p->s_old)) { - pr_perror("Can't change %s", p->path); - return -1; - } + p->old = (((unsigned)lrand48()) % 1023) + 1; + if (sysctl_write_int(p->path, p->old)) { + pr_perror("Can't change %s", p->path); + return -1; } } @@ -64,27 +34,13 @@ int main(int argc, char **argv) test_waitsig(); for (p = net_unix_params; p->path != NULL; p++) { - if (!p->set) - continue; - if (p->type == SYSCTL_INT) { - if (sysctl_read_int(p->path, &p->new)) - ret = 1; + if (sysctl_read_int(p->path, &p->new)) + ret = 1; - if (p->old != p->new) { - errno = EINVAL; - pr_perror("%s changed: %d ---> %d", p->path, p->old, p->new); - ret = 1; - } - } else if (p->type == SYSCTL_STR) { - if (sysctl_read_str(p->path, p->s_new, MAX_STR_SYSCTL_LEN)) { - ret = 1; - } else { - if (strcmp(p->s_old, p->s_new)) { - errno = EINVAL; - pr_perror("%s changed: %s ---> %s", p->path, p->s_old, p->s_new); - ret = 1; - } - } + if (p->old != p->new) { + errno = EINVAL; + pr_perror("%s changed: %d ---> %d", p->path, p->old, p->new); + ret = 1; } } diff --git a/test/zdtm/static/netns_sub_sysctl.desc b/test/zdtm/static/netns_sub_sysctl.desc index 0c357aefe..535842668 100644 --- a/test/zdtm/static/netns_sub_sysctl.desc +++ b/test/zdtm/static/netns_sub_sysctl.desc @@ -1,4 +1,4 @@ { - 'flavor': 'ns uns', + 'flavor': 'ns', 'flags': 'suid' } diff --git a/test/zdtm/static/ofd_file_locks.c b/test/zdtm/static/ofd_file_locks.c index a68fa38ee..68b6f22f5 100644 --- a/test/zdtm/static/ofd_file_locks.c +++ b/test/zdtm/static/ofd_file_locks.c @@ -16,7 +16,7 @@ static int parse_ofd_lock(char *buf, struct flock *lck) if (strncmp(buf, "lock:\t", 6) != 0) return 1; /* isn't lock, skip record */ - num = sscanf(buf, "%*s %*d: %9s %14s %9s %*d %*x:%*x:%*d %lld %31s", fl_flag, fl_type, fl_option, &start, fl_end); + num = sscanf(buf, "%*s %*d: %s %s %s %*d %*x:%*x:%*d %lld %s", fl_flag, fl_type, fl_option, &start, fl_end); if (num < 4) { 
pr_err("Invalid lock info %s\n", buf); diff --git a/test/zdtm/static/packet_sock.c b/test/zdtm/static/packet_sock.c index c1c94ac21..4a9078f81 100644 --- a/test/zdtm/static/packet_sock.c +++ b/test/zdtm/static/packet_sock.c @@ -5,7 +5,7 @@ const char *test_author = "Pavel Emelyanov "; /* * Description: - * Create and bind several packet sockets, check that getname + * Create and bind several packet sockets, check thet getname * reports same result before and after c/r cycle. This is enough * for _basic_ packet functionality only, but still. */ diff --git a/test/zdtm/static/pidfd_child.c b/test/zdtm/static/pidfd_child.c deleted file mode 100644 index ec559605d..000000000 --- a/test/zdtm/static/pidfd_child.c +++ /dev/null @@ -1,66 +0,0 @@ -#include -#include -#include - -#include "zdtmtst.h" - -const char *test_doc = "Checks pidfd sends signal to child process after restore\n"; -const char *test_author = "Bhavik Sachdev "; - -static int pidfd_open(pid_t pid, unsigned int flags) -{ - return syscall(__NR_pidfd_open, pid, flags); -} - -static int pidfd_send_signal(int pidfd, int sig, siginfo_t* info, unsigned int flags) -{ - return syscall(__NR_pidfd_send_signal, pidfd, sig, info, flags); -} - -int main(int argc, char* argv[]) -{ - int pidfd, status; - pid_t child; - - test_init(argc, argv); - - child = fork(); - if (child < 0) { - pr_perror("Unable to fork a new process"); - return 1; - } else if (child == 0) { - test_waitsig(); - return 0; - } - - pidfd = pidfd_open(child, 0); - if (pidfd < 0) { - pr_perror("pidfd_open failed"); - return 1; - } - - test_daemon(); - test_waitsig(); - - if (pidfd_send_signal(pidfd, SIGTERM, NULL, 0)) { - fail("Could not send signal"); - goto err_close; - } - - if (waitpid(child, &status, 0) != child) { - pr_perror("waitpid()"); - goto err_close; - } - - if (status != 0) { - fail("%d:%d:%d:%d", WIFEXITED(status), WEXITSTATUS(status), WIFSIGNALED(status), WTERMSIG(status)); - goto err_close; - } - - pass(); - close(pidfd); - return 0; 
-err_close: - close(pidfd); - return 1; -} diff --git a/test/zdtm/static/pidfd_dead.c b/test/zdtm/static/pidfd_dead.c deleted file mode 100644 index 9c825899d..000000000 --- a/test/zdtm/static/pidfd_dead.c +++ /dev/null @@ -1,244 +0,0 @@ -#include -#include -#include -#include -#include -#include - -#include "zdtmtst.h" - -const char *test_doc = "Check C/R of pidfds that point to dead processes\n"; -const char *test_author = "Bhavik Sachdev "; - -#ifndef PID_FS_MAGIC -#define PID_FS_MAGIC 0x50494446 -#endif - -/* - * main - * `- child - * `- grandchild - * - * main opens a pidfd for both child and grandchild. - * Before C/R we kill both child and grandchild. - * We end up with two unique dead pidfds. - */ - -static long get_fs_type(int lfd) -{ - struct statfs fst; - - if (fstatfs(lfd, &fst)) { - return -1; - } - return fst.f_type; -} - -static int pidfd_open(pid_t pid, unsigned int flags) -{ - return syscall(__NR_pidfd_open, pid, flags); -} - -static int pidfd_send_signal(int pidfd, int sig, siginfo_t* info, unsigned int flags) -{ - return syscall(__NR_pidfd_send_signal, pidfd, sig, info, flags); -} - -static int open_pidfd_pair(int pidfd[2], int pid) -{ - pidfd[0] = pidfd_open(pid, 0); - if (pidfd[0] < 0) { - pr_perror("pidfd_open() failed"); - return 1; - } - - pidfd[1] = pidfd_open(pid, 0); - if (pidfd[1] < 0) { - close(pidfd[0]); - pr_perror("pidfd_open() failed"); - return 1; - } - return 0; -} - -static int compare_pidfds(int pidfd[2]) -{ - /* - * After linux 6.9 we can compare inode numbers - * to determine if two pidfds point to the same process. - * While the inode number may change before and after C/R - * pidfds pointing to the same pid should have the same inode number. 
- */ - struct statx stats[2]; - statx(pidfd[0], "", AT_EMPTY_PATH, STATX_ALL, &stats[0]); - statx(pidfd[1], "", AT_EMPTY_PATH, STATX_ALL, &stats[1]); - if (stats[0].stx_ino != stats[1].stx_ino) - return 1; - return 0; -} - -static int check_for_pidfs(void) -{ - long type; - int pidfd = pidfd_open(getpid(), 0); - if (pidfd < 0) { - pr_perror("pidfd open() failed"); - return -1; - } - type = get_fs_type(pidfd); - close(pidfd); - return type == PID_FS_MAGIC; -} - -int main(int argc, char* argv[]) -{ - #define READ 0 - #define WRITE 1 - - int child, ret, gchild, p[2], status; - int cpidfd[2], gpidfd[2]; - struct statx stats[2]; - - test_init(argc, argv); - - ret = check_for_pidfs(); - if (ret < 0) - return 1; - - if (ret == 0) { - test_daemon(); - test_waitsig(); - skip("Test requires pidfs. skipping..."); - pass(); - return 0; - } - - if (pipe(p)) { - pr_perror("pipe"); - return 1; - } - - child = test_fork(); - if (child < 0) { - pr_perror("fork"); - return 1; - } else if (child == 0) { - int gchild = test_fork(); - close(p[READ]); - if (gchild < 0) { - pr_perror("fork"); - return 1; - } else if (gchild == 0) { - close(p[WRITE]); - while(1) - sleep(1000); - } else { - if (write(p[WRITE], &gchild, sizeof(int)) != sizeof(int)) { - pr_perror("write"); - return 1; - } - close(p[WRITE]); - if (waitpid(gchild, &status, 0) != gchild) { - pr_perror("waitpid"); - return 1; - } - - if (!WIFSIGNALED(status)) { - fail("Expected grandchild to be terminated by a signal"); - return 1; - } - - if (WTERMSIG(status) != SIGKILL) { - fail("Expected grandchild to be terminated by SIGKILL"); - return 1; - } - - return 0; - } - } - - ret = open_pidfd_pair(cpidfd, child); - if (ret) - return 1; - - close(p[WRITE]); - if (read(p[READ], &gchild, sizeof(int)) != sizeof(int)) { - pr_perror("write"); - return 1; - } - close(p[READ]); - - ret = open_pidfd_pair(gpidfd, gchild); - if (ret) - return 1; - - /* - * We kill grandchild and child processes only after opening pidfds. 
- */ - if (pidfd_send_signal(gpidfd[0], SIGKILL, NULL, 0)) { - pr_perror("pidfd_send_signal"); - goto fail_close; - } - - if (waitpid(child, &status, 0) != child) { - pr_perror("waitpid"); - goto fail_close; - } - - if (!WIFEXITED(status)) { - fail("Expected child to exit normally"); - goto fail_close; - } - - if (WEXITSTATUS(status) != 0) { - fail("Expected child to exit with 0"); - goto fail_close; - } - usleep(1000); - - if (kill(gchild, 0) != -1 && errno != ESRCH) { - fail("Expected grand child to not exist"); - goto fail_close; - } - - if (kill(child, 0) != -1 && errno != ESRCH) { - fail("Expected child to not exist"); - goto fail_close; - } - - test_daemon(); - test_waitsig(); - - ret = compare_pidfds(cpidfd); - if (ret) { - fail("inodes not same for same pid"); - goto fail_close; - } - - ret = compare_pidfds(gpidfd); - if (ret) { - fail("inodes not same for same pid"); - goto fail_close; - } - - statx(cpidfd[0], "", AT_EMPTY_PATH, STATX_ALL, &stats[0]); - statx(gpidfd[0], "", AT_EMPTY_PATH, STATX_ALL, &stats[1]); - if (stats[0].stx_ino == stats[1].stx_ino) { - fail("pidfds pointing to diff pids should have diff inodes"); - goto fail_close; - } - - pass(); - close(cpidfd[0]); - close(cpidfd[1]); - close(gpidfd[0]); - close(gpidfd[1]); - return 0; - -fail_close: - close(cpidfd[0]); - close(cpidfd[1]); - close(gpidfd[0]); - close(gpidfd[1]); - return 1; -} diff --git a/test/zdtm/static/pidfd_diffdead.c b/test/zdtm/static/pidfd_diffdead.c deleted file mode 100644 index 5bc1911a5..000000000 --- a/test/zdtm/static/pidfd_diffdead.c +++ /dev/null @@ -1,228 +0,0 @@ -#include -#include -#include -#include -#include -#include - -#include "zdtmtst.h" - -const char *test_doc = "Check C/R of processes that point to a common dead pidfd\n"; -const char *test_author = "Bhavik Sachdev "; - -#ifndef PID_FS_MAGIC -#define PID_FS_MAGIC 0x50494446 -#endif - -/* - * main - * `- child - * `- grandchild - * - * main and child open a pidfd for grandchild. 
- * Before C/R we kill grandchild. - * We end up with two pidfds in two diff processes that point to the same dead process. - */ - -static long get_fs_type(int lfd) -{ - struct statfs fst; - - if (fstatfs(lfd, &fst)) { - return -1; - } - return fst.f_type; -} - -static int pidfd_open(pid_t pid, unsigned int flags) -{ - return syscall(__NR_pidfd_open, pid, flags); -} - -static int pidfd_send_signal(int pidfd, int sig, siginfo_t *info, unsigned int flags) -{ - return syscall(__NR_pidfd_send_signal, pidfd, sig, info, flags); -} - -static int check_for_pidfs(void) -{ - long type; - int pidfd = pidfd_open(getpid(), 0); - if (pidfd < 0) { - pr_perror("pidfd open() failed"); - return -1; - } - type = get_fs_type(pidfd); - close(pidfd); - return type == PID_FS_MAGIC; -} - -int main(int argc, char *argv[]) -{ -#define READ 0 -#define WRITE 1 - - int child, ret, gchild, status; - struct statx stat; - task_waiter_t t; - unsigned long long ino; - - /* - * We use the inop pipe to send the inode number of the - * pidfd opened in the child to the main process for - * comparison. - */ - int p[2]; - int pidfd; - - test_init(argc, argv); - task_waiter_init(&t); - - ret = check_for_pidfs(); - if (ret < 0) - return 1; - - if (ret == 0) { - test_daemon(); - test_waitsig(); - skip("Test requires pidfs. 
skipping..."); - pass(); - return 0; - } - - if (pipe(p)) { - pr_perror("pipe"); - return 1; - } - - child = test_fork(); - if (child < 0) { - pr_perror("fork"); - return 1; - } else if (child == 0) { - int gchild; - gchild = test_fork(); - if (gchild < 0) { - pr_perror("fork"); - return 1; - } else if (gchild == 0) { - close(p[READ]); - close(p[WRITE]); - while (1) - sleep(1000); - } else { - if (write(p[WRITE], &gchild, sizeof(int)) != sizeof(int)) { - pr_perror("write"); - return 1; - } - - pidfd = pidfd_open(gchild, 0); - if (pidfd < 0) { - pr_perror("pidfd_open"); - return 1; - } - - if (waitpid(gchild, &status, 0) != gchild) { - pr_perror("waitpid"); - return 1; - } - - if (!WIFSIGNALED(status)) { - fail("Expected grandchild to be terminated by a signal"); - return 1; - } - - if (WTERMSIG(status) != SIGKILL) { - fail("Expected grandchild to be terminated by SIGKILL"); - return 1; - } - task_waiter_complete(&t, 1); - - test_waitsig(); - - if (statx(pidfd, "", AT_EMPTY_PATH, STATX_ALL, &stat) < 0) { - pr_perror("statx"); - return 1; - } - - close(p[WRITE]); - if (read(p[READ], &ino, sizeof(ino)) != sizeof(ino)) { - pr_perror("read"); - return 1; - } - close(p[READ]); - close(pidfd); - - /* ino number should be same because both pidfds were for the same process */ - if (ino != stat.stx_ino) { - exit(1); - } - exit(0); - } - } - - if (read(p[READ], &gchild, sizeof(int)) != sizeof(int)) { - pr_perror("write"); - return 1; - } - - pidfd = pidfd_open(gchild, 0); - if (pidfd < 0) { - pr_perror("pidfd_open"); - return 1; - } - - /* - * We kill grandchild process only after opening pidfd. 
- */ - if (pidfd_send_signal(pidfd, SIGKILL, NULL, 0)) { - pr_perror("pidfd_send_signal"); - return 1; - } - - /* Wait for child to waitpid on gchild */ - task_waiter_wait4(&t, 1); - - test_daemon(); - test_waitsig(); - - close(p[READ]); - if (statx(pidfd, "", AT_EMPTY_PATH, STATX_ALL, &stat) < 0) { - pr_perror("statx"); - goto err; - } - - /* Send inode number of pidfd to child for comparison */ - if (write(p[WRITE], &stat.stx_ino, sizeof(stat.stx_ino)) != sizeof(stat.stx_ino)) { - pr_perror("write"); - goto err; - } - close(p[WRITE]); - - if (kill(child, SIGTERM)) { - pr_perror("kill"); - goto err; - } - - if (waitpid(child, &status, 0) != child) { - pr_perror("waitpid"); - goto err; - } - - if (!WIFEXITED(status)) { - fail("Expected child to terminate normally"); - goto err; - } - - if (WEXITSTATUS(status) != 0) { - fail("Child failed"); - goto err; - } - - pass(); - close(pidfd); - return 0; -err: - close(pidfd); - return 1; -} diff --git a/test/zdtm/static/pidfd_kill.c b/test/zdtm/static/pidfd_kill.c deleted file mode 100644 index 6232d033a..000000000 --- a/test/zdtm/static/pidfd_kill.c +++ /dev/null @@ -1,128 +0,0 @@ -#include -#include -#include - -#include "zdtmtst.h" - -const char *test_doc = "Kill child and grandchild process using pidfds\n"; -const char *test_author = "Bhavik Sachdev "; - -static int pidfd_open(pid_t pid, unsigned int flags) -{ - return syscall(__NR_pidfd_open, pid, flags); -} - -static int pidfd_send_signal(int pidfd, int sig, siginfo_t* info, unsigned int flags) -{ - return syscall(__NR_pidfd_send_signal, pidfd, sig, info, flags); -} - -static int wait_for_child(int child) -{ - int status; - if (waitpid(child, &status, 0) != child) { - pr_perror("waitpid()"); - return 1; - } - - if (status != 0) { - test_msg("%d:%d:%d:%d", WIFEXITED(status), WEXITSTATUS(status), - WIFSIGNALED(status), WTERMSIG(status)); - } - - return 0; -} - -int main(int argc, char* argv[]) -{ - #define READ 0 - #define WRITE 1 - - int child, gchild, cpidfd, gpidfd, 
gchild_pid, ret; - int p[2]; - - if (pipe(p)) { - pr_perror("pipe"); - return 1; - } - - test_init(argc, argv); - - child = fork(); - if (child < 0) { - pr_perror("fork"); - return 1; - } - - if (child == 0) { - gchild = fork(); - if (gchild < 0) { - pr_perror("fork"); - return 1; - } - - if (gchild == 0) { - test_waitsig(); - return 0; - } - - close(p[READ]); - if (write(p[WRITE], &gchild, sizeof(gchild)) - != sizeof(gchild)) { - pr_perror("write"); - return 1; - } - close(p[WRITE]); - - test_waitsig(); - return wait_for_child(gchild); - } - - cpidfd = pidfd_open(child, 0); - if (cpidfd < 0) { - pr_perror("pidfd_open"); - return 1; - } - - close(p[WRITE]); - if (read(p[READ], &gchild_pid, sizeof(gchild_pid)) - != sizeof(gchild_pid)) { - pr_perror("read"); - return 1; - } - close(p[READ]); - - gpidfd = pidfd_open(gchild_pid, 0); - if (gpidfd < 0) { - pr_perror("pidfd_open"); - return 1; - } - - test_daemon(); - test_waitsig(); - - if (pidfd_send_signal(gpidfd, SIGKILL, NULL, 0)) { - pr_perror("Could not send signal"); - goto fail_close; - } - - if (pidfd_send_signal(cpidfd, SIGKILL, NULL, 0)) { - pr_perror("Could not send signal"); - goto fail_close; - } - - ret = wait_for_child(child); - if (ret) - goto fail_close; - - pass(); - close(cpidfd); - close(gpidfd); - return 0; - -fail_close: - fail(); - close(cpidfd); - close(gpidfd); - return 1; -} diff --git a/test/zdtm/static/pidfd_of_thread.c b/test/zdtm/static/pidfd_of_thread.c deleted file mode 100644 index d232c7ac1..000000000 --- a/test/zdtm/static/pidfd_of_thread.c +++ /dev/null @@ -1,114 +0,0 @@ -#include -#include -#include -#include - -#include "zdtmtst.h" -#include "lock.h" - -const char *test_doc = "Check C/R of pidfds that point to threads\n"; -const char *test_author = "Bhavik Sachdev "; - -/* see also: https://codebrowser.dev/glibc/glibc/sysdeps/unix/sysv/linux/tst-clone3.c.html */ - -#ifndef PIDFD_THREAD -#define PIDFD_THREAD O_EXCL -#endif - -#ifndef PIDFD_SIGNAL_THREAD -#define PIDFD_SIGNAL_THREAD 
(1UL << 0) -#endif - -#ifndef PID_FS_MAGIC -#define PID_FS_MAGIC 0x50494446 -#endif - -static long get_fs_type(int lfd) -{ - struct statfs fst; - - if (fstatfs(lfd, &fst)) { - return -1; - } - return fst.f_type; -} - -static int pidfd_open(pid_t pid, unsigned int flags) -{ - return syscall(__NR_pidfd_open, pid, flags); -} - -static int pidfd_send_signal(int pidfd, int sig, siginfo_t* info, unsigned int flags) -{ - return syscall(__NR_pidfd_send_signal, pidfd, sig, info, flags); -} - -static int thread_func(void *a) -{ - test_waitsig(); - return 0; -} - -#define CTID_INIT_VAL 1 - -int main(int argc, char* argv[]) -{ - char st[64 * 1024] __attribute__ ((aligned)); - pid_t tid; - int pidfd, test_pidfd; - futex_t exited; - - int clone_flags = CLONE_THREAD; - clone_flags |= CLONE_VM | CLONE_SIGHAND; - clone_flags |= CLONE_CHILD_CLEARTID; - - test_init(argc, argv); - - test_pidfd = pidfd_open(getpid(), 0); - if (test_pidfd < 0) { - pr_perror("pidfd_open() failed"); - return 1; - } - - /* PIDFD_THREAD, PIDFD_SIGNAL_THREAD are supported only with pidfs */ - if (get_fs_type(test_pidfd) != PID_FS_MAGIC) { - test_daemon(); - test_waitsig(); - skip("pidfs not supported."); - close(test_pidfd); - return 0; - } - close(test_pidfd); - - futex_set(&exited, CTID_INIT_VAL); - - tid = clone(thread_func, st + sizeof(st), clone_flags, NULL, NULL, NULL, &(exited.raw)); - if (tid == -1) { - pr_perror("clone() failed"); - return 1; - } - - test_msg("Successfully created a thread with tid: %d\n", tid); - pidfd = pidfd_open(tid, PIDFD_THREAD); - if (pidfd < 0) { - pr_perror("pidfd_open() failed"); - return 1; - } - - test_daemon(); - test_waitsig(); - - if (pidfd_send_signal(pidfd, SIGTERM, NULL, PIDFD_SIGNAL_THREAD)) { - pr_perror("pidfd_send_signal() failed"); - fail(); - close(pidfd); - return 1; - } - - test_msg("Waiting for thread to exit\n"); - futex_wait_until(&exited, 0); - - pass(); - close(pidfd); - return 0; -} diff --git a/test/zdtm/static/pidfd_of_thread.desc 
b/test/zdtm/static/pidfd_of_thread.desc deleted file mode 100644 index 802caed65..000000000 --- a/test/zdtm/static/pidfd_of_thread.desc +++ /dev/null @@ -1 +0,0 @@ -{'flags': 'noauto crfail'} diff --git a/test/zdtm/static/pidfd_self.c b/test/zdtm/static/pidfd_self.c deleted file mode 100644 index 2730ee123..000000000 --- a/test/zdtm/static/pidfd_self.c +++ /dev/null @@ -1,140 +0,0 @@ -#include -#include -#include - -#include "zdtmtst.h" - -const char *test_doc = "Check pidfd /proc/self/fdinfo/ entry remains consistent after checkpoint/restore\n"; -const char *test_author = "Bhavik Sachdev "; - -struct pidfd_status { - unsigned int flags; - pid_t pid; -}; - -static int pidfd_open(pid_t pid, unsigned int flags) -{ - return syscall(__NR_pidfd_open, pid, flags); -} - -static int pidfd_send_signal(int pidfd, int sig, siginfo_t* info, unsigned int flags) -{ - return syscall(__NR_pidfd_send_signal, pidfd, sig, info, flags); -} - -static void show_pidfd(char *prefix, struct pidfd_status *s) -{ - test_msg("\n\t%s\n\tflags: 0%o\n\tpid: %d\n", prefix, s->flags, s->pid); -} - -static int parse_self_fdinfo(int pidfd, struct pidfd_status *s) -{ - char buf[256]; - int ret = -1; - FILE *f; - - sprintf(buf, "/proc/self/fdinfo/%d", pidfd); - f = fopen(buf, "r"); - if (!f) { - perror("Can't open /proc/self/fdinfo/ to parse"); - return -1; - } - - memset(s, 0, sizeof(*s)); - - /* - * flags: file access mode (octal) 02000002 => [O_RDWR | O_CLOEXEC] - * pid: the pid to which we have pidfd open - */ - while (fgets(buf, sizeof(buf), f)) { - if (!fgets(buf, sizeof(buf), f)) - goto parse_err; - - if (sscanf(buf, "flags: 0%o", &s->flags) != 1) { - goto parse_err; - } - - if (!fgets(buf, sizeof(buf), f)) - goto parse_err; - if (!fgets(buf, sizeof(buf), f)) - goto parse_err; - - if (!fgets(buf, sizeof(buf), f)) - goto parse_err; - - if (sscanf(buf, "Pid: %d", &s->pid) != 1) - goto parse_err; - ret = 0; - break; - } - - if (ret) - goto parse_err; -err: - fclose(f); - return ret; - -parse_err: - 
pr_perror("Format error"); - goto err; -} - -static int check_pidfd(int fd, struct pidfd_status *old) -{ - struct pidfd_status new; - - if (parse_self_fdinfo(fd, &new)) - return -1; - - show_pidfd("restored", &new); - - if (old->flags != new.flags || old->pid != new.pid) - return -1; - - return 0; -} - -int main(int argc, char* argv[]) -{ - struct pidfd_status old; - int pidfd, ret; - - test_init(argc, argv); - - pidfd = pidfd_open(getpid(), 0); - if (pidfd < 0) { - pr_perror("pidfd_open failed"); - return 1; - } - - parse_self_fdinfo(pidfd, &old); - - show_pidfd("old", &old); - - if (pidfd_send_signal(pidfd, 0, NULL, 0)) { - pr_perror("Could not send signal"); - return 1; - } - - test_daemon(); - test_waitsig(); - - ret = check_pidfd(pidfd, &old); - if (ret) { - fail(); - goto err; - } - - if (pidfd_send_signal(pidfd, 0, NULL, 0)) { - pr_perror("Could not send signal"); - fail(); - goto err; - } - - pass(); - close(pidfd); - return 0; -err: - close(pidfd); - return 1; -} diff --git a/test/zdtm/static/pthread00-pac.c b/test/zdtm/static/pthread00-pac.c deleted file mode 120000 index 3ee8dc1f1..000000000 --- a/test/zdtm/static/pthread00-pac.c +++ /dev/null @@ -1 +0,0 @@ -pthread00.c \ No newline at end of file diff --git a/test/zdtm/static/scm06.desc b/test/zdtm/static/scm06.desc index 38cc3be51..2eac7e654 100644 --- a/test/zdtm/static/scm06.desc +++ b/test/zdtm/static/scm06.desc @@ -1,4 +1 @@ -# This test isn't executed in the host flavor (in the same network namespace, -# because the kernel releases a test socket asynchronously, so the restore -# can fail if it is executed before the kernel actually destroys the socket. 
-{'flags': 'suid', 'flavor': 'ns uns'} +{'flags': 'suid'} diff --git a/test/zdtm/static/sk-unix-restore-fs-share.c b/test/zdtm/static/sk-unix-restore-fs-share.c deleted file mode 100644 index d4f6dde75..000000000 --- a/test/zdtm/static/sk-unix-restore-fs-share.c +++ /dev/null @@ -1,196 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include - -#include "zdtmtst.h" - -const char *test_doc = "Test non-empty process group with terminated parent and unix socket"; -const char *test_author = "Qiao Ma "; - -char *filename; -TEST_OPTION(filename, string, "socket file name", 1); - -static int create_and_connect(void) -{ - struct sockaddr_un addr; - int client_fd; - - client_fd = socket(AF_UNIX, SOCK_STREAM, 0); - if (client_fd == -1) { - pr_perror("socket"); - return -1; - } - - memset(&addr, 0, sizeof(addr)); - addr.sun_family = AF_UNIX; - if (snprintf(addr.sun_path, sizeof(addr.sun_path), "%s", filename) >= (int)sizeof(addr.sun_path)) { - pr_err("Socket path too long\n"); - close(client_fd); - return -1; - } - - if (connect(client_fd, (struct sockaddr *)&addr, sizeof(addr)) == -1) { - pr_perror("connect"); - close(client_fd); - return -1; - } - - return 0; -} - -static int child(int ready_fd) -{ - int listen_fd; - struct sockaddr_un addr; - int ret = EXIT_FAILURE; - - listen_fd = socket(AF_UNIX, SOCK_STREAM, 0); - if (listen_fd == -1) { - pr_perror("socket"); - return EXIT_FAILURE; - } - - memset(&addr, 0, sizeof(addr)); - addr.sun_family = AF_UNIX; - if (strlen(filename) >= sizeof(addr.sun_path)) { - pr_err("Socket path too long\n"); - goto cleanup; - } - strncpy(addr.sun_path, filename, sizeof(addr.sun_path)); - - unlink(filename); /* Ignore error if file doesn't exist */ - - if (bind(listen_fd, (struct sockaddr *)&addr, sizeof(addr)) == -1) { - pr_perror("bind"); - goto cleanup; - } - - if (listen(listen_fd, 5) == -1) { - pr_perror("listen"); - goto cleanup; - } - - if (create_and_connect() != 0) { - pr_err("Failed to create and 
connect\n"); - goto cleanup; - } - - /* Signal parent that socket is ready */ - if (write(ready_fd, "1", 1) != 1) { - pr_perror("write ready_fd"); - goto cleanup; - } - - /* Wait indefinitely */ - pause(); - - ret = EXIT_SUCCESS; -cleanup: - if (listen_fd != -1) - close(listen_fd); - unlink(filename); - - return ret; -} - -static int zombie_leader(int *cpid) -{ - char buf; - pid_t pid; - int pipefd[2]; - - if (pipe(pipefd) == -1) { - pr_perror("pipe"); - return EXIT_FAILURE; - } - - if (setpgid(0, 0) == -1) { - pr_perror("setpgid"); - return EXIT_FAILURE; - } - - pid = fork(); - if (pid < 0) { - pr_perror("Failed to fork child"); - return EXIT_FAILURE; - } - - if (pid == 0) { - /* Close read end */ - close(pipefd[0]); - exit(child(pipefd[1])); - } - - /* Close write end in parent */ - close(pipefd[1]); - - /* Wait for child to set up socket */ - if (read(pipefd[0], &buf, 1) != 1) { - pr_err("Failed to receive readiness signal from child\n"); - close(pipefd[0]); - return EXIT_FAILURE; - } - close(pipefd[0]); - - *cpid = pid; - return EXIT_SUCCESS; -} - -int main(int argc, char **argv) -{ - int ret = EXIT_FAILURE, status; - pid_t pid; - int *cpid; - - test_init(argc, argv); - - cpid = mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_SHARED, -1, 0); - if (cpid == MAP_FAILED) { - pr_perror("mmap"); - return EXIT_FAILURE; - } - *cpid = 0; - - pid = fork(); - if (pid < 0) { - pr_perror("Failed to fork zombie"); - goto out; - } - - if (pid == 0) - exit(zombie_leader(cpid)); - - if (waitpid(pid, &status, 0) < 0) { - pr_perror("Failed to waitpid zombie"); - goto out; - } - - if (!WIFEXITED(status) || WEXITSTATUS(status) != EXIT_SUCCESS) { - pr_err("Unexpected exit code: %d\n", WEXITSTATUS(status)); - goto out; - } - - if (!*cpid) { - pr_err("Don't know grandchild's pid\n"); - goto out; - } - - test_daemon(); - test_waitsig(); - - ret = EXIT_SUCCESS; - pass(); -out: - /* Clean up */ - if (*cpid) - kill(*cpid, SIGKILL); - - munmap(cpid, sizeof(int)); - - 
return ret; -} diff --git a/test/zdtm/static/sk-unix-restore-fs-share.desc b/test/zdtm/static/sk-unix-restore-fs-share.desc deleted file mode 100644 index 6c4afe5f0..000000000 --- a/test/zdtm/static/sk-unix-restore-fs-share.desc +++ /dev/null @@ -1 +0,0 @@ -{'flavor': 'ns uns'} diff --git a/test/zdtm/static/sock_opts00.c b/test/zdtm/static/sock_opts00.c index 854aaa591..fcf00ffed 100644 --- a/test/zdtm/static/sock_opts00.c +++ b/test/zdtm/static/sock_opts00.c @@ -31,7 +31,7 @@ int main(int argc, char **argv) static const int NOPTS = sizeof(vname) / sizeof(*vname); #undef OPT - int sock, usock, sk, ret = 0, val[NOPTS], rval, i; + int sock, ret = 0, val[NOPTS], rval, i; socklen_t len = sizeof(int); test_init(argc, argv); @@ -42,15 +42,8 @@ int main(int argc, char **argv) return 1; } - usock = socket(AF_UNIX, SOCK_STREAM, 0); - if (usock < 0) { - pr_perror("can't create unix socket"); - return 1; - } - for (i = 0; i < NOPTS; i++) { - sk = vname[i].opt == SO_PASSCRED || vname[i].opt == SO_PASSSEC ? usock : sock; - ret = getsockopt(sk, SOL_SOCKET, vname[i].opt, &val[i], &len); + ret = getsockopt(sock, SOL_SOCKET, vname[i].opt, &val[i], &len); if (ret) { pr_perror("can't get %s", vname[i].name); return 1; @@ -58,13 +51,13 @@ int main(int argc, char **argv) val[i]++; - ret = setsockopt(sk, SOL_SOCKET, vname[i].opt, &val[i], len); + ret = setsockopt(sock, SOL_SOCKET, vname[i].opt, &val[i], len); if (ret) { pr_perror("can't set %s = %d", vname[i].name, val[i]); return 1; } - ret = getsockopt(sk, SOL_SOCKET, vname[i].opt, &rval, &len); + ret = getsockopt(sock, SOL_SOCKET, vname[i].opt, &rval, &len); if (ret) { pr_perror("can't re-get %s", vname[i].name); return 1; @@ -85,8 +78,7 @@ int main(int argc, char **argv) test_waitsig(); for (i = 0; i < NOPTS; i++) { - sk = vname[i].opt == SO_PASSCRED || vname[i].opt == SO_PASSSEC ? 
usock : sock; - ret = getsockopt(sk, SOL_SOCKET, vname[i].opt, &rval, &len); + ret = getsockopt(sock, SOL_SOCKET, vname[i].opt, &rval, &len); if (ret) { pr_perror("can't verify %s", vname[i].name); return 1; @@ -101,7 +93,6 @@ int main(int argc, char **argv) pass(); close(sock); - close(usock); return 0; } diff --git a/test/zdtm/static/socket-tcp-closed-last-ack.desc b/test/zdtm/static/socket-tcp-closed-last-ack.desc index c77d58477..d4cfe5064 100644 --- a/test/zdtm/static/socket-tcp-closed-last-ack.desc +++ b/test/zdtm/static/socket-tcp-closed-last-ack.desc @@ -1,10 +1,10 @@ { 'deps': [ '/bin/sh', '/sbin/iptables|/usr/sbin/iptables', - '/lib/xtables/libxt_tcp.so|/usr/lib64/xtables/libxt_tcp.so|/usr/lib/powerpc64le-linux-gnu/xtables/libxt_tcp.so|/usr/lib/x86_64-linux-gnu/xtables/libxt_tcp.so|/usr/lib/s390x-linux-gnu/xtables/libxt_tcp.so|/usr/lib/xtables/libxt_tcp.so|/usr/lib/aarch64-linux-gnu/xtables/libxt_tcp.so|/usr/lib/riscv64-linux-gnu/xtables/libxt_tcp.so', - '/lib/xtables/libxt_standard.so|/usr/lib64/xtables/libxt_standard.so|/usr/lib/powerpc64le-linux-gnu/xtables/libxt_standard.so|/usr/lib/x86_64-linux-gnu/xtables/libxt_standard.so|/usr/lib/s390x-linux-gnu/xtables/libxt_standard.so|/usr/lib/xtables/libxt_standard.so|/usr/lib/aarch64-linux-gnu/xtables/libxt_standard.so|/usr/lib/riscv64-linux-gnu/xtables/libxt_standard.so', + '/lib/xtables/libxt_tcp.so|/usr/lib64/xtables/libxt_tcp.so|/usr/lib/powerpc64le-linux-gnu/xtables/libxt_tcp.so|/usr/lib/x86_64-linux-gnu/xtables/libxt_tcp.so|/usr/lib/s390x-linux-gnu/xtables/libxt_tcp.so|/usr/lib/xtables/libxt_tcp.so|/usr/lib/aarch64-linux-gnu/xtables/libxt_tcp.so', + '/lib/xtables/libxt_standard.so|/usr/lib64/xtables/libxt_standard.so|/usr/lib/powerpc64le-linux-gnu/xtables/libxt_standard.so|/usr/lib/x86_64-linux-gnu/xtables/libxt_standard.so|/usr/lib/s390x-linux-gnu/xtables/libxt_standard.so|/usr/lib/xtables/libxt_standard.so|/usr/lib/aarch64-linux-gnu/xtables/libxt_standard.so', ], 'opts': '--tcp-established', 'flags': 
'suid nouser samens', - 'feature' : 'tcp_half_closed has_ipt_legacy', + 'feature' : 'tcp_half_closed', 'flavor': 'ns uns', } diff --git a/test/zdtm/static/socket-tcp-closing.c b/test/zdtm/static/socket-tcp-closing.c index df291d446..87e1d7533 100644 --- a/test/zdtm/static/socket-tcp-closing.c +++ b/test/zdtm/static/socket-tcp-closing.c @@ -31,13 +31,10 @@ static int port = 8880; int fill_sock_buf(int fd) { - char zdtm[512]; int flags; int size; int ret; - memset(zdtm, 5, sizeof(zdtm)); - flags = fcntl(fd, F_GETFL, 0); if (flags == -1) { pr_perror("Can't get flags"); @@ -50,6 +47,7 @@ int fill_sock_buf(int fd) size = 0; while (1) { + char zdtm[] = "zdtm test packet"; ret = write(fd, zdtm, sizeof(zdtm)); if (ret == -1) { if (errno == EAGAIN) diff --git a/test/zdtm/static/socket-tcp-reseted.desc b/test/zdtm/static/socket-tcp-reseted.desc index ff92e9f9f..3ebdfeef8 100644 --- a/test/zdtm/static/socket-tcp-reseted.desc +++ b/test/zdtm/static/socket-tcp-reseted.desc @@ -1,10 +1,10 @@ { 'deps': [ '/bin/sh', '/sbin/iptables|/usr/sbin/iptables', - '/lib/xtables/libxt_tcp.so|/usr/lib64/xtables/libxt_tcp.so|/usr/lib/powerpc64le-linux-gnu/xtables/libxt_tcp.so|/usr/lib/x86_64-linux-gnu/xtables/libxt_tcp.so|/usr/lib/xtables/libxt_tcp.so|/usr/lib/s390x-linux-gnu/xtables/libxt_tcp.so|/usr/lib/aarch64-linux-gnu/xtables/libxt_tcp.so|/usr/lib/riscv64-linux-gnu/xtables/libxt_tcp.so', - '/lib/xtables/libxt_standard.so|/usr/lib64/xtables/libxt_standard.so|/usr/lib/powerpc64le-linux-gnu/xtables/libxt_standard.so|/usr/lib/x86_64-linux-gnu/xtables/libxt_standard.so|/usr/lib/xtables/libxt_standard.so|/usr/lib/s390x-linux-gnu/xtables/libxt_standard.so|/usr/lib/aarch64-linux-gnu/xtables/libxt_standard.so|/usr/lib/riscv64-linux-gnu/xtables/libxt_standard.so', - 
'/lib/xtables/libipt_REJECT.so|/usr/lib64/xtables/libipt_REJECT.so|/usr/lib/powerpc64le-linux-gnu/xtables/libipt_REJECT.so|/usr/lib/x86_64-linux-gnu/xtables/libipt_REJECT.so|/usr/lib/xtables/libipt_REJECT.so|/usr/lib/s390x-linux-gnu/xtables/libipt_REJECT.so|/usr/lib/aarch64-linux-gnu/xtables/libipt_REJECT.so|/usr/lib/riscv64-linux-gnu/xtables/libipt_REJECT.so', + '/lib/xtables/libxt_tcp.so|/usr/lib64/xtables/libxt_tcp.so|/usr/lib/powerpc64le-linux-gnu/xtables/libxt_tcp.so|/usr/lib/x86_64-linux-gnu/xtables/libxt_tcp.so|/usr/lib/xtables/libxt_tcp.so|/usr/lib/s390x-linux-gnu/xtables/libxt_tcp.so|/usr/lib/aarch64-linux-gnu/xtables/libxt_tcp.so', + '/lib/xtables/libxt_standard.so|/usr/lib64/xtables/libxt_standard.so|/usr/lib/powerpc64le-linux-gnu/xtables/libxt_standard.so|/usr/lib/x86_64-linux-gnu/xtables/libxt_standard.so|/usr/lib/xtables/libxt_standard.so|/usr/lib/s390x-linux-gnu/xtables/libxt_standard.so|/usr/lib/aarch64-linux-gnu/xtables/libxt_standard.so', + '/lib/xtables/libipt_REJECT.so|/usr/lib64/xtables/libipt_REJECT.so|/usr/lib/powerpc64le-linux-gnu/xtables/libipt_REJECT.so|/usr/lib/x86_64-linux-gnu/xtables/libipt_REJECT.so|/usr/lib/xtables/libipt_REJECT.so|/usr/lib/s390x-linux-gnu/xtables/libipt_REJECT.so|/usr/lib/aarch64-linux-gnu/xtables/libipt_REJECT.so', ], 'opts': '--tcp-established', 'flags': 'suid nouser samens', - 'feature' : 'tcp_half_closed has_ipt_legacy' + 'feature' : 'tcp_half_closed' } diff --git a/test/zdtm/static/socket-tcp-syn-sent.desc b/test/zdtm/static/socket-tcp-syn-sent.desc index 52382414b..4cc23c8fc 100644 --- a/test/zdtm/static/socket-tcp-syn-sent.desc +++ b/test/zdtm/static/socket-tcp-syn-sent.desc @@ -1,9 +1,9 @@ { 'deps': [ '/bin/sh', '/sbin/iptables|/usr/sbin/iptables', - 
'/lib/xtables/libxt_tcp.so|/usr/lib64/xtables/libxt_tcp.so|/usr/lib/powerpc64le-linux-gnu/xtables/libxt_tcp.so|/usr/lib/x86_64-linux-gnu/xtables/libxt_tcp.so|/usr/lib/xtables/libxt_tcp.so|/usr/lib/s390x-linux-gnu/xtables/libxt_tcp.so|/usr/lib/aarch64-linux-gnu/xtables/libxt_tcp.so|/usr/lib/riscv64-linux-gnu/xtables/libxt_tcp.so', - '/lib/xtables/libxt_standard.so|/usr/lib64/xtables/libxt_standard.so|/usr/lib/powerpc64le-linux-gnu/xtables/libxt_standard.so|/usr/lib/x86_64-linux-gnu/xtables/libxt_standard.so|/usr/lib/xtables/libxt_standard.so|/usr/lib/s390x-linux-gnu/xtables/libxt_standard.so|/usr/lib/aarch64-linux-gnu/xtables/libxt_standard.so|/usr/lib/riscv64-linux-gnu/xtables/libxt_standard.so', + '/lib/xtables/libxt_tcp.so|/usr/lib64/xtables/libxt_tcp.so|/usr/lib/powerpc64le-linux-gnu/xtables/libxt_tcp.so|/usr/lib/x86_64-linux-gnu/xtables/libxt_tcp.so|/usr/lib/xtables/libxt_tcp.so|/usr/lib/s390x-linux-gnu/xtables/libxt_tcp.so|/usr/lib/aarch64-linux-gnu/xtables/libxt_tcp.so', + '/lib/xtables/libxt_standard.so|/usr/lib64/xtables/libxt_standard.so|/usr/lib/powerpc64le-linux-gnu/xtables/libxt_standard.so|/usr/lib/x86_64-linux-gnu/xtables/libxt_standard.so|/usr/lib/xtables/libxt_standard.so|/usr/lib/s390x-linux-gnu/xtables/libxt_standard.so|/usr/lib/aarch64-linux-gnu/xtables/libxt_standard.so', ], 'opts': '--tcp-established', 'flags': 'suid nouser samens', - 'feature' : 'tcp_half_closed has_ipt_legacy' + 'feature' : 'tcp_half_closed' } diff --git a/test/zdtm/static/socket6_icmp.c b/test/zdtm/static/socket6_icmp.c deleted file mode 120000 index 24d8fd806..000000000 --- a/test/zdtm/static/socket6_icmp.c +++ /dev/null @@ -1 +0,0 @@ -socket_icmp.c \ No newline at end of file diff --git a/test/zdtm/static/socket_icmp.c b/test/zdtm/static/socket_icmp.c deleted file mode 100644 index f72e348bf..000000000 --- a/test/zdtm/static/socket_icmp.c +++ /dev/null @@ -1,128 +0,0 @@ -#include "zdtmtst.h" - -const char *test_doc = "static test for ICMP socket\n"; -const char 
*test_author = "समीर सिंह Sameer Singh \n"; - -/* Description: - * Send a ping to localhost using ICMP socket - */ - -#include -#include -#include -#include -#if defined(ZDTM_IPV6) -#include -#else -#include -#endif -#include -#include -#include - -#include "sysctl.h" - -#define PACKET_SIZE 64 -#define RECV_TIMEOUT 1 - -static int echo_id = 1234; - -#if defined(ZDTM_IPV6) -#define TEST_ICMP_ECHOREPLY ICMP6_ECHOREPLY -#else -#define TEST_ICMP_ECHOREPLY ICMP_ECHOREPLY -#endif -int main(int argc, char **argv) -{ - int ret, sock, seq = 0; - char packet[PACKET_SIZE], recv_packet[PACKET_SIZE]; - - struct timeval tv; -#if defined(ZDTM_IPV6) - struct sockaddr_in6 addr, recv_addr; -#else - struct icmphdr icmp_header, *icmp_reply; -#endif - struct sockaddr_in addr, recv_addr; - socklen_t addr_len; - - // Allow GIDs 0-58468 to open an unprivileged ICMP socket - if (sysctl_write_str("/proc/sys/net/ipv4/ping_group_range", "0 58468")) - return -1; - - test_init(argc, argv); - -#if defined(ZDTM_IPV6) - sock = socket(PF_INET6, SOCK_DGRAM, IPPROTO_ICMPV6); -#else - sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_ICMP); -#endif - if (sock < 0) { - pr_perror("Can't create socket"); - return 1; - } - - tv.tv_sec = RECV_TIMEOUT; - tv.tv_usec = 0; - if (setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)) < 0) { - pr_perror("Can't set socket option"); - return 1; - } - - memset(&addr, 0, sizeof(addr)); - memset(&icmp_header, 0, sizeof(icmp_header)); -#if defined(ZDTM_IPV6) - addr.sin6_family = AF_INET6; - inet_pton(AF_INET6, "::1", &addr.sin6_addr); - - icmp_header.icmp6_type = ICMP6_ECHO_REQUEST; - icmp_header.icmp6_code = 0; - icmp_header.icmp6_id = echo_id; - icmp_header.icmp6_seq = seq; -#else - addr.sin_family = AF_INET; - addr.sin_addr.s_addr = inet_addr("127.0.0.1"); - - icmp_header.type = ICMP_ECHO; - icmp_header.code = 0; - icmp_header.un.echo.id = echo_id; - icmp_header.un.echo.sequence = seq; -#endif - - memcpy(packet, &icmp_header, sizeof(icmp_header)); - memset(packet + 
sizeof(icmp_header), 0xa5, - PACKET_SIZE - sizeof(icmp_header)); - - test_daemon(); - test_waitsig(); - - ret = sendto(sock, packet, PACKET_SIZE, 0, - (struct sockaddr *)&addr, sizeof(addr)); - - if (ret < 0) { - fail("Can't send"); - return 1; - } - - addr_len = sizeof(recv_addr); - - ret = recvfrom(sock, recv_packet, sizeof(recv_packet), 0, - (struct sockaddr *)&recv_addr, &addr_len); - - if (ret < 0) { - fail("Can't recv"); - return 1; - } - - icmp_reply = (struct icmphdr *)recv_packet; - - if (icmp_reply->type != ICMP_ECHOREPLY) { - fail("Got no ICMP_ECHO_REPLY"); - return 1; - } - - close(sock); - - pass(); - return 0; -} diff --git a/test/zdtm/static/tempfs_subns.c b/test/zdtm/static/tempfs_subns.c index 490fdad6e..ed3ef9a3a 100644 --- a/test/zdtm/static/tempfs_subns.c +++ b/test/zdtm/static/tempfs_subns.c @@ -20,7 +20,7 @@ int main(int argc, char **argv) { int fds[2], i; pid_t pid; - int status, fd = -1; + int fd, status; test_init(argc, argv); diff --git a/test/zdtm/static/timers01.c b/test/zdtm/static/timers01.c deleted file mode 100644 index 10ecc3481..000000000 --- a/test/zdtm/static/timers01.c +++ /dev/null @@ -1,74 +0,0 @@ -#include -#include -#include - -#include "zdtmtst.h" - -const char *test_doc = "Checks non-periodic timers\n"; -const char *test_author = "Andrei Vagin "; - -static struct { - const int timer_type; - const int signal; - volatile sig_atomic_t count; -} timer_tests[] = { - /* from slowest to fastest */ - { ITIMER_VIRTUAL, SIGVTALRM }, - { ITIMER_PROF, SIGPROF }, - { ITIMER_REAL, SIGALRM }, -}; - -#define NUM_TIMERS (sizeof(timer_tests) / sizeof(timer_tests[0])) -#define TIMER_TIMEOUT 3600 -#define TIMER_ALLOWED_DELTA 300 - -static void setup_timers(void) -{ - int i; - struct itimerval tv = { - .it_interval = { .tv_sec = 0, .tv_usec = 0 }, - .it_value = { .tv_sec = TIMER_TIMEOUT, .tv_usec = 0 }, - }; - - for (i = 0; i < NUM_TIMERS; i++) { - if (setitimer(timer_tests[i].timer_type, &tv, NULL) < 0) { - pr_perror("can't set timer %d", i); 
- exit(1); - } - } -} - -static void check_timers(void) -{ - int i; - - for (i = 0; i < NUM_TIMERS; i++) { - struct itimerval tv = {}; - - if (getitimer(timer_tests[i].timer_type, &tv)) { - pr_perror("gettimer"); - exit(1); - } - if (tv.it_value.tv_sec > TIMER_TIMEOUT || - tv.it_value.tv_sec < TIMER_TIMEOUT - TIMER_ALLOWED_DELTA) { - fail("%ld isn't in [%d, %d]", (long)tv.it_value.tv_sec, - TIMER_TIMEOUT, - TIMER_TIMEOUT - TIMER_ALLOWED_DELTA); - exit(1); - } - } - pass(); -} - -int main(int argc, char **argv) -{ - test_init(argc, argv); - - setup_timers(); - - test_daemon(); - test_waitsig(); - - check_timers(); - return 0; -} diff --git a/test/zdtm/static/uprobes.c b/test/zdtm/static/uprobes.c deleted file mode 100644 index 6ef9a56bc..000000000 --- a/test/zdtm/static/uprobes.c +++ /dev/null @@ -1,295 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "zdtmtst.h" - -const char *test_doc = "Test the --allow-uprobes option"; -const char *test_author = "Shashank Balaji "; - -#define UPROBE_GROUP_NAME "zdtm" -#define UPROBE_EVENT_NAME "uprobes_test" -#define UPROBED_FUNCTION uprobe_target - -/* - * A uprobe can be set at the start of a function, but not all instructions - * will trigger the creation of a uprobes vma. 
- * - * Examples: - * - aarch64: if the function is a single `ret`, then no vma creation - * - x64: if the function is `nop; ret`, then no vma creation - * - * So to guarantee vma creation, create a volatile dummy variable (to prevent - * compiler optimization) and use it (to prevent "unused variable" warning) - */ -void UPROBED_FUNCTION(void) { - volatile int dummy __maybe_unused = 0; - dummy += 1; -} -/* Calling via volatile function pointer ensures noinline at callsite */ -typedef void (*func_ptr)(void); -volatile func_ptr uprobe_target_alias = UPROBED_FUNCTION; - -struct uprobe_context { - struct tracefs_instance *instance; - struct tracefs_dynevent *uprobe; -}; - -volatile bool got_sigtrap = false; - -/* - * Returns the file offset of a symbol in the executable of this program - * Returns 0 on failure -*/ -uint64_t calc_sym_offset(const char *sym_name) -{ - GElf_Shdr section_header; - Elf_Scn *section = NULL; - Elf_Data *symtab_data; - uint64_t offset = 0; - char buf[PATH_MAX]; - GElf_Sym symbol; - ssize_t n_bytes; - int n_entries; - Elf *elf; - int fd; - int i; - - if (elf_version(EV_CURRENT) == EV_NONE) { - pr_err("ELF version of libelf is lower than that of the program\n"); - return 0; - } - - n_bytes = readlink("/proc/self/exe", buf, sizeof(buf)); - if (n_bytes < 0) { - pr_perror("Failed to readlink /proc/self/exe"); - return 0; - } - buf[n_bytes] = '\0'; - - fd = open(buf, O_RDONLY); - if (fd < 0) { - pr_perror("Failed to open self-executable"); - return 0; - } - - elf = elf_begin(fd, ELF_C_READ, NULL); - if (!elf) { - pr_err("%s\n", elf_errmsg(elf_errno())); - goto out_fd; - } - - /* Look for the symbol table section and its header */ - while ((section = elf_nextscn(elf, section)) != NULL) { - gelf_getshdr(section, §ion_header); - if (section_header.sh_type == SHT_SYMTAB) - break; - } - if (!section) { - pr_err("Failed to find symbol table\n"); - goto out_elf; - } - symtab_data = elf_getdata(section, NULL); - n_entries = section_header.sh_size / 
section_header.sh_entsize; - - /* Look for a symbol with the required name */ - for (i = 0; i < n_entries; i++) { - gelf_getsym(symtab_data, i, &symbol); - /* Symbol table's sh_link is the index of the string table section header */ - if (!strcmp(sym_name, - elf_strptr(elf, section_header.sh_link, symbol.st_name))) - break; - } - if (i == n_entries) { - pr_err("Failed to find symbol \"%s\"\n", sym_name); - goto out_elf; - } - - /* Get the section the symbol belongs to (mostly .text) */ - section = elf_getscn(elf, symbol.st_shndx); - gelf_getshdr(section, §ion_header); - offset = symbol.st_value - section_header.sh_addr + section_header.sh_offset; - -out_elf: - elf_end(elf); -out_fd: - close(fd); - return offset; -} - -/* - * Set and enable a uprobe on the file at the given offset - * Returns struct uprobe_context with members set to NULL on failure -*/ -struct uprobe_context enable_uprobe(const char *file, uint64_t offset) -{ - struct tracefs_instance *trace_instance; - struct tracefs_dynevent *uprobe; - struct uprobe_context context = {}; - - trace_instance = tracefs_instance_create("zdtm_uprobes_test"); - if (!trace_instance) { - pr_perror("Failed to create tracefs instance"); - return context; - } - tracefs_instance_reset(trace_instance); - - uprobe = tracefs_uprobe_alloc(UPROBE_GROUP_NAME, UPROBE_EVENT_NAME, file, offset, NULL); - if (!uprobe) { - pr_perror("Failed to allocate uprobe"); - goto instance_destroy; - } - - if (tracefs_dynevent_create(uprobe)) { - pr_perror("Failed to create uprobe"); - goto uprobe_free; - } - - if (tracefs_event_enable(trace_instance, UPROBE_GROUP_NAME, UPROBE_EVENT_NAME)) { - pr_perror("Failed to enable uprobe"); - goto uprobe_destroy; - } - - context.instance = trace_instance; - context.uprobe = uprobe; - return context; - -uprobe_destroy: - tracefs_dynevent_destroy(uprobe, false); -uprobe_free: - tracefs_dynevent_free(uprobe); -instance_destroy: - tracefs_instance_destroy(trace_instance); - tracefs_instance_free(trace_instance); 
- return context; -} - -void destroy_uprobe(struct uprobe_context context) -{ - tracefs_dynevent_destroy(context.uprobe, true); - tracefs_dynevent_free(context.uprobe); - tracefs_instance_destroy(context.instance); - tracefs_instance_free(context.instance); -} - -/* - * Check for the existence of the "[uprobes]" vma in /proc/self/maps - * Returns -1 on failure, 0 if not found, 1 if found -*/ -int uprobes_vma_exists(void) -{ - FILE *f; - char buf[LINE_MAX]; - int ret = 0; - - f = fopen("/proc/self/maps", "r"); - if (!f) { - pr_perror("Failed to open /proc/self/maps"); - return -1; - } - - while (fgets(buf, sizeof(buf), f)) { - if (strstr(buf, "[uprobes]")) { - ret = 1; - break; - } - } - if (ret == 0 && !feof(f)) { - pr_err("Failed to finish reading /proc/self/maps\n"); - ret = -1; - } - - fclose(f); - return ret; -} - -/* - * SIGTRAP is sent if execution reaches a previously set uprobed location, and - * the corresponding uprobe is not active. We don't want this to happen on restore -*/ -void sigtrap_handler(int signo, siginfo_t *info, void* context) -{ - if (info->si_code == SI_KERNEL) { - got_sigtrap = true; - fail("SIGTRAP on attempting to call uprobed function"); - } -} - -int main(int argc, char **argv) -{ - struct uprobe_context context; - struct sigaction sa; - char buf[PATH_MAX]; - uint64_t offset; - int n_bytes; - int ret = 1; - - test_init(argc, argv); - - offset = calc_sym_offset(__stringify(UPROBED_FUNCTION)); - if (!offset) - return 1; - - n_bytes = readlink("/proc/self/exe", buf, sizeof(buf)); - if (n_bytes < 0) { - pr_perror("Failed to readlink /proc/self/exe"); - return 1; - } - buf[n_bytes] = '\0'; - - sa.sa_flags = SA_SIGINFO; - sa.sa_sigaction = sigtrap_handler; - sigemptyset(&sa.sa_mask); - if (sigaction(SIGTRAP, &sa, NULL)) { - pr_perror("Failed to set SIGTRAP handler"); - return 1; - } - - context = enable_uprobe(buf, offset); - if (!context.instance) - return 1; - - /* - * Execution must reach the uprobed location at least once - * for the 
kernel to create the uprobes vma - */ - uprobe_target_alias(); - - switch (uprobes_vma_exists()) { - case -1: - goto out_uprobe; - break; - case 0: - pr_err("uprobes vma does not exist\n"); - goto out_uprobe; - break; - case 1: - test_msg("Found uprobes vma\n"); - break; - } - - test_daemon(); - test_waitsig(); - - /* - * Calling the uprobed function after restore should not cause - * a SIGTRAP, since the uprobe is still active - */ - uprobe_target_alias(); - if (!got_sigtrap) { - pass(); - ret = 0; - } - -out_uprobe: - destroy_uprobe(context); - return ret; -} diff --git a/test/zdtm/static/uprobes.desc b/test/zdtm/static/uprobes.desc deleted file mode 100644 index 6eab1f498..000000000 --- a/test/zdtm/static/uprobes.desc +++ /dev/null @@ -1,6 +0,0 @@ -{ - 'feature': 'cgroupns', - 'flags': 'suid nouser', - 'flavor': 'h', - 'opts': '--allow-uprobes' -} diff --git a/test/zdtm/static/vdso-proxy.c b/test/zdtm/static/vdso-proxy.c index a53e6cdc0..43334974f 100644 --- a/test/zdtm/static/vdso-proxy.c +++ b/test/zdtm/static/vdso-proxy.c @@ -70,7 +70,6 @@ static int parse_maps(struct vm_area *vmas) #endif v->is_vvar_or_vdso |= strstr(buf, "[vdso]") != NULL; v->is_vvar_or_vdso |= strstr(buf, "[vvar]") != NULL; - v->is_vvar_or_vdso |= strstr(buf, "[vvar_vclock]") != NULL; test_msg("[NOTE]\tVMA: [%#" PRIx64 ", %#" PRIx64 "]\n", v->start, v->end); } @@ -87,35 +86,42 @@ static int parse_maps(struct vm_area *vmas) return i; } -static int check_vvar_vdso(struct vm_area *before, int nr_before, struct vm_area *after, int nr_after) +int compare_vmas(struct vm_area *vmax, struct vm_area *vmay) +{ + if (vmax->start > vmay->start) + return 1; + if (vmax->start < vmay->start) + return -1; + if (vmax->end > vmay->end) + return 1; + if (vmax->end < vmay->end) + return -1; + + return 0; +} + +static int check_vvar_vdso(struct vm_area *before, struct vm_area *after) { int i, j = 0; - for (i = 0, j = 0; i < nr_before || j < nr_after;) { - if (j == nr_after || before[i].start < after[j].start) 
{ + for (i = 0; i < MAX_VMAS && j < MAX_VMAS; i++, j++) { + int cmp = compare_vmas(&before[i], &after[j]); + + if (cmp == 0) + continue; + + if (cmp < 0) { /* Lost mapping */ test_msg("[NOTE]\tLost mapping: %#" PRIx64 "-%#" PRIx64 "\n", before[i].start, before[i].end); + j--; if (before[i].is_vvar_or_vdso) { fail("Lost vvar/vdso mapping"); return -1; } - i++; continue; } - if (i == nr_before || before[i].start > after[j].start) { - test_msg("[NOTE]\tNew mapping appeared: %#" PRIx64 "-%#" PRIx64 "\n", after[j].start, after[j].end); - j++; - continue; - } - if (before[i].end == after[j].end) { - i++; - j++; - } else if (before[i].end > after[j].end) { - before[i].start = after[j].end; - j++; - } else { - after[j].start = before[i].end; - i++; - } + + test_msg("[NOTE]\tNew mapping appeared: %#" PRIx64 "-%#" PRIx64 "\n", after[j].start, after[j].end); + i--; } return 0; @@ -123,10 +129,11 @@ static int check_vvar_vdso(struct vm_area *before, int nr_before, struct vm_area static struct vm_area vmas_before[MAX_VMAS]; static struct vm_area vmas_after[MAX_VMAS]; -static int nr_before, nr_after; int main(int argc, char *argv[]) { + int nr_before, nr_after; + test_init(argc, argv); test_msg("[NOTE]\tMappings before:\n"); @@ -147,7 +154,7 @@ int main(int argc, char *argv[]) } /* After restore vDSO/VVAR blobs must remain in the old place. 
*/ - if (check_vvar_vdso(vmas_before, nr_before, vmas_after, nr_after)) + if (check_vvar_vdso(vmas_before, vmas_after)) return -1; if (nr_before + 2 < nr_after) { diff --git a/test/zdtm/static/vdso02.c b/test/zdtm/static/vdso02.c index 5779b7fd6..2050bca71 100644 --- a/test/zdtm/static/vdso02.c +++ b/test/zdtm/static/vdso02.c @@ -29,8 +29,7 @@ static int parse_vm_area(char *buf, struct vm_area *vma) return -1; } -static int find_blobs(pid_t pid, struct vm_area *vdso, - struct vm_area *vvar, struct vm_area *vvar_vclock) +static int find_blobs(pid_t pid, struct vm_area *vdso, struct vm_area *vvar) { char buf[BUF_SZ]; int ret = -1; @@ -40,8 +39,6 @@ static int find_blobs(pid_t pid, struct vm_area *vdso, vdso->end = VDSO_BAD_ADDR; vvar->start = VVAR_BAD_ADDR; vvar->end = VVAR_BAD_ADDR; - vvar_vclock->start = VVAR_BAD_ADDR; - vvar_vclock->end = VVAR_BAD_ADDR; if (snprintf(buf, BUF_SZ, "/proc/%d/maps", pid) < 0) { pr_perror("snprintf() failure for path"); @@ -60,18 +57,12 @@ static int find_blobs(pid_t pid, struct vm_area *vdso, if (strstr(buf, "[vvar]") && parse_vm_area(buf, vvar)) goto err; - if (strstr(buf, "[vvar_vclock]") && - parse_vm_area(buf, vvar_vclock)) - goto err; } if (vdso->start != VDSO_BAD_ADDR) test_msg("[vdso] %lx-%lx\n", vdso->start, vdso->end); if (vvar->start != VVAR_BAD_ADDR) test_msg("[vvar] %lx-%lx\n", vvar->start, vvar->end); - if (vvar_vclock->start != VVAR_BAD_ADDR) - test_msg("[vvar_vclock] %lx-%lx\n", - vvar_vclock->start, vvar_vclock->end); ret = 0; err: fclose(maps); @@ -152,10 +143,10 @@ void sys_exit(int status) static int unmap_blobs(void) { - struct vm_area vdso, vvar, vvar_vclock; + struct vm_area vdso, vvar; int ret; - if (find_blobs(getpid(), &vdso, &vvar, &vvar_vclock)) + if (find_blobs(getpid(), &vdso, &vvar)) return -1; if (vdso.start != VDSO_BAD_ADDR) { @@ -168,19 +159,13 @@ static int unmap_blobs(void) if (ret) return ret; } - if (vvar_vclock.start != VVAR_BAD_ADDR) { - ret = sys_munmap((void *)vvar_vclock.start, - 
vvar_vclock.end - vvar_vclock.start); - if (ret) - return ret; - } return 0; } int main(int argc, char *argv[]) { - struct vm_area vdso, vvar, vvar_vclock; + struct vm_area vdso, vvar; pid_t child; int status, ret = -1; @@ -216,11 +201,9 @@ int main(int argc, char *argv[]) goto out_kill; } - if (find_blobs(child, &vdso, &vvar, &vvar_vclock)) + if (find_blobs(child, &vdso, &vvar)) goto out_kill; - if (vdso.start != VDSO_BAD_ADDR || - vvar.start != VVAR_BAD_ADDR || - vvar_vclock.start != VVAR_BAD_ADDR) { + if (vdso.start != VDSO_BAD_ADDR || vvar.start != VVAR_BAD_ADDR) { pr_err("Found vvar or vdso blob(s) in child, which should have unmapped them\n"); goto out_kill; } @@ -228,7 +211,7 @@ int main(int argc, char *argv[]) test_daemon(); test_waitsig(); - if (find_blobs(child, &vdso, &vvar, &vvar_vclock)) + if (find_blobs(child, &vdso, &vvar)) goto out_kill; if (vdso.start != VDSO_BAD_ADDR || vvar.start != VVAR_BAD_ADDR) { pr_err("Child without vdso got it after C/R\n");